4 * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
9 * 10-02-07,Nigel Brown(EMBL): changed ifstream to InFileStream to handle
10 * cross-platform end-of-lines.
16 #include "RSFFileParser.h"
22 * Constructor sets up the chartab array.
// Constructor taking a single string argument, filePath -- presumably the path of
// the RSF file to parse (TODO confirm). The body is not visible in this excerpt.
25 RSFFileParser::RSFFileParser(string filePath)
// Builds a vector of sequences by calling getSeq(firstSeq + i, offendingSeq) and
// appending each result to seqRangeVector.
// The loop header that defines i is not visible in this excerpt; presumably it
// runs `no` times -- TODO confirm.
// offendingSeq is passed straight through to getSeq.
31 vector<Sequence> RSFFileParser::getSeqRange(int firstSeq, int no, string *offendingSeq)
33 vector<Sequence> seqRangeVector;
38 Sequence tempSeq = getSeq(firstSeq + i, offendingSeq);
// getSeq can signal a problem through parseExitCode (it sets SEQUENCETOOBIG for an
// over-long sequence), so the code is inspected after every call.
39 if (parseExitCode!=OK) {
// On failure nothing partial is returned: the collected sequences are dropped and
// the caller receives an empty vector.
40 seqRangeVector.clear();
41 return seqRangeVector;
43 seqRangeVector.push_back(tempSeq);
45 return seqRangeVector;
50 * get the sequence seqNum from the file.
51 * @param seqNum The number of the sequence to get.
52 * @return The sequence seqNum.
// Reads sequence number seqNum from the file named by fileName and returns it as
// Sequence(characterSeq, name, title).
// Sequence(blank, blank, blank) is returned instead when a getline fails while
// looking for the wanted entry, for its "name" line or for its "sequence" line,
// and when the sequence is longer than userParameters->getMaxAllowedSeqLength().
// In the over-long case parseExitCode is set to SEQUENCETOOBIG and, if offendingSeq
// is not NULL, the sequence name is copied into it.
// Only part of this function is visible here; the notes below cover the visible
// statements only.
54 Sequence RSFFileParser::getSeq(int seqNum, string *offendingSeq)
// Line buffer: every getline call below reads into it with a limit of MAXLINE + 1.
56 char _line[MAXLINE + 1];
// Name buffer: filled by strncpy with at most MAXNAMES characters further down.
57 char _sname[MAXNAMES + 1];
58 string characterSeq = "";
66 int _currentSeqNum = 0; // Not at any sequence yet!
// NOTE(review): the stream is allocated with new; the matching delete is not
// visible in this excerpt -- confirm it is released on every return path.
70 _fileIn = new InFileStream; //nige
71 _fileIn->open(fileName.c_str()); //nige
72 _fileIn->seekg(0, std::ios::beg); // start at the beginning
74 // Need to get the cursor to the beginning of the correct sequence.
75 // This will be the case when we get to the seqNum {
76 while (_currentSeqNum != seqNum)
80 if(!_fileIn->getline(_line, MAXLINE + 1)) // If we cannot get anymore!
// The file ended (or the read failed) before the requested entry was reached.
83 return Sequence(blank, blank, blank);
87 if(_currentSeqNum == seqNum) // Found the sequence
91 // Get next line so that we are past the '{' line
92 _fileIn->getline(_line, MAXLINE + 1);
// Skip forward until a line whose keyword is "name"; give up with a blank
// Sequence if the input runs out first.
95 while (!keyword(_line, "name"))
97 if (!_fileIn->getline(_line, MAXLINE + 1))
100 return Sequence(blank, blank, blank);
// Scan starts at index 5; presumably this steps over the "name" keyword and its
// separator to find where the entry name begins (loop body not visible) -- TODO confirm.
103 for (i = 5; i <= (int)strlen(_line); i++)
110 strncpy(_sname, _line + i, MAXNAMES); // remember entryname
// NOTE(review): strncpy leaves _sname unterminated when the source has MAXNAMES or
// more characters, and the terminator is only written after this loop. Unless
// _sname was zeroed earlier (not visible here), strlen may read past the buffer.
111 for (i = 0; i <= (int)strlen(_sname); i++)
113 if (_sname[i] == ' ')
// Force termination at the last slot so the name never exceeds MAXNAMES characters.
120 _sname[MAXNAMES] = EOS;
121 utilityObject->rTrim(_sname);
122 utilityObject->blankToUnderscore(_sname); // replace blanks with '_'
123 name = string(_sname);
// Same pattern as for "name": advance to the "sequence" line or bail out.
126 while (!keyword(_line, "sequence"))
128 if (!_fileIn->getline(_line, MAXLINE + 1))
131 return Sequence(blank, blank, blank);
// Read the following lines character by character; EOS or a closing brace is
// tested for each character (what happens on a match is not visible here).
135 while (_fileIn->getline(_line, MAXLINE + 1))
137 for (i = 0; i <= MAXLINE; i++)
140 if (c == EOS || c == '}')
// Reject sequences longer than the configured maximum and report which one it was.
162 if ((int)characterSeq.length() > userParameters->getMaxAllowedSeqLength())
164 parseExitCode=SEQUENCETOOBIG;
165 if (offendingSeq!=NULL)
166 offendingSeq->assign(name);
168 return Sequence(blank, blank, blank);
170 return Sequence(characterSeq, name, title);
// Error message for the exceptional path; the enclosing handler is not visible here.
175 cerr << "There was an exception in the RSFFileParser::getSeq function.\n"
176 << "Need to end program\n";
182 * count the number of sequences in a GCG RSF alignment file
183 * @return The number of sequences in the file.
// Opens the file named by fileName and scans it line by line.
// Returns 0 when the file cannot be opened. The statements that do the counting
// and the final return are not visible in this excerpt.
185 int RSFFileParser::countSeqs()
187 char _line[MAXLINE + 1];
// NOTE(review): allocated with new; the matching delete is not visible here.
193 _fileIn = new InFileStream; //nige
194 _fileIn->open(fileName.c_str()); //nige
195 _fileIn->seekg(0, std::ios::beg); // start at the beginning
197 if(!_fileIn->is_open())
199 return 0; // No sequences found!
// First pass: look for a line whose last two characters are both '.'.
203 while (_fileIn->getline(_line, MAXLINE + 1))
205 // NOTE needed to change to -1 and -2 (it was -2 and -3)
206 // This is because getline does not put the \n in!
// NOTE(review): for an empty or one-character line, strlen(_line) - 1 / - 2 wrap
// around (strlen returns an unsigned size) and index outside _line. No length
// guard is visible before this test -- verify.
207 if (_line[strlen(_line) - 1] == '.' && _line[strlen(_line) - 2] == '.')
// Second pass over the remaining lines; its body is not visible in this excerpt.
213 while (_fileIn->getline(_line, MAXLINE + 1))
// Error message for the exceptional path; the enclosing handler is not visible here.
226 cerr << "An exception has occured in the function RSFFileParser::countSeqs()\n"
227 << "Program needs to terminate.\nPlease contact the Clustal developers\n";
233 * Get the secondary structure information from the file.
234 * @param gapPenaltyMask
235 * @param secStructMask
236 * @param secStructName
237 * @param structPenalties
// Scans the file named by fileName for a "feature" table attached to a sequence
// entry and, when it is accepted, records it as a secondary structure mask.
//   gapPenaltyMask  - not referenced by any statement visible in this excerpt.
//   secStructMask   - reset to `length` copies of '.', then filled in by
//                     getRSFFeature for each "feature" line.
//   secStructName   - set to the entry name (_sname) once a feature table was found.
//   structPenalties - set to SECST when the feature table is used.
//   length          - number of positions in secStructMask.
// Much of the control structure (braces, do/else lines) is not visible here, so
// the notes below cover the visible statements only.
240 void RSFFileParser::getSecStructure(vector<char>& gapPenaltyMask, vector<char>& secStructMask,
241 string& secStructName, int &structPenalties, int length)
243 char _title[MAXLINE + 1];
244 char _line[MAXLINE + 1];
245 char _lin2[MAXLINE + 1];
246 char _sname[MAXNAMES + 1];
// Start from a mask made only of '.' characters.
252 secStructMask.clear();
253 secStructMask.assign(length, '.');
// NOTE(review): allocated with new; the matching delete is not visible here.
254 _fileIn = new InFileStream; //nige
255 _fileIn->open(fileName.c_str()); //nige
256 _fileIn->seekg(0, std::ios::beg); // Need to start at the beginning
// Look for a line whose last two characters are both '.'.
259 while (_fileIn->getline(_line, MAXLINE + 1))
// NOTE(review): strlen(_line) - 1 / - 2 wrap around for lines shorter than two
// characters; no length guard is visible before this test -- verify.
261 if (_line[strlen(_line) - 1] == '.' && _line[strlen(_line) - 2] == '.')
267 // find the start of the sequence entry
270 while (_fileIn->getline(_line, MAXLINE + 1))
// Advance to the "name" line of the entry.
276 while (!keyword(_line, "name"))
278 if (!_fileIn->getline(_line, MAXLINE + 1))
// Scan starts at index 5; presumably this steps over the "name" keyword to find
// where the entry name begins (loop body not visible) -- TODO confirm.
285 for (i = 5; i <= (int)strlen(_line); i++)
292 strncpy(_sname, _line + i, MAXNAMES); // remember entryname
// NOTE(review): strncpy does not terminate _sname when the source has MAXNAMES or
// more characters, and the terminator is only written after this loop -- verify
// that strlen cannot run past the buffer.
293 for (i = 0; i <= (int)strlen(_sname); i++)
295 if (_sname[i] == ' ')
301 _sname[MAXNAMES] = EOS;
302 utilityObject->rTrim(_sname);
303 utilityObject->blankToUnderscore(_sname); // replace blanks with '_'
305 // look for secondary structure feature table / gap penalty mask
306 while (_fileIn->getline(_line, MAXLINE + 1))
308 if (keyword(_line, "feature"))
// In interactive mode the user is asked whether to use the structure; the
// answer is stored in the first character of _lin2.
310 if (userParameters->getInteractive())
312 strcpy(_title, "Found secondary structure in alignment file: ");
313 strcat(_title, _sname);
314 (*_lin2) = utilityObject->promptForYesNo(_title,
315 "Use it to set local gap penalties ");
// Any answer other than n or N counts as acceptance.
321 if ((*_lin2 != 'n') && (*_lin2 != 'N'))
323 structPenalties = SECST;
324 secStructMask.assign(length, '.');
// For each "feature" line, pass the text after the 7-character keyword to
// getRSFFeature, then read the next line.
327 if (keyword(_line, "feature"))
329 getRSFFeature(&_line[7], secStructMask, length);
// NOTE(review): the result of this getline is not checked in the visible code; if
// the input ends before a "sequence" line the loop condition below may never
// become false -- verify.
331 _fileIn->getline(_line, MAXLINE + 1);
333 while (!keyword(_line, "sequence"));
// Presumably the declined branch: read lines up to the "sequence" line without
// using them (surrounding lines not visible) -- TODO confirm. The same unchecked
// getline concern applies here.
339 _fileIn->getline(_line, MAXLINE + 1);
341 while (!keyword(_line, "sequence"));
343 secStructName = string(_sname);
345 else if (keyword(_line, "sequence"))
350 if (structPenalties != NONE)
// Error message for the exceptional path; the enclosing handler is not visible here.
361 cerr << "An exception has occured in the function RSFFileParser::getSecStructure()\n"
362 << "Program needs to terminate.\nPlease contact the Clustal developers\n";
368 * get a feature from the file. Called by getSecStructure
370 * @param secStructMask
// Parses one feature description from `line` and marks the matching positions in
// secStructMask.
//   line          - text to parse; the format begins with three integers followed
//                   by string fields.
//   secStructMask - mask to update; positions are written with the characters s and c.
//   length        - bound that startPos and endPos are checked against.
// The lines that assign s and c, and the action taken when a check fails, are not
// visible in this excerpt.
373 void RSFFileParser::getRSFFeature(char* line, vector<char>& secStructMask, int length)
376 char str1[MAXLINE + 1], str2[MAXLINE + 1], feature[MAXLINE + 1];
377 int i, tmp, startPos, endPos;
// The rest of this sscanf call (remaining arguments and the comparison of its
// result) continues on a line that is not visible here.
381 if (sscanf(line, "%d%d%d%s%s%s", &startPos, &endPos, &tmp, str1, str2,
// Two feature names are distinguished by exact, case-sensitive comparison.
387 if (strcmp(feature, "HELIX") == 0)
392 else if (strcmp(feature, "STRAND") == 0)
// NOTE(review): only the upper bound is tested. A startPos or endPos below 1 would
// make the [startPos - 1] / [endPos - 1] writes below index before the start of
// the mask -- verify whether callers guarantee positive positions.
402 if (startPos >= length || endPos >= length)
// startPos and endPos are used as 1-based positions: both end points get s and
// every position strictly between them gets c.
406 secStructMask[startPos - 1] = s;
407 for (i = startPos; i < endPos - 1; i++)
409 secStructMask[i] = c;
411 secStructMask[endPos - 1] = s;
// Error message for the exceptional path; the enclosing handler is not visible here.
415 cerr << "An exception has occured in the function RSFFileParser::getRSFFeature()\n"
416 << "Program needs to terminate.\nPlease contact the Clustal developers\n";
422 * keyword checks if code is on the line!
// Returns true when key compares equal to code (strcmp result 0).
// The declaration of key and the loop body are not visible in this excerpt;
// presumably the loop copies the leading word of line into key -- TODO confirm.
427 bool RSFFileParser::keyword(char *line, const char *code)
// Walk the line up to the first whitespace character or the terminator.
432 for (i = 0; !isspace(line[i]) && line[i] != EOS; i++)
437 return (strcmp(key, code) == 0);