4 * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
9 * 10-02-07,Nigel Brown(EMBL): changed ifstream to InFileStream to handle
10 * cross-platform end-of-lines.
16 #include "GDEFileParser.h"
22 * Constructor sets up the chartab array.
// Constructs a GDE parser from `filePath` (taken by value).
// NOTE(review): the constructor body is not part of this listing; the header
// comment says it sets up the chartab array - confirm against the full source.
25 GDEFileParser::GDEFileParser(string filePath)
32 * Nothing to do in destruction of object.
// Destructor.
// NOTE(review): the body is not part of this listing; the header comment states
// that nothing needs to be released here - confirm against the full source.
34 GDEFileParser::~GDEFileParser()
// Collects sequences obtained from getSeq(firstSeq + i, offendingSeq) into a
// vector and returns it. `offendingSeq` is forwarded unchanged to getSeq().
// If parseExitCode is not OK after a fetch, everything collected so far is
// discarded and an empty vector is returned.
// NOTE(review): the loop header that drives `i` is not visible in this listing;
// presumably i runs over [0, no) - confirm against the full source.
40 vector<Sequence> GDEFileParser::getSeqRange(int firstSeq, int no, string *offendingSeq)
42 vector<Sequence> seqRangeVector;
47 Sequence tempSeq = getSeq(firstSeq + i, offendingSeq);
// A failed parse invalidates the whole range: drop the partial result.
48 if (parseExitCode!=OK) {
49 seqRangeVector.clear();
50 return seqRangeVector;
52 seqRangeVector.push_back(tempSeq);
54 return seqRangeVector;
59 * The getSeq function is used to get sequence number seqNum from the file.
// Reads sequence number `seqNum` from the file named by `fileName` and returns
// it as Sequence(characterSeq, name, title).
// Returns Sequence(blank, blank, blank) when the file runs out before the
// requested header is found, or when the sequence is longer than
// userParameters->getMaxAllowedSeqLength(); in the latter case parseExitCode is
// set to SEQUENCETOOBIG and, if `offendingSeq` is non-null, the sequence name
// is written to it.
// NOTE(review): several statements of this function are not visible in this
// listing (braces, breaks, counter updates, stream cleanup); notes below that
// depend on them are marked as assumptions.
61 Sequence GDEFileParser::getSeq(int seqNum, string *offendingSeq)
// Fixed-size line and name buffers; each has one extra slot beyond MAXLINE / MAXNAMES.
63 char _line[MAXLINE + 1];
64 char _sname[MAXNAMES + 1];
65 string characterSeq = "";
74 int _currentSeqNum = 0; // Not at any sequence yet!
// NOTE(review): raw `new`; the matching close/delete is not visible in this
// listing - confirm the stream is released on every return path.
78 _fileIn = new InFileStream; //nige
79 _fileIn->open(fileName.c_str()); //nige
80 _fileIn->seekg(0, std::ios::beg);
// The header marker depends on the DNA flag: '#' when it is set, '%' when it is not.
82 bool dnaFlagSet = userParameters->getDNAFlag();
// Scan forward, header by header, until the requested sequence number is reached.
83 while (_currentSeqNum != seqNum)
// NOTE(review): *_line is tested before the first getline below; confirm that
// _line is initialised in a line not shown here.
85 while((*_line != '#' && dnaFlagSet) ||
86 (*_line != '%' && !dnaFlagSet))
// End of file before the requested header was found: return an empty Sequence.
88 if(!_fileIn->getline(_line, MAXLINE + 1))
91 return Sequence(blank, blank, blank);
95 if(_currentSeqNum == seqNum) // Found the sequence
99 // Get next line so that we are past the '#' or '%' line
100 _fileIn->getline(_line, MAXLINE + 1); //nige
// Copy the name from the header line, starting at index 1 (index 0 holds the
// marker character), stopping at '(' or an end-of-line character.
103 for (i = 1; i <= MAXNAMES; i++)
105 if (_line[i] == '(' || _line[i] == '\n' || _line[i] == '\r')
109 _sname[i - 1] = _line[i];
// Walk backwards over the copied name testing for whitespace.
// NOTE(review): the loop body is not visible; presumably it trims trailing
// blanks - confirm.
114 for (i--; i > 0; i--)
116 if (isspace(_sname[i]))
125 utilityObject->blankToUnderscore(_sname);
126 name = string(_sname);
// Read the sequence body line by line; a line starting with '%', '#' or '"'
// is tested for here (presumably it ends the current sequence - the action
// taken is not visible).
129 while (_fileIn->getline(_line, MAXLINE + 1))
131 if (*_line == '%' || *_line == '#' || *_line == '"')
// Per-character pass over the line; newline or EOS is tested for as the end of
// the usable data. NOTE(review): the assignment of `c` and the handling of
// accepted characters are not visible in this listing.
135 for (i = 0; i <= MAXLINE; i++)
138 if (c == '\n' || c == EOS)
// Reject over-long sequences and report the offending name to the caller.
153 if ((int)characterSeq.length() > userParameters->getMaxAllowedSeqLength())
155 parseExitCode=SEQUENCETOOBIG;
156 if (offendingSeq!=NULL)
157 offendingSeq->assign(name);
159 return Sequence(blank, blank, blank);
161 return Sequence(characterSeq, name, title);
// NOTE(review): the message below names "GDEnFileParser" - likely a typo for
// GDEFileParser in the runtime string; left unchanged here.
166 cerr << "There was an exception in the GDEnFileParser::getSeq function.\n"
167 << "Need to end program\n";
174 * The countSeqs function returns the number of sequences in the file.
// Opens the file named by `fileName` and scans it line by line for sequence
// header lines: lines starting with '%' when the DNA flag is false, or with
// '#' when it is true. Returns 0 if the file cannot be opened.
// NOTE(review): the counter update, the final return and the stream cleanup are
// not visible in this listing - confirm against the full source.
176 int GDEFileParser::countSeqs()
178 char line[MAXLINE + 1];
// NOTE(review): raw `new`; confirm the stream is closed and deleted on every
// return path, including the early `return 0` below.
183 _fileIn = new InFileStream; //nige
184 _fileIn->open(fileName.c_str()); //nige
186 if(!_fileIn->is_open())
188 return 0; // No sequences found!
191 while (_fileIn->getline(line, MAXLINE + 1))
// Only the marker that matches the current DNA/protein mode is tested for.
193 if ((*line == '%') && (userParameters->getDNAFlag() == false))
197 else if ((*line == '#') && (userParameters->getDNAFlag() == true))
// NOTE(review): "occured" in the message below is a misspelling in a runtime
// string; left unchanged here.
209 cerr << "An exception has occured in the function GDEFileParser::countSeqs()\n"
210 << "Program needs to terminate.\nPlease contact the Clustal developers\n";
216 * getSecStructure gets the secondary structure from the file.
// Looks in the file named by `fileName` for comment lines (starting with '"')
// whose text begins with "SS_" (secondary structure) or "GM_" (gap penalty
// mask) and loads the data that follows into the output arguments.
//   gapPenaltyMask  - cleared on entry; for a GM_ entry filled with `length`
//                     '1' characters and then overwritten from the file.
//   secStructMask   - cleared on entry; for an SS_ entry filled with `length`
//                     '.' characters and then overwritten from the file.
//   secStructName   - set to the entry name (blankToUnderscore is applied first).
//   structPenalties - set to SECST or GMASK when an entry is accepted.
//   length          - number of mask positions to allocate and fill.
// In interactive mode the user is asked whether to use the entry found.
// NOTE(review): many statements of this function (braces, breaks, else
// branches, stream cleanup) are not visible in this listing; notes that depend
// on them are marked as assumptions.
218 void GDEFileParser::getSecStructure(vector<char>& gapPenaltyMask, vector<char>& secStructMask,
219 string& secStructName, int &structPenalties, int length)
221 char _title[MAXLINE + 1];
222 char _line[MAXLINE + 1];
223 char _lin2[MAXLINE + 1];
224 char _sname[MAXNAMES + 1];
225 int i, len, offset = 0;
// NOTE(review): raw `new`; the matching close/delete is not visible in this listing.
230 _fileIn = new InFileStream; //nige
231 _fileIn->open(fileName.c_str()); //nige
232 _fileIn->seekg(0, std::ios::beg);
234 // NOTE I think I should empty the masks before pushing onto them!
235 gapPenaltyMask.clear();
236 secStructMask.clear();
241 // search for the next comment line
// NOTE(review): *_line is tested before the first getline below; confirm that
// _line is initialised in a line not shown here.
242 while (*_line != '"')
244 if (!_fileIn->getline(_line, MAXLINE + 1))
251 // is it a secondary structure entry?
252 if (strncmp(&_line[1], "SS_", 3) == 0)
// Copy the entry name that follows the 4-character '"SS_' prefix, stopping at
// '(' or an end-of-line character. At most MAXNAMES - 3 characters are copied.
254 for (i = 1; i <= MAXNAMES - 3; i++)
256 if (_line[i + 3] == '(' || _line[i + 3] == '\n' || _line[i + 3] == '\r')
260 _sname[i - 1] = _line[i + 3];
266 // Is it possible for this to be executed????????????????
267 // if _line contains ( then we break and dont put it into _sname
268 // So how can sname have it???????
// When the test below succeeds, an integer offset is parsed from the header line.
269 if (_sname[i - 1] == '(')
271 sscanf(&_line[i + 3], "%d", &offset);
// Walk backwards over the copied name testing for whitespace.
// NOTE(review): the loop body is not visible; presumably it trims trailing
// blanks - confirm.
277 for (i--; i > 0; i--)
279 if (isspace(_sname[i]))
289 utilityObject->blankToUnderscore(_sname);
290 secStructName = string(_sname);
// Interactive mode: build the prompt in _title and store the one-character
// answer in the first element of _lin2.
// NOTE(review): how *_lin2 is set in non-interactive mode is not visible here.
292 if (userParameters->getInteractive())
294 strcpy(_title, "Found secondary structure in alignment file: ");
295 strcat(_title, _sname);
296 (*_lin2) = utilityObject->promptForYesNo(_title,
297 "Use it to set local gap penalties ");
// Any answer other than 'n' / 'N' accepts the structure.
303 if ((*_lin2 != 'n') && (*_lin2 != 'N'))
305 structPenalties = SECST;
// Pre-fill the mask with `length` placeholder characters so it can be indexed below.
306 for (i = 0; i < length; i++)
308 secStructMask.push_back('.');
// Read the structure lines; a line starting with '%', '#' or '"' is tested
// for here (presumably it ends the entry - the action taken is not visible).
311 while (_fileIn->getline(_line, MAXLINE + 1))
313 if (*_line == '%' || *_line == '#' || *_line == '"')
// NOTE(review): this loop is bounded by `i < length`, while the mask is
// written at index `len`, which is only compared with `length` at the later
// `len >= length` test. Confirm that `len` cannot run past the end of the mask
// when an entry spans several lines, and that `len` is initialised in a line
// not shown here.
317 for (i = offset; i < length; i++)
320 if (c == '\n' || c == EOS)
325 secStructMask[len++] = c;
327 if (len >= length) // NOTE i put in >=
335 // or is it a gap penalty mask entry?
336 else if (strncmp(&_line[1], "GM_", 3) == 0)
// Same name extraction as in the SS_ branch.
// NOTE(review): unlike the SS_ branch, '\r' is not treated as a terminator here.
338 for (i = 1; i <= MAXNAMES - 3; i++)
340 if (_line[i + 3] == '(' || _line[i + 3] == '\n')
344 _sname[i - 1] = _line[i + 3];
350 // Again I dont think it is possible for _sname to have ( !!!!
351 if (_sname[i - 1] == '(')
353 sscanf(&_line[i + 3], "%d", &offset);
// Walk backwards over the copied name testing for whitespace (body not visible).
359 for (i--; i > 0; i--)
361 if (isspace(_sname[i]))
371 utilityObject->blankToUnderscore(_sname);
372 secStructName = string(_sname);
// Interactive mode: ask whether the gap penalty mask should be used.
374 if (userParameters->getInteractive())
376 strcpy(_title, "Found gap penalty mask in alignment file: ");
377 strcat(_title, _sname);
378 (*_lin2) = utilityObject->promptForYesNo(_title,
379 "Use it to set local gap penalties ");
// Any answer other than 'n' / 'N' accepts the mask.
385 if ((*_lin2 != 'n') && (*_lin2 != 'N'))
387 structPenalties = GMASK;
// Pre-fill the mask with `length` '1' characters so it can be indexed below.
388 for (i = 0; i < length; i++)
390 gapPenaltyMask.push_back('1');
393 while (_fileIn->getline(_line, MAXLINE + 1))
395 if (*_line == '%' || *_line == '#' || *_line == '"')
// NOTE(review): same indexing concern as in the SS_ branch - `len` is only
// compared with `length` at the later `len >= length` test.
399 for (i = offset; i < length; i++)
402 if (c == '\n' || c == EOS)
407 gapPenaltyMask[len++] = c;
409 if (len >= length) // NOTE I put in >=
// Tests whether a structure or mask has been accepted.
// NOTE(review): the action taken is not visible; presumably the search stops.
416 if (structPenalties != NONE)
// NOTE(review): "occured" in the message below is a misspelling in a runtime
// string; left unchanged here.
426 cerr << "An exception has occured in the function GDEFileParser::getSecStructure()\n"
427 << "Program needs to terminate.\nPlease contact the Clustal developers\n";