4 * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
9 * 10-02-07,Nigel Brown(EMBL): changed ifstream to InFileStream to handle
10 * cross-platform end-of-lines.
16 #include "MSFFileParser.h"
22 * MSFFileParser constructor sets up the chartab array.
26 MSFFileParser::MSFFileParser(string filePath)
// Builds a vector of sequences by calling getSeq(firstSeq + i) and appending
// each result to seqRangeVector.
// The loop that drives `i` is not visible in this excerpt; presumably it runs
// `no` times — TODO confirm against the full file.
// NOTE(review): offendingSeq is not passed to getSeq() in the visible call, so
// getSeq() cannot report the offending sequence name through it from here —
// confirm whether that is intended.
34 vector<Sequence> MSFFileParser::getSeqRange(int firstSeq, int no, string *offendingSeq)
36 vector<Sequence> seqRangeVector;
41 Sequence tempSeq = getSeq(firstSeq + i);
// Any exit code other than OK aborts the whole range: the sequences gathered so
// far are discarded and an empty vector is returned.
42 if (parseExitCode!=OK) {
43 seqRangeVector.clear();
44 return seqRangeVector;
46 seqRangeVector.push_back(tempSeq);
48 return seqRangeVector;
54 * The function getSeq finds the sequence seqNum in the file and returns it.
55 * @param seqNum The number of the sequence in the file to get.
56 * @return A sequence object containing the seqNum'th sequence from the file.
// Reads sequence number seqNum from the file named by fileName and returns it
// as a Sequence(characterSeq, name, title).
// Several statements of this function (loop headers, braces, try/catch, stream
// cleanup) are not visible in this excerpt; the comments below describe only
// the lines shown.
58 Sequence MSFFileParser::getSeq(int seqNum, string *offendingSeq)
// Fixed-size scratch buffers: one input line and one sequence name, each with
// room for a terminating character.
60 char _line[MAXLINE + 1];
61 char _sname[MAXNAMES + 1];
62 string characterSeq = "";
// A fresh stream is allocated with `new`, opened on fileName and positioned at
// the start of the file.
// NOTE(review): the matching close/delete of _fileIn is not visible here —
// confirm it is released on every return path.
73 _fileIn = new InFileStream; //nige
74 _fileIn->open(fileName.c_str()); //nige
75 _fileIn->seekg(0, std::ios::beg);
// When a read fails at this stage, a Sequence built from `blank` for all three
// fields is returned.
79 if (!_fileIn->getline(_line, MAXLINE + 1))
82 return Sequence(blank, blank, blank);
// Tests the line against the "//" marker (presumably the MSF header/alignment
// separator — TODO confirm); the action taken on a match is outside this excerpt.
85 if (utilityObject->lineType(_line, "//"))
// Keep reading lines and test each for being non-blank.
92 while (_fileIn->getline(_line, MAXLINE + 1))
94 if (!utilityObject->blankLine(_line))
// Reads seqNum - 1 further lines (i runs from 1 to seqNum - 1).
// NOTE(review): the result of this getline() is not checked in the visible code.
96 for (i = 1; i < seqNum; i++)
98 _fileIn->getline(_line, MAXLINE + 1);
// Both scans use `<=`, so they run up to and including the terminating
// character of _line; j and k end up delimiting the name field copied below.
100 for (j = 0; j <= (int)strlen(_line); j++)
107 for (k = j; k <= (int)strlen(_line); k++)
115 // Get the name of the sequence
// At most MAXNAMES characters are copied; the terminator is written explicitly
// because strncpy does not append one when the source is not shorter than the count.
116 strncpy(_sname, _line + j, utilityObject->MIN(MAXNAMES, k - j));
117 _sname[utilityObject->MIN(MAXNAMES, k - j)] = EOS;
// Post-process the name with the rTrim and blankToUnderscore helpers.
118 utilityObject->rTrim(_sname);
119 utilityObject->blankToUnderscore(_sname);
120 name = string(_sname);
// Walk the rest of the line from position k, up to index MAXLINE inclusive.
122 for (i = k; i <= MAXLINE; i++)
// '.' and '~' get special handling (presumably both are MSF gap symbols —
// TODO confirm; the handling itself is not visible here).
125 if (c == '.' || c == '~')
// A newline or the end-of-string marker is tested for here (presumably ending
// the scan of this line — the action taken is not visible).
133 if (c == '\n' || c == EOS)
// If no further line can be read, return what has been accumulated so far.
147 if (!_fileIn->getline(_line, MAXLINE + 1))
150 return Sequence(characterSeq, name, title);
152 if (utilityObject->blankLine(_line))
// Oversized sequence: record SEQUENCETOOBIG in parseExitCode, report the name
// through offendingSeq when the caller supplied one, and return a Sequence
// built from `blank`.
161 if ((int)characterSeq.length() > userParameters->getMaxAllowedSeqLength())
163 parseExitCode=SEQUENCETOOBIG;
164 if (offendingSeq!=NULL)
165 offendingSeq->assign(name);
167 return Sequence(blank, blank, blank);
169 return Sequence(characterSeq, name, title);;
// Diagnostic written to cerr; presumably inside a catch handler that is not
// visible in this excerpt.
174 cerr << "An exception has occured in the function MSFFileParser::getSeq()\n"
175 << "Program needs to terminate.\nPlease contact the Clustal developers\n";
181 * The function countSeqs counts the number of sequences in the file.
182 * @return The number of sequences in the file.
// Counts the sequences in the file named by fileName.
// The counting statements themselves are not visible in this excerpt; the
// comments below describe only the lines shown.
184 int MSFFileParser::countSeqs()
186 char _line[MAXLINE + 1];
// A fresh stream is allocated with `new` and opened on fileName.
191 _fileIn = new InFileStream; //nige
192 _fileIn->open(fileName.c_str()); //nige
// NOTE(review): on this early return the stream allocated above is not visibly
// released — confirm against the full file.
194 if(!_fileIn->is_open())
196 return 0; // No sequences found!
// Read lines, testing each against the "//" marker.
199 while (_fileIn->getline(_line, MAXLINE + 1))
201 if (utilityObject->lineType(_line, "//"))
// Read lines, testing each for being non-blank.
207 while (_fileIn->getline(_line, MAXLINE + 1))
209 if (!utilityObject->blankLine(_line))
213 // Look for the next blank line
// Read lines, testing each for being blank; presumably the lines read before
// a blank one are counted as sequences — TODO confirm.
217 while (_fileIn->getline(_line, MAXLINE + 1))
219 if (utilityObject->blankLine(_line))
227 return 0; // if you got to here-funny format/no seqs.
// Diagnostic written to cerr; presumably inside a catch handler that is not
// visible in this excerpt.
232 cerr << "An exception has occured in the function MSFFileParser::countSeqs()\n"
233 << "Program needs to terminate.\nPlease contact the Clustal developers\n";
239 * There is no secondary structure information in MSF files. Set structPenalties to NONE.
240 * @param gapPenaltyMask
241 * @param secStructMask
242 * @param secStructName
243 * @param structPenalties
// Reports that no secondary-structure information is available by setting
// structPenalties to NONE. In the visible code none of the other parameters
// (gapPenaltyMask, secStructMask, secStructName, length) are read or written.
246 void MSFFileParser::getSecStructure(vector<char>& gapPenaltyMask, vector<char>& secStructMask,
247 string& secStructName, int &structPenalties, int length)
249 structPenalties = NONE;