4 * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
9 * 10-02-07,Nigel Brown(EMBL): changed ifstream to InFileStream to handle
10 * cross-platform end-of-lines.
16 #include "PIRFileParser.h"
22 * PIRFileParser constructor sets up the chartab array.
25 PIRFileParser::PIRFileParser(string filePath)
33 * get range of sequences
// Reads a run of sequences through getSeq(), starting at index firstSeq, and
// returns them in a vector. offendingSeq is forwarded unchanged to getSeq().
// If parseExitCode is anything other than OK after a read, the vector is
// cleared and returned empty.
// NOTE(review): `no` is presumably the number of sequences to read - confirm.
35 vector<Sequence> PIRFileParser::getSeqRange(int firstSeq, int no, string *offendingSeq)
37 vector<Sequence> seqRangeVector;
// Fetch the i-th sequence of the requested range.
42 Sequence tempSeq = getSeq(firstSeq + i, offendingSeq);
// A failed read discards everything gathered so far.
43 if (parseExitCode!=OK) {
44 seqRangeVector.clear();
45 return seqRangeVector;
47 seqRangeVector.push_back(tempSeq);
49 return seqRangeVector;
55 * The function getSeq finds the sequence 'seqNum' in the file and returns it.
56 * @param seqNum The number of the sequence to get from the file.
57 * @return The 'seqNum' sequence from the file.
// Reads sequence number seqNum from the file named by fileName.
// Returns Sequence(characterSeq, name, title) on success. A blank Sequence is
// returned when the file runs out of lines before seqNum is reached, or when
// the residue string is longer than userParameters->getMaxAllowedSeqLength().
// In the latter case parseExitCode is set to SEQUENCETOOBIG and, if
// offendingSeq is non-null, it receives the sequence name.
59 Sequence PIRFileParser::getSeq(int seqNum, string *offendingSeq)
// Fixed-size scratch buffers, each with one extra byte for the terminator.
61 char _line[MAXLINE + 1];
62 char _sname[MAXNAMES + 1];
63 char _title[MAXTITLES + 1];
// Accumulates the residues of the requested sequence.
64 string characterSeq = "";
// NOTE(review): presumably counts the sequence headers seen so far - confirm.
72 int _currentSeqNum = 0;
// A fresh stream is allocated and opened on every call, then positioned at
// the start of the file.
// NOTE(review): confirm where this allocation is released.
76 _fileIn = new InFileStream; //nige
77 _fileIn->open(fileName.c_str()); //nige
78 _fileIn->seekg(0, std::ios::beg);
80 // Read in lines until we get to the beginning of sequence seqNum.
81 while (_currentSeqNum != seqNum)
// Running out of input before reaching seqNum yields a blank Sequence.
85 if(!_fileIn->getline(_line, MAXLINE + 1)) // If we cannot get anymore!
88 return Sequence(blank, blank, blank);
92 if(_currentSeqNum == seqNum) // Found the sequence
96 // Get next line so that we are past the '>' line
97 _fileIn->getline(_line, MAXLINE + 1);
100 // line contains the name of the sequence
// The scan starts at index 4.
// NOTE(review): presumably this skips a ">P1;"-style prefix - confirm.
101 for (i = 4; i <= (int)strlen(_line); i++)
// strncpy does not terminate the destination when the source is at least
// MAXNAMES long, so the terminator is written explicitly.
109 strncpy(_sname, _line + i, MAXNAMES); // remember entryname
110 _sname[MAXNAMES] = EOS;
111 utilityObject->rTrim(_sname);
112 utilityObject->blankToUnderscore(_sname); // replace blanks with '_'
113 name = string(_sname);
// The line after the name line is taken as the title, copied with the same
// bounded-copy-then-terminate pattern as the name.
115 _fileIn->getline(_line, MAXLINE + 1);
116 strncpy(_title, _line, MAXTITLES);
117 _title[MAXTITLES] = EOS;
// NOTE(review): presumably strips a trailing newline from the title; this
// depends on the value of i at this point - confirm.
119 if (_title[i - 1] == '\n')
123 title = string(_title);
// Remaining lines carry the residues; each line buffer is scanned
// character by character.
125 while (_fileIn->getline(_line, MAXLINE + 1))
127 for (i = 0; i <= MAXLINE; i++)
// A newline, the terminator, or the PIR end marker '*' is tested for here.
130 if (c == '\n' || c == EOS || c == '*')
// Over-long sequences are rejected: record the failure code, report the
// sequence name to the caller if requested, and return a blank Sequence.
148 if ((int)characterSeq.length() > userParameters->getMaxAllowedSeqLength())
150 parseExitCode=SEQUENCETOOBIG;
151 if (offendingSeq!=NULL)
152 offendingSeq->assign(name);
154 return Sequence(blank, blank, blank);
156 return Sequence(characterSeq, name, title);
// Diagnostic written to stderr.
// NOTE(review): presumably on the exception path - confirm.
161 cerr << "There was an exception in the PIRFileParser::getSeq function.\n"
162 << "Need to end program\n";
168 * The function countSeqs finds the number of sequences in the file and returns it.
169 * @return The number of sequences in the file.
// Counts the sequences in the file named by fileName.
// Returns 0 when the file cannot be opened, and also when a sequence is
// found without its '*' end marker (an error is reported through
// utilityObject->error() in that case).
171 int PIRFileParser::countSeqs()
// Line buffer with one extra byte for the terminator, plus a scratch char.
173 char line[MAXLINE + 1], c;
// A fresh stream is allocated and opened on every call.
// NOTE(review): confirm where this allocation is released.
180 _fileIn = new InFileStream; //nige
181 _fileIn->open(fileName.c_str()); //nige
183 if(!_fileIn->is_open())
185 return 0; // No sequences found!
188 // Get to beginning of sequences!
// Read lines, testing each with utilityObject->blankLine().
189 while (_fileIn->getline(line, MAXLINE + 1))
191 if (!utilityObject->blankLine(line))
197 // Now check the 1st sequence to make sure it ends with *
199 while (_fileIn->getline(line, MAXLINE + 1))
201 // Look for end of first seq
// The scan has no index bound; it runs until seqOk becomes true.
// NOTE(review): presumably it is also left when the end of the line is
// reached - confirm.
206 for (i = 0; seqOk == false; i++)
211 seqOk = true; // ok - end of sequence found
214 if (c == '\n' || c == EOS)
// A missing end marker is treated as a malformed file: report and give up.
228 utilityObject->error("PIR format sequence end marker '*'\nmissing for one or more sequences.\n");
229 return 0; // funny format
// The remaining sequences are processed with the same kind of
// end-marker scan as the first one.
234 while (_fileIn->getline(line, MAXLINE + 1))
238 // Look for start of next seq
240 while (_fileIn->getline(line, MAXLINE + 1))
242 // Look for end of seq
246 utilityObject->error("PIR format sequence end marker '*'\nmissing for one or more sequences.\n");
247 return 0; // funny format
249 for (i = 0; seqOk == false; i++)
254 seqOk = true; // ok - sequence found
257 if (c == '\n' || c == EOS)
// Diagnostic written to stderr.
// NOTE(review): presumably on the exception path - confirm.
278 cerr << "An exception has occured in the function PIRFileParser::countSeqs()\n"
279 << "Program needs to terminate.\nPlease contact the Clustal developers\n";
285 * There is no secondary structure information in PIR files!
286 * @param gapPenaltyMask
287 * @param secStructMask
288 * @param secStructName
289 * @param structPenalties
// Reports the absence of secondary-structure data by setting structPenalties
// to NONE.
// NOTE(review): the other arguments are presumably left untouched - confirm.
292 void PIRFileParser::getSecStructure(vector<char>& gapPenaltyMask, vector<char>& secStructMask,
293 string& secStructName, int &structPenalties, int length)
295 structPenalties = NONE;