--- /dev/null
+/**
+ * Author: Mark Larkin
+ *
+ * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
+ */
+/**
+ * Changes:
+ *
+ * 10-02-07,Nigel Brown(EMBL): changed ifstream to InFileStream to handle
+ * cross-platform end-of-lines.
+ */
+
+#ifdef HAVE_CONFIG_H
+ #include "config.h"
+#endif
+#include "PIRFileParser.h"
+
+namespace clustalw
+{
+
+/**
+ * PIRFileParser constructor.
+ * Remembers the path of the file to parse and calls fillCharTab(), which
+ * presumably fills the chartab lookup table that getSeq() indexes when
+ * reading residues (fillCharTab is defined outside this file -- confirm).
+ * @param filePath Path of the PIR format file; it is copied into fileName,
+ *                 which getSeq() and countSeqs() later open.
+ */
+PIRFileParser::PIRFileParser(string filePath)
+{
+ fileName = filePath;
+ fillCharTab();
+}
+
+
+/**
+ * Reads 'no' consecutive sequences from the file, starting with sequence
+ * number 'firstSeq', by calling getSeq() once per sequence.
+ * @param firstSeq Number of the first sequence to read.
+ * @param no How many sequences to read.
+ * @param offendingSeq Passed through to getSeq(); may be NULL.
+ * @return The sequences in file order, or an empty vector as soon as
+ *         parseExitCode is anything other than OK after a getSeq() call.
+ */
+vector<Sequence> PIRFileParser::getSeqRange(int firstSeq, int no, string *offendingSeq)
+{
+    vector<Sequence> collected;
+
+    for (int offset = 0; offset < no; ++offset)
+    {
+        Sequence current = getSeq(firstSeq + offset, offendingSeq);
+        if (parseExitCode != OK)
+        {
+            // A failed read invalidates the whole range: hand back nothing
+            // rather than a partial result.
+            return vector<Sequence>();
+        }
+        collected.push_back(current);
+    }
+
+    return collected;
+}
+
+
+
+/**
+ * The function getSeq finds the sequence 'seqNum' in the file and returns it.
+ *
+ * The file named by fileName is opened and scanned from the beginning on
+ * every call. Lines starting with '>' are counted until the seqNum-th one is
+ * reached. The entry name is read from that line starting at column 4 (after
+ * skipping blanks), the next line is taken as the title, and the residues
+ * are gathered from the following lines up to the '*' end marker or the end
+ * of the file. Characters whose chartab entry is zero are dropped.
+ *
+ * The stream object created for the scan is closed and released before
+ * returning.
+ * NOTE(review): _fileIn is set to NULL afterwards; confirm that no code
+ * outside this file (e.g. a destructor) dereferences _fileIn unconditionally.
+ *
+ * @param seqNum The number of the sequence to get from the file.
+ * @param offendingSeq If not NULL, receives the name of the sequence when the
+ *                     sequence exceeds the maximum allowed length.
+ * @return The 'seqNum' sequence from the file. An empty Sequence is returned
+ *         when the file has no such sequence, or when the sequence is too
+ *         long (parseExitCode is then set to SEQUENCETOOBIG).
+ */
+Sequence PIRFileParser::getSeq(int seqNum, string *offendingSeq)
+{
+    char _line[MAXLINE + 1];
+    char _sname[MAXNAMES + 1];
+    char _title[MAXTITLES + 1];
+    string characterSeq = "";
+    string name = "";
+    string title = "";
+    string blank = "";
+
+    _line[0] = EOS;
+    int i;
+    unsigned char c;
+    int _currentSeqNum = 0;
+
+    // Cleared up front so the catch block can tell whether a stream exists.
+    _fileIn = NULL;
+
+    try
+    {
+        _fileIn = new InFileStream; //nige
+        _fileIn->open(fileName.c_str()); //nige
+        _fileIn->seekg(0, std::ios::beg);
+
+        // Read in lines until we get to the begining of sequence seqNum.
+        while (_currentSeqNum != seqNum)
+        {
+            while(*_line != '>')
+            {
+                if(!_fileIn->getline(_line, MAXLINE + 1)) // If we cannot get anymore!
+                {
+                    _fileIn->close();
+                    delete _fileIn;
+                    _fileIn = NULL;
+                    return Sequence(blank, blank, blank);
+                }
+            }
+            ++_currentSeqNum;
+            if(_currentSeqNum == seqNum) // Found the sequence
+            {
+                break;
+            }
+            // Get next line so that we are past the '>' line
+            _fileIn->getline(_line, MAXLINE + 1);
+        }
+
+        // line contains the name of the sequence. The name starts at column 4;
+        // clamp the start to the end of the text so that a header shorter
+        // than that is never read past its terminator.
+        const int lineLength = (int)strlen(_line);
+        i = (lineLength < 4) ? lineLength : 4;
+        while (_line[i] == ' ')
+        {
+            i++;
+        }
+
+        strncpy(_sname, _line + i, MAXNAMES); // remember entryname
+        _sname[MAXNAMES] = EOS;
+        utilityObject->rTrim(_sname);
+        utilityObject->blankToUnderscore(_sname); // replace blanks with '_'
+        name = string(_sname);
+
+        // The line after the header is the title.
+        _fileIn->getline(_line, MAXLINE + 1);
+        strncpy(_title, _line, MAXTITLES);
+        _title[MAXTITLES] = EOS;
+        i = strlen(_title);
+        // An empty title must not be indexed at position -1.
+        if (i > 0 && _title[i - 1] == '\n')
+        {
+            _title[i - 1] = EOS;
+        }
+        title = string(_title);
+
+        // Collect residues line by line until the '*' end marker.
+        while (_fileIn->getline(_line, MAXLINE + 1))
+        {
+            for (i = 0; i <= MAXLINE; i++)
+            {
+                c = _line[i];
+                if (c == '\n' || c == EOS || c == '*')
+                {
+                    break;
+                }
+
+                // chartab yields 0 for characters that are not kept.
+                c = chartab[c];
+                if (c)
+                {
+                    characterSeq += c;
+                }
+            }
+            if (c == '*')
+            {
+                break;
+            }
+        }
+        _fileIn->close();
+        delete _fileIn;
+        _fileIn = NULL;
+
+        if ((int)characterSeq.length() > userParameters->getMaxAllowedSeqLength())
+        {
+            parseExitCode=SEQUENCETOOBIG;
+            if (offendingSeq!=NULL)
+                offendingSeq->assign(name);
+            // return empty seq
+            return Sequence(blank, blank, blank);
+        }
+        return Sequence(characterSeq, name, title);
+    }
+    catch(...)
+    {
+        // The stream may never have been created, or may already be released.
+        if (_fileIn != NULL)
+        {
+            _fileIn->close();
+        }
+        cerr << "There was an exception in the PIRFileParser::getSeq function.\n"
+             << "Need to end program\n";
+        exit(1);
+    }
+}
+
+/**
+ * Tells whether the PIR end-of-sequence marker '*' occurs in 'line' before
+ * the end of the line (a newline or the terminating EOS).
+ */
+static bool pirLineHasEndMarker(const char *line)
+{
+    for (int i = 0; line[i] != '\n' && line[i] != EOS; i++)
+    {
+        if (line[i] == '*')
+        {
+            return true;
+        }
+    }
+    return false;
+}
+
+/**
+ * The function countSeqs finds the number of sequences in the file and returns it.
+ *
+ * The first non-blank line is taken as the start of the first entry. Every
+ * entry must contain the '*' end marker before the next line that starts
+ * with '>'; otherwise an error is reported through utilityObject and 0 is
+ * returned. A final entry that reaches the end of the file without a '*' is
+ * not counted and does not raise the error.
+ *
+ * The stream object created for the scan is closed and released before
+ * returning.
+ * NOTE(review): _fileIn is set to NULL afterwards; confirm that no code
+ * outside this file (e.g. a destructor) dereferences _fileIn unconditionally.
+ *
+ * @return The number of sequences in the file, or 0 when the file cannot be
+ *         opened or an end marker is missing.
+ */
+int PIRFileParser::countSeqs()
+{
+    char line[MAXLINE + 1];
+    line[0] = EOS;
+    int numSeqs;
+    bool seqOk;
+
+    // Cleared up front so the catch block can tell whether a stream exists.
+    _fileIn = NULL;
+
+    try
+    {
+        _fileIn = new InFileStream; //nige
+        _fileIn->open(fileName.c_str()); //nige
+
+        if(!_fileIn->is_open())
+        {
+            delete _fileIn;
+            _fileIn = NULL;
+            return 0; // No sequences found!
+        }
+
+        // Get to begining of sequences!
+        while (_fileIn->getline(line, MAXLINE + 1))
+        {
+            if (!utilityObject->blankLine(line))
+            {
+                break;
+            }
+        }
+
+        // Now check the 1st sequence to make sure it ends with *
+        seqOk = false;
+        while (_fileIn->getline(line, MAXLINE + 1))
+        {
+            // Reaching the next entry means the first one had no end marker.
+            if (*line == '>')
+            {
+                break;
+            }
+            if (pirLineHasEndMarker(line))
+            {
+                seqOk = true; // ok - end of sequence found
+                break;
+            }
+        }
+        if (seqOk == false)
+        {
+            _fileIn->close();
+            delete _fileIn;
+            _fileIn = NULL;
+            utilityObject->error("PIR format sequence end marker '*'\nmissing for one or more sequences.\n");
+            return 0; // funny format
+        }
+
+        numSeqs = 1;
+
+        while (_fileIn->getline(line, MAXLINE + 1))
+        {
+            if (*line == '>')
+            {
+                // Start of the next entry: look for its end marker.
+                seqOk = false;
+                while (_fileIn->getline(line, MAXLINE + 1))
+                {
+                    // Another header before a '*' means the marker is missing.
+                    if (*line == '>')
+                    {
+                        _fileIn->close();
+                        delete _fileIn;
+                        _fileIn = NULL;
+                        utilityObject->error("PIR format sequence end marker '*'\nmissing for one or more sequences.\n");
+                        return 0; // funny format
+                    }
+                    if (pirLineHasEndMarker(line))
+                    {
+                        seqOk = true; // ok - sequence found
+                        numSeqs++;
+                        break;
+                    }
+                }
+            }
+        }
+
+        _fileIn->close();
+        delete _fileIn;
+        _fileIn = NULL;
+
+        return numSeqs;
+    }
+    catch(...)
+    {
+        // The stream may never have been created, or may already be released.
+        if (_fileIn != NULL)
+        {
+            _fileIn->close();
+        }
+        cerr << "An exception has occured in the function PIRFileParser::countSeqs()\n"
+             << "Program needs to terminate.\nPlease contact the Clustal developers\n";
+        exit(1);
+    }
+}
+
+/**
+ * There is no secondary structure information in PIR files, so the only
+ * effect of this function is to report that through structPenalties.
+ * @param gapPenaltyMask Not used; left unchanged.
+ * @param secStructMask Not used; left unchanged.
+ * @param secStructName Not used; left unchanged.
+ * @param structPenalties Set to NONE.
+ * @param length Not used.
+ */
+void PIRFileParser::getSecStructure(vector<char>& gapPenaltyMask, vector<char>& secStructMask,
+                                    string& secStructName, int &structPenalties, int length)
+{
+    // These arguments exist only to satisfy the common parser interface.
+    (void)gapPenaltyMask;
+    (void)secStructMask;
+    (void)secStructName;
+    (void)length;
+
+    structPenalties = NONE;
+}
+
+}
+