/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * Changes: * * 10-02-07,Nigel Brown(EMBL): changed ifstream to InFileStream to handle * cross-platform end-of-lines. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "PIRFileParser.h" namespace clustalw { /** * PIRFileParser contructor sets up the chartab array. * @param filePath */ PIRFileParser::PIRFileParser(string filePath) { fileName = filePath; fillCharTab(); } /* * get range of sequences */ vector PIRFileParser::getSeqRange(int firstSeq, int no, string *offendingSeq) { vector seqRangeVector; int i; for (i=0; iopen(fileName.c_str()); //nige _fileIn->seekg(0, std::ios::beg); // Read in lines until we get to the begining of sequence seqNum. while (_currentSeqNum != seqNum) { while(*_line != '>') { if(!_fileIn->getline(_line, MAXLINE + 1)) // If we cannot get anymore! { _fileIn->close(); return Sequence(blank, blank, blank); } } ++_currentSeqNum; if(_currentSeqNum == seqNum) // Found the sequence { break; } // Get next line so that we are past the '>' line _fileIn->getline(_line, MAXLINE + 1); } // line contains the name of the sequence for (i = 4; i <= (int)strlen(_line); i++) { if (_line[i] != ' ') { break; } } strncpy(_sname, _line + i, MAXNAMES); // remember entryname _sname[MAXNAMES] = EOS; utilityObject->rTrim(_sname); utilityObject->blankToUnderscore(_sname); // replace blanks with '_' name = string(_sname); _fileIn->getline(_line, MAXLINE + 1); strncpy(_title, _line, MAXTITLES); _title[MAXTITLES] = EOS; i = strlen(_title); if (_title[i - 1] == '\n') { _title[i - 1] = EOS; } title = string(_title); while (_fileIn->getline(_line, MAXLINE + 1)) { for (i = 0; i <= MAXLINE; i++) { c = _line[i]; if (c == '\n' || c == EOS || c == '*') { break; } c = chartab[c]; if (c) { characterSeq += c; } } if (c == '*') { break; } } _fileIn->close(); if ((int)characterSeq.length() > userParameters->getMaxAllowedSeqLength()) { parseExitCode=SEQUENCETOOBIG; if (offendingSeq!=NULL) offendingSeq->assign(name); // return empty seq return Sequence(blank, blank, blank); } return Sequence(characterSeq, name, title); } catch(...) { _fileIn->close(); cerr << "There was an exception in the PIRFileParser::getSeq function.\n" << "Need to end program\n"; exit(1); } } /** * The function countSeqs finds the number of sequences in the file and returns it. * @return The number of sequences in the file. */ int PIRFileParser::countSeqs() { char line[MAXLINE + 1], c; line[0] = EOS; int numSeqs, i; bool seqOk; try { _fileIn = new InFileStream; //nige _fileIn->open(fileName.c_str()); //nige if(!_fileIn->is_open()) { return 0; // No sequences found! } // Get to begining of sequences! while (_fileIn->getline(line, MAXLINE + 1)) { if (!utilityObject->blankLine(line)) { break; } } // Now check the 1st sequence to make sure it ends with * seqOk = false; while (_fileIn->getline(line, MAXLINE + 1)) { // Look for end of first seq if (*line == '>') { break; } for (i = 0; seqOk == false; i++) { c = line[i]; if (c == '*') { seqOk = true; // ok - end of sequence found break; } // EOL if (c == '\n' || c == EOS) { break; } // EOL } if (seqOk == true) { break; } } if (seqOk == false) { _fileIn->close(); utilityObject->error("PIR format sequence end marker '*'\nmissing for one or more sequences.\n"); return 0; // funny format } numSeqs = 1; while (_fileIn->getline(line, MAXLINE + 1)) { if (*line == '>') { // Look for start of next seq seqOk = false; while (_fileIn->getline(line, MAXLINE + 1)) { // Look for end of seq if (*line == '>') { _fileIn->close(); utilityObject->error("PIR format sequence end marker '*'\nmissing for one or more sequences.\n"); return 0; // funny format } for (i = 0; seqOk == false; i++) { c = line[i]; if (c == '*') { seqOk = true; // ok - sequence found break; } if (c == '\n' || c == EOS) { break; } } if (seqOk == true) { numSeqs++; break; } } } } _fileIn->close(); return numSeqs; } catch(...) { _fileIn->close(); cerr << "An exception has occured in the function PIRFileParser::countSeqs()\n" << "Program needs to terminate.\nPlease contact the Clustal developers\n"; exit(1); } } /** * There is no secondary structure information in PIR files! * @param gapPenaltyMask * @param secStructMask * @param secStructName * @param structPenalties * @param length */ void PIRFileParser::getSecStructure(vector& gapPenaltyMask, vector& secStructMask, string& secStructName, int &structPenalties, int length) { structPenalties = NONE; } }