Delete unneeded directory
[jabaws.git] / website / archive / binaries / mac / src / clustalw / src / fileInput / PearsonFileParser.cpp
diff --git a/website/archive/binaries/mac/src/clustalw/src/fileInput/PearsonFileParser.cpp b/website/archive/binaries/mac/src/clustalw/src/fileInput/PearsonFileParser.cpp
deleted file mode 100644 (file)
index 61ef200..0000000
+++ /dev/null
@@ -1,453 +0,0 @@
-/**
- * Author: Mark Larkin
- * 
- * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.  
- */
-/** 
- * Changes: 
- *
- * Mark 24-1-2007. I am now using the char "delimiter" for the delimiter when using
- * getline. This is to get around the problem of some files having '\r'.
- *
- * 10-02-07,Nigel Brown(EMBL): Changed ifstream to InFileStream to handle
- * cross-platform end-of-lines and removed delimiter member.
- *
- * 28-12-07,Paul McGettigan : replaced array processing with string processing this fixes bug #72
- *
- * 9-2-2008, Paul McGettigan : fixed problem where space after '>' but before sequence name was causing
- *                             alignment to fail due to no sequence name being read in
- * 15-2-2008, Paul McGettigan : fixed bug 91 where Pseudo -FASTA format files were not being processed as 
- *                              previously in v1.83
- */
-
-#ifdef HAVE_CONFIG_H
-    #include "config.h"
-#endif
-#include "PearsonFileParser.h"
-
-namespace clustalw
-{
-
-/**
- * Constructor for the Pearson file parser.
- * @param filePath 
- * @return 
- */
-PearsonFileParser::PearsonFileParser(string filePath)
-{
-    fileName = filePath;
-    fillCharTab();
-}
-
-
-/**
- * reads fasta/pearson file in one go instead of calling getSeq for
- * each single sequence.
- *
- * FIXME AW: only PearsonFileParser::getSeqRange is special, rest is the
- * same. should be defined in FileParser and then overloaded in special
- * cases like here
- */
-vector<Sequence>
-PearsonFileParser::getSeqRange(int firstSeq, int nSeqsToRead, string *offendingSeq)
-{
-    string characterSeq = "";
-    string name = "";
-    string title = "";
-    string blank = "";
-    string greater = ">";
-    //_line[0] = EOS;
-    vector<Sequence> seqRangeVector;
-    
-    string line;
-    
-    
-    //int i, j;
-    int nSeqsRead = 0;
-    unsigned char c;
-    char delim;
-    int _currentSeqNum = 0; // Not at any sequence yet!
-    
-    try
-    {
-       delim=FileParser::getDelimiter(fileName);
-       //cout << "delim = " << delim << endl;
-       ifstream _fileIn;
-       _fileIn.open(fileName.c_str(),ios::in);
-
-        // Read in lines until we get to the begining of sequence firstSeq.
-        string line="";
-
-        do {
-          std::getline(_fileIn,line,delim);
-          if(line.substr(0,1) == greater){
-              _currentSeqNum++;
-          }
-        } while(_currentSeqNum <firstSeq);
-        
-        
-        while (nSeqsRead < nSeqsToRead)
-        {
-            // get sequence name from current line (excluded '>' and read up to first ' ' or MAXNAMES
-            // remove the first char i.e. '>'
-            name=line.substr(1,MAXNAMES);
-            //if(name.find(">") != string::npos){
-            //  andreas wilm: exit if angle bracket within header?
-            //}
-            
-            while(name.substr(0,1)==" "){
-                name=name.substr(1,MAXNAMES);
-            }
-            //int i;
-            //i = name.find(" ");
-            if(name.find(" ") != string::npos){
-                name=name.substr(0,name.find(" "));
-            }
-            utilityObject->rTrim(&name); // also replaces linef
-
-            name=utilityObject->blankToUnderscore(name); // replace blanks with '_'
-            
-            
-            // Read in lines until we get to the begining of next sequence.
-            
-            title = ""; // No title information
-            
-            while(std::getline(_fileIn,line,delim) ){
-                 
-               string::const_iterator iterator1 = line.begin();
-                while(iterator1 != line.end()){
-
-                    // Andreas Wilm (UCD): exit if angle brackets within sequence
-                    if(*iterator1=='>' && iterator1!=line.begin()) {
-                        /* error output handled in Clustal.cpp
-                        cerr << "\nMultiple angle brackets inside sequence found:"
-                             << " invalid format.\n"
-                             << "Maybe you forgot a linebreak between sequences?\n";
-                        */
-                        
-                        parseExitCode=BADFORMAT;
-                        _fileIn.close();
-                        seqRangeVector.clear();
-                        return seqRangeVector;
-                    }
-                       
-                    if(*iterator1 =='\n' || *iterator1 =='\r' || *iterator1 == EOS || *iterator1 =='>'){
-                        break;
-                    }
-                    c = *iterator1;
-
-                    c = chartab[c];
-                    if(c){
-                        characterSeq.append(1,c);
-                    }
-                    iterator1++;
-                }
-                if(*iterator1 == '>'){
-                    break;
-               } 
-            }
-            
-            // check sequence
-            if ((int)characterSeq.length() > userParameters->getMaxAllowedSeqLength())
-            {
-                /* error output handled in Clustal.cpp */
-                parseExitCode=SEQUENCETOOBIG;
-                if (offendingSeq!=NULL)
-                    offendingSeq->assign(name);
-                _fileIn.close();
-                seqRangeVector.clear();
-                return seqRangeVector;
-            }
-            else if (characterSeq.length() == 0)
-            {
-                parseExitCode=EMPTYSEQUENCE;
-                if (offendingSeq!=NULL)
-                    offendingSeq->assign(name);
-                _fileIn.close();
-                seqRangeVector.clear();
-                return seqRangeVector;
-            }
-
-            seqRangeVector.push_back(Sequence(characterSeq, name, title));
-            characterSeq = "";
-            nSeqsRead++;
-        } // while (nSeqsRead < nSeqsToRead)
-
-        _fileIn.close();
-
-        return seqRangeVector;
-    }
-
-    catch(...)
-    {
-        cerr << "There was an exception in the PearsonFileParser::getSeqRange function.\n"
-             << "Need to end program\n";
-        exit(1);
-    }
-}
-
-
-
-/**
- * The function getSeq is used to get the sequence 'seqNum' in the file. It returns a
- * sequence object containing the sequence.
- * Deprecated: where possible use faster getSeqRange which reads
- * sequences in one go
- * @param seqNum The number of the sequence to get.
- * @return 
- */
-    Sequence PearsonFileParser::getSeq(int seqNum, string *offendingSeq)
-{
-    //char _line[MAXLINE + 1];
-    //char tseq[MAXLINE + 1];
-    //char sname[MAXNAMES + 1];
-    //sname [MAXNAMES] = '\0';
-    string characterSeq = "";
-    string name = "";
-    string title = "";
-    string blank = "";
-    string greater = ">";
-    //_line[0] = EOS;
-    
-    string line;
-    
-    cerr << "Use of PearsonFileParser::getSeq is deprecated!\n";
-    //int i, j;
-    unsigned char c;
-    char delim;
-    int _currentSeqNum = 0; // Not at any sequence yet!
-    
-    try
-    {
-        /*
-        _fileIn = new InFileStream; //nige
-        _fileIn->open(fileName.c_str());  //nige
-        _fileIn->seekg(0, std::ios::beg); // start at the beginning
-       */
-       delim=FileParser::getDelimiter(fileName);
-       //cout << "delim = " << delim << endl;
-       ifstream _fileIn;
-       _fileIn.open(fileName.c_str(),ios::in);
-
-        //////////////////////////////////////////////////
-        //PMcG replace char array with string processing
-        //////////////////////////////////////////////////
-
-        // Read in lines until we get to the begining of sequence seqNum.
-        string line="";
-
-        do {
-          std::getline(_fileIn,line,delim);
-          if(line.substr(0,1) == greater){
-            _currentSeqNum++;
-          }
-        } while(_currentSeqNum <seqNum);
-        
-        
-        // get sequence name from current line (excluded '>' and read up to first ' ' or MAXNAMES
-        // remove the first char i.e. '>'
-        name=line.substr(1,MAXNAMES);
-           
-        //////////////////////////////////////
-        // PMcG 9-2-2008 need to handle spaces at start of sequence name to conform to 1.83 handling
-        //////////////////////////////////////
-        while(name.substr(0,1)==" "){
-          name=name.substr(1,MAXNAMES);
-        }
-        //int i;
-        //i = name.find(" ");
-        if(name.find(" ") != string::npos){
-          name=name.substr(0,name.find(" "));
-        } 
-        name=utilityObject->blankToUnderscore(name); // replace blanks with '_'
-
-
-        // Read in lines until we get to the begining of sequence seqNum.
-          
-        /* PMcG replace char array with string processing
-        while (_currentSeqNum != seqNum)
-        {
-            while(*_line != '>')
-            {
-                if(!_fileIn->getline(_line, MAXLINE + 1))
-                {
-                    freeFileResources(_fileIn);
-                    return Sequence(blank, blank, blank);
-                }
-            }
-            ++_currentSeqNum;
-            if(_currentSeqNum == seqNum) // Found the sequence
-            {
-                break;
-            }
-            // Get next line so that we are past the '>' line
-            _fileIn->getline(_line, MAXLINE + 1);
-        }
-
-        // line contains the name of the sequence
-        for (i = 1; i <= strlen(_line); i++)
-        {
-            if (_line[i] != ' ')
-            {
-                break;
-            }
-        }
-        strncpy(sname, _line + i, MAXNAMES); // remember entryname 
-        for (i = 1; i <= strlen(sname); i++)
-        {
-            if (sname[i] == ' ')
-            {
-                break;
-            }
-        }
-        sname[i] = EOS;
-        utilityObject->rTrim(sname);
-        utilityObject->blankToUnderscore(sname); // replace blanks with '_'
-        name = string(sname);        
-        */
-        
-        title = ""; // No title information
-
-        string seqLine = "";
-        while(std::getline(_fileIn,seqLine,delim) ){
-          string::const_iterator iterator1 = seqLine.begin();
-          while(iterator1 != seqLine.end()){
-            if(*iterator1 =='\n' || *iterator1 =='\r' || *iterator1 == EOS || *iterator1 =='>'){
-              break;
-            }
-            c = *iterator1;
-            c = chartab[c];
-            // PMcG 15-02-2008 bug 91
-            // strip out spaces and numbers from pseudo_fasta files
-            // but need to maintain gaps if present in sequence input
-            // to replicate behaviour of v1.83
-            //if(*iterator1 != ' ' && !isdigit(*iterator1)){
-            if(c){
-              characterSeq.append(1,c);
-            }
-            iterator1++;
-          }
-          if(*iterator1 == '>'){
-            break;
-          }
-        }
- /*
-        while (_fileIn->getline(_line, MAXLINE + 1))
-        {
-            for (i = 0; i <= MAXLINE; i++)
-            {
-                c = _line[i];
-                if (c == '\n' || c == EOS || c == '>')
-                {
-                    break;
-                }
-
-                c = chartab[c];
-                if (c)
-                {
-                    characterSeq += c;
-                }
-            }
-            if (c == '>')
-            {
-                break;
-            }
-        }
-*/
-
-        _fileIn.close();
-
-        if ((int)characterSeq.length() > userParameters->getMaxAllowedSeqLength())
-        {
-            parseExitCode=SEQUENCETOOBIG;
-            // return empty seq
-            return Sequence(blank, blank, blank);
-        }
-        else if (characterSeq.length() == 0)
-        {
-            parseExitCode=EMPTYSEQUENCE;
-            // return empty seq
-            return Sequence(blank, blank, blank);
-        }
-        
-        return Sequence(characterSeq, name, title);
-    }
-
-    catch(...)
-    {
-        cerr << "There was an exception in the PearsonFileParser::getSeq function.\n"
-             << "Need to end program\n";
-        exit(1);
-    }
-}
-
-/**
- * The function countSeqs, counts the number of sequences in a file.
- * @return The number of sequences in the file.
- */
-int PearsonFileParser::countSeqs()
-{
-    //char line[1000 + 1];
-    int _nseqs = 0;
-    string line2;
-    char delim;
-
-    try
-    {
-        //_fileIn = new InFileStream;  //nige
-        //_fileIn->open(fileName.c_str());  //nige
-        delim=FileParser::getDelimiter(fileName);
-        ifstream _fileIn;
-        _fileIn.open(fileName.c_str(),ios::in);
-
-    
-        if(!_fileIn.is_open())
-        {
-            return 0; // No sequences found!
-        }
-    
-        /* while ((*_fileIn) >> line2/@_fileIn->getline(line, 1000 + 1)@/)
-           {
-           /@if(_nseqs == 50)
-           {
-           cout << "\n\n" << line << "\n\n";
-           exit(1);
-           }@/
-            */
-        while (std::getline(_fileIn,line2,delim)) {                 
-            if (line2[0] == '>')
-                {
-                    _nseqs++;
-                }
-        }
-        _fileIn.close();
-        return _nseqs;
-    }
-    catch(...)
-    {
-        freeFileResources(_fileIn);
-        cerr << "An exception has occured in the function PearsonFileParser::countSeqs()\n"
-             << "Program needs to terminate.\nPlease contact the Clustal developers\n";
-        exit(1);    
-    }
-}
-
-/**
- * There is no secondary structure information in the Pearson file. This is here to 
- * set the structPenalties to NONE.
- * @param gapPenaltyMask 
- * @param secStructMask 
- * @param secStructName 
- * @param structPenalties 
- * @param length 
- */
-void PearsonFileParser::getSecStructure(vector<char>& gapPenaltyMask, 
-                         vector<char>& secStructMask, string& secStructName, 
-                          int &structPenalties, int length)
-{
-    structPenalties = NONE;
-}
-
-}
-
-