Next version of JABA
[jabaws.git] / binaries / src / clustalw / src / fileInput / MSFFileParser.cpp
1 /**
2  * Author: Mark Larkin
3  * 
4  * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.  
5  */
6 /**
7  * Changes:
8  *
9  * 10-02-07,Nigel Brown(EMBL): changed ifstream to InFileStream to handle
10  * cross-platform end-of-lines.
11  */
12
13 #ifdef HAVE_CONFIG_H
14     #include "config.h"
15 #endif
16 #include "MSFFileParser.h"
17
18 namespace clustalw
19 {
20
21 /**
22  * MSFFileParser contructor sets up the chartab array.
23  * @param filePath 
24  * @return 
25  */
26 MSFFileParser::MSFFileParser(string filePath)
27 {
28     fileName = filePath; 
29     fillCharTab();
30 }
31
32
33     
34     vector<Sequence> MSFFileParser::getSeqRange(int firstSeq, int no, string *offendingSeq)
35 {
36     vector<Sequence> seqRangeVector;
37     int i;
38
39     for (i=0; i<no; i++)
40     { 
41         Sequence tempSeq = getSeq(firstSeq + i);
42         if (parseExitCode!=OK) {
43             seqRangeVector.clear();
44             return seqRangeVector;
45         }
46         seqRangeVector.push_back(tempSeq);
47     }
48     return seqRangeVector;
49 }
50
51
52
53 /**
54  * The function getSeq finds the sequence seqNum in the file and returns it.
55  * @param seqNum The number of the sequence in the file to get.
56  * @return A sequence object containing the seqNum'th sequence from the file.
57  */
58 Sequence MSFFileParser::getSeq(int seqNum, string *offendingSeq)
59 {
60     char _line[MAXLINE + 1];
61     char _sname[MAXNAMES + 1];
62     string characterSeq = "";
63     string name = "";
64     string title = "";
65     string blank = "";
66     
67     _line[0] = EOS;
68     int i, j, k;
69     unsigned char c;
70
71     try
72     {
73         _fileIn = new InFileStream;  //nige
74         _fileIn->open(fileName.c_str());  //nige
75         _fileIn->seekg(0, std::ios::beg);       
76         
77         for (i = 0;; i++)
78         {
79             if (!_fileIn->getline(_line, MAXLINE + 1))
80             {
81                 _fileIn->close();
82                 return Sequence(blank, blank, blank);
83             }
84             // read the title
85             if (utilityObject->lineType(_line, "//"))
86             {
87                 break;
88             }
89             // lines...ignore
90         }
91
92         while (_fileIn->getline(_line, MAXLINE + 1))
93         {
94             if (!utilityObject->blankLine(_line))
95             {
96                 for (i = 1; i < seqNum; i++)
97                 {
98                     _fileIn->getline(_line, MAXLINE + 1);
99                 }
100                 for (j = 0; j <= (int)strlen(_line); j++)
101                 {
102                     if (_line[j] != ' ')
103                     {
104                         break;
105                     }
106                 }
107                 for (k = j; k <= (int)strlen(_line); k++)
108                 {
109                     if (_line[k] == ' ')
110                     {
111                         break;
112                     }
113                 }
114                 
115                 // Get the name of the sequence
116                 strncpy(_sname, _line + j, utilityObject->MIN(MAXNAMES, k - j));
117                 _sname[utilityObject->MIN(MAXNAMES, k - j)] = EOS;
118                 utilityObject->rTrim(_sname);
119                 utilityObject->blankToUnderscore(_sname);
120                 name = string(_sname);
121
122                 for (i = k; i <= MAXLINE; i++)
123                 {
124                     c = _line[i];
125                     if (c == '.' || c == '~')
126                     {
127                         c = '-';
128                     }
129                     if (c == '*')
130                     {
131                         c = 'X';
132                     }
133                     if (c == '\n' || c == EOS)
134                     {
135                         break;
136                     }
137                     // EOL 
138                     c = chartab[c];
139                     if (c)
140                     {
141                         characterSeq += c;
142                     }
143                 }
144
145                 for (i = 0;; i++)
146                 {
147                     if (!_fileIn->getline(_line, MAXLINE + 1))
148                     {
149                         _fileIn->close();
150                         return Sequence(characterSeq, name, title);
151                     }
152                     if (utilityObject->blankLine(_line))
153                     {
154                         break;
155                     }
156                 }
157             }
158         }
159         _fileIn->close();
160         
161         if ((int)characterSeq.length() > userParameters->getMaxAllowedSeqLength())
162         {
163             parseExitCode=SEQUENCETOOBIG;
164             if (offendingSeq!=NULL)
165                 offendingSeq->assign(name);
166             // return empty seq
167             return Sequence(blank, blank, blank);
168         }
169         return Sequence(characterSeq, name, title);;
170     }
171     catch(...)
172     {
173         _fileIn->close();
174         cerr << "An exception has occured in the function MSFFileParser::getSeq()\n"
175              << "Program needs to terminate.\nPlease contact the Clustal developers\n";
176         exit(1);
177     }
178 }
179
180 /**
181  * The function countSeqs counts the number of sequences in the file.
182  * @return The number of sequences in the file.
183  */
184 int MSFFileParser::countSeqs()
185 {
186     char _line[MAXLINE + 1];
187     int _numSeqs;
188     
189     try
190     {
191         _fileIn = new InFileStream;  //nige
192         _fileIn->open(fileName.c_str());  //nige
193     
194         if(!_fileIn->is_open())
195         {
196             return 0; // No sequences found!
197         }
198     
199         while (_fileIn->getline(_line, MAXLINE + 1))
200         {
201             if (utilityObject->lineType(_line, "//"))
202             {
203                 break;
204             }
205         }
206
207         while (_fileIn->getline(_line, MAXLINE + 1))
208         {
209             if (!utilityObject->blankLine(_line))
210             {
211                 break;
212             }
213             // Look for next non- blank line
214         } 
215         _numSeqs = 1;
216
217         while (_fileIn->getline(_line, MAXLINE + 1))
218         {
219             if (utilityObject->blankLine(_line))
220             {
221                 _fileIn->close();
222                 return _numSeqs;
223             }
224             _numSeqs++;
225         }
226
227         return 0; // if you got to here-funny format/no seqs.
228     }
229     catch(...)
230     {
231         _fileIn->close();
232         cerr << "An exception has occured in the function MSFFileParser::countSeqs()\n"
233              << "Program needs to terminate.\nPlease contact the Clustal developers\n";
234         exit(1);    
235     }
236 }
237
238 /**
239  * There is no secondary structure information in MSF files. Set structPenalties to NONE.
240  * @param gapPenaltyMask 
241  * @param secStructMask 
242  * @param secStructName 
243  * @param structPenalties 
244  * @param length 
245  */
246 void MSFFileParser::getSecStructure(vector<char>& gapPenaltyMask, vector<char>& secStructMask,
247                                     string& secStructName, int &structPenalties, int length)
248 {
249     structPenalties = NONE;
250 }
251
252
253 }
254