2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7)
3 * Copyright (C) 2011 J Procter, AM Waterhouse, J Engelhardt, LM Lui, G Barton, M Clamp, S Searle
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
11 * Jalview is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty
13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14 * PURPOSE. See the GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
23 import javax.xml.parsers.ParserConfigurationException;
25 import org.xml.sax.SAXException;
27 import fr.orsay.lri.varna.exceptions.ExceptionFileFormatOrSyntax;
28 import fr.orsay.lri.varna.exceptions.ExceptionLoadingFailed;
29 import fr.orsay.lri.varna.exceptions.ExceptionPermissionDenied;
30 import fr.orsay.lri.varna.exceptions.ExceptionUnmatchedClosingParentheses;
32 import jalview.datamodel.*;
33 import jalview.util.*;
// MSF alignment file support: parse() reads MSF text into Sequence objects and
// print(...) renders sequences back out as MSF text.
// Helpers used below (nextLine(), parseId(), printId(), newline, seqs, noSeqs,
// maxLength) are not defined in the visible part of this class; presumably
// they are inherited from AlignFile - confirm.
41 public class MSFfile extends AlignFile
45 * Creates a new MSFfile object.
52 * Creates a new MSFfile object.
61 * @throws SAXException
62 * @throws ParserConfigurationException
63 * @throws ExceptionFileFormatOrSyntax
64 * @throws ExceptionLoadingFailed
65 * @throws ExceptionPermissionDenied
66 * @throws InterruptedException
67 * @throws ExceptionUnmatchedClosingParentheses
// Constructor taking an input location (inFile) and a type string.
// NOTE(review): the body is not visible in this listing; presumably both
// arguments are forwarded to the AlignFile superclass constructor, which would
// explain the long checked-exception list (XML/VARNA exceptions are not raised
// by any MSF code visible in this file) - confirm against AlignFile.
69 public MSFfile(String inFile, String type) throws IOException, ExceptionFileFormatOrSyntax, ParserConfigurationException, SAXException, ExceptionPermissionDenied, ExceptionLoadingFailed, InterruptedException, ExceptionUnmatchedClosingParentheses
// Constructor taking a FileParse source; declares the same checked exceptions
// as the (String, String) constructor.
// NOTE(review): the body is not visible in this listing; presumably the source
// is forwarded to the AlignFile superclass constructor - confirm.
74 public MSFfile(FileParse source) throws IOException, ExceptionFileFormatOrSyntax, ParserConfigurationException, SAXException, ExceptionPermissionDenied, ExceptionLoadingFailed, InterruptedException, ExceptionUnmatchedClosingParentheses
// NOTE(review): the marker below is an IDE-generated placeholder with no
// information content; it can be deleted once the enclosing braces are checked.
80 // TODO Auto-generated constructor stub
// Parses MSF-formatted text obtained line by line from nextLine().
// Each line is split on whitespace (StringTokenizer defaults). The token that
// follows a "Name:" token is recorded as a sequence header; in sequence mode
// the remaining tokens of a line are appended to the StringBuffer stored in
// seqhash under the current key. After reading, a Sequence is created for each
// header that has accumulated data and is appended to seqs.
// Side effects visible here: assigns noSeqs, may raise maxLength, adds to seqs.
// NOTE(review): several short statements (e.g. the assignments to seqFlag and
// key, the try/else keywords) are not visible in this listing; the comments
// below describe only what the visible lines show.
86 public void parse() throws IOException
89 boolean seqFlag = false;
// NOTE(review): new String() is just the empty string; the literal "" would do.
90 String key = new String();
// headers keeps sequence names in the order they are encountered;
// seqhash maps a name to the StringBuffer collecting its residues.
91 Vector headers = new Vector();
92 Hashtable seqhash = new Hashtable();
97 while ((line = nextLine()) != null)
99 StringTokenizer str = new StringTokenizer(line);
101 while (str.hasMoreTokens())
103 String inStr = str.nextToken();
105 // If line has header information add to the headers vector
106 if (inStr.indexOf("Name:") != -1)
// NOTE(review): there is no hasMoreTokens() guard here, so a line whose last
// token contains "Name:" makes nextToken() throw NoSuchElementException.
108 key = str.nextToken();
109 headers.addElement(key);
112 // if line has // set SeqFlag to 1 so we know sequences are coming
113 if (inStr.indexOf("//") != -1)
118 // Process lines as sequence lines if seqFlag is set
119 if ((inStr.indexOf("//") == -1) && (seqFlag == true))
121 // sequence id is the first field
124 StringBuffer tempseq;
126 // Get sequence from hash if it exists
127 if (seqhash.containsKey(key))
129 tempseq = (StringBuffer) seqhash.get(key);
// Otherwise start a new buffer and register it under the key.
133 tempseq = new StringBuffer();
134 seqhash.put(key, tempseq);
137 // loop through the rest of the words
138 while (str.hasMoreTokens())
140 // append the word to the sequence
141 tempseq.append(str.nextToken());
// NOTE(review): the IOException is only reported on stderr and processing
// continues with whatever was read so far, even though the method itself
// declares "throws IOException".
146 } catch (IOException e)
148 System.err.println("Exception parsing MSFFile " + e);
// noSeqs counts every header seen, including headers for which no sequence
// data was collected (those are only reported on stderr further down).
152 this.noSeqs = headers.size();
154 // Build a Sequence for each header that has data and add it to seqs
155 for (i = 0; i < headers.size(); i++)
157 if (seqhash.get(headers.elementAt(i)) != null)
159 String head = headers.elementAt(i).toString();
160 String seq = seqhash.get(head).toString();
// maxLength ends up at least as large as the longest header name.
162 if (maxLength < head.length())
164 maxLength = head.length();
167 // Replace ~ with a sensible gap character
168 seq = seq.replace('~', '-');
// parseId(head) supplies the Sequence object; its definition is not visible here.
170 Sequence newSeq = parseId(head);
172 newSeq.setSequence(seq);
174 seqs.addElement(newSeq);
// Header without any collected sequence data: report it and move on.
178 System.err.println("MSFFile Parser: Can't find sequence for "
179 + headers.elementAt(i));
190 * @return DOCUMENT ME!
192 public int checkSum(String seq)
195 String sequence = seq.toUpperCase();
197 for (int i = 0; i < sequence.length(); i++)
202 int value = sequence.charAt(i);
205 check += (i % 57 + 1) * value;
207 } catch (Exception e)
209 System.err.println("Exception during MSF Checksum calculation");
214 return check % 10000;
225 * @return DOCUMENT ME!
// Renders the given sequences as MSF text and returns it as a String.
// Steps visible below:
//  1. emit the "!!NA..."/"!!AA..." header line, chosen by
//     Comparison.isNucleotide(seqs);
//  2. copy each input sequence with '-' rewritten to '.', then rewrite '.' at
//     the leading and trailing ends to '~';
//  3. compute one checksum per sequence plus their running total;
//  4. emit a "Name: ... Len: ... Check: ... Weight: 1.00" line per sequence,
//     followed by "//";
//  5. emit the residues chunk by chunk, five groups of ten residues per row.
// In the visible code the input array is only read; all edits are made on the
// new Sequence copies held in s.
// NOTE(review): many short statements (counter declarations and increments,
// else/break branches, the value of len) are not visible in this listing, so
// the comments below describe only what the visible lines show.
227 public String print(SequenceI[] seqs)
230 boolean is_NA = jalview.util.Comparison.isNucleotide(seqs);
232 SequenceI[] s = new SequenceI[seqs.length];
234 StringBuffer out = new StringBuffer("!!" + (is_NA ? "NA" : "AA")
235 + "_MULTIPLE_ALIGNMENT 1.0");
236 // TODO: JBPNote : Jalview doesn't remember NA or AA yet.
// Copy loop: stops at the first null entry, so s may be only partially filled.
243 while ((i < seqs.length) && (seqs[i] != null))
245 // Replace all internal gaps with . and external spaces with ~
246 s[i] = new Sequence(seqs[i].getName(), seqs[i].getSequenceAsString()
247 .replace('-', '.'), seqs[i].getStart(),seqs[i].getEnd());
249 StringBuffer sb = new StringBuffer();
250 sb.append(s[i].getSequence());
// Forward scan from index 0: '.' becomes '~'.
// NOTE(review): the exit on the first non-'.' character is not visible here.
252 for (int ii = 0; ii < sb.length(); ii++)
254 if (sb.charAt(ii) == '.')
256 sb.setCharAt(ii, '~');
// Backward scan from the last index: '.' becomes '~'. Index 0 is excluded by
// "ii > 0", but it was already examined by the forward scan above.
264 for (int ii = sb.length() - 1; ii > 0; ii--)
266 if (sb.charAt(ii) == '.')
268 sb.setCharAt(ii, '~');
276 s[i].setSequence(sb.toString());
// Track the longest sequence length seen so far.
278 if (s[i].getSequence().length > max)
280 max = s[i].getSequence().length;
// Column widths: the length column is as wide as the digits of max; the
// checksum column is one digit wider ("1" + max).
286 Format maxLenpad = new Format("%" + (new String("" + max)).length()
288 Format maxChkpad = new Format("%" + (new String("1" + max)).length()
293 int[] checksums = new int[s.length];
// NOTE(review): the loop header for this checksum pass is not visible; if it
// runs over all of s it will dereference null slots left behind when the copy
// loop above stopped early - confirm.
296 checksums[i] = checkSum(s[i].getSequenceAsString());
297 bigChecksum += checksums[i];
// NOTE(review): s[0] is dereferenced unconditionally. An empty seqs array
// gives ArrayIndexOutOfBoundsException and a null seqs[0] gives
// NullPointerException. The header also reports the length of the first
// sequence rather than max.
302 out.append(" MSF: " + s[0].getSequence().length + " Type: "
303 + (is_NA ? "N" : "P") + " Check: " + (bigChecksum % 10000)
309 String[] nameBlock = new String[s.length];
310 String[] idBlock = new String[s.length];
313 while ((i < s.length) && (s[i] != null))
// NOTE(review): new String(...) around a concatenation is redundant (also below).
316 nameBlock[i] = new String(" Name: " + printId(s[i]) + " ");
318 idBlock[i] = new String("Len: "
319 + maxLenpad.form(s[i].getSequence().length) + " Check: "
320 + maxChkpad.form(checksums[i]) + " Weight: 1.00"+newline);
// maxid is measured from getName() while the residue rows below print
// printId(...) - NOTE(review): confirm both have the same length.
322 if (s[i].getName().length() > maxid)
324 maxid = s[i].getName().length();
327 if (nameBlock[i].length() > maxNB)
329 maxNB = nameBlock[i].length();
// Left-justify every name block to the widest one so the Len/Check columns align.
345 Format nbFormat = new Format("%-" + maxNB + "s");
347 for (i = 0; (i < s.length) && (s[i] != null); i++)
349 out.append(nbFormat.form(nameBlock[i]) + idBlock[i]);
// "//" separates the header section from the sequence data (parse() keys on it).
354 out.append(newline);out.append("//");
// Number of rows needed to cover max residues at len residues per row; the
// adjustment for an exact multiple is not visible in this listing.
359 int nochunks = (max / len) + 1;
361 if ((max % len) == 0)
366 for (i = 0; i < nochunks; i++)
370 while ((j < s.length) && (s[j] != null))
372 String name = printId(s[j]);
// NOTE(review): this Format is rebuilt for every sequence of every chunk
// although maxid no longer changes; it could be created once before the loops.
374 out.append(new Format("%-" + maxid + "s").form(name + " "));
376 for (int k = 0; k < 5; k++)
// NOTE(review): 50 and 10 are hard-coded here while the chunk count above is
// derived from len; the two must agree (len's value is not visible) - confirm.
378 int start = (i * 50) + (k * 10);
379 int end = start + 10;
// Full group available: emit residues [start, end).
381 if ((end < s[j].getSequence().length)
382 && (start < s[j].getSequence().length))
384 out.append(s[j].getSequence(start, end));
// Otherwise emit whatever remains from start, if anything.
397 if (start < s[j].getSequence().length)
399 out.append(s[j].getSequenceAsString().substring(start));
418 return out.toString();
424 * @return DOCUMENT ME!
// Renders the sequences returned by getSeqsAsArray() as MSF text by
// delegating to print(SequenceI[]).
426 public String print()
428 return print(getSeqsAsArray());