2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.4)
3 * Copyright (C) 2008 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 import jalview.datamodel.*;
25 import jalview.util.*;
38 * Creates a new MSFfile object.
45 * Creates a new MSFfile object.
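47 * @param inFile the MSF input to read - presumably a file path, URL or pasted text depending on type (TODO confirm)
48 * @param type selects how inFile is interpreted (TODO confirm the accepted values against the parent class)
50 * @throws IOException presumably if the input cannot be opened or read (TODO confirm)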
// Constructor taking an input location string and a type string.
// NOTE(review): the body is not visible in this excerpt; presumably it delegates to the parent class - confirm.
52 public MSFfile(String inFile, String type)
// Constructor taking a FileParse source; declared to throw IOException.
// NOTE(review): only the generated stub comment of its body is visible here.
58 public MSFfile(FileParse source) throws IOException
63 // TODO Auto-generated constructor stub
// MSF parsing routine: reads the input line by line, collects sequence names
// from "Name:" entries and residues from the lines after the "//" separator,
// then turns each collected name/residue pair into a Sequence stored in seqs.
// NOTE(review): the method header and several statements (for example the
// declarations of line and i) are not visible in this excerpt.

// seqFlag gates the sequence-line branch below; key is the current sequence name.
73 boolean seqFlag = false;
74 String key = new String();
// headers: sequence names in the order their "Name:" entries were read.
75 Vector headers = new Vector();
// seqhash: sequence name -> StringBuffer accumulating that sequence's residues.
76 Hashtable seqhash = new Hashtable();
// Consume the input until nextLine() returns null, splitting each line into
// whitespace-delimited tokens.
81 while ( (line = nextLine()) != null)
83 StringTokenizer str = new StringTokenizer(line);
85 while (str.hasMoreTokens())
87 String inStr = str.nextToken();
89 //If the token contains "Name:", the following token is a sequence name: add it to the headers vector
90 if (inStr.indexOf("Name:") != -1)
// NOTE(review): nextToken() is called without a hasMoreTokens() check, so a
// line whose last token contains "Name:" would throw NoSuchElementException.
92 key = str.nextToken();
93 headers.addElement(key);
96 //A token containing "//" marks the end of the header section; sequence lines follow
// NOTE(review): the statement that sets seqFlag is not visible in this excerpt.
97 if (inStr.indexOf("//") != -1)
102 //Process the line as a sequence line once seqFlag is set (the "//" token itself is excluded)
103 if ( (inStr.indexOf("//") == -1) && (seqFlag == true))
105 //sequence id is the first field
108 StringBuffer tempseq;
110 //Reuse the buffer already stored for this key, if there is one
111 if (seqhash.containsKey(key))
113 tempseq = (StringBuffer) seqhash.get(key);
// Otherwise start a new buffer and register it under this key.
117 tempseq = new StringBuffer();
118 seqhash.put(key, tempseq);
121 //loop through the rest of the words on this line
122 while (str.hasMoreTokens())
124 //append the word to the sequence (tokens are joined without separators)
125 tempseq.append(str.nextToken());
// An IOException raised while reading is reported on stderr.
131 catch (IOException e)
133 System.err.println("Exception parsing MSFFile " + e);
// The sequence count is taken from the number of "Name:" entries read.
137 this.noSeqs = headers.size();
139 //Build a Sequence for each recorded header and add it to seqs
140 for (i = 0; i < headers.size(); i++)
// Only headers for which residues were collected produce a Sequence.
142 if (seqhash.get(headers.elementAt(i)) != null)
144 String head = headers.elementAt(i).toString();
145 String seq = seqhash.get(head).toString();
// maxLength tracks the longest sequence name seen.
147 if (maxLength < head.length())
149 maxLength = head.length();
152 // Replace ~ with a sensible gap character
153 seq = seq.replace('~', '-');
// parseId (defined elsewhere) supplies the Sequence object for this name.
155 Sequence newSeq = parseId(head);
157 newSeq.setSequence(seq);
159 seqs.addElement(newSeq);
// A header without any collected residues is reported on stderr.
163 System.err.println("MSFFile Parser: Can't find sequence for " +
164 headers.elementAt(i));
172 * @param seq DOCUMENT ME!
174 * @return DOCUMENT ME!
176 public int checkSum(String seq)
179 String sequence = seq.toUpperCase();
181 for (int i = 0; i < sequence.length(); i++)
186 int value = sequence.charAt(i);
189 check += (i % 57 + 1) * value;
194 System.err.println("Exception during MSF Checksum calculation");
199 return check % 10000;
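205 * @param seqs the sequences to write; entries are processed in order up to the first null element
206 * (nucleotide versus protein labelling is not a parameter: it is derived from seqs via Comparison.isNucleotide)
208 * @return the sequences rendered as MSF-format text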
// Renders the supplied sequences as MSF-format text and returns it as one String.
// NOTE(review): several declarations and branch lines (i, max, len, maxid,
// maxNB, bigChecksum, j, loop exits, else branches) are not visible in this
// excerpt; the comments below describe only what the visible lines do.
210 public String print(SequenceI[] seqs)
// Selects the NA/AA label in the first output line and the N/P type flag further down.
213 boolean is_NA = jalview.util.Comparison.isNucleotide(seqs);
// s receives gap-converted copies; in the visible lines the entries of seqs are only read.
215 SequenceI[] s = new SequenceI[seqs.length];
217 StringBuffer out = new StringBuffer("!!" + (is_NA ? "NA" : "AA") +
218 "_MULTIPLE_ALIGNMENT 1.0\n\n"); // TODO: JBPNote : Jalview doesn't remember NA or AA yet.
// First pass: copy each sequence, stopping at the first null entry.
224 while ( (i < seqs.length) && (seqs[i] != null))
226 // Replace all internal gaps with . and external spaces with ~
227 s[i] = new Sequence(seqs[i].getName(),
228 seqs[i].getSequenceAsString().replace('-', '.'));
230 StringBuffer sb = new StringBuffer();
231 sb.append(s[i].getSequence());
// Scan forward from the start, turning '.' into '~'.
// NOTE(review): the lines that end this scan are not visible here; presumably
// it stops at the first non-gap character - confirm.
233 for (int ii = 0; ii < sb.length(); ii++)
235 if (sb.charAt(ii) == '.')
237 sb.setCharAt(ii, '~');
// Scan backward from the end, turning '.' into '~'. The bound ii > 0 means
// index 0 is never visited by this loop.
245 for (int ii = sb.length() - 1; ii > 0; ii--)
247 if (sb.charAt(ii) == '.')
249 sb.setCharAt(ii, '~');
257 s[i].setSequence(sb.toString());
// max tracks the length of the longest converted sequence.
259 if (s[i].getSequence().length > max)
261 max = s[i].getSequence().length;
// Field width for the Len: values = number of digits in max.
267 Format maxLenpad = new Format("%" + (new String("" + max)).length() +
// Field width for the Check: values = one more than the number of digits in max.
269 Format maxChkpad = new Format("%" + (new String("1" + max)).length() +
// Per-sequence checksums, also summed into bigChecksum for the header line.
274 int[] checksums = new int[s.length];
277 checksums[i] = checkSum(s[i].getSequenceAsString());
278 bigChecksum += checksums[i];
// Header line: reports the length of the first sequence, the N/P type flag and
// the combined checksum modulo 10000.
// NOTE(review): s[0] is dereferenced unconditionally in the visible lines, so
// an empty array or a null first entry would fail here unless guarded in lines
// not shown - confirm.
283 out.append(" MSF: " + s[0].getSequence().length + " Type: " +
284 (is_NA ? "N" : "P") + " Check: " + (bigChecksum % 10000) +
// One "Name:" fragment and one "Len: ... Weight:" fragment per sequence.
287 String[] nameBlock = new String[s.length];
288 String[] idBlock = new String[s.length];
291 while ( (i < s.length) && (s[i] != null))
294 nameBlock[i] = new String(" Name: " + printId(s[i]) + " ");
296 idBlock[i] = new String("Len: " +
297 maxLenpad.form(s[i].getSequence().length) +
299 maxChkpad.form(checksums[i]) + " Weight: 1.00\n");
// maxid: longest getName() length; used later as the id column width.
301 if (s[i].getName().length() > maxid)
303 maxid = s[i].getName().length();
// maxNB: longest name fragment; used to left-align the Len: column.
306 if (nameBlock[i].length() > maxNB)
308 maxNB = nameBlock[i].length();
// Left-justify every name fragment to the width of the longest one.
324 Format nbFormat = new Format("%-" + maxNB + "s");
326 for (i = 0; (i < s.length) && (s[i] != null); i++)
328 out.append(nbFormat.form(nameBlock[i]) + idBlock[i]);
// The "//" line separates the header from the sequence data.
332 out.append("\n\n//\n\n");
// Number of output rows needed for max residues at len residues per row.
// NOTE(review): len is declared outside the visible lines; the start offset
// below uses 50 per row, so presumably len is 50 - confirm. The adjustment made
// when max is an exact multiple of len is also not visible.
336 int nochunks = (max / len) + 1;
338 if ( (max % len) == 0)
// One row per sequence within each chunk.
343 for (i = 0; i < nochunks; i++)
347 while ( (j < s.length) && (s[j] != null))
349 String name = printId(s[j]);
// NOTE(review): the column width maxid comes from getName() while the text
// printed is printId(...); if printId returns more than the bare name the
// padding may be too narrow - confirm.
351 out.append(new Format("%-" + maxid + "s").form(name + " "));
// Each row is written as five groups of ten residues.
353 for (int k = 0; k < 5; k++)
355 int start = (i * 50) + (k * 10);
356 int end = start + 10;
// Full group: both ends lie strictly inside the sequence. Because the test is
// end < length, a group ending exactly at the sequence end is not handled here.
358 if ( (end < s[j].getSequence().length) &&
359 (start < s[j].getSequence().length))
361 out.append(s[j].getSequence(start, end));
// Otherwise, if the group starts inside the sequence, append the remaining tail.
374 if (start < s[j].getSequence().length)
376 out.append(s[j].getSequenceAsString().substring(start));
395 return out.toString();
401 * @return DOCUMENT ME!
403 public String print()
405 return print(getSeqsAsArray());