X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FMSFfile.java;h=df2bed2a185239ae054549dca4dd7d3ad3b9c7d0;hb=refs%2Fheads%2Fportforward%2FJAL-2675_2102b1to2103;hp=e2d370425c8bfb7116d601b9b9e8244a96c1a7d7;hpb=3a993bbe274824870c78bd7695c42fa93908cb30;p=jalview.git diff --git a/src/jalview/io/MSFfile.java b/src/jalview/io/MSFfile.java index e2d3704..df2bed2 100755 --- a/src/jalview/io/MSFfile.java +++ b/src/jalview/io/MSFfile.java @@ -1,37 +1,43 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer - * Copyright (C) 2007 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.io; -import java.io.*; -import java.util.*; +import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceI; +import jalview.util.Comparison; +import jalview.util.Format; -import jalview.datamodel.*; -import jalview.util.*; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Hashtable; +import java.util.List; +import java.util.StringTokenizer; /** * DOCUMENT ME! - * + * * @author $author$ * @version $Revision$ */ -public class MSFfile - extends AlignFile +public class MSFfile extends AlignFile { /** @@ -43,14 +49,16 @@ public class MSFfile /** * Creates a new MSFfile object. - * - * @param inFile DOCUMENT ME! - * @param type DOCUMENT ME! - * - * @throws IOException DOCUMENT ME! + * + * @param inFile + * DOCUMENT ME! + * @param type + * DOCUMENT ME! + * + * @throws IOException + * DOCUMENT ME! */ - public MSFfile(String inFile, String type) - throws IOException + public MSFfile(String inFile, DataSourceType type) throws IOException { super(inFile, type); } @@ -59,76 +67,72 @@ public class MSFfile { super(source); } -{ - // TODO Auto-generated constructor stub - } /** - * DOCUMENT ME! 
+ * Read and parse MSF sequence data */ - public void parse() - throws IOException + @Override + public void parse() throws IOException { - int i = 0; boolean seqFlag = false; - String key = new String(); - Vector headers = new Vector(); - Hashtable seqhash = new Hashtable(); - String line; + List headers = new ArrayList(); + Hashtable seqhash = new Hashtable(); try { - while ( (line = nextLine()) != null) + String line; + while ((line = nextLine()) != null) { StringTokenizer str = new StringTokenizer(line); + String key = null; while (str.hasMoreTokens()) { String inStr = str.nextToken(); - //If line has header information add to the headers vector + // If line has header information add to the headers vector if (inStr.indexOf("Name:") != -1) { key = str.nextToken(); - headers.addElement(key); + headers.add(key); } - //if line has // set SeqFlag to 1 so we know sequences are coming + // if line has // set SeqFlag so we know sequences are coming if (inStr.indexOf("//") != -1) { seqFlag = true; } - //Process lines as sequence lines if seqFlag is set - if ( (inStr.indexOf("//") == -1) && (seqFlag == true)) + // Process lines as sequence lines if seqFlag is set + if ((inStr.indexOf("//") == -1) && seqFlag) { - //seqeunce id is the first field + // sequence id is the first field key = inStr; - StringBuffer tempseq; + StringBuilder tempseq; - //Get sequence from hash if it exists + // Get sequence from hash if it exists if (seqhash.containsKey(key)) { - tempseq = (StringBuffer) seqhash.get(key); + tempseq = seqhash.get(key); } else { - tempseq = new StringBuffer(); + tempseq = new StringBuilder(64); seqhash.put(key, tempseq); } - //loop through the rest of the words + // loop through the rest of the words while (str.hasMoreTokens()) { - //append the word to the sequence - tempseq.append(str.nextToken()); + // append the word to the sequence + String sequenceBlock = str.nextToken(); + tempseq.append(sequenceBlock); } } } } - } - catch (IOException e) + } catch 
(IOException e) { System.err.println("Exception parsing MSFFile " + e); e.printStackTrace(); @@ -136,12 +140,12 @@ public class MSFfile this.noSeqs = headers.size(); - //Add sequences to the hash - for (i = 0; i < headers.size(); i++) + // Add sequences to the hash + for (int i = 0; i < headers.size(); i++) { - if (seqhash.get(headers.elementAt(i)) != null) + if (seqhash.get(headers.get(i)) != null) { - String head = headers.elementAt(i).toString(); + String head = headers.get(i); String seq = seqhash.get(head).toString(); if (maxLength < head.length()) @@ -149,8 +153,11 @@ public class MSFfile maxLength = head.length(); } - // Replace ~ with a sensible gap character - seq = seq.replace('~', '-'); + /* + * replace ~ (leading/trailing positions) with the gap character; + * use '.' as this is the internal gap character required by MSF + */ + seq = seq.replace('~', '.'); Sequence newSeq = parseId(head); @@ -160,17 +167,18 @@ public class MSFfile } else { - System.err.println("MSFFile Parser: Can't find sequence for " + - headers.elementAt(i)); + System.err.println("MSFFile Parser: Can't find sequence for " + + headers.get(i)); } } } /** * DOCUMENT ME! - * - * @param seq DOCUMENT ME! - * + * + * @param seq + * DOCUMENT ME! + * * @return DOCUMENT ME! */ public int checkSum(String seq) @@ -188,8 +196,7 @@ public class MSFfile { check += (i % 57 + 1) * value; } - } - catch (Exception e) + } catch (Exception e) { System.err.println("Exception during MSF Checksum calculation"); e.printStackTrace(); @@ -201,34 +208,41 @@ public class MSFfile /** * DOCUMENT ME! - * - * @param s DOCUMENT ME! - * @param is_NA DOCUMENT ME! - * + * + * @param s + * DOCUMENT ME! + * @param is_NA + * DOCUMENT ME! + * * @return DOCUMENT ME! 
*/ - public String print(SequenceI[] seqs) + @Override + public String print(SequenceI[] sqs, boolean jvSuffix) { - boolean is_NA = jalview.util.Comparison.isNucleotide(seqs); - - SequenceI[] s = new SequenceI[seqs.length]; + boolean is_NA = Comparison.isNucleotide(sqs); - StringBuffer out = new StringBuffer("!!" + (is_NA ? "NA" : "AA") + - "_MULTIPLE_ALIGNMENT 1.0\n\n"); // TODO: JBPNote : Jalview doesn't remember NA or AA yet. + SequenceI[] s = new SequenceI[sqs.length]; + StringBuilder out = new StringBuilder(256); + out.append("!!").append(is_NA ? "NA" : "AA") + .append("_MULTIPLE_ALIGNMENT 1.0"); + // TODO: JBPNote : Jalview doesn't remember NA or AA yet. + out.append(newline); + out.append(newline); int max = 0; int maxid = 0; int i = 0; - while ( (i < seqs.length) && (seqs[i] != null)) + while ((i < sqs.length) && (sqs[i] != null)) { - // Replace all internal gaps with . and external spaces with ~ - s[i] = new Sequence(seqs[i].getName(), - seqs[i].getSequenceAsString().replace('-', '.')); + /* + * modify to MSF format: uses '.' 
for internal gaps, + * and '~' for leading or trailing gaps + */ + String seqString = sqs[i].getSequenceAsString().replace('-', '.'); - StringBuffer sb = new StringBuffer(); - sb.append(s[i].getSequence()); + StringBuilder sb = new StringBuilder(seqString); for (int ii = 0; ii < sb.length(); ii++) { @@ -253,21 +267,21 @@ public class MSFfile break; } } + s[i] = new Sequence(sqs[i].getName(), sb.toString(), + sqs[i].getStart(), sqs[i].getEnd()); - s[i].setSequence(sb.toString()); - - if (s[i].getSequence().length > max) + if (sb.length() > max) { - max = s[i].getSequence().length; + max = sb.length(); } i++; } - Format maxLenpad = new Format("%" + (new String("" + max)).length() + - "d"); - Format maxChkpad = new Format("%" + (new String("1" + max)).length() + - "d"); + Format maxLenpad = new Format( + "%" + (new String("" + max)).length() + "d"); + Format maxChkpad = new Format( + "%" + (new String("1" + max)).length() + "d"); i = 0; int bigChecksum = 0; @@ -280,23 +294,25 @@ public class MSFfile } long maxNB = 0; - out.append(" MSF: " + s[0].getSequence().length + " Type: " + - (is_NA ? "N" : "P") + " Check: " + (bigChecksum % 10000) + - " ..\n\n\n"); + out.append(" MSF: " + s[0].getLength() + " Type: " + + (is_NA ? 
"N" : "P") + " Check: " + (bigChecksum % 10000) + + " .."); + out.append(newline); + out.append(newline); + out.append(newline); String[] nameBlock = new String[s.length]; String[] idBlock = new String[s.length]; i = 0; - while ( (i < s.length) && (s[i] != null)) + while ((i < s.length) && (s[i] != null)) { - nameBlock[i] = new String(" Name: " + printId(s[i]) + " "); + nameBlock[i] = new String(" Name: " + printId(s[i], jvSuffix) + " "); - idBlock[i] = new String("Len: " + - maxLenpad.form(s[i].getSequence().length) + - " Check: " + - maxChkpad.form(checksums[i]) + " Weight: 1.00\n"); + idBlock[i] = new String("Len: " + maxLenpad.form(s[i].getLength()) + + " Check: " + maxChkpad.form(checksums[i]) + + " Weight: 1.00" + newline); if (s[i].getName().length() > maxid) { @@ -329,24 +345,22 @@ public class MSFfile } maxid++; - out.append("\n\n//\n\n"); - + out.append(newline); + out.append(newline); + out.append("//"); + out.append(newline); + out.append(newline); int len = 50; - int nochunks = (max / len) + 1; - - if ( (max % len) == 0) - { - nochunks--; - } + int nochunks = (max / len) + (max % len > 0 ? 
1 : 0); for (i = 0; i < nochunks; i++) { int j = 0; - while ( (j < s.length) && (s[j] != null)) + while ((j < s.length) && (s[j] != null)) { - String name = printId(s[j]); + String name = printId(s[j], jvSuffix); out.append(new Format("%-" + maxid + "s").form(name + " ")); @@ -355,8 +369,9 @@ public class MSFfile int start = (i * 50) + (k * 10); int end = start + 10; - if ( (end < s[j].getSequence().length) && - (start < s[j].getSequence().length)) + int length = s[j].getLength(); + if ((end < length) + && (start < length)) { out.append(s[j].getSequence(start, end)); @@ -366,21 +381,21 @@ public class MSFfile } else { - out.append("\n"); + out.append(newline); } } else { - if (start < s[j].getSequence().length) + if (start < length) { out.append(s[j].getSequenceAsString().substring(start)); - out.append("\n"); + out.append(newline); } else { if (k == 0) { - out.append("\n"); + out.append(newline); } } } @@ -389,19 +404,9 @@ public class MSFfile j++; } - out.append("\n"); + out.append(newline); } return out.toString(); } - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public String print() - { - return print(getSeqsAsArray()); - } }