X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FMSFfile.java;h=b9a14bafa245ca5a21f06350a7d85401d1808c77;hb=49cc4c921eac8fd966dfe4fa4b86d00655ea2f67;hp=63827a17a0a4a1ef6f8703ff5853233e2f0e581b;hpb=f960a9572b838aa32b8618324a12e9e4767f98cf;p=jalview.git diff --git a/src/jalview/io/MSFfile.java b/src/jalview/io/MSFfile.java index 63827a1..b9a14ba 100755 --- a/src/jalview/io/MSFfile.java +++ b/src/jalview/io/MSFfile.java @@ -1,78 +1,127 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer + * Copyright (C) 2007 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ package jalview.io; -import jalview.datamodel.*; -import jalview.util.*; - import java.io.*; import java.util.*; -public class MSFfile extends AlignFile { +import jalview.datamodel.*; +import jalview.util.*; +/** + * DOCUMENT ME! + * + * @author $author$ + * @version $Revision$ + */ +public class MSFfile + extends AlignFile +{ + + /** + * Creates a new MSFfile object. + */ public MSFfile() - {} - - public MSFfile(String inStr) { - super(inStr); + { } - public MSFfile(String inFile, String type) throws IOException { - super(inFile,type); + /** + * Creates a new MSFfile object. + * + * @param inFile DOCUMENT ME! + * @param type DOCUMENT ME! + * + * @throws IOException DOCUMENT ME! + */ + public MSFfile(String inFile, String type) + throws IOException + { + super(inFile, type); } - private static com.stevesoft.pat.Regex gapre = new com.stevesoft.pat.Regex("\\~","-"); - private static com.stevesoft.pat.Regex re2gap = new com.stevesoft.pat.Regex("["+jalview.util.Comparison.GapChars+"]","\\~"); - - public void parse() { - int i = 0; - boolean seqFlag = false; - String key = new String(); - Vector headers = new Vector(); + /** + * DOCUMENT ME! + */ + public void parse() + throws IOException + { + int i = 0; + boolean seqFlag = false; + String key = new String(); + Vector headers = new Vector(); Hashtable seqhash = new Hashtable(); - String line; - - try { - while ((line = nextLine()) != null) { - - StringTokenizer str = new StringTokenizer(line); + String line; + + try + { + while ( (line = nextLine()) != null) + { + StringTokenizer str = new StringTokenizer(line); + + while (str.hasMoreTokens()) + { + String inStr = str.nextToken(); + + //If line has header information add to the headers vector + if (inStr.indexOf("Name:") != -1) + { + key = str.nextToken(); + headers.addElement(key); + } - while (str.hasMoreTokens()) { + //if line has // set SeqFlag to 1 so we know sequences are coming + if (inStr.indexOf("//") != -1) + { + seqFlag = true; + } - String inStr = str.nextToken(); + //Process lines as sequence lines if seqFlag is set + if ( (inStr.indexOf("//") == -1) && (seqFlag == true)) + { + //seqeunce id is the first field + key = inStr; - //If line has header information add to the headers vector - if (inStr.indexOf("Name:") != -1) { - key = str.nextToken(); - headers.addElement(key); - } + StringBuffer tempseq; - //if line has // set SeqFlag to 1 so we know sequences are coming - if (inStr.indexOf("//") != -1) { - seqFlag = true; - } + //Get sequence from hash if it exists + if (seqhash.containsKey(key)) + { + tempseq = (StringBuffer) seqhash.get(key); + } + else + { + tempseq = new StringBuffer(); + seqhash.put(key, tempseq); + } - //Process lines as sequence lines if seqFlag is set - if (( inStr.indexOf("//") == -1) && (seqFlag == true)) { - //seqeunce id is the first field - key = inStr; - StringBuffer tempseq; - - //Get sequence from hash if it exists - if (seqhash.containsKey(key)) { - tempseq = (StringBuffer)seqhash.get(key); - } else { - tempseq = new StringBuffer(); - seqhash.put(key,tempseq); - } - - //loop through the rest of the words - while (str.hasMoreTokens()) { - //append the word to the sequence - tempseq.append(str.nextToken()); + //loop through the rest of the words + while (str.hasMoreTokens()) + { + //append the word to the sequence + tempseq.append(str.nextToken()); + } } } } } - } catch (IOException e) { + catch (IOException e) + { System.err.println("Exception parsing MSFFile " + e); e.printStackTrace(); } @@ -80,175 +129,271 @@ public class MSFfile extends AlignFile { this.noSeqs = headers.size(); //Add sequences to the hash - for (i = 0; i < headers.size(); i++ ) { - - if ( seqhash.get(headers.elementAt(i)) != null) { - String head = headers.elementAt(i).toString(); - String seq = seqhash.get(head).toString(); - - int start = 1; - int end = seq.length(); - - if (maxLength < head.length() ) { - maxLength = head.length(); + for (i = 0; i < headers.size(); i++) + { + if (seqhash.get(headers.elementAt(i)) != null) + { + String head = headers.elementAt(i).toString(); + String seq = seqhash.get(head).toString(); + + if (maxLength < head.length()) + { + maxLength = head.length(); } - if (head.indexOf("/") > 0 ) { + // Replace ~ with a sensible gap character + seq = seq.replace('~', '-'); - StringTokenizer st = new StringTokenizer(head,"/"); + Sequence newSeq = parseId(head); - if (st.countTokens() == 2) { - - head = st.nextToken(); - String tmp = st.nextToken(); - st = new StringTokenizer(tmp,"-"); - if (st.countTokens() == 2) { - start = Integer.valueOf(st.nextToken()).intValue(); - end = Integer.valueOf(st.nextToken()).intValue(); - } - } - } - // Replace ~ with a sensible gap character - seq = gapre.replaceAll(seq); - Sequence newSeq = new Sequence(head,seq,start,end); + newSeq.setSequence(seq); seqs.addElement(newSeq); - - } else { - System.err.println("MSFFile Parser: Can't find sequence for " + headers.elementAt(i)); + } + else + { + System.err.println("MSFFile Parser: Can't find sequence for " + + headers.elementAt(i)); } } - } - public static int checkSum(String seq) { - //String chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.*~&@"; + /** + * DOCUMENT ME! + * + * @param seq DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public int checkSum(String seq) + { int check = 0; + String sequence = seq.toUpperCase(); - String index = "--------------------------------------&---*---.-----------------@ABCDEFGHIJKLMNOPQRSTUVWXYZ------ABCDEFGHIJKLMNOPQRSTUVWXYZ----@"; - index += "--------------------------------------------------------------------------------------------------------------------------------"; + for (int i = 0; i < sequence.length(); i++) + { + try + { - for(int i = 0; i < seq.length(); i++) { - try { - if (i max) { - max = sq.length(); + while ( (i < seqs.length) && (seqs[i] != null)) + { + // Replace all internal gaps with . and external spaces with ~ + s[i] = new Sequence(seqs[i].getName(), + seqs[i].getSequenceAsString().replace('-', '.')); + + StringBuffer sb = new StringBuffer(); + sb.append(s[i].getSequence()); + + for (int ii = 0; ii < sb.length(); ii++) + { + if (sb.charAt(ii) == '.') + { + sb.setCharAt(ii, '~'); + } + else + { + break; + } } + + for (int ii = sb.length() - 1; ii > 0; ii--) + { + if (sb.charAt(ii) == '.') + { + sb.setCharAt(ii, '~'); + } + else + { + break; + } + } + + s[i].setSequence(sb.toString()); + + if (s[i].getSequence().length > max) + { + max = s[i].getSequence().length; + } + i++; } - Format maxLenpad = new Format("%"+(new String(""+max)).length()+"d"); - Format maxChkpad = new Format("%"+(new String("1"+max)).length()+"d"); + + Format maxLenpad = new Format("%" + (new String("" + max)).length() + + "d"); + Format maxChkpad = new Format("%" + (new String("1" + max)).length() + + "d"); i = 0; - long bigcheck = checkSum(big); - long maxNB=0; - out.append(" MSF: " + s[0].getSequence().length() + " Type: "+(is_NA?"N":"P")+" Check: " + bigcheck + " ..\n\n\n"); - String nameBlock[] = new String[s.length]; - String idBlock[] = new String[s.length]; - while (i < s.length && s[i] != null) { - String seq = s[i].getSequence(); - String name = s[i].getName()+ "/" + s[i].getStart() + "-" + s[i].getEnd(); - int check = checkSum(s[i].getSequence()); - nameBlock[i]=new String(" Name: "+name+" "); - idBlock[i] = new String("Len: " + maxLenpad.form(s[i].getSequence().length()) + " Check:" + maxChkpad.form(check) + " Weight: 1.00\n"); - - if (name.length() > maxid) { - maxid = name.length(); + + int bigChecksum = 0; + int[] checksums = new int[s.length]; + while (i < s.length) + { + checksums[i] = checkSum(s[i].getSequenceAsString()); + bigChecksum += checksums[i]; + i++; + } + + long maxNB = 0; + out.append(" MSF: " + s[0].getSequence().length + " Type: " + + (is_NA ? "N" : "P") + " Check: " + (bigChecksum % 10000) + + " ..\n\n\n"); + + String[] nameBlock = new String[s.length]; + String[] idBlock = new String[s.length]; + + i = 0; + while ( (i < s.length) && (s[i] != null)) + { + + nameBlock[i] = new String(" Name: " + printId(s[i]) + " "); + + idBlock[i] = new String("Len: " + + maxLenpad.form(s[i].getSequence().length) + + " Check: " + + maxChkpad.form(checksums[i]) + " Weight: 1.00\n"); + + if (s[i].getName().length() > maxid) + { + maxid = s[i].getName().length(); } - if (nameBlock[i].length()>maxNB) { - maxNB=nameBlock[i].length(); + + if (nameBlock[i].length() > maxNB) + { + maxNB = nameBlock[i].length(); } i++; } - if (maxid < 10) { + + if (maxid < 10) + { maxid = 10; } - if (maxNB<15) { - maxNB=15; + + if (maxNB < 15) + { + maxNB = 15; } - Format nbFormat = new Format("%-"+maxNB+"s"); - for (i=0;i