X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FMSFfile.java;h=2a6e10f0dee360545d7cf37987bb789d0a31adb6;hb=c68ce4c63bd79f23b87aac3eb0afa7f6b42a3098;hp=b9cad0afe74deb195039a865c729f17834cf22fa;hpb=1ecf6419aba86993b3c223bf5ec0fa79427baf85;p=jalview.git diff --git a/src/jalview/io/MSFfile.java b/src/jalview/io/MSFfile.java index b9cad0a..2a6e10f 100755 --- a/src/jalview/io/MSFfile.java +++ b/src/jalview/io/MSFfile.java @@ -1,230 +1,301 @@ +/* +* Jalview - A Sequence Alignment Editor and Viewer +* Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License +* as published by the Free Software Foundation; either version 2 +* of the License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +*/ package jalview.io; import jalview.datamodel.*; + import jalview.util.*; import java.io.*; -import java.util.*; - -public class MSFfile extends AlignFile { - - public MSFfile() - {} - - public MSFfile(String inStr) { - super(inStr); - } - public MSFfile(String inFile, String type) throws IOException { - super(inFile,type); - } - - public void parse() { +import java.util.*; - int i = 0; - boolean seqFlag = false; - String key = new String(); - Vector headers = new Vector(); - Hashtable seqhash = new Hashtable(); - String line; - try { - while ((line = nextLine()) != null) { +public class MSFfile extends AlignFile { + private static com.stevesoft.pat.Regex gapre = new com.stevesoft.pat.Regex("\\~", + "-"); + private static com.stevesoft.pat.Regex re2gap = new com.stevesoft.pat.Regex( + "[" + jalview.util.Comparison.GapChars + "]", "\\~"); - StringTokenizer str = new StringTokenizer(line); + public MSFfile() { + } - while (str.hasMoreTokens()) { + public MSFfile(String inStr) { + super(inStr); + } - String inStr = str.nextToken(); + public MSFfile(String inFile, String type) throws IOException { + super(inFile, type); + } - //If line has header information add to the headers vector - if (inStr.indexOf("Name:") != -1) { - key = str.nextToken(); - headers.addElement(key); + public void parse() { + int i = 0; + boolean seqFlag = false; + String key = new String(); + Vector headers = new Vector(); + Hashtable seqhash = new Hashtable(); + String line; + + try { + while ((line = nextLine()) != null) { + StringTokenizer str = new StringTokenizer(line); + + while (str.hasMoreTokens()) { + String inStr = str.nextToken(); + + //If line has header information add to the headers vector + if (inStr.indexOf("Name:") != -1) { + key = str.nextToken(); + headers.addElement(key); + } + + //if line has // set SeqFlag to 1 so we know sequences are coming + if (inStr.indexOf("//") != -1) { + seqFlag = true; + } + + //Process lines as sequence lines if seqFlag is set + if ((inStr.indexOf("//") == -1) && (seqFlag == true)) { + //seqeunce id is the first field + key = inStr; + + StringBuffer tempseq; + + //Get sequence from hash if it exists + if (seqhash.containsKey(key)) { + tempseq = (StringBuffer) seqhash.get(key); + } else { + tempseq = new StringBuffer(); + seqhash.put(key, tempseq); + } + + //loop through the rest of the words + while (str.hasMoreTokens()) { + //append the word to the sequence + tempseq.append(str.nextToken()); + } + } + } + } + } catch (IOException e) { + System.err.println("Exception parsing MSFFile " + e); + e.printStackTrace(); } - //if line has // set SeqFlag to 1 so we know sequences are coming - if (inStr.indexOf("//") != -1) { - seqFlag = true; - } + this.noSeqs = headers.size(); - //Process lines as sequence lines if seqFlag is set - if (( inStr.indexOf("//") == -1) && (seqFlag == true)) { - //seqeunce id is the first field - key = inStr; - StringBuffer tempseq; - - //Get sequence from hash if it exists - if (seqhash.containsKey(key)) { - tempseq = (StringBuffer)seqhash.get(key); - } else { - tempseq = new StringBuffer(); - seqhash.put(key,tempseq); - } - - //loop through the rest of the words - while (str.hasMoreTokens()) { - //append the word to the sequence - tempseq.append(str.nextToken()); - } - } - } - } - } catch (IOException e) { - System.out.println("Exception parsing MSFFile " + e); - } + //Add sequences to the hash + for (i = 0; i < headers.size(); i++) { + if (seqhash.get(headers.elementAt(i)) != null) { + String head = headers.elementAt(i).toString(); + String seq = seqhash.get(head).toString(); - this.noSeqs = headers.size(); + int start = 1; + int end = seq.length(); - //Add sequences to the hash - for (i = 0; i < headers.size(); i++ ) { + if (maxLength < head.length()) { + maxLength = head.length(); + } - if ( seqhash.get(headers.elementAt(i)) != null) { - String head = headers.elementAt(i).toString(); - String seq = seqhash.get(head).toString(); + if (head.indexOf("/") > 0) { + StringTokenizer st = new StringTokenizer(head, "/"); - int start = 1; - int end = seq.length(); + if (st.countTokens() == 2) { + head = st.nextToken(); - if (maxLength < head.length() ) { - maxLength = head.length(); - } + String tmp = st.nextToken(); + st = new StringTokenizer(tmp, "-"); - if (head.indexOf("/") > 0 ) { + if (st.countTokens() == 2) { + start = Integer.valueOf(st.nextToken()).intValue(); + end = Integer.valueOf(st.nextToken()).intValue(); + } + } + } - StringTokenizer st = new StringTokenizer(head,"/"); + // Replace ~ with a sensible gap character + seq = gapre.replaceAll(seq); - if (st.countTokens() == 2) { + Sequence newSeq = new Sequence(head, seq, start, end); - head = st.nextToken(); - String tmp = st.nextToken(); - st = new StringTokenizer(tmp,"-"); - if (st.countTokens() == 2) { - start = Integer.valueOf(st.nextToken()).intValue(); - end = Integer.valueOf(st.nextToken()).intValue(); + seqs.addElement(newSeq); + } else { + System.err.println("MSFFile Parser: Can't find sequence for " + + headers.elementAt(i)); } - } } + } - Sequence newSeq = new Sequence(head,seq,start,end); + public static int checkSum(String seq) { + //String chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.*~&@"; + int check = 0; + + String index = "--------------------------------------&---*---.-----------------@ABCDEFGHIJKLMNOPQRSTUVWXYZ------ABCDEFGHIJKLMNOPQRSTUVWXYZ----@"; + index += "--------------------------------------------------------------------------------------------------------------------------------"; + + for (int i = 0; i < seq.length(); i++) { + try { + if (i < seq.length()) { + int pos = index.indexOf(seq.substring(i, i + 1)); + + if (!index.substring(pos, pos + 1).equals("_")) { + check += (((i % 57) + 1) * pos); + } + } + } catch (Exception e) { + System.err.println("Exception during MSF Checksum calculation"); + e.printStackTrace(); + } + } - seqs.addElement(newSeq); + return check % 10000; + } - } else { - System.out.println("Can't find sequence for " + headers.elementAt(i)); - } + public static String print(SequenceI[] s) { + return print(s, false); } - } + public static String print(SequenceI[] s, boolean is_NA) { + StringBuffer out = new StringBuffer("!!" + (is_NA ? "NA" : "AA") + + "_MULTIPLE_ALIGNMENT 1.0\n\n"); // TODO: JBPNote : Jalview doesn't remember NA or AA yet. + + int max = 0; + int maxid = 0; + int i = 0; + String big = ""; - public static int checkSum(String seq) { - //String chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.*~&@"; - int check = 0; + while ((i < s.length) && (s[i] != null)) { + String sq; + big += (sq = s[i].getSequence()); - String index = "--------------------------------------&---*---.-----------------@ABCDEFGHIJKLMNOPQRSTUVWXYZ------ABCDEFGHIJKLMNOPQRSTUVWXYZ----@"; - index += "--------------------------------------------------------------------------------------------------------------------------------"; + if (sq.length() > max) { + max = sq.length(); + } - for(int i = 0; i < seq.length(); i++) { - try { - if (i maxid) { + maxid = name.length(); + } - int max = 0; - int maxid = 0; + if (nameBlock[i].length() > maxNB) { + maxNB = nameBlock[i].length(); + } - int i = 0; - String big = ""; - while (i < s.length && s[i] != null) { - big += s[i].getSequence(); - i++; - } - i = 0; - int bigcheck = checkSum(big); - - out.append(" MSF: " + s[0].getSequence().length() + " Type: P Check: " + bigcheck + " ..\n\n\n"); - - while (i < s.length && s[i] != null) { - String seq = s[i].getSequence(); - String name = s[i].getName()+ "/" + s[i].getStart() + "-" + s[i].getEnd(); - int check = checkSum(s[i].getSequence()); - out.append(" Name: " + name + " oo Len: " + s[i].getSequence().length() + " Check: " + check + " Weight: 1.00\n"); - if (seq.length() > max) { - max = seq.length(); - } - if (name.length() > maxid) { - maxid = name.length(); - } - i++; - } + i++; + } - if (maxid < 10) { - maxid = 10; - } - maxid++; - out.append( "\n\n//\n\n"); + if (maxid < 10) { + maxid = 10; + } - int len = 50; + if (maxNB < 15) { + maxNB = 15; + } - int nochunks = max / len + 1; - if (max%len == 0) { - nochunks--; - } - for (i = 0; i < nochunks; i++) { - int j = 0; - while (j < s.length && s[j] != null) { - String name = s[j].getName(); - out.append( new Format("%-" + maxid + "s").form(name + "/" + s[j].getStart() + "-" + s[j].getEnd()) + " "); - for (int k = 0; k < 5; k++) { - - int start = i*50 + k*10; - int end = start + 10; - - if (end < s[j].getSequence().length() && start < s[j].getSequence().length() ) { - out.append(s[j].getSequence().substring(start,end)); - if (k < 4) { - out.append(" "); - } else { - out.append("\n"); - } - } else { - if (start < s[j].getSequence().length()) { - out.append(s[j].getSequence().substring(start)); - out.append("\n"); - } else { - if (k == 0) { - out.append("\n"); - } - } - } + Format nbFormat = new Format("%-" + maxNB + "s"); + + for (i = 0; (i < s.length) && (s[i] != null); i++) { + out.append(nbFormat.form(nameBlock[i]) + idBlock[i]); } - j++; - } - out.append("\n"); - } - return out.toString(); - } - public String print() { - return print(getSeqsAsArray()); - } -} + maxid++; + out.append("\n\n//\n\n"); + int len = 50; + int nochunks = (max / len) + 1; + if ((max % len) == 0) { + nochunks--; + } + for (i = 0; i < nochunks; i++) { + int j = 0; + + while ((j < s.length) && (s[j] != null)) { + String name = s[j].getName(); + out.append(new Format("%-" + maxid + "s").form(name + "/" + + s[j].getStart() + "-" + s[j].getEnd()) + " "); + + for (int k = 0; k < 5; k++) { + int start = (i * 50) + (k * 10); + int end = start + 10; + + if ((end < s[j].getSequence().length()) && + (start < s[j].getSequence().length())) { + out.append(re2gap.replaceAll(s[j].getSequence() + .substring(start, end))); + + if (k < 4) { + // out.append(" "); + } else { + out.append("\n"); + } + } else { + if (start < s[j].getSequence().length()) { + out.append(re2gap.replaceAll( + s[j].getSequence().substring(start))); + out.append("\n"); + } else { + if (k == 0) { + out.append("\n"); + } + } + } + } + + j++; + } + out.append("\n"); + } + return out.toString(); + } + public String print() { + return print(getSeqsAsArray()); + } +}