X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FMSFfile.java;h=bae4342276b8a701109efd851b0ca3593c14a3f7;hb=4f24ef0848cf3c43ba0467f31fce1baa5c57aad5;hp=2a6e10f0dee360545d7cf37987bb789d0a31adb6;hpb=588042b69abf8e60bcc950b24c283933c7dd422f;p=jalview.git diff --git a/src/jalview/io/MSFfile.java b/src/jalview/io/MSFfile.java index 2a6e10f..bae4342 100755 --- a/src/jalview/io/MSFfile.java +++ b/src/jalview/io/MSFfile.java @@ -27,24 +27,51 @@ import java.io.*; import java.util.*; -public class MSFfile extends AlignFile { - private static com.stevesoft.pat.Regex gapre = new com.stevesoft.pat.Regex("\\~", - "-"); - private static com.stevesoft.pat.Regex re2gap = new com.stevesoft.pat.Regex( - "[" + jalview.util.Comparison.GapChars + "]", "\\~"); - - public MSFfile() { +/** + * DOCUMENT ME! + * + * @author $author$ + * @version $Revision$ + */ +public class MSFfile extends AlignFile +{ + + + /** + * Creates a new MSFfile object. + */ + public MSFfile() + { } - public MSFfile(String inStr) { + /** + * Creates a new MSFfile object. + * + * @param inStr DOCUMENT ME! + */ + public MSFfile(String inStr) + { super(inStr); } - public MSFfile(String inFile, String type) throws IOException { + /** + * Creates a new MSFfile object. + * + * @param inFile DOCUMENT ME! + * @param type DOCUMENT ME! + * + * @throws IOException DOCUMENT ME! + */ + public MSFfile(String inFile, String type) throws IOException + { super(inFile, type); } - public void parse() { + /** + * DOCUMENT ME! + */ + public void parse() throws IOException + { int i = 0; boolean seqFlag = false; String key = new String(); @@ -52,48 +79,60 @@ public class MSFfile extends AlignFile { Hashtable seqhash = new Hashtable(); String line; - try { - while ((line = nextLine()) != null) { + try + { + while ((line = nextLine()) != null) + { StringTokenizer str = new StringTokenizer(line); - while (str.hasMoreTokens()) { + while (str.hasMoreTokens()) + { String inStr = str.nextToken(); //If line has header information add to the headers vector - if (inStr.indexOf("Name:") != -1) { + if (inStr.indexOf("Name:") != -1) + { key = str.nextToken(); headers.addElement(key); } //if line has // set SeqFlag to 1 so we know sequences are coming - if (inStr.indexOf("//") != -1) { + if (inStr.indexOf("//") != -1) + { seqFlag = true; } //Process lines as sequence lines if seqFlag is set - if ((inStr.indexOf("//") == -1) && (seqFlag == true)) { + if ((inStr.indexOf("//") == -1) && (seqFlag == true)) + { //seqeunce id is the first field key = inStr; StringBuffer tempseq; //Get sequence from hash if it exists - if (seqhash.containsKey(key)) { + if (seqhash.containsKey(key)) + { tempseq = (StringBuffer) seqhash.get(key); - } else { + } + else + { tempseq = new StringBuffer(); seqhash.put(key, tempseq); } //loop through the rest of the words - while (str.hasMoreTokens()) { + while (str.hasMoreTokens()) + { //append the word to the sequence tempseq.append(str.nextToken()); } } } } - } catch (IOException e) { + } + catch (IOException e) + { System.err.println("Exception parsing MSFFile " + e); e.printStackTrace(); } @@ -101,64 +140,68 @@ public class MSFfile extends AlignFile { this.noSeqs = headers.size(); //Add sequences to the hash - for (i = 0; i < headers.size(); i++) { - if (seqhash.get(headers.elementAt(i)) != null) { + for (i = 0; i < headers.size(); i++) + { + if (seqhash.get(headers.elementAt(i)) != null) + { String head = headers.elementAt(i).toString(); String seq = seqhash.get(head).toString(); - int start = 1; - int end = seq.length(); - - if (maxLength < head.length()) { + if (maxLength < head.length()) + { maxLength = head.length(); } - if (head.indexOf("/") > 0) { - StringTokenizer st = new StringTokenizer(head, "/"); - - if (st.countTokens() == 2) { - head = st.nextToken(); - - String tmp = st.nextToken(); - st = new StringTokenizer(tmp, "-"); - - if (st.countTokens() == 2) { - start = Integer.valueOf(st.nextToken()).intValue(); - end = Integer.valueOf(st.nextToken()).intValue(); - } - } + // Replace ~ with a sensible gap character + seq = seq.replace('~', '-'); + if (!isValidProteinSequence(seq)) + { + throw new IOException(AppletFormatAdapter. + INVALID_CHARACTERS + + " : " + head + + " : " + invalidCharacter); } - // Replace ~ with a sensible gap character - seq = gapre.replaceAll(seq); - Sequence newSeq = new Sequence(head, seq, start, end); + Sequence newSeq = parseId(head); + + newSeq.setSequence(seq); seqs.addElement(newSeq); - } else { + } + else + { System.err.println("MSFFile Parser: Can't find sequence for " + headers.elementAt(i)); } } } - public static int checkSum(String seq) { - //String chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.*~&@"; + /** + * DOCUMENT ME! + * + * @param seq DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public int checkSum(String seq) + { int check = 0; + String sequence = seq.toUpperCase(); - String index = "--------------------------------------&---*---.-----------------@ABCDEFGHIJKLMNOPQRSTUVWXYZ------ABCDEFGHIJKLMNOPQRSTUVWXYZ----@"; - index += "--------------------------------------------------------------------------------------------------------------------------------"; - - for (int i = 0; i < seq.length(); i++) { - try { - if (i < seq.length()) { - int pos = index.indexOf(seq.substring(i, i + 1)); + for (int i = 0; i < sequence.length(); i++) + { + try + { - if (!index.substring(pos, pos + 1).equals("_")) { - check += (((i % 57) + 1) * pos); + int value = sequence.charAt(i); + if (value!=-1) + { + check += (i % 57 +1) * value; } - } - } catch (Exception e) { + } + catch (Exception e) + { System.err.println("Exception during MSF Checksum calculation"); e.printStackTrace(); } @@ -167,25 +210,60 @@ public class MSFfile extends AlignFile { return check % 10000; } - public static String print(SequenceI[] s) { - return print(s, false); - } - public static String print(SequenceI[] s, boolean is_NA) { + /** + * DOCUMENT ME! + * + * @param s DOCUMENT ME! + * @param is_NA DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public String print(SequenceI[] seqs) + { + + boolean is_NA = jalview.util.Comparison.isNucleotide(seqs); + + SequenceI [] s = new SequenceI[seqs.length]; + StringBuffer out = new StringBuffer("!!" + (is_NA ? "NA" : "AA") + "_MULTIPLE_ALIGNMENT 1.0\n\n"); // TODO: JBPNote : Jalview doesn't remember NA or AA yet. int max = 0; int maxid = 0; int i = 0; - String big = ""; - while ((i < s.length) && (s[i] != null)) { - String sq; - big += (sq = s[i].getSequence()); + while ((i < seqs.length) && (seqs[i] != null)) + { + // Replace all internal gaps with . and external spaces with ~ + s[i] =new Sequence(seqs[i].getName(),seqs[i].getSequence().replace('-', '.')); + + StringBuffer sb = new StringBuffer(s[i].getSequence()); + for (int ii = 0; ii < sb.length(); ii++) + { + if (sb.charAt(ii) == '.') + { + sb.setCharAt(ii, '~'); + } + else + break; + } + + for (int ii = sb.length() - 1; ii > 0; ii--) + { + if (sb.charAt(ii) == '.') + { + sb.setCharAt(ii,'~'); + } + else + break; + } - if (sq.length() > max) { - max = sq.length(); + s[i].setSequence(sb.toString()); + + if (s[i].getSequence().length() > max) + { + max = s[i].getSequence().length(); } i++; @@ -197,46 +275,59 @@ public class MSFfile extends AlignFile { "d"); i = 0; - long bigcheck = checkSum(big); + int bigChecksum = 0; + int [] checksums = new int[s.length]; + while ( i < s.length ) + { + checksums[i] = checkSum(s[i].getSequence()); + bigChecksum += checksums[i]; + i++; + } + long maxNB = 0; out.append(" MSF: " + s[0].getSequence().length() + " Type: " + - (is_NA ? "N" : "P") + " Check: " + bigcheck + " ..\n\n\n"); + (is_NA ? "N" : "P") + " Check: " + (bigChecksum%10000) + " ..\n\n\n"); String[] nameBlock = new String[s.length]; String[] idBlock = new String[s.length]; - while ((i < s.length) && (s[i] != null)) { - String seq = s[i].getSequence(); - String name = s[i].getName() + "/" + s[i].getStart() + "-" + - s[i].getEnd(); - int check = checkSum(s[i].getSequence()); - nameBlock[i] = new String(" Name: " + name + " "); + i=0; + while ((i < s.length) && (s[i] != null)) + { + + nameBlock[i] = new String(" Name: " + printId(s[i])+" "); + idBlock[i] = new String("Len: " + - maxLenpad.form(s[i].getSequence().length()) + " Check:" + - maxChkpad.form(check) + " Weight: 1.00\n"); + maxLenpad.form(s[i].getSequence().length()) + " Check: " + + maxChkpad.form(checksums[i]) + " Weight: 1.00\n"); - if (name.length() > maxid) { - maxid = name.length(); + if (s[i].getName().length() > maxid) + { + maxid = s[i].getName().length(); } - if (nameBlock[i].length() > maxNB) { + if (nameBlock[i].length() > maxNB) + { maxNB = nameBlock[i].length(); } i++; } - if (maxid < 10) { + if (maxid < 10) + { maxid = 10; } - if (maxNB < 15) { + if (maxNB < 15) + { maxNB = 15; } Format nbFormat = new Format("%-" + maxNB + "s"); - for (i = 0; (i < s.length) && (s[i] != null); i++) { + for (i = 0; (i < s.length) && (s[i] != null); i++) + { out.append(nbFormat.form(nameBlock[i]) + idBlock[i]); } @@ -247,39 +338,52 @@ public class MSFfile extends AlignFile { int nochunks = (max / len) + 1; - if ((max % len) == 0) { + if ((max % len) == 0) + { nochunks--; } - for (i = 0; i < nochunks; i++) { + for (i = 0; i < nochunks; i++) + { int j = 0; - while ((j < s.length) && (s[j] != null)) { - String name = s[j].getName(); - out.append(new Format("%-" + maxid + "s").form(name + "/" + - s[j].getStart() + "-" + s[j].getEnd()) + " "); + while ((j < s.length) && (s[j] != null)) + { + String name = printId( s[j] ); + + out.append(new Format("%-" + maxid + "s").form(name+" ")); - for (int k = 0; k < 5; k++) { + + for (int k = 0; k < 5; k++) + { int start = (i * 50) + (k * 10); int end = start + 10; if ((end < s[j].getSequence().length()) && - (start < s[j].getSequence().length())) { - out.append(re2gap.replaceAll(s[j].getSequence() - .substring(start, end))); + (start < s[j].getSequence().length())) + { + out.append(s[j].getSequence().substring(start, end)); - if (k < 4) { - // out.append(" "); - } else { + if (k < 4) + { + out.append(" "); + } + else + { out.append("\n"); } - } else { - if (start < s[j].getSequence().length()) { - out.append(re2gap.replaceAll( - s[j].getSequence().substring(start))); + } + else + { + if (start < s[j].getSequence().length()) + { + out.append(s[j].getSequence().substring(start)); out.append("\n"); - } else { - if (k == 0) { + } + else + { + if (k == 0) + { out.append("\n"); } } @@ -295,7 +399,13 @@ public class MSFfile extends AlignFile { return out.toString(); } - public String print() { + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public String print() + { return print(getSeqsAsArray()); } }