X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FMSFfile.java;h=bae4342276b8a701109efd851b0ca3593c14a3f7;hb=56e5bdc625697d50d7d3f422616f0f1b40ca2828;hp=5f812153ae1bc4d806ac884532087e3505ac7776;hpb=efc31b4a8d5cee63555586804a2b79c06bdb5a14;p=jalview.git diff --git a/src/jalview/io/MSFfile.java b/src/jalview/io/MSFfile.java index 5f81215..bae4342 100755 --- a/src/jalview/io/MSFfile.java +++ b/src/jalview/io/MSFfile.java @@ -35,10 +35,7 @@ import java.util.*; */ public class MSFfile extends AlignFile { - private static com.stevesoft.pat.Regex gapre = new com.stevesoft.pat.Regex("\\~", - "-"); - private static com.stevesoft.pat.Regex re2gap = new com.stevesoft.pat.Regex( - "[" + jalview.util.Comparison.GapChars + "]", "\\~"); + /** * Creates a new MSFfile object. @@ -73,7 +70,7 @@ public class MSFfile extends AlignFile /** * DOCUMENT ME! */ - public void parse() + public void parse() throws IOException { int i = 0; boolean seqFlag = false; @@ -150,37 +147,25 @@ public class MSFfile extends AlignFile String head = headers.elementAt(i).toString(); String seq = seqhash.get(head).toString(); - int start = 1; - int end = seq.length(); - if (maxLength < head.length()) { maxLength = head.length(); } - if (head.indexOf("/") > 0) + // Replace ~ with a sensible gap character + seq = seq.replace('~', '-'); + if (!isValidProteinSequence(seq)) { - StringTokenizer st = new StringTokenizer(head, "/"); - - if (st.countTokens() == 2) - { - head = st.nextToken(); - - String tmp = st.nextToken(); - st = new StringTokenizer(tmp, "-"); - - if (st.countTokens() == 2) - { - start = Integer.valueOf(st.nextToken()).intValue(); - end = Integer.valueOf(st.nextToken()).intValue(); - } - } + throw new IOException(AppletFormatAdapter. + INVALID_CHARACTERS + + " : " + head + + " : " + invalidCharacter); } - // Replace ~ with a sensible gap character - seq = gapre.replaceAll(seq); - Sequence newSeq = new Sequence(head, seq, start, end); + Sequence newSeq = parseId(head); + + newSeq.setSequence(seq); seqs.addElement(newSeq); } @@ -199,24 +184,20 @@ public class MSFfile extends AlignFile * * @return DOCUMENT ME! */ - public static int checkSum(String seq) + public int checkSum(String seq) { - //String chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.*~&@"; int check = 0; String sequence = seq.toUpperCase(); - String index = "--------------------------------------&---*---.-----------------@ABCDEFGHIJKLMNOPQRSTUVWXYZ------ABCDEFGHIJKLMNOPQRSTUVWXYZ----@"; - index += "--------------------------------------------------------------------------------------------------------------------------------"; - for (int i = 0; i < sequence.length(); i++) { try { - int pos = index.indexOf(sequence.charAt(i)); - if (index.charAt(pos)!='_') + int value = sequence.charAt(i); + if (value!=-1) { - check += (((i % 57) + 1) * pos); + check += (i % 57 +1) * value; } } catch (Exception e) @@ -229,17 +210,6 @@ public class MSFfile extends AlignFile return check % 10000; } - /** - * DOCUMENT ME! - * - * @param s DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public static String print(SequenceI[] s) - { - return print(s, false); - } /** * DOCUMENT ME! @@ -249,24 +219,51 @@ public class MSFfile extends AlignFile * * @return DOCUMENT ME! */ - public static String print(SequenceI[] s, boolean is_NA) + public String print(SequenceI[] seqs) { + + boolean is_NA = jalview.util.Comparison.isNucleotide(seqs); + + SequenceI [] s = new SequenceI[seqs.length]; + StringBuffer out = new StringBuffer("!!" + (is_NA ? "NA" : "AA") + "_MULTIPLE_ALIGNMENT 1.0\n\n"); // TODO: JBPNote : Jalview doesn't remember NA or AA yet. int max = 0; int maxid = 0; int i = 0; - String big = ""; - while ((i < s.length) && (s[i] != null)) + while ((i < seqs.length) && (seqs[i] != null)) { - String sq; - big += (sq = s[i].getSequence()); + // Replace all internal gaps with . and external spaces with ~ + s[i] =new Sequence(seqs[i].getName(),seqs[i].getSequence().replace('-', '.')); - if (sq.length() > max) + StringBuffer sb = new StringBuffer(s[i].getSequence()); + for (int ii = 0; ii < sb.length(); ii++) + { + if (sb.charAt(ii) == '.') { - max = sq.length(); + sb.setCharAt(ii, '~'); + } + else + break; + } + + for (int ii = sb.length() - 1; ii > 0; ii--) + { + if (sb.charAt(ii) == '.') + { + sb.setCharAt(ii,'~'); + } + else + break; + } + + s[i].setSequence(sb.toString()); + + if (s[i].getSequence().length() > max) + { + max = s[i].getSequence().length(); } i++; @@ -278,27 +275,35 @@ public class MSFfile extends AlignFile "d"); i = 0; - long bigcheck = checkSum(big); + int bigChecksum = 0; + int [] checksums = new int[s.length]; + while ( i < s.length ) + { + checksums[i] = checkSum(s[i].getSequence()); + bigChecksum += checksums[i]; + i++; + } + long maxNB = 0; out.append(" MSF: " + s[0].getSequence().length() + " Type: " + - (is_NA ? "N" : "P") + " Check: " + bigcheck + " ..\n\n\n"); + (is_NA ? "N" : "P") + " Check: " + (bigChecksum%10000) + " ..\n\n\n"); String[] nameBlock = new String[s.length]; String[] idBlock = new String[s.length]; + i=0; while ((i < s.length) && (s[i] != null)) { - String name = s[i].getName() + "/" + s[i].getStart() + "-" + - s[i].getEnd(); - int check = checkSum(s[i].getSequence()); - nameBlock[i] = new String(" Name: " + name + " "); + + nameBlock[i] = new String(" Name: " + printId(s[i])+" "); + idBlock[i] = new String("Len: " + - maxLenpad.form(s[i].getSequence().length()) + " Check:" + - maxChkpad.form(check) + " Weight: 1.00\n"); + maxLenpad.form(s[i].getSequence().length()) + " Check: " + + maxChkpad.form(checksums[i]) + " Weight: 1.00\n"); - if (name.length() > maxid) + if (s[i].getName().length() > maxid) { - maxid = name.length(); + maxid = s[i].getName().length(); } if (nameBlock[i].length() > maxNB) @@ -344,9 +349,10 @@ public class MSFfile extends AlignFile while ((j < s.length) && (s[j] != null)) { - String name = s[j].getName(); - out.append(new Format("%-" + maxid + "s").form(name + "/" + - s[j].getStart() + "-" + s[j].getEnd()) + " "); + String name = printId( s[j] ); + + out.append(new Format("%-" + maxid + "s").form(name+" ")); + for (int k = 0; k < 5; k++) { @@ -356,12 +362,11 @@ public class MSFfile extends AlignFile if ((end < s[j].getSequence().length()) && (start < s[j].getSequence().length())) { - out.append(re2gap.replaceAll(s[j].getSequence() - .substring(start, end))); + out.append(s[j].getSequence().substring(start, end)); if (k < 4) { - // out.append(" "); + out.append(" "); } else { @@ -372,8 +377,7 @@ public class MSFfile extends AlignFile { if (start < s[j].getSequence().length()) { - out.append(re2gap.replaceAll( - s[j].getSequence().substring(start))); + out.append(s[j].getSequence().substring(start)); out.append("\n"); } else