JAL-2997 handle tab-delimited trailing column number when parsing
authorgmungoc <g.m.carstairs@dundee.ac.uk>
Wed, 23 May 2018 14:33:08 +0000 (15:33 +0100)
committergmungoc <g.m.carstairs@dundee.ac.uk>
Wed, 23 May 2018 14:33:08 +0000 (15:33 +0100)
examples/testdata/test.aln
src/jalview/io/ClustalFile.java

index 08a7ac3..6582b12 100644 (file)
@@ -1,53 +1,13 @@
 CLUSTAL
 
-FER_CAPAA/1-97     -----------------------------------------------------------A
-FER_CAPAN/1-144    MA------SVSATMISTSFMPRKPAVTSL-KPIPNVGE--ALFGLKS-A--NGGKVTCMA
-FER1_SOLLC/1-144   MA------SISGTMISTSFLPRKPAVTSL-KAISNVGE--ALFGLKS-G--RNGRITCMA
-Q93XJ9_SOLTU/1-144 MA------SISGTMISTSFLPRKPVVTSL-KAISNVGE--ALFGLKS-G--RNGRITCMA
-FER1_PEA/1-149     MATT---PALYGTAVSTSFLRTQPMPMSV-TTTKAFSN--GFLGLKT-SLKRGDLAVAMA
-Q7XA98_TRIPR/1-152 MATT---PALYGTAVSTSFMRRQPVPMSV-ATTTTTKAFPSGFGLKSVSTKRGDLAVAMA
-FER1_MESCR/1-148   MAAT--TAALSGATMSTAFAPK--TPPMTAALPTNVGR--ALFGLKS-SASR-GRVTAMA
-FER1_SPIOL/1-147   MAAT--TTTMMG--MATTFVPKPQAPPMMAALPSNTGR--SLFGLKT-GSR--GGRMTMA
-FER3_RAPSA/1-96    -----------------------------------------------------------A
-FER1_ARATH/1-148   MAST----ALSSAIVGTSFIRRSPAPISLRSLPSANTQ--SLFGLKS-GTARGGRVTAMA
-FER_BRANA/1-96     -----------------------------------------------------------A
-FER2_ARATH/1-148   MAST----ALSSAIVSTSFLRRQQTPISLRSLPFANTQ--SLFGLKS-STARGGRVTAMA
-Q93Z60_ARATH/1-118 MAST----ALSSAIVSTSFLRRQQTPISLRSLPFANTQ--SLFGLKS-STARGGRVTAMA
-FER1_MAIZE/1-150   MATVLGSPRAPAFFFSSSSLRAAPAPTAV--ALPAAKV--GIMGRSA-SSRR--RLRAQA
-O80429_MAIZE/1-140 MAAT---------ALSMSILR---APPPCFSSPLRLRV--AVAKPLA-APMRRQLLRAQA
-1A70|/1-97         -----------------------------------------------------------A
-
-FER_CAPAA/1-97     SYKVKLITPDGPIEFDCPDDVYILDQAEEAGHDLPYSCRAGSCSSCAGKIAGGAVDQTDG
-FER_CAPAN/1-144    SYKVKLITPDGPIEFDCPDNVYILDQAEEAGHDLPYSCRAGSCSSCAGKIAGGAVDQTDG
-FER1_SOLLC/1-144   SYKVKLITPEGPIEFECPDDVYILDQAEEEGHDLPYSCRAGSCSSCAGKVTAGSVDQSDG
-Q93XJ9_SOLTU/1-144 SYKVKLITPDGPIEFECPDDVYILDQAEEEGHDLPYSCRAGSCSSCAGKVTAGTVDQSDG
-FER1_PEA/1-149     SYKVKLVTPDGTQEFECPSDVYILDHAEEVGIDLPYSCRAGSCSSCAGKVVGGEVDQSDG
-Q7XA98_TRIPR/1-152 TYKVKLITPEGPQEFDCPDDVYILDHAEEVGIELPYSCRAGSCSSCAGKVVNGNVNQEDG
-FER1_MESCR/1-148   AYKVTLVTPEGKQELECPDDVYILDAAEEAGIDLPYSCRAGSCSSCAGKVTSGSVNQDDG
-FER1_SPIOL/1-147   AYKVTLVTPTGNVEFQCPDDVYILDAAEEEGIDLPYSCRAGSCSSCAGKLKTGSLNQDDQ
-FER3_RAPSA/1-96    TYKVKFITPEGEQEVECDDDVYVLDAAEEAGIDLPYSCRAGSCSSCAGKVVSGSVDQSDQ
-FER1_ARATH/1-148   TYKVKFITPEGELEVECDDDVYVLDAAEEAGIDLPYSCRAGSCSSCAGKVVSGSVDQSDQ
-FER_BRANA/1-96     TYKVKFITPEGEQEVECDDDVYVLDAAEEAGIDLPYSCRAGSCSSCAGKVVSGFVDQSDE
-FER2_ARATH/1-148   TYKVKFITPEGEQEVECEEDVYVLDAAEEAGLDLPYSCRAGSCSSCAGKVVSGSIDQSDQ
-Q93Z60_ARATH/1-118 TYKVKFITPEGEQEVECEEDVYVLDAAEEAGLDLPYSCRAGSCSSCAGKVVSGSIDQSDQ
-FER1_MAIZE/1-150   TYNVKLITPEGEVELQVPDDVYILDQAEEDGIDLPYSCRAGSCSSCAGKVVSGSVDQSDQ
-O80429_MAIZE/1-140 TYNVKLITPEGEVELQVPDDVYILDFAEEEGIDLPFSCRAGSCSSCAGKVVSGSVDQSDQ
-1A70|/1-97         AYKVTLVTPTGNVEFQCPDDVYILDAAEEEGIDLPYSCRAGSCSSCAGKLKTGSLNQDDQ
-
-FER_CAPAA/1-97     NFLDDDQLEEGWVLTCVAYPQSDVTIETHKEAELVG-
-FER_CAPAN/1-144    NFLDDDQLEEGWVLTCVAYPQSDVTIETHKEAELVG-
-FER1_SOLLC/1-144   NFLDEDQEAAGFVLTCVAYPKGDVTIETHKEEELTA-
-Q93XJ9_SOLTU/1-144 KFLDDDQEAAGFVLTCVAYPKCDVTIETHKEEELTA-
-FER1_PEA/1-149     SFLDDEQIEAGFVLTCVAYPTSDVVIETHKEEDLTA-
-Q7XA98_TRIPR/1-152 SFLDDEQIEGGWVLTCVAFPTSDVTIETHKEEELTA-
-FER1_MESCR/1-148   SFLDDDQIKEGWVLTCVAYPTGDVTIETHKEEELTA-
-FER1_SPIOL/1-147   SFLDDDQIDEGWVLTCAAYPVSDVTIETHKEEELTA-
-FER3_RAPSA/1-96    SFLDDDQIAEGFVLTCAAYPTSDVTIETHREEDMV--
-FER1_ARATH/1-148   SFLDDEQIGEGFVLTCAAYPTSDVTIETHKEEDIV--
-FER_BRANA/1-96     SFLDDDQIAEGFVLTCAAYPTSDVTIETHKEEELV--
-FER2_ARATH/1-148   SFLDDEQMSEGYVLTCVAYPTSDVVIETHKEEAIM--
-Q93Z60_ARATH/1-118 SFLDD--------------------------------
-FER1_MAIZE/1-150   SYLDDGQIADGWVLTCHAYPTSDVVIETHKEEELTGA
-O80429_MAIZE/1-140 SFLNDNQVADGWVLTCAAYPTSDVVIETHKEDDLL--
-1A70|/1-97         SFLDDDQIDEGWVLTCAAYPVSDVTIETHKKEELTA
-
+FER_CAPAA/1-97     -----------------------------------------------------------A 1
+FER_CAPAN/1-144    MA------SVSATMISTSFMPRKPAVTSL-KPIPNVGE--ALFGLKS-A--NGGKVTCMA 48
+FER1_SOLLC/1-144   MA------SISGTMISTSFLPRKPAVTSL-KAISNVGE--ALFGLKS-G--RNGRITCMA 48
+Q93XJ9_SOLTU/1-144 MA------SISGTMISTSFLPRKPVVTSL-KAISNVGE--ALFGLKS-G--RNGRITCMA 48
+FER1_PEA/1-149     MATT---PALYGTAVSTSFLRTQPMPMSV-TTTKAFSN--GFLGLKT-SLKRGDLAVAMA 53
+
+FER_CAPAA/1-97     SYKVKLI 8
+FER_CAPAN/1-144    SYKVKLI 55
+FER1_SOLLC/1-144   SYKVKLI 55
+Q93XJ9_SOLTU/1-144 SYKVKLI 55
+FER1_PEA/1-149     SYKVKLV 60
index c21b02c..a37afd9 100755 (executable)
@@ -26,7 +26,8 @@ import jalview.datamodel.SequenceI;
 import jalview.util.Format;
 
 import java.io.IOException;
-import java.util.Hashtable;
+import java.util.HashMap;
+import java.util.Map;
 import java.util.StringTokenizer;
 import java.util.Vector;
 
@@ -59,12 +60,11 @@ public class ClustalFile extends AlignFile
   {
     int i = 0;
     boolean flag = false;
-    boolean rna = false;
     boolean top = false;
-    StringBuffer pssecstr = new StringBuffer(),
-            consstr = new StringBuffer();
-    Vector headers = new Vector();
-    Hashtable seqhash = new Hashtable();
+    StringBuffer pssecstr = new StringBuffer();
+    StringBuffer consstr = new StringBuffer();
+    Vector<String> headers = new Vector<>();
+    Map<String, StringBuffer> seqhash = new HashMap<>();
     StringBuffer tempseq;
     String line, id;
     StringTokenizer str;
@@ -79,7 +79,7 @@ public class ClustalFile extends AlignFile
         }
         if (line.indexOf(" ") != 0)
         {
-          str = new StringTokenizer(line, " ");
+          str = new StringTokenizer(line);
 
           if (str.hasMoreTokens())
           {
@@ -95,7 +95,7 @@ public class ClustalFile extends AlignFile
               {
                 if (seqhash.containsKey(id))
                 {
-                  tempseq = (StringBuffer) seqhash.get(id);
+                  tempseq = seqhash.get(id);
                 }
                 else
                 {
@@ -173,7 +173,7 @@ public class ClustalFile extends AlignFile
       AlignmentAnnotation lastssa = null;
       if (pssecstr.length() == maxLength)
       {
-        Vector ss = new Vector();
+        Vector<AlignmentAnnotation> ss = new Vector<>();
         AlignmentAnnotation ssa = lastssa = StockholmFile
                 .parseAnnotationRow(ss, "secondary structure",
                         pssecstr.toString());
@@ -182,7 +182,7 @@ public class ClustalFile extends AlignFile
       }
       if (consstr.length() == maxLength)
       {
-        Vector ss = new Vector();
+        Vector<AlignmentAnnotation> ss = new Vector<>();
         AlignmentAnnotation ssa = StockholmFile.parseAnnotationRow(ss,
                 "secondary structure", consstr.toString());
         ssa.label = "Consensus Secondary Structure";
@@ -238,19 +238,19 @@ public class ClustalFile extends AlignFile
         out.append(new Format("%-" + maxid + "s")
                 .form(printId(s[j], jvsuffix) + " "));
 
-        int start = i * len;
-        int end = start + len;
+        int chunkStart = i * len;
+        int chunkEnd = chunkStart + len;
 
         int length = s[j].getLength();
-        if ((end < length) && (start < length))
+        if ((chunkEnd < length) && (chunkStart < length))
         {
-          out.append(s[j].getSequenceAsString(start, end));
+          out.append(s[j].getSequenceAsString(chunkStart, chunkEnd));
         }
         else
         {
-          if (start < length)
+          if (chunkStart < length)
           {
-            out.append(s[j].getSequenceAsString().substring(start));
+            out.append(s[j].getSequenceAsString().substring(chunkStart));
           }
         }