From 27cc4ae38ba352502c07a14e8c73ba84664825e3 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Wed, 23 May 2018 15:33:08 +0100 Subject: [PATCH] JAL-2997 handle tab-delimited trailing column number when parsing --- examples/testdata/test.aln | 62 +++++++-------------------------------- src/jalview/io/ClustalFile.java | 32 ++++++++++---------- 2 files changed, 27 insertions(+), 67 deletions(-) diff --git a/examples/testdata/test.aln b/examples/testdata/test.aln index 08a7ac3..6582b12 100644 --- a/examples/testdata/test.aln +++ b/examples/testdata/test.aln @@ -1,53 +1,13 @@ CLUSTAL -FER_CAPAA/1-97 -----------------------------------------------------------A -FER_CAPAN/1-144 MA------SVSATMISTSFMPRKPAVTSL-KPIPNVGE--ALFGLKS-A--NGGKVTCMA -FER1_SOLLC/1-144 MA------SISGTMISTSFLPRKPAVTSL-KAISNVGE--ALFGLKS-G--RNGRITCMA -Q93XJ9_SOLTU/1-144 MA------SISGTMISTSFLPRKPVVTSL-KAISNVGE--ALFGLKS-G--RNGRITCMA -FER1_PEA/1-149 MATT---PALYGTAVSTSFLRTQPMPMSV-TTTKAFSN--GFLGLKT-SLKRGDLAVAMA -Q7XA98_TRIPR/1-152 MATT---PALYGTAVSTSFMRRQPVPMSV-ATTTTTKAFPSGFGLKSVSTKRGDLAVAMA -FER1_MESCR/1-148 MAAT--TAALSGATMSTAFAPK--TPPMTAALPTNVGR--ALFGLKS-SASR-GRVTAMA -FER1_SPIOL/1-147 MAAT--TTTMMG--MATTFVPKPQAPPMMAALPSNTGR--SLFGLKT-GSR--GGRMTMA -FER3_RAPSA/1-96 -----------------------------------------------------------A -FER1_ARATH/1-148 MAST----ALSSAIVGTSFIRRSPAPISLRSLPSANTQ--SLFGLKS-GTARGGRVTAMA -FER_BRANA/1-96 -----------------------------------------------------------A -FER2_ARATH/1-148 MAST----ALSSAIVSTSFLRRQQTPISLRSLPFANTQ--SLFGLKS-STARGGRVTAMA -Q93Z60_ARATH/1-118 MAST----ALSSAIVSTSFLRRQQTPISLRSLPFANTQ--SLFGLKS-STARGGRVTAMA -FER1_MAIZE/1-150 MATVLGSPRAPAFFFSSSSLRAAPAPTAV--ALPAAKV--GIMGRSA-SSRR--RLRAQA -O80429_MAIZE/1-140 MAAT---------ALSMSILR---APPPCFSSPLRLRV--AVAKPLA-APMRRQLLRAQA -1A70|/1-97 -----------------------------------------------------------A - -FER_CAPAA/1-97 SYKVKLITPDGPIEFDCPDDVYILDQAEEAGHDLPYSCRAGSCSSCAGKIAGGAVDQTDG -FER_CAPAN/1-144 SYKVKLITPDGPIEFDCPDNVYILDQAEEAGHDLPYSCRAGSCSSCAGKIAGGAVDQTDG -FER1_SOLLC/1-144 SYKVKLITPEGPIEFECPDDVYILDQAEEEGHDLPYSCRAGSCSSCAGKVTAGSVDQSDG -Q93XJ9_SOLTU/1-144 SYKVKLITPDGPIEFECPDDVYILDQAEEEGHDLPYSCRAGSCSSCAGKVTAGTVDQSDG -FER1_PEA/1-149 SYKVKLVTPDGTQEFECPSDVYILDHAEEVGIDLPYSCRAGSCSSCAGKVVGGEVDQSDG -Q7XA98_TRIPR/1-152 TYKVKLITPEGPQEFDCPDDVYILDHAEEVGIELPYSCRAGSCSSCAGKVVNGNVNQEDG -FER1_MESCR/1-148 AYKVTLVTPEGKQELECPDDVYILDAAEEAGIDLPYSCRAGSCSSCAGKVTSGSVNQDDG -FER1_SPIOL/1-147 AYKVTLVTPTGNVEFQCPDDVYILDAAEEEGIDLPYSCRAGSCSSCAGKLKTGSLNQDDQ -FER3_RAPSA/1-96 TYKVKFITPEGEQEVECDDDVYVLDAAEEAGIDLPYSCRAGSCSSCAGKVVSGSVDQSDQ -FER1_ARATH/1-148 TYKVKFITPEGELEVECDDDVYVLDAAEEAGIDLPYSCRAGSCSSCAGKVVSGSVDQSDQ -FER_BRANA/1-96 TYKVKFITPEGEQEVECDDDVYVLDAAEEAGIDLPYSCRAGSCSSCAGKVVSGFVDQSDE -FER2_ARATH/1-148 TYKVKFITPEGEQEVECEEDVYVLDAAEEAGLDLPYSCRAGSCSSCAGKVVSGSIDQSDQ -Q93Z60_ARATH/1-118 TYKVKFITPEGEQEVECEEDVYVLDAAEEAGLDLPYSCRAGSCSSCAGKVVSGSIDQSDQ -FER1_MAIZE/1-150 TYNVKLITPEGEVELQVPDDVYILDQAEEDGIDLPYSCRAGSCSSCAGKVVSGSVDQSDQ -O80429_MAIZE/1-140 TYNVKLITPEGEVELQVPDDVYILDFAEEEGIDLPFSCRAGSCSSCAGKVVSGSVDQSDQ -1A70|/1-97 AYKVTLVTPTGNVEFQCPDDVYILDAAEEEGIDLPYSCRAGSCSSCAGKLKTGSLNQDDQ - -FER_CAPAA/1-97 NFLDDDQLEEGWVLTCVAYPQSDVTIETHKEAELVG- -FER_CAPAN/1-144 NFLDDDQLEEGWVLTCVAYPQSDVTIETHKEAELVG- -FER1_SOLLC/1-144 NFLDEDQEAAGFVLTCVAYPKGDVTIETHKEEELTA- -Q93XJ9_SOLTU/1-144 KFLDDDQEAAGFVLTCVAYPKCDVTIETHKEEELTA- -FER1_PEA/1-149 SFLDDEQIEAGFVLTCVAYPTSDVVIETHKEEDLTA- -Q7XA98_TRIPR/1-152 SFLDDEQIEGGWVLTCVAFPTSDVTIETHKEEELTA- -FER1_MESCR/1-148 SFLDDDQIKEGWVLTCVAYPTGDVTIETHKEEELTA- -FER1_SPIOL/1-147 SFLDDDQIDEGWVLTCAAYPVSDVTIETHKEEELTA- -FER3_RAPSA/1-96 SFLDDDQIAEGFVLTCAAYPTSDVTIETHREEDMV-- -FER1_ARATH/1-148 SFLDDEQIGEGFVLTCAAYPTSDVTIETHKEEDIV-- -FER_BRANA/1-96 SFLDDDQIAEGFVLTCAAYPTSDVTIETHKEEELV-- -FER2_ARATH/1-148 SFLDDEQMSEGYVLTCVAYPTSDVVIETHKEEAIM-- -Q93Z60_ARATH/1-118 SFLDD-------------------------------- -FER1_MAIZE/1-150 SYLDDGQIADGWVLTCHAYPTSDVVIETHKEEELTGA -O80429_MAIZE/1-140 SFLNDNQVADGWVLTCAAYPTSDVVIETHKEDDLL-- -1A70|/1-97 SFLDDDQIDEGWVLTCAAYPVSDVTIETHKKEELTA - +FER_CAPAA/1-97 -----------------------------------------------------------A 1 +FER_CAPAN/1-144 MA------SVSATMISTSFMPRKPAVTSL-KPIPNVGE--ALFGLKS-A--NGGKVTCMA 48 +FER1_SOLLC/1-144 MA------SISGTMISTSFLPRKPAVTSL-KAISNVGE--ALFGLKS-G--RNGRITCMA 48 +Q93XJ9_SOLTU/1-144 MA------SISGTMISTSFLPRKPVVTSL-KAISNVGE--ALFGLKS-G--RNGRITCMA 48 +FER1_PEA/1-149 MATT---PALYGTAVSTSFLRTQPMPMSV-TTTKAFSN--GFLGLKT-SLKRGDLAVAMA 53 + +FER_CAPAA/1-97 SYKVKLI 8 +FER_CAPAN/1-144 SYKVKLI 55 +FER1_SOLLC/1-144 SYKVKLI 55 +Q93XJ9_SOLTU/1-144 SYKVKLI 55 +FER1_PEA/1-149 SYKVKLV 60 diff --git a/src/jalview/io/ClustalFile.java b/src/jalview/io/ClustalFile.java index c21b02c..a37afd9 100755 --- a/src/jalview/io/ClustalFile.java +++ b/src/jalview/io/ClustalFile.java @@ -26,7 +26,8 @@ import jalview.datamodel.SequenceI; import jalview.util.Format; import java.io.IOException; -import java.util.Hashtable; +import java.util.HashMap; +import java.util.Map; import java.util.StringTokenizer; import java.util.Vector; @@ -59,12 +60,11 @@ public class ClustalFile extends AlignFile { int i = 0; boolean flag = false; - boolean rna = false; boolean top = false; - StringBuffer pssecstr = new StringBuffer(), - consstr = new StringBuffer(); - Vector headers = new Vector(); - Hashtable seqhash = new Hashtable(); + StringBuffer pssecstr = new StringBuffer(); + StringBuffer consstr = new StringBuffer(); + Vector headers = new Vector<>(); + Map seqhash = new HashMap<>(); StringBuffer tempseq; String line, id; StringTokenizer str; @@ -79,7 +79,7 @@ public class ClustalFile extends AlignFile } if (line.indexOf(" ") != 0) { - str = new StringTokenizer(line, " "); + str = new StringTokenizer(line); if (str.hasMoreTokens()) { @@ -95,7 +95,7 @@ public class ClustalFile extends AlignFile { if (seqhash.containsKey(id)) { - tempseq = (StringBuffer) seqhash.get(id); + tempseq = seqhash.get(id); } else { @@ -173,7 +173,7 @@ public class ClustalFile extends AlignFile AlignmentAnnotation lastssa = null; if (pssecstr.length() == maxLength) { - Vector ss = new Vector(); + Vector ss = new Vector<>(); AlignmentAnnotation ssa = lastssa = StockholmFile .parseAnnotationRow(ss, "secondary structure", pssecstr.toString()); @@ -182,7 +182,7 @@ public class ClustalFile extends AlignFile } if (consstr.length() == maxLength) { - Vector ss = new Vector(); + Vector ss = new Vector<>(); AlignmentAnnotation ssa = StockholmFile.parseAnnotationRow(ss, "secondary structure", consstr.toString()); ssa.label = "Consensus Secondary Structure"; @@ -238,19 +238,19 @@ public class ClustalFile extends AlignFile out.append(new Format("%-" + maxid + "s") .form(printId(s[j], jvsuffix) + " ")); - int start = i * len; - int end = start + len; + int chunkStart = i * len; + int chunkEnd = chunkStart + len; int length = s[j].getLength(); - if ((end < length) && (start < length)) + if ((chunkEnd < length) && (chunkStart < length)) { - out.append(s[j].getSequenceAsString(start, end)); + out.append(s[j].getSequenceAsString(chunkStart, chunkEnd)); } else { - if (start < length) + if (chunkStart < length) { - out.append(s[j].getSequenceAsString().substring(start)); + out.append(s[j].getSequenceAsString().substring(chunkStart)); } } -- 1.7.10.2