From 74aab3b256df66c0dc8785c349565c191d3ef0f8 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Thu, 15 Nov 2012 22:43:30 +0100 Subject: [PATCH 1/1] Fixed T-Coffee file scores parser to support extended format including residue numbers --- .gitignore | 2 ++ src/jalview/io/TCoffeeScoreFile.java | 14 +++++++-- test/jalview/io/TCoffeeScoreFileTest.java | 28 +++++++++++------- .../io/tcoffee.score_ascii_with_residue_numbers | 30 ++++++++++++++++++++ 4 files changed, 62 insertions(+), 12 deletions(-) create mode 100644 test/jalview/io/tcoffee.score_ascii_with_residue_numbers diff --git a/.gitignore b/.gitignore index f256683..3407192 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ +.project /dist /classes +.externalToolBuilders/Jalview Release indices [Builder].launch diff --git a/src/jalview/io/TCoffeeScoreFile.java b/src/jalview/io/TCoffeeScoreFile.java index 42a2caa..ad5f52a 100644 --- a/src/jalview/io/TCoffeeScoreFile.java +++ b/src/jalview/io/TCoffeeScoreFile.java @@ -30,6 +30,8 @@ import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; /** * A file parse for T-Coffee score ascii format. This file contains the @@ -138,7 +140,7 @@ public class TCoffeeScoreFile extends AlignFile /** * @return The 'width' of the score matrix i.e. the number of columns. Since - * teh score value are supposd to be calculated for an 'aligned' MSA, + * the score value are supposed to be calculated for an 'aligned' MSA, * all the entries have to have the same width. */ public int getWidth() @@ -393,6 +395,8 @@ public class TCoffeeScoreFile extends AlignFile } } + static Pattern SCORES_WITH_RESIDUE_NUMS = Pattern.compile("^\\d+\\s([^\\s]+)\\s+\\d+$"); + /** * Read a scores block ihe provided stream. * @@ -452,6 +456,11 @@ public class TCoffeeScoreFile extends AlignFile String id = line.substring(0, p).trim(); String val = line.substring(p + 1).trim(); + Matcher m = SCORES_WITH_RESIDUE_NUMS.matcher(val); + if( m.matches() ) { + val = m.group(1); + } + result.items.put(id, val); } while ((line = reader.nextLine()) != null); @@ -540,7 +549,8 @@ public class TCoffeeScoreFile extends AlignFile { if (al.getHeight() != getHeight() || al.getWidth() != getWidth()) { - warningMessage = "Alignment shape does not match T-Coffee score file shape."; + String info = String.format("align w: %s, h: %s; score: w: %s; h: %s ", al.getWidth(), al.getHeight(), getWidth(), getHeight() ); + warningMessage = "Alignment shape does not match T-Coffee score file shape -- " + info; return false; } boolean added = false; diff --git a/test/jalview/io/TCoffeeScoreFileTest.java b/test/jalview/io/TCoffeeScoreFileTest.java index 7308446..3147054 100644 --- a/test/jalview/io/TCoffeeScoreFileTest.java +++ b/test/jalview/io/TCoffeeScoreFileTest.java @@ -21,12 +21,9 @@ import static org.junit.Assert.*; import jalview.io.TCoffeeScoreFile.Block; import jalview.io.TCoffeeScoreFile.Header; -import java.io.BufferedReader; import java.io.File; import java.io.FileNotFoundException; -import java.io.FileReader; import java.io.IOException; -import java.io.StringReader; import java.util.List; import org.junit.Test; @@ -34,14 +31,15 @@ import org.junit.Test; public class TCoffeeScoreFileTest { final static File SCORE_FILE = new File("test/jalview/io/tcoffee.score_ascii"); - final static File ALIGN_FILE = new File("test/jalview/io/tcoffee.fasta_aln"); + final static File ALIGN_FILE = new File("test/jalview/io/tcoffee.fasta_aln"); @Test public void testReadHeader() throws IOException, FileNotFoundException { - TCoffeeScoreFile scoreFile = new TCoffeeScoreFile(SCORE_FILE.getPath(),AppletFormatAdapter.FILE); - assertTrue(scoreFile.getWarningMessage(),scoreFile.isValid()); - Header header = scoreFile.header; + TCoffeeScoreFile scoreFile = new TCoffeeScoreFile(SCORE_FILE.getPath(),AppletFormatAdapter.FILE); + assertTrue(scoreFile.getWarningMessage(),scoreFile.isValid()); + + Header header = scoreFile.header; assertNotNull(header); assertEquals( "T-COFFEE, Version_9.02.r1228 (2012-02-16 18:15:12 - Revision 1228 - Build 336)", header.head ); assertEquals( 90, header.score ); @@ -60,9 +58,10 @@ public class TCoffeeScoreFileTest { @Test public void testWrongFile() { try { - TCoffeeScoreFile result = new TCoffeeScoreFile(ALIGN_FILE.getPath(), FormatAdapter.FILE); - assertFalse(result.isValid()); - } catch (IOException x) + TCoffeeScoreFile result = new TCoffeeScoreFile(ALIGN_FILE.getPath(), FormatAdapter.FILE); + assertFalse(result.isValid()); + } + catch (IOException x) { assertTrue("File not found exception thrown",x instanceof FileNotFoundException); } @@ -173,6 +172,15 @@ public class TCoffeeScoreFileTest { } + @Test + public void testHeightAndWidthWithResidueNumbers() throws IOException { + String file = "test/jalview/io/tcoffee.score_ascii_with_residue_numbers"; + TCoffeeScoreFile result = new TCoffeeScoreFile(file, FormatAdapter.FILE); + assertTrue(result.isValid()); + assertEquals( 5, result.getHeight() ); + assertEquals( 84, result.getWidth() ); + } + } diff --git a/test/jalview/io/tcoffee.score_ascii_with_residue_numbers b/test/jalview/io/tcoffee.score_ascii_with_residue_numbers new file mode 100644 index 0000000..ec518e3 --- /dev/null +++ b/test/jalview/io/tcoffee.score_ascii_with_residue_numbers @@ -0,0 +1,30 @@ +T-COFFEE, Version_9.01 (2011-10-13 19:18:54 - Revision 1115 - Build 6) +Cedric Notredame +CPU TIME:0 sec. +SCORE=45 +* + BAD AVG GOOD +* +1aboA : 50 +1ycsB : 50 +1pht : 42 +1vie : 41 +1ihvA : 39 +cons : 45 + +1aboA 1 65-556---666667666555555665566-------66666-------54445665--5555-55556777665--- 54 +1ycsB 1 653556---665667776555666666667-------6666544-----44455565--5555-55557766655--- 57 +1pht 1 764567---765656566656666665555444445677776554455555544666556665-67667876655333 74 +1vie 1 44-----------344--44455555555544555555555655------------5--5565444545666655--- 48 +1ihvA 1 75-4443444444444444-3445555556-------55555-------444--------------456665554--- 45 +cons 1 542344111454434444333444554444211112234333111111122222332112222122335655444111 78 + + +1aboA 55 ---679 57 +1ycsB 58 ---678 60 +1pht 75 323578 80 +1vie 49 ---678 51 +1ihvA 46 --3579 49 +cons 79 111568 84 + + -- 1.7.10.2