From 73a369b0c2d5991400985d533107b70450b3d613 Mon Sep 17 00:00:00 2001
From: gmungoc
Date: Tue, 21 Feb 2017 13:36:59 +0000
Subject: [PATCH] JAL-2416 support roundtrip print/parse of ScoreMatrix

---
 resources/scoreModel/blosum62.scm | 4 +-
 resources/scoreModel/dna.scm | 5 +--
 resources/scoreModel/pam250.scm | 3 +-
 src/jalview/analysis/scoremodels/ScoreMatrix.java | 47 +++++++++++++++++++-
 src/jalview/io/ScoreMatrixFile.java | 39 ++++++++++++++++
 .../analysis/scoremodels/ScoreMatrixTest.java | 29 ++++++++++++
 .../analysis/scoremodels/ScoreModelsTest.java | 1 -
 test/jalview/io/ScoreMatrixFileTest.java | 24 ++++++++++
 8 files changed, 143 insertions(+), 9 deletions(-)

diff --git a/resources/scoreModel/blosum62.scm b/resources/scoreModel/blosum62.scm
index c7af6b0..5bb1b53 100644
--- a/resources/scoreModel/blosum62.scm
+++ b/resources/scoreModel/blosum62.scm
@@ -9,10 +9,10 @@ ARNDCQEGHILKMFPSTWYVBZX *
 # Scores are not symbol case sensitive, unless column(s) are provided for lower case characters
 # The 'guide symbol' at the start of each row of score values is optional
 #
-# Comment header line with symbols is provided as a guide
+# Header line with symbols may be provided as a guide
 # Values may be integer or floating point, delimited by tab, space, comma or combinations
 #
-# A R N D C Q E G H I L K M F P S T W Y V B Z X *
+ A R N D C Q E G H I L K M F P S T W Y V B Z X *
 #
 A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4 -4
 R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -4 -4
diff --git a/resources/scoreModel/dna.scm b/resources/scoreModel/dna.scm
index 4a196fb..d32647b 100644
--- a/resources/scoreModel/dna.scm
+++ b/resources/scoreModel/dna.scm
@@ -15,11 +15,10 @@ ACGTUIXRYN -
 # Scores are not case sensitive, unless column(s) are provided for lower case characters
 #
 #
-# Comment line with symbols is provided as a guide
+# Header line with symbols is provided as a guide
 # Values may be integer or floating point, delimited by tab, space, comma or combinations
 #
-# A C G T U I X R Y N -
-#
+ A C G T U I X R Y N -
 10 -8 -8 -8 -8 1 1 1 -8 1 1 1
 -8 10 -8 -8 -8 1 1 -8 1 1 1 1
 -8 -8 10 -8 -8 1 1 1 -8 1 1 1
diff --git a/resources/scoreModel/pam250.scm b/resources/scoreModel/pam250.scm
index 8df39a1..02903ad 100644
--- a/resources/scoreModel/pam250.scm
+++ b/resources/scoreModel/pam250.scm
@@ -8,11 +8,10 @@ ARNDCQEGHILKMFPSTWYVBZX *
 # Scores are not case sensitive, unless column(s) are provided for lower case characters
 #
 #
-# Comment line with symbols is provided as a guide
+# Header line with symbols is provided as a guide
 # Values may be integer or floating point, delimited by tab, space, comma or combinations
 #
 # A R N D C Q E G H I L K M F P S T W Y V B Z X *
-#
 2 -2 0 0 -2 0 0 1 -1 -1 -2 -1 -1 -3 1 1 1 -6 -3 0 0 0 0 -8 -8
 -2 6 0 -1 -4 1 -1 -3 2 -2 -3 3 0 -4 0 0 -1 2 -4 -2 -1 0 -1 -8 -8
 0 0 2 2 -4 1 1 0 2 -2 -3 1 -2 -3 0 1 0 -4 -2 -2 2 1 0 -8 -8
diff --git a/src/jalview/analysis/scoremodels/ScoreMatrix.java b/src/jalview/analysis/scoremodels/ScoreMatrix.java
index 22c81f1..13ce6a8 100644
--- a/src/jalview/analysis/scoremodels/ScoreMatrix.java
+++ b/src/jalview/analysis/scoremodels/ScoreMatrix.java
@@ -231,7 +231,12 @@ public class ScoreMatrix implements PairwiseScoreModelI
   }
 
   /**
-   * Print the score matrix, optionally formatted as html, with the alphabet symbols as column headings and at the start of each row
+   * Print the score matrix, optionally formatted as html, with the alphabet
+   * symbols as column headings and at the start of each row.
+   *
+   * The non-html format should give an output which can be parsed as a score
+   * matrix file
+   *
    * @param html
    * @return
    */
@@ -247,6 +252,11 @@ public class ScoreMatrix implements PairwiseScoreModelI
       sb.append("");
       sb.append(html ? "" : "");
     }
+    else
+    {
+      sb.append("ScoreMatrix ").append(getName()).append("\n");
+      sb.append(symbols).append("\n");
+    }
     for (char sym : symbols)
     {
       if (html)
@@ -338,4 +348,39 @@ public class ScoreMatrix implements PairwiseScoreModelI
     }
     return new Matrix(values);
   }
+
+  /**
+   * Answers a hashcode computed from the symbol alphabet and the matrix score
+   * values only (not the name), so that it is consistent with equals
+   */
+  @Override
+  public int hashCode()
+  {
+    int hs = Arrays.hashCode(symbols);
+    for (float[] row : matrix)
+    {
+      hs = hs * 31 + Arrays.hashCode(row);
+    }
+    return hs;
+  }
+
+  /**
+   * Answers true if the argument is a ScoreMatrix with the same symbol alphabet
+   * and score values (names are not compared), else false (including for null)
+   */
+  @Override
+  public boolean equals(Object obj)
+  {
+    if (!(obj instanceof ScoreMatrix))
+    {
+      return false;
+    }
+    ScoreMatrix sm = (ScoreMatrix) obj;
+    if (Arrays.equals(symbols, sm.symbols)
+            && Arrays.deepEquals(matrix, sm.matrix))
+    {
+      return true;
+    }
+    return false;
+  }
 }
diff --git a/src/jalview/io/ScoreMatrixFile.java b/src/jalview/io/ScoreMatrixFile.java
index 97fe46b..eee7d68 100644
--- a/src/jalview/io/ScoreMatrixFile.java
+++ b/src/jalview/io/ScoreMatrixFile.java
@@ -137,6 +137,14 @@ public class ScoreMatrixFile extends AlignFile implements
       }
 
       /*
+       * permit an uncommented line with delimited residue headings
+       */
+      if (isHeaderLine(data, alphabet))
+      {
+        continue;
+      }
+
+      /*
        * subsequent lines should be the symbol scores
        * optionally with the symbol as the first column for readability
        */
@@ -202,6 +210,37 @@ public class ScoreMatrixFile extends AlignFile implements
     return sm;
   }
 
+  /**
+   * Answers true if the data line consists of the alphabet characters,
+   * delimited (as to provide a heading row). Otherwise returns false (e.g. if
+   * the data is a row of score values).
+   *
+   * @param data the line of text to test
+   * @param alphabet the score matrix symbols, in the order expected in a heading
+   * @return true if every token equals the next non-delimiter alphabet symbol
+   */
+  private boolean isHeaderLine(String data, String alphabet)
+  {
+    StringTokenizer scoreLine = new StringTokenizer(data, DELIMITERS);
+    int i = 0;
+    while (scoreLine.hasMoreTokens())
+    {
+      if (i >= alphabet.length())
+      {
+        // more tokens than alphabet symbols: not a heading row
+        return false;
+      }
+      // an alphabet character that is also a delimiter (e.g. space) has no token
+      String symbol = String.valueOf(alphabet.charAt(i++));
+      if (!DELIMITERS.contains(symbol)
+              && !symbol.equals(scoreLine.nextToken()))
+      {
+        return false;
+      }
+    }
+    return true;
+  }
+
   public String getMatrixName()
   {
     return matrixName;
diff --git a/test/jalview/analysis/scoremodels/ScoreMatrixTest.java b/test/jalview/analysis/scoremodels/ScoreMatrixTest.java
index 97cb742..002084d 100644
--- a/test/jalview/analysis/scoremodels/ScoreMatrixTest.java
+++ b/test/jalview/analysis/scoremodels/ScoreMatrixTest.java
@@ -1,9 +1,18 @@
 package jalview.analysis.scoremodels;
+
 import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNotNull;
+import static org.testng.Assert.assertTrue;
 import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
 
+import jalview.io.DataSourceType;
+import jalview.io.FileParse;
+import jalview.io.ScoreMatrixFile;
 import jalview.math.MatrixI;
 
+import java.io.IOException;
+import java.net.MalformedURLException;
+
 import org.testng.annotations.Test;
 
 public class ScoreMatrixTest
@@ -190,4 +199,24 @@ public class ScoreMatrixTest
     // Q.G + I.W + A.C = -2 + -3 + 0 = -5
     assertEquals(pairwise.getValue(2, 3), -5d);
   }
+
+  /**
+   * Test that the result of outputMatrix can be reparsed to give an identical
+   * ScoreMatrix
+   *
+   * @throws IOException
+   * @throws MalformedURLException
+   */
+  @Test(groups = "Functional")
+  public void testOutputMatrix_roundTrip() throws MalformedURLException,
+          IOException
+  {
+    ScoreMatrix sm = ScoreModels.getInstance().getBlosum62();
+    String output = sm.outputMatrix(false);
+    FileParse fp = new FileParse(output, DataSourceType.PASTE);
+    ScoreMatrixFile parser = new ScoreMatrixFile(fp);
+    ScoreMatrix sm2 = parser.parseMatrix();
+    assertNotNull(sm2);
+    assertTrue(sm2.equals(sm));
+  }
 }
diff --git a/test/jalview/analysis/scoremodels/ScoreModelsTest.java b/test/jalview/analysis/scoremodels/ScoreModelsTest.java
index f63843d..594adcd 100644
--- a/test/jalview/analysis/scoremodels/ScoreModelsTest.java
+++ b/test/jalview/analysis/scoremodels/ScoreModelsTest.java
@@ -101,7 +101,6 @@ public class ScoreModelsTest
       if (psm instanceof ScoreMatrix)
       {
         ScoreMatrix sm = (ScoreMatrix) psm;
-        System.out.println("ScoreMatrix " + sm.getName());
         System.out.println(sm.outputMatrix(asHtml));
       }
     }
diff --git a/test/jalview/io/ScoreMatrixFileTest.java b/test/jalview/io/ScoreMatrixFileTest.java
index 123de6b..44bb8aa 100644
--- a/test/jalview/io/ScoreMatrixFileTest.java
+++ b/test/jalview/io/ScoreMatrixFileTest.java
@@ -257,4 +257,28 @@ public class ScoreMatrixFileTest
               "Format error: expected 'ScoreMatrix ', found 'ScoreMatrix' at line 1");
     }
   }
+
+  /**
+   * Test a successful parse of a (small) score matrix file with a heading row
+   *
+   * @throws IOException
+   * @throws MalformedURLException
+   */
+  @Test(groups = "Functional")
+  public void testParse_withResidueHeading() throws MalformedURLException,
+          IOException
+  {
+    String data = "ScoreMatrix MyTest\n" + "ABC\n" + "\tA\tB\tC\n"
+            + "A\t1.0\t2.0\t3.0\n" + "B\t4.0\t5.0\t6.0\n"
+            + "C\t7.0\t8.0\t9.0\n";
+    FileParse fp = new FileParse(data, DataSourceType.PASTE);
+    ScoreMatrixFile parser = new ScoreMatrixFile(fp);
+    ScoreMatrix sm = parser.parseMatrix();
+
+    assertNotNull(sm);
+    assertEquals(sm.getName(), "MyTest");
+    assertEquals(sm.getPairwiseScore('A', 'A'), 1.0f);
+    assertEquals(sm.getPairwiseScore('B', 'c'), 6.0f);
+    assertEquals(sm.getSize(), 3);
+  }
 }
-- 
1.7.10.2