From: gmungoc Date: Tue, 14 Feb 2017 16:47:52 +0000 (+0000) Subject: Parsing moved to (new) ScoreMatrixFile, drag and drop to alignment now X-Git-Tag: Release_2_10_2~3^2~105^2~2^2~118 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=45e015aabe8f35a4a13be26e7630641ef8c94fbb;p=jalview.git Parsing moved to (new) ScoreMatrixFile, drag and drop to alignment now supported --- diff --git a/resources/lang/Messages.properties b/resources/lang/Messages.properties index fa74c0d..bfd48ed 100644 --- a/resources/lang/Messages.properties +++ b/resources/lang/Messages.properties @@ -331,6 +331,7 @@ label.colour_residues_above_occurrence = Colour residues above % occurrence label.set_this_label_text = set this label text label.sequences_from = Sequences from {0} label.successfully_loaded_file = Successfully loaded file {0} +label.successfully_loaded_matrix = Successfully loaded score matrix {0} label.successfully_saved_to_file_in_format = Successfully saved to file: {0} in {1} format. label.copied_sequences_to_clipboard = Copied {0} sequences to clipboard. label.check_file_matches_sequence_ids_alignment = Check that the file matches sequence IDs in the alignment. 
diff --git a/src/jalview/analysis/scoremodels/ScoreMatrix.java b/src/jalview/analysis/scoremodels/ScoreMatrix.java index 3e63209..b12f55f 100644 --- a/src/jalview/analysis/scoremodels/ScoreMatrix.java +++ b/src/jalview/analysis/scoremodels/ScoreMatrix.java @@ -22,22 +22,13 @@ package jalview.analysis.scoremodels; import jalview.api.analysis.ScoreModelI; -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; import java.util.Arrays; -import java.util.StringTokenizer; public class ScoreMatrix extends PairwiseSeqScoreModel implements ScoreModelI { public static final short UNMAPPED = (short) -1; - private static final String DELIMITERS = " ,\t"; - - private static final String COMMENT_CHAR = "#"; - private static final String BAD_ASCII_ERROR = "Unexpected character %s in getPairwiseScore"; private static final int MAX_ASCII = 127; @@ -249,172 +240,4 @@ public class ScoreMatrix extends PairwiseSeqScoreModel implements } return sb.toString(); } - - /** - * Parse a score matrix from the given input stream and returns a ScoreMatrix - * object. If parsing fails, error messages are written to syserr and null is - * returned. It is the caller's responsibility to close the input stream. - * Expected format: - * - *
<pre>
-   * ScoreMatrix displayName
-   * # comment lines begin with hash sign
-   * # symbol alphabet should be the next non-comment line
-   * ARNDCQEGHILKMFPSTWYVBZX *
-   * # scores matrix, with space, comma or tab delimited values
-   * # [i, j] = score for substituting symbol[i] with symbol[j]
-   * # first column in each row is optionally the 'substituted' symbol
-   * A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4 -4
-   * ..etc..
-   * </pre>
- * - * @param is - * @return - */ - public static ScoreMatrix parse(InputStream is) - { - ScoreMatrix sm = null; - BufferedReader br = new BufferedReader(new InputStreamReader(is)); - int lineNo = 0; - String name = null; - String alphabet = null; - float[][] scores = null; - int size = 0; - int row = 0; - - try - { - String data; - - while ((data = br.readLine()) != null) - { - lineNo++; - data = data.trim(); - if (data.startsWith(COMMENT_CHAR)) - { - continue; - } - if (data.toLowerCase().startsWith("scorematrix")) - { - /* - * Parse name from ScoreMatrix - */ - if (name != null) - { - System.err - .println("Warning: 'ScoreMatrix' repeated in file at line " - + lineNo); - } - StringTokenizer nameLine = new StringTokenizer(data, DELIMITERS); - if (nameLine.countTokens() != 2) - { - System.err - .println("Format error: expected 'ScoreMatrix ', found '" - + data + "' at line " + lineNo); - return null; - } - nameLine.nextToken(); - name = nameLine.nextToken(); - continue; - } - else if (name == null) - { - System.err - .println("Format error: 'ScoreMatrix ' should be the first non-comment line"); - return null; - } - - /* - * next line after ScoreMatrix should be the alphabet of scored symbols - */ - if (alphabet == null) - { - alphabet = data; - size = alphabet.length(); - scores = new float[size][]; - continue; - } - - /* - * too much information? 
- */ - if (row >= size && data.length() > 0) { - System.err - .println("Unexpected extra input line in score model file " - + data); - return null; - } - - /* - * subsequent lines should be the symbol scores - * optionally with the symbol as the first column for readability - */ - StringTokenizer scoreLine = new StringTokenizer(data, DELIMITERS); - if (scoreLine.countTokens() == size + 1) - { - /* - * check 'guide' symbol is the row'th letter of the alphabet - */ - String symbol = scoreLine.nextToken(); - if (symbol.length() > 1 - || symbol.charAt(0) != alphabet.charAt(row)) - { - System.err - .println(String - .format("Error parsing score matrix at line %d, expected %s but found %s", - lineNo, alphabet.charAt(row), symbol)); - return null; - } - } - if (scoreLine.countTokens() != size) - { - System.err.println(String.format( - "Expected %d scores at line %d but found %d", size, - lineNo, scoreLine.countTokens())); - return null; - } - scores[row] = new float[size]; - int col = 0; - String value = null; - while (scoreLine.hasMoreTokens()) - { - try - { - value = scoreLine.nextToken(); - scores[row][col] = Float.valueOf(value); - col++; - } catch (NumberFormatException e) - { - System.err.println(String.format( - "Invalid score value %s at line %d column %d", value, - lineNo, col)); - return null; - } - } - row++; - } - } catch (IOException e) - { - System.err.println("Error reading score matrix file: " - + e.getMessage() + " at line " + lineNo); - } - - /* - * out of data - check we found enough - */ - if (row < size) - { - System.err - .println(String - .format("Expected %d rows of score data in score matrix but only found %d", - size, row)); - return null; - } - - /* - * If we get here, then name, alphabet and scores have been parsed successfully - */ - sm = new ScoreMatrix(name, alphabet.toCharArray(), scores); - return sm; - } } diff --git a/src/jalview/analysis/scoremodels/ScoreModels.java b/src/jalview/analysis/scoremodels/ScoreModels.java index 
f1990c0..9af68d0 100644 --- a/src/jalview/analysis/scoremodels/ScoreModels.java +++ b/src/jalview/analysis/scoremodels/ScoreModels.java @@ -1,10 +1,11 @@ package jalview.analysis.scoremodels; import jalview.api.analysis.ScoreModelI; +import jalview.io.DataSourceType; +import jalview.io.FileParse; +import jalview.io.ScoreMatrixFile; import java.io.IOException; -import java.io.InputStream; -import java.net.URL; import java.util.Map; import java.util.TreeMap; @@ -39,54 +40,36 @@ public class ScoreModels * using TreeMap keeps models ordered alphabetically by name */ models = new TreeMap(String.CASE_INSENSITIVE_ORDER); - loadScoreMatrix("/scoreModel/blosum62.scm"); - loadScoreMatrix("/scoreModel/pam250.scm"); - loadScoreMatrix("/scoreModel/dna.scm"); + loadScoreMatrix("scoreModel/blosum62.scm"); + loadScoreMatrix("scoreModel/pam250.scm"); + loadScoreMatrix("scoreModel/dna.scm"); registerScoreModel(new FeatureScoreModel()); registerScoreModel(new PIDScoreModel()); } /** * Try to load a score matrix from the given resource file, and if successful, - * register it. Answers true if successful, else false. Any errors are - * reported on syserr but not thrown. + * register it. Answers true if successful, else false. 
* * @param string */ boolean loadScoreMatrix(String resourcePath) { - URL url = this.getClass().getResource(resourcePath); - if (url == null) - { - System.err.println("Failed to locate " + resourcePath); - return false; - } - boolean success = false; - InputStream is = null; try { - is = url.openStream(); - ScoreMatrix sm = ScoreMatrix.parse(is); - if (sm != null) - { - registerScoreModel(sm); - success = true; - } + /* + * delegate parsing to ScoreMatrixFile + */ + FileParse fp = new FileParse(resourcePath, DataSourceType.CLASSLOADER); + ScoreMatrix sm = new ScoreMatrixFile(fp).parseMatrix(); + registerScoreModel(sm); + return true; } catch (IOException e) { - } finally - { - if (is != null) - { - try - { - is.close(); - } catch (IOException e) - { - } - } + System.err.println("Error reading " + resourcePath + ": " + + e.getMessage()); } - return success; + return false; } /** diff --git a/src/jalview/gui/AlignFrame.java b/src/jalview/gui/AlignFrame.java index d6dfd9d..0da8381 100644 --- a/src/jalview/gui/AlignFrame.java +++ b/src/jalview/gui/AlignFrame.java @@ -69,6 +69,7 @@ import jalview.io.FileFormat; import jalview.io.FileFormatI; import jalview.io.FileFormats; import jalview.io.FileLoader; +import jalview.io.FileParse; import jalview.io.FormatAdapter; import jalview.io.HtmlSvgOutput; import jalview.io.IdentifyFile; @@ -77,6 +78,7 @@ import jalview.io.JalviewFileChooser; import jalview.io.JalviewFileView; import jalview.io.JnetAnnotationMaker; import jalview.io.NewickFile; +import jalview.io.ScoreMatrixFile; import jalview.io.TCoffeeScoreFile; import jalview.jbgui.GAlignFrame; import jalview.schemes.ColourSchemeI; @@ -4605,10 +4607,13 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, } /** - * Attempt to load a "dropped" file or URL string: First by testing whether - * it's an Annotation file, then a JNet file, and finally a features file. 
If - * all are false then the user may have dropped an alignment file onto this - * AlignFrame. + * Attempt to load a "dropped" file or URL string, by testing in turn for + *
<ul>
+ * <li>an Annotation file</li>
+ * <li>a JNet file</li>
+ * <li>a features file</li>
+ * <li>else try to interpret as an alignment file</li>
+ * </ul>
* * @param file * either a filename or a URL string. @@ -4682,7 +4687,18 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, { format = new IdentifyFile().identify(file, sourceType); } - if (FileFormat.Jnet.equals(format)) + if (FileFormat.ScoreMatrix == format) + { + ScoreMatrixFile sm = new ScoreMatrixFile(new FileParse(file, + sourceType)); + sm.parse(); + // todo: i18n this message + statusBar + .setText(MessageManager.formatMessage( + "label.successfully_loaded_matrix", + sm.getMatrixName())); + } + else if (FileFormat.Jnet.equals(format)) { JPredFile predictions = new JPredFile(file, sourceType); new JnetAnnotationMaker(); diff --git a/src/jalview/io/FileFormat.java b/src/jalview/io/FileFormat.java index a11147c..3354b88 100644 --- a/src/jalview/io/FileFormat.java +++ b/src/jalview/io/FileFormat.java @@ -256,6 +256,21 @@ public enum FileFormat implements FileFormatI return new FeaturesFile(); } }, + ScoreMatrix("Substitution matrix", "", false, false) + { + @Override + public AlignmentFileReaderI getReader(FileParse source) + throws IOException + { + return new ScoreMatrixFile(source); + } + + @Override + public AlignmentFileWriterI getWriter(AlignmentI al) + { + return null; + } + }, PDB("PDB", "pdb,ent", true, false) { @Override diff --git a/src/jalview/io/IdentifyFile.java b/src/jalview/io/IdentifyFile.java index 0556e76..4b6f8e4 100755 --- a/src/jalview/io/IdentifyFile.java +++ b/src/jalview/io/IdentifyFile.java @@ -141,6 +141,11 @@ public class IdentifyFile } data = data.toUpperCase(); + if (data.startsWith(ScoreMatrixFile.SCOREMATRIX)) + { + reply = FileFormat.ScoreMatrix; + break; + } if (data.startsWith("##GFF-VERSION")) { // GFF - possibly embedded in a Jalview features file! 
diff --git a/src/jalview/io/ScoreMatrixFile.java b/src/jalview/io/ScoreMatrixFile.java new file mode 100644 index 0000000..d927b3f --- /dev/null +++ b/src/jalview/io/ScoreMatrixFile.java @@ -0,0 +1,207 @@ +package jalview.io; + +import jalview.analysis.scoremodels.ScoreMatrix; +import jalview.analysis.scoremodels.ScoreModels; +import jalview.datamodel.SequenceI; + +import java.io.IOException; +import java.util.StringTokenizer; + +/** + * A class that can parse a file containing a substitution matrix and register + * it for use in Jalview + * + * @author gmcarstairs + * + */ +// TODO modify the AlignFile / IdentifyFile pattern so that non-alignment files +// like this are handled more naturally +public class ScoreMatrixFile extends AlignFile implements + AlignmentFileReaderI +{ + // first non-comment line identifier - also checked in IdentifyFile + public static final String SCOREMATRIX = "SCOREMATRIX"; + + private static final String DELIMITERS = " ,\t"; + + private static final String COMMENT_CHAR = "#"; + + private String matrixName; + + /** + * Constructor + * + * @param source + * @throws IOException + */ + public ScoreMatrixFile(FileParse source) throws IOException + { + super(false, source); + } + + @Override + public String print(SequenceI[] sqs, boolean jvsuffix) + { + return null; + } + + /** + * Parses the score matrix file, and if successful registers the matrix so it + * will be shown in Jalview menus. + */ + @Override + public void parse() throws IOException + { + ScoreMatrix sm = parseMatrix(); + + ScoreModels.getInstance().registerScoreModel(sm); + } + + /** + * Parses the score matrix file and constructs a ScoreMatrix object. If an + * error is found in parsing, it is thrown as FileFormatException. Any + * warnings are written to syserr. 
+ * + * @return + * @throws IOException + */ + public ScoreMatrix parseMatrix() throws IOException + { + ScoreMatrix sm = null; + int lineNo = 0; + String name = null; + String alphabet = null; + float[][] scores = null; + int size = 0; + int row = 0; + String err = null; + String data; + + while ((data = nextLine()) != null) + { + lineNo++; + data = data.trim(); + if (data.startsWith(COMMENT_CHAR) || data.length() == 0) + { + continue; + } + if (data.toUpperCase().startsWith(SCOREMATRIX)) + { + /* + * Parse name from ScoreMatrix + */ + if (name != null) + { + System.err + .println("Warning: 'ScoreMatrix' repeated in file at line " + + lineNo); + } + StringTokenizer nameLine = new StringTokenizer(data, DELIMITERS); + if (nameLine.countTokens() != 2) + { + err = "Format error: expected 'ScoreMatrix ', found '" + + data + "' at line " + lineNo; + throw new FileFormatException(err); + } + nameLine.nextToken(); + name = nameLine.nextToken(); + continue; + } + else if (name == null) + { + err = "Format error: 'ScoreMatrix ' should be the first non-comment line"; + throw new FileFormatException(err); + } + + /* + * next line after ScoreMatrix should be the alphabet of scored symbols + */ + if (alphabet == null) + { + alphabet = data; + size = alphabet.length(); + scores = new float[size][]; + continue; + } + + /* + * too much information + */ + if (row >= size) + { + err = "Unexpected extra input line in score model file: '" + data + + "'"; + throw new FileFormatException(err); + } + + /* + * subsequent lines should be the symbol scores + * optionally with the symbol as the first column for readability + */ + StringTokenizer scoreLine = new StringTokenizer(data, DELIMITERS); + if (scoreLine.countTokens() == size + 1) + { + /* + * check 'guide' symbol is the row'th letter of the alphabet + */ + String symbol = scoreLine.nextToken(); + if (symbol.length() > 1 || symbol.charAt(0) != alphabet.charAt(row)) + { + err = String + .format("Error parsing score matrix at line %d, 
expected '%s' but found '%s'", + lineNo, alphabet.charAt(row), symbol); + throw new FileFormatException(err); + } + } + if (scoreLine.countTokens() != size) + { + err = String.format("Expected %d scores at line %d but found %d", + size, lineNo, scoreLine.countTokens()); + throw new FileFormatException(err); + } + scores[row] = new float[size]; + int col = 0; + String value = null; + while (scoreLine.hasMoreTokens()) + { + try + { + value = scoreLine.nextToken(); + scores[row][col] = Float.valueOf(value); + col++; + } catch (NumberFormatException e) + { + err = String.format( + "Invalid score value '%s' at line %d column %d", value, + lineNo, col); + throw new FileFormatException(err); + } + } + row++; + } + + /* + * out of data - check we found enough + */ + if (row < size) + { + err = String + .format("Expected %d rows of score data in score matrix but only found %d", + size, row); + throw new FileFormatException(err); + } + + /* + * If we get here, then name, alphabet and scores have been parsed successfully + */ + sm = new ScoreMatrix(name, alphabet.toCharArray(), scores); + matrixName = name; + + return sm; + } + + public String getMatrixName() + { + return matrixName; + } +} diff --git a/test/jalview/analysis/scoremodels/ScoreMatrixTest.java b/test/jalview/analysis/scoremodels/ScoreMatrixTest.java index 7c62854..cbc834d 100644 --- a/test/jalview/analysis/scoremodels/ScoreMatrixTest.java +++ b/test/jalview/analysis/scoremodels/ScoreMatrixTest.java @@ -1,12 +1,6 @@ package jalview.analysis.scoremodels; import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertFalse; -import static org.testng.Assert.assertNotNull; -import static org.testng.Assert.assertNull; -import static org.testng.Assert.assertTrue; - -import java.io.ByteArrayInputStream; import org.testng.annotations.Test; @@ -62,130 +56,4 @@ public class ScoreMatrixTest } } } - - /** - * Test a successful parse of a (small) score matrix file - */ - @Test(groups = "Functional") - 
public void testParse() - { - /* - * some messy but valid input data, with comma, space - * or tab (or combinations) as score value delimiters - * this example includes 'guide' symbols on score rows - */ - String data = "ScoreMatrix MyTest\n" + "ATU tx-\n" - + "A,1.1,1.2,1.3,1.4, 1.5, 1.6, 1.7\n" - + "T,2.1 2.2 2.3 2.4 2.5 2.6 2.7\n" - + "U\t3.1\t3.2\t3.3\t3.4\t3.5\t3.6\t3.7\n" - + " 4.1 ,4.2,\t,4.3 ,\t4.4\t, \4.5,4.6 4.7\n" - + "t, 5.1,5.3,5.3,5.4,5.5, 5.6, 5.7\n" - + "x\t6.1, 6.2 6.3 6.4 6.5 6.6 6.7\n" - + "-, \t7.1\t7.2 7.3, 7.4, 7.5\t,7.6,7.7\n"; - ScoreMatrix sm = ScoreMatrix.parse(new ByteArrayInputStream(data - .getBytes())); - assertNotNull(sm); - assertEquals(sm.getName(), "MyTest"); - assertTrue(sm.isDNA()); - assertFalse(sm.isProtein()); - assertEquals(sm.getPairwiseScore('A', 'A'), 1.1f); - assertEquals(sm.getPairwiseScore('A', 'T'), 1.2f); - assertEquals(sm.getPairwiseScore('a', 'T'), 1.2f); // A/a equivalent - assertEquals(sm.getPairwiseScore('A', 't'), 1.5f); // T/t not equivalent - assertEquals(sm.getPairwiseScore('a', 't'), 1.5f); - assertEquals(sm.getPairwiseScore('T', ' '), 2.4f); - assertEquals(sm.getPairwiseScore('U', 'x'), 3.6f); - assertEquals(sm.getPairwiseScore('u', 'x'), 3.6f); - assertEquals(sm.getPairwiseScore('U', 'X'), 0f); // X (upper) unmapped - assertEquals(sm.getPairwiseScore('A', '.'), 0f); // . 
unmapped - assertEquals(sm.getPairwiseScore('-', '-'), 7.7f); - assertEquals(sm.getPairwiseScore('A', (char) 128), 0f); // out of range - - /* - * without guide symbols on score rows - */ - data = "ScoreMatrix MyTest\nXY\n1 2\n3 4\n"; - sm = ScoreMatrix.parse(new ByteArrayInputStream(data.getBytes())); - assertNotNull(sm); - assertEquals(sm.getPairwiseScore('X', 'X'), 1f); - assertEquals(sm.getPairwiseScore('X', 'y'), 2f); - assertEquals(sm.getPairwiseScore('y', 'x'), 3f); - assertEquals(sm.getPairwiseScore('y', 'Y'), 4f); - assertEquals(sm.getPairwiseScore('D', 'R'), 0f); - } - - @Test(groups = "Functional") - public void testParse_invalidInput() - { - /* - * valid first - */ - String data = "ScoreMatrix MyTest\nXY\n1 2\n3 4\n"; - ScoreMatrix sm = ScoreMatrix.parse(new ByteArrayInputStream(data - .getBytes())); - assertNotNull(sm); - - /* - * Name missing - */ - data = "ScoreMatrix\nXY\n1 2\n3 4\n"; - sm = ScoreMatrix.parse(new ByteArrayInputStream(data.getBytes())); - assertNull(sm); - - /* - * ScoreMatrix header missing - */ - data = "XY\n1 2\n3 4\n"; - sm = ScoreMatrix.parse(new ByteArrayInputStream(data.getBytes())); - assertNull(sm); - - /* - * Not enough rows - */ - data = "ScoreMatrix MyTest\nXY\n1 2\n"; - sm = ScoreMatrix.parse(new ByteArrayInputStream(data.getBytes())); - assertNull(sm); - - /* - * Not enough columns - */ - data = "ScoreMatrix MyTest\nXY\n1 2\n3\n"; - sm = ScoreMatrix.parse(new ByteArrayInputStream(data.getBytes())); - assertNull(sm); - - /* - * Too many columns - */ - data = "ScoreMatrix MyTest\nXY\n1 2\n3 4 5\n"; - sm = ScoreMatrix.parse(new ByteArrayInputStream(data.getBytes())); - assertNull(sm); - - /* - * Too many rows - */ - data = "ScoreMatrix MyTest\nXY\n1 2\n3 4\n6 7"; - sm = ScoreMatrix.parse(new ByteArrayInputStream(data.getBytes())); - assertNull(sm); - - /* - * unsupported delimiter | - */ - data = "ScoreMatrix MyTest\nXY\n1|2\n3|4\n"; - sm = ScoreMatrix.parse(new ByteArrayInputStream(data.getBytes())); - assertNull(sm); - - 
/* - * Bad float value - */ - data = "ScoreMatrix MyTest\nXY\n1 2\n3 four\n"; - sm = ScoreMatrix.parse(new ByteArrayInputStream(data.getBytes())); - assertNull(sm); - - /* - * Bad guide character on scores row - */ - data = "ScoreMatrix MyTest\nXY\nX 1 2\ny 3 4\n"; - sm = ScoreMatrix.parse(new ByteArrayInputStream(data.getBytes())); - assertNull(sm); - } } diff --git a/test/jalview/io/IdentifyFileTest.java b/test/jalview/io/IdentifyFileTest.java index 3d800d8..2e4b9e0 100644 --- a/test/jalview/io/IdentifyFileTest.java +++ b/test/jalview/io/IdentifyFileTest.java @@ -110,7 +110,7 @@ public class IdentifyFileTest { "examples/testdata/cullpdb_pc25_res3.0_R0.3_d150729_chains9361.fasta.15316", FileFormat.Fasta }, - + { "resources/scoreModel/pam250.scm", FileFormat.ScoreMatrix } // { "examples/testdata/test.amsa", "AMSA" }, // { "examples/test.jnet", "JnetFile" }, }; diff --git a/test/jalview/io/ScoreMatrixFileTest.java b/test/jalview/io/ScoreMatrixFileTest.java new file mode 100644 index 0000000..77b7282 --- /dev/null +++ b/test/jalview/io/ScoreMatrixFileTest.java @@ -0,0 +1,260 @@ +package jalview.io; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +import jalview.analysis.scoremodels.ScoreMatrix; + +import java.io.IOException; +import java.net.MalformedURLException; + +import org.testng.annotations.Test; + +public class ScoreMatrixFileTest +{ + + /** + * Test a successful parse of a (small) score matrix file + * + * @throws IOException + * @throws MalformedURLException + */ + @Test(groups = "Functional") + public void testParse() throws MalformedURLException, IOException + { + /* + * some messy but valid input data, with comma, space + * or tab (or combinations) as score value delimiters + * this example includes 'guide' symbols on score rows + */ + String data = "ScoreMatrix MyTest\n" + 
"ATU tx-\n" + + "A,1.1,1.2,1.3,1.4, 1.5, 1.6, 1.7\n" + + "T,2.1 2.2 2.3 2.4 2.5 2.6 2.7\n" + + "U\t3.1\t3.2\t3.3\t3.4\t3.5\t3.6\t3.7\n" + + " 4.1 ,4.2,\t,4.3 ,\t4.4\t, \4.5,4.6 4.7\n" + + "t, 5.1,5.3,5.3,5.4,5.5, 5.6, 5.7\n" + + "x\t6.1, 6.2 6.3 6.4 6.5 6.6 6.7\n" + + "-, \t7.1\t7.2 7.3, 7.4, 7.5\t,7.6,7.7\n"; + FileParse fp = new FileParse(data, DataSourceType.PASTE); + ScoreMatrixFile parser = new ScoreMatrixFile(fp); + ScoreMatrix sm = parser.parseMatrix(); + + assertNotNull(sm); + assertEquals(sm.getName(), "MyTest"); + assertTrue(sm.isDNA()); + assertFalse(sm.isProtein()); + assertEquals(sm.getPairwiseScore('A', 'A'), 1.1f); + assertEquals(sm.getPairwiseScore('A', 'T'), 1.2f); + assertEquals(sm.getPairwiseScore('a', 'T'), 1.2f); // A/a equivalent + assertEquals(sm.getPairwiseScore('A', 't'), 1.5f); // T/t not equivalent + assertEquals(sm.getPairwiseScore('a', 't'), 1.5f); + assertEquals(sm.getPairwiseScore('T', ' '), 2.4f); + assertEquals(sm.getPairwiseScore('U', 'x'), 3.6f); + assertEquals(sm.getPairwiseScore('u', 'x'), 3.6f); + assertEquals(sm.getPairwiseScore('U', 'X'), 0f); // X (upper) unmapped + assertEquals(sm.getPairwiseScore('A', '.'), 0f); // . 
unmapped + assertEquals(sm.getPairwiseScore('-', '-'), 7.7f); + assertEquals(sm.getPairwiseScore('A', (char) 128), 0f); // out of range + + /* + * without guide symbols on score rows + */ + data = "ScoreMatrix MyTest\nXY\n1 2\n3 4\n"; + fp = new FileParse(data, DataSourceType.PASTE); + parser = new ScoreMatrixFile(fp); + sm = parser.parseMatrix(); + assertNotNull(sm); + assertEquals(sm.getPairwiseScore('X', 'X'), 1f); + assertEquals(sm.getPairwiseScore('X', 'y'), 2f); + assertEquals(sm.getPairwiseScore('y', 'x'), 3f); + assertEquals(sm.getPairwiseScore('y', 'Y'), 4f); + assertEquals(sm.getPairwiseScore('D', 'R'), 0f); + } + + @Test(groups = "Functional") + public void testParse_headerMissing() + { + String data; + + data = "XY\n1 2\n3 4\n"; + try + { + new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) + .parseMatrix(); + fail("expected exception"); + } catch (IOException e) + { + assertEquals(e.getMessage(), + "Format error: 'ScoreMatrix ' should be the first non-comment line"); + } + } + + @Test(groups = "Functional") + public void testParse_notEnoughRows() + { + String data = "ScoreMatrix MyTest\nXY\n1 2\n"; + try + { + new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) + .parseMatrix(); + fail("expected exception"); + } catch (IOException e) + { + assertEquals(e.getMessage(), + "Expected 2 rows of score data in score matrix but only found 1"); + } + } + + @Test(groups = "Functional") + public void testParse_notEnoughColumns() + { + String data = "ScoreMatrix MyTest\nXY\n1 2\n3\n"; + try + { + new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) + .parseMatrix(); + fail("expected exception"); + } catch (IOException e) + { + assertEquals(e.getMessage(), + "Expected 2 scores at line 4 but found 1"); + } + } + + @Test(groups = "Functional") + public void testParse_tooManyColumns() + { + /* + * with two too many columns: + */ + String data = "ScoreMatrix MyTest\nXY\n1 2\n3 4 5 6\n"; + try + { + new ScoreMatrixFile(new 
FileParse(data, DataSourceType.PASTE)) + .parseMatrix(); + fail("expected exception"); + } catch (IOException e) + { + assertEquals(e.getMessage(), + "Expected 2 scores at line 4 but found 4"); + } + + /* + * with guide character and one too many columns: + */ + data = "ScoreMatrix MyTest\nXY\nX 1 2\nY 3 4 5\n"; + try + { + new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) + .parseMatrix(); + fail("expected exception"); + } catch (IOException e) + { + assertEquals(e.getMessage(), + "Expected 2 scores at line 4 but found 4"); + } + + /* + * with no guide character and one too many columns: + * parser guesses the first column is the guide character + */ + data = "ScoreMatrix MyTest\nXY\n1 2\n3 4 5\n"; + try + { + new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) + .parseMatrix(); + fail("expected exception"); + } catch (IOException e) + { + assertEquals(e.getMessage(), + "Error parsing score matrix at line 4, expected 'Y' but found '3'"); + } + } + + @Test(groups = "Functional") + public void testParse_tooManyRows() + { + String data = "ScoreMatrix MyTest\nXY\n1 2\n3 4\n6 7"; + try + { + new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) + .parseMatrix(); + fail("expected exception"); + } catch (IOException e) + { + assertEquals(e.getMessage(), + "Unexpected extra input line in score model file: '6 7'"); + } + } + + @Test(groups = "Functional") + public void testParse_badDelimiter() + { + String data = "ScoreMatrix MyTest\nXY\n1|2\n3|4\n"; + try + { + new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) + .parseMatrix(); + fail("expected exception"); + } catch (IOException e) + { + assertEquals(e.getMessage(), + "Expected 2 scores at line 3 but found 1"); + } + } + + @Test(groups = "Functional") + public void testParse_badFloat() + { + String data = "ScoreMatrix MyTest\nXY\n1 2\n3 four\n"; + try + { + new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) + .parseMatrix(); + fail("expected exception"); + } catch 
(IOException e) + { + assertEquals(e.getMessage(), + "Invalid score value 'four' at line 4 column 1"); + } + } + + @Test(groups = "Functional") + public void testParse_badGuideCharacter() + { + String data = "ScoreMatrix MyTest\nXY\nX 1 2\ny 3 4\n"; + try + { + new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) + .parseMatrix(); + fail("expected exception"); + } catch (IOException e) + { + assertEquals(e.getMessage(), + "Error parsing score matrix at line 4, expected 'Y' but found 'y'"); + } + } + + @Test(groups = "Functional") + public void testParse_nameMissing() + { + /* + * Name missing + */ + String data = "ScoreMatrix\nXY\n1 2\n3 4\n"; + try + { + new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) + .parseMatrix(); + fail("expected exception"); + } catch (IOException e) + { + assertEquals( + e.getMessage(), + "Format error: expected 'ScoreMatrix ', found 'ScoreMatrix' at line 1"); + } + } +}