X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FScoreMatrixFile.java;h=0b70dce523589290b5bd64b09e0f902966161eb5;hb=f6123f656fa387e11f506dedd09672a0d0ff5ac5;hp=4e89c3ff36ed0087b20c2cf8d6005a4f959530c0;hpb=e65e612cabab4118364c44b6075302e0d2881744;p=jalview.git diff --git a/src/jalview/io/ScoreMatrixFile.java b/src/jalview/io/ScoreMatrixFile.java index 4e89c3f..0b70dce 100644 --- a/src/jalview/io/ScoreMatrixFile.java +++ b/src/jalview/io/ScoreMatrixFile.java @@ -1,3 +1,23 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see <http://www.gnu.org/licenses/>. + * The Jalview Authors are detailed in the 'AUTHORS' file. 
+ */ package jalview.io; import jalview.analysis.scoremodels.ScoreMatrix; @@ -30,8 +50,8 @@ import java.util.StringTokenizer; * and the substitution scores * */ -public class ScoreMatrixFile extends AlignFile implements - AlignmentFileReaderI +public class ScoreMatrixFile extends AlignFile + implements AlignmentFileReaderI { // first non-comment line identifier - also checked in IdentifyFile public static final String SCOREMATRIX = "SCOREMATRIX"; @@ -42,7 +62,15 @@ public class ScoreMatrixFile extends AlignFile implements private String matrixName; - boolean lowerDiagonalOnly; + /* + * aaindex format has scores for diagonal and below only + */ + boolean isLowerDiagonalOnly; + + /* + * ncbi format has symbols as first column on score rows + */ + boolean hasGuideColumn; /** * Constructor @@ -93,7 +121,7 @@ public class ScoreMatrixFile extends AlignFile implements int row = 0; String err = null; String data; - lowerDiagonalOnly = false; + isLowerDiagonalOnly = false; while ((data = nextLine()) != null) { @@ -111,8 +139,8 @@ public class ScoreMatrixFile extends AlignFile implements */ if (name != null) { - System.err - .println("Warning: 'ScoreMatrix' repeated in file at line " + throw new FileFormatException( + "Error: 'ScoreMatrix' repeated in file at line " + lineNo); } StringTokenizer nameLine = new StringTokenizer(data, DELIMITERS); @@ -178,9 +206,9 @@ public class ScoreMatrixFile extends AlignFile implements */ if (row < size) { - err = String - .format("Expected %d rows of score data in score matrix but only found %d", - size, row); + err = String.format( + "Expected %d rows of score data in score matrix but only found %d", + size, row); throw new FileFormatException(err); } @@ -244,20 +272,10 @@ public class ScoreMatrixFile extends AlignFile implements { parseValues(data, lineNo, scores, row, alphabet); row++; - if (row == size) - { - break; - } } } - if (data != null) - { - System.err.println("Warning: unexpected extra data in matrix file: " - + data); - } - 
ScoreMatrix sm = new ScoreMatrix(name, alphabet, scores); - sm.setDescription(description); + ScoreMatrix sm = new ScoreMatrix(name, description, alphabet, scores); matrixName = name; return sm; @@ -289,7 +307,24 @@ public class ScoreMatrixFile extends AlignFile implements StringTokenizer scoreLine = new StringTokenizer(data, DELIMITERS); int tokenCount = scoreLine.countTokens(); - if (tokenCount == size + 1) + + /* + * inspect first row to see if it includes the symbol in the first column, + * and to see if it is lower diagonal values only (i.e. just one score) + */ + if (row == 0) + { + if (data.startsWith(String.valueOf(alphabet[0]))) + { + hasGuideColumn = true; + } + if (tokenCount == (hasGuideColumn ? 2 : 1)) + { + isLowerDiagonalOnly = true; + } + } + + if (hasGuideColumn) { /* * check 'guide' symbol is the row'th letter of the alphabet @@ -297,41 +332,37 @@ public class ScoreMatrixFile extends AlignFile implements String symbol = scoreLine.nextToken(); if (symbol.length() > 1 || symbol.charAt(0) != alphabet[row]) { - err = String - .format("Error parsing score matrix at line %d, expected '%s' but found '%s'", - lineNo, alphabet[row], symbol); + err = String.format( + "Error parsing score matrix at line %d, expected '%s' but found '%s'", + lineNo, alphabet[row], symbol); throw new FileFormatException(err); } + tokenCount = scoreLine.countTokens(); // excluding guide symbol } - tokenCount = scoreLine.countTokens(); - /* - * AAIndex format only has the lower diagonal i.e. 
- * 1 score in row 0, 2 in row 1, etc - * check this in all but the last row (which is the same either way) + * check the right number of values (lower diagonal or full format) */ - if (row < size - 1) + if (isLowerDiagonalOnly && tokenCount != row + 1) { - boolean lowerDiagonal = tokenCount == row + 1; - if (lowerDiagonalOnly && !lowerDiagonal) - { - /* - * had detected lower diagonal form but now it isn't - error - */ - err = String.format("Unexpected number of tokens at line %d", - lineNo); - throw new FileFormatException(err); - } - lowerDiagonalOnly = lowerDiagonal; + err = String.format( + "Expected %d scores at line %d: '%s' but found %d", row + 1, + lineNo, data, tokenCount); + throw new FileFormatException(err); } - if (!lowerDiagonalOnly && tokenCount != size) + if (!isLowerDiagonalOnly && tokenCount != size) { - err = String.format("Expected %d scores at line %d but found %d", - size, lineNo, scoreLine.countTokens()); + err = String.format( + "Expected %d scores at line %d: '%s' but found %d", size, + lineNo, data, scoreLine.countTokens()); throw new FileFormatException(err); } + + /* + * parse and set the values, setting the symmetrical value + * as well if lower diagonal format data + */ scores[row] = new float[size]; int col = 0; String value = null; @@ -341,16 +372,15 @@ public class ScoreMatrixFile extends AlignFile implements { value = scoreLine.nextToken(); scores[row][col] = Float.valueOf(value); - if (lowerDiagonalOnly) + if (isLowerDiagonalOnly) { scores[col][row] = scores[row][col]; } col++; } catch (NumberFormatException e) { - err = String.format( - "Invalid score value '%s' at line %d column %d", value, - lineNo, col); + err = String.format("Invalid score value '%s' at line %d column %d", + value, lineNo, col); throw new FileFormatException(err); } } @@ -376,7 +406,7 @@ public class ScoreMatrixFile extends AlignFile implements { String err = "Unexpected aaIndex score matrix data at line " + lineNo + ": " + data; - + try { String[] toks = 
data.split(","); @@ -415,37 +445,6 @@ public class ScoreMatrixFile extends AlignFile implements return false; } - /** - * Answers true if the data line consists of the alphabet characters, - * delimited (as to provide a heading row). Otherwise returns false (e.g. if - * the data is a row of score values). - * - * @param data - * @param alphabet - * @return - */ - private boolean isHeaderLine(String data, String alphabet) - { - StringTokenizer scoreLine = new StringTokenizer(data, DELIMITERS); - int i = 0; - while (scoreLine.hasMoreElements()) - { - /* - * skip over characters in the alphabet that are - * also a delimiter (e.g. space) - */ - char symbol = alphabet.charAt(i++); - if (!DELIMITERS.contains(String.valueOf(symbol))) - { - if (!String.valueOf(symbol).equals(scoreLine.nextToken())) - { - return false; - } - } - } - return true; - } - public String getMatrixName() { return matrixName;