+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
package jalview.io;
import jalview.analysis.scoremodels.ScoreMatrix;
* and the substitution scores
* </pre>
*/
-public class ScoreMatrixFile extends AlignFile implements
- AlignmentFileReaderI
+public class ScoreMatrixFile extends AlignFile
+ implements AlignmentFileReaderI
{
// first non-comment line identifier - also checked in IdentifyFile
public static final String SCOREMATRIX = "SCOREMATRIX";
private String matrixName;
- boolean lowerDiagonalOnly;
+ /*
+ * AAindex format has scores for the diagonal and below only
+ */
+ boolean isLowerDiagonalOnly;
+
+ /*
+ * NCBI format has the row's symbol as the first column on score rows
+ */
+ boolean hasGuideColumn;
/**
* Constructor
int row = 0;
String err = null;
String data;
- lowerDiagonalOnly = false;
+ isLowerDiagonalOnly = false;
while ((data = nextLine()) != null)
{
*/
if (name != null)
{
- System.err
- .println("Warning: 'ScoreMatrix' repeated in file at line "
+ throw new FileFormatException(
+ "Error: 'ScoreMatrix' repeated in file at line "
+ lineNo);
}
StringTokenizer nameLine = new StringTokenizer(data, DELIMITERS);
*/
if (row < size)
{
- err = String
- .format("Expected %d rows of score data in score matrix but only found %d",
- size, row);
+ err = String.format(
+ "Expected %d rows of score data in score matrix but only found %d",
+ size, row);
throw new FileFormatException(err);
}
{
parseValues(data, lineNo, scores, row, alphabet);
row++;
- if (row == size)
- {
- break;
- }
}
}
- if (data != null)
- {
- System.err.println("Warning: unexpected extra data in matrix file: "
- + data);
- }
- ScoreMatrix sm = new ScoreMatrix(name, alphabet, scores);
- sm.setDescription(description);
+ ScoreMatrix sm = new ScoreMatrix(name, description, alphabet, scores);
matrixName = name;
return sm;
StringTokenizer scoreLine = new StringTokenizer(data, DELIMITERS);
int tokenCount = scoreLine.countTokens();
- if (tokenCount == size + 1)
+
+ /*
+ * inspect first row to see if it includes the symbol in the first column,
+ * and to see if it is lower diagonal values only (i.e. just one score)
+ */
+ if (row == 0)
+ {
+ if (data.startsWith(String.valueOf(alphabet[0])))
+ {
+ hasGuideColumn = true;
+ }
+ if (tokenCount == (hasGuideColumn ? 2 : 1))
+ {
+ isLowerDiagonalOnly = true;
+ }
+ }
+
+ if (hasGuideColumn)
{
/*
* check 'guide' symbol is the row'th letter of the alphabet
String symbol = scoreLine.nextToken();
if (symbol.length() > 1 || symbol.charAt(0) != alphabet[row])
{
- err = String
- .format("Error parsing score matrix at line %d, expected '%s' but found '%s'",
- lineNo, alphabet[row], symbol);
+ err = String.format(
+ "Error parsing score matrix at line %d, expected '%s' but found '%s'",
+ lineNo, alphabet[row], symbol);
throw new FileFormatException(err);
}
+ tokenCount = scoreLine.countTokens(); // excluding guide symbol
}
- tokenCount = scoreLine.countTokens();
-
/*
- * AAIndex format only has the lower diagonal i.e.
- * 1 score in row 0, 2 in row 1, etc
- * check this in all but the last row (which is the same either way)
+ * check the right number of values (lower diagonal or full format)
*/
- if (row < size - 1)
+ if (isLowerDiagonalOnly && tokenCount != row + 1)
{
- boolean lowerDiagonal = tokenCount == row + 1;
- if (lowerDiagonalOnly && !lowerDiagonal)
- {
- /*
- * had detected lower diagonal form but now it isn't - error
- */
- err = String.format("Unexpected number of tokens at line %d",
- lineNo);
- throw new FileFormatException(err);
- }
- lowerDiagonalOnly = lowerDiagonal;
+ err = String.format(
+ "Expected %d scores at line %d: '%s' but found %d", row + 1,
+ lineNo, data, tokenCount);
+ throw new FileFormatException(err);
}
- if (!lowerDiagonalOnly && tokenCount != size)
+ if (!isLowerDiagonalOnly && tokenCount != size)
{
- err = String.format("Expected %d scores at line %d but found %d",
- size, lineNo, scoreLine.countTokens());
+ err = String.format(
+ "Expected %d scores at line %d: '%s' but found %d", size,
+ lineNo, data, scoreLine.countTokens());
throw new FileFormatException(err);
}
+
+ /*
+ * parse and set the values, setting the symmetrical value
+ * as well if lower diagonal format data
+ */
scores[row] = new float[size];
int col = 0;
String value = null;
{
value = scoreLine.nextToken();
scores[row][col] = Float.valueOf(value);
- if (lowerDiagonalOnly)
+ if (isLowerDiagonalOnly)
{
scores[col][row] = scores[row][col];
}
col++;
} catch (NumberFormatException e)
{
- err = String.format(
- "Invalid score value '%s' at line %d column %d", value,
- lineNo, col);
+ err = String.format("Invalid score value '%s' at line %d column %d",
+ value, lineNo, col);
throw new FileFormatException(err);
}
}
{
String err = "Unexpected aaIndex score matrix data at line " + lineNo
+ ": " + data;
-
+
try
{
String[] toks = data.split(",");
return false;
}
- /**
- * Answers true if the data line consists of the alphabet characters,
- * delimited (as to provide a heading row). Otherwise returns false (e.g. if
- * the data is a row of score values).
- *
- * @param data
- * @param alphabet
- * @return
- */
- private boolean isHeaderLine(String data, String alphabet)
- {
- StringTokenizer scoreLine = new StringTokenizer(data, DELIMITERS);
- int i = 0;
- while (scoreLine.hasMoreElements())
- {
- /*
- * skip over characters in the alphabet that are
- * also a delimiter (e.g. space)
- */
- char symbol = alphabet.charAt(i++);
- if (!DELIMITERS.contains(String.valueOf(symbol)))
- {
- if (!String.valueOf(symbol).equals(scoreLine.nextToken()))
- {
- return false;
- }
- }
- }
- return true;
- }
-
public String getMatrixName()
{
return matrixName;