3 import jalview.analysis.scoremodels.ScoreMatrix;
4 import jalview.analysis.scoremodels.ScoreModels;
5 import jalview.datamodel.SequenceI;
7 import java.io.IOException;
8 import java.util.StringTokenizer;
11 * A class that can parse a file containing a substitution matrix and register
12 * it for use in Jalview
17 // TODO modify the AlignFile / IdentifyFile pattern so that non-alignment files
18 // like this are handled more naturally
19 public class ScoreMatrixFile extends AlignFile implements
22 // first non-comment line identifier - also checked in IdentifyFile
// parseMatrix() upper-cases each input line before testing for this prefix,
// so the header keyword is matched case-insensitively
23 public static final String SCOREMATRIX = "SCOREMATRIX";
// token separators (space, comma, tab) used by parseMatrix() to split both
// the header line and each row of scores
25 private static final String DELIMITERS = " ,\t";
// prefix that parseMatrix() tests for, together with empty lines, before any
// other processing of an input line
27 private static final String COMMENT_CHAR = "#";
// NOTE(review): presumably holds the parsed matrix name exposed through
// getMatrixName(); no assignment is visible in this listing - confirm
29 private String matrixName;
37 public ScoreMatrixFile(FileParse source) throws IOException
43 public String print(SequenceI[] sqs, boolean jvsuffix)
49 * Parses the score matrix file, and if successful registers the matrix so it
50 * will be shown in Jalview menus.
53 public void parse() throws IOException
// Any exception thrown by parseMatrix() propagates from here (no handler is
// visible in this listing), so registration below is only reached after a
// successful parse.
55 ScoreMatrix sm = parseMatrix();
// Registration goes through the shared ScoreModels instance obtained from
// getInstance().
57 ScoreModels.getInstance().registerScoreModel(sm);
61 * Parses the score matrix file and constructs a ScoreMatrix object. If an
62 * error is found in parsing, it is thrown as FileFormatException. Any
63 * warnings are written to syserr.
68 public ScoreMatrix parseMatrix() throws IOException
// NOTE(review): this listing elides some original lines (braces and the
// declarations of data, lineNo, name, size, row, col, err and value), so the
// comments added here describe only the statements that are visible.
//
// Overview of the visible steps, in order:
// 1. read lines until nextLine() returns null
// 2. test for comment or empty lines
// 3. read the matrix name from the header line
// 4. take the alphabet and allocate one score row per alphabet symbol
// 5. parse each following line as a row of float scores, optionally
// preceded by a guide symbol
// 6. after the loop, report a shortfall of rows as a format error, otherwise
// build the ScoreMatrix from the name, the alphabet as a char array, and
// the score rows
70 ScoreMatrix sm = null;
73 String alphabet = null;
74 float[][] scores = null;
// loop ends when nextLine() returns null
80 while ((data = nextLine()) != null)
// Lines starting with COMMENT_CHAR, and empty lines, are detected here; the
// action taken is elided from this listing - presumably they are skipped.
84 if (data.startsWith(COMMENT_CHAR) || data.length() == 0)
// The line is upper-cased before comparison, so the header keyword matches
// in any letter case.
88 if (data.toUpperCase().startsWith(SCOREMATRIX))
91 * Parse name from ScoreMatrix <name>
96 .println("Warning: 'ScoreMatrix' repeated in file at line "
// The header must split into exactly two tokens (keyword, name); since
// DELIMITERS contains space, comma and tab, a name containing any of those
// characters fails this check.
99 StringTokenizer nameLine = new StringTokenizer(data, DELIMITERS);
100 if (nameLine.countTokens() != 2)
102 err = "Format error: expected 'ScoreMatrix <name>', found '"
103 + data + "' at line " + lineNo;
104 throw new FileFormatException(err);
// discard the keyword token; the token after it is the matrix name
106 nameLine.nextToken();
107 name = nameLine.nextToken();
// any other content seen before a name has been read is a format error
110 else if (name == null)
112 err = "Format error: 'ScoreMatrix <name>' should be the first non-comment line";
113 throw new FileFormatException(err);
117 * next line after ScoreMatrix should be the alphabet of scored symbols
119 if (alphabet == null)
// The matrix is square: size is the alphabet length, one row is allocated
// per symbol here, and each row is later created with size columns.
122 size = alphabet.length();
123 scores = new float[size][];
128 * too much information
132 err = "Unexpected extra input line in score model file: '" + data
134 throw new FileFormatException(err);
138 * subsequent lines should be the symbol scores
139 * optionally with the symbol as the first column for readability
// Row parsing, as visible below:
// - size + 1 tokens means the row starts with a guide symbol; it is consumed
// and must be a single character equal to alphabet.charAt(row).
// StringTokenizer never returns an empty token, so charAt(0) is safe.
// - countTokens() reports the tokens still remaining, so the second check
// requires exactly size scores after any guide symbol has been consumed.
// - each remaining token is converted with Float.valueOf; a
// NumberFormatException is reported as a FileFormatException whose message
// includes the offending value.
141 StringTokenizer scoreLine = new StringTokenizer(data, DELIMITERS);
142 if (scoreLine.countTokens() == size + 1)
145 * check 'guide' symbol is the row'th letter of the alphabet
147 String symbol = scoreLine.nextToken();
148 if (symbol.length() > 1 || symbol.charAt(0) != alphabet.charAt(row))
151 .format("Error parsing score matrix at line %d, expected '%s' but found '%s'",
152 lineNo, alphabet.charAt(row), symbol);
153 throw new FileFormatException(err);
156 if (scoreLine.countTokens() != size)
158 err = String.format("Expected %d scores at line %d but found %d",
159 size, lineNo, scoreLine.countTokens());
160 throw new FileFormatException(err);
162 scores[row] = new float[size];
165 while (scoreLine.hasMoreTokens())
169 value = scoreLine.nextToken();
170 scores[row][col] = Float.valueOf(value);
172 } catch (NumberFormatException e)
175 "Invalid score value '%s' at line %d column %d", value,
177 throw new FileFormatException(err);
184 * out of data - check we found enough
189 .format("Expected %d rows of score data in score matrix but only found %d",
191 throw new FileFormatException(err);
195 * If we get here, then name, alphabet and scores have been parsed successfully
197 sm = new ScoreMatrix(name, alphabet.toCharArray(), scores);
203 public String getMatrixName()