3 import jalview.analysis.scoremodels.ScoreMatrix;
4 import jalview.analysis.scoremodels.ScoreModels;
5 import jalview.datamodel.SequenceI;
7 import java.io.IOException;
8 import java.util.StringTokenizer;
11 * A class that can parse a file containing a substitution matrix and register
12 * it for use in Jalview
17 // TODO modify the AlignFile / IdentifyFile pattern so that non-alignment files
18 // like this are handled more naturally
19 public class ScoreMatrixFile extends AlignFile implements
22 // first non-comment line identifier - also checked in IdentifyFile
23 public static final String SCOREMATRIX = "SCOREMATRIX";
25 private static final String DELIMITERS = " ,\t";
27 private static final String COMMENT_CHAR = "#";
29 private String matrixName;
37 public ScoreMatrixFile(FileParse source) throws IOException
43 public String print(SequenceI[] sqs, boolean jvsuffix)
49 * Parses the score matrix file, and if successful registers the matrix so it
50 * will be shown in Jalview menus.
53 public void parse() throws IOException
55 ScoreMatrix sm = parseMatrix();
57 ScoreModels.getInstance().registerScoreModel(sm);
61 * Parses the score matrix file and constructs a ScoreMatrix object. If an
62 * error is found in parsing, it is thrown as FileFormatException. Any
63 * warnings are written to syserr.
68 public ScoreMatrix parseMatrix() throws IOException
70 ScoreMatrix sm = null;
73 String alphabet = null;
74 float[][] scores = null;
80 while ((data = nextLine()) != null)
84 if (data.startsWith(COMMENT_CHAR) || data.length() == 0)
88 if (data.toUpperCase().startsWith(SCOREMATRIX))
91 * Parse name from ScoreMatrix <name>
92 * we allow any delimiter after ScoreMatrix then take the rest of the line
97 .println("Warning: 'ScoreMatrix' repeated in file at line "
100 StringTokenizer nameLine = new StringTokenizer(data, DELIMITERS);
101 if (nameLine.countTokens() < 2)
103 err = "Format error: expected 'ScoreMatrix <name>', found '"
104 + data + "' at line " + lineNo;
105 throw new FileFormatException(err);
107 nameLine.nextToken(); // 'ScoreMatrix'
108 name = nameLine.nextToken(); // next field
109 name = data.substring(1).substring(data.substring(1).indexOf(name));
112 else if (name == null)
114 err = "Format error: 'ScoreMatrix <name>' should be the first non-comment line";
115 throw new FileFormatException(err);
119 * next line after ScoreMatrix should be the alphabet of scored symbols
121 if (alphabet == null)
124 size = alphabet.length();
125 scores = new float[size][];
130 * too much information
134 err = "Unexpected extra input line in score model file: '" + data
136 throw new FileFormatException(err);
140 * subsequent lines should be the symbol scores
141 * optionally with the symbol as the first column for readability
143 StringTokenizer scoreLine = new StringTokenizer(data, DELIMITERS);
144 if (scoreLine.countTokens() == size + 1)
147 * check 'guide' symbol is the row'th letter of the alphabet
149 String symbol = scoreLine.nextToken();
150 if (symbol.length() > 1 || symbol.charAt(0) != alphabet.charAt(row))
153 .format("Error parsing score matrix at line %d, expected '%s' but found '%s'",
154 lineNo, alphabet.charAt(row), symbol);
155 throw new FileFormatException(err);
158 if (scoreLine.countTokens() != size)
160 err = String.format("Expected %d scores at line %d but found %d",
161 size, lineNo, scoreLine.countTokens());
162 throw new FileFormatException(err);
164 scores[row] = new float[size];
167 while (scoreLine.hasMoreTokens())
171 value = scoreLine.nextToken();
172 scores[row][col] = Float.valueOf(value);
174 } catch (NumberFormatException e)
177 "Invalid score value '%s' at line %d column %d", value,
179 throw new FileFormatException(err);
186 * out of data - check we found enough
191 .format("Expected %d rows of score data in score matrix but only found %d",
193 throw new FileFormatException(err);
197 * If we get here, then name, alphabet and scores have been parsed successfully
199 sm = new ScoreMatrix(name, alphabet.toCharArray(), scores);
205 public String getMatrixName()