package jalview.io;

import jalview.analysis.scoremodels.ScoreMatrix;
import jalview.analysis.scoremodels.ScoreModels;
import jalview.datamodel.SequenceI;

import java.io.IOException;
import java.util.StringTokenizer;

/**
 * A class that can parse a file containing a substitution matrix and register
 * it for use in Jalview.
 * <p>
 * The format accepted by {@link #parseMatrix()} is, ignoring blank lines and
 * lines starting with '#':
 * <ol>
 * <li>a line starting with 'ScoreMatrix' (any case) followed by a delimiter
 * and the matrix name (the rest of the line)</li>
 * <li>a line holding the alphabet of scored symbols, one character per
 * symbol</li>
 * <li>optionally, a heading line of the delimited alphabet symbols</li>
 * <li>one line of delimited scores per alphabet symbol, optionally preceded by
 * the symbol itself as a first 'guide' column</li>
 * </ol>
 * Fields may be delimited by space, comma or tab.
 *
 * @author gmcarstairs
 *
 */
// TODO modify the AlignFile / IdentifyFile pattern so that non-alignment files
// like this are handled more naturally
public class ScoreMatrixFile extends AlignFile
        implements AlignmentFileReaderI {
  // first non-comment line identifier - also checked in IdentifyFile
  public static final String SCOREMATRIX = "SCOREMATRIX";

  /* characters that may separate fields on a line */
  private static final String DELIMITERS = " ,\t";

  /* a line starting with this (after trimming) is ignored */
  private static final String COMMENT_CHAR = "#";

  /* name of the last successfully parsed matrix, or null if none yet */
  private String matrixName;

  /**
   * Constructor. Passes {@code false} as the first argument of the superclass
   * constructor (presumably so that parsing is not done on construction - to
   * be confirmed against AlignFile).
   *
   * @param source
   *          the data source to read the score matrix from
   * @throws IOException
   *           if thrown by the superclass constructor
   */
  public ScoreMatrixFile(FileParse source) throws IOException {
    super(false, source);
  }

  /**
   * Output is not supported for this file type.
   *
   * @param sqs
   *          ignored
   * @param jvsuffix
   *          ignored
   * @return always null
   */
  @Override
  public String print(SequenceI[] sqs, boolean jvsuffix) {
    return null;
  }

  /**
   * Parses the score matrix file, and if successful registers the matrix so it
   * will be shown in Jalview menus.
   *
   * @throws IOException
   *           if the file cannot be read or is not validly formatted
   */
  @Override
  public void parse() throws IOException {
    ScoreMatrix sm = parseMatrix();

    ScoreModels.getInstance().registerScoreModel(sm);
  }

  /**
   * Parses the score matrix file and constructs a ScoreMatrix object. If an
   * error is found in parsing, it is thrown as FileFormatException. Any
   * warnings are written to syserr. On success the matrix name is also
   * remembered and can be retrieved with {@link #getMatrixName()}.
   *
   * @return the score matrix built from the name, alphabet and scores read
   * @throws IOException
   *           if reading a line fails, or (as FileFormatException) if the
   *           name line, alphabet line or any score row is missing or
   *           malformed
   */
  public ScoreMatrix parseMatrix() throws IOException {
    int lineNo = 0;
    String name = null;
    String alphabet = null;
    float[][] scores = null;
    int size = 0;
    int row = 0;

    String data;
    while ((data = nextLine()) != null) {
      lineNo++;
      data = data.trim();
      if (data.startsWith(COMMENT_CHAR) || data.length() == 0) {
        continue;
      }

      /*
       * case-insensitive match that does not depend on the default locale
       * (String.toUpperCase() does, e.g. for 'i' in a Turkish locale)
       */
      if (data.regionMatches(true, 0, SCOREMATRIX, 0,
              SCOREMATRIX.length())) {
        if (name != null) {
          System.err
                  .println("Warning: 'ScoreMatrix' repeated in file at line "
                          + lineNo);
        }
        name = parseName(data, lineNo);
        continue;
      } else if (name == null) {
        throw new FileFormatException(
                "Format error: 'ScoreMatrix ' should be the first non-comment line");
      }

      /*
       * next line after ScoreMatrix should be the alphabet of scored symbols
       */
      if (alphabet == null) {
        alphabet = data;
        size = alphabet.length();
        scores = new float[size][];
        continue;
      }

      /*
       * too much information
       */
      if (row >= size) {
        throw new FileFormatException(
                "Unexpected extra input line in score model file: '" + data
                        + "'");
      }

      /*
       * permit an uncommented line with delimited residue headings
       */
      if (isHeaderLine(data, alphabet)) {
        continue;
      }

      /*
       * subsequent lines should be the symbol scores
       * optionally with the symbol as the first column for readability
       */
      scores[row] = parseScoreRow(data, alphabet, row, lineNo);
      row++;
    }

    /*
     * out of data - check we found everything; without the first two checks
     * an empty file, or one holding only the name line, would fail later
     * with a NullPointerException rather than a format error
     */
    if (name == null) {
      throw new FileFormatException(
              "Format error: no 'ScoreMatrix' line found in score matrix file");
    }
    if (alphabet == null) {
      throw new FileFormatException(
              "Format error: no alphabet line found in score matrix file");
    }
    if (row < size) {
      throw new FileFormatException(String.format(
              "Expected %d rows of score data in score matrix but only found %d",
              size, row));
    }

    /*
     * If we get here, then name, alphabet and scores have been parsed successfully
     */
    ScoreMatrix sm = new ScoreMatrix(name, alphabet.toCharArray(), scores);
    matrixName = name;

    return sm;
  }

  /**
   * Extracts the matrix name from a (trimmed) line that starts with
   * 'ScoreMatrix'. Any delimiter is allowed after the first field, and the
   * name is then the whole of the rest of the line (so may itself contain
   * delimiters).
   *
   * @param data
   *          the trimmed line, starting with the ScoreMatrix keyword
   * @param lineNo
   *          line number, used in the error message
   * @return the matrix name
   * @throws IOException
   *           (as FileFormatException) if the line has fewer than two fields
   */
  private static String parseName(String data, int lineNo)
          throws IOException {
    StringTokenizer nameLine = new StringTokenizer(data, DELIMITERS);
    if (nameLine.countTokens() < 2) {
      throw new FileFormatException(
              "Format error: expected 'ScoreMatrix ', found '" + data
                      + "' at line " + lineNo);
    }

    /*
     * the line starts with the keyword (not a delimiter), so the first token
     * starts at position 0; locate the name by position rather than by
     * searching for its text, which could also occur within the first token
     */
    int pos = nameLine.nextToken().length();

    /*
     * skip the delimiter(s) before the name; a second token is known to
     * exist so this stops before the end of the line
     */
    while (DELIMITERS.indexOf(data.charAt(pos)) >= 0) {
      pos++;
    }
    return data.substring(pos);
  }

  /**
   * Parses one line of scores, optionally preceded by a 'guide' column which
   * must then be the row'th symbol of the alphabet.
   *
   * @param data
   *          the trimmed line
   * @param alphabet
   *          the scored symbols, one character each
   * @param row
   *          zero-based index of the score row expected
   * @param lineNo
   *          line number, used in error messages
   * @return the scores, one for each symbol of the alphabet
   * @throws IOException
   *           (as FileFormatException) if the guide symbol is wrong, the
   *           number of scores is wrong, or a score is not a valid number
   */
  private static float[] parseScoreRow(String data, String alphabet,
          int row, int lineNo) throws IOException {
    int size = alphabet.length();
    StringTokenizer scoreLine = new StringTokenizer(data, DELIMITERS);
    if (scoreLine.countTokens() == size + 1) {
      /*
       * check 'guide' symbol is the row'th letter of the alphabet
       */
      String symbol = scoreLine.nextToken();
      if (symbol.length() > 1 || symbol.charAt(0) != alphabet.charAt(row)) {
        throw new FileFormatException(String.format(
                "Error parsing score matrix at line %d, expected '%s' but found '%s'",
                lineNo, alphabet.charAt(row), symbol));
      }
    }
    if (scoreLine.countTokens() != size) {
      throw new FileFormatException(
              String.format("Expected %d scores at line %d but found %d",
                      size, lineNo, scoreLine.countTokens()));
    }

    float[] rowScores = new float[size];
    int col = 0;
    while (scoreLine.hasMoreTokens()) {
      String value = scoreLine.nextToken();
      try {
        rowScores[col] = Float.parseFloat(value);
      } catch (NumberFormatException e) {
        throw new FileFormatException(String.format(
                "Invalid score value '%s' at line %d column %d", value,
                lineNo, col));
      }
      col++;
    }
    return rowScores;
  }

  /**
   * Answers true if the data line consists of the alphabet characters,
   * delimited (as to provide a heading row). Otherwise returns false (e.g. if
   * the data is a row of score values, or has more fields than the alphabet
   * has symbols).
   *
   * @param data
   *          the trimmed line to test
   * @param alphabet
   *          the scored symbols, one character each
   * @return true if each field of the line matches the corresponding alphabet
   *         symbol
   */
  private boolean isHeaderLine(String data, String alphabet) {
    StringTokenizer scoreLine = new StringTokenizer(data, DELIMITERS);
    int i = 0;
    while (scoreLine.hasMoreElements()) {
      if (i >= alphabet.length()) {
        /*
         * more fields than alphabet symbols - not a heading row
         */
        return false;
      }

      /*
       * skip over characters in the alphabet that are
       * also a delimiter (e.g. space)
       */
      char symbol = alphabet.charAt(i++);
      if (DELIMITERS.indexOf(symbol) < 0
              && !String.valueOf(symbol).equals(scoreLine.nextToken())) {
        return false;
      }
    }
    return true;
  }

  /**
   * Answers the name of the matrix read by the last successful call to
   * {@link #parseMatrix()}, or null if there has not been one.
   *
   * @return the matrix name, or null
   */
  public String getMatrixName() {
    return matrixName;
  }
}