package jalview.io; import jalview.analysis.scoremodels.ScoreMatrix; import jalview.analysis.scoremodels.ScoreModels; import jalview.datamodel.SequenceI; import java.io.IOException; import java.util.StringTokenizer; /** * A class that can parse a file containing a substitution matrix and register * it for use in Jalview * * @author gmcarstairs * */ // TODO modify the AlignFile / IdentifyFile pattern so that non-alignment files // like this are handled more naturally public class ScoreMatrixFile extends AlignFile implements AlignmentFileReaderI { // first non-comment line identifier - also checked in IdentifyFile public static final String SCOREMATRIX = "SCOREMATRIX"; private static final String DELIMITERS = " ,\t"; private static final String COMMENT_CHAR = "#"; private String matrixName; /** * Constructor * * @param source * @throws IOException */ public ScoreMatrixFile(FileParse source) throws IOException { super(false, source); } @Override public String print(SequenceI[] sqs, boolean jvsuffix) { return null; } /** * Parses the score matrix file, and if successful registers the matrix so it * will be shown in Jalview menus. */ @Override public void parse() throws IOException { ScoreMatrix sm = parseMatrix(); ScoreModels.getInstance().registerScoreModel(sm); } /** * Parses the score matrix file and constructs a ScoreMatrix object. If an * error is found in parsing, it is thrown as FileFormatException. Any * warnings are written to syserr. * * @return * @throws IOException */ public ScoreMatrix parseMatrix() throws IOException { ScoreMatrix sm = null; int lineNo = 0; String name = null; String alphabet = null; float[][] scores = null; int size = 0; int row = 0; String err = null; String data; while ((data = nextLine()) != null) { lineNo++; data = data.trim(); if (data.startsWith(COMMENT_CHAR) || data.length() == 0) { continue; } if (data.toUpperCase().startsWith(SCOREMATRIX)) { /* * Parse name from ScoreMatrix */ if (name != null) { System.err .println("Warning: 'ScoreMatrix' repeated in file at line " + lineNo); } StringTokenizer nameLine = new StringTokenizer(data, DELIMITERS); if (nameLine.countTokens() != 2) { err = "Format error: expected 'ScoreMatrix ', found '" + data + "' at line " + lineNo; throw new FileFormatException(err); } nameLine.nextToken(); name = nameLine.nextToken(); continue; } else if (name == null) { err = "Format error: 'ScoreMatrix ' should be the first non-comment line"; throw new FileFormatException(err); } /* * next line after ScoreMatrix should be the alphabet of scored symbols */ if (alphabet == null) { alphabet = data; size = alphabet.length(); scores = new float[size][]; continue; } /* * too much information */ if (row >= size) { err = "Unexpected extra input line in score model file: '" + data + "'"; throw new FileFormatException(err); } /* * subsequent lines should be the symbol scores * optionally with the symbol as the first column for readability */ StringTokenizer scoreLine = new StringTokenizer(data, DELIMITERS); if (scoreLine.countTokens() == size + 1) { /* * check 'guide' symbol is the row'th letter of the alphabet */ String symbol = scoreLine.nextToken(); if (symbol.length() > 1 || symbol.charAt(0) != alphabet.charAt(row)) { err = String .format("Error parsing score matrix at line %d, expected '%s' but found '%s'", lineNo, alphabet.charAt(row), symbol); throw new FileFormatException(err); } } if (scoreLine.countTokens() != size) { err = String.format("Expected %d scores at line %d but found %d", size, lineNo, scoreLine.countTokens()); throw new FileFormatException(err); } scores[row] = new float[size]; int col = 0; String value = null; while (scoreLine.hasMoreTokens()) { try { value = scoreLine.nextToken(); scores[row][col] = Float.valueOf(value); col++; } catch (NumberFormatException e) { err = String.format( "Invalid score value '%s' at line %d column %d", value, lineNo, col); throw new FileFormatException(err); } } row++; } /* * out of data - check we found enough */ if (row < size) { err = String .format("Expected %d rows of score data in score matrix but only found %d", size, row); throw new FileFormatException(err); } /* * If we get here, then name, alphabet and scores have been parsed successfully */ sm = new ScoreMatrix(name, alphabet.toCharArray(), scores); matrixName = name; return sm; } public String getMatrixName() { return matrixName; } }