package jalview.io; import java.io.BufferedReader; import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.io.Reader; import java.util.ArrayList; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; /** * A file parser for the T-Coffee score ascii format. This file contains the alignment consensus * for each residue in any sequence. *

* This file is produced by t_coffee providing the option * -output=score_ascii to the program command line * * An example file is the following * *

 * T-COFFEE, Version_9.02.r1228 (2012-02-16 18:15:12 - Revision 1228 - Build 336)
 * Cedric Notredame 
 * CPU TIME:0 sec.
 * SCORE=90
 * *
 *  BAD AVG GOOD
 * *
 * 1PHT   :  89
 * 1BB9   :  90
 * 1UHC   :  94
 * 1YCS   :  94
 * 1OOT   :  93
 * 1ABO   :  94
 * 1FYN   :  94
 * 1QCF   :  94
 * cons   :  90
 * 
 * 1PHT   999999999999999999999999998762112222543211112134
 * 1BB9   99999999999999999999999999987-------4322----2234
 * 1UHC   99999999999999999999999999987-------5321----2246
 * 1YCS   99999999999999999999999999986-------4321----1-35
 * 1OOT   999999999999999999999999999861-------3------1135
 * 1ABO   99999999999999999999999999986-------422-------34
 * 1FYN   99999999999999999999999999985-------32--------35
 * 1QCF   99999999999999999999999999974-------2---------24
 * cons   999999999999999999999999999851000110321100001134
 * 
 * 
 * 1PHT   ----------5666642367889999999999889
 * 1BB9   1111111111676653-355679999999999889
 * 1UHC   ----------788774--66789999999999889
 * 1YCS   ----------78777--356789999999999889
 * 1OOT   ----------78877--356789999999997-67
 * 1ABO   ----------687774--56779999999999889
 * 1FYN   ----------6888842356789999999999889
 * 1QCF   ----------6878742356789999999999889
 * cons   00100000006877641356789999999999889
 * 
/**
 * Parser and in-memory model of a T-Coffee {@code score_ascii} file.
 *
 * Instances are obtained through {@link #load(File)} or {@link #load(Reader)}.
 * After loading, the per-sequence score strings are available through
 * {@link #getScoresFor(String)}, {@link #getScoresList()} and
 * {@link #getScoresArray()}.
 *
 * @author Paolo Di Tommaso
 */
public class TCoffeeScoreFile {

  /** The parsed {@link Header}; stays {@code null} when the header is not recognised. */
  Header header;

  /**
   * The concatenated score string of every sequence, keyed by sequence ID.
   * A LinkedHashMap is used so that iteration follows the order in which the
   * IDs were declared in the header.
   */
  LinkedHashMap<String, StringBuilder> scores = new LinkedHashMap<String, StringBuilder>();

  /**
   * Parses the specified file. The file is opened and closed by this method.
   *
   * @param file The file to be parsed
   * @return The parsed instance, or {@code null} when the header is not in the expected format
   * @throws RuntimeException when the file does not exist or cannot be read
   */
  public static TCoffeeScoreFile load(File file) {
    FileReader reader;
    try {
      reader = new FileReader(file);
    } catch (FileNotFoundException e) {
      throw new RuntimeException(e);
    }

    try {
      return load(reader);
    } finally {
      // this method opened the reader, so it is responsible for releasing it
      try {
        reader.close();
      } catch (IOException ignored) {
        // the content has already been consumed; a failure on close is not actionable
      }
    }
  }

  /**
   * Parses the provided reader for the T-Coffee scores file format. The reader
   * is not closed by this method.
   *
   * @param reader The character source to parse
   * @return The parsed instance, or {@code null} when the header is not in the expected format
   * @throws RuntimeException when reading fails, or when a score row uses a
   *         sequence ID that is not declared in the header
   */
  public static TCoffeeScoreFile load(Reader reader) {
    BufferedReader in = reader instanceof BufferedReader
            ? (BufferedReader) reader
            : new BufferedReader(reader);

    TCoffeeScoreFile result = new TCoffeeScoreFile();
    try {
      result.doParsing(in);
    } catch (IOException e) {
      // only I/O failures are wrapped; parser errors are already unchecked
      throw new RuntimeException(e);
    }
    return result.header != null ? result : null;
  }

  /**
   * The default constructor is marked as {@code protected} since this class is meant to be
   * created through the {@link #load(File)} or {@link #load(Reader)} factory methods.
   */
  protected TCoffeeScoreFile() {
  }

  /**
   * Gets the string of score values for the specified sequence ID.
   *
   * @param id The sequence ID
   * @return The scores as a string of values e.g. {@code 99999987-------432}.
   *         An empty string is returned when the specified ID is missing.
   */
  public String getScoresFor(String id) {
    StringBuilder value = scores.get(id);
    return value != null ? value.toString() : "";
  }

  /**
   * @return The score strings as a {@link List}, in insertion order
   *         (the order in which the IDs were declared in the header)
   */
  public List<String> getScoresList() {
    List<String> result = new ArrayList<String>(scores.size());
    for (Map.Entry<String, StringBuilder> it : scores.entrySet()) {
      result.add(it.getValue().toString());
    }
    return result;
  }

  /**
   * @return The parsed score values as a matrix of bytes: one row per sequence
   *         (in insertion order), one cell per character of its score string.
   *         Digits map to 0..9; any other character maps to -1.
   */
  public byte[][] getScoresArray() {
    byte[][] result = new byte[scores.size()][];

    int rowCount = 0;
    for (Map.Entry<String, StringBuilder> it : scores.entrySet()) {
      String line = it.getValue().toString();
      byte[] seqValues = new byte[line.length()];
      for (int j = 0, c = line.length(); j < c; j++) {
        // test the char before narrowing so that no character can wrap into 0..9
        char ch = line.charAt(j);
        seqValues[j] = (ch >= '0' && ch <= '9') ? (byte) (ch - '0') : (byte) -1;
      }
      result[rowCount++] = seqValues;
    }

    return result;
  }

  /**
   * Reads the header and then every score block, appending each row to the
   * score string of its sequence.
   *
   * @param in The source to read from
   * @throws IOException when reading a score block fails
   * @throws RuntimeException when a row uses an ID that the header did not declare
   */
  private void doParsing(BufferedReader in) throws IOException {
    /*
     * read the header
     */
    header = readHeader(in);
    if (header == null) {
      return;
    }

    /*
     * initialize the structure: one (empty) score string per declared ID
     */
    for (Map.Entry<String, Integer> entry : header.scores.entrySet()) {
      scores.put(entry.getKey(), new StringBuilder());
    }

    /*
     * go with the reading
     */
    Block block;
    while ((block = readBlock(in, header.scores.size())) != null) {
      /*
       * append sequences read in the block
       */
      for (Map.Entry<String, String> entry : block.items.entrySet()) {
        StringBuilder scoreStringBuilder = scores.get(entry.getKey());
        if (scoreStringBuilder == null) {
          throw new RuntimeException(String.format(
                  "Invalid T-Coffee score file. Sequence ID '%s' is not declared in header section",
                  entry.getKey()));
        }
        scoreStringBuilder.append(entry.getValue());
      }
    }
  }

  /**
   * Lenient integer parsing.
   *
   * @param str The text to parse
   * @return The parsed value, or 0 when {@code str} is not a valid integer
   */
  static int parseInt(String str) {
    try {
      return Integer.parseInt(str);
    } catch (NumberFormatException e) {
      // deliberate best effort: a malformed number is read as 0
      // TODO report a warning ?
      return 0;
    }
  }

  /**
   * Reads the header section of the T-Coffee score file format: the first
   * line, everything up to the {@code SCORE=} line, the
   * {@code * / BAD AVG GOOD / *} marker lines and the list of
   * {@code ID : score} entries up to the first blank line.
   *
   * @param reader The scores reader
   * @return The parsed {@link Header} instance, or {@code null} when the marker
   *         lines are missing, i.e. the header is not in the expected format
   * @throws RuntimeException when the underlying reader fails
   */
  static Header readHeader(BufferedReader reader) {
    Header result = new Header();
    try {
      result.head = reader.readLine();

      String line;
      while ((line = reader.readLine()) != null) {
        if (line.startsWith("SCORE=")) {
          result.score = parseInt(line.substring(6).trim());
          break;
        }
      }

      if (!nextLineIs(reader, "*")
              || !nextLineIs(reader, "BAD AVG GOOD")
              || !nextLineIs(reader, "*")) {
        return null;
      }

      /*
       * now a list of sequence IDs is expected, up to the first blank line
       */
      while ((line = reader.readLine()) != null) {
        // a whitespace-only line terminates the list as well; otherwise the
        // score rows that follow would be consumed (and dropped) by this loop
        if (line.trim().length() == 0) {
          break;
        }

        int p = line.indexOf(':');
        if (p == -1) {
          // TODO report a warning
          continue;
        }

        String id = line.substring(0, p).trim();
        if (id.length() == 0) {
          // TODO report warning
          continue;
        }

        result.scores.put(id, parseInt(line.substring(p + 1).trim()));
      }
    } catch (IOException e) {
      throw new RuntimeException("Cannot parse T-Coffee score ascii file", e);
    }

    return result;
  }

  /** Consumes one line and tells whether, once trimmed, it equals {@code expected}. */
  private static boolean nextLineIs(BufferedReader reader, String expected) throws IOException {
    String line = reader.readLine();
    return line != null && expected.equals(line.trim());
  }

  /**
   * Reads one block of score rows from the provided stream. Leading blank
   * lines are skipped; the block ends at the next blank line or at the end of
   * the stream.
   *
   * @param reader The stream to parse
   * @param size The expected number of sequences in the block
   * @return The {@link Block} instance read, or {@code null} if the end of file has been reached
   * @throws IOException Something went wrong on the 'wire'
   */
  static Block readBlock(BufferedReader reader, int size) throws IOException {
    String line;

    /*
     * skip the blank lines (if any) preceding the block
     */
    while ((line = reader.readLine()) != null && line.trim().length() == 0) {
      // consume blank lines
    }

    if (line == null) {
      return null;
    }

    /*
     * read the scores block
     */
    Block result = new Block(size);
    do {
      // trim first so that an indented row does not yield an empty ID
      String row = line.trim();
      if (row.length() == 0) {
        // terminated
        break;
      }

      // split the row on the first blank: the first part is the sequence ID,
      // the remaining part holds the score values
      int p = row.indexOf(' ');
      if (p != -1) {
        result.items.put(row.substring(0, p).trim(), row.substring(p + 1).trim());
      }
      // TODO a row without a blank is unexpected: log a warning or throw an exception ?
    } while ((line = reader.readLine()) != null);

    return result;
  }

  /*
   * The score file header
   */
  static class Header {
    /** The first line of the file. */
    String head;

    /** The value read from the {@code SCORE=} line. */
    int score;

    /** The score declared in the header for each sequence ID, in declaration order. */
    LinkedHashMap<String, Integer> scores = new LinkedHashMap<String, Integer>();

    /** @return The value read from the {@code SCORE=} line */
    public int getScoreAvg() {
      return score;
    }

    /**
     * @param ID The sequence ID
     * @return The score declared in the header for that ID, or -1 when the ID is unknown
     */
    public int getScoreFor(String ID) {
      Integer value = scores.get(ID);
      return value != null ? value.intValue() : -1;
    }
  }

  /*
   * Holds the rows of a single block in the score file
   */
  static class Block {
    /** The expected number of rows, as passed by the creator. */
    int size;

    /** The score string of each row, keyed by sequence ID. */
    Map<String, String> items;

    public Block(int size) {
      this.size = size;
      this.items = new HashMap<String, String>(size);
    }

    /** @return The score string read for {@code id} in this block, or {@code null} if absent */
    String getScoresFor(String id) {
      return items.get(id);
    }

    /** @return The score string of the {@code cons} row, or {@code null} if absent */
    String getConsensus() {
      return items.get("cons");
    }
  }
}