package jalview.io;
import jalview.analysis.SequenceIdMatcher;
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.Annotation;
import jalview.datamodel.SequenceI;
import java.awt.Color;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
/**
* A file parser for the T-Coffee score ASCII format. This file contains the alignment consensus
* for each residue in any sequence.
*
* This file is produced by t_coffee
* when the option
* -output=score_ascii
* is given on the program command line.
*
* An example file is the following:
*
*
* T-COFFEE, Version_9.02.r1228 (2012-02-16 18:15:12 - Revision 1228 - Build 336)
* Cedric Notredame
* CPU TIME:0 sec.
* SCORE=90
* *
* BAD AVG GOOD
* *
* 1PHT : 89
* 1BB9 : 90
* 1UHC : 94
* 1YCS : 94
* 1OOT : 93
* 1ABO : 94
* 1FYN : 94
* 1QCF : 94
* cons : 90
*
* 1PHT 999999999999999999999999998762112222543211112134
* 1BB9 99999999999999999999999999987-------4322----2234
* 1UHC 99999999999999999999999999987-------5321----2246
* 1YCS 99999999999999999999999999986-------4321----1-35
* 1OOT 999999999999999999999999999861-------3------1135
* 1ABO 99999999999999999999999999986-------422-------34
* 1FYN 99999999999999999999999999985-------32--------35
* 1QCF 99999999999999999999999999974-------2---------24
* cons 999999999999999999999999999851000110321100001134
*
*
* 1PHT ----------5666642367889999999999889
* 1BB9 1111111111676653-355679999999999889
* 1UHC ----------788774--66789999999999889
* 1YCS ----------78777--356789999999999889
* 1OOT ----------78877--356789999999997-67
* 1ABO ----------687774--56779999999999889
* 1FYN ----------6888842356789999999999889
* 1QCF ----------6878742356789999999999889
* cons 00100000006877641356789999999999889
*
*
*
* @author Paolo Di Tommaso
*
*/
public class TCoffeeScoreFile {
/** The {@link Header} structure holder */
Header header;
/**
* Holds the consensues values for each sequences. It uses a LinkedHashMap to maintaint the
* insertion order.
*/
LinkedHashMap scores = new LinkedHashMap();
/**
* Parse the specified file.
*
* @param file The file to be parsed
*/
public static TCoffeeScoreFile load(File file) {
try {
return load(new FileReader(file));
}
catch (FileNotFoundException e) {
throw new RuntimeException(e);
}
}
/**
* Parse the provided reader for the T-Coffee scores file format
*
* @param reader
*/
public static TCoffeeScoreFile load(Reader reader) {
try {
BufferedReader in = (BufferedReader) (reader instanceof BufferedReader ? reader : new BufferedReader(reader));
TCoffeeScoreFile result = new TCoffeeScoreFile();
result.doParsing(in);
return result.header != null && result.scores != null ? result : null;
}
catch( Exception e) {
throw new RuntimeException(e);
}
}
/**
* The default constructor is marked as {@code protected} since this class is meant to created
* through the {@link #load(File)} or {@link #load(Reader)} factory methods
*/
protected TCoffeeScoreFile() { }
/**
* Get the string of score values for the specified seqeunce ID.
* @param id The sequence ID
* @return The scores as a string of values e.g. {@code 99999987-------432}.
* It return an empty string when the specified ID is missing.
*/
public String getScoresFor( String id ) {
return scores.containsKey(id) ? scores.get(id).toString() : "";
}
/**
* @return The list of score string as a {@link List} object, in the same ordeer of the insertion i.e. in the MSA
*/
public List getScoresList() {
List result = new ArrayList( scores.size() );
for( Map.Entry it : scores.entrySet() ) {
result.add(it.getValue().toString());
}
return result;
}
/**
* @return The parsed score values a matrix of bytes
*/
public byte[][] getScoresArray() {
byte[][] result = new byte[ scores.size() ][];
int rowCount = 0;
for( Map.Entry it : scores.entrySet() ) {
String line = it.getValue().toString();
byte[] seqValues = new byte[ line.length() ];
for( int j=0, c=line.length(); j= 0 && val <= 9 ) ? val : -1;
}
result[rowCount++] = seqValues;
}
return result;
}
private void doParsing(BufferedReader in) throws IOException {
/*
* read the header
*/
header = readHeader(in);
if( header == null ) { return; }
/*
* initilize the structure
*/
for( Map.Entry entry : header.scores.entrySet() ) {
scores.put( entry.getKey(), new StringBuilder());
}
/*
* go with the reading
*/
Block block;
while( (block = readBlock(in, header.scores.size())) != null ) {
/*
* append sequences read in the block
*/
for( Map.Entry entry : block.items.entrySet() ) {
StringBuilder scoreStringBuilder = scores.get(entry.getKey());
if( scoreStringBuilder == null ) {
throw new RuntimeException(String.format("Invalid T-Coffee score file. Sequence ID '%s' is not declared in header section", entry.getKey()));
}
scoreStringBuilder.append( entry.getValue() );
}
}
}
static int parseInt( String str ) {
try {
return Integer.parseInt(str);
}
catch( NumberFormatException e ) {
// TODO report a warning ?
return 0;
}
}
/**
* Reaad the header section in the T-Coffee score file format
*
* @param reader The scores reader
* @return The parser {@link Header} instance
* @throws RuntimeException when the header is not in the expected format
*/
static Header readHeader(BufferedReader reader) {
Header result = null;
try {
result = new Header();
result.head = reader.readLine();
String line;
while( (line = reader.readLine()) != null ) {
if( line.startsWith("SCORE=")) {
result.score = parseInt( line.substring(6).trim() );
break;
}
}
if( (line=reader.readLine())==null || !"*".equals(line.trim())) return null;
if( (line=reader.readLine())==null || !"BAD AVG GOOD".equals(line.trim())) return null;
if( (line=reader.readLine())==null || !"*".equals(line.trim())) return null;
/*
* now are expected a list if sequences ID up to the first blank line
*/
while( (line=reader.readLine()) != null ) {
if( "".equals(line) ) {
break;
}
int p = line.indexOf(":");
if( p == -1 ) {
// TODO report a warning
continue;
}
String id = line.substring(0,p).trim();
int val = parseInt(line.substring(p+1).trim());
if( "".equals(id) ) {
// TODO report warning
continue;
}
result.scores.put(id,val);
}
}
catch( IOException e ) {
throw new RuntimeException("Cannot parse T-Coffee score ascii file", e);
}
return result;
}
/**
* Read a scores block ihe provided stream.
*
* @param reader The stream to parse
* @param size The expected number of the sequence to be read
* @return The {@link Block} instance read or {link null} null if the end of file has reached.
* @throws IOException Something went wrong on the 'wire'
*/
static Block readBlock( BufferedReader reader, int size ) throws IOException {
Block result = new Block(size);
String line;
/*
* read blank lines (eventually)
*/
while( (line=reader.readLine()) != null && "".equals(line.trim())) {
// consume blank lines
}
if( line == null ) return null;
/*
* read the scores block
*/
do {
if( "".equals(line.trim()) ) {
// terminated
break;
}
// split the line on the first blank
// the first part have to contain the sequence id
// theramining part are the scores values
int p = line.indexOf(" ");
if( p == -1 ) {
//TODO This is an unexpected condition, log a warning or throw an exception ?
continue;
}
String id = line.substring(0,p).trim();
String val = line.substring(p+1).trim();
result.items.put(id, val);
} while( (line = reader.readLine()) != null );
return result;
}
/*
* The score file header
*/
static class Header {
String head;
int score;
LinkedHashMap scores = new LinkedHashMap();
public int getScoreAvg() { return score; }
public int getScoreFor( String ID ) {
return scores.containsKey(ID) ? scores.get(ID) : -1;
}
}
/*
* Hold a single block values block in the score file
*/
static class Block {
int size;
Map items;
public Block( int size ) {
this.size = size;
this.items = new HashMap(size);
}
String getScoresFor( String id ) {
return items.get(id);
}
String getConsensus() {
return items.get("cons");
}
}
/**
* TCOFFEE score colourscheme
*/
static final Color[] colors = {
new Color( 102, 102, 255 ), // #6666FF
new Color( 0, 255, 0), // #00FF00
new Color( 102, 255, 0), // #66FF00
new Color( 204, 255, 0), // #CCFF00
new Color( 255, 255, 0), // #FFFF00
new Color( 255, 204, 0), // #FFCC00
new Color( 255, 153, 0), // #FF9900
new Color( 255, 102, 0), // #FF6600
new Color( 255, 51, 0), // #FF3300
new Color( 255, 34, 0) // #FF2000
};
public final static String TCOFFEE_SCORE="TCoffeeScore";
/**
* generate annotation for this TCoffee score set on the given alignment
* @param al alignment to annotate
* @param matchids if true, annotate sequences based on matching sequence names
* @return true if alignment annotation was modified, false otherwise.
*/
public boolean annotateAlignment(AlignmentI al, boolean matchids)
{
boolean added=false;
int i=0;
SequenceIdMatcher sidmatcher = new SequenceIdMatcher(al.getSequencesArray());
byte[][] scoreMatrix=getScoresArray();
// for 2.8 - we locate any existing TCoffee annotation and remove it first before adding this.
for (Map.Entry id:scores.entrySet())
{
byte[] srow=scoreMatrix[i];
SequenceI s;
if (matchids)
{
s=sidmatcher.findIdMatch(id.getKey());
} else {
s=al.getSequenceAt(i);
}
i++;
if (s==null && i!=scores.size() && !id.getKey().equals("cons"))
{
System.err.println("No "+(matchids ? "match ":" sequences left ")+" for TCoffee score set : "+id.getKey());
continue;
}
int jSize=al.getWidth()< srow.length ? al.getWidth() : srow.length;
Annotation[] annotations=new Annotation[al.getWidth()];
for (int j=0;j= 0 && val < colors.length ? colors[val] : Color.white);
}
AlignmentAnnotation aa=null;
if (s!=null)
{
// TODO - set per sequence score
aa=new AlignmentAnnotation(TCOFFEE_SCORE, "Score for "+id.getKey(), annotations);
aa.setSequenceRef(s);
aa.visible=false;
aa.belowAlignment=false;
} else {
aa=new AlignmentAnnotation("T-COFFEE", "TCoffee column reliability score", annotations);
aa.belowAlignment=true;
aa.visible=true;
}
al.addAnnotation(aa);
added=true;
}
return added;
}
}