package jalview.io;
import jalview.analysis.SequenceIdMatcher;
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.Annotation;
import jalview.datamodel.SequenceI;
import java.awt.Color;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
/**
* A file parser for the T-Coffee score ASCII format. This file contains the alignment consensus
* score for each residue in every sequence.
*
* This file is produced by {@code t_coffee} when the option
* {@code -output=score_ascii}
* is added to the program command line.
*
* An example file is the following
*
*
* T-COFFEE, Version_9.02.r1228 (2012-02-16 18:15:12 - Revision 1228 - Build 336)
* Cedric Notredame
* CPU TIME:0 sec.
* SCORE=90
* *
* BAD AVG GOOD
* *
* 1PHT : 89
* 1BB9 : 90
* 1UHC : 94
* 1YCS : 94
* 1OOT : 93
* 1ABO : 94
* 1FYN : 94
* 1QCF : 94
* cons : 90
*
* 1PHT 999999999999999999999999998762112222543211112134
* 1BB9 99999999999999999999999999987-------4322----2234
* 1UHC 99999999999999999999999999987-------5321----2246
* 1YCS 99999999999999999999999999986-------4321----1-35
* 1OOT 999999999999999999999999999861-------3------1135
* 1ABO 99999999999999999999999999986-------422-------34
* 1FYN 99999999999999999999999999985-------32--------35
* 1QCF 99999999999999999999999999974-------2---------24
* cons 999999999999999999999999999851000110321100001134
*
*
* 1PHT ----------5666642367889999999999889
* 1BB9 1111111111676653-355679999999999889
* 1UHC ----------788774--66789999999999889
* 1YCS ----------78777--356789999999999889
* 1OOT ----------78877--356789999999997-67
* 1ABO ----------687774--56779999999999889
* 1FYN ----------6888842356789999999999889
* 1QCF ----------6878742356789999999999889
* cons 00100000006877641356789999999999889
*
*
*
* @author Paolo Di Tommaso
*
*/
public class TCoffeeScoreFile extends AlignFile {
/**
* Creates a T-Coffee score file reader by delegating directly to the superclass constructor.
*
* @param inFile forwarded unchanged to the superclass (presumably the file path or raw content - TODO confirm against AlignFile)
* @param type forwarded unchanged to the superclass (presumably the access protocol - TODO confirm against AlignFile)
* @throws IOException propagated from the superclass constructor
*/
public TCoffeeScoreFile(String inFile, String type) throws IOException
{
super(inFile, type);
}
/**
* Creates a T-Coffee score file reader on top of an existing {@code FileParse} source by
* delegating directly to the superclass constructor.
*
* @param source forwarded unchanged to the superclass constructor
* @throws IOException propagated from the superclass constructor
*/
public TCoffeeScoreFile(FileParse source) throws IOException
{
super(source);
}
/**
* The {@link Header} structure holder, assigned by {@link #parse()}.
* NOTE(review): presumably parse() is invoked from the superclass constructor, in which case
* none of these fields may be given an initializer (it would run after parsing and wipe the
* result) - confirm against AlignFile.
*/
Header header;
/**
* Holds the score string of each sequence, keyed by sequence ID. A LinkedHashMap is used to
* maintain the insertion order, i.e. the order in which the IDs are declared in the header.
*/
LinkedHashMap<String, StringBuilder> scores;
/** The common length of all score strings, set by {@link #parse()}; {@code null} until then. */
Integer fWidth;
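/*
* Disabled legacy loader, retained for reference only (the whole method below is commented out).
* It parsed the provided reader for the T-Coffee scores file format.
*
* @param reader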
public static TCoffeeScoreFile load(Reader reader) {
try {
BufferedReader in = (BufferedReader) (reader instanceof BufferedReader ? reader : new BufferedReader(reader));
TCoffeeScoreFile result = new TCoffeeScoreFile();
result.doParsing(in);
return result.header != null && result.scores != null ? result : null;
}
catch( Exception e) {
throw new RuntimeException(e);
}
}
*/
/**
* Reports the 'height' of the score matrix, i.e. how many per-sequence score rows it holds.
* This is the value that has to match the number of sequences in the alignment.
*
* @return the number of score rows without the trailing consensus row, or 0 when no scores
* are available
*/
public int getHeight() {
  if (scores == null || scores.isEmpty()) {
    return 0;
  }
  // The last entry is always the 'global' alignment consensus row; leaving it out keeps
  // this value comparable with the number of sequences in the MSA.
  return scores.size() - 1;
}
/**
* Reports the 'width' of the score matrix, i.e. its number of columns. Scores are expected to
* be computed for an aligned MSA, so every row has this same width.
*
* @return the number of columns, or 0 when no width has been recorded yet
*/
public int getWidth() {
  if (fWidth == null) {
    return 0;
  }
  return fWidth;
}
/**
* Looks up the string of score values recorded for one sequence ID.
*
* @param id The sequence ID
* @return The scores as a string of values e.g. {@code 99999987-------432}, or an empty
* string when no scores are loaded or the ID is unknown
*/
public String getScoresFor( String id ) {
  if (scores == null || !scores.containsKey(id)) {
    return "";
  }
  return scores.get(id).toString();
}
/**
* Returns every score string, one per row, in insertion order (i.e. the order of the MSA).
*
* @return the score strings as a new {@link List}, or {@code null} when no scores have been
* loaded
*/
public List<String> getScoresList() {
  if (scores == null)
  {
    return null;
  }
  List<String> result = new ArrayList<String>( scores.size() );
  // Only the values are needed; iterating them as Object keeps this independent of how the
  // map field is parameterized.
  for( Object row : scores.values() ) {
    result.add(row.toString());
  }
  return result;
}
/**
* @return The parsed score values a matrix of bytes
*/
public byte[][] getScoresArray() {
if (scores==null)
{
return null;
}
byte[][] result = new byte[ scores.size() ][];
int rowCount = 0;
for( Map.Entry it : scores.entrySet() ) {
String line = it.getValue().toString();
byte[] seqValues = new byte[ line.length() ];
for( int j=0, c=line.length(); j= 0 && val <= 9 ) ? val : -1;
}
result[rowCount++] = seqValues;
}
return result;
}
/**
* Parses the T-Coffee score file: reads the header, then appends every following score block
* to the per-sequence score strings, and finally checks that all score strings have the same
* length. On any format problem {@code error} is set (with {@code errormessage} where
* available) and parsing stops.
*
* @throws IOException if reading from the underlying source fails
*/
public void parse() throws IOException
{
  /*
   * read the header
   */
  header = readHeader(this);
  if( header == null ) { error=true; return;}

  /*
   * initialize the structure: one empty score string per ID declared in the header.
   * A typed local alias is used so the code below is type-safe on its own.
   */
  LinkedHashMap<String, StringBuilder> rows = new LinkedHashMap<String, StringBuilder>();
  scores = rows;
  for( Object id : header.scores.keySet() ) {
    rows.put( id.toString(), new StringBuilder());
  }

  /*
   * go with the reading
   */
  Block block;
  while( (block = readBlock(this,header.scores.size())) != null ) {
    /*
     * append sequences read in the block
     */
    for( Object item : block.items.entrySet() ) {
      Map.Entry<?, ?> entry = (Map.Entry<?, ?>) item;
      StringBuilder scoreStringBuilder = rows.get(entry.getKey());
      if( scoreStringBuilder == null ) {
        error=true;
        errormessage=String.format("Invalid T-Coffee score file: Sequence ID '%s' is not declared in header section", entry.getKey());
        return ;
      }
      scoreStringBuilder.append( entry.getValue() );
    }
  }

  /*
   * verify that all rows have the same width
   */
  for( StringBuilder str : rows.values() ) {
    if( fWidth == null ) {
      fWidth = str.length();
    }
    else if( fWidth.intValue() != str.length() ) {
      error=true;
      errormessage="Invalid T-Coffee score file: All the score sequences must have the same length";
      return ;
    }
  }
}
/**
* Lenient integer parsing: anything {@link Integer#parseInt(String)} rejects yields 0.
*
* @param str the text to parse
* @return the parsed value, or 0 when {@code str} is not a valid integer
*/
static int parseInt( String str ) {
  int parsed = 0;
  try {
    parsed = Integer.parseInt(str);
  }
  catch( NumberFormatException ignored ) {
    // TODO report a warning ?
  }
  return parsed;
}
/**
* Reads the header section of the T-Coffee score file format: the first line, everything up
* to the {@code SCORE=} line, the three-line BAD/AVG/GOOD legend and the list of
* {@code ID : score} entries that ends at the first blank line.
*
* @param reader The scores reader
* @return The parsed {@link Header} instance, or {@code null} (with the error recorded on
* {@code reader}) when the header is not in the expected format or declares no sequences
* @throws IOException when reading from {@code reader} fails
*/
static Header readHeader(FileParse reader) throws IOException {
  Header result = new Header();
  try {
    result.head = reader.nextLine();

    String line;
    while( (line = reader.nextLine()) != null ) {
      if( line.startsWith("SCORE=")) {
        result.score = parseInt( line.substring(6).trim() );
        break;
      }
    }

    // the legend must follow immediately: "*", "BAD AVG GOOD", "*"
    final String[] legend = { "*", "BAD AVG GOOD", "*" };
    for( String expected : legend ) {
      line = reader.nextLine();
      if( line == null || !expected.equals(line.trim()) ) {
        error(reader,"Invalid T-COFFEE score format (NO BAD/AVG/GOOD header)");
        return null;
      }
    }

    /*
     * now a list of sequence IDs is expected, up to the first blank line
     */
    while( (line=reader.nextLine()) != null ) {
      // A whitespace-only line also ends the list; otherwise it would be skipped as a
      // malformed entry and the score blocks that follow would be consumed here.
      if( "".equals(line.trim()) ) {
        break;
      }

      int p = line.indexOf(':');
      if( p == -1 ) {
        // TODO report a warning
        continue;
      }

      String id = line.substring(0,p).trim();
      if( "".equals(id) ) {
        // TODO report warning
        continue;
      }

      result.scores.put(id, parseInt(line.substring(p+1).trim()));
    }

    if( result.scores.isEmpty() ) {
      error(reader, "T-COFFEE score file had no per-sequence scores");
      return null;
    }
  }
  catch( IOException e ) {
    error(reader,"Unexpected problem parsing T-Coffee score ascii file");
    throw e;
  }
  return result;
}
/**
* Flags the reader as failed and records the message; when a message is already present the
* new one is appended on its own line.
*
* @param reader the reader to mark as being in error
* @param errm the message to record
*/
private static void error(FileParse reader, String errm)
{
  reader.error = true;
  final String previous = reader.errormessage;
  reader.errormessage = (previous == null) ? errm : previous + "\n" + errm;
}
/**
* Reads the next block of score lines from the provided reader. Leading blank lines are
* skipped; the block ends at the next blank line or at the end of the input.
*
* @param reader The stream to parse
* @param size The expected number of sequences in the block, handed to the {@link Block}
* constructor
* @return The {@link Block} instance read, or {@code null} if the end of the file was reached
* before any non-blank line
* @throws IOException Something went wrong on the 'wire'
*/
static Block readBlock( FileParse reader, int size ) throws IOException {
  Block block = new Block(size);

  // skip over any blank lines in front of the block
  String current = reader.nextLine();
  while( current != null && current.trim().length() == 0 ) {
    current = reader.nextLine();
  }
  if( current == null ) {
    return null;
  }

  // consume score lines until a blank line or the end of the input
  for( ; current != null && current.trim().length() != 0; current = reader.nextLine() ) {
    // the text before the first blank is the sequence id, the rest are the score values
    int blankAt = current.indexOf(' ');
    if( blankAt == -1 ) {
      if( reader.warningMessage == null ) {
        reader.warningMessage = "";
      }
      reader.warningMessage += "Possible parsing error - expected to find a space in line: '"+current+"'\n";
      continue;
    }
    String seqId = current.substring(0, blankAt).trim();
    String values = current.substring(blankAt + 1).trim();
    block.items.put(seqId, values);
  }
  return block;
}
/*
* The score file header
*/
static class Header {
String head;
int score;
LinkedHashMap scores = new LinkedHashMap();
public int getScoreAvg() { return score; }
public int getScoreFor( String ID ) {
return scores.containsKey(ID) ? scores.get(ID) : -1;
}
}
/*
* Hold a single block values block in the score file
*/
static class Block {
int size;
Map items;
public Block( int size ) {
this.size = size;
this.items = new HashMap(size);
}
String getScoresFor( String id ) {
return items.get(id);
}
String getConsensus() {
return items.get("cons");
}
}
/**
* T-Coffee score colour scheme. The array has ten entries and is indexed by the score
* digit (0-9) when annotation is generated; values outside the array are drawn in white
* by the caller.
*/
static final Color[] colors = {
new Color( 102, 102, 255 ), // #6666FF
new Color( 0, 255, 0), // #00FF00
new Color( 102, 255, 0), // #66FF00
new Color( 204, 255, 0), // #CCFF00
new Color( 255, 255, 0), // #FFFF00
new Color( 255, 204, 0), // #FFCC00
new Color( 255, 153, 0), // #FF9900
new Color( 255, 102, 0), // #FF6600
new Color( 255, 51, 0), // #FF3300
new Color( 255, 34, 0) // #FF2200 - NOTE(review): this comment used to read #FF2000; confirm which shade is intended
};
/** Name passed (twice) to {@code findOrCreateAnnotation} for the annotation rows generated from these scores. */
public final static String TCOFFEE_SCORE="TCoffeeScore";
/**
* generate annotation for this TCoffee score set on the given alignment
* @param al alignment to annotate
* @param matchids if true, annotate sequences based on matching sequence names
* @return true if alignment annotation was modified, false otherwise.
*/
public boolean annotateAlignment(AlignmentI al, boolean matchids)
{
if (al.getHeight()!=getHeight() || al.getWidth()!=getWidth())
{
warningMessage="Alignment shape does not match T-Coffee score file shape.";
return false;
}
boolean added=false;
int i=0;
SequenceIdMatcher sidmatcher = new SequenceIdMatcher(al.getSequencesArray());
byte[][] scoreMatrix=getScoresArray();
// for 2.8 - we locate any existing TCoffee annotation and remove it first before adding this.
for (Map.Entry id:scores.entrySet())
{
byte[] srow=scoreMatrix[i];
SequenceI s;
if (matchids)
{
s=sidmatcher.findIdMatch(id.getKey());
} else {
s=al.getSequenceAt(i);
}
i++;
if (s==null && i!=scores.size() && !id.getKey().equals("cons"))
{
System.err.println("No "+(matchids ? "match ":" sequences left ")+" for TCoffee score set : "+id.getKey());
continue;
}
int jSize=al.getWidth()< srow.length ? al.getWidth() : srow.length;
Annotation[] annotations=new Annotation[al.getWidth()];
for (int j=0;j0)
{
System.err.println("Warning: non-zero value for positional T-COFFEE score for gap at "+j+" in sequence "+s.getName());
}
} else {
annotations[j]=new Annotation(s==null ? ""+val:null,s==null ? ""+val:null,'\0',val*1f,val >= 0 && val < colors.length ? colors[val] : Color.white);
}
}
// this will overwrite any existing t-coffee scores for the alignment
AlignmentAnnotation aa=al.findOrCreateAnnotation(TCOFFEE_SCORE,TCOFFEE_SCORE,false,s, null);
if (s!=null)
{
aa.label="T-COFFEE";
aa.description=""+id.getKey();
aa.annotations=annotations;
aa.visible=false;
aa.belowAlignment=false;
aa.setScore(header.getScoreFor(id.getKey()));
aa.createSequenceMapping(s, s.getStart(),true);
s.addAlignmentAnnotation(aa);
aa.adjustForAlignment();
} else {
aa.graph=AlignmentAnnotation.NO_GRAPH;
aa.label="T-COFFEE";
aa.description="TCoffee column reliability score";
aa.annotations=annotations;
aa.belowAlignment=true;
aa.visible=true;
aa.setScore(header.getScoreAvg());
}
aa.showAllColLabels=true;
aa.validateRangeAndDisplay();
added=true;
}
return added;
}
/**
* Writing this format is not implemented: the method always returns a fixed placeholder.
*
* @return the literal string {@code "Not valid."}
*/
@Override
public String print()
{
// TODO Auto-generated method stub
return "Not valid.";
}
}