JAL-1065 per sequence and per-alignment scores added to annotation
[jalview.git] / src / jalview / io / TCoffeeScoreFile.java
index 6cab3ae..cf33671 100644 (file)
@@ -72,8 +72,19 @@ import java.util.Map;
  * @author Paolo Di Tommaso
  *
  */
-public class TCoffeeScoreFile {
+public class TCoffeeScoreFile extends AlignFile {
        
+  public TCoffeeScoreFile(String inFile, String type) throws IOException
+  {
+    super(inFile, type);
+    // Nothing else is initialised here: both arguments are handed straight to the AlignFile constructor. NOTE(review): 'type' is presumably the source protocol AlignFile expects - confirm.
+  }
+  /**
+   * Creates a T-Coffee score file on top of an already opened {@link FileParse}
+   * source; the source is passed unchanged to the AlignFile constructor.
+   *
+   * @param source the data source handed to the superclass
+   * @throws IOException declared because the superclass constructor may raise it
+   */
+  public TCoffeeScoreFile(FileParse source) throws IOException
+  {
+    super(source);
+  }
+
        /** The {@link Header} structure holder */
        Header header;
        
@@ -81,29 +92,14 @@ public class TCoffeeScoreFile {
         * Holds the consensues values for each sequences. It uses a LinkedHashMap to maintaint the 
         * insertion order. 
         */
-       LinkedHashMap<String,StringBuilder> scores = new LinkedHashMap<String,StringBuilder>();
+       LinkedHashMap<String,StringBuilder> scores;
 
        Integer fWidth;
-
-       /**
-        * Parse the specified file.
-        * 
-        * @param file The file to be parsed 
-        */
-       public static TCoffeeScoreFile load(File file) {
-               try {
-                       return load(new FileReader(file));
-               } 
-               catch (FileNotFoundException e) {
-                       throw new RuntimeException(e);
-               }
-       }
        
        /**
         * Parse the provided reader for the T-Coffee scores file format
         * 
         * @param reader 
-        */
        public static TCoffeeScoreFile load(Reader reader) {
 
                try {
@@ -116,6 +112,7 @@ public class TCoffeeScoreFile {
                        throw new RuntimeException(e);
                }
        }
+         */
                
        /**
         * @return The 'height' of the score matrix i.e. the numbers of score rows that should matches 
@@ -136,11 +133,6 @@ public class TCoffeeScoreFile {
                return fWidth != null ? fWidth : 0;
        }
        
-       /**
-        * The default constructor is marked as {@code protected} since this class is meant to created 
-        * through the {@link #load(File)} or {@link #load(Reader)} factory methods
-        */
-       protected TCoffeeScoreFile() { } 
        
        /**
         * Get the string of score values for the specified seqeunce ID. 
@@ -149,13 +141,17 @@ public class TCoffeeScoreFile {
         *      It return an empty string when the specified ID is missing. 
         */
        public String getScoresFor( String id ) {
-               return scores.containsKey(id) ? scores.get(id).toString() : "";
+               return scores!=null && scores.containsKey(id) ? scores.get(id).toString() : "";
        }
        
        /**
         * @return The list of score string as a {@link List} object, in the same ordeer of the insertion i.e. in the MSA
         */
        public List<String> getScoresList() {
+         if (scores==null)
+         {
+           return null;
+         }
                List<String> result = new ArrayList<String>( scores.size() );
                for( Map.Entry<String,StringBuilder> it : scores.entrySet() ) {
                        result.add(it.getValue().toString());
@@ -168,6 +164,10 @@ public class TCoffeeScoreFile {
         * @return The parsed score values a matrix of bytes
         */
        public byte[][] getScoresArray() { 
+         if (scores==null)
+         {
+           return null;
+         }
                byte[][] result = new byte[ scores.size() ][];
                
                int rowCount = 0;
@@ -188,15 +188,15 @@ public class TCoffeeScoreFile {
        }
        
 
-       private void doParsing(BufferedReader in) throws IOException {
-
+       public void parse() throws IOException
+       {
                /*
                 * read the header
                 */
-               header = readHeader(in);
+               header = readHeader(this);
 
-               if( header == null ) { return; }
-               
+               if( header == null ) { error=true; return;}
+               scores = new LinkedHashMap<String,StringBuilder>();
        
                /*
                 * initilize the structure
@@ -209,7 +209,7 @@ public class TCoffeeScoreFile {
                 * go with the reading
                 */
                Block block;
-               while( (block = readBlock(in, header.scores.size())) != null  ) {
+               while( (block = readBlock(this,header.scores.size())) != null  ) {
                        
                        /*
                         * append sequences read in the block
@@ -217,7 +217,9 @@ public class TCoffeeScoreFile {
                        for( Map.Entry<String,String> entry : block.items.entrySet() ) {
                                StringBuilder scoreStringBuilder = scores.get(entry.getKey());
                                if( scoreStringBuilder == null ) {
-                                       throw new RuntimeException(String.format("Invalid T-Coffee score file: Sequence ID '%s' is not declared in header section", entry.getKey()));
+                                       error=true;
+                                       errormessage=String.format("Invalid T-Coffee score file: Sequence ID '%s' is not declared in header section", entry.getKey());
+                                       return ;
                                }
                                
                                scoreStringBuilder.append( entry.getValue() );
@@ -232,12 +234,14 @@ public class TCoffeeScoreFile {
                                fWidth = str.length();
                        }
                        else if( fWidth != str.length() ) {
-                               throw new RuntimeException("Invalid T-Coffee score file: All the score sequences must have the same length");
+                         error=true;
+                         errormessage="Invalid T-Coffee score file: All the score sequences must have the same length";
+                         return ;
                        }
                }
                
                
-               
+               return;
        }
 
 
@@ -258,30 +262,30 @@ public class TCoffeeScoreFile {
         * @return The parser {@link Header} instance 
         * @throws RuntimeException when the header is not in the expected format
         */
-       static Header readHeader(BufferedReader reader) {
+       static Header readHeader(FileParse reader) throws IOException {
                
                Header result = null;
                try {
                        result = new Header();
-                       result.head = reader.readLine();
+                       result.head = reader.nextLine();
                        
                        String line;
 
-                       while( (line = reader.readLine()) != null ) {
+                       while( (line = reader.nextLine()) != null ) {
                                if( line.startsWith("SCORE=")) {
                                        result.score = parseInt( line.substring(6).trim() );
                                        break;
                                }
                        }
 
-                       if( (line=reader.readLine())==null || !"*".equals(line.trim())) return null;
-                       if( (line=reader.readLine())==null || !"BAD AVG GOOD".equals(line.trim())) return null;
-                       if( (line=reader.readLine())==null || !"*".equals(line.trim())) return null;
+                       if( (line=reader.nextLine())==null || !"*".equals(line.trim())) { error(reader,"Invalid T-COFFEE score format (NO BAD/AVG/GOOD header)"); return null;}
+                       if( (line=reader.nextLine())==null || !"BAD AVG GOOD".equals(line.trim())) { error(reader,"Invalid T-COFFEE score format (NO BAD/AVG/GOOD header)"); return null;}
+                       if( (line=reader.nextLine())==null || !"*".equals(line.trim())) {error(reader,"Invalid T-COFFEE score format (NO BAD/AVG/GOOD header)"); return null;}
                        
                        /*
                         * now are expected a list if sequences ID up to the first blank line
                         */
-                       while( (line=reader.readLine()) != null ) {
+                       while( (line=reader.nextLine()) != null ) {
                                if( "".equals(line) ) {
                                        break;
                                }
@@ -302,14 +306,27 @@ public class TCoffeeScoreFile {
                                result.scores.put(id,val);
                        }
                        
+                       // 'result' is assigned a new Header at the top of this try block and is already
+                       // dereferenced by the loop above, so a null test here can never fire. Check instead
+                       // whether the header listed any per-sequence score before flagging the reader.
+                       if (result.scores.size()==0) {
+                         error(reader, "T-COFFEE score file had no per-sequence scores");
+                       }
+                       
                }
                catch( IOException e ) {
-                       throw new RuntimeException("Cannot parse T-Coffee score ascii file", e);
+                 error(reader,"Unexpected problem parsing T-Coffee score ascii file");
+                 throw e;
                }
                
                return result;
        } 
-       
+       /**
+        * Flags the given reader as failed and records a message: the first message is
+        * stored as-is, later ones are appended on a new line.
+        *
+        * @param reader the parser whose error state is updated
+        * @param errm   the message to record
+        */
+       private static void error(FileParse reader, String errm)
+       {
+         final String previous = reader.errormessage;
+         reader.error=true;
+         reader.errormessage = (previous==null) ? errm : previous+"\n"+errm;
+       }
        /**
         * Read a scores block ihe provided stream. 
         * 
@@ -318,18 +335,18 @@ public class TCoffeeScoreFile {
         * @return The {@link Block} instance read or {link null} null if the end of file has reached.
         * @throws IOException Something went wrong on the 'wire' 
         */
-       static Block readBlock( BufferedReader reader, int size ) throws IOException {
+       static Block readBlock( FileParse reader, int size ) throws IOException {
                Block result = new Block(size);
                String line;
                
                /*
                 * read blank lines (eventually)
                 */
-               while( (line=reader.readLine()) != null && "".equals(line.trim())) {
+               while( (line=reader.nextLine()) != null && "".equals(line.trim())) {
                        // consume blank lines 
                }
                
-               if( line == null ) return null;
+               if( line == null ) { return null; }
                
                /*
                 * read the scores block
@@ -342,11 +359,12 @@ public class TCoffeeScoreFile {
                        
                        // split the line on the first blank 
                        // the first part have to contain the sequence id
-                       // theramining part are the scores values
+                       // the remaining part are the scores values
                        int p = line.indexOf(" ");
                        if( p == -1 ) {
-                               //TODO This is an unexpected condition, log a warning or throw an exception ? 
-                               continue;
+                         if (reader.warningMessage==null) { reader.warningMessage=""; }
+                         reader.warningMessage+="Possible parsing error - expected to find a space in line: '"+line+"'\n";
+                               continue;
                        } 
                        
                        String id = line.substring(0,p).trim();
@@ -354,7 +372,7 @@ public class TCoffeeScoreFile {
                        
                        result.items.put(id, val);
                        
-               } while( (line = reader.readLine()) != null ); 
+               } while( (line = reader.nextLine()) != null ); 
                
 
                return result;
@@ -422,6 +440,11 @@ public class TCoffeeScoreFile {
         */
        public boolean annotateAlignment(AlignmentI al, boolean matchids)
        {
+         if (al.getHeight()!=getHeight() || al.getWidth()!=getWidth())
+         {
+           warningMessage="Alignment shape does not match T-Coffee score file shape.";
+           return false;
+         }
          boolean added=false;
          int i=0;
          SequenceIdMatcher sidmatcher = new SequenceIdMatcher(al.getSequencesArray());
@@ -447,28 +470,51 @@ public class TCoffeeScoreFile {
            Annotation[] annotations=new Annotation[al.getWidth()];
            for (int j=0;j<jSize;j++) {
              byte val = srow[j];
-             annotations[j]=new Annotation(s==null ? ""+val:null,s==null ? ""+val:null,(char) val,val*1f,val >= 0 && val < colors.length ? colors[val] : Color.white);
+             if (s!=null && jalview.util.Comparison.isGap(s.getCharAt(j)))
+             {
+               annotations[j]=null;
+               if (val>0)
+               {
+                 System.err.println("Warning: non-zero value for positional T-COFFEE score for gap at "+j+" in sequence "+s.getName());
+               }
+             } else {
+             annotations[j]=new Annotation(s==null ? ""+val:null,s==null ? ""+val:null,'\0',val*1f,val >= 0 && val < colors.length ? colors[val] : Color.white);
+             }
            }
-           AlignmentAnnotation aa=null;
+           // this will overwrite any existing t-coffee scores for the alignment
+           AlignmentAnnotation aa=al.findOrCreateAnnotation(TCOFFEE_SCORE,false,s,null);
            if (s!=null)
            {
-             // TODO - set per sequence score
-             aa=new AlignmentAnnotation(TCOFFEE_SCORE, "Score for "+id.getKey(), annotations);
-             
-             aa.setSequenceRef(s);
+             aa.label="T-COFFEE";
+             aa.description=""+id.getKey();
+             aa.annotations=annotations;
              aa.visible=false;
              aa.belowAlignment=false;
+             aa.setScore(header.getScoreFor(id.getKey()));
+              aa.createSequenceMapping(s, s.getStart(),true);
+              s.addAlignmentAnnotation(aa);
+              aa.adjustForAlignment();
            } else {
-             aa=new AlignmentAnnotation("T-COFFEE", "TCoffee column reliability score", annotations);
+             aa.graph=AlignmentAnnotation.NO_GRAPH;
+             aa.label="T-COFFEE";
+             aa.description="TCoffee column reliability score";
+             aa.annotations=annotations;
               aa.belowAlignment=true;
              aa.visible=true;
-             
+             aa.setScore(header.getScoreAvg());
            }
-           al.addAnnotation(aa);
+           aa.showAllColLabels=true;
+           aa.validateRangeAndDisplay();
            added=true;
          }
+         
          return added;
        }
-         
 
+  @Override
+  public String print()
+  {
+    /*
+     * TODO placeholder implementation: nothing is serialised here, the fixed
+     * string below is returned on every call.
+     */
+    return "Not valid.";
+  }
 }