Merge branch 'JAL-1199-Stk_writing' into Release_2_8_Branch
author jprocter <jprocter@compbio.dundee.ac.uk>
Tue, 5 Feb 2013 15:00:37 +0000 (15:00 +0000)
committer jprocter <jprocter@compbio.dundee.ac.uk>
Tue, 5 Feb 2013 15:00:37 +0000 (15:00 +0000)
src/jalview/io/AppletFormatAdapter.java
src/jalview/io/StockholmFile.java

index 11f5adb..b57b088 100755 (executable)
@@ -45,21 +45,21 @@ public class AppletFormatAdapter
    * method
    */
   public static final String[] WRITEABLE_FORMATS = new String[]
-  { "BLC", "CLUSTAL", "FASTA", "MSF", "PileUp", "PIR", "PFAM", "AMSA" };
+  { "BLC", "CLUSTAL", "FASTA", "MSF", "PileUp", "PIR", "PFAM", "STH", "AMSA" };
 
   /**
    * List of extensions corresponding to file format types in WRITABLE_FNAMES
    * that are writable by the application.
    */
   public static final String[] WRITABLE_EXTENSIONS = new String[]
-  { "fa, fasta, fastq", "aln", "pfam", "msf", "pir", "blc", "amsa", "jar" };
+  { "fa, fasta, fastq", "aln", "pfam", "msf", "pir", "blc", "amsa", "jar", "sto,stk" };
 
   /**
    * List of writable formats by the application. Order must correspond with the
    * WRITABLE_EXTENSIONS list of formats.
    */
   public static final String[] WRITABLE_FNAMES = new String[]
-  { "Fasta", "Clustal", "PFAM", "MSF", "PIR", "BLC", "AMSA", "Jalview" };
+  { "Fasta", "Clustal", "PFAM", "MSF", "PIR", "BLC", "AMSA", "Jalview", "STH"};
 
   /**
    * List of readable format file extensions by application in order
@@ -445,7 +445,7 @@ public class AppletFormatAdapter
       }
       else if (format.equalsIgnoreCase("STH"))
       {
-        afile = new StockholmFile();
+        afile = new StockholmFile(alignment);
       }
       else if (format.equalsIgnoreCase("AMSA"))
       {
@@ -502,6 +502,14 @@ public class AppletFormatAdapter
         {
           System.out.println("Reading file: " + f);
           AppletFormatAdapter afa = new AppletFormatAdapter();
+          String fName = f.getName();
+          String extension = fName.substring(fName.lastIndexOf(".") + 1, fName.length());
+          if (extension.equals("stk") || extension.equals("sto"))
+          {  
+                 afa.test(f);
+          }
+          else
+          {
           Runtime r = Runtime.getRuntime();
           System.gc();
           long memf = -r.totalMemory() + r.freeMemory();
@@ -534,12 +542,13 @@ public class AppletFormatAdapter
           System.out
                   .println("Difference between free memory now and before is "
                           + (memf / (1024.0 * 1024.0) * 1.0) + " MB");
-
+          }
         } catch (Exception e)
         {
           System.err.println("Exception when dealing with " + i
                   + "'th argument: " + args[i] + "\n" + e);
         }
+     
       }
       else
       {
@@ -550,6 +559,160 @@ public class AppletFormatAdapter
     }
   }
 
+  /**
+   * Round-trip check for the Stockholm writer: reads the given file, writes the
+   * alignment out through StockholmFile, parses that output again and prints to
+   * standard output every difference found between the two alignments
+   * (sequence strings, sequence features, sequence annotation and alignment
+   * annotation). Exceptions are caught and reported on standard error.
+   * 
+   * @param f
+   *          alignment file to read and round-trip
+   */
+  private void test(File f)
+  {
+    System.out.println("Reading file: " + f);
+    String ff = f.getPath();
+    try
+    {
+      Alignment al = readFile(ff, FILE, new IdentifyFile().Identify(ff, FILE));
+      if (al == null)
+      {
+        // must be tested before al is first dereferenced below
+        System.out.println("Couldn't read alignment");
+        return;
+      }
+      int height = al.getSequencesArray().length;
+      for (int i = 0; i < height; ++i)
+      {
+        // every sequence becomes its own dataset sequence before writing
+        al.getSequenceAt(i).setDatasetSequence(al.getSequenceAt(i));
+      }
+      AlignFile stFile = new StockholmFile(al);
+      stFile.setSeqs(al.getSequencesArray());
+
+      String stockholmoutput = stFile.print();
+      Alignment al_input = readFile(stockholmoutput,
+              AppletFormatAdapter.PASTE, "STH");
+      if (al_input == null)
+      {
+        System.out.println("Couldn't read alignment");
+        return;
+      }
+      System.out.println("Alignment contains: " + al.getHeight() + " and "
+              + al_input.getHeight() + " sequences; " + al.getWidth()
+              + " and " + al_input.getWidth() + " columns.");
+
+      // check Alignment annotation
+      AlignmentAnnotation[] aa_new = al_input.getAlignmentAnnotation();
+      AlignmentAnnotation[] aa_original = al.getAlignmentAnnotation();
+      if (aa_new != null && aa_original != null)
+      {
+        System.out.println("Alignment contains: " + aa_new.length + "  and "
+                + aa_original.length + " alignment annotation(s)");
+        if (aa_new.length != aa_original.length)
+        {
+          System.out.println("Different number of alignment annotations");
+        }
+        // only rows present on both sides can be compared by index
+        int shared = Math.min(aa_original.length, aa_new.length);
+        for (int i = 0; i < shared; i++)
+        {
+          if (!equalss(aa_original[i], aa_new[i]))
+          {
+            System.out.println("Different alignment annotation");
+          }
+        }
+      }
+
+      // check sequences, annotation and features
+      SequenceI[] seq_original = al.getSequencesArray();
+      SequenceI[] seq_new = al_input.getSequencesArray();
+      for (int i = 0; i < seq_original.length; i++)
+      {
+        String name = seq_original[i].getName();
+        int start = seq_original[i].getStart();
+        int end = seq_original[i].getEnd();
+        System.out.println("Check sequence: " + name + "/" + start + "-"
+                + end);
+
+        // search equal sequence (same name, start and end); only the first
+        // match is compared
+        for (int in = 0; in < seq_new.length; in++)
+        {
+          if (!name.equals(seq_new[in].getName())
+                  || start != seq_new[in].getStart()
+                  || end != seq_new[in].getEnd())
+          {
+            continue;
+          }
+          String ss_original = seq_original[i].getSequenceAsString();
+          String ss_new = seq_new[in].getSequenceAsString();
+          if (!ss_original.equals(ss_new))
+          {
+            System.out.println("The sequences " + name + "/" + start + "-"
+                    + end + " are not equal");
+          }
+          compareSequenceFeatures(seq_original[i].getSequenceFeatures(),
+                  seq_new[in].getSequenceFeatures());
+          compareSequenceAnnotations(al.getSequenceAt(i).getAnnotation(),
+                  al_input.getSequenceAt(in).getAnnotation());
+          break;
+        }
+      }
+    } catch (Exception e)
+    {
+      System.err.println("Couln't format the alignment for output file.");
+      e.printStackTrace(System.err);
+    }
+  }
+
+  /**
+   * Reports on standard output how the features of an original sequence
+   * compare with those of its round-tripped counterpart.
+   */
+  private void compareSequenceFeatures(SequenceFeature[] original,
+          SequenceFeature[] roundTripped)
+  {
+    if (original != null && roundTripped != null)
+    {
+      System.out.println("There are feature!!!");
+      if (original.length != roundTripped.length)
+      {
+        System.out.println("different number of features");
+        return;
+      }
+      for (int feat = 0; feat < original.length; feat++)
+      {
+        if (!original[feat].equals(roundTripped[feat]))
+        {
+          System.out.println("Different features");
+          break;
+        }
+      }
+    }
+    else if (original == null && roundTripped == null)
+    {
+      System.out.println("No sequence features");
+    }
+    else if (original != null)
+    {
+      System.out.println("Coudn't compare sequence features new one");
+    }
+  }
+
+  /**
+   * Reports on standard output how the annotation rows of an original sequence
+   * compare with those of its round-tripped counterpart.
+   */
+  private void compareSequenceAnnotations(AlignmentAnnotation[] original,
+          AlignmentAnnotation[] roundTripped)
+  {
+    if (original != null && roundTripped != null)
+    {
+      if (original.length != roundTripped.length)
+      {
+        System.out.println("Different number of annotations");
+      }
+      // index only the rows that exist on both sides
+      int shared = Math.min(original.length, roundTripped.length);
+      for (int j = 0; j < shared; j++)
+      {
+        if (original[j] != null && roundTripped[j] != null
+                && !equalss(original[j], roundTripped[j]))
+        {
+          System.out.println("Different annotation");
+        }
+      }
+    }
+    else if (original == null && roundTripped == null)
+    {
+      System.out.println("No annotations");
+    }
+    else if (original != null)
+    {
+      System.out.println("Coudn't compare annotations new one");
+    }
+  }
+
+  /**
+   * Compares two annotation rows position by position. The rows are equal when
+   * they have the same number of positions and, at every position, either both
+   * entries are null or both are present with the same display character,
+   * secondary structure character and description.
+   * 
+   * @param annot_or
+   *          annotation row from the original alignment
+   * @param annot_new
+   *          annotation row from the re-read alignment
+   * @return true if the rows match at every position
+   */
+  private boolean equalss(AlignmentAnnotation annot_or,
+          AlignmentAnnotation annot_new)
+  {
+    if (annot_or.annotations.length != annot_new.annotations.length)
+    {
+      return false;
+    }
+    for (int i = 0; i < annot_or.annotations.length; i++)
+    {
+      boolean missing_or = annot_or.annotations[i] == null;
+      boolean missing_new = annot_new.annotations[i] == null;
+      if (missing_or && missing_new)
+      {
+        continue;
+      }
+      if (missing_or || missing_new)
+      {
+        return false;
+      }
+      // a difference in ANY field makes the position different; the fields
+      // were previously joined with &&, which required all three to differ
+      if (!sameText(annot_or.annotations[i].displayCharacter,
+              annot_new.annotations[i].displayCharacter)
+              || annot_or.annotations[i].secondaryStructure != annot_new.annotations[i].secondaryStructure
+              || !sameText(annot_or.annotations[i].description,
+                      annot_new.annotations[i].description))
+      {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Null-safe string equality: two nulls are equal, null never equals non-null.
+   */
+  private static boolean sameText(String a, String b)
+  {
+    return (a == null) ? (b == null) : a.equals(b);
+  }
   /**
    * try to discover how to access the given file as a valid datasource that
    * will be identified as the given type.
index 669181a..c6a3ce4 100644 (file)
@@ -25,6 +25,7 @@ import java.util.*;
 
 import com.stevesoft.pat.*;
 import jalview.datamodel.*;
+import jalview.util.Format;
 
 // import org.apache.log4j.*;
 
@@ -43,11 +44,21 @@ import jalview.datamodel.*;
 public class StockholmFile extends AlignFile
 {
   // static Logger logger = Logger.getLogger("jalview.io.StockholmFile");
-
+  StringBuffer out; // output buffer
+  AlignmentI al;
+  
   public StockholmFile()
   {
   }
 
+  /**
+   * Creates a StockholmFile intended for writing. The given alignment is kept
+   * in the {@code al} field, which print(SequenceI[]) reads for alignment
+   * properties and alignment annotation.
+   * 
+   * @param al
+   *          alignment to be written out
+   */
+  public StockholmFile(AlignmentI al)
+  {
+    this.al = al;
+  }
+  
   public StockholmFile(String inFile, String type) throws IOException
   {
     super(inFile, type);
@@ -132,8 +143,23 @@ public class StockholmFile extends AlignFile
       if (rend.search(line))
       {
         // End of the alignment, pass stuff back
-
         this.noSeqs = seqs.size();
+        
+        String propety = null;
+        Regex pf = new Regex("PF[0-9]{5}(.*)"); // Finds AC for Pfam
+        Regex rf = new Regex("RF[0-9]{5}(.*)"); // Finds AC for Rfam
+        if (getAlignmentProperty("AC") != null)
+        {
+          String dbType = getAlignmentProperty("AC").toString();
+          if (pf.search(dbType))
+          {
+            propety = "PFAM";
+          }
+          else if (rf.search(dbType))
+          {
+               propety = "RFAM";
+          }
+        }
         // logger.debug("Number of sequences: " + this.noSeqs);
         Enumeration accs = seqs.keys();
         while (accs.hasMoreElements())
@@ -176,6 +202,9 @@ public class StockholmFile extends AlignFile
             String desc = (String) accAnnotations.get("DE");
             seqO.setDescription((desc == null) ? "" : desc);
           }
+          
+         
+            
           // Add DB References (if any)
           if (accAnnotations != null && accAnnotations.containsKey("DR"))
           {
@@ -185,21 +214,21 @@ public class StockholmFile extends AlignFile
               String src = dbr.substring(0, dbr.indexOf(";"));
               String acn = dbr.substring(dbr.indexOf(";") + 1);
               jalview.util.DBRefUtils.parseToDbRef(seqO, src, "0", acn);
-              // seqO.addDBRef(dbref);
-            }
+            }            
           }
-          if (accAnnotations != null && accAnnotations.containsKey("SS"))
+          
+          if (accAnnotations != null && accAnnotations.containsKey("AC") && propety != null)
           {
-            Vector v = (Vector) accAnnotations.get("SS");
-
-            for (int i = 0; i < v.size(); i++)
+            String dbr = (String) accAnnotations.get("AC");
+            if (dbr != null)
             {
-              AlignmentAnnotation an = (AlignmentAnnotation) v.elementAt(i);
-              seqO.addAlignmentAnnotation(an);
-              // annotations.add(an);
-            }
+              String src = propety;
+              String acn = dbr.toString();
+              jalview.util.DBRefUtils.parseToDbRef(seqO, src, "0", acn);
+            }            
           }
-
+          
+      
           Hashtable features = null;
           // We need to adjust the positions of all features to account for gaps
           try
@@ -224,6 +253,22 @@ public class StockholmFile extends AlignFile
               // TODO: map coding region to core jalview feature types
               String type = i.nextElement().toString();
               Hashtable content = (Hashtable) features.remove(type);
+             
+              // add alignment annotation for this feature
+              String key = type2id(type);
+              if (key != null) 
+              {
+                if (accAnnotations != null && accAnnotations.containsKey(key))
+                {
+                  Vector vv = (Vector) accAnnotations.get(key);        
+                  for (int ii = 0; ii < vv.size(); ii++)
+                  {
+                    AlignmentAnnotation an = (AlignmentAnnotation) vv.elementAt(ii);
+                    seqO.addAlignmentAnnotation(an);           
+                  }        
+                }
+              }
+              
               Enumeration j = content.keys();
               while (j.hasMoreElements())
               {
@@ -477,25 +522,20 @@ public class StockholmFile extends AlignFile
             }
             ns += seq;
             content.put(description, ns);
-
-            if (type.equals("SS"))
+            Hashtable strucAnn;
+            if (seqAnn.containsKey(acc))
             {
-              Hashtable strucAnn;
-              if (seqAnn.containsKey(acc))
-              {
-                strucAnn = (Hashtable) seqAnn.get(acc);
-              }
-              else
-              {
-                strucAnn = new Hashtable();
-              }
-
-              Vector newStruc = new Vector();
-              parseAnnotationRow(newStruc, type, ns);
-
-              strucAnn.put(type, newStruc);
-              seqAnn.put(acc, strucAnn);
+              strucAnn = (Hashtable) seqAnn.get(acc);
             }
+            else
+            {
+              strucAnn = new Hashtable();
+            }
+
+            Vector newStruc = new Vector();
+            parseAnnotationRow(newStruc, type, ns);
+            strucAnn.put(type, newStruc);
+            seqAnn.put(acc, strucAnn);
           }
           else
           {
@@ -537,9 +577,12 @@ public class StockholmFile extends AlignFile
     convert1 = openparen.replaceAll(annots);
     convert2 = closeparen.replaceAll(convert1);
     annots = convert2;
-
-    String type = (label.indexOf("_cons") == label.length() - 5) ? label
-            .substring(0, label.length() - 5) : label;
+    
+    String type = label;
+    if (label.contains("_cons")) {
+       type = (label.indexOf("_cons") == label.length() - 5) ? label
+                .substring(0, label.length() - 5) : label;     
+    }     
     boolean ss = false;
     type = id2type(type);
     if (type.equals("secondary structure"))
@@ -605,15 +648,193 @@ public class StockholmFile extends AlignFile
     }
     return annot;
   }
-
-  public static String print(SequenceI[] s)
+  
+  /**
+   * Appends the Stockholm markup for the given sequences to the output buffer
+   * and returns the buffer's content. In order, it writes: one "#=GF" line per
+   * alignment property, one "#=GS" line per sequence id that has a database
+   * reference, then for each sequence its "#=GR" annotation rows followed by
+   * the sequence line itself, and finally one "#=GC" line per visible,
+   * non-autocalculated alignment annotation. The "# STOCKHOLM 1.0" header and
+   * the closing "//" are added by print(), not here.
+   * 
+   * @param s
+   *          sequences to write; processing stops at the first null entry
+   * @return the current content of the output buffer
+   */
+  public String print(SequenceI[] s)
   {
-    return "not yet implemented";
+    if (out == null)
+    {
+      // print() normally creates the buffer; tolerate a direct call too
+      out = new StringBuffer();
+    }
+
+    // find max length of id and remember one database reference per id
+    // (a later reference for the same id replaces an earlier one)
+    int maxid = 0;
+    Hashtable dataRef = null;
+    for (int in = 0; in < s.length && s[in] != null; in++)
+    {
+      String tmp = printId(s[in]);
+      if (tmp.length() > maxid)
+      {
+        maxid = tmp.length();
+      }
+      if (s[in].getDBRef() != null)
+      {
+        for (int idb = 0; idb < s[in].getDBRef().length; idb++)
+        {
+          if (dataRef == null)
+          {
+            dataRef = new Hashtable();
+          }
+          String datAs1 = s[in].getDBRef()[idb].getSource().toString()
+                  + " ; "
+                  + s[in].getDBRef()[idb].getAccessionId().toString();
+          dataRef.put(tmp, datAs1);
+        }
+      }
+    }
+    // leave room for the "#=GR "-style prefix and separators
+    maxid += 9;
+
+    // output database type
+    // al is only set by the StockholmFile(AlignmentI) constructor
+    if (al != null && al.getProperties() != null
+            && !al.getProperties().isEmpty())
+    {
+      Enumeration key = al.getProperties().keys();
+      Enumeration val = al.getProperties().elements();
+      while (key.hasMoreElements())
+      {
+        out.append("#=GF " + key.nextElement() + " " + val.nextElement());
+        out.append(newline);
+      }
+    }
+
+    // output database accessions
+    if (dataRef != null)
+    {
+      Enumeration en = dataRef.keys();
+      while (en.hasMoreElements())
+      {
+        Object idd = en.nextElement();
+        // read rather than remove: the table must not change while its keys
+        // are being enumerated
+        String type = (String) dataRef.get(idd);
+        out.append(new Format("%-" + (maxid - 2) + "s").form("#=GS "
+                + idd.toString() + " "));
+        if (type.contains("PFAM") || type.contains("RFAM"))
+        {
+          out.append(" AC " + type.substring(type.indexOf(";") + 1));
+        }
+        else
+        {
+          out.append(" DR " + type + " ");
+        }
+        out.append(newline);
+      }
+    }
+
+    // output annotations, each followed by its sequence line
+    for (int i = 0; i < s.length && s[i] != null; i++)
+    {
+      AlignmentAnnotation[] alAnot = s[i].getAnnotation();
+      if (s[i].getDatasetSequence() != null && alAnot != null
+              && alAnot.length > 0)
+      {
+        SequenceI ds = s[i].getDatasetSequence();
+        // the annotation id comes from the type of the first sequence
+        // feature; it is the same for every row, so look it up once
+        String feature = "";
+        if (ds.getSequenceFeatures() != null
+                && ds.getSequenceFeatures().length > 0)
+        {
+          feature = ds.getSequenceFeatures()[0].type;
+        }
+        String key = type2id(feature);
+        if (key != null)
+        {
+          for (int j = 0; j < alAnot.length; j++)
+          {
+            out.append(new Format("%-" + maxid + "s").form("#=GR "
+                    + printId(s[i]) + " " + key + " "));
+            Annotation[] ann = alAnot[j].annotations;
+            StringBuffer row = new StringBuffer();
+            for (int k = 0; k < ann.length; k++)
+            {
+              Annotation annot = ann[k];
+              // a missing annotation is written as the residue itself
+              String ch = (annot == null) ? Character.toString(s[i]
+                      .getCharAt(k)) : annot.displayCharacter;
+              if (ch == null)
+              {
+                ch = "";
+              }
+              if (ch.length() == 0)
+              {
+                if (key.equals("SS"))
+                {
+                  char ll = annot.secondaryStructure;
+                  row.append(ll == ' ' ? 'C' : ll);
+                }
+                else
+                {
+                  row.append('.');
+                }
+              }
+              else if (ch.length() == 1)
+              {
+                row.append(ch);
+              }
+              else
+              {
+                row.append(ch.charAt(1));
+              }
+            }
+            out.append(row.toString());
+            out.append(newline);
+          }
+        }
+      }
+
+      out.append(new Format("%-" + maxid + "s").form(printId(s[i]) + " "));
+      out.append(s[i].getSequenceAsString());
+      out.append(newline);
+    }
+
+    // alignment annotation
+    if (al != null && al.getAlignmentAnnotation() != null)
+    {
+      for (int ia = 0; ia < al.getAlignmentAnnotation().length; ia++)
+      {
+        AlignmentAnnotation aa = al.getAlignmentAnnotation()[ia];
+        if (aa.autoCalculated || !aa.visible)
+        {
+          continue;
+        }
+
+        String label;
+        if (aa.label.equals("seq"))
+        {
+          label = "seq_cons";
+        }
+        else
+        {
+          // test the id BEFORE appending "_cons": the concatenation is never
+          // null, so an unknown type used to be written as "null_cons"
+          String id = type2id(aa.label.toLowerCase());
+          label = (id == null) ? aa.label : id + "_cons";
+        }
+
+        out.append(new Format("%-" + maxid + "s").form("#=GC " + label
+                + " "));
+        StringBuffer row = new StringBuffer();
+        for (int j = 0; j < aa.annotations.length; j++)
+        {
+          String ch = (aa.annotations[j] == null) ? "-"
+                  : aa.annotations[j].displayCharacter;
+          if (ch == null)
+          {
+            ch = "";
+          }
+          if (ch.length() == 0)
+          {
+            char ll = aa.annotations[j].secondaryStructure;
+            row.append(ll == ' ' ? 'C' : ll);
+          }
+          else if (ch.length() == 1)
+          {
+            row.append(ch);
+          }
+          else
+          {
+            row.append(ch.charAt(1));
+          }
+        }
+        out.append(row.toString());
+        out.append(newline);
+      }
+    }
+    return out.toString();
   }
 
+  /**
+   * Produces the complete Stockholm document for this file's sequences: a
+   * fresh output buffer is started with the "# STOCKHOLM 1.0" header, filled by
+   * print(SequenceI[]) and closed with the "//" terminator line.
+   * 
+   * @return the full text held in the output buffer
+   */
   public String print()
   {
-    return print(getSeqsAsArray());
+    out = new StringBuffer();
+    out.append("# STOCKHOLM 1.0").append(newline);
+
+    // the body is appended to 'out'; the returned string is not needed here
+    print(getSeqsAsArray());
+
+    out.append("//").append(newline);
+    return out.toString();
   }
 
   private static Hashtable typeIds = null;
@@ -651,6 +872,28 @@ public class StockholmFile extends AlignFile
             + id);
     return id;
   }
+  
+  /**
+   * Reverse lookup in the typeIds table: finds the id (key) whose stored value,
+   * as a string, equals the given type name.
+   * 
+   * @param type
+   *          annotation type name to look up
+   * @return the matching id, or null (after a warning on standard error) when
+   *         no entry has that value
+   */
+  protected static String type2id(String type)
+  {
+    for (Enumeration ids = typeIds.keys(); ids.hasMoreElements();)
+    {
+      Object candidate = ids.nextElement();
+      if (typeIds.get(candidate).toString().equals(type))
+      {
+        return (String) candidate;
+      }
+    }
+    System.err.println("Warning : Unknown Stockholm annotation type: "
+            + type);
+    return null;
+  }
   /**
    * //ssline is complete secondary structure line private AlignmentAnnotation
    * addHelices(Vector annotation, String label, String ssline) {