JAL-2235 patch tests where self-reference was used for dataset sequence ref
[jalview.git] / test / jalview / io / StockholmFileTest.java
index cac1a88..b635aa3 100644 (file)
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ * 
+ * This file is part of Jalview.
+ * 
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License 
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *  
+ * Jalview is distributed in the hope that it will be useful, but 
+ * WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
+ * PURPOSE.  See the GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
 package jalview.io;
 
-import static org.junit.Assert.*;
-import jalview.datamodel.Alignment;
+import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertNotNull;
+import static org.testng.AssertJUnit.assertTrue;
+import static org.testng.AssertJUnit.fail;
+
 import jalview.datamodel.AlignmentAnnotation;
 import jalview.datamodel.AlignmentI;
+import jalview.datamodel.Annotation;
 import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
 
 import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
+import java.util.BitSet;
+import java.util.HashMap;
+import java.util.Map;
 
-import org.junit.Test;
+import org.testng.annotations.Test;
 
 public class StockholmFileTest
 {
 
-  public static void main(String argv[])
+  static String PfamFile = "examples/PF00111_seed.stk",
+          RfamFile = "examples/RF00031_folded.stk";
+
+  /**
+   * Round-trips the Pfam seed alignment (PfamFile) through the "STH" writer
+   * and reader via testFileIOwithFormat. The alignment-annotation count check
+   * is disabled (-1) and no minimum number of sequence-associated annotation
+   * rows is required (0).
+   */
+  @Test(groups = { "Functional" })
+  public void pfamFileIO() throws Exception
   {
-    try {
-      new StockholmFileTest().pfamFileIO();
-    
-    } catch (Exception x)
-    {
-      x.printStackTrace();
-    }
+    testFileIOwithFormat(new File(PfamFile), "STH", -1, 0);
   }
-  static String PfamFile = "examples/PF00111_seed.stk";
 
-  @Test
-  public void pfamFileIO() throws Exception
+  /**
+   * Checks that PDB entries are picked up when the Pfam seed alignment
+   * (PfamFile) is read: sums the sizes of the non-null getAllPDBEntries()
+   * results over all sequences and asserts that the total is greater than
+   * zero.
+   */
+  @Test(groups = { "Functional" })
+  public void pfamFileDataExtraction() throws Exception
   {
-      test(new File(PfamFile));
-      AppletFormatAdapter af = new AppletFormatAdapter();
-      AlignmentI al = af.readFile(PfamFile, af.FILE, new IdentifyFile().Identify(PfamFile, af.FILE));
-      int numpdb=0;
-      for (SequenceI sq:al.getSequences())
+    AppletFormatAdapter af = new AppletFormatAdapter();
+    AlignmentI al = af.readFile(PfamFile, af.FILE,
+            new IdentifyFile().identify(PfamFile, af.FILE));
+    int numpdb = 0;
+    for (SequenceI sq : al.getSequences())
+    {
+      if (sq.getAllPDBEntries() != null)
       {
-        if (sq.getPDBId()!=null)
-        {
-          numpdb+=sq.getPDBId().size();
-        }
+        numpdb += sq.getAllPDBEntries().size();
       }
-      assertTrue("PF00111 seed alignment has at least 1 PDB file, but the reader found none.",numpdb>0);
+    }
+    assertTrue(
+            "PF00111 seed alignment has at least 1 PDB file, but the reader found none.",
+            numpdb > 0);
   }
 
-  private void test(File f)
+  /**
+   * Round-trips the Rfam alignment (RfamFile) through the "STH" writer and
+   * reader via testFileIOwithFormat, expecting exactly 2 annotation rows
+   * without a sequence reference and at least 1 row with one.
+   */
+  @Test(groups = { "Functional" })
+  public void rfamFileIO() throws Exception
+  {
+    testFileIOwithFormat(new File(RfamFile), "STH", 2, 1);
+  }
+
+  /**
+   * Tests that alignment data in the given file can be imported, exported and
+   * re-imported with no data loss. Any exception raised along the way is
+   * reported as a test failure that includes the exception itself.
+   * 
+   * @param f
+   *          - source datafile (IdentifyFile.identify() should work with it)
+   * @param ioformat
+   *          - label for IO class used to write and read back in the data from
+   *          f
+   * @param naliannot
+   *          - expected number of re-imported annotation rows that have no
+   *          sequence reference; a negative value disables this check
+   * @param nminseqann
+   *          - minimum number of re-imported annotation rows that must have a
+   *          sequence reference
+   */
+  public static void testFileIOwithFormat(File f, String ioformat,
+          int naliannot, int nminseqann)
   {
     System.out.println("Reading file: " + f);
     String ff = f.getPath();
     try
     {
-      Alignment al = new AppletFormatAdapter().readFile(ff, AppletFormatAdapter.FILE,
-              new IdentifyFile().Identify(ff, AppletFormatAdapter.FILE));
-      
+      AppletFormatAdapter rf = new AppletFormatAdapter();
+
+      AlignmentI al = rf.readFile(ff, AppletFormatAdapter.FILE,
+              new IdentifyFile().identify(ff, AppletFormatAdapter.FILE));
+
+      assertNotNull("Couldn't read supplied alignment data.", al);
+
+      // make sure dataset is initialised ? not sure about this
       for (int i = 0; i < al.getSequencesArray().length; ++i)
       {
-        al.getSequenceAt(i).setDatasetSequence(al.getSequenceAt(i));
+        al.getSequenceAt(i).createDatasetSequence();
+      }
+      String outputfile = rf.formatSequences(ioformat, al, true);
+      System.out.println("Output file in '" + ioformat + "':\n"
+              + outputfile + "\n<<EOF\n");
+      // test for consistency in io
+      AlignmentI al_input = new AppletFormatAdapter().readFile(outputfile,
+              AppletFormatAdapter.PASTE, ioformat);
+      assertNotNull("Couldn't parse reimported alignment data.", al_input);
+
+      String identifyoutput = new IdentifyFile().identify(outputfile,
+              AppletFormatAdapter.PASTE);
+      assertNotNull("Identify routine failed for outputformat " + ioformat,
+              identifyoutput);
+      assertTrue(
+              "Identify routine could not recognise output generated by '"
+                      + ioformat + "' writer",
+              ioformat.equals(identifyoutput));
+      testAlignmentEquivalence(al, al_input, false);
+      // count re-imported annotation rows without / with a sequence reference
+      int numaliannot = 0, numsqswithali = 0;
+      for (AlignmentAnnotation ala : al_input.getAlignmentAnnotation())
+      {
+        if (ala.sequenceRef == null)
+        {
+          numaliannot++;
+        }
+        else
+        {
+          numsqswithali++;
+        }
+      }
+      if (naliannot > -1)
+      {
+        assertEquals("Number of alignment annotations", naliannot,
+                numaliannot);
       }
-      AlignFile stFile = new StockholmFile(al);
-      stFile.setSeqs(al.getSequencesArray());
 
-      String stockholmoutput = stFile.print();
-      Alignment al_input = new AppletFormatAdapter().readFile(stockholmoutput,
-              AppletFormatAdapter.PASTE, "STH");
-      if (al != null && al_input != null)
+      assertTrue(
+              "Number of sequence associated annotations wasn't at least "
+                      + nminseqann, numsqswithali >= nminseqann);
+
+    } catch (Exception e)
+    {
+      e.printStackTrace();
+      // the exception may come from reading, writing or format
+      // identification, so report the actual cause rather than a fixed
+      // "couldn't format" message
+      fail("Couldn't read, write or re-import alignment data for '" + ff
+              + "': " + e);
+    }
+  }
+
+  /**
+   * Asserts that two alignments are equivalent: same dimensions, matching
+   * alignment annotation rows (elements, graph type, visibility, threshold
+   * and graph-group pattern) and - for sequences matched by name, start and
+   * end - identical sequence strings, sequence features and per-sequence
+   * annotation.
+   * 
+   * @param al
+   *          'original' alignment
+   * @param al_input
+   *          'secondary' or generated alignment from some data-preserving
+   *          transformation
+   * @param ignoreFeatures
+   *          when true, a sequence having features in only one of the two
+   *          alignments is tolerated; features present on both sides are
+   *          still compared
+   */
+  public static void testAlignmentEquivalence(AlignmentI al,
+          AlignmentI al_input, boolean ignoreFeatures)
+  {
+    assertNotNull("Original alignment was null", al);
+    assertNotNull("Generated alignment was null", al_input);
+
+    assertTrue("Alignment dimension mismatch: original: " + al.getHeight()
+            + "x" + al.getWidth() + ", generated: " + al_input.getHeight()
+            + "x" + al_input.getWidth(),
+            al.getHeight() == al_input.getHeight()
+                    && al.getWidth() == al_input.getWidth());
+
+    // check Alignment annotation
+    AlignmentAnnotation[] aa_new = al_input.getAlignmentAnnotation();
+    AlignmentAnnotation[] aa_original = al.getAlignmentAnnotation();
+
+    // note - at moment we do not distinguish between alignment without any
+    // annotation rows and alignment with no annotation row vector
+    // we might want to revise this in future
+    int aa_new_size = (aa_new == null ? 0 : aa_new.length);
+    int aa_original_size = (aa_original == null ? 0 : aa_original.length);
+    // graphGroup value (+2) -> set of row indices seen so far in that group
+    Map<Integer, BitSet> orig_groups = new HashMap<Integer, BitSet>();
+    Map<Integer, BitSet> new_groups = new HashMap<Integer, BitSet>();
+
+    if (aa_new != null && aa_original != null)
+    {
+      for (int i = 0; i < aa_original.length; i++)
       {
-        System.out.println("Alignment contains: " + al.getHeight()
-                + " and " + al_input.getHeight() + " sequences; "
-                + al.getWidth() + " and " + al_input.getWidth()
-                + " columns.");
-        AlignmentAnnotation[] aa_new = al_input.getAlignmentAnnotation();
-        AlignmentAnnotation[] aa_original = al.getAlignmentAnnotation();
-
-        // check Alignment annotation
-        if (aa_new != null && aa_original != null)
+        if (aa_new.length > i)
         {
-          System.out.println("Alignment contains: " + aa_new.length
-                  + "  and " + aa_original.length
-                  + " alignment annotation(s)");
-          for (int i = 0; i < aa_original.length; i++)
+          assertTrue("Different alignment annotation at position " + i,
+                  equalss(aa_original[i], aa_new[i]));
+          // compare graphGroup or graph properties - needed to verify JAL-1299
+          assertEquals("Graph type not identical.", aa_original[i].graph,
+                  aa_new[i].graph);
+          assertEquals("Visibility not identical.", aa_original[i].visible,
+                  aa_new[i].visible);
+          assertEquals("Threshold line not identical.",
+                  aa_original[i].threshold, aa_new[i].threshold);
+          // graphGroup may differ, but pattern should be the same
+          Integer o_ggrp = Integer.valueOf(aa_original[i].graphGroup + 2);
+          Integer n_ggrp = Integer.valueOf(aa_new[i].graphGroup + 2);
+          BitSet orig_g = orig_groups.get(o_ggrp);
+          BitSet new_g = new_groups.get(n_ggrp);
+          if (orig_g == null)
+          {
+            orig_groups.put(o_ggrp, orig_g = new BitSet());
+          }
+          if (new_g == null)
           {
-            assertTrue("Different alignment annotation",equalss(aa_original[i], aa_new[i]));
-              
+            new_groups.put(n_ggrp, new_g = new BitSet());
           }
+          assertEquals("Graph Group pattern differs at annotation " + i,
+                  orig_g, new_g);
+          orig_g.set(i);
+          new_g.set(i);
+        }
+        else
+        {
+          System.err.println("No matching annotation row for "
+                  + aa_original[i].toString());
         }
+      }
+    }
+    assertEquals(
+            "Generated and imported alignment have different annotation sets",
+            aa_original_size, aa_new_size);
 
-        // check sequences, annotation and features
-        SequenceI[] seq_original = new SequenceI[al.getSequencesArray().length];
-        seq_original = al.getSequencesArray();
-        SequenceI[] seq_new = new SequenceI[al_input.getSequencesArray().length];
-        seq_new = al_input.getSequencesArray();
-        SequenceFeature[] sequenceFeatures_original, sequenceFeatures_new;
-        AlignmentAnnotation annot_original, annot_new;
-        //
-        for (int i = 0; i < al.getSequencesArray().length; i++)
+    // check sequences, annotation and features
+    SequenceI[] seq_original = al.getSequencesArray();
+    SequenceI[] seq_new = al_input.getSequencesArray();
+    SequenceFeature[] sequenceFeatures_original, sequenceFeatures_new;
+    AlignmentAnnotation annot_original, annot_new;
+    //
+    for (int i = 0; i < al.getSequencesArray().length; i++)
+    {
+      String name = seq_original[i].getName();
+      int start = seq_original[i].getStart();
+      int end = seq_original[i].getEnd();
+      System.out.println("Check sequence: " + name + "/" + start + "-"
+              + end);
+
+      // search equal sequence
+      for (int in = 0; in < al_input.getSequencesArray().length; in++)
+      {
+        if (name.equals(seq_new[in].getName())
+                && start == seq_new[in].getStart()
+                && end == seq_new[in].getEnd())
         {
-          String name = seq_original[i].getName();
-          int start = seq_original[i].getStart();
-          int end = seq_original[i].getEnd();
-          System.out.println("Check sequence: " + name + "/" + start + "-"
-                  + end);
-
-          // search equal sequence
-          for (int in = 0; in < al_input.getSequencesArray().length; in++)
+          String ss_original = seq_original[i].getSequenceAsString();
+          String ss_new = seq_new[in].getSequenceAsString();
+          assertEquals("The sequences " + name + "/" + start + "-" + end
+                  + " are not equal", ss_original, ss_new);
+
+          assertTrue(
+                  "Sequence Features were not equivalent"
+                          + (ignoreFeatures ? " ignoring." : ""),
+                  ignoreFeatures
+                          || (seq_original[i].getSequenceFeatures() == null && seq_new[in]
+                                  .getSequenceFeatures() == null)
+                          || (seq_original[i].getSequenceFeatures() != null && seq_new[in]
+                                  .getSequenceFeatures() != null));
+          // compare sequence features
+          if (seq_original[i].getSequenceFeatures() != null
+                  && seq_new[in].getSequenceFeatures() != null)
           {
-            if (name.equals(seq_new[in].getName())
-                    && start == seq_new[in].getStart()
-                    && end == seq_new[in].getEnd())
-            {
-              String ss_original = seq_original[i].getSequenceAsString();
-              String ss_new = seq_new[in].getSequenceAsString();
-              assertTrue("The sequences " + name + "/" + start
-                      + "-" + end + " are not equal", ss_original.equals(ss_new));
-              
-              assertTrue("Sequence Features were not equivalent", (seq_original[i].getSequenceFeatures()==null && seq_new[in].getSequenceFeatures()==null) || (seq_original[i].getSequenceFeatures()!=null && seq_new[in].getSequenceFeatures()!=null));
-              // compare sequence features
-              if (seq_original[i].getSequenceFeatures() != null
-                      && seq_new[in].getSequenceFeatures() != null)
-              {
-                System.out.println("There are feature!!!");
-                sequenceFeatures_original = new SequenceFeature[seq_original[i]
-                        .getSequenceFeatures().length];
-                sequenceFeatures_original = seq_original[i]
-                        .getSequenceFeatures();
-                sequenceFeatures_new = new SequenceFeature[seq_new[in]
-                        .getSequenceFeatures().length];
-                sequenceFeatures_new = seq_new[in].getSequenceFeatures();
-
-                assertTrue("different number of features", seq_original[i].getSequenceFeatures().length == seq_new[in]
-                        .getSequenceFeatures().length);
-                
-                for (int feat = 0; feat < seq_original[i]
-                        .getSequenceFeatures().length; feat++)
-                {
-                  assertTrue("Different features",sequenceFeatures_original[feat]
-                          .equals(sequenceFeatures_new[feat]));
-                }
-              }
+            System.out.println("There are feature!!!");
+            sequenceFeatures_original = seq_original[i]
+                    .getSequenceFeatures();
+            sequenceFeatures_new = seq_new[in].getSequenceFeatures();
 
-              // compare alignment annotation
-              if (al.getSequenceAt(i).getAnnotation() != null
-                      && al_input.getSequenceAt(in).getAnnotation() != null)
-              {
-                for (int j = 0; j < al.getSequenceAt(i).getAnnotation().length; j++)
-                {
-                  if (al.getSequenceAt(i).getAnnotation()[j] != null
-                          && al_input.getSequenceAt(in).getAnnotation()[j] != null)
-                  {
-                    annot_original = al.getSequenceAt(i).getAnnotation()[j];
-                    annot_new = al_input.getSequenceAt(in).getAnnotation()[j];
-                    assertTrue("Different annotation", equalss(annot_original, annot_new));
-                  }
-                }
-              }
-              else if (al.getSequenceAt(i).getAnnotation() == null
-                      && al_input.getSequenceAt(in).getAnnotation() == null)
-              {
-                System.out.println("No annotations");
-              }
-              else if (al.getSequenceAt(i).getAnnotation() != null
-                      && al_input.getSequenceAt(in).getAnnotation() == null)
+            assertEquals("different number of features",
+                    seq_original[i].getSequenceFeatures().length,
+                    seq_new[in]
+                    .getSequenceFeatures().length);
+
+            for (int feat = 0; feat < seq_original[i].getSequenceFeatures().length; feat++)
+            {
+              assertEquals("Different features",
+                      sequenceFeatures_original[feat],
+                      sequenceFeatures_new[feat]);
+            }
+          }
+          // compare alignment annotation
+          if (al.getSequenceAt(i).getAnnotation() != null
+                  && al_input.getSequenceAt(in).getAnnotation() != null)
+          {
+            for (int j = 0; j < al.getSequenceAt(i).getAnnotation().length; j++)
+            {
+              if (al.getSequenceAt(i).getAnnotation()[j] != null
+                      && al_input.getSequenceAt(in).getAnnotation()[j] != null)
               {
-                assertTrue("Annotations differed between sequences ("+al.getSequenceAt(i).getName()+") and ("+al_input.getSequenceAt(i).getName()+")", false);
+                annot_original = al.getSequenceAt(i).getAnnotation()[j];
+                annot_new = al_input.getSequenceAt(in).getAnnotation()[j];
+                assertTrue("Different annotation elements",
+                        equalss(annot_original, annot_new));
               }
-              break;
             }
           }
+          else if (al.getSequenceAt(i).getAnnotation() == null
+                  && al_input.getSequenceAt(in).getAnnotation() == null)
+          {
+            System.out.println("No annotations");
+          }
+          else if (al.getSequenceAt(i).getAnnotation() != null
+                  && al_input.getSequenceAt(in).getAnnotation() == null)
+          {
+            // the matched sequence in al_input is at index 'in', not 'i'
+            fail("Annotations differed between sequences ("
+                    + al.getSequenceAt(i).getName() + ") and ("
+                    + al_input.getSequenceAt(in).getName() + ")");
+          }
+          // first sequence matching name/start/end is the only one compared
+          break;
         }
       }
-      else
-      {
-        assertTrue("Couldn't read alignment", false);
-      }
-    } catch (Exception e)
-    {
-      e.printStackTrace();
-      assertTrue("Couln't format the alignment for output file.", false);
     }
   }
 
   /*
    * compare annotations
    */
-  private boolean equalss(AlignmentAnnotation annot_or,
+  private static boolean equalss(AlignmentAnnotation annot_or,
           AlignmentAnnotation annot_new)
   {
     if (annot_or.annotations.length != annot_new.annotations.length)
     {
+      System.err.println("Different lengths for annotation row elements: "
+              + annot_or.annotations.length + "!="
+              + annot_new.annotations.length);
       return false;
     }
     for (int i = 0; i < annot_or.annotations.length; i++)
     {
-      if (annot_or.annotations[i] != null
-              && annot_new.annotations[i] != null)
+      Annotation an_or = annot_or.annotations[i], an_new = annot_new.annotations[i];
+      if (an_or != null && an_new != null)
       {
-        // Jim's comment - shouldn't the conditional here be using || not && for all these clauses ?
-        if (!annot_or.annotations[i].displayCharacter
-                .equals(annot_new.annotations[i].displayCharacter)
-                && annot_or.annotations[i].secondaryStructure != annot_new.annotations[i].secondaryStructure
-                && !annot_or.annotations[i].description
-                        .equals(annot_new.annotations[i].description))
+        if (!an_or.displayCharacter.trim().equals(
+                an_new.displayCharacter.trim())
+                || !("" + an_or.secondaryStructure).trim().equals(
+                        ("" + an_new.secondaryStructure).trim())
+                || (an_or.description != an_new.description && !((an_or.description == null && an_new.description
+                        .trim().length() == 0)
+                        || (an_new.description == null && an_or.description
+                                .trim().length() == 0) || an_or.description
+                        .trim().equals(an_new.description.trim()))))
         {
+          System.err.println("Annotation Element Mismatch\nElement " + i
+                  + " in original: " + annot_or.annotations[i].toString()
+                  + "\nElement " + i + " in new: "
+                  + annot_new.annotations[i].toString());
           return false;
         }
       }
@@ -212,6 +367,16 @@ public class StockholmFileTest
       }
       else
       {
+        System.err.println("Annotation Element Mismatch\nElement "
+                + i
+                + " in original: "
+                + (annot_or.annotations[i] == null ? "is null"
+                        : annot_or.annotations[i].toString())
+                + "\nElement "
+                + i
+                + " in new: "
+                + (annot_new.annotations[i] == null ? "is null"
+                        : annot_new.annotations[i].toString()));
         return false;
       }
     }