JAL-2175 code/test fixes/tidy for PFAM/MSF/JSON roundtrip
authorgmungoc <g.m.carstairs@dundee.ac.uk>
Mon, 22 Aug 2016 14:38:54 +0000 (15:38 +0100)
committergmungoc <g.m.carstairs@dundee.ac.uk>
Mon, 22 Aug 2016 14:38:54 +0000 (15:38 +0100)
src/jalview/io/JSONFile.java
src/jalview/io/MSFfile.java
src/jalview/io/PfamFile.java
test/jalview/io/FormatAdapterTest.java [new file with mode: 0644]
test/jalview/io/StockholmFileTest.java

index 3cda444..653c071 100644 (file)
@@ -735,6 +735,10 @@ public class JSONFile extends AlignFile implements ComplexAlignFile
   @Override
   public void configureForView(AlignmentViewPanel avpanel)
   {
+    if (avpanel == null)
+    {
+      return;
+    }
     super.configureForView(avpanel);
     AlignViewportI viewport = avpanel.getAlignViewport();
     AlignmentI alignment = viewport.getAlignment();
index f62ad81..ab510d5 100755 (executable)
@@ -22,12 +22,14 @@ package jalview.io;
 
 import jalview.datamodel.Sequence;
 import jalview.datamodel.SequenceI;
+import jalview.util.Comparison;
 import jalview.util.Format;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Hashtable;
+import java.util.List;
 import java.util.StringTokenizer;
-import java.util.Vector;
 
 /**
  * DOCUMENT ME!
@@ -67,24 +69,23 @@ public class MSFfile extends AlignFile
   }
 
   /**
-   * DOCUMENT ME!
+   * Read and parse MSF sequence data
    */
   @Override
   public void parse() throws IOException
   {
-    int i = 0;
     boolean seqFlag = false;
-    String key = new String();
-    Vector headers = new Vector();
-    Hashtable seqhash = new Hashtable();
-    String line;
+    List<String> headers = new ArrayList<String>();
+    Hashtable<String, StringBuilder> seqhash = new Hashtable<String, StringBuilder>();
 
     try
     {
+      String line;
       while ((line = nextLine()) != null)
       {
         StringTokenizer str = new StringTokenizer(line);
 
+        String key = null;
         while (str.hasMoreTokens())
         {
           String inStr = str.nextToken();
@@ -93,31 +94,31 @@ public class MSFfile extends AlignFile
           if (inStr.indexOf("Name:") != -1)
           {
             key = str.nextToken();
-            headers.addElement(key);
+            headers.add(key);
           }
 
-          // if line has // set SeqFlag to 1 so we know sequences are coming
+          // if line has // set SeqFlag so we know sequences are coming
           if (inStr.indexOf("//") != -1)
           {
             seqFlag = true;
           }
 
           // Process lines as sequence lines if seqFlag is set
-          if ((inStr.indexOf("//") == -1) && (seqFlag == true))
+          if ((inStr.indexOf("//") == -1) && seqFlag)
           {
-            // seqeunce id is the first field
+            // sequence id is the first field
             key = inStr;
 
-            StringBuffer tempseq;
+            StringBuilder tempseq;
 
             // Get sequence from hash if it exists
             if (seqhash.containsKey(key))
             {
-              tempseq = (StringBuffer) seqhash.get(key);
+              tempseq = seqhash.get(key);
             }
             else
             {
-              tempseq = new StringBuffer();
+              tempseq = new StringBuilder(64);
               seqhash.put(key, tempseq);
             }
 
@@ -125,7 +126,8 @@ public class MSFfile extends AlignFile
             while (str.hasMoreTokens())
             {
               // append the word to the sequence
-              tempseq.append(str.nextToken());
+              String sequenceBlock = str.nextToken();
+              tempseq.append(sequenceBlock);
             }
           }
         }
@@ -139,11 +141,11 @@ public class MSFfile extends AlignFile
     this.noSeqs = headers.size();
 
     // Add sequences to the hash
-    for (i = 0; i < headers.size(); i++)
+    for (int i = 0; i < headers.size(); i++)
     {
-      if (seqhash.get(headers.elementAt(i)) != null)
+      if (seqhash.get(headers.get(i)) != null)
       {
-        String head = headers.elementAt(i).toString();
+        String head = headers.get(i);
         String seq = seqhash.get(head).toString();
 
         if (maxLength < head.length())
@@ -151,8 +153,11 @@ public class MSFfile extends AlignFile
           maxLength = head.length();
         }
 
-        // Replace ~ with a sensible gap character
-        seq = seq.replace('~', '-');
+        /*
+         * replace ~ (leading/trailing positions) with the gap character;
+         * use '.' as this is the internal gap character required by MSF
+         */
+        seq = seq.replace('~', '.');
 
         Sequence newSeq = parseId(head);
 
@@ -163,7 +168,7 @@ public class MSFfile extends AlignFile
       else
       {
         System.err.println("MSFFile Parser: Can't find sequence for "
-                + headers.elementAt(i));
+                + headers.get(i));
       }
     }
   }
@@ -211,15 +216,16 @@ public class MSFfile extends AlignFile
    * 
    * @return DOCUMENT ME!
    */
-  public String print(SequenceI[] seqs)
+  public String print(SequenceI[] sqs)
   {
 
-    boolean is_NA = jalview.util.Comparison.isNucleotide(seqs);
+    boolean is_NA = Comparison.isNucleotide(sqs);
 
-    SequenceI[] s = new SequenceI[seqs.length];
+    SequenceI[] s = new SequenceI[sqs.length];
 
-    StringBuffer out = new StringBuffer("!!" + (is_NA ? "NA" : "AA")
-            + "_MULTIPLE_ALIGNMENT 1.0");
+    StringBuilder out = new StringBuilder(256);
+    out.append("!!").append(is_NA ? "NA" : "AA")
+            .append("_MULTIPLE_ALIGNMENT 1.0");
     // TODO: JBPNote : Jalview doesn't remember NA or AA yet.
     out.append(newline);
     out.append(newline);
@@ -227,14 +233,16 @@ public class MSFfile extends AlignFile
     int maxid = 0;
     int i = 0;
 
-    while ((i < seqs.length) && (seqs[i] != null))
+    while ((i < sqs.length) && (sqs[i] != null))
     {
-      // Replace all internal gaps with . and external spaces with ~
-      s[i] = new Sequence(seqs[i].getName(), seqs[i].getSequenceAsString()
-              .replace('-', '.'), seqs[i].getStart(), seqs[i].getEnd());
+      /*
+       * modify to MSF format: uses '.' for internal gaps, 
+       * and '~' for leading or trailing gaps
+       */
+      String seqString = sqs[i].getSequenceAsString()
+              .replace('-', '.');
 
-      StringBuffer sb = new StringBuffer();
-      sb.append(s[i].getSequence());
+      StringBuilder sb = new StringBuilder(seqString);
 
       for (int ii = 0; ii < sb.length(); ii++)
       {
@@ -259,12 +267,12 @@ public class MSFfile extends AlignFile
           break;
         }
       }
+      s[i] = new Sequence(sqs[i].getName(), sb.toString(),
+              sqs[i].getStart(), sqs[i].getEnd());
 
-      s[i].setSequence(sb.toString());
-
-      if (s[i].getSequence().length > max)
+      if (sb.length() > max)
       {
-        max = s[i].getSequence().length;
+        max = sb.length();
       }
 
       i++;
index 71cc7f0..667da9f 100755 (executable)
@@ -108,7 +108,7 @@ public class PfamFile extends AlignFile
       }
       if (spces + 1 < line.length())
       {
-        tempseq.append(line.substring(spces + 1));
+        tempseq.append(line.substring(spces + 1).trim());
       }
     }
 
diff --git a/test/jalview/io/FormatAdapterTest.java b/test/jalview/io/FormatAdapterTest.java
new file mode 100644 (file)
index 0000000..81e336e
--- /dev/null
@@ -0,0 +1,138 @@
+package jalview.io;
+
+import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertNotNull;
+import static org.testng.AssertJUnit.fail;
+
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.SequenceI;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+public class FormatAdapterTest
+{
+
+  /**
+   * Test saving and re-reading in a specified format
+   * 
+   * @throws IOException
+   */
+  @Test(groups = { "Functional" }, dataProvider = "formats")
+  public void testRoundTrip(String format) throws IOException
+  {
+    try
+    {
+      AlignmentI al = new FormatAdapter().readFile("examples/uniref50.fa",
+              FormatAdapter.FILE, "FASTA");
+
+      /*
+       * 'gap' is the gap character used in the alignment data file here,
+       * not the user preferred gap character
+       */
+      char gap = al.getGapCharacter();
+      assertNotNull(al);
+
+      SequenceI[] seqs = al.getSequencesArray();
+      String formatted = new FormatAdapter().formatSequences(format, al,
+              false);
+
+      AlignmentI reloaded = new FormatAdapter().readFile(formatted,
+              FormatAdapter.PASTE, format);
+      List<SequenceI> reread = reloaded.getSequences();
+      assertEquals("Wrong number of reloaded sequences", seqs.length,
+              reread.size());
+
+      int i = 0;
+      for (SequenceI seq : reread)
+      {
+        String sequenceString = seq.getSequenceAsString();
+
+        /*
+         * special case: MSF always uses '.' as gap character
+         */
+        sequenceString = adjustForGapTreatment(sequenceString, gap, format);
+        assertEquals(
+                String.format("Sequence %d: %s", i,
+                        seqs[i].getName()), seqs[i].getSequenceAsString(),
+                sequenceString);
+        i++;
+      }
+    } catch (IOException e)
+    {
+      fail(String
+              .format("Format %s failed with %s", format, e.getMessage()));
+    }
+  }
+
+  /**
+   * Optionally change the gap character in the string to the given character,
+   * depending on the sequence file format
+   * 
+   * @param sequenceString
+   *          a sequence (as written in 'format' format)
+   * @param gap
+   *          the sequence's original gap character
+   * @param format
+   * @return
+   */
+  String adjustForGapTreatment(String sequenceString, char gap,
+          String format)
+  {
+    if ("MSF".equals(format))
+    {
+      /*
+       * MSF forces gap character to '.', so change it back
+       * for comparison purposes
+       */
+      sequenceString = sequenceString.replace('.', gap);
+    }
+    return sequenceString;
+  }
+
+  /**
+   * Data provider that serves alignment formats that are both readable and
+   * writable
+   * 
+   * @return
+   */
+  @DataProvider(name = "formats")
+  static Object[][] getFormats()
+  {
+    List<String> both = new ArrayList<String>();
+    String[] readable = FormatAdapter.READABLE_FORMATS;
+    List<String> writeable = Arrays.asList(FormatAdapter.WRITEABLE_FORMATS);
+    for (String r : readable)
+    {
+      if (writeable.contains(r))
+      {
+        both.add(r);
+      }
+    }
+
+    Object[][] formats = new Object[both.size()][];
+    int i = 0;
+    for (String format : both)
+    {
+      formats[i] = new Object[] { format };
+      i++;
+    }
+    return formats;
+  }
+
+  /**
+   * Enable this to isolate testing to a single file format
+   * 
+   * @throws IOException
+   */
+  @Test(groups = { "Functional" }, enabled = false)
+  public void testOneFormatRoundTrip() throws IOException
+  {
+    testRoundTrip("JSON");
+  }
+}
index d7a9166..b034fb0 100644 (file)
@@ -23,6 +23,7 @@ package jalview.io;
 import static org.testng.AssertJUnit.assertEquals;
 import static org.testng.AssertJUnit.assertNotNull;
 import static org.testng.AssertJUnit.assertTrue;
+import static org.testng.AssertJUnit.fail;
 
 import jalview.datamodel.AlignmentAnnotation;
 import jalview.datamodel.AlignmentI;
@@ -159,7 +160,7 @@ public class StockholmFileTest
    *          'secondary' or generated alignment from some datapreserving
    *          transformation
    * @param ignoreFeatures
-   *          when true, differences in seuqence feature annotation are ignored.
+   *          when true, differences in sequence feature annotation are ignored
    */
   public static void testAlignmentEquivalence(AlignmentI al,
           AlignmentI al_input, boolean ignoreFeatures)
@@ -167,12 +168,9 @@ public class StockholmFileTest
     assertNotNull("Original alignment was null", al);
     assertNotNull("Generated alignment was null", al_input);
 
-    assertTrue(
-            "Alignment dimension mismatch: originl contains "
-                    + al.getHeight() + " and generated has "
-                    + al_input.getHeight() + " sequences; original has "
-                    + al.getWidth() + " and generated has "
-                    + al_input.getWidth() + " columns.",
+    assertTrue("Alignment dimension mismatch: original: " + al.getHeight()
+            + "x" + al.getWidth() + ", generated: " + al_input.getHeight()
+            + "x" + al_input.getWidth(),
             al.getHeight() == al_input.getHeight()
                     && al.getWidth() == al_input.getWidth());
 
@@ -183,9 +181,10 @@ public class StockholmFileTest
     // note - at moment we do not distinguish between alignment without any
     // annotation rows and alignment with no annotation row vector
     // we might want to revise this in future
-    int aa_new_size = (aa_new == null ? 0 : aa_new.length), aa_original_size = (aa_original == null ? 0
-            : aa_original.length);
-    Map<Integer, java.util.BitSet> orig_groups = new HashMap<Integer, java.util.BitSet>(), new_groups = new HashMap<Integer, java.util.BitSet>();
+    int aa_new_size = (aa_new == null ? 0 : aa_new.length);
+    int aa_original_size = (aa_original == null ? 0 : aa_original.length);
+    Map<Integer, BitSet> orig_groups = new HashMap<Integer, BitSet>();
+    Map<Integer, BitSet> new_groups = new HashMap<Integer, BitSet>();
 
     if (aa_new != null && aa_original != null)
     {
@@ -196,20 +195,17 @@ public class StockholmFileTest
           assertTrue("Different alignment annotation at position " + i,
                   equalss(aa_original[i], aa_new[i]));
           // compare graphGroup or graph properties - needed to verify JAL-1299
-          assertTrue("Graph type not identical.",
-                  aa_original[i].graph == aa_new[i].graph);
-          assertTrue("Visibility not identical.",
-                  aa_original[i].visible == aa_new[i].visible);
-          assertTrue(
-                  "Threshold line not identical.",
-                  aa_original[i].threshold == null ? aa_new[i].threshold == null
-                          : aa_original[i].threshold
-                                  .equals(aa_new[i].threshold));
+          assertEquals("Graph type not identical.", aa_original[i].graph,
+                  aa_new[i].graph);
+          assertEquals("Visibility not identical.", aa_original[i].visible,
+                  aa_new[i].visible);
+          assertEquals("Threshold line not identical.",
+                  aa_original[i].threshold, aa_new[i].threshold);
           // graphGroup may differ, but pattern should be the same
-          Integer o_ggrp = new Integer(aa_original[i].graphGroup + 2), n_ggrp = new Integer(
-                  aa_new[i].graphGroup + 2);
-          BitSet orig_g = orig_groups.get(o_ggrp), new_g = new_groups
-                  .get(n_ggrp);
+          Integer o_ggrp = new Integer(aa_original[i].graphGroup + 2);
+          Integer n_ggrp = new Integer(aa_new[i].graphGroup + 2);
+          BitSet orig_g = orig_groups.get(o_ggrp);
+          BitSet new_g = new_groups.get(n_ggrp);
           if (orig_g == null)
           {
             orig_groups.put(o_ggrp, orig_g = new BitSet());
@@ -218,8 +214,8 @@ public class StockholmFileTest
           {
             new_groups.put(n_ggrp, new_g = new BitSet());
           }
-          assertTrue("Graph Group pattern differs at annotation " + i,
-                  orig_g.equals(new_g));
+          assertEquals("Graph Group pattern differs at annotation " + i,
+                  orig_g, new_g);
           orig_g.set(i);
           new_g.set(i);
         }
@@ -230,10 +226,9 @@ public class StockholmFileTest
         }
       }
     }
-    assertTrue(
-            "Generated and imported alignment have different annotation sets ("
-                    + aa_new_size + " != " + aa_original_size + ")",
-            aa_new_size == aa_original_size);
+    assertEquals(
+            "Generated and imported alignment have different annotation sets",
+            aa_new_size, aa_original_size);
 
     // check sequences, annotation and features
     SequenceI[] seq_original = new SequenceI[al.getSequencesArray().length];
@@ -260,8 +255,8 @@ public class StockholmFileTest
         {
           String ss_original = seq_original[i].getSequenceAsString();
           String ss_new = seq_new[in].getSequenceAsString();
-          assertTrue("The sequences " + name + "/" + start + "-" + end
-                  + " are not equal", ss_original.equals(ss_new));
+          assertEquals("The sequences " + name + "/" + start + "-" + end
+                  + " are not equal", ss_original, ss_new);
 
           assertTrue(
                   "Sequence Features were not equivalent"
@@ -284,15 +279,16 @@ public class StockholmFileTest
                     .getSequenceFeatures().length];
             sequenceFeatures_new = seq_new[in].getSequenceFeatures();
 
-            assertTrue("different number of features", seq_original[i]
-                    .getSequenceFeatures().length == seq_new[in]
+            assertEquals("different number of features",
+                    seq_original[i].getSequenceFeatures().length,
+                    seq_new[in]
                     .getSequenceFeatures().length);
 
             for (int feat = 0; feat < seq_original[i].getSequenceFeatures().length; feat++)
             {
-              assertTrue("Different features",
-                      sequenceFeatures_original[feat]
-                              .equals(sequenceFeatures_new[feat]));
+              assertEquals("Different features",
+                      sequenceFeatures_original[feat],
+                      sequenceFeatures_new[feat]);
             }
           }
           // compare alignment annotation
@@ -319,9 +315,9 @@ public class StockholmFileTest
           else if (al.getSequenceAt(i).getAnnotation() != null
                   && al_input.getSequenceAt(in).getAnnotation() == null)
           {
-            assertTrue("Annotations differed between sequences ("
+            fail("Annotations differed between sequences ("
                     + al.getSequenceAt(i).getName() + ") and ("
-                    + al_input.getSequenceAt(i).getName() + ")", false);
+                    + al_input.getSequenceAt(i).getName() + ")");
           }
           break;
         }