JAL-1499 initial tests working, can export to / import from textbox ok
[jalview.git] / test / jalview / io / MegaFileTest.java
index 92a3c3c..5caa50e 100644 (file)
@@ -6,6 +6,7 @@ import static org.testng.AssertJUnit.assertNull;
 import static org.testng.AssertJUnit.assertTrue;
 import static org.testng.AssertJUnit.fail;
 
+import jalview.datamodel.AlignmentI;
 import jalview.datamodel.Sequence;
 import jalview.datamodel.SequenceI;
 
@@ -19,87 +20,70 @@ import org.testng.annotations.Test;
  */
 public class MegaFileTest
 {
-  private static final String THIRTY_CHARS = "012345678901234567890123456789";
+  private static final String TWENTY_CHARS = "9876543210abcdefghij";
+
+  private static final String THIRTY_CHARS = "0123456789klmnopqrstABCDEFGHIJ";
 
   //@formatter:off
   private static final String INTERLEAVED = 
           "#MEGA\n"+ 
           "TITLE: Interleaved sequence data\n\n" + 
           "#U455   ABCDEF\n" + 
-          "#CPZANT  MNOPQR\n\n" + "#U455   KLMNOP\n" + 
-          "#CPZANT WXYZ";
+          "#CPZANT  MNOPQR\n\n" + 
+          "#U455   KLMNOP\n" + 
+          "#CPZANT WXYZGC";
 
   private static final String INTERLEAVED_NOHEADERS = 
           "#U455   ABCDEF\n" 
-          + "#CPZANT  MNOPQR\n\n" 
+          + "#CPZANT MNOPQR\n\n" 
           + "#U455   KLMNOP\n"
-          + "#CPZANT WXYZ\n";
+          + "#CPZANT WXYZGC\n";
 
-  // interleaved sequences, one with 60 one with 120 characters (on overlong
-  // input lines)
-  private static final String INTERLEAVED_LONGERTHAN50 = 
+  // interleaved sequences, with 50 residues
+  private static final String INTERLEAVED_50RESIDUES = 
           "#MEGA\n"
-          + "TITLE: Interleaved sequence data\n\n"
-          + "#U455 " + THIRTY_CHARS + THIRTY_CHARS + "\n" 
-          + "#CPZANT "
-          + THIRTY_CHARS + THIRTY_CHARS + THIRTY_CHARS + THIRTY_CHARS;
+          + "!TITLE Interleaved sequence data\n\n"
+          + "#U455 " + THIRTY_CHARS + TWENTY_CHARS + "\n" 
+          + "#CPZANT " + TWENTY_CHARS + THIRTY_CHARS + "\n";
 
   private static final String NONINTERLEAVED = 
           "#MEGA\n"
-          + "TITLE: Noninterleaved sequence data\n\n" 
+          + "!TITLE Noninterleaved sequence data\n\n" 
           + "#U455  \n"
           + "ABCFEDHIJ\n" 
           + "MNOPQR\n\n" 
           + "#CPZANT \n" 
           + "KLMNOPWXYZ\n" 
           + "CGATC\n";
-
-  // Sequence length 60 (split over two lines)
-  private static final String NONINTERLEAVED_LONGERTHAN50 = 
-          "#SIXTY\n" + THIRTY_CHARS + "\n" + THIRTY_CHARS;
-
-  // this one starts noninterleaved then switches to interleaved
+  
+  // this one starts interleaved then switches to non-interleaved
   private static final String MIXED = 
           "#MEGA\n"
-          + "TITLE: This is a mess\n\n" + "#CPZANT KLMNOPWXYZCGATC\n\n"
+          + "!TITLE This is a mess\n\n" 
+          + "#CPZANT KLMNOPWXYZCGATC\n\n"
           + "#U455\n  "
           + "ABCFEDHIJ\n";
 
   // interleaved with a new sequence appearing in the second block :-O
   private static final String INTERLEAVED_SEQUENCE_ERROR = 
           "#MEGA" + "\n"
-          + "TITLE: Interleaved sequence data\n\n"
+          + "!TITLE Interleaved sequence data\n\n"
           + "#U455   ABCDEF\n" 
           + "#CPZANT  MNOPQR\n\n"
           + "#U456   KLMNOP\n";
 
-  // the 'fancy' format, different header format, bases in triplet groups
+  // data with !Format and !Description headers, bases in triplet groups
-  private static final String FANCY_FORMAT = 
+  private static final String INTERLEAVED_WITH_DESCRIPTION = 
           "#MEGA\n"
-          + "!Title Fancy format data;\n"
-          + "!Format DataType=DNA indel=- CodeTable=Standard;\n\n"
+          + "!Title Data with description;\n"
+          + "!Format DataType=DNA indel=- CodeTable=Standard Missing=? MatchChar=.;\n\n"
           + "!Description\n" 
           + "    Line one of description\n"
           + "    Line two of description;\n\n"
-          + "!Gene=Adh Property=Coding CodonStart=1;\n"
-          + "#U455   ABC DEF\n" 
-          + "#CPZANT  MNO PQR\n\n"
-          + "#U455   KLM NOP\n" 
-          + "#CPZANT WXY Z\n";
-
-  // interleaved sequence data for two genes
-  private static final String TWO_GENES = 
-          "#MEGA\n"
-          + "!Title Fancy format data;\n"
-          + "!Format DataType=DNA indel=- CodeTable=Standard;\n\n"
-          + "!Description\n" 
-          + "    Line one of description\n"
-          + "    Line two of description;\n\n"
-          + "!Gene=Adh Property=Coding CodonStart=1;\n"
-          + "#U455   ABC DEF\n" 
-          + "#CPZANT  MNO PQR\n\n"
-          + "#U455   KLM NOP\n" 
-          + "#CPZANT WXY Z\n"; //TODO complete
+          + "#U455   CGC GTA\n" 
+          + "#CPZANT ATC GGG\n\n"
+          + "#U455   CGA TTT\n" 
+          + "#CPZANT CAA TGC\n";
 
   //@formatter:on
 
@@ -124,7 +108,7 @@ public class MegaFileTest
     // check sequence data
     assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
             .getSequenceAsString());
-    assertEquals("Second sequence data wrong", "MNOPQRWXYZ", seqs.get(1)
+    assertEquals("Second sequence data wrong", "MNOPQRWXYZGC", seqs.get(1)
             .getSequenceAsString());
     assertTrue("File format is not flagged as interleaved",
             testee.isInterleaved());
@@ -246,8 +230,9 @@ public class MegaFileTest
     System.out.println(printed);
     // normally output should match input
     // we cheated here with a number of short input lines
-    String expected = "#MEGA\n" + "!TITLE Interleaved sequence data;\n\n"
-            + "#U455   ABCDEFKLMNOP\n" + "#CPZANT MNOPQRWXYZ"
+    // nb don't get Title in output if not calling print(AlignmentI)
+    String expected = "#MEGA\n\n" + "#U455   ABCDEF\n"
+            + "#CPZANT MNOPQR\n\n" + "#U455   KLMNOP\n" + "#CPZANT WXYZGC"
             + "\n";
     assertEquals("Print format wrong", expected, printed);
   }
@@ -264,11 +249,9 @@ public class MegaFileTest
             AppletFormatAdapter.PASTE);
     String printed = testee.print();
     System.out.println(printed);
-    // normally output should match input
-    // we cheated here with a number of short input lines
-    String expected = "#MEGA\n\n" + "#U455   ABCDEFKLMNOP" + "\n"
-            + "#CPZANT MNOPQRWXYZ\n";
-    assertEquals("Print format wrong", expected, printed);
+
+    assertEquals("Print format wrong", "#MEGA\n\n" + INTERLEAVED_NOHEADERS,
+            printed);
   }
 
   /**
@@ -281,14 +264,14 @@ public class MegaFileTest
   {
     MegaFile testee = new MegaFile(NONINTERLEAVED,
             AppletFormatAdapter.PASTE);
+    assertEquals(10, testee.getPositionsPerLine());
     String printed = testee.print();
     System.out.println(printed);
     // normally output should match input
     // we cheated here with a number of short input lines
-    String expected = "#MEGA\n"
-            + "!TITLE Noninterleaved sequence data;\n\n"
-            + "#U455\n" + "ABCFEDHIJMNOPQR\n\n" + "#CPZANT\n"
-            + "KLMNOPWXYZCGATC\n";
+    String expected = "#MEGA\n\n"
+ + "#U455\n" + "ABCFEDHIJM\nNOPQR\n\n"
+            + "#CPZANT\n" + "KLMNOPWXYZ\nCGATC\n";
     assertEquals("Print format wrong", expected, printed);
   }
 
@@ -301,20 +284,26 @@ public class MegaFileTest
   @Test(groups = { "Functional" })
   public void testPrint_interleavedMultiLine() throws IOException
   {
-    MegaFile testee = new MegaFile(INTERLEAVED_LONGERTHAN50,
+    MegaFile testee = new MegaFile(INTERLEAVED_50RESIDUES,
             AppletFormatAdapter.PASTE);
+    assertEquals(50, testee.getPositionsPerLine());
+    /*
+     * now simulate choosing 20 residues per line on output
+     */
+    testee.setPositionsPerLine(20);
     String printed = testee.print();
     System.out.println(printed);
-    // first sequence is length 60, second length 120
-    // should be output as 50 + 10 + 0 and as 50 + 50 + 20 character lines
-    // respectively
-    String expected = "#MEGA\n" + "!TITLE Interleaved sequence data;\n\n"
-            + "#U455   " + THIRTY_CHARS + "01234567890123456789\n"
-            + "#CPZANT " + THIRTY_CHARS + "01234567890123456789\n" + "\n"
-            + "#U455   " + "0123456789\n" + "#CPZANT " + THIRTY_CHARS
-            + "01234567890123456789\n\n" + "#U455   \n" + "#CPZANT "
-            + "01234567890123456789"
-            + "\n";
+    //@formatter:off
+    //0123456789klmnopqrstABCDEFGHIJ9876543210abcdefghij
+    String expected = 
+            "#MEGA\n\n" + 
+            "#U455   0123456789 klmnopqrst\n" + // first 20
+            "#CPZANT 9876543210 abcdefghij\n\n" +
+            "#U455   ABCDEFGHIJ 9876543210\n" + // next 20
+            "#CPZANT 0123456789 klmnopqrst\n\n" +
+            "#U455   abcdefghij\n" + // last 10
+            "#CPZANT ABCDEFGHIJ\n";
+    //@formatter:on
     assertEquals("Print format wrong", expected, printed);
   }
 
@@ -327,31 +316,33 @@ public class MegaFileTest
   @Test(groups = { "Functional" })
   public void testPrint_noninterleavedMultiLine() throws IOException
   {
+    final String NONINTERLEAVED_LONGERTHAN50 = "#SIXTY\n" + THIRTY_CHARS
+            + "\n" + TWENTY_CHARS + "9993332221\n";
     MegaFile testee = new MegaFile(NONINTERLEAVED_LONGERTHAN50,
             AppletFormatAdapter.PASTE);
+    assertEquals(30, testee.getPositionsPerLine());
+    testee.setPositionsPerLine(25);
     String printed = testee.print();
-    System.out.println(printed);
-    // 60 character sequence should be output as 50 on first line then 10 more
+    // 60 character sequence, at 25 positions per line, is output as 25 + 25 + 10
-    String expected = "#MEGA\n\n" + "#SIXTY\n" + THIRTY_CHARS
-            + "01234567890123456789\n" + "0123456789\n";
+    String expected = "#MEGA\n\n" + "#SIXTY\n"
+            + "0123456789klmnopqrstABCDE\n" + "FGHIJ9876543210abcdefghij\n"
+            + "9993332221\n";
     assertEquals("Print format wrong", expected, printed);
   }
 
   /**
-   * Test paste / parse of 'fancy format' data.
+   * Test parse of data including description
    * 
    * @throws IOException
    */
   @Test(groups = { "Functional" })
-  public void testParse_fancyFormat() throws IOException
+  public void testParse_withDescription() throws IOException
   {
-    MegaFile testee = new MegaFile(FANCY_FORMAT, AppletFormatAdapter.PASTE);
-    assertEquals("Title not as expected", "Fancy format data",
+    MegaFile testee = new MegaFile(INTERLEAVED_WITH_DESCRIPTION,
+            AppletFormatAdapter.PASTE);
+    assertEquals("Title not as expected", "Data with description",
             testee.getAlignmentProperty(MegaFile.PROP_TITLE));
 
-    // assertEquals("Format property not parsed",
-    // "DataType=DNA indel=- CodeTable=Standard;",
-    // testee.getAlignmentProperty(MegaFile.PROP_FORMAT));
     Vector<SequenceI> seqs = testee.getSeqs();
     // should be 2 sequences
     assertEquals("Expected two sequences", 2, seqs.size());
@@ -360,16 +351,16 @@ public class MegaFileTest
     assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1)
             .getName());
     // check sequence data
-    assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0)
+    assertEquals("First sequence data wrong", "CGCGTACGATTT", seqs.get(0)
             .getSequenceAsString());
-    assertEquals("Second sequence data wrong", "MNOPQRWXYZ", seqs.get(1)
+    assertEquals("Second sequence data wrong", "ATCGGGCAATGC", seqs.get(1)
             .getSequenceAsString());
     assertTrue("File format is not flagged as interleaved",
             testee.isInterleaved());
 
-    assertEquals("Description property not parsed",
-            "    Line one of description\n"
-                    + "    Line two of description\n",
+    assertEquals(
+            "Description property not parsed",
+            "    Line one of description\n" + "    Line two of description",
             testee.getAlignmentProperty(MegaFile.PROP_DESCRIPTION));
   }
 
@@ -410,4 +401,36 @@ public class MegaFileTest
     assertEquals("Mega", MegaFile.getValue("!Name \t\t Mega; "));
     assertEquals("", MegaFile.getValue("Name"));
   }
+
+  /**
+   * Test reading a MEGA file to an alignment then writing it out in MEGA
+   * format. Verify the output is (functionally) the same as the input.
+   * 
+   * @throws IOException
+   */
+  @Test(groups = "Functional")
+  public void testRoundTrip_Interleaved() throws IOException
+  {
+    AppletFormatAdapter fa = new AppletFormatAdapter();
+    AlignmentI al = fa.readFile(INTERLEAVED_WITH_DESCRIPTION,
+            AppletFormatAdapter.PASTE, "MEGA");
+    MegaFile output = new MegaFile();
+    String formatted = output.print(al);
+    //@formatter:off
+    String expected = 
+         "#MEGA\n!Title Data with description;\n" +
+         "!Description     Line one of description\n" +
+         "    Line two of description;\n" +
+         "!Format\n" +
+         "    DataType=DNA CodeTable=Standard\n" +
+         "    NSeqs=2 NSites=12\n" +
+         "    Indel=- Identical=. Missing=?;\n\n" +
+         "#U455   CGC GTA\n" +
+         "#CPZANT ATC GGG\n\n" +
+         "#U455   CGA TTT\n" +
+         "#CPZANT CAA TGC\n";
+    //@formatter:on
+    assertEquals("Roundtrip didn't match", expected,
+            formatted);
+  }
 }