JAL-2089 patch broken merge to master for Release 2.10.0b1
[jalview.git] / test / jalview / datamodel / xdb / embl / EmblFileTest.java
index 81be1be..906436f 100644 (file)
@@ -1,6 +1,6 @@
 /*
- * Jalview - A Sequence Alignment Editor and Viewer (Version 2.9)
- * Copyright (C) 2015 The Jalview Authors
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
  * 
  * This file is part of Jalview.
  * 
 package jalview.datamodel.xdb.embl;
 
 import static org.testng.AssertJUnit.assertEquals;
-import static org.testng.AssertJUnit.assertFalse;
 import static org.testng.AssertJUnit.assertNull;
-import static org.testng.AssertJUnit.assertTrue;
 
 import jalview.datamodel.DBRefEntry;
 
-import java.io.StringReader;
 import java.util.Vector;
 
 import org.testng.annotations.Test;
 
 /**
  * Functional test of the EMBL entry data model. After this patch the test
  * data is obtained from EmblTestHelper.getEmblFile() instead of the inline
  * TESTDATA XML string that the patch removes.
  */
 public class EmblFileTest
 {
-  // adapted from http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/embl/x53828/emblxml
-  private static final String TESTDATA = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>"
-          + "<EMBL_Services>"
-          + "<entry accession=\"X53828\" version=\"3\" lastUpdated=\"2005-04-18\" releaseCreated=\"25\" releaseLastUpdated=\"83\">"
-          + "<description>Chicken LDH-A mRNA for lactate dehydrogenase A chain (EC 1.1.1.27)</description>"
-          + "<keyword>L-lactate dehydrogenase</keyword><keyword>chutney</keyword>"
-          + "<dbreference db=\"EuropePMC\" primary=\"PMC1460223\" secondary=\"9649548\" />"
-          + "<feature name=\"CDS\"><dbreference db=\"GOA\" primary=\"P00340\" secondary=\"2.1\" /><dbreference db=\"InterPro\" primary=\"IPR001236\" />"
-          + "<qualifier name=\"note\"><value>L-lactate dehydrogenase A-chain</value><value>pickle</value></qualifier>"
-          + "<qualifier name=\"translation\"><value>MSLKDHLIHN</value><evidence>Keith</evidence></qualifier>"
-          + "<location type=\"single\" complement=\"true\">"
-          + "<locationElement type=\"range\" accession=\"X53828\" version=\"1\" complement=\"false\">"
-          + "<basePosition type=\"simple\">60</basePosition><basePosition type=\"join\">1058</basePosition>"
-          + "</locationElement></location></feature>"
-          + "<sequence type=\"mRNA\" version=\"2\">GTGACG</sequence></entry></EMBL_Services>";
 
   /**
    * Checks the single entry returned by the test helper: its entry-level
    * attributes, keywords, database references, the three CDS features with
    * their locations, dbrefs and qualifiers, and the sequence string.
    */
   @Test(groups = { "Functional" })
   public void testGetEmblFile()
   {
-    Vector<EmblEntry> entries = EmblFile.getEmblFile(
-            new StringReader(TESTDATA)).getEntries();
+    Vector<EmblEntry> entries = EmblTestHelper.getEmblFile().getEntries();
     assertEquals(1, entries.size());
     EmblEntry entry = entries.get(0);
 
-    assertEquals("X53828", entry.getAccession());
-    assertEquals(
-            "Chicken LDH-A mRNA for lactate dehydrogenase A chain (EC 1.1.1.27)",
-            entry.getDesc());
-    assertEquals("2005-04-18", entry.getLastUpdated());
+    assertEquals("X07547", entry.getAccession());
+    assertEquals("C. trachomatis plasmid", entry.getDescription());
+    assertEquals("STD", entry.getDataClass());
+    assertEquals("PRO", entry.getTaxonomicDivision());
+    assertEquals("1999-02-10", entry.getLastUpdatedDate());
+    assertEquals("58", entry.getLastUpdatedRelease());
+    assertEquals("1988-11-10", entry.getFirstPublicDate());
+    assertEquals("18", entry.getFirstPublicRelease());
+    assertEquals("genomic DNA", entry.getMoleculeType());
+    assertEquals("1", entry.getSequenceVersion());
+    assertEquals("8", entry.getEntryVersion());
+    assertEquals("linear", entry.getTopology());
+    assertEquals("7499", entry.getSequenceLength());
 
     /*
      * FIXME (possibly stale - confirm and remove if obsolete): this note was
      * written for the release-number assertions that used to be commented out
      * here because their values were null. Adding or removing attributes in the
      * test XML modified behaviour, e.g. inserting an attribute _before_
      * lastUpdated resulted in a null value in that field. The assertions below
      * are now enabled and repeat the first-public date and release checks
      * already made above.
      */
-    // assertEquals("25", entry.getRCreated());
-    // assertEquals("83", entry.getRLastUpdated());
+    assertEquals("1988-11-10", entry.getFirstPublicDate());
+    assertEquals("18", entry.getFirstPublicRelease());
 
     assertEquals(2, entry.getKeywords().size());
-    assertEquals("L-lactate dehydrogenase", entry.getKeywords().get(0));
-    assertEquals("chutney", entry.getKeywords().get(1));
+    assertEquals("plasmid", entry.getKeywords().get(0));
+    assertEquals("unidentified reading frame", entry.getKeywords().get(1));
 
     /*
      * dbrefs
      */
-    assertEquals(1, entry.getDbRefs().size());
+    assertEquals(2, entry.getDbRefs().size());
     DBRefEntry dbref = entry.getDbRefs().get(0);
     assertEquals("EuropePMC", dbref.getSource());
-    assertEquals("PMC1460223", dbref.getAccessionId());
-    assertEquals("9649548", dbref.getVersion());
+    assertEquals("PMC107176", dbref.getAccessionId());
+    assertEquals("9573186", dbref.getVersion());
+    dbref = entry.getDbRefs().get(1);
+    assertEquals("MD5", dbref.getSource());
+    assertEquals("ac73317", dbref.getAccessionId());
+    // blank version has been converted to "0"
+    assertEquals("0", dbref.getVersion());
 
     /*
-     * sequence features
+     * three sequence features for CDS
+     */
+    assertEquals(3, entry.getFeatures().size());
+    /*
+     * first CDS
      */
-    assertEquals(1, entry.getFeatures().size());
     EmblFeature ef = entry.getFeatures().get(0);
     assertEquals("CDS", ef.getName());
+    assertEquals("complement(46..57)", ef.getLocation());
     assertEquals(2, ef.getDbRefs().size());
     dbref = ef.getDbRefs().get(0);
-    assertEquals("GOA", dbref.getSource());
-    assertEquals("P00340", dbref.getAccessionId());
+    assertEquals("UniProtKB/Swiss-Prot", dbref.getSource());
+    assertEquals("B0BCM4", dbref.getAccessionId());
     assertEquals("2.1", dbref.getVersion());
     dbref = ef.getDbRefs().get(1);
-    assertEquals("InterPro", dbref.getSource());
-    assertEquals("IPR001236", dbref.getAccessionId());
-    assertEquals("", dbref.getVersion());
-    assertEquals(2, ef.getQualifiers().size());
-
-    // feature qualifiers
+    assertEquals("UniProtKB/Swiss-Prot", dbref.getSource());
+    assertEquals("P0CE20", dbref.getAccessionId());
+    // blank version gets converted to "0":
+    assertEquals("0", dbref.getVersion());
+    // CDS feature qualifiers
+    assertEquals(3, ef.getQualifiers().size());
     Qualifier q = ef.getQualifiers().get(0);
     assertEquals("note", q.getName());
     assertEquals(2, q.getValues().length);
-    assertEquals("L-lactate dehydrogenase A-chain", q.getValues()[0]);
+    assertEquals("ORF 8 (AA 1-330)", q.getValues()[0]);
     assertEquals("pickle", q.getValues()[1]);
     assertNull(q.getEvidence());
     q = ef.getQualifiers().get(1);
+    assertEquals("protein_id", q.getName());
+    assertEquals(1, q.getValues().length);
+    assertEquals("CAA30420.1", q.getValues()[0]);
+    q = ef.getQualifiers().get(2);
     assertEquals("translation", q.getName());
     assertEquals(1, q.getValues().length);
-    assertEquals("MSLKDHLIHN", q.getValues()[0]);
+    assertEquals("MLCF", q.getValues()[0]);
     assertEquals(1, q.getEvidence().length);
     assertEquals("Keith", q.getEvidence()[0]);
 
-    // feature locations
-    assertEquals(1, ef.getLocations().size());
-    EmblFeatureLocations fl = ef.getLocations().get(0);
-    assertEquals("single", fl.getLocationType());
-    assertTrue(fl.isLocationComplement());
-    assertEquals(1, fl.getLocElements().size());
-    EmblFeatureLocElement le = fl.getLocElements().get(0);
-    assertEquals("range", le.getType());
-    assertEquals("X53828", le.getAccession());
-    assertEquals("1", le.getVersion());
-    assertFalse(le.isComplement());
-    assertEquals(2, le.getBasePositions().length);
-    BasePosition bp = le.getBasePositions()[0];
-    assertEquals("simple", bp.getType());
-    assertEquals("60", bp.getPos());
-    bp = le.getBasePositions()[1];
-    assertEquals("join", bp.getType());
-    assertEquals("1058", bp.getPos());
+    /*
+     * second CDS
+     */
+    ef = entry.getFeatures().get(1);
+    assertEquals("CDS", ef.getName());
+    assertEquals("4..15", ef.getLocation());
+    assertEquals(1, ef.getDbRefs().size());
+    dbref = ef.getDbRefs().get(0);
+    assertEquals("UniProtKB/Swiss-Prot", dbref.getSource());
+    assertEquals("B0BCM3", dbref.getAccessionId());
+    assertEquals("0", dbref.getVersion());
+    assertEquals(2, ef.getQualifiers().size());
+    q = ef.getQualifiers().get(0);
+    assertEquals("protein_id", q.getName());
+    assertEquals(1, q.getValues().length);
+    assertEquals("CAA30421.1", q.getValues()[0]);
+    q = ef.getQualifiers().get(1);
+    assertEquals("translation", q.getName());
+    assertEquals(1, q.getValues().length);
+    assertEquals("MSSS", q.getValues()[0]);
+
+    /*
+     * third CDS
+     */
+    ef = entry.getFeatures().get(2);
+    assertEquals("CDS", ef.getName());
+    assertEquals("join(4..6,10..15)", ef.getLocation());
+    assertNull(ef.getDbRefs());
+    assertEquals(2, ef.getQualifiers().size());
+    q = ef.getQualifiers().get(0);
+    assertEquals("protein_id", q.getName());
+    assertEquals(1, q.getValues().length);
+    assertEquals("CAA12345.6", q.getValues()[0]);
+    q = ef.getQualifiers().get(1);
+    assertEquals("translation", q.getName());
+    assertEquals(1, q.getValues().length);
+    assertEquals("MSS", q.getValues()[0]);
 
     /*
-     * Sequence
+     * Sequence - verify newline not converted to space (JAL-2029)
      */
     EmblSequence seq = entry.getSequence();
-    assertEquals("mRNA", seq.getType());
-    assertEquals("2", seq.getVersion());
-    assertEquals("GTGACG", seq.getSequence());
+    assertEquals(
+            "GGTATGTCCTCTAGTACAAACACCCCCAATATTGTGATATAATTAAAAACATAGCAT",
+            seq.getSequence());
+
+    /*
+     * getSequence() converts empty DBRefEntry.version to "0"
+     */
+    assertEquals("0", entry.getDbRefs().get(1).getVersion());
+    assertEquals("0", entry.getFeatures().get(0).getDbRefs().get(1)
+            .getVersion());
   }
 }