JAL-2114 save 'raw' ENA location as a sequence feature attribute
author gmungoc <g.m.carstairs@dundee.ac.uk>
Mon, 23 May 2016 12:07:38 +0000 (13:07 +0100)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Mon, 23 May 2016 12:07:38 +0000 (13:07 +0100)
src/jalview/datamodel/SequenceFeature.java
src/jalview/datamodel/xdb/embl/EmblEntry.java
test/jalview/datamodel/xdb/embl/EmblEntryTest.java
test/jalview/datamodel/xdb/embl/EmblFileTest.java
test/jalview/datamodel/xdb/embl/EmblTestHelper.java [new file with mode: 0644]

index f2eb8ac..c75d6f2 100755 (executable)
@@ -39,6 +39,9 @@ public class SequenceFeature
   // private key for Phase designed not to conflict with real GFF data
   private static final String PHASE = "!Phase";
 
+  // private key for ENA location designed not to conflict with real GFF data
+  private static final String LOCATION = "!Location";
+
   /*
    * ATTRIBUTES is reserved for the GFF 'column 9' data, formatted as
    * name1=value1;name2=value2,value3;...etc
@@ -55,6 +58,10 @@ public class SequenceFeature
 
   public String description;
 
+  /*
+   * a map of key-value pairs; may be populated from GFF 'column 9' data,
+   * other data sources (e.g. GenBank file), or programmatically
+   */
   public Map<String, Object> otherDetails;
 
   public Vector<String> links;
@@ -480,6 +487,26 @@ public class SequenceFeature
   }
 
   /**
+   * Sets the 'raw' ENA format location specifier e.g. join(12..45,89..121)
+   * 
+   * @param loc the location specifier, stored via setValue under the private LOCATION key
+   */
+  public void setEnaLocation(String loc)
+  {
+    setValue(LOCATION, loc);
+  }
+
+  /**
+   * Gets the 'raw' ENA format location specifier e.g. join(12..45,89..121)
+   * 
+   * @return the value held under the private LOCATION key, cast to String
+   */
+  public String getEnaLocation()
+  {
+    return (String) getValue(LOCATION);
+  }
+
+  /**
    * Readable representation, for debug only, not guaranteed not to change
    * between versions
    */
index 3207411..50a262f 100644 (file)
@@ -510,6 +510,7 @@ public class EmblEntry
         SequenceFeature sf = makeCdsFeature(exon, xint, prname, prid, vals,
                 codonStart);
         sf.setType(feature.getName()); // "CDS"
+        sf.setEnaLocation(feature.getLocation());
         sf.setFeatureGroup(sourceDb);
         dna.addSequenceFeature(sf);
       }
index cab3b92..c36b7d3 100644 (file)
@@ -2,7 +2,12 @@ package jalview.datamodel.xdb.embl;
 
 import static org.testng.AssertJUnit.assertEquals;
 
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceI;
+
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.List;
 
 import org.testng.annotations.Test;
 
@@ -23,4 +28,25 @@ public class EmblEntryTest
     assertEquals("[10, 20, 40, 30, 50, 60, 70, 80, 120, 110]",
             Arrays.toString(exons));
   }
+
+  @Test(groups = "Functional")
+  public void testParseCodingFeature()
+  {
+    // not the whole sequence but enough for this test; NOTE(review): nothing is asserted after the parse call
+    SequenceI dna = new Sequence("J03321", "GGATCCGTAAGTTAGACGAAATT");
+    List<SequenceI> peptides = new ArrayList<SequenceI>();
+    EmblFile ef = EmblTestHelper.getEmblFile();
+    EmblFeature feature = null;
+    for (EmblFeature feat : ef.getEntries().get(0).getFeatures())
+    {
+      if ("CDS".equals(feat.getName()))
+      {
+        feature = feat;
+        break;
+      }
+    }
+
+    EmblEntry testee = new EmblEntry();
+    testee.parseCodingFeature(feature, "EMBL", dna, peptides);
+  }
 }
index fda25e3..a62cb87 100644 (file)
@@ -25,34 +25,17 @@ import static org.testng.AssertJUnit.assertNull;
 
 import jalview.datamodel.DBRefEntry;
 
-import java.io.StringReader;
 import java.util.Vector;
 
 import org.testng.annotations.Test;
 
 public class EmblFileTest
 {
-  // adapted from http://www.ebi.ac.uk/ena/data/view/x53828&display=xml
-  private static final String TESTDATA = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>"
-          + "<ROOT>"
-          + "<entry accession=\"X53828\" entryVersion=\"3\" lastUpdated=\"2005-04-18\" releaseCreated=\"25\" releaseLastUpdated=\"83\""
-          + " version=\"1\" moleculeType=\"mRNA\" topology=\"linear\" sequenceLength=\"1575\">"
-          + "<description>Chicken LDH-A mRNA for lactate dehydrogenase A chain (EC 1.1.1.27)</description>"
-          + "<keyword>L-lactate dehydrogenase</keyword><keyword>chutney</keyword>"
-          + "<xref db=\"EuropePMC\" id=\"PMC1460223\" secondaryId=\"9649548\" />"
-          + "<xref db=\"MD5\" id=\"d3b68\" />"
-          + "<feature name=\"CDS\" location=\"60..1058\">"
-          + "<xref db=\"GOA\" id=\"P00340\" secondaryId=\"2.1\" /><xref db=\"InterPro\" id=\"IPR001236\" />"
-          + "<qualifier name=\"note\"><value>L-lactate dehydrogenase A-chain</value><value>pickle</value></qualifier>"
-          + "<qualifier name=\"translation\"><value>MSLKDHLIHN</value><evidence>Keith</evidence></qualifier>"
-          + "</feature>"
-          + "<sequence>GTGACG</sequence></entry></ROOT>";
 
   @Test(groups = { "Functional" })
   public void testGetEmblFile()
   {
-    Vector<EmblEntry> entries = EmblFile.getEmblFile(
-            new StringReader(TESTDATA)).getEntries();
+    Vector<EmblEntry> entries = EmblTestHelper.getEmblFile().getEntries();
     assertEquals(1, entries.size());
     EmblEntry entry = entries.get(0);
 
diff --git a/test/jalview/datamodel/xdb/embl/EmblTestHelper.java b/test/jalview/datamodel/xdb/embl/EmblTestHelper.java
new file mode 100644 (file)
index 0000000..8f631df
--- /dev/null
@@ -0,0 +1,26 @@
+package jalview.datamodel.xdb.embl;
+
+import java.io.StringReader;
+
+public class EmblTestHelper
+{
+  // one <entry> with a single CDS feature; adapted from http://www.ebi.ac.uk/ena/data/view/x53828&display=xml
+  private static final String TESTDATA = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>"
+          + "<ROOT>"
+          + "<entry accession=\"X53828\" entryVersion=\"3\" lastUpdated=\"2005-04-18\" releaseCreated=\"25\" releaseLastUpdated=\"83\""
+          + " version=\"1\" moleculeType=\"mRNA\" topology=\"linear\" sequenceLength=\"1575\">"
+          + "<description>Chicken LDH-A mRNA for lactate dehydrogenase A chain (EC 1.1.1.27)</description>"
+          + "<keyword>L-lactate dehydrogenase</keyword><keyword>chutney</keyword>"
+          + "<xref db=\"EuropePMC\" id=\"PMC1460223\" secondaryId=\"9649548\" />"
+          + "<xref db=\"MD5\" id=\"d3b68\" />"
+          + "<feature name=\"CDS\" location=\"60..1058\">"
+          + "<xref db=\"GOA\" id=\"P00340\" secondaryId=\"2.1\" /><xref db=\"InterPro\" id=\"IPR001236\" />"
+          + "<qualifier name=\"note\"><value>L-lactate dehydrogenase A-chain</value><value>pickle</value></qualifier>"
+          + "<qualifier name=\"translation\"><value>MSLKDHLIHN</value><evidence>Keith</evidence></qualifier>"
+          + "</feature>" + "<sequence>GTGACG</sequence></entry></ROOT>";
+
+  static EmblFile getEmblFile()
+  {
+    return EmblFile.getEmblFile(new StringReader(TESTDATA));
+  }
+}