JAL-2113 additions to embl_mapping.xml / data models / tests
[jalview.git] / test / jalview / datamodel / xdb / embl / EmblTestHelper.java
index 71ca508..9957c72 100644 (file)
@@ -4,22 +4,47 @@ import java.io.StringReader;
 
 public class EmblTestHelper
 {
-  // adapted from http://www.ebi.ac.uk/ena/data/view/x53828&display=xml
+  // adapted from http://www.ebi.ac.uk/ena/data/view/X07547&display=xml
+  // dna and translations truncated for convenience
   private static final String TESTDATA = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>"
           + "<ROOT>"
-          + "<entry accession=\"X53828\" entryVersion=\"3\" lastUpdated=\"2005-04-18\" releaseCreated=\"25\" releaseLastUpdated=\"83\""
-          + " version=\"1\" moleculeType=\"mRNA\" topology=\"linear\" sequenceLength=\"1575\">"
-          + "<description>Chicken LDH-A mRNA for lactate dehydrogenase A chain (EC 1.1.1.27)</description>"
-          + "<keyword>L-lactate dehydrogenase</keyword><keyword>chutney</keyword>"
-          + "<xref db=\"EuropePMC\" id=\"PMC1460223\" secondaryId=\"9649548\" />"
-          + "<xref db=\"MD5\" id=\"d3b68\" />"
-          + "<feature name=\"CDS\" location=\"60..1058\">"
-          + "<xref db=\"GOA\" id=\"P00340\" secondaryId=\"2.1\" /><xref db=\"InterPro\" id=\"IPR001236\" />"
-          + "<qualifier name=\"note\"><value>L-lactate dehydrogenase A-chain</value><value>pickle</value></qualifier>"
-          + "<qualifier name=\"translation\"><value>MSLKDHLIHN</value><evidence>Keith</evidence></qualifier>"
-          // emulate EMBL XML 1.2 which splits sequence data every 60 characters
-          // see EmblSequence.setSequence
-          + "</feature>" + "<sequence>GTG\nACG</sequence></entry></ROOT>";
+          + "<entry accession=\"X07547\" version=\"1\" entryVersion=\"8\""
+          + " dataClass=\"STD\" taxonomicDivision=\"PRO\""
+          + " moleculeType=\"genomic DNA\" sequenceLength=\"7499\" topology=\"linear\""
+          + " firstPublic=\"1988-11-10\" firstPublicRelease=\"18\""
+          + " lastUpdated=\"1999-02-10\" lastUpdatedRelease=\"58\">"
+          + "<secondaryAccession>X07574</secondaryAccession>"
+          + "<description>C. trachomatis plasmid</description>"
+          + "<keyword>plasmid</keyword><keyword>unidentified reading frame</keyword>"
+          + "<xref db=\"EuropePMC\" id=\"PMC107176\" secondaryId=\"9573186\" />"
+          + "<xref db=\"MD5\" id=\"ac73317\" />"
+          /*
+           * first CDS (range and translation changed to keep test data manageable)
+           */
+          + "<feature name=\"CDS\" location=\"complement(46..57)\">"
+          // test the case of >1 cross-ref to the same database (JAL-2029)
+          + "<xref db=\"UniProtKB/Swiss-Prot\" id=\"B0BCM4\" secondaryId=\"2.1\" />"
+          + "<xref db=\"UniProtKB/Swiss-Prot\" id=\"P0CE20\" />"
+          + "<qualifier name=\"note\"><value>ORF 8 (AA 1-330)</value><value>pickle</value></qualifier>"
+          + "<qualifier name=\"protein_id\"><value>CAA30420.1</value></qualifier>"
+          + "<qualifier name=\"translation\"><value>MLCF</value><evidence>Keith</evidence></qualifier>"
+          + "</feature>"
+          /*
+           * second CDS (range and translation changed to keep test data manageable)
+           */
+          + "<feature name=\"CDS\" location=\"4..15\">"
+          + "<xref db=\"UniProtKB/Swiss-Prot\" id=\"B0BCM3\" />"
+          + "<qualifier name=\"protein_id\"><value>CAA30421.1</value></qualifier>"
+          + "<qualifier name=\"translation\"><value>MSSS</value></qualifier>"
+          + "</feature>"
+          /*
+           * sequence (modified for test purposes)
+           * emulates EMBL XML 1.2 which splits sequence data every 60 characters
+           * see EmblSequence.setSequence
+           */
+          + "<sequence>GGTATGTCCTCTAGTACAAAC\n"
+          + "ACCCCCAATATTGTGATATAATTAAAAACATAGCAT"
+          + "</sequence></entry></ROOT>";
 
   static EmblFile getEmblFile()
   {