Merge branch 'develop' into features/JAL-2094_colourInterface
[jalview.git] / test / jalview / datamodel / xdb / embl / EmblTestHelper.java
diff --git a/test/jalview/datamodel/xdb/embl/EmblTestHelper.java b/test/jalview/datamodel/xdb/embl/EmblTestHelper.java
new file mode 100644 (file)
index 0000000..6349164
--- /dev/null
@@ -0,0 +1,61 @@
+package jalview.datamodel.xdb.embl;
+
+import java.io.StringReader;
+
+public class EmblTestHelper
+{
+  // adapted from http://www.ebi.ac.uk/ena/data/view/X07547&display=xml
+  // dna and translations truncated for convenience
+  private static final String TESTDATA = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>"
+          + "<ROOT>"
+          + "<entry accession=\"X07547\" version=\"1\" entryVersion=\"8\""
+          + " dataClass=\"STD\" taxonomicDivision=\"PRO\""
+          + " moleculeType=\"genomic DNA\" sequenceLength=\"7499\" topology=\"linear\""
+          + " firstPublic=\"1988-11-10\" firstPublicRelease=\"18\""
+          + " lastUpdated=\"1999-02-10\" lastUpdatedRelease=\"58\">"
+          + "<secondaryAccession>X07574</secondaryAccession>"
+          + "<description>C. trachomatis plasmid</description>"
+          + "<keyword>plasmid</keyword><keyword>unidentified reading frame</keyword>"
+          + "<xref db=\"EuropePMC\" id=\"PMC107176\" secondaryId=\"9573186\" />"
+          + "<xref db=\"MD5\" id=\"ac73317\" />"
+          /*
+           * first CDS (range and translation changed to keep test data manageable)
+           */
+          + "<feature name=\"CDS\" location=\"complement(46..57)\">"
+          // test the case of >1 cross-ref to the same database (JAL-2029)
+          + "<xref db=\"UniProtKB/Swiss-Prot\" id=\"B0BCM4\" secondaryId=\"2.1\" />"
+          + "<xref db=\"UniProtKB/Swiss-Prot\" id=\"P0CE20\" />"
+          + "<qualifier name=\"note\"><value>ORF 8 (AA 1-330)</value><value>pickle</value></qualifier>"
+          + "<qualifier name=\"protein_id\"><value>CAA30420.1</value></qualifier>"
+          + "<qualifier name=\"translation\"><value>MLCF</value><evidence>Keith</evidence></qualifier>"
+          + "</feature>"
+          /*
+           * second CDS (range and translation changed to keep test data manageable)
+           */
+          + "<feature name=\"CDS\" location=\"4..15\">"
+          + "<xref db=\"UniProtKB/Swiss-Prot\" id=\"B0BCM3\" />"
+          + "<qualifier name=\"protein_id\"><value>CAA30421.1</value></qualifier>"
+          + "<qualifier name=\"translation\"><value>MSSS</value></qualifier>"
+          + "</feature>"
+          /*
+           * third CDS is made up - has no xref - code should synthesize 
+           * one to an assumed EMBLCDSPROTEIN accession
+           */
+          + "<feature name=\"CDS\" location=\"join(4..6,10..15)\">"
+          + "<qualifier name=\"protein_id\"><value>CAA12345.6</value></qualifier>"
+          + "<qualifier name=\"translation\"><value>MSS</value></qualifier>"
+          + "</feature>"
+          /*
+           * sequence (modified for test purposes)
+           * emulates EMBL XML 1.2 which splits sequence data every 60 characters
+           * see EmblSequence.setSequence
+           */
+          + "<sequence>GGTATGTCCTCTAGTACAAAC\n"
+          + "ACCCCCAATATTGTGATATAATTAAAAACATAGCAT"
+          + "</sequence></entry></ROOT>";
+
+  static EmblFile getEmblFile()
+  {
+    return EmblFile.getEmblFile(new StringReader(TESTDATA));
+  }
+}