- + "<entry accession=\"X53828\" entryVersion=\"3\" lastUpdated=\"2005-04-18\" releaseCreated=\"25\" releaseLastUpdated=\"83\""
- + " version=\"1\" moleculeType=\"mRNA\" topology=\"linear\" sequenceLength=\"1575\">"
- + "<description>Chicken LDH-A mRNA for lactate dehydrogenase A chain (EC 1.1.1.27)</description>"
- + "<keyword>L-lactate dehydrogenase</keyword><keyword>chutney</keyword>"
- + "<xref db=\"EuropePMC\" id=\"PMC1460223\" secondaryId=\"9649548\" />"
- + "<xref db=\"MD5\" id=\"d3b68\" />"
- + "<feature name=\"CDS\" location=\"60..1058\">"
- + "<xref db=\"GOA\" id=\"P00340\" secondaryId=\"2.1\" /><xref db=\"InterPro\" id=\"IPR001236\" />"
- + "<qualifier name=\"note\"><value>L-lactate dehydrogenase A-chain</value><value>pickle</value></qualifier>"
- + "<qualifier name=\"translation\"><value>MSLKDHLIHN</value><evidence>Keith</evidence></qualifier>"
- // emulate EMBL XML 1.2 which splits sequence data every 60 characters
- // see EmblSequence.setSequence
- + "</feature>" + "<sequence>GTG\nACG</sequence></entry></ROOT>";
+ + "<entry accession=\"X07547\" version=\"1\" entryVersion=\"8\""
+ + " dataClass=\"STD\" taxonomicDivision=\"PRO\""
+ + " moleculeType=\"genomic DNA\" sequenceLength=\"7499\" topology=\"linear\""
+ + " firstPublic=\"1988-11-10\" firstPublicRelease=\"18\""
+ + " lastUpdated=\"1999-02-10\" lastUpdatedRelease=\"58\">"
+ + "<secondaryAccession>X07574</secondaryAccession>"
+ + "<description>C. trachomatis plasmid</description>"
+ + "<keyword>plasmid</keyword><keyword>unidentified reading frame</keyword>"
+ + "<xref db=\"EuropePMC\" id=\"PMC107176\" secondaryId=\"9573186\" />"
+ + "<xref db=\"MD5\" id=\"ac73317\" />"
+ /*
+ * first CDS (range and translation changed to keep test data manageable)
+ */
+ + "<feature name=\"CDS\" location=\"complement(46..57)\">"
+ // test the case of >1 cross-ref to the same database (JAL-2029)
+ + "<xref db=\"UniProtKB/Swiss-Prot\" id=\"B0BCM4\" secondaryId=\"2.1\" />"
+ + "<xref db=\"UniProtKB/Swiss-Prot\" id=\"P0CE20\" />"
+ + "<qualifier name=\"note\"><value>ORF 8 (AA 1-330)</value><value>pickle</value></qualifier>"
+ + "<qualifier name=\"protein_id\"><value>CAA30420.1</value></qualifier>"
+ + "<qualifier name=\"translation\"><value>MLCF</value><evidence>Keith</evidence></qualifier>"
+ + "</feature>"
+ /*
+ * second CDS (range and translation changed to keep test data manageable)
+ */
+ + "<feature name=\"CDS\" location=\"4..15\">"
+ + "<xref db=\"UniProtKB/Swiss-Prot\" id=\"B0BCM3\" />"
+ + "<qualifier name=\"protein_id\"><value>CAA30421.1</value></qualifier>"
+ + "<qualifier name=\"translation\"><value>MSSS</value></qualifier>"
+ + "</feature>"
+ /*
+ * third CDS is made up - has no xref - code should synthesize
+ * one to an assumed EMBLCDSPROTEIN accession
+ */
+ + "<feature name=\"CDS\" location=\"join(4..6,10..15)\">"
+ + "<qualifier name=\"protein_id\"><value>CAA12345.6</value></qualifier>"
+ + "<qualifier name=\"translation\"><value>MSS</value></qualifier>"
+ + "</feature>"
+ /*
+ * sequence (modified for test purposes)
+ * emulates EMBL XML 1.2 which splits sequence data every 60 characters
+ * see EmblSequence.setSequence
+ */
+ + "<sequence>GGTATGTCCTCTAGTACAAAC\n"
+ + "ACCCCCAATATTGTGATATAATTAAAAACATAGCAT"
+ + "</sequence></entry></ROOT>";