JAL-3116 parse EMBL XML with JAXB (todo: update unit tests)
[jalview.git] / test / jalview / datamodel / xdb / embl / EmblTestHelper.java
1 /*
2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
4  * 
5  * This file is part of Jalview.
6  * 
7  * Jalview is free software: you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License 
9  * as published by the Free Software Foundation, either version 3
10  * of the License, or (at your option) any later version.
11  *  
12  * Jalview is distributed in the hope that it will be useful, but 
13  * WITHOUT ANY WARRANTY; without even the implied warranty 
14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
15  * PURPOSE.  See the GNU General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
19  * The Jalview Authors are detailed in the 'AUTHORS' file.
20  */
21 package jalview.datamodel.xdb.embl;
22
23 import java.io.StringReader;
24
25 public class EmblTestHelper
26 {
27   // adapted from http://www.ebi.ac.uk/ena/data/view/X07547&display=xml
28   // dna and translations truncated for convenience
29   private static final String TESTDATA = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>"
30           + "<ROOT>"
31           + "<entry accession=\"X07547\" version=\"1\" entryVersion=\"8\""
32           + " dataClass=\"STD\" taxonomicDivision=\"PRO\""
33           + " moleculeType=\"genomic DNA\" sequenceLength=\"7499\" topology=\"linear\""
34           + " firstPublic=\"1988-11-10\" firstPublicRelease=\"18\""
35           + " lastUpdated=\"1999-02-10\" lastUpdatedRelease=\"58\">"
36           + "<secondaryAccession>X07574</secondaryAccession>"
37           + "<description>C. trachomatis plasmid</description>"
38           + "<keyword>plasmid</keyword><keyword>unidentified reading frame</keyword>"
39           + "<xref db=\"EuropePMC\" id=\"PMC107176\" secondaryId=\"9573186\" />"
40           + "<xref db=\"MD5\" id=\"ac73317\" />"
41           /*
42            * first CDS (range and translation changed to keep test data manageable)
43            */
44           + "<feature name=\"CDS\" location=\"complement(46..57)\">"
45           // test the case of >1 cross-ref to the same database (JAL-2029)
46           + "<xref db=\"UniProtKB/Swiss-Prot\" id=\"B0BCM4\" secondaryId=\"2.1\" />"
47           + "<xref db=\"UniProtKB/Swiss-Prot\" id=\"P0CE20\" />"
48           + "<qualifier name=\"note\"><value>ORF 8 (AA 1-330)</value><value>pickle</value></qualifier>"
49           + "<qualifier name=\"protein_id\"><value>CAA30420.1</value></qualifier>"
50           + "<qualifier name=\"translation\"><value>MLCF</value><evidence>Keith</evidence></qualifier>"
51           + "</feature>"
52           /*
53            * second CDS (range and translation changed to keep test data manageable)
54            */
55           + "<feature name=\"CDS\" location=\"4..15\">"
56           + "<xref db=\"UniProtKB/Swiss-Prot\" id=\"B0BCM3\" />"
57           + "<qualifier name=\"protein_id\"><value>CAA30421.1</value></qualifier>"
58           + "<qualifier name=\"translation\"><value>MSSS</value></qualifier>"
59           + "</feature>"
60           /*
61            * third CDS is made up - has no xref - code should synthesize 
62            * one to an assumed EMBLCDSPROTEIN accession
63            */
64           + "<feature name=\"CDS\" location=\"join(4..6,10..15)\">"
65           + "<qualifier name=\"protein_id\"><value>CAA12345.6</value></qualifier>"
66           + "<qualifier name=\"translation\"><value>MSS</value></qualifier>"
67           + "</feature>"
68           /*
69            * sequence (modified for test purposes)
70            * emulates EMBL XML 1.2 which splits sequence data every 60 characters
71            * see EmblSequence.setSequence
72            */
73           + "<sequence>GGTATGTCCTCTAGTACAAAC\n"
74           + "ACCCCCAATATTGTGATATAATTAAAAACATAGCAT"
75           + "</sequence></entry></ROOT>";
76
77   static EmblFile getEmblFile()
78   {
79     return EmblFile.getEmblFile(new StringReader(TESTDATA));
80   }
81 }