import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
import static org.testng.AssertJUnit.assertNull;

import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.Arrays;
import java.util.List;
import java.util.Set;

import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

import jalview.bin.Cache;
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.Mapping;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.datamodel.features.SequenceFeatures;
import jalview.util.MapList;

25 public class GenBankFileTest
27 @BeforeClass(alwaysRun = true)
34 * A fairly tough test, using J03321 (circular DNA), which has 8 CDS features,
35 * one of them reverse strand
37 * @throws MalformedURLException
40 @Test(groups = "Functional")
41 public void testParse() throws MalformedURLException, IOException
43 File dataFile = new File("test/jalview/io/J03321.gb");
44 FileParse fp = new FileParse(dataFile.getAbsolutePath(),
46 FlatFile parser = new GenBankFile(fp, "GenBankTest");
47 List<SequenceI> seqs = parser.getSeqs();
49 assertEquals(seqs.size(), 1);
50 SequenceI seq = seqs.get(0);
51 assertEquals(seq.getName(), "GenBankTest|J03321");
52 assertEquals(seq.getLength(), 7502);
53 assertEquals(seq.getDescription(),
54 "Chlamydia trachomatis plasmid pCHL1, complete sequence");
57 * should be 9 CDS features (one is a 'join' of two exons)
59 Set<String> featureTypes = seq.getFeatures().getFeatureTypes();
60 assertEquals(featureTypes.size(), 1);
61 assertTrue(featureTypes.contains("CDS"));
64 * inspect some features (sorted just for convenience of test assertions)
66 List<SequenceFeature> features = seq.getFeatures()
67 .getAllFeatures("CDS");
68 SequenceFeatures.sortFeatures(features, true);
69 assertEquals(features.size(), 9);
71 SequenceFeature sf = features.get(0);
72 assertEquals(sf.getBegin(), 1);
73 assertEquals(sf.getEnd(), 437);
74 assertEquals(sf.getDescription(),
75 "Exon 2 for protein EMBLCDS:AAA91567.1");
76 assertEquals(sf.getFeatureGroup(), "GenBankTest");
77 assertEquals(sf.getEnaLocation(), "join(7022..7502,1..437)");
78 assertEquals(sf.getPhase(), "0");
79 assertEquals(sf.getStrand(), 1);
80 assertEquals(sf.getValue("note"), "pGP7-D");
81 // this is the second exon of circular CDS!
82 assertEquals(sf.getValue("exon number"), 2);
83 assertEquals(sf.getValue("product"), "hypothetical protein");
84 assertEquals(sf.getValue("transl_table"), "11");
87 assertEquals(sf.getBegin(), 488);
88 assertEquals(sf.getEnd(), 1480);
89 assertEquals(sf.getDescription(),
90 "Exon 1 for protein EMBLCDS:AAA91568.1");
91 assertEquals(sf.getFeatureGroup(), "GenBankTest");
92 assertEquals(sf.getEnaLocation(), "complement(488..1480)");
93 assertEquals(sf.getPhase(), "0");
94 assertEquals(sf.getStrand(), -1); // reverse strand!
95 assertEquals(sf.getValue("note"), "pGP8-D");
96 assertEquals(sf.getValue("exon number"), 1);
97 assertEquals(sf.getValue("product"), "hypothetical protein");
100 assertEquals(sf.getBegin(), 6045);
101 assertEquals(sf.getEnd(), 6788);
102 assertEquals(sf.getDescription(),
103 "Exon 1 for protein EMBLCDS:AAA91574.1");
104 assertEquals(sf.getFeatureGroup(), "GenBankTest");
105 assertEquals(sf.getEnaLocation(), "6045..6788");
106 assertEquals(sf.getPhase(), "0");
107 assertEquals(sf.getStrand(), 1);
108 assertEquals(sf.getValue("note"), "pGP6-D (gtg start codon)");
109 assertEquals(sf.getValue("exon number"), 1);
110 assertEquals(sf.getValue("product"), "hypothetical protein");
113 * CDS at 7022-7502 is the first exon of the circular CDS
115 sf = features.get(8);
116 assertEquals(sf.getBegin(), 7022);
117 assertEquals(sf.getEnd(), 7502);
118 assertEquals(sf.getDescription(),
119 "Exon 1 for protein EMBLCDS:AAA91567.1");
120 assertEquals(sf.getFeatureGroup(), "GenBankTest");
121 assertEquals(sf.getEnaLocation(), "join(7022..7502,1..437)");
122 assertEquals(sf.getPhase(), "0");
123 assertEquals(sf.getStrand(), 1);
124 assertEquals(sf.getValue("note"), "pGP7-D");
125 assertEquals(sf.getValue("exon number"), 1);
126 assertEquals(sf.getValue("product"), "hypothetical protein");
129 * GenBank doesn't declare accession or CDS xrefs;
130 * dbrefs are added by Jalview for
132 * protein products: 8
134 List<DBRefEntry> dbrefs = Arrays.asList(seq.getDBRefs());
136 assertEquals(dbrefs.size(), 9);
138 DBRefEntry selfRef = new DBRefEntry("GENBANKTEST", "1", "J03321");
139 int[] range = new int[] { 1, seq.getLength() };
140 selfRef.setMap(new Mapping(null, range, range, 1, 1));
141 assertTrue(dbrefs.contains(selfRef));
144 * dna should have dbref to itself, and to EMBLCDSPROTEIN
145 * for each /protein_id (synthesized as no UNIPROT xref)
147 // TODO check if we should synthesize EMBLCDSPROTEIN dbrefs
148 DBRefEntry dbref = dbrefs.get(0);
149 assertEquals(dbref.getSource(), "GENBANKTEST");
150 assertEquals(dbref.getAccessionId(), "J03321");
151 Mapping mapping = dbref.getMap();
152 assertNull(mapping.getTo());
153 MapList map = mapping.getMap();
154 assertEquals(map.getFromLowest(), 1);
155 assertEquals(map.getFromHighest(), 7502);
156 assertEquals(map.getToLowest(), 1);
157 assertEquals(map.getToHighest(), 7502);
158 assertEquals(map.getFromRatio(), 1);
159 assertEquals(map.getToRatio(), 1);
161 // dbref to inferred EMBLCDSPROTEIN for first CDS
162 dbref = dbrefs.get(1);
163 assertEquals(dbref.getSource(), "EMBLCDSPROTEIN");
164 assertEquals(dbref.getAccessionId(), "AAA91567.1");
165 mapping = dbref.getMap();
166 SequenceI mapTo = mapping.getTo();
167 assertEquals(mapTo.getName(), "AAA91567.1");
168 // the /product qualifier transfers to protein product description
169 assertEquals(mapTo.getDescription(), "hypothetical protein");
170 String seqString = mapTo.getSequenceAsString();
171 assertEquals(seqString.length(), 305);
172 assertTrue(seqString.startsWith("MGSMAF"));
173 assertTrue(seqString.endsWith("QTPTIL"));
174 map = mapping.getMap();
175 assertEquals(map.getFromLowest(), 1);
176 assertEquals(map.getFromHighest(), 7502);
177 assertEquals(map.getToLowest(), 1);
178 assertEquals(map.getToHighest(), 305);
179 assertEquals(map.getFromRatio(), 3);
180 assertEquals(map.getToRatio(), 1);
182 // dbref to inferred EMBLCDSPROTEIN for last CDS
183 dbref = dbrefs.get(8);
184 assertEquals(dbref.getSource(), "EMBLCDSPROTEIN");
185 assertEquals(dbref.getAccessionId(), "AAA91574.1");
186 mapping = dbref.getMap();
187 mapTo = mapping.getTo();
188 assertEquals(mapTo.getName(), "AAA91574.1");
189 // the /product qualifier transfers to protein product description
190 assertEquals(mapTo.getDescription(), "hypothetical protein");
191 seqString = mapTo.getSequenceAsString();
192 assertEquals(seqString.length(), 247);
193 assertTrue(seqString.startsWith("MNKLK"));
194 assertTrue(seqString.endsWith("FKQKS"));
195 map = mapping.getMap();
196 assertEquals(map.getFromLowest(), 6045);
197 assertEquals(map.getFromHighest(), 6788);
198 assertEquals(map.getToLowest(), 1);
199 assertEquals(map.getToHighest(), 247);
200 assertEquals(map.getFromRatio(), 3);
201 assertEquals(map.getToRatio(), 1);