3 import static org.testng.Assert.assertEquals;
4 import static org.testng.Assert.assertTrue;
5 import static org.testng.AssertJUnit.assertNull;
8 import java.io.IOException;
9 import java.net.MalformedURLException;
10 import java.util.Arrays;
11 import java.util.List;
14 import org.testng.annotations.BeforeClass;
15 import org.testng.annotations.Test;
17 import jalview.bin.Cache;
18 import jalview.datamodel.DBRefEntry;
19 import jalview.datamodel.Mapping;
20 import jalview.datamodel.SequenceFeature;
21 import jalview.datamodel.SequenceI;
22 import jalview.datamodel.features.SequenceFeatures;
23 import jalview.util.MapList;
25 public class GenBankFileTest
27 @BeforeClass(alwaysRun = true)
34 * A fairly tough test: parses J03321 (circular DNA), which has 8 CDS features,
35 * one of them on the reverse strand
37 * @throws MalformedURLException
40 @Test(groups = "Functional")
41 public void testParse() throws MalformedURLException, IOException
43 File dataFile = new File("test/jalview/io/J03321.gb");
44 FileParse fp = new FileParse(dataFile.getAbsolutePath(),
46 FlatFile parser = new GenBankFile(fp, "GenBankTest");
48 List<SequenceI> seqs = parser.getSeqs();
50 assertEquals(seqs.size(), 1);
51 SequenceI seq = seqs.get(0);
52 assertEquals(seq.getName(), "GenBankTest|J03321");
53 assertEquals(seq.getLength(), 7502);
54 assertEquals(seq.getDescription(),
55 "Chlamydia trachomatis plasmid pCHL1, complete sequence");
58 * should be 9 CDS features: the file's 8 CDS entries give 9 because one is a 'join' of two exons
60 Set<String> featureTypes = seq.getFeatures().getFeatureTypes();
61 assertEquals(featureTypes.size(), 1);
62 assertTrue(featureTypes.contains("CDS"));
65 * inspect some features (sorted just for convenience of test assertions)
67 List<SequenceFeature> features = seq.getFeatures()
68 .getAllFeatures("CDS");
69 SequenceFeatures.sortFeatures(features, true);
70 assertEquals(features.size(), 9);
72 SequenceFeature sf = features.get(0);
73 assertEquals(sf.getBegin(), 1);
74 assertEquals(sf.getEnd(), 437);
75 assertEquals(sf.getDescription(),
76 "Exon 2 for protein EMBLCDS:AAA91567.1");
77 assertEquals(sf.getFeatureGroup(), "GenBankTest");
78 assertEquals(sf.getEnaLocation(), "join(7022..7502,1..437)");
79 assertEquals(sf.getPhase(), "0");
80 assertEquals(sf.getStrand(), 1);
81 assertEquals(sf.getValue("note"), "pGP7-D");
82 // although it starts at position 1, this is the second exon of the circular CDS join(7022..7502,1..437)
83 assertEquals(sf.getValue("exon number"), 2);
84 assertEquals(sf.getValue("product"), "hypothetical protein");
85 assertEquals(sf.getValue("transl_table"), "11");
88 assertEquals(sf.getBegin(), 488);
89 assertEquals(sf.getEnd(), 1480);
90 assertEquals(sf.getDescription(),
91 "Exon 1 for protein EMBLCDS:AAA91568.1");
92 assertEquals(sf.getFeatureGroup(), "GenBankTest");
93 assertEquals(sf.getEnaLocation(), "complement(488..1480)");
94 assertEquals(sf.getPhase(), "0");
95 assertEquals(sf.getStrand(), -1); // reverse strand!
96 assertEquals(sf.getValue("note"), "pGP8-D");
97 assertEquals(sf.getValue("exon number"), 1);
98 assertEquals(sf.getValue("product"), "hypothetical protein");
100 sf = features.get(7);
101 assertEquals(sf.getBegin(), 6045);
102 assertEquals(sf.getEnd(), 6788);
103 assertEquals(sf.getDescription(),
104 "Exon 1 for protein EMBLCDS:AAA91574.1");
105 assertEquals(sf.getFeatureGroup(), "GenBankTest");
106 assertEquals(sf.getEnaLocation(), "6045..6788");
107 assertEquals(sf.getPhase(), "0");
108 assertEquals(sf.getStrand(), 1);
109 assertEquals(sf.getValue("note"), "pGP6-D (gtg start codon)");
110 assertEquals(sf.getValue("exon number"), 1);
111 assertEquals(sf.getValue("product"), "hypothetical protein");
114 * CDS at 7022-7502 is the first exon of the circular CDS
116 sf = features.get(8);
117 assertEquals(sf.getBegin(), 7022);
118 assertEquals(sf.getEnd(), 7502);
119 assertEquals(sf.getDescription(),
120 "Exon 1 for protein EMBLCDS:AAA91567.1");
121 assertEquals(sf.getFeatureGroup(), "GenBankTest");
122 assertEquals(sf.getEnaLocation(), "join(7022..7502,1..437)");
123 assertEquals(sf.getPhase(), "0");
124 assertEquals(sf.getStrand(), 1);
125 assertEquals(sf.getValue("note"), "pGP7-D");
126 assertEquals(sf.getValue("exon number"), 1);
127 assertEquals(sf.getValue("product"), "hypothetical protein");
130 * GenBank doesn't declare accession or CDS xrefs;
131 * dbrefs are added by Jalview for
133 * protein products: 8
135 List<DBRefEntry> dbrefs = Arrays.asList(seq.getDBRefs());
137 assertEquals(dbrefs.size(), 9);
139 DBRefEntry selfRef = new DBRefEntry("GENBANKTEST", "1", "J03321");
140 int[] range = new int[] { 1, seq.getLength() };
141 selfRef.setMap(new Mapping(null, range, range, 1, 1));
142 assertTrue(dbrefs.contains(selfRef));
145 * dna should have dbref to itself, and to EMBLCDSPROTEIN
146 * for each /protein_id (synthesized as no UNIPROT xref)
148 // TODO check if we should synthesize EMBLCDSPROTEIN dbrefs
149 DBRefEntry dbref = dbrefs.get(0);
150 assertEquals(dbref.getSource(), "GENBANKTEST");
151 assertEquals(dbref.getAccessionId(), "J03321");
152 Mapping mapping = dbref.getMap();
153 assertNull(mapping.getTo());
154 MapList map = mapping.getMap();
155 assertEquals(map.getFromLowest(), 1);
156 assertEquals(map.getFromHighest(), 7502);
157 assertEquals(map.getToLowest(), 1);
158 assertEquals(map.getToHighest(), 7502);
159 assertEquals(map.getFromRatio(), 1);
160 assertEquals(map.getToRatio(), 1);
162 // dbref to inferred EMBLCDSPROTEIN for first CDS
163 dbref = dbrefs.get(1);
164 assertEquals(dbref.getSource(), "EMBLCDSPROTEIN");
165 assertEquals(dbref.getAccessionId(), "AAA91567.1");
166 mapping = dbref.getMap();
167 SequenceI mapTo = mapping.getTo();
168 assertEquals(mapTo.getName(), "AAA91567.1");
169 // the /product qualifier transfers to protein product description
170 assertEquals(mapTo.getDescription(), "hypothetical protein");
171 String seqString = mapTo.getSequenceAsString();
172 assertEquals(seqString.length(), 305);
173 assertTrue(seqString.startsWith("MGSMAF"));
174 assertTrue(seqString.endsWith("QTPTIL"));
175 map = mapping.getMap();
176 assertEquals(map.getFromLowest(), 1);
177 assertEquals(map.getFromHighest(), 7502);
178 assertEquals(map.getToLowest(), 1);
179 assertEquals(map.getToHighest(), 305);
180 assertEquals(map.getFromRatio(), 3);
181 assertEquals(map.getToRatio(), 1);
183 // dbref to inferred EMBLCDSPROTEIN for last CDS
184 dbref = dbrefs.get(8);
185 assertEquals(dbref.getSource(), "EMBLCDSPROTEIN");
186 assertEquals(dbref.getAccessionId(), "AAA91574.1");
187 mapping = dbref.getMap();
188 mapTo = mapping.getTo();
189 assertEquals(mapTo.getName(), "AAA91574.1");
190 // the /product qualifier transfers to protein product description
191 assertEquals(mapTo.getDescription(), "hypothetical protein");
192 seqString = mapTo.getSequenceAsString();
193 assertEquals(seqString.length(), 247);
194 assertTrue(seqString.startsWith("MNKLK"));
195 assertTrue(seqString.endsWith("FKQKS"));
196 map = mapping.getMap();
197 assertEquals(map.getFromLowest(), 6045);
198 assertEquals(map.getFromHighest(), 6788);
199 assertEquals(map.getToLowest(), 1);
200 assertEquals(map.getToHighest(), 247);
201 assertEquals(map.getFromRatio(), 3);
202 assertEquals(map.getToRatio(), 1);