/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 * 
 * This file is part of Jalview.
 * 
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License 
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *  
 * Jalview is distributed in the hope that it will be useful, but 
 * WITHOUT ANY WARRANTY; without even the implied warranty 
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
 * PURPOSE.  See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
21 package jalview.datamodel.xdb.embl;
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertNull;
25 import static org.testng.AssertJUnit.assertSame;
27 import jalview.analysis.SequenceIdMatcher;
28 import jalview.datamodel.DBRefEntry;
29 import jalview.datamodel.DBRefSource;
30 import jalview.datamodel.SequenceI;
31 import jalview.gui.JvOptionPane;
32 import jalview.util.MapList;
34 import java.util.ArrayList;
35 import java.util.Arrays;
36 import java.util.List;
38 import org.testng.annotations.BeforeClass;
39 import org.testng.annotations.Test;
41 public class EmblEntryTest
44 @BeforeClass(alwaysRun = true)
45 public void setUpJvOptionPane()
47 JvOptionPane.setInteractiveMode(false);
48 JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
51 @Test(groups = "Functional")
52 public void testGetCdsRanges()
54 EmblEntry testee = new EmblEntry();
57 * Make a (CDS) Feature with 5 locations
59 EmblFeature cds = new EmblFeature();
60 cds.setLocation("join(10..20,complement(30..40),50..60,70..80,complement(110..120))");
62 int[] exons = testee.getCdsRanges(cds);
63 assertEquals("[10, 20, 40, 30, 50, 60, 70, 80, 120, 110]",
64 Arrays.toString(exons));
67 @Test(groups = "Functional")
68 public void testParseCodingFeature()
70 // not the whole sequence but enough for this test...
71 List<SequenceI> peptides = new ArrayList<SequenceI>();
72 SequenceIdMatcher matcher = new SequenceIdMatcher(peptides);
73 EmblFile ef = EmblTestHelper.getEmblFile();
74 assertEquals(1, ef.getEntries().size());
75 EmblEntry testee = ef.getEntries().get(0);
76 String sourceDb = "EMBL";
77 SequenceI dna = testee.makeSequence(sourceDb);
80 * parse three CDS features, with two/one/no Uniprot cross-refs
82 for (EmblFeature feature : ef.getEntries().get(0).getFeatures())
84 if ("CDS".equals(feature.getName()))
86 testee.parseCodingFeature(feature, sourceDb, dna, peptides, matcher);
91 * peptides should now have five entries:
92 * EMBL product and two Uniprot accessions for the first CDS / translation
93 * EMBL product and one Uniprot accession for the second CDS / "
94 * EMBL product only for the third
96 assertEquals(6, peptides.size());
97 assertEquals("CAA30420.1", peptides.get(0).getName());
98 assertEquals("MLCF", peptides.get(0).getSequenceAsString());
99 assertEquals("UNIPROT|B0BCM4", peptides.get(1).getName());
100 assertEquals("MLCF", peptides.get(1).getSequenceAsString());
101 assertEquals("UNIPROT|P0CE20", peptides.get(2).getName());
102 assertEquals("MLCF", peptides.get(2).getSequenceAsString());
103 assertEquals("CAA30421.1", peptides.get(3).getName());
104 assertEquals("MSSS", peptides.get(3).getSequenceAsString());
105 assertEquals("UNIPROT|B0BCM3", peptides.get(4).getName());
106 assertEquals("MSSS", peptides.get(4).getSequenceAsString());
107 assertEquals("CAA12345.6", peptides.get(5).getName());
108 assertEquals("MSS", peptides.get(5).getSequenceAsString());
111 * verify dna sequence has dbrefs with CDS mappings to the peptide 'products'
113 MapList cds1Map = new MapList(new int[] { 57, 46 }, new int[] { 1, 4 },
115 MapList cds2Map = new MapList(new int[] { 4, 15 }, new int[] { 1, 4 },
117 MapList cds3Map = new MapList(new int[] { 4, 6, 10, 15 }, new int[] {
119 DBRefEntry[] dbrefs = dna.getDBRefs();
120 assertEquals(4, dbrefs.length);
121 DBRefEntry dbRefEntry = dbrefs[0];
122 assertEquals("UNIPROT", dbRefEntry.getSource());
123 assertEquals("B0BCM4", dbRefEntry.getAccessionId());
124 assertSame(peptides.get(1), dbRefEntry.getMap().getTo());
125 assertEquals(cds1Map, dbRefEntry.getMap().getMap());
127 dbRefEntry = dbrefs[1];
128 assertEquals("UNIPROT", dbRefEntry.getSource());
129 assertEquals("P0CE20", dbRefEntry.getAccessionId());
130 assertSame(peptides.get(2), dbRefEntry.getMap().getTo());
131 assertEquals(cds1Map, dbRefEntry.getMap().getMap());
133 dbRefEntry = dbrefs[2];
134 assertEquals("UNIPROT", dbRefEntry.getSource());
135 assertEquals("B0BCM3", dbRefEntry.getAccessionId());
136 assertSame(peptides.get(4), dbRefEntry.getMap().getTo());
137 assertEquals(cds2Map, dbRefEntry.getMap().getMap());
139 dbRefEntry = dbrefs[3];
140 assertEquals("EMBLCDSPROTEIN", dbRefEntry.getSource());
141 assertEquals("CAA12345.6", dbRefEntry.getAccessionId());
142 assertSame(peptides.get(5), dbRefEntry.getMap().getTo());
143 assertEquals(cds3Map, dbRefEntry.getMap().getMap());
146 * verify peptides have dbrefs
147 * - to EMBL sequence (with inverse 1:3 cds mapping)
148 * - to EMBLCDS (with 1:3 mapping)
149 * - direct (no mapping) to other protein accessions
151 MapList proteinToCdsMap1 = new MapList(new int[] { 1, 4 }, new int[] {
153 MapList proteinToCdsMap2 = new MapList(new int[] { 1, 3 }, new int[] {
156 // dbrefs for first CDS EMBL product CAA30420.1
157 dbrefs = peptides.get(0).getDBRefs();
158 assertEquals(5, dbrefs.length);
159 assertEquals(DBRefSource.EMBL, dbrefs[0].getSource());
160 assertEquals("CAA30420.1", dbrefs[0].getAccessionId());
161 // TODO: verify getPrimaryDBRefs() for peptide products
162 assertEquals(cds1Map.getInverse(), dbrefs[0].getMap().getMap());
163 assertEquals(DBRefSource.EMBLCDS, dbrefs[1].getSource());
164 assertEquals("CAA30420.1", dbrefs[1].getAccessionId());
165 assertEquals(proteinToCdsMap1, dbrefs[1].getMap().getMap());
166 assertEquals(DBRefSource.EMBLCDSProduct, dbrefs[2].getSource());
167 assertEquals("CAA30420.1", dbrefs[2].getAccessionId());
168 assertNull(dbrefs[2].getMap());
169 assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "2.1", "B0BCM4"),
171 assertNull(dbrefs[3].getMap());
172 assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "0", "P0CE20"),
174 assertNull(dbrefs[4].getMap());
176 // dbrefs for first CDS first Uniprot xref
177 dbrefs = peptides.get(1).getDBRefs();
178 assertEquals(2, dbrefs.length);
179 assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "2.1", "B0BCM4"),
181 assertNull(dbrefs[0].getMap());
182 assertEquals(DBRefSource.EMBL, dbrefs[1].getSource());
183 assertEquals("X07547", dbrefs[1].getAccessionId());
184 assertEquals(cds1Map.getInverse(), dbrefs[1].getMap().getMap());
186 // dbrefs for first CDS second Uniprot xref
187 dbrefs = peptides.get(2).getDBRefs();
188 assertEquals(2, dbrefs.length);
189 assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "0", "P0CE20"),
191 assertNull(dbrefs[0].getMap());
192 assertEquals(DBRefSource.EMBL, dbrefs[1].getSource());
193 assertEquals("X07547", dbrefs[1].getAccessionId());
194 assertEquals(cds1Map.getInverse(), dbrefs[1].getMap().getMap());
196 // dbrefs for second CDS EMBL product CAA30421.1
197 dbrefs = peptides.get(3).getDBRefs();
198 assertEquals(4, dbrefs.length);
199 assertEquals(DBRefSource.EMBL, dbrefs[0].getSource());
200 assertEquals("CAA30421.1", dbrefs[0].getAccessionId());
201 assertEquals(cds2Map.getInverse(), dbrefs[0].getMap().getMap());
202 assertEquals(DBRefSource.EMBLCDS, dbrefs[1].getSource());
203 assertEquals("CAA30421.1", dbrefs[1].getAccessionId());
204 assertEquals(proteinToCdsMap1, dbrefs[1].getMap().getMap());
205 assertEquals(DBRefSource.EMBLCDSProduct, dbrefs[2].getSource());
206 assertEquals("CAA30421.1", dbrefs[2].getAccessionId());
207 assertNull(dbrefs[2].getMap());
208 assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "0", "B0BCM3"),
210 assertNull(dbrefs[3].getMap());
212 // dbrefs for second CDS second Uniprot xref
213 dbrefs = peptides.get(4).getDBRefs();
214 assertEquals(2, dbrefs.length);
215 assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "0", "B0BCM3"),
217 assertNull(dbrefs[0].getMap());
218 assertEquals(DBRefSource.EMBL, dbrefs[1].getSource());
219 assertEquals("X07547", dbrefs[1].getAccessionId());
220 assertEquals(cds2Map.getInverse(), dbrefs[1].getMap().getMap());
222 // dbrefs for third CDS inferred EMBL product CAA12345.6
223 dbrefs = peptides.get(5).getDBRefs();
224 assertEquals(3, dbrefs.length);
225 assertEquals(DBRefSource.EMBL, dbrefs[0].getSource());
226 assertEquals("CAA12345.6", dbrefs[0].getAccessionId());
227 assertEquals(cds3Map.getInverse(), dbrefs[0].getMap().getMap());
228 assertEquals(DBRefSource.EMBLCDS, dbrefs[1].getSource());
229 assertEquals("CAA12345.6", dbrefs[1].getAccessionId());
230 assertEquals(proteinToCdsMap2, dbrefs[1].getMap().getMap());
231 assertEquals(DBRefSource.EMBLCDSProduct, dbrefs[2].getSource());
232 assertEquals("CAA12345.6", dbrefs[2].getAccessionId());
233 assertNull(dbrefs[2].getMap());
236 @Test(groups = "Functional")
237 public void testAdjustForProteinLength()
239 int[] exons = new int[] { 11, 15, 21, 25, 31, 38 }; // 18 bp
241 // exact length match:
242 assertSame(exons, EmblEntry.adjustForProteinLength(6, exons));
244 // match if we assume exons include stop codon not in protein:
245 assertSame(exons, EmblEntry.adjustForProteinLength(5, exons));
247 // truncate last exon by 6bp
248 int[] truncated = EmblEntry.adjustForProteinLength(4, exons);
249 assertEquals("[11, 15, 21, 25, 31, 32]", Arrays.toString(truncated));
251 // remove last exon and truncate preceding by 1bp
252 truncated = EmblEntry.adjustForProteinLength(3, exons);
253 assertEquals("[11, 15, 21, 24]", Arrays.toString(truncated));
255 // exact removal of exon case:
256 exons = new int[] { 11, 15, 21, 27, 33, 38 }; // 18 bp
257 truncated = EmblEntry.adjustForProteinLength(4, exons);
258 assertEquals("[11, 15, 21, 27]", Arrays.toString(truncated));
260 // what if exons are too short for protein?
261 truncated = EmblEntry.adjustForProteinLength(7, exons);
262 assertSame(exons, truncated);