/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 *
 * This file is part of Jalview.
 *
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * Jalview is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
21 package jalview.datamodel.xdb.embl;
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertNull;
25 import static org.testng.AssertJUnit.assertSame;
27 import jalview.analysis.SequenceIdMatcher;
28 import jalview.datamodel.DBRefEntry;
29 import jalview.datamodel.DBRefSource;
30 import jalview.datamodel.SequenceI;
31 import jalview.util.MapList;
33 import java.util.ArrayList;
34 import java.util.Arrays;
35 import java.util.List;
37 import org.testng.annotations.Test;
39 public class EmblEntryTest
41 @Test(groups = "Functional")
42 public void testGetCdsRanges()
44 EmblEntry testee = new EmblEntry();
47 * Make a (CDS) Feature with 5 locations
49 EmblFeature cds = new EmblFeature();
50 cds.setLocation("join(10..20,complement(30..40),50..60,70..80,complement(110..120))");
52 int[] exons = testee.getCdsRanges(cds);
53 assertEquals("[10, 20, 40, 30, 50, 60, 70, 80, 120, 110]",
54 Arrays.toString(exons));
57 @Test(groups = "Functional")
58 public void testParseCodingFeature()
60 // not the whole sequence but enough for this test...
61 List<SequenceI> peptides = new ArrayList<SequenceI>();
62 SequenceIdMatcher matcher = new SequenceIdMatcher(peptides);
63 EmblFile ef = EmblTestHelper.getEmblFile();
64 assertEquals(1, ef.getEntries().size());
65 EmblEntry testee = ef.getEntries().get(0);
66 String sourceDb = "EMBL";
67 SequenceI dna = testee.makeSequence(sourceDb);
70 * parse three CDS features, with two/one/no Uniprot cross-refs
72 for (EmblFeature feature : ef.getEntries().get(0).getFeatures())
74 if ("CDS".equals(feature.getName()))
76 testee.parseCodingFeature(feature, sourceDb, dna, peptides, matcher);
81 * peptides should now have five entries:
82 * EMBL product and two Uniprot accessions for the first CDS / translation
83 * EMBL product and one Uniprot accession for the second CDS / "
84 * EMBL product only for the third
86 assertEquals(6, peptides.size());
87 assertEquals("CAA30420.1", peptides.get(0).getName());
88 assertEquals("MLCF", peptides.get(0).getSequenceAsString());
89 assertEquals("UNIPROT|B0BCM4", peptides.get(1).getName());
90 assertEquals("MLCF", peptides.get(1).getSequenceAsString());
91 assertEquals("UNIPROT|P0CE20", peptides.get(2).getName());
92 assertEquals("MLCF", peptides.get(2).getSequenceAsString());
93 assertEquals("CAA30421.1", peptides.get(3).getName());
94 assertEquals("MSSS", peptides.get(3).getSequenceAsString());
95 assertEquals("UNIPROT|B0BCM3", peptides.get(4).getName());
96 assertEquals("MSSS", peptides.get(4).getSequenceAsString());
97 assertEquals("CAA12345.6", peptides.get(5).getName());
98 assertEquals("MSS", peptides.get(5).getSequenceAsString());
101 * verify dna sequence has dbrefs with CDS mappings to the peptide 'products'
103 MapList cds1Map = new MapList(new int[] { 57, 46 }, new int[] { 1, 4 },
105 MapList cds2Map = new MapList(new int[] { 4, 15 }, new int[] { 1, 4 },
107 MapList cds3Map = new MapList(new int[] { 4, 6, 10, 15 }, new int[] {
109 DBRefEntry[] dbrefs = dna.getDBRefs();
110 assertEquals(4, dbrefs.length);
111 DBRefEntry dbRefEntry = dbrefs[0];
112 assertEquals("UNIPROT", dbRefEntry.getSource());
113 assertEquals("B0BCM4", dbRefEntry.getAccessionId());
114 assertSame(peptides.get(1), dbRefEntry.getMap().getTo());
115 assertEquals(cds1Map, dbRefEntry.getMap().getMap());
117 dbRefEntry = dbrefs[1];
118 assertEquals("UNIPROT", dbRefEntry.getSource());
119 assertEquals("P0CE20", dbRefEntry.getAccessionId());
120 assertSame(peptides.get(2), dbRefEntry.getMap().getTo());
121 assertEquals(cds1Map, dbRefEntry.getMap().getMap());
123 dbRefEntry = dbrefs[2];
124 assertEquals("UNIPROT", dbRefEntry.getSource());
125 assertEquals("B0BCM3", dbRefEntry.getAccessionId());
126 assertSame(peptides.get(4), dbRefEntry.getMap().getTo());
127 assertEquals(cds2Map, dbRefEntry.getMap().getMap());
129 dbRefEntry = dbrefs[3];
130 assertEquals("EMBLCDSPROTEIN", dbRefEntry.getSource());
131 assertEquals("CAA12345.6", dbRefEntry.getAccessionId());
132 assertSame(peptides.get(5), dbRefEntry.getMap().getTo());
133 assertEquals(cds3Map, dbRefEntry.getMap().getMap());
136 * verify peptides have dbrefs
137 * - to EMBL sequence (with inverse 1:3 cds mapping)
138 * - to EMBLCDS (with 1:3 mapping)
139 * - direct (no mapping) to other protein accessions
141 MapList proteinToCdsMap1 = new MapList(new int[] { 1, 4 }, new int[] {
143 MapList proteinToCdsMap2 = new MapList(new int[] { 1, 3 }, new int[] {
146 // dbrefs for first CDS EMBL product CAA30420.1
147 dbrefs = peptides.get(0).getDBRefs();
148 assertEquals(5, dbrefs.length);
149 assertEquals(DBRefSource.EMBL, dbrefs[0].getSource());
150 assertEquals("CAA30420.1", dbrefs[0].getAccessionId());
151 // TODO: verify getPrimaryDBRefs() for peptide products
152 assertEquals(cds1Map.getInverse(), dbrefs[0].getMap().getMap());
153 assertEquals(DBRefSource.EMBLCDS, dbrefs[1].getSource());
154 assertEquals("CAA30420.1", dbrefs[1].getAccessionId());
155 assertEquals(proteinToCdsMap1, dbrefs[1].getMap().getMap());
156 assertEquals(DBRefSource.EMBLCDSProduct, dbrefs[2].getSource());
157 assertEquals("CAA30420.1", dbrefs[2].getAccessionId());
158 assertNull(dbrefs[2].getMap());
159 assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "2.1", "B0BCM4"),
161 assertNull(dbrefs[3].getMap());
162 assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "0", "P0CE20"),
164 assertNull(dbrefs[4].getMap());
166 // dbrefs for first CDS first Uniprot xref
167 dbrefs = peptides.get(1).getDBRefs();
168 assertEquals(2, dbrefs.length);
169 assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "2.1", "B0BCM4"),
171 assertNull(dbrefs[0].getMap());
172 assertEquals(DBRefSource.EMBL, dbrefs[1].getSource());
173 assertEquals("X07547", dbrefs[1].getAccessionId());
174 assertEquals(cds1Map.getInverse(), dbrefs[1].getMap().getMap());
176 // dbrefs for first CDS second Uniprot xref
177 dbrefs = peptides.get(2).getDBRefs();
178 assertEquals(2, dbrefs.length);
179 assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "0", "P0CE20"),
181 assertNull(dbrefs[0].getMap());
182 assertEquals(DBRefSource.EMBL, dbrefs[1].getSource());
183 assertEquals("X07547", dbrefs[1].getAccessionId());
184 assertEquals(cds1Map.getInverse(), dbrefs[1].getMap().getMap());
186 // dbrefs for second CDS EMBL product CAA30421.1
187 dbrefs = peptides.get(3).getDBRefs();
188 assertEquals(4, dbrefs.length);
189 assertEquals(DBRefSource.EMBL, dbrefs[0].getSource());
190 assertEquals("CAA30421.1", dbrefs[0].getAccessionId());
191 assertEquals(cds2Map.getInverse(), dbrefs[0].getMap().getMap());
192 assertEquals(DBRefSource.EMBLCDS, dbrefs[1].getSource());
193 assertEquals("CAA30421.1", dbrefs[1].getAccessionId());
194 assertEquals(proteinToCdsMap1, dbrefs[1].getMap().getMap());
195 assertEquals(DBRefSource.EMBLCDSProduct, dbrefs[2].getSource());
196 assertEquals("CAA30421.1", dbrefs[2].getAccessionId());
197 assertNull(dbrefs[2].getMap());
198 assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "0", "B0BCM3"),
200 assertNull(dbrefs[3].getMap());
202 // dbrefs for second CDS second Uniprot xref
203 dbrefs = peptides.get(4).getDBRefs();
204 assertEquals(2, dbrefs.length);
205 assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "0", "B0BCM3"),
207 assertNull(dbrefs[0].getMap());
208 assertEquals(DBRefSource.EMBL, dbrefs[1].getSource());
209 assertEquals("X07547", dbrefs[1].getAccessionId());
210 assertEquals(cds2Map.getInverse(), dbrefs[1].getMap().getMap());
212 // dbrefs for third CDS inferred EMBL product CAA12345.6
213 dbrefs = peptides.get(5).getDBRefs();
214 assertEquals(3, dbrefs.length);
215 assertEquals(DBRefSource.EMBL, dbrefs[0].getSource());
216 assertEquals("CAA12345.6", dbrefs[0].getAccessionId());
217 assertEquals(cds3Map.getInverse(), dbrefs[0].getMap().getMap());
218 assertEquals(DBRefSource.EMBLCDS, dbrefs[1].getSource());
219 assertEquals("CAA12345.6", dbrefs[1].getAccessionId());
220 assertEquals(proteinToCdsMap2, dbrefs[1].getMap().getMap());
221 assertEquals(DBRefSource.EMBLCDSProduct, dbrefs[2].getSource());
222 assertEquals("CAA12345.6", dbrefs[2].getAccessionId());
223 assertNull(dbrefs[2].getMap());
226 @Test(groups = "Functional")
227 public void testAdjustForProteinLength()
229 int[] exons = new int[] { 11, 15, 21, 25, 31, 38 }; // 18 bp
231 // exact length match:
232 assertSame(exons, EmblEntry.adjustForProteinLength(6, exons));
234 // match if we assume exons include stop codon not in protein:
235 assertSame(exons, EmblEntry.adjustForProteinLength(5, exons));
237 // truncate last exon by 6bp
238 int[] truncated = EmblEntry.adjustForProteinLength(4, exons);
239 assertEquals("[11, 15, 21, 25, 31, 32]", Arrays.toString(truncated));
241 // remove last exon and truncate preceding by 1bp
242 truncated = EmblEntry.adjustForProteinLength(3, exons);
243 assertEquals("[11, 15, 21, 24]", Arrays.toString(truncated));
245 // exact removal of exon case:
246 exons = new int[] { 11, 15, 21, 27, 33, 38 }; // 18 bp
247 truncated = EmblEntry.adjustForProteinLength(4, exons);
248 assertEquals("[11, 15, 21, 27]", Arrays.toString(truncated));
250 // what if exons are too short for protein?
251 truncated = EmblEntry.adjustForProteinLength(7, exons);
252 assertSame(exons, truncated);