/*
* Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
* Copyright (C) $$Year-Rel$$ The Jalview Authors
*
* This file is part of Jalview.
*
* Jalview is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* Jalview is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Jalview. If not, see <http://www.gnu.org/licenses/>.
* The Jalview Authors are detailed in the 'AUTHORS' file.
*/
package jalview.datamodel.xdb.embl;
import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertNull;
import static org.testng.AssertJUnit.assertSame;
import jalview.analysis.SequenceIdMatcher;
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.DBRefSource;
import jalview.datamodel.SequenceI;
import jalview.gui.JvOptionPane;
import jalview.util.MapList;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
/**
 * Unit tests for {@link EmblEntry}: extraction of CDS ranges from a feature
 * location, parsing of CDS features into peptide sequences with database
 * cross-references, and adjustment of exon ranges to a protein length.
 */
public class EmblEntryTest
{

  /**
   * Switches {@link JvOptionPane} out of interactive mode and sets its mock
   * response to CANCEL_OPTION before any test in this class runs (presumably
   * so that no dialog can block an unattended test run).
   */
  @BeforeClass(alwaysRun = true)
  public void setUpJvOptionPane()
  {
    JvOptionPane.setInteractiveMode(false);
    JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
  }

  /**
   * Checks that getCdsRanges returns the start/end positions of each location
   * in order of appearance, with complement(a..b) locations reported as
   * [b, a].
   */
  @Test(groups = "Functional")
  public void testGetCdsRanges()
  {
    EmblEntry testee = new EmblEntry();

    /*
     * Make a (CDS) Feature with 5 locations
     */
    EmblFeature cds = new EmblFeature();
    cds.setLocation("join(10..20,complement(30..40),50..60,70..80,complement(110..120))");

    int[] exons = testee.getCdsRanges(cds);
    assertEquals("[10, 20, 40, 30, 50, 60, 70, 80, 120, 110]",
            Arrays.toString(exons));
  }

  /**
   * Parses every CDS feature of the single entry supplied by
   * EmblTestHelper.getEmblFile() and verifies
   * <ul>
   * <li>the peptide sequences that are created (names and residues)</li>
   * <li>the dbrefs (with mappings) added to the dna sequence</li>
   * <li>the dbrefs (with or without mappings) added to each peptide</li>
   * </ul>
   */
  @Test(groups = "Functional")
  public void testParseCodingFeature()
  {
    // not the whole sequence but enough for this test...
    // (the list must be typed: SequenceI methods are called on its elements)
    List<SequenceI> peptides = new ArrayList<SequenceI>();
    SequenceIdMatcher matcher = new SequenceIdMatcher(peptides);
    EmblFile ef = EmblTestHelper.getEmblFile();
    assertEquals(1, ef.getEntries().size());
    EmblEntry testee = ef.getEntries().get(0);
    String sourceDb = "EMBL";
    SequenceI dna = testee.makeSequence(sourceDb);

    /*
     * parse three CDS features, with two/one/no Uniprot cross-refs
     */
    for (EmblFeature feature : ef.getEntries().get(0).getFeatures())
    {
      if ("CDS".equals(feature.getName()))
      {
        testee.parseCodingFeature(feature, sourceDb, dna, peptides, matcher);
      }
    }

    /*
     * peptides should now have six entries:
     * EMBL product and two Uniprot accessions for the first CDS / translation
     * EMBL product and one Uniprot accession for the second CDS / "
     * EMBL product only for the third
     */
    assertEquals(6, peptides.size());
    assertEquals("CAA30420.1", peptides.get(0).getName());
    assertEquals("MLCF", peptides.get(0).getSequenceAsString());
    assertEquals("UNIPROT|B0BCM4", peptides.get(1).getName());
    assertEquals("MLCF", peptides.get(1).getSequenceAsString());
    assertEquals("UNIPROT|P0CE20", peptides.get(2).getName());
    assertEquals("MLCF", peptides.get(2).getSequenceAsString());
    assertEquals("CAA30421.1", peptides.get(3).getName());
    assertEquals("MSSS", peptides.get(3).getSequenceAsString());
    assertEquals("UNIPROT|B0BCM3", peptides.get(4).getName());
    assertEquals("MSSS", peptides.get(4).getSequenceAsString());
    assertEquals("CAA12345.6", peptides.get(5).getName());
    assertEquals("MSS", peptides.get(5).getSequenceAsString());

    /*
     * verify dna sequence has dbrefs with CDS mappings to the peptide 'products'
     */
    MapList cds1Map = new MapList(new int[] { 57, 46 }, new int[] { 1, 4 },
            3, 1);
    MapList cds2Map = new MapList(new int[] { 4, 15 }, new int[] { 1, 4 },
            3, 1);
    MapList cds3Map = new MapList(new int[] { 4, 6, 10, 15 }, new int[] {
        1, 3 }, 3, 1);

    DBRefEntry[] dbrefs = dna.getDBRefs();
    assertEquals(4, dbrefs.length);
    DBRefEntry dbRefEntry = dbrefs[0];
    assertEquals("UNIPROT", dbRefEntry.getSource());
    assertEquals("B0BCM4", dbRefEntry.getAccessionId());
    assertSame(peptides.get(1), dbRefEntry.getMap().getTo());
    assertEquals(cds1Map, dbRefEntry.getMap().getMap());

    dbRefEntry = dbrefs[1];
    assertEquals("UNIPROT", dbRefEntry.getSource());
    assertEquals("P0CE20", dbRefEntry.getAccessionId());
    assertSame(peptides.get(2), dbRefEntry.getMap().getTo());
    assertEquals(cds1Map, dbRefEntry.getMap().getMap());

    dbRefEntry = dbrefs[2];
    assertEquals("UNIPROT", dbRefEntry.getSource());
    assertEquals("B0BCM3", dbRefEntry.getAccessionId());
    assertSame(peptides.get(4), dbRefEntry.getMap().getTo());
    assertEquals(cds2Map, dbRefEntry.getMap().getMap());

    dbRefEntry = dbrefs[3];
    assertEquals("EMBLCDSPROTEIN", dbRefEntry.getSource());
    assertEquals("CAA12345.6", dbRefEntry.getAccessionId());
    assertSame(peptides.get(5), dbRefEntry.getMap().getTo());
    assertEquals(cds3Map, dbRefEntry.getMap().getMap());

    /*
     * verify peptides have dbrefs
     * - to EMBL sequence (with inverse 1:3 cds mapping)
     * - to EMBLCDS (with 1:3 mapping)
     * - direct (no mapping) to other protein accessions
     */
    MapList proteinToCdsMap1 = new MapList(new int[] { 1, 4 }, new int[] {
        1, 12 }, 1, 3);
    MapList proteinToCdsMap2 = new MapList(new int[] { 1, 3 }, new int[] {
        1, 9 }, 1, 3);

    // dbrefs for first CDS EMBL product CAA30420.1
    dbrefs = peptides.get(0).getDBRefs();
    assertEquals(5, dbrefs.length);
    assertEquals(DBRefSource.EMBL, dbrefs[0].getSource());
    assertEquals("CAA30420.1", dbrefs[0].getAccessionId());
    // TODO: verify getPrimaryDBRefs() for peptide products
    assertEquals(cds1Map.getInverse(), dbrefs[0].getMap().getMap());
    assertEquals(DBRefSource.EMBLCDS, dbrefs[1].getSource());
    assertEquals("CAA30420.1", dbrefs[1].getAccessionId());
    assertEquals(proteinToCdsMap1, dbrefs[1].getMap().getMap());
    assertEquals(DBRefSource.EMBLCDSProduct, dbrefs[2].getSource());
    assertEquals("CAA30420.1", dbrefs[2].getAccessionId());
    assertNull(dbrefs[2].getMap());
    assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "2.1", "B0BCM4"),
            dbrefs[3]);
    assertNull(dbrefs[3].getMap());
    assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "0", "P0CE20"),
            dbrefs[4]);
    assertNull(dbrefs[4].getMap());

    // dbrefs for first CDS first Uniprot xref
    dbrefs = peptides.get(1).getDBRefs();
    assertEquals(2, dbrefs.length);
    assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "2.1", "B0BCM4"),
            dbrefs[0]);
    assertNull(dbrefs[0].getMap());
    assertEquals(DBRefSource.EMBL, dbrefs[1].getSource());
    assertEquals("X07547", dbrefs[1].getAccessionId());
    assertEquals(cds1Map.getInverse(), dbrefs[1].getMap().getMap());

    // dbrefs for first CDS second Uniprot xref
    dbrefs = peptides.get(2).getDBRefs();
    assertEquals(2, dbrefs.length);
    assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "0", "P0CE20"),
            dbrefs[0]);
    assertNull(dbrefs[0].getMap());
    assertEquals(DBRefSource.EMBL, dbrefs[1].getSource());
    assertEquals("X07547", dbrefs[1].getAccessionId());
    assertEquals(cds1Map.getInverse(), dbrefs[1].getMap().getMap());

    // dbrefs for second CDS EMBL product CAA30421.1
    dbrefs = peptides.get(3).getDBRefs();
    assertEquals(4, dbrefs.length);
    assertEquals(DBRefSource.EMBL, dbrefs[0].getSource());
    assertEquals("CAA30421.1", dbrefs[0].getAccessionId());
    assertEquals(cds2Map.getInverse(), dbrefs[0].getMap().getMap());
    assertEquals(DBRefSource.EMBLCDS, dbrefs[1].getSource());
    assertEquals("CAA30421.1", dbrefs[1].getAccessionId());
    assertEquals(proteinToCdsMap1, dbrefs[1].getMap().getMap());
    assertEquals(DBRefSource.EMBLCDSProduct, dbrefs[2].getSource());
    assertEquals("CAA30421.1", dbrefs[2].getAccessionId());
    assertNull(dbrefs[2].getMap());
    assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "0", "B0BCM3"),
            dbrefs[3]);
    assertNull(dbrefs[3].getMap());

    // dbrefs for second CDS Uniprot xref (the only one for this CDS)
    dbrefs = peptides.get(4).getDBRefs();
    assertEquals(2, dbrefs.length);
    assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "0", "B0BCM3"),
            dbrefs[0]);
    assertNull(dbrefs[0].getMap());
    assertEquals(DBRefSource.EMBL, dbrefs[1].getSource());
    assertEquals("X07547", dbrefs[1].getAccessionId());
    assertEquals(cds2Map.getInverse(), dbrefs[1].getMap().getMap());

    // dbrefs for third CDS inferred EMBL product CAA12345.6
    dbrefs = peptides.get(5).getDBRefs();
    assertEquals(3, dbrefs.length);
    assertEquals(DBRefSource.EMBL, dbrefs[0].getSource());
    assertEquals("CAA12345.6", dbrefs[0].getAccessionId());
    assertEquals(cds3Map.getInverse(), dbrefs[0].getMap().getMap());
    assertEquals(DBRefSource.EMBLCDS, dbrefs[1].getSource());
    assertEquals("CAA12345.6", dbrefs[1].getAccessionId());
    assertEquals(proteinToCdsMap2, dbrefs[1].getMap().getMap());
    assertEquals(DBRefSource.EMBLCDSProduct, dbrefs[2].getSource());
    assertEquals("CAA12345.6", dbrefs[2].getAccessionId());
    assertNull(dbrefs[2].getMap());
  }

  /**
   * Checks adjustForProteinLength for a protein length (in residues) that
   * <ul>
   * <li>matches the exon ranges exactly, or leaves exactly one spare codon:
   * the same array instance is returned</li>
   * <li>is shorter than the exons: ranges are truncated and/or dropped from
   * the end</li>
   * <li>is longer than the exons: the same array instance is returned</li>
   * </ul>
   */
  @Test(groups = "Functional")
  public void testAdjustForProteinLength()
  {
    int[] exons = new int[] { 11, 15, 21, 25, 31, 38 }; // 18 bp

    // exact length match:
    assertSame(exons, EmblEntry.adjustForProteinLength(6, exons));

    // match if we assume exons include stop codon not in protein:
    assertSame(exons, EmblEntry.adjustForProteinLength(5, exons));

    // truncate last exon by 6bp
    int[] truncated = EmblEntry.adjustForProteinLength(4, exons);
    assertEquals("[11, 15, 21, 25, 31, 32]", Arrays.toString(truncated));

    // remove last exon and truncate preceding by 1bp
    truncated = EmblEntry.adjustForProteinLength(3, exons);
    assertEquals("[11, 15, 21, 24]", Arrays.toString(truncated));

    // exact removal of exon case:
    exons = new int[] { 11, 15, 21, 27, 33, 38 }; // 18 bp
    truncated = EmblEntry.adjustForProteinLength(4, exons);
    assertEquals("[11, 15, 21, 27]", Arrays.toString(truncated));

    // what if exons are too short for protein?
    truncated = EmblEntry.adjustForProteinLength(7, exons);
    assertSame(exons, truncated);
  }
}