From c775190fba1fe7430b060d48d5d8cc13902a8f47 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Fri, 1 Jul 2016 10:19:20 +0100 Subject: [PATCH] JAL-2110 add 'products' parameter to filter results in makeCdsAlignment --- src/jalview/analysis/AlignmentUtils.java | 31 ++++- src/jalview/gui/AlignFrame.java | 2 +- test/jalview/analysis/AlignmentUtilsTests.java | 174 +++++++++++++++++++----- 3 files changed, 165 insertions(+), 42 deletions(-) diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index 74066d7..f94d393 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -1407,11 +1407,14 @@ public class AlignmentUtils * aligned dna sequences * @param dataset * - throws error if not given a dataset + * @param products + * (optional) to restrict results to CDS that map to specified + * protein products * @return an alignment whose sequences are the cds-only parts of the dna * sequences (or null if no mappings are found) */ public static AlignmentI makeCdsAlignment(SequenceI[] dna, - AlignmentI dataset) + AlignmentI dataset, AlignmentI products) { if (dataset.getDataset() != null) { @@ -1420,7 +1423,16 @@ public class AlignmentUtils } List cdsSeqs = new ArrayList(); List mappings = dataset.getCodonFrames(); - + HashSet productSeqs = null; + if (products != null) + { + productSeqs = new HashSet(); + for (SequenceI seq : products.getSequences()) + { + productSeqs.add(seq.getDatasetSequence() == null ? 
seq : seq + .getDatasetSequence()); + } + } /* * construct CDS sequences from the (cds-to-protein) mappings made earlier; @@ -1453,9 +1465,20 @@ public class AlignmentUtils * the dna mapping's product */ SequenceI cdsSeq = null; + // TODO better mappings collection data model so we can do - // a table lookup instead of double loops to find mappings + // a direct lookup instead of double loops to find mappings + SequenceI proteinProduct = aMapping.getTo(); + + /* + * skip if not mapped to one of a specified set of proteins + */ + if (productSeqs != null && !productSeqs.contains(proteinProduct)) + { + continue; + } + for (AlignedCodonFrame acf : MappingUtils .findMappingsForSequence(proteinProduct, mappings)) { @@ -1544,7 +1567,7 @@ public class AlignmentUtils AlignmentI cds = new Alignment(cdsSeqs.toArray(new SequenceI[cdsSeqs .size()])); - cds.setDataset((Alignment) dataset); + cds.setDataset(dataset); return cds; } diff --git a/src/jalview/gui/AlignFrame.java b/src/jalview/gui/AlignFrame.java index 54b1cb6..64bdf44 100644 --- a/src/jalview/gui/AlignFrame.java +++ b/src/jalview/gui/AlignFrame.java @@ -4768,7 +4768,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, if (dna) { copyAlignment = AlignmentUtils.makeCdsAlignment( - sequenceSelection, dataset); + sequenceSelection, dataset, xrefsAlignment); if (copyAlignment.getHeight() == 0) { System.err.println("Failed to make CDS alignment"); diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index 3de2ce4..d704ec6 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -981,16 +981,6 @@ public class AlignmentUtilsTests dna2.createDatasetSequence(); pep1.createDatasetSequence(); pep2.createDatasetSequence(); - dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 6, 0f, - null)); - dna1.addSequenceFeature(new SequenceFeature("CDS", "cds2", 10, 12, 0f, - null)); - 
dna2.addSequenceFeature(new SequenceFeature("CDS", "cds3", 1, 3, 0f, - null)); - dna2.addSequenceFeature(new SequenceFeature("CDS", "cds4", 7, 9, 0f, - null)); - dna2.addSequenceFeature(new SequenceFeature("CDS", "cds5", 13, 15, 0f, - null)); AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 }); dna.setDataset(null); @@ -1009,7 +999,7 @@ public class AlignmentUtilsTests * execute method under test: */ AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] { - dna1, dna2 }, dna.getDataset()); + dna1, dna2 }, dna.getDataset(), null); assertEquals(2, cds.getSequences().size()); assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString()); @@ -1115,18 +1105,6 @@ public class AlignmentUtilsTests pep1.createDatasetSequence(); pep2.createDatasetSequence(); pep3.createDatasetSequence(); - dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 6, 0f, - null)); - dna1.addSequenceFeature(new SequenceFeature("CDS", "cds2", 10, 12, 0f, - null)); - dna1.addSequenceFeature(new SequenceFeature("CDS", "cds3", 1, 3, 0f, - null)); - dna1.addSequenceFeature(new SequenceFeature("CDS", "cds4", 7, 9, 0f, - null)); - dna1.addSequenceFeature(new SequenceFeature("CDS", "cds5", 1, 3, 0f, - null)); - dna1.addSequenceFeature(new SequenceFeature("CDS", "cds6", 10, 12, 0f, - null)); pep1.getDatasetSequence().addDBRef( new DBRefEntry("EMBLCDS", "2", "A12345")); pep2.getDatasetSequence().addDBRef( @@ -1166,7 +1144,7 @@ public class AlignmentUtilsTests * execute method under test */ AlignmentI cdsal = AlignmentUtils.makeCdsAlignment( - new SequenceI[] { dna1 }, dna.getDataset()); + new SequenceI[] { dna1 }, dna.getDataset(), null); /* * Verify we have 3 cds sequences, mapped to pep1/2/3 respectively @@ -1538,18 +1516,6 @@ public class AlignmentUtilsTests dna3.createDatasetSequence(); pep1.createDatasetSequence(); pep2.createDatasetSequence(); - dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 8, 0f, - null)); - dna1.addSequenceFeature(new 
SequenceFeature("CDS", "cds2", 9, 12, 0f, - null)); - dna1.addSequenceFeature(new SequenceFeature("CDS", "cds3", 16, 18, 0f, - null)); - dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 4, 8, 0f, - null)); - dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 12, 12, 0f, - null)); - dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 16, 18, 0f, - null)); AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 }); dna.setDataset(null); @@ -1567,7 +1533,7 @@ public class AlignmentUtilsTests dna.addCodonFrame(acf); AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] { - dna1, dna2, dna3 }, dna.getDataset()); + dna1, dna2, dna3 }, dna.getDataset(), null); List cdsSeqs = cds.getSequences(); assertEquals(2, cdsSeqs.size()); assertEquals("GGGCCCTTTGGG", cdsSeqs.get(0).getSequenceAsString()); @@ -2244,4 +2210,138 @@ public class AlignmentUtilsTests assertEquals('T', map.get(11).get(seq1).charValue()); assertEquals('T', map.get(12).get(seq1).charValue()); } + + /** + * Test for the case where the products for which we want CDS are specified. + * This is to represent the case where EMBL has CDS mappings to both Uniprot + * and EMBLCDSPROTEIN. makeCdsAlignment() should only return the mappings for + * the protein sequences specified. 
+ */ + @Test(groups = { "Functional" }) + public void testMakeCdsAlignment_filterProducts() + { + SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa"); + SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC"); + SequenceI pep1 = new Sequence("Uniprot|pep1", "GF"); + SequenceI pep2 = new Sequence("Uniprot|pep2", "GFP"); + SequenceI pep3 = new Sequence("EMBL|pep3", "GF"); + SequenceI pep4 = new Sequence("EMBL|pep4", "GFP"); + dna1.createDatasetSequence(); + dna2.createDatasetSequence(); + pep1.createDatasetSequence(); + pep2.createDatasetSequence(); + pep3.createDatasetSequence(); + pep4.createDatasetSequence(); + AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 }); + dna.setDataset(null); + AlignmentI emblPeptides = new Alignment(new SequenceI[] { pep3, pep4 }); + emblPeptides.setDataset(null); + + AlignedCodonFrame acf = new AlignedCodonFrame(); + MapList map = new MapList(new int[] { 4, 6, 10, 12 }, + new int[] { 1, 2 }, 3, 1); + acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map); + acf.addMap(dna1.getDatasetSequence(), pep3.getDatasetSequence(), map); + dna.addCodonFrame(acf); + + acf = new AlignedCodonFrame(); + map = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, new int[] { 1, 3 }, + 3, 1); + acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map); + acf.addMap(dna2.getDatasetSequence(), pep4.getDatasetSequence(), map); + dna.addCodonFrame(acf); + + /* + * execute method under test to find CDS for EMBL peptides only + */ + AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] { + dna1, dna2 }, dna.getDataset(), emblPeptides); + + assertEquals(2, cds.getSequences().size()); + assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString()); + assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString()); + + /* + * verify shared, extended alignment dataset + */ + assertSame(dna.getDataset(), cds.getDataset()); + assertTrue(dna.getDataset().getSequences() + 
.contains(cds.getSequenceAt(0).getDatasetSequence())); + assertTrue(dna.getDataset().getSequences() + .contains(cds.getSequenceAt(1).getDatasetSequence())); + + /* + * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide + * the mappings are on the shared alignment dataset + */ + List cdsMappings = cds.getDataset().getCodonFrames(); + /* + * 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep) + */ + assertEquals(6, cdsMappings.size()); + + /* + * verify that mapping sets for dna and cds alignments are different + * [not current behaviour - all mappings are on the alignment dataset] + */ + // select -> subselect type to test. + // Assert.assertNotSame(dna.getCodonFrames(), cds.getCodonFrames()); + // assertEquals(4, dna.getCodonFrames().size()); + // assertEquals(4, cds.getCodonFrames().size()); + + /* + * Two mappings involve pep3 (dna to pep3, cds to pep3) + * Mapping from pep3 to GGGTTT in first new exon sequence + */ + List pep3Mappings = MappingUtils + .findMappingsForSequence(pep3, cdsMappings); + assertEquals(2, pep3Mappings.size()); + List mappings = MappingUtils + .findMappingsForSequence(cds.getSequenceAt(0), pep3Mappings); + assertEquals(1, mappings.size()); + + // map G to GGG + SearchResults sr = MappingUtils.buildSearchResults(pep3, 1, mappings); + assertEquals(1, sr.getResults().size()); + Match m = sr.getResults().get(0); + assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence()); + assertEquals(1, m.getStart()); + assertEquals(3, m.getEnd()); + // map F to TTT + sr = MappingUtils.buildSearchResults(pep3, 2, mappings); + m = sr.getResults().get(0); + assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence()); + assertEquals(4, m.getStart()); + assertEquals(6, m.getEnd()); + + /* + * Two mappings involve pep4 (dna to pep4, cds to pep4) + * Verify mapping from pep4 to GGGTTTCCC in second new exon sequence + */ + List pep4Mappings = MappingUtils + .findMappingsForSequence(pep4, cdsMappings); + assertEquals(2, 
pep4Mappings.size()); + mappings = MappingUtils.findMappingsForSequence(cds.getSequenceAt(1), + pep4Mappings); + assertEquals(1, mappings.size()); + // map G to GGG + sr = MappingUtils.buildSearchResults(pep4, 1, mappings); + assertEquals(1, sr.getResults().size()); + m = sr.getResults().get(0); + assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence()); + assertEquals(1, m.getStart()); + assertEquals(3, m.getEnd()); + // map F to TTT + sr = MappingUtils.buildSearchResults(pep4, 2, mappings); + m = sr.getResults().get(0); + assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence()); + assertEquals(4, m.getStart()); + assertEquals(6, m.getEnd()); + // map P to CCC + sr = MappingUtils.buildSearchResults(pep4, 3, mappings); + m = sr.getResults().get(0); + assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence()); + assertEquals(7, m.getStart()); + assertEquals(9, m.getEnd()); + } } -- 1.7.10.2