X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=test%2Fjalview%2Fanalysis%2FAlignmentUtilsTests.java;h=3d3736f4e6d783b3c47c1ac48ab6b025c8c4b318;hb=6ed535f7ef953468f8827255ec6ebcd5a6e54d8d;hp=8bdd7403bf34d573b8052608d20dbd85019cc106;hpb=8c4b7ec68807806bfc88a3c3c3df132e849390f5;p=jalview.git diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index 8bdd740..3d3736f 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -46,52 +46,15 @@ import jalview.util.MappingUtils; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; -import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.Set; import org.testng.annotations.Test; public class AlignmentUtilsTests { - // @formatter:off - private static final String TEST_DATA = - "# STOCKHOLM 1.0\n" + - "#=GS D.melanogaster.1 AC AY119185.1/838-902\n" + - "#=GS D.melanogaster.2 AC AC092237.1/57223-57161\n" + - "#=GS D.melanogaster.3 AC AY060611.1/560-627\n" + - "D.melanogaster.1 G.AGCC.CU...AUGAUCGA\n" + - "#=GR D.melanogaster.1 SS ................((((\n" + - "D.melanogaster.2 C.AUUCAACU.UAUGAGGAU\n" + - "#=GR D.melanogaster.2 SS ................((((\n" + - "D.melanogaster.3 G.UGGCGCU..UAUGACGCA\n" + - "#=GR D.melanogaster.3 SS (.(((...(....(((((((\n" + - "//"; - - private static final String AA_SEQS_1 = - ">Seq1Name\n" + - "K-QY--L\n" + - ">Seq2Name\n" + - "-R-FP-W-\n"; - - private static final String CDNA_SEQS_1 = - ">Seq1Name\n" + - "AC-GG--CUC-CAA-CT\n" + - ">Seq2Name\n" + - "-CG-TTA--ACG---AAGT\n"; - - private static final String CDNA_SEQS_2 = - ">Seq1Name\n" + - "GCTCGUCGTACT\n" + - ">Seq2Name\n" + - "GGGTCAGGCAGT\n"; - // @formatter:on - - // public static Sequence ts=new - // Sequence("short","ASDASDASDASDASDASDASDASDASDASDASDASDASD"); public static Sequence ts = new Sequence("short", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm"); @@ -498,30 +461,6 @@ public class AlignmentUtilsTests } /** - * Test for the method that generates an aligned translated sequence from one - * mapping. - */ - @Test(groups = { "Functional" }) - public void testGetAlignedTranslation_dnaLikeProtein() - { - // dna alignment will be replaced - SequenceI dna = new Sequence("Seq1", "T-G-CC-A--T-TAC-CAG-"); - dna.createDatasetSequence(); - // protein alignment will be 'applied' to dna - SequenceI protein = new Sequence("Seq1", "-CH-Y--Q-"); - protein.createDatasetSequence(); - MapList map = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 }, 3, 1); - AlignedCodonFrame acf = new AlignedCodonFrame(); - acf.addMap(dna.getDatasetSequence(), protein.getDatasetSequence(), map); - - final SequenceI aligned = AlignmentUtils.getAlignedTranslation(protein, - '-', acf); - assertEquals("---TGCCAT---TAC------CAG---", - aligned.getSequenceAsString()); - assertSame(aligned.getDatasetSequence(), dna.getDatasetSequence()); - } - - /** * Test the method that realigns protein to match mapped codon alignment. */ @Test(groups = { "Functional" }) @@ -1066,12 +1005,16 @@ public class AlignmentUtilsTests acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map); mappings.add(acf); + /* + * execute method under test: + */ AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] { dna1, dna2 }, mappings, dna); + assertEquals(2, cds.getSequences().size()); - assertEquals("---GGG---TTT---", cds.getSequenceAt(0) + assertEquals("GGGTTT", cds.getSequenceAt(0) .getSequenceAsString()); - assertEquals("GGG---TTT---CCC", cds.getSequenceAt(1) + assertEquals("GGGTTTCCC", cds.getSequenceAt(1) .getSequenceAsString()); /* @@ -1084,18 +1027,22 @@ public class AlignmentUtilsTests .contains(cds.getSequenceAt(1).getDatasetSequence())); /* - * Verify updated mappings + * Verify mappings from CDS to peptide and cDNA to CDS + * the mappings are on the shared alignment dataset */ - assertEquals(2, mappings.size()); - + assertSame(dna.getCodonFrames(), cds.getCodonFrames()); + List cdsMappings = cds.getCodonFrames(); + assertEquals(2, cdsMappings.size()); + /* * Mapping from pep1 to GGGTTT in first new exon sequence */ List pep1Mapping = MappingUtils - .findMappingsForSequence(pep1, mappings); + .findMappingsForSequence(pep1, cdsMappings); assertEquals(1, pep1Mapping.size()); // map G to GGG - SearchResults sr = MappingUtils.buildSearchResults(pep1, 1, mappings); + SearchResults sr = MappingUtils + .buildSearchResults(pep1, 1, cdsMappings); assertEquals(1, sr.getResults().size()); Match m = sr.getResults().get(0); assertSame(cds.getSequenceAt(0).getDatasetSequence(), @@ -1103,7 +1050,7 @@ public class AlignmentUtilsTests assertEquals(1, m.getStart()); assertEquals(3, m.getEnd()); // map F to TTT - sr = MappingUtils.buildSearchResults(pep1, 2, mappings); + sr = MappingUtils.buildSearchResults(pep1, 2, cdsMappings); m = sr.getResults().get(0); assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence()); @@ -1114,10 +1061,10 @@ public class AlignmentUtilsTests * Mapping from pep2 to GGGTTTCCC in second new exon sequence */ List pep2Mapping = MappingUtils - .findMappingsForSequence(pep2, mappings); + .findMappingsForSequence(pep2, cdsMappings); assertEquals(1, pep2Mapping.size()); // map G to GGG - sr = MappingUtils.buildSearchResults(pep2, 1, mappings); + sr = MappingUtils.buildSearchResults(pep2, 1, cdsMappings); assertEquals(1, sr.getResults().size()); m = sr.getResults().get(0); assertSame(cds.getSequenceAt(1).getDatasetSequence(), @@ -1125,14 +1072,14 @@ public class AlignmentUtilsTests assertEquals(1, m.getStart()); assertEquals(3, m.getEnd()); // map F to TTT - sr = MappingUtils.buildSearchResults(pep2, 2, mappings); + sr = MappingUtils.buildSearchResults(pep2, 2, cdsMappings); m = sr.getResults().get(0); assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence()); assertEquals(4, m.getStart()); assertEquals(6, m.getEnd()); // map P to CCC - sr = MappingUtils.buildSearchResults(pep2, 3, mappings); + sr = MappingUtils.buildSearchResults(pep2, 3, cdsMappings); m = sr.getResults().get(0); assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence()); @@ -1141,52 +1088,6 @@ public class AlignmentUtilsTests } /** - * Test the method that makes a cds-only sequence from a DNA sequence and its - * product mapping. Test includes the expected case that the DNA sequence - * already has a protein product (Uniprot translation) which in turn has an - * x-ref to the EMBLCDS record. - */ - @Test(groups = { "Functional" }) - public void testMakeCdsSequences() - { - SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa"); - SequenceI pep1 = new Sequence("pep1", "GF"); - dna1.createDatasetSequence(); - pep1.createDatasetSequence(); - pep1.getDatasetSequence().addDBRef( - new DBRefEntry("EMBLCDS", "2", "A12345")); - - /* - * Make the mapping from dna to protein. The protein sequence has a DBRef to - * EMBLCDS|A12345. - */ - Set mappings = new HashSet(); - MapList map = new MapList(new int[] { 4, 6, 10, 12 }, - new int[] { 1, 2 }, 3, 1); - AlignedCodonFrame acf = new AlignedCodonFrame(); - acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map); - mappings.add(acf); - - AlignedCodonFrame newMapping = new AlignedCodonFrame(); - List ungappedColumns = new ArrayList(); - ungappedColumns.add(new int[] { 4, 6 }); - ungappedColumns.add(new int[] { 10, 12 }); - List cdsSeqs = AlignmentUtils.makeCdsSequences(dna1, acf, - ungappedColumns, - newMapping, '-'); - assertEquals(1, cdsSeqs.size()); - SequenceI cdsSeq = cdsSeqs.get(0); - - assertEquals("GGGTTT", cdsSeq.getSequenceAsString()); - assertEquals("dna1|A12345", cdsSeq.getName()); - assertEquals(1, cdsSeq.getDBRefs().length); - DBRefEntry cdsRef = cdsSeq.getDBRefs()[0]; - assertEquals("EMBLCDS", cdsRef.getSource()); - assertEquals("2", cdsRef.getVersion()); - assertEquals("A12345", cdsRef.getAccessionId()); - } - - /** * Test the method that makes a cds-only alignment from a DNA sequence and its * product mappings, for the case where there are multiple exon mappings to * different protein products. @@ -1245,24 +1146,28 @@ public class AlignmentUtilsTests mappings.add(acf); /* - * Create the Exon alignment; also replaces the dna-to-protein mappings with + * Create the CDS alignment; also augments the dna-to-protein mappings with * exon-to-protein and exon-to-dna mappings */ AlignmentI dna = new Alignment(new SequenceI[] { dna1 }); dna.setDataset(null); - AlignmentI exal = AlignmentUtils.makeCdsAlignment( + + /* + * execute method under test + */ + AlignmentI cdsal = AlignmentUtils.makeCdsAlignment( new SequenceI[] { dna1 }, mappings, dna); /* * Verify we have 3 cds sequences, mapped to pep1/2/3 respectively */ - List cds = exal.getSequences(); + List cds = cdsal.getSequences(); assertEquals(3, cds.size()); /* * verify shared, extended alignment dataset */ - assertSame(exal.getDataset(), dna.getDataset()); + assertSame(cdsal.getDataset(), dna.getDataset()); assertTrue(dna.getDataset().getSequences() .contains(cds.get(0).getDatasetSequence())); assertTrue(dna.getDataset().getSequences() @@ -1274,72 +1179,72 @@ public class AlignmentUtilsTests * verify aligned cds sequences and their xrefs */ SequenceI cdsSeq = cds.get(0); - assertEquals("---GGG---TTT", cdsSeq.getSequenceAsString()); - assertEquals("dna1|A12345", cdsSeq.getName()); - assertEquals(1, cdsSeq.getDBRefs().length); - DBRefEntry cdsRef = cdsSeq.getDBRefs()[0]; - assertEquals("EMBLCDS", cdsRef.getSource()); - assertEquals("2", cdsRef.getVersion()); - assertEquals("A12345", cdsRef.getAccessionId()); + assertEquals("GGGTTT", cdsSeq.getSequenceAsString()); + // assertEquals("dna1|A12345", cdsSeq.getName()); + assertEquals("dna1|pep1", cdsSeq.getName()); + // assertEquals(1, cdsSeq.getDBRefs().length); + // DBRefEntry cdsRef = cdsSeq.getDBRefs()[0]; + // assertEquals("EMBLCDS", cdsRef.getSource()); + // assertEquals("2", cdsRef.getVersion()); + // assertEquals("A12345", cdsRef.getAccessionId()); cdsSeq = cds.get(1); - assertEquals("aaa---ccc---", cdsSeq.getSequenceAsString()); - assertEquals("dna1|A12346", cdsSeq.getName()); - assertEquals(1, cdsSeq.getDBRefs().length); - cdsRef = cdsSeq.getDBRefs()[0]; - assertEquals("EMBLCDS", cdsRef.getSource()); - assertEquals("3", cdsRef.getVersion()); - assertEquals("A12346", cdsRef.getAccessionId()); + assertEquals("aaaccc", cdsSeq.getSequenceAsString()); + // assertEquals("dna1|A12346", cdsSeq.getName()); + assertEquals("dna1|pep2", cdsSeq.getName()); + // assertEquals(1, cdsSeq.getDBRefs().length); + // cdsRef = cdsSeq.getDBRefs()[0]; + // assertEquals("EMBLCDS", cdsRef.getSource()); + // assertEquals("3", cdsRef.getVersion()); + // assertEquals("A12346", cdsRef.getAccessionId()); cdsSeq = cds.get(2); - assertEquals("aaa------TTT", cdsSeq.getSequenceAsString()); - assertEquals("dna1|A12347", cdsSeq.getName()); - assertEquals(1, cdsSeq.getDBRefs().length); - cdsRef = cdsSeq.getDBRefs()[0]; - assertEquals("EMBLCDS", cdsRef.getSource()); - assertEquals("4", cdsRef.getVersion()); - assertEquals("A12347", cdsRef.getAccessionId()); + assertEquals("aaaTTT", cdsSeq.getSequenceAsString()); + // assertEquals("dna1|A12347", cdsSeq.getName()); + assertEquals("dna1|pep3", cdsSeq.getName()); + // assertEquals(1, cdsSeq.getDBRefs().length); + // cdsRef = cdsSeq.getDBRefs()[0]; + // assertEquals("EMBLCDS", cdsRef.getSource()); + // assertEquals("4", cdsRef.getVersion()); + // assertEquals("A12347", cdsRef.getAccessionId()); /* * Verify there are mappings from each cds sequence to its protein product * and also to its dna source */ - Iterator newMappingsIterator = mappings.iterator(); + Iterator newMappingsIterator = cdsal + .getCodonFrames().iterator(); // mappings for dna1 - exon1 - pep1 AlignedCodonFrame cdsMapping = newMappingsIterator.next(); - List dnaMappings = cdsMapping.getMappingsForSequence(dna1); - assertEquals(1, dnaMappings.size()); + List dnaMappings = cdsMapping.getMappingsFromSequence(dna1); + assertEquals(3, dnaMappings.size()); assertSame(cds.get(0).getDatasetSequence(), dnaMappings.get(0) .getTo()); assertEquals("G(1) in CDS should map to G(4) in DNA", 4, dnaMappings .get(0).getMap().getToPosition(1)); - List peptideMappings = cdsMapping - .getMappingsForSequence(pep1); + List peptideMappings = cdsMapping.getMappingsFromSequence(cds + .get(0).getDatasetSequence()); assertEquals(1, peptideMappings.size()); assertSame(pep1.getDatasetSequence(), peptideMappings.get(0).getTo()); // mappings for dna1 - cds2 - pep2 - cdsMapping = newMappingsIterator.next(); - dnaMappings = cdsMapping.getMappingsForSequence(dna1); - assertEquals(1, dnaMappings.size()); - assertSame(cds.get(1).getDatasetSequence(), dnaMappings.get(0) + assertSame(cds.get(1).getDatasetSequence(), dnaMappings.get(1) .getTo()); assertEquals("c(4) in CDS should map to c(7) in DNA", 7, dnaMappings - .get(0).getMap().getToPosition(4)); - peptideMappings = cdsMapping.getMappingsForSequence(pep2); + .get(1).getMap().getToPosition(4)); + peptideMappings = cdsMapping.getMappingsFromSequence(cds.get(1) + .getDatasetSequence()); assertEquals(1, peptideMappings.size()); assertSame(pep2.getDatasetSequence(), peptideMappings.get(0).getTo()); // mappings for dna1 - cds3 - pep3 - cdsMapping = newMappingsIterator.next(); - dnaMappings = cdsMapping.getMappingsForSequence(dna1); - assertEquals(1, dnaMappings.size()); - assertSame(cds.get(2).getDatasetSequence(), dnaMappings.get(0) + assertSame(cds.get(2).getDatasetSequence(), dnaMappings.get(2) .getTo()); assertEquals("T(4) in CDS should map to T(10) in DNA", 10, dnaMappings - .get(0).getMap().getToPosition(4)); - peptideMappings = cdsMapping.getMappingsForSequence(pep3); + .get(2).getMap().getToPosition(4)); + peptideMappings = cdsMapping.getMappingsFromSequence(cds.get(2) + .getDatasetSequence()); assertEquals(1, peptideMappings.size()); assertSame(pep3.getDatasetSequence(), peptideMappings.get(0).getTo()); } @@ -1623,7 +1528,7 @@ public class AlignmentUtilsTests List cdsSeqs = cds.getSequences(); assertEquals(2, cdsSeqs.size()); assertEquals("GGGCCCTTTGGG", cdsSeqs.get(0).getSequenceAsString()); - assertEquals("GGGCC---TGGG", cdsSeqs.get(1).getSequenceAsString()); + assertEquals("GGGCCTGGG", cdsSeqs.get(1).getSequenceAsString()); /* * verify shared, extended alignment dataset @@ -1637,33 +1542,35 @@ public class AlignmentUtilsTests /* * Verify updated mappings */ - assertEquals(2, mappings.size()); + List cdsMappings = cds.getCodonFrames(); + assertEquals(2, cdsMappings.size()); /* * Mapping from pep1 to GGGTTT in first new CDS sequence */ List pep1Mapping = MappingUtils - .findMappingsForSequence(pep1, mappings); + .findMappingsForSequence(pep1, cdsMappings); assertEquals(1, pep1Mapping.size()); /* * maps GPFG to 1-3,4-6,7-9,10-12 */ - SearchResults sr = MappingUtils.buildSearchResults(pep1, 1, mappings); + SearchResults sr = MappingUtils + .buildSearchResults(pep1, 1, cdsMappings); assertEquals(1, sr.getResults().size()); Match m = sr.getResults().get(0); assertEquals(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence()); assertEquals(1, m.getStart()); assertEquals(3, m.getEnd()); - sr = MappingUtils.buildSearchResults(pep1, 2, mappings); + sr = MappingUtils.buildSearchResults(pep1, 2, cdsMappings); m = sr.getResults().get(0); assertEquals(4, m.getStart()); assertEquals(6, m.getEnd()); - sr = MappingUtils.buildSearchResults(pep1, 3, mappings); + sr = MappingUtils.buildSearchResults(pep1, 3, cdsMappings); m = sr.getResults().get(0); assertEquals(7, m.getStart()); assertEquals(9, m.getEnd()); - sr = MappingUtils.buildSearchResults(pep1, 4, mappings); + sr = MappingUtils.buildSearchResults(pep1, 4, cdsMappings); m = sr.getResults().get(0); assertEquals(10, m.getStart()); assertEquals(12, m.getEnd()); @@ -1672,98 +1579,26 @@ public class AlignmentUtilsTests * GPG in pep2 map to 1-3,4-6,7-9 in second CDS sequence */ List pep2Mapping = MappingUtils - .findMappingsForSequence(pep2, mappings); + .findMappingsForSequence(pep2, cdsMappings); assertEquals(1, pep2Mapping.size()); - sr = MappingUtils.buildSearchResults(pep2, 1, mappings); + sr = MappingUtils.buildSearchResults(pep2, 1, cdsMappings); assertEquals(1, sr.getResults().size()); m = sr.getResults().get(0); assertEquals(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence()); assertEquals(1, m.getStart()); assertEquals(3, m.getEnd()); - sr = MappingUtils.buildSearchResults(pep2, 2, mappings); + sr = MappingUtils.buildSearchResults(pep2, 2, cdsMappings); m = sr.getResults().get(0); assertEquals(4, m.getStart()); assertEquals(6, m.getEnd()); - sr = MappingUtils.buildSearchResults(pep2, 3, mappings); + sr = MappingUtils.buildSearchResults(pep2, 3, cdsMappings); m = sr.getResults().get(0); assertEquals(7, m.getStart()); assertEquals(9, m.getEnd()); } /** - * Tests for gapped column in sequences - */ - @Test(groups = { "Functional" }) - public void testIsGappedColumn() - { - SequenceI seq1 = new Sequence("Seq1", "a--c.tc-a-g"); - SequenceI seq2 = new Sequence("Seq2", "aa---t--a-g"); - SequenceI seq3 = new Sequence("Seq3", "ag-c t-g-"); - List seqs = Arrays - .asList(new SequenceI[] { seq1, seq2, seq3 }); - // the column number is base 1 - assertFalse(AlignmentUtils.isGappedColumn(seqs, 1)); - assertFalse(AlignmentUtils.isGappedColumn(seqs, 2)); - assertTrue(AlignmentUtils.isGappedColumn(seqs, 3)); - assertFalse(AlignmentUtils.isGappedColumn(seqs, 4)); - assertTrue(AlignmentUtils.isGappedColumn(seqs, 5)); - assertFalse(AlignmentUtils.isGappedColumn(seqs, 6)); - assertFalse(AlignmentUtils.isGappedColumn(seqs, 7)); - assertFalse(AlignmentUtils.isGappedColumn(seqs, 8)); - assertFalse(AlignmentUtils.isGappedColumn(seqs, 9)); - assertTrue(AlignmentUtils.isGappedColumn(seqs, 10)); - assertFalse(AlignmentUtils.isGappedColumn(seqs, 11)); - // out of bounds: - assertTrue(AlignmentUtils.isGappedColumn(seqs, 0)); - assertTrue(AlignmentUtils.isGappedColumn(seqs, 100)); - assertTrue(AlignmentUtils.isGappedColumn(seqs, -100)); - assertTrue(AlignmentUtils.isGappedColumn(null, 0)); - } - - @Test(groups = { "Functional" }) - public void testFindCdsColumns() - { - // TODO target method belongs in a general-purpose alignment - // analysis method to find columns for feature - - /* - * NB this method assumes CDS ranges are contiguous (no introns) - */ - SequenceI gene = new Sequence("gene", "aaacccgggtttaaacccgggttt"); - SequenceI seq1 = new Sequence("Seq1", "--ac-cgGG-GGaaACC--GGtt-"); - SequenceI seq2 = new Sequence("Seq2", "AA--CCGG--g-AAA--cG-GTTt"); - seq1.createDatasetSequence(); - seq2.createDatasetSequence(); - seq1.addSequenceFeature(new SequenceFeature("CDS", "cds", 5, 6, 0f, - null)); - seq1.addSequenceFeature(new SequenceFeature("CDS", "cds", 7, 8, 0f, - null)); - seq1.addSequenceFeature(new SequenceFeature("CDS", "cds", 11, 13, 0f, - null)); - seq1.addSequenceFeature(new SequenceFeature("CDS", "cds", 14, 15, 0f, - null)); - seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 1, 2, 0f, - null)); - seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 3, 6, 0f, - null)); - seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 8, 10, 0f, - null)); - seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 12, 12, 0f, - null)); - seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 13, 15, 0f, - null)); - - List cdsColumns = AlignmentUtils.findCdsColumns(new SequenceI[] { - seq1, seq2 }); - assertEquals(4, cdsColumns.size()); - assertEquals("[1, 2]", Arrays.toString(cdsColumns.get(0))); - assertEquals("[5, 9]", Arrays.toString(cdsColumns.get(1))); - assertEquals("[11, 17]", Arrays.toString(cdsColumns.get(2))); - assertEquals("[19, 23]", Arrays.toString(cdsColumns.get(3))); - } - - /** * Test the method that realigns protein to match mapped codon alignment. */ @Test(groups = { "Functional" }) @@ -1819,7 +1654,7 @@ public class AlignmentUtilsTests * (or subtype) feature - case where the start codon is incomplete. */ @Test(groups = "Functional") - public void testGetCdsRanges_fivePrimeIncomplete() + public void testFindCdsPositions_fivePrimeIncomplete() { SequenceI dnaSeq = new Sequence("dna", "aaagGGCCCaaaTTTttt"); dnaSeq.createDatasetSequence(); @@ -1851,23 +1686,31 @@ public class AlignmentUtilsTests * (or subtype) feature. */ @Test(groups = "Functional") - public void testGetCdsRanges() + public void testFindCdsPositions() { SequenceI dnaSeq = new Sequence("dna", "aaaGGGcccAAATTTttt"); dnaSeq.createDatasetSequence(); SequenceI ds = dnaSeq.getDatasetSequence(); - // CDS for dna 3-6 - SequenceFeature sf = new SequenceFeature("CDS", "", 4, 6, 0f, null); + // CDS for dna 10-12 + SequenceFeature sf = new SequenceFeature("CDS_predicted", "", 10, 12, + 0f, null); + sf.setStrand("+"); + ds.addSequenceFeature(sf); + // CDS for dna 4-6 + sf = new SequenceFeature("CDS", "", 4, 6, 0f, null); + sf.setStrand("+"); ds.addSequenceFeature(sf); // exon feature should be ignored here sf = new SequenceFeature("exon", "", 7, 9, 0f, null); ds.addSequenceFeature(sf); - // CDS for dna 10-12 - sf = new SequenceFeature("CDS_predicted", "", 10, 12, 0f, null); - ds.addSequenceFeature(sf); List ranges = AlignmentUtils.findCdsPositions(dnaSeq); + /* + * verify ranges { [4-6], [12-10] } + * note CDS ranges are ordered ascending even if the CDS + * features are not + */ assertEquals(6, MappingUtils.getLength(ranges)); assertEquals(2, ranges.size()); assertEquals(4, ranges.get(0)[0]); @@ -2006,4 +1849,111 @@ public class AlignmentUtilsTests variants = AlignmentUtils.computePeptideVariants(codonVariants, "S"); assertEquals("[C, R, T, W]", variants.toString()); } + + /** + * Tests for the method that maps the subset of a dna sequence that has CDS + * (or subtype) feature, with CDS strand = '-' (reverse) + */ + // test turned off as currently findCdsPositions is not strand-dependent + // left in case it comes around again... + @Test(groups = "Functional", enabled = false) + public void testFindCdsPositions_reverseStrand() + { + SequenceI dnaSeq = new Sequence("dna", "aaaGGGcccAAATTTttt"); + dnaSeq.createDatasetSequence(); + SequenceI ds = dnaSeq.getDatasetSequence(); + + // CDS for dna 4-6 + SequenceFeature sf = new SequenceFeature("CDS", "", 4, 6, 0f, null); + sf.setStrand("-"); + ds.addSequenceFeature(sf); + // exon feature should be ignored here + sf = new SequenceFeature("exon", "", 7, 9, 0f, null); + ds.addSequenceFeature(sf); + // CDS for dna 10-12 + sf = new SequenceFeature("CDS_predicted", "", 10, 12, 0f, null); + sf.setStrand("-"); + ds.addSequenceFeature(sf); + + List ranges = AlignmentUtils.findCdsPositions(dnaSeq); + /* + * verify ranges { [12-10], [6-4] } + */ + assertEquals(6, MappingUtils.getLength(ranges)); + assertEquals(2, ranges.size()); + assertEquals(12, ranges.get(0)[0]); + assertEquals(10, ranges.get(0)[1]); + assertEquals(6, ranges.get(1)[0]); + assertEquals(4, ranges.get(1)[1]); + } + + /** + * Tests for the method that maps the subset of a dna sequence that has CDS + * (or subtype) feature - reverse strand case where the start codon is + * incomplete. + */ + @Test(groups = "Functional", enabled = false) + // test turned off as currently findCdsPositions is not strand-dependent + // left in case it comes around again... + public void testFindCdsPositions_reverseStrandThreePrimeIncomplete() + { + SequenceI dnaSeq = new Sequence("dna", "aaagGGCCCaaaTTTttt"); + dnaSeq.createDatasetSequence(); + SequenceI ds = dnaSeq.getDatasetSequence(); + + // CDS for dna 5-9 + SequenceFeature sf = new SequenceFeature("CDS", "", 5, 9, 0f, null); + sf.setStrand("-"); + ds.addSequenceFeature(sf); + // CDS for dna 13-15 + sf = new SequenceFeature("CDS_predicted", "", 13, 15, 0f, null); + sf.setStrand("-"); + sf.setPhase("2"); // skip 2 bases to start of next codon + ds.addSequenceFeature(sf); + + List ranges = AlignmentUtils.findCdsPositions(dnaSeq); + + /* + * check the mapping starts with the first complete codon + * expect ranges [13, 13], [9, 5] + */ + assertEquals(6, MappingUtils.getLength(ranges)); + assertEquals(2, ranges.size()); + assertEquals(13, ranges.get(0)[0]); + assertEquals(13, ranges.get(0)[1]); + assertEquals(9, ranges.get(1)[0]); + assertEquals(5, ranges.get(1)[1]); + } + + @Test(groups = "Functional") + public void testAlignAs_alternateTranscriptsUngapped() + { + SequenceI dna1 = new Sequence("dna1", "cccGGGTTTaaa"); + SequenceI dna2 = new Sequence("dna2", "CCCgggtttAAA"); + AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 }); + ((Alignment) dna).createDatasetAlignment(); + SequenceI cds1 = new Sequence("cds1", "GGGTTT"); + SequenceI cds2 = new Sequence("cds2", "CCCAAA"); + AlignmentI cds = new Alignment(new SequenceI[] { cds1, cds2 }); + ((Alignment) cds).createDatasetAlignment(); + + AlignedCodonFrame acf = new AlignedCodonFrame(); + MapList map = new MapList(new int[] { 4, 9 }, new int[] { 1, 6 }, 1, 1); + acf.addMap(dna1.getDatasetSequence(), cds1.getDatasetSequence(), map); + map = new MapList(new int[] { 1, 3, 10, 12 }, new int[] { 1, 6 }, 1, 1); + acf.addMap(dna2.getDatasetSequence(), cds2.getDatasetSequence(), map); + + /* + * verify CDS alignment is as: + * cccGGGTTTaaa (cdna) + * CCCgggtttAAA (cdna) + * + * ---GGGTTT--- (cds) + * CCC------AAA (cds) + */ + dna.addCodonFrame(acf); + AlignmentUtils.alignAs(cds, dna); + assertEquals("---GGGTTT---", cds.getSequenceAt(0).getSequenceAsString()); + assertEquals("CCC------AAA", cds.getSequenceAt(1).getSequenceAsString()); + } }