X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=test%2Fjalview%2Fanalysis%2FAlignmentUtilsTests.java;h=09bd64e5b31198a75981eba79e2d23eef097be88;hb=a6b324e3f5edac3df0b968f0037b1cc8b651598e;hp=74e4940b43ffefa5e53dadbaf03d9ca3f6369399;hpb=5b079109d33a4596a8c3017dbf93b7de5b0300c5;p=jalview.git diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index 74e4940..09bd64e 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -36,6 +36,7 @@ import jalview.datamodel.Mapping; import jalview.datamodel.SearchResults; import jalview.datamodel.SearchResults.Match; import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.io.AppletFormatAdapter; import jalview.io.FormatAdapter; @@ -45,10 +46,8 @@ import jalview.util.MappingUtils; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.HashSet; import java.util.Iterator; -import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -372,8 +371,8 @@ public class AlignmentUtilsTests * region). The leading gap, and the gaps between codons, are subsumed by * the protein alignment gap. */ - checkAlignSequenceAs("-G-GG--AA-A-", "-A-L-", true, true, map, - "---G-GG---AA-A-"); + checkAlignSequenceAs("-G-GG--AA-A---", "-A-L-", true, true, map, + "---G-GG---AA-A---"); /* * Include only unmapped gaps in dna when realigning (outside the exon @@ -381,7 +380,7 @@ public class AlignmentUtilsTests * the protein alignment gap. */ checkAlignSequenceAs("-G-GG--AA-A-", "-A-L-", false, true, map, - "---GGG---AAA-"); + "---GGG---AAA---"); } /** @@ -439,7 +438,6 @@ public class AlignmentUtilsTests @Test(groups = { "Functional" }) public void testAlignSequenceAs_withMapping_withUnmappedProtein() { - /* * Exons at codon 2 (AAA) and 4 (TTT) mapped to A and P */ @@ -447,38 +445,39 @@ public class AlignmentUtilsTests 1, 1, 3, 3 }, 3, 1); /* - * Expect alignment does nothing (aborts realignment). Change this test - * first if different behaviour wanted. + * -L- 'aligns' ccc------ */ - checkAlignSequenceAs("GGGAAACCCTTTGGG", "-A-L-P-", false, false, map, - "GGGAAACCCTTTGGG"); + checkAlignSequenceAs("gggAAAcccTTTggg", "-A-L-P-", false, false, map, + "gggAAAccc------TTTggg"); } /** * Helper method that performs and verifies the method under test. * - * @param dnaSeq - * @param proteinSeq + * @param alignee + * the sequence to be realigned + * @param alignModel + * the sequence whose alignment is to be copied * @param preserveMappedGaps * @param preserveUnmappedGaps * @param map * @param expected */ - protected void checkAlignSequenceAs(final String dnaSeq, - final String proteinSeq, final boolean preserveMappedGaps, + protected void checkAlignSequenceAs(final String alignee, + final String alignModel, final boolean preserveMappedGaps, final boolean preserveUnmappedGaps, MapList map, final String expected) { - SequenceI dna = new Sequence("Seq1", dnaSeq); - dna.createDatasetSequence(); - SequenceI protein = new Sequence("Seq1", proteinSeq); - protein.createDatasetSequence(); + SequenceI alignMe = new Sequence("Seq1", alignee); + alignMe.createDatasetSequence(); + SequenceI alignFrom = new Sequence("Seq2", alignModel); + alignFrom.createDatasetSequence(); AlignedCodonFrame acf = new AlignedCodonFrame(); - acf.addMap(dna.getDatasetSequence(), protein.getDatasetSequence(), map); + acf.addMap(alignMe.getDatasetSequence(), alignFrom.getDatasetSequence(), map); - AlignmentUtils.alignSequenceAs(dna, protein, acf, "---", '-', + AlignmentUtils.alignSequenceAs(alignMe, alignFrom, acf, "---", '-', preserveMappedGaps, preserveUnmappedGaps); - assertEquals(expected, dna.getSequenceAsString()); + assertEquals(expected, alignMe.getSequenceAsString()); } /** @@ -550,7 +549,9 @@ public class AlignmentUtilsTests acf.addMap(dna1.getDatasetSequence(), prot1.getDatasetSequence(), map); acf.addMap(dna2.getDatasetSequence(), prot2.getDatasetSequence(), map); acf.addMap(dna3.getDatasetSequence(), prot3.getDatasetSequence(), map); - protein.setCodonFrames(Collections.singleton(acf)); + ArrayList acfs = new ArrayList(); + acfs.add(acf); + protein.setCodonFrames(acfs); /* * Translated codon order is [1,2,3] [1,3,4] [4,5,6] [4,5,7] [5,6,7] [7,8,9] @@ -1000,10 +1001,10 @@ public class AlignmentUtilsTests } /** - * Test the method that extracts the exon-only part of a dna alignment. + * Test the method that extracts the cds-only part of a dna alignment. */ @Test(groups = { "Functional" }) - public void testMakeExonAlignment() + public void testMakeCdsAlignment() { SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa"); SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC"); @@ -1014,7 +1015,7 @@ public class AlignmentUtilsTests pep1.createDatasetSequence(); pep2.createDatasetSequence(); - Set mappings = new HashSet(); + List mappings = new ArrayList(); MapList map = new MapList(new int[] { 4, 6, 10, 12 }, new int[] { 1, 2 }, 3, 1); AlignedCodonFrame acf = new AlignedCodonFrame(); @@ -1026,11 +1027,11 @@ public class AlignmentUtilsTests acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map); mappings.add(acf); - AlignmentI exons = AlignmentUtils.makeExonAlignment(new SequenceI[] { + AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] { dna1, dna2 }, mappings); - assertEquals(2, exons.getSequences().size()); - assertEquals("GGGTTT", exons.getSequenceAt(0).getSequenceAsString()); - assertEquals("GGGTTTCCC", exons.getSequenceAt(1).getSequenceAsString()); + assertEquals(2, cds.getSequences().size()); + assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString()); + assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString()); /* * Verify updated mappings @@ -1047,14 +1048,14 @@ public class AlignmentUtilsTests SearchResults sr = MappingUtils.buildSearchResults(pep1, 1, mappings); assertEquals(1, sr.getResults().size()); Match m = sr.getResults().get(0); - assertEquals(exons.getSequenceAt(0).getDatasetSequence(), + assertEquals(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence()); assertEquals(1, m.getStart()); assertEquals(3, m.getEnd()); // map F to TTT sr = MappingUtils.buildSearchResults(pep1, 2, mappings); m = sr.getResults().get(0); - assertEquals(exons.getSequenceAt(0).getDatasetSequence(), + assertEquals(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence()); assertEquals(4, m.getStart()); assertEquals(6, m.getEnd()); @@ -1069,34 +1070,34 @@ public class AlignmentUtilsTests sr = MappingUtils.buildSearchResults(pep2, 1, mappings); assertEquals(1, sr.getResults().size()); m = sr.getResults().get(0); - assertEquals(exons.getSequenceAt(1).getDatasetSequence(), + assertEquals(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence()); assertEquals(1, m.getStart()); assertEquals(3, m.getEnd()); // map F to TTT sr = MappingUtils.buildSearchResults(pep2, 2, mappings); m = sr.getResults().get(0); - assertEquals(exons.getSequenceAt(1).getDatasetSequence(), + assertEquals(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence()); assertEquals(4, m.getStart()); assertEquals(6, m.getEnd()); // map P to CCC sr = MappingUtils.buildSearchResults(pep2, 3, mappings); m = sr.getResults().get(0); - assertEquals(exons.getSequenceAt(1).getDatasetSequence(), + assertEquals(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence()); assertEquals(7, m.getStart()); assertEquals(9, m.getEnd()); } /** - * Test the method that makes an exon-only sequence from a DNA sequence and - * its product mapping. Test includes the expected case that the DNA sequence + * Test the method that makes a cds-only sequence from a DNA sequence and its + * product mapping. Test includes the expected case that the DNA sequence * already has a protein product (Uniprot translation) which in turn has an * x-ref to the EMBLCDS record. */ @Test(groups = { "Functional" }) - public void testMakeExonSequences() + public void testMakeCdsSequences() { SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa"); SequenceI pep1 = new Sequence("pep1", "GF"); @@ -1117,27 +1118,27 @@ public class AlignmentUtilsTests mappings.add(acf); AlignedCodonFrame newMapping = new AlignedCodonFrame(); - List exons = AlignmentUtils.makeExonSequences(dna1, acf, + List cdsSeqs = AlignmentUtils.makeCdsSequences(dna1, acf, newMapping); - assertEquals(1, exons.size()); - SequenceI exon = exons.get(0); + assertEquals(1, cdsSeqs.size()); + SequenceI cdsSeq = cdsSeqs.get(0); - assertEquals("GGGTTT", exon.getSequenceAsString()); - assertEquals("dna1|A12345", exon.getName()); - assertEquals(1, exon.getDBRefs().length); - DBRefEntry cdsRef = exon.getDBRefs()[0]; + assertEquals("GGGTTT", cdsSeq.getSequenceAsString()); + assertEquals("dna1|A12345", cdsSeq.getName()); + assertEquals(1, cdsSeq.getDBRefs().length); + DBRefEntry cdsRef = cdsSeq.getDBRefs()[0]; assertEquals("EMBLCDS", cdsRef.getSource()); assertEquals("2", cdsRef.getVersion()); assertEquals("A12345", cdsRef.getAccessionId()); } /** - * Test the method that makes an exon-only alignment from a DNA sequence and - * its product mappings, for the case where there are multiple exon mappings - * to different protein products. + * Test the method that makes a cds-only alignment from a DNA sequence and its + * product mappings, for the case where there are multiple exon mappings to + * different protein products. */ @Test(groups = { "Functional" }) - public void testMakeExonAlignment_multipleProteins() + public void testMakeCdsAlignment_multipleProteins() { SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa"); SequenceI pep1 = new Sequence("pep1", "GF"); // GGGTTT @@ -1159,7 +1160,7 @@ public class AlignmentUtilsTests * convenience so results are in the input order. There is no assertion that * the generated exon sequences are in any particular order. */ - Set mappings = new LinkedHashSet(); + List mappings = new ArrayList(); // map ...GGG...TTT to GF MapList map = new MapList(new int[] { 4, 6, 10, 12 }, new int[] { 1, 2 }, 3, 1); @@ -1183,82 +1184,82 @@ public class AlignmentUtilsTests * Create the Exon alignment; also replaces the dna-to-protein mappings with * exon-to-protein and exon-to-dna mappings */ - AlignmentI exal = AlignmentUtils.makeExonAlignment( + AlignmentI exal = AlignmentUtils.makeCdsAlignment( new SequenceI[] { dna1 }, mappings); /* - * Verify we have 3 exon sequences, mapped to pep1/2/3 respectively + * Verify we have 3 cds sequences, mapped to pep1/2/3 respectively */ - List exons = exal.getSequences(); - assertEquals(3, exons.size()); - - SequenceI exon = exons.get(0); - assertEquals("GGGTTT", exon.getSequenceAsString()); - assertEquals("dna1|A12345", exon.getName()); - assertEquals(1, exon.getDBRefs().length); - DBRefEntry cdsRef = exon.getDBRefs()[0]; + List cds = exal.getSequences(); + assertEquals(3, cds.size()); + + SequenceI cdsSeq = cds.get(0); + assertEquals("GGGTTT", cdsSeq.getSequenceAsString()); + assertEquals("dna1|A12345", cdsSeq.getName()); + assertEquals(1, cdsSeq.getDBRefs().length); + DBRefEntry cdsRef = cdsSeq.getDBRefs()[0]; assertEquals("EMBLCDS", cdsRef.getSource()); assertEquals("2", cdsRef.getVersion()); assertEquals("A12345", cdsRef.getAccessionId()); - exon = exons.get(1); - assertEquals("aaaccc", exon.getSequenceAsString()); - assertEquals("dna1|A12346", exon.getName()); - assertEquals(1, exon.getDBRefs().length); - cdsRef = exon.getDBRefs()[0]; + cdsSeq = cds.get(1); + assertEquals("aaaccc", cdsSeq.getSequenceAsString()); + assertEquals("dna1|A12346", cdsSeq.getName()); + assertEquals(1, cdsSeq.getDBRefs().length); + cdsRef = cdsSeq.getDBRefs()[0]; assertEquals("EMBLCDS", cdsRef.getSource()); assertEquals("3", cdsRef.getVersion()); assertEquals("A12346", cdsRef.getAccessionId()); - exon = exons.get(2); - assertEquals("aaaTTT", exon.getSequenceAsString()); - assertEquals("dna1|A12347", exon.getName()); - assertEquals(1, exon.getDBRefs().length); - cdsRef = exon.getDBRefs()[0]; + cdsSeq = cds.get(2); + assertEquals("aaaTTT", cdsSeq.getSequenceAsString()); + assertEquals("dna1|A12347", cdsSeq.getName()); + assertEquals(1, cdsSeq.getDBRefs().length); + cdsRef = cdsSeq.getDBRefs()[0]; assertEquals("EMBLCDS", cdsRef.getSource()); assertEquals("4", cdsRef.getVersion()); assertEquals("A12347", cdsRef.getAccessionId()); /* - * Verify there are mappings from each exon sequence to its protein product + * Verify there are mappings from each cds sequence to its protein product * and also to its dna source */ Iterator newMappingsIterator = mappings.iterator(); // mappings for dna1 - exon1 - pep1 - AlignedCodonFrame exonMapping = newMappingsIterator.next(); - List dnaMappings = exonMapping.getMappingsForSequence(dna1); + AlignedCodonFrame cdsMapping = newMappingsIterator.next(); + List dnaMappings = cdsMapping.getMappingsForSequence(dna1); assertEquals(1, dnaMappings.size()); - assertSame(exons.get(0).getDatasetSequence(), dnaMappings.get(0) + assertSame(cds.get(0).getDatasetSequence(), dnaMappings.get(0) .getTo()); assertEquals("G(1) in CDS should map to G(4) in DNA", 4, dnaMappings .get(0).getMap().getToPosition(1)); - List peptideMappings = exonMapping + List peptideMappings = cdsMapping .getMappingsForSequence(pep1); assertEquals(1, peptideMappings.size()); assertSame(pep1.getDatasetSequence(), peptideMappings.get(0).getTo()); - // mappings for dna1 - exon2 - pep2 - exonMapping = newMappingsIterator.next(); - dnaMappings = exonMapping.getMappingsForSequence(dna1); + // mappings for dna1 - cds2 - pep2 + cdsMapping = newMappingsIterator.next(); + dnaMappings = cdsMapping.getMappingsForSequence(dna1); assertEquals(1, dnaMappings.size()); - assertSame(exons.get(1).getDatasetSequence(), dnaMappings.get(0) + assertSame(cds.get(1).getDatasetSequence(), dnaMappings.get(0) .getTo()); assertEquals("c(4) in CDS should map to c(7) in DNA", 7, dnaMappings .get(0).getMap().getToPosition(4)); - peptideMappings = exonMapping.getMappingsForSequence(pep2); + peptideMappings = cdsMapping.getMappingsForSequence(pep2); assertEquals(1, peptideMappings.size()); assertSame(pep2.getDatasetSequence(), peptideMappings.get(0).getTo()); - // mappings for dna1 - exon3 - pep3 - exonMapping = newMappingsIterator.next(); - dnaMappings = exonMapping.getMappingsForSequence(dna1); + // mappings for dna1 - cds3 - pep3 + cdsMapping = newMappingsIterator.next(); + dnaMappings = cdsMapping.getMappingsForSequence(dna1); assertEquals(1, dnaMappings.size()); - assertSame(exons.get(2).getDatasetSequence(), dnaMappings.get(0) + assertSame(cds.get(2).getDatasetSequence(), dnaMappings.get(0) .getTo()); assertEquals("T(4) in CDS should map to T(10) in DNA", 10, dnaMappings .get(0).getMap().getToPosition(4)); - peptideMappings = exonMapping.getMappingsForSequence(pep3); + peptideMappings = cdsMapping.getMappingsForSequence(pep3); assertEquals(1, peptideMappings.size()); assertSame(pep3.getDatasetSequence(), peptideMappings.get(0).getTo()); } @@ -1303,4 +1304,110 @@ public class AlignmentUtilsTests assertEquals(40, map.getFromLowest()); assertEquals(48, map.getFromHighest()); } + + /** + * Test for the alignSequenceAs method where we have protein mapped to protein + */ + @Test(groups = { "Functional" }) + public void testAlignSequenceAs_mappedProteinProtein() + { + + SequenceI alignMe = new Sequence("Match", "MGAASEV"); + alignMe.createDatasetSequence(); + SequenceI alignFrom = new Sequence("Query", "LQTGYMGAASEVMFSPTRR"); + alignFrom.createDatasetSequence(); + + AlignedCodonFrame acf = new AlignedCodonFrame(); + // this is like a domain or motif match of part of a peptide sequence + MapList map = new MapList(new int[] { 6, 12 }, new int[] { 1, 7 }, 1, 1); + acf.addMap(alignFrom.getDatasetSequence(), + alignMe.getDatasetSequence(), map); + + AlignmentUtils.alignSequenceAs(alignMe, alignFrom, acf, "-", '-', true, + true); + assertEquals("-----MGAASEV-------", alignMe.getSequenceAsString()); + } + + /** + * Test for the alignSequenceAs method where there are trailing unmapped + * residues in the model sequence + */ + @Test(groups = { "Functional" }) + public void testAlignSequenceAs_withTrailingPeptide() + { + // map first 3 codons to KPF; G is a trailing unmapped residue + MapList map = new MapList(new int[] { 1, 9 }, new int[] { 1, 3 }, 3, 1); + + checkAlignSequenceAs("AAACCCTTT", "K-PFG", true, true, map, + "AAA---CCCTTT---"); + } + + @Test(groups = { "Functional" }) + public void testTransferFeatures() + { + SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt"); + SequenceI cds = new Sequence("cds/10-15", "TAGGCC"); + + // no overlap + dna.addSequenceFeature(new SequenceFeature("type1", "desc1", 1, 2, 1f, + null)); + // partial overlap - to [1, 1] + dna.addSequenceFeature(new SequenceFeature("type2", "desc2", 3, 4, 2f, + null)); + // exact overlap - to [1, 3] + dna.addSequenceFeature(new SequenceFeature("type3", "desc3", 4, 6, 3f, + null)); + // spanning overlap - to [2, 5] + dna.addSequenceFeature(new SequenceFeature("type4", "desc4", 5, 11, 4f, + null)); + // exactly overlaps whole mapped range [1, 6] + dna.addSequenceFeature(new SequenceFeature("type5", "desc5", 4, 12, 5f, + null)); + // no overlap (internal) + dna.addSequenceFeature(new SequenceFeature("type6", "desc6", 7, 9, 6f, + null)); + // no overlap (3' end) + dna.addSequenceFeature(new SequenceFeature("type7", "desc7", 13, 15, + 7f, null)); + // overlap (3' end) - to [6, 6] + dna.addSequenceFeature(new SequenceFeature("type8", "desc8", 12, 12, + 8f, null)); + // extended overlap - to [6, +] + dna.addSequenceFeature(new SequenceFeature("type9", "desc9", 12, 13, + 9f, null)); + + MapList map = new MapList(new int[] { 4, 6, 10, 12 }, + new int[] { 1, 6 }, 1, 1); + + /* + * behaviour of transferFeatures depends on MapList.locateInTo() + * if start and end positions are mapped, returns the mapped region + * if either is not mapped, does _not_ search for overlapped region + */ + AlignmentUtils.transferFeatures(dna, cds, map); + SequenceFeature[] sfs = cds.getSequenceFeatures(); + assertEquals(4, sfs.length); + + SequenceFeature sf = sfs[0]; + assertEquals("type3", sf.getType()); + assertEquals("desc3", sf.getDescription()); + assertEquals(3f, sf.getScore()); + assertEquals(1, sf.getBegin()); + assertEquals(3, sf.getEnd()); + + sf = sfs[1]; + assertEquals("type4", sf.getType()); + assertEquals(2, sf.getBegin()); + assertEquals(5, sf.getEnd()); + + sf = sfs[2]; + assertEquals("type5", sf.getType()); + assertEquals(1, sf.getBegin()); + assertEquals(6, sf.getEnd()); + + sf = sfs[3]; + assertEquals("type8", sf.getType()); + assertEquals(6, sf.getBegin()); + assertEquals(6, sf.getEnd()); + } }