From 59cd0fad63879353090634cd27cdf31cc96b911e Mon Sep 17 00:00:00 2001 From: gmungoc Date: Mon, 14 Sep 2015 12:12:39 +0100 Subject: [PATCH] JAL-1880 updated mapping tests to ensure they handle subsequence offsets --- test/jalview/util/MappingUtilsTest.java | 146 +++++++++++++++++++++---------- 1 file changed, 102 insertions(+), 44 deletions(-) diff --git a/test/jalview/util/MappingUtilsTest.java b/test/jalview/util/MappingUtilsTest.java index ad7193a..fbf021b 100644 --- a/test/jalview/util/MappingUtilsTest.java +++ b/test/jalview/util/MappingUtilsTest.java @@ -25,7 +25,11 @@ import static org.testng.AssertJUnit.assertSame; import static org.testng.AssertJUnit.assertTrue; import jalview.api.AlignViewportI; +import jalview.commands.EditCommand; +import jalview.commands.EditCommand.Action; +import jalview.commands.EditCommand.Edit; import jalview.datamodel.AlignedCodonFrame; +import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; import jalview.datamodel.ColumnSelection; import jalview.datamodel.SearchResults; @@ -42,6 +46,7 @@ import java.io.IOException; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; +import java.util.LinkedHashSet; import java.util.List; import java.util.Set; @@ -59,46 +64,47 @@ public class MappingUtilsTest @Test(groups = { "Functional" }) public void testBuildSearchResults() { - final Sequence seq1 = new Sequence("Seq1", "C-G-TA-GC"); + final Sequence seq1 = new Sequence("Seq1/5-10", "C-G-TA-GC"); seq1.createDatasetSequence(); - final Sequence aseq1 = new Sequence("Seq1", "-P-R"); + final Sequence aseq1 = new Sequence("Seq1/12-13", "-P-R"); aseq1.createDatasetSequence(); /* - * Map dna bases 1-6 to protein residues 1-2 + * Map dna bases 5-10 to protein residues 12-13 */ AlignedCodonFrame acf = new AlignedCodonFrame(); - MapList map = new MapList(new int[] { 1, 6 }, new int[] { 1, 2 }, 3, 1); + MapList map = new MapList(new int[] { 5, 10 }, new int[] { 12, 13 }, 3, + 1); 
acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map); Set acfList = Collections.singleton(acf); /* - * Check protein residue 1 maps to codon 1-3, 2 to codon 4-6 + * Check protein residue 12 maps to codon 5-7, 13 to codon 8-10 */ - SearchResults sr = MappingUtils.buildSearchResults(aseq1, 1, acfList); + SearchResults sr = MappingUtils.buildSearchResults(aseq1, 12, acfList); assertEquals(1, sr.getResults().size()); Match m = sr.getResults().get(0); assertEquals(seq1.getDatasetSequence(), m.getSequence()); - assertEquals(1, m.getStart()); - assertEquals(3, m.getEnd()); - sr = MappingUtils.buildSearchResults(aseq1, 2, acfList); + assertEquals(5, m.getStart()); + assertEquals(7, m.getEnd()); + sr = MappingUtils.buildSearchResults(aseq1, 13, acfList); assertEquals(1, sr.getResults().size()); m = sr.getResults().get(0); assertEquals(seq1.getDatasetSequence(), m.getSequence()); - assertEquals(4, m.getStart()); - assertEquals(6, m.getEnd()); + assertEquals(8, m.getStart()); + assertEquals(10, m.getEnd()); /* - * Check inverse mappings, from codons 1-3, 4-6 to protein 1, 2 + * Check inverse mappings, from codons 5-7, 8-10 to protein 12, 13 */ - for (int i = 1; i < 7; i++) + for (int i = 5; i < 11; i++) { sr = MappingUtils.buildSearchResults(seq1, i, acfList); assertEquals(1, sr.getResults().size()); m = sr.getResults().get(0); assertEquals(aseq1.getDatasetSequence(), m.getSequence()); - int residue = i > 3 ? 2 : 1; + int residue = i > 7 ? 
13 : 12; assertEquals(residue, m.getStart()); assertEquals(residue, m.getEnd()); } @@ -110,61 +116,61 @@ public class MappingUtilsTest @Test(groups = { "Functional" }) public void testBuildSearchResults_withIntron() { - final Sequence seq1 = new Sequence("Seq1", "C-G-TAGA-GCAGCTT"); + final Sequence seq1 = new Sequence("Seq1/5-17", "c-G-tAGa-GcAgCtt"); seq1.createDatasetSequence(); - final Sequence aseq1 = new Sequence("Seq1", "-P-R"); + final Sequence aseq1 = new Sequence("Seq1/8-9", "-E-D"); aseq1.createDatasetSequence(); /* - * Map dna bases [2, 4, 5], [7, 9, 11] to protein residues 1 and 2 + * Map dna bases [6, 8, 9], [11, 13, 15] to protein residues 8 and 9 */ AlignedCodonFrame acf = new AlignedCodonFrame(); - MapList map = new MapList(new int[] { 2, 2, 4, 5, 7, 7, 9, 9, 11, 11 }, - new int[] { 1, 2 }, 3, 1); + MapList map = new MapList(new int[] { 6, 6, 8, 9, 11, 11, 13, 13, 15, + 15 }, new int[] { 8, 9 }, 3, 1); acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map); Set acfList = Collections.singleton(acf); /* - * Check protein residue 1 maps to [2, 4, 5] + * Check protein residue 8 maps to [6, 8, 9] */ - SearchResults sr = MappingUtils.buildSearchResults(aseq1, 1, acfList); + SearchResults sr = MappingUtils.buildSearchResults(aseq1, 8, acfList); assertEquals(2, sr.getResults().size()); Match m = sr.getResults().get(0); assertEquals(seq1.getDatasetSequence(), m.getSequence()); - assertEquals(2, m.getStart()); - assertEquals(2, m.getEnd()); + assertEquals(6, m.getStart()); + assertEquals(6, m.getEnd()); m = sr.getResults().get(1); assertEquals(seq1.getDatasetSequence(), m.getSequence()); - assertEquals(4, m.getStart()); - assertEquals(5, m.getEnd()); + assertEquals(8, m.getStart()); + assertEquals(9, m.getEnd()); /* - * Check protein residue 2 maps to [7, 9, 11] + * Check protein residue 9 maps to [11, 13, 15] */ - sr = MappingUtils.buildSearchResults(aseq1, 2, acfList); + sr = MappingUtils.buildSearchResults(aseq1, 9, acfList); 
assertEquals(3, sr.getResults().size()); m = sr.getResults().get(0); assertEquals(seq1.getDatasetSequence(), m.getSequence()); - assertEquals(7, m.getStart()); - assertEquals(7, m.getEnd()); + assertEquals(11, m.getStart()); + assertEquals(11, m.getEnd()); m = sr.getResults().get(1); assertEquals(seq1.getDatasetSequence(), m.getSequence()); - assertEquals(9, m.getStart()); - assertEquals(9, m.getEnd()); + assertEquals(13, m.getStart()); + assertEquals(13, m.getEnd()); m = sr.getResults().get(2); assertEquals(seq1.getDatasetSequence(), m.getSequence()); - assertEquals(11, m.getStart()); - assertEquals(11, m.getEnd()); + assertEquals(15, m.getStart()); + assertEquals(15, m.getEnd()); /* * Check inverse mappings, from codons to protein */ - for (int i = 1; i < 14; i++) + for (int i = 5; i < 18; i++) { sr = MappingUtils.buildSearchResults(seq1, i, acfList); - int residue = (i == 2 || i == 4 || i == 5) ? 1 : (i == 7 || i == 9 - || i == 11 ? 2 : 0); + int residue = (i == 6 || i == 8 || i == 9) ? 8 : (i == 11 || i == 13 + || i == 15 ? 9 : 0); if (residue == 0) { assertEquals(0, sr.getResults().size()); @@ -329,6 +335,9 @@ public class MappingUtilsTest } /** + * Set up mappings for tests from 3 dna to 3 protein sequences. Sequences have + * offset start positions for a more general test case. + * * @throws IOException */ protected void setupMappedAlignments() throws IOException @@ -337,23 +346,32 @@ public class MappingUtilsTest * Set up dna and protein Seq1/2/3 with mappings (held on the protein * viewport). Lower case for introns. 
*/ - AlignmentI cdna = loadAlignment(">Seq1\nAC-GctGtC-T\n" - + ">Seq2\nTc-GA-G-T-Tc\n" + ">Seq3\nTtTT-AaCGg-\n", "FASTA"); + AlignmentI cdna = loadAlignment(">Seq1/10-18\nAC-GctGtC-T\n" + + ">Seq2/20-27\nTc-GA-G-T-Tc\n" + ">Seq3/30-38\nTtTT-AaCGg-\n", + "FASTA"); cdna.setDataset(null); AlignmentI protein = loadAlignment( - ">Seq1\n-K-P\n>Seq2\nL--Q\n>Seq3\nG--S\n", "FASTA"); + ">Seq1/40-41\n-K-P\n>Seq2/50-51\nL--Q\n>Seq3/60-61\nG--S\n", + "FASTA"); protein.setDataset(null); + + // map first dna to first protein seq AlignedCodonFrame acf = new AlignedCodonFrame(); - MapList map = new MapList(new int[] { 1, 3, 6, 6, 8, 9 }, new int[] { - 1, 2 }, 3, 1); + MapList map = new MapList(new int[] { 10, 12, 15, 15, 17, 18 }, + new int[] { 40, 41 }, 3, 1); acf.addMap(cdna.getSequenceAt(0).getDatasetSequence(), protein .getSequenceAt(0).getDatasetSequence(), map); - map = new MapList(new int[] { 1, 1, 3, 4, 5, 7 }, new int[] { 1, 2 }, + + // map second dna to second protein seq + map = new MapList(new int[] { 20, 20, 22, 23, 24, 26 }, new int[] { 50, + 51 }, 3, 1); acf.addMap(cdna.getSequenceAt(1).getDatasetSequence(), protein .getSequenceAt(1).getDatasetSequence(), map); - map = new MapList(new int[] { 1, 1, 3, 4, 5, 5, 7, 8 }, new int[] { 1, - 2 }, 3, 1); + + // map third dna to third protein seq + map = new MapList(new int[] { 30, 30, 32, 34, 36, 37 }, new int[] { 60, + 61 }, 3, 1); acf.addMap(cdna.getSequenceAt(2).getDatasetSequence(), protein .getSequenceAt(2).getDatasetSequence(), map); Set acfList = Collections.singleton(acf); @@ -679,4 +697,44 @@ public class MappingUtilsTest result = MappingUtils.findMappingsForSequence(null, null); assertEquals(0, result.size()); } + + @Test(groups = { "Functional" }) + public void testMapEditCommand() + { + SequenceI dna = new Sequence("Seq1", "---ACG---GCATCA", 8, 16); + SequenceI protein = new Sequence("Seq2", "-T-AS", 5, 7); + dna.createDatasetSequence(); + protein.createDatasetSequence(); + AlignedCodonFrame acf = new 
AlignedCodonFrame(); + MapList map = new MapList(new int[] { 8, 16 }, new int[] { 5, 7 }, 3, 1); + acf.addMap(dna.getDatasetSequence(), protein.getDatasetSequence(), map); + Set mappings = new LinkedHashSet(); + mappings.add(acf); + + AlignmentI prot = new Alignment(new SequenceI[] { protein }); + prot.setCodonFrames(mappings); + AlignmentI nuc = new Alignment(new SequenceI[] { dna }); + + /* + * construct and perform the edit command to turn "-T-AS" into "-T-A--S" + * i.e. insert two gaps at column 4 + */ + EditCommand ec = new EditCommand(); + final Edit edit = ec.new Edit(Action.INSERT_GAP, + new SequenceI[] { protein }, 4, 2, '-'); + ec.appendEdit(edit, prot, true, null); + + /* + * the mapped edit command should be to insert 6 gaps before the third + * codon (TCA) of the dna, which starts at aligned column 12 + */ + EditCommand mappedEdit = MappingUtils.mapEditCommand(ec, false, nuc, + '-', mappings); + assertEquals(1, mappedEdit.getEdits().size()); + Edit e = mappedEdit.getEdits().get(0); + assertEquals(1, e.getSequences().length); + assertEquals(dna, e.getSequences()[0]); + assertEquals(12, e.getPosition()); + assertEquals(6, e.getNumber()); + } } -- 1.7.10.2