package jalview.util; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; import java.awt.Color; import java.io.IOException; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; import org.junit.Test; import jalview.api.AlignViewportI; import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; import jalview.datamodel.ColumnSelection; import jalview.datamodel.SearchResults; import jalview.datamodel.SearchResults.Match; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceGroup; import jalview.datamodel.SequenceI; import jalview.gui.AlignViewport; import jalview.io.AppletFormatAdapter; import jalview.io.FormatAdapter; public class MappingUtilsTest { private AlignViewportI dnaView; private AlignViewportI proteinView; /** * Simple test of mapping with no intron involved. */ @Test public void testBuildSearchResults() { final Sequence seq1 = new Sequence("Seq1", "C-G-TA-GC"); seq1.createDatasetSequence(); final Sequence aseq1 = new Sequence("Seq1", "-P-R"); aseq1.createDatasetSequence(); /* * Map dna bases 1-6 to protein residues 1-2 */ AlignedCodonFrame acf = new AlignedCodonFrame(); MapList map = new MapList(new int[] { 1, 6 }, new int[] { 1, 2 }, 3, 1); acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map); Set acfList = Collections.singleton(acf); /* * Check protein residue 1 maps to codon 1-3, 2 to codon 4-6 */ SearchResults sr = MappingUtils.buildSearchResults(aseq1, 1, acfList); assertEquals(1, sr.getResults().size()); Match m = sr.getResults().get(0); assertEquals(seq1.getDatasetSequence(), m.getSequence()); assertEquals(1, m.getStart()); assertEquals(3, m.getEnd()); sr = MappingUtils.buildSearchResults(aseq1, 2, acfList); assertEquals(1, sr.getResults().size()); m = sr.getResults().get(0); assertEquals(seq1.getDatasetSequence(), m.getSequence()); assertEquals(4, m.getStart()); assertEquals(6, m.getEnd()); /* * Check inverse mappings, from codons 1-3, 4-6 to protein 1, 2 */ for (int i = 1; i < 7; i++) { sr = MappingUtils.buildSearchResults(seq1, i, acfList); assertEquals(1, sr.getResults().size()); m = sr.getResults().get(0); assertEquals(aseq1.getDatasetSequence(), m.getSequence()); int residue = i > 3 ? 2 : 1; assertEquals(residue, m.getStart()); assertEquals(residue, m.getEnd()); } } /** * Simple test of mapping with introns involved. */ @Test public void testBuildSearchResults_withIntron() { final Sequence seq1 = new Sequence("Seq1", "C-G-TAGA-GCAGCTT"); seq1.createDatasetSequence(); final Sequence aseq1 = new Sequence("Seq1", "-P-R"); aseq1.createDatasetSequence(); /* * Map dna bases [2, 4, 5], [7, 9, 11] to protein residues 1 and 2 */ AlignedCodonFrame acf = new AlignedCodonFrame(); MapList map = new MapList(new int[] { 2, 2, 4, 5, 7, 7, 9, 9, 11, 11 }, new int[] { 1, 2 }, 3, 1); acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map); Set acfList = Collections.singleton(acf); /* * Check protein residue 1 maps to [2, 4, 5] */ SearchResults sr = MappingUtils.buildSearchResults(aseq1, 1, acfList); assertEquals(2, sr.getResults().size()); Match m = sr.getResults().get(0); assertEquals(seq1.getDatasetSequence(), m.getSequence()); assertEquals(2, m.getStart()); assertEquals(2, m.getEnd()); m = sr.getResults().get(1); assertEquals(seq1.getDatasetSequence(), m.getSequence()); assertEquals(4, m.getStart()); assertEquals(5, m.getEnd()); /* * Check protein residue 2 maps to [7, 9, 11] */ sr = MappingUtils.buildSearchResults(aseq1, 2, acfList); assertEquals(3, sr.getResults().size()); m = sr.getResults().get(0); assertEquals(seq1.getDatasetSequence(), m.getSequence()); assertEquals(7, m.getStart()); assertEquals(7, m.getEnd()); m = sr.getResults().get(1); assertEquals(seq1.getDatasetSequence(), m.getSequence()); assertEquals(9, m.getStart()); assertEquals(9, m.getEnd()); m = sr.getResults().get(2); assertEquals(seq1.getDatasetSequence(), m.getSequence()); assertEquals(11, m.getStart()); assertEquals(11, m.getEnd()); /* * Check inverse mappings, from codons to protein */ for (int i = 1; i < 14; i++) { sr = MappingUtils.buildSearchResults(seq1, i, acfList); int residue = (i == 2 || i == 4 || i == 5) ? 1 : (i == 7 || i == 9 || i == 11 ? 2 : 0); if (residue == 0) { assertEquals(0, sr.getResults().size()); continue; } assertEquals(1, sr.getResults().size()); m = sr.getResults().get(0); assertEquals(aseq1.getDatasetSequence(), m.getSequence()); assertEquals(residue, m.getStart()); assertEquals(residue, m.getEnd()); } } /** * Test mapping a sequence group made of entire sequences. * * @throws IOException */ @Test public void testMapSequenceGroup_sequences() throws IOException { /* * Set up dna and protein Seq1/2/3 with mappings (held on the protein * viewport). */ AlignmentI cdna = loadAlignment(">Seq1\nACG\n>Seq2\nTGA\n>Seq3\nTAC\n", "FASTA"); cdna.setDataset(null); AlignmentI protein = loadAlignment(">Seq1\nK\n>Seq2\nL\n>Seq3\nQ\n", "FASTA"); protein.setDataset(null); AlignedCodonFrame acf = new AlignedCodonFrame(); MapList map = new MapList(new int[] { 1, 3 }, new int[] { 1, 1 }, 3, 1); for (int seq = 0; seq < 3; seq++) { acf.addMap(cdna.getSequenceAt(seq).getDatasetSequence(), protein .getSequenceAt(seq).getDatasetSequence(), map); } Set acfList = Collections.singleton(acf); AlignViewportI dnaView = new AlignViewport(cdna); AlignViewportI proteinView = new AlignViewport(protein); protein.setCodonFrames(acfList); /* * Select Seq1 and Seq3 in the protein (startRes=endRes=0) */ SequenceGroup sg = new SequenceGroup(); sg.setColourText(true); sg.setIdColour(Color.GREEN); sg.setOutlineColour(Color.LIGHT_GRAY); sg.addSequence(protein.getSequenceAt(0), false); sg.addSequence(protein.getSequenceAt(2), false); /* * Verify the mapped sequence group in dna */ SequenceGroup mappedGroup = MappingUtils.mapSequenceGroup(sg, proteinView, dnaView); assertTrue(mappedGroup.getColourText()); assertSame(sg.getIdColour(), mappedGroup.getIdColour()); assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour()); assertEquals(2, mappedGroup.getSequences().size()); assertSame(cdna.getSequenceAt(0), mappedGroup.getSequences().get(0)); assertSame(cdna.getSequenceAt(2), mappedGroup.getSequences().get(1)); assertEquals(0, mappedGroup.getStartRes()); assertEquals(2, mappedGroup.getEndRes()); /* * Verify mapping sequence group from dna to protein */ sg.clear(); sg.addSequence(cdna.getSequenceAt(1), false); sg.addSequence(cdna.getSequenceAt(0), false); sg.setStartRes(0); sg.setEndRes(2); mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView); assertTrue(mappedGroup.getColourText()); assertSame(sg.getIdColour(), mappedGroup.getIdColour()); assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour()); assertEquals(2, mappedGroup.getSequences().size()); assertSame(protein.getSequenceAt(1), mappedGroup.getSequences().get(0)); assertSame(protein.getSequenceAt(0), mappedGroup.getSequences().get(1)); assertEquals(0, mappedGroup.getStartRes()); assertEquals(0, mappedGroup.getEndRes()); } /** * Helper method to load an alignment and ensure dataset sequences are set up. * * @param data * @param format * TODO * @return * @throws IOException */ protected AlignmentI loadAlignment(final String data, String format) throws IOException { Alignment a = new FormatAdapter().readFile(data, AppletFormatAdapter.PASTE, format); a.setDataset(null); return a; } /** * Test mapping a column selection in protein to its dna equivalent * * @throws IOException */ @Test public void testMapColumnSelection_proteinToDna() throws IOException { setupMappedAlignments(); ColumnSelection colsel = new ColumnSelection(); /* * Column 0 in protein picks up Seq2/L, Seq3/G which map to cols 0-4 and 0-3 * in dna respectively, overall 0-4 */ colsel.addElement(0); ColumnSelection cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView); assertEquals("[0, 1, 2, 3, 4]", cs.getSelected().toString()); /* * Column 1 in protein picks up Seq1/K which maps to cols 0-3 in dna */ colsel.clear(); colsel.addElement(1); cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView); assertEquals("[0, 1, 2, 3]", cs.getSelected().toString()); /* * Column 2 in protein picks up gaps only - no mapping */ colsel.clear(); colsel.addElement(2); cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView); assertEquals("[]", cs.getSelected().toString()); /* * Column 3 in protein picks up Seq1/P, Seq2/Q, Seq3/S which map to columns * 6-9, 6-10, 5-8 respectively, overall to 5-10 */ colsel.clear(); colsel.addElement(3); cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView); assertEquals("[5, 6, 7, 8, 9, 10]", cs.getSelected().toString()); /* * Combine selection of columns 1 and 3 to get a discontiguous mapped * selection */ colsel.clear(); colsel.addElement(1); colsel.addElement(3); cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView); assertEquals("[0, 1, 2, 3, 5, 6, 7, 8, 9, 10]", cs.getSelected() .toString()); } /** * @throws IOException */ protected void setupMappedAlignments() throws IOException { /* * Set up dna and protein Seq1/2/3 with mappings (held on the protein * viewport). Lower case for introns. */ AlignmentI cdna = loadAlignment(">Seq1\nAC-GctGtC-T\n" + ">Seq2\nTc-GA-G-T-Tc\n" + ">Seq3\nTtTT-AaCGg-\n", "FASTA"); cdna.setDataset(null); AlignmentI protein = loadAlignment( ">Seq1\n-K-P\n>Seq2\nL--Q\n>Seq3\nG--S\n", "FASTA"); protein.setDataset(null); AlignedCodonFrame acf = new AlignedCodonFrame(); MapList map = new MapList(new int[] { 1, 3, 6, 6, 8, 9 }, new int[] { 1, 2 }, 3, 1); acf.addMap(cdna.getSequenceAt(0).getDatasetSequence(), protein .getSequenceAt(0).getDatasetSequence(), map); map = new MapList(new int[] { 1, 1, 3, 4, 5, 7 }, new int[] { 1, 2 }, 3, 1); acf.addMap(cdna.getSequenceAt(1).getDatasetSequence(), protein .getSequenceAt(1).getDatasetSequence(), map); map = new MapList(new int[] { 1, 1, 3, 4, 5, 5, 7, 8 }, new int[] { 1, 2 }, 3, 1); acf.addMap(cdna.getSequenceAt(2).getDatasetSequence(), protein .getSequenceAt(2).getDatasetSequence(), map); Set acfList = Collections.singleton(acf); dnaView = new AlignViewport(cdna); proteinView = new AlignViewport(protein); protein.setCodonFrames(acfList); } /** * Test mapping a column selection in dna to its protein equivalent * * @throws IOException */ @Test public void testMapColumnSelection_dnaToProtein() throws IOException { setupMappedAlignments(); ColumnSelection colsel = new ColumnSelection(); /* * Column 0 in dna picks up first bases which map to residue 1, columns 0-1 * in protein. */ colsel.addElement(0); ColumnSelection cs = MappingUtils.mapColumnSelection(colsel, dnaView, proteinView); assertEquals("[0, 1]", cs.getSelected().toString()); /* * Columns 3-5 in dna map to the first residues in protein Seq1, Seq2, and * the first two in Seq3. Overall to columns 0, 1, 3 (col2 is all gaps). */ colsel.addElement(3); colsel.addElement(4); colsel.addElement(5); cs = MappingUtils.mapColumnSelection(colsel, dnaView, proteinView); assertEquals("[0, 1, 3]", cs.getSelected().toString()); } @Test public void testMapColumnSelection_null() throws IOException { setupMappedAlignments(); ColumnSelection cs = MappingUtils.mapColumnSelection(null, dnaView, proteinView); assertTrue("mapped selection not empty", cs.getSelected().isEmpty()); } /** * Tests for the method that converts a series of [start, end] ranges to * single positions */ @Test public void testFlattenRanges() { assertEquals("[1, 2, 3, 4]", Arrays.toString(MappingUtils.flattenRanges(new int[] { 1, 4 }))); assertEquals("[1, 2, 3, 4]", Arrays.toString(MappingUtils.flattenRanges(new int[] { 1, 2, 3, 4 }))); assertEquals("[1, 2, 3, 4]", Arrays.toString(MappingUtils.flattenRanges(new int[] { 1, 1, 2, 2, 3, 3, 4, 4 }))); assertEquals("[1, 2, 3, 4, 7, 8, 9, 12]", Arrays.toString(MappingUtils.flattenRanges(new int[] { 1, 4, 7, 9, 12, 12 }))); // unpaired start position is ignored: assertEquals("[1, 2, 3, 4, 7, 8, 9, 12]", Arrays.toString(MappingUtils.flattenRanges(new int[] { 1, 4, 7, 9, 12, 12, 15 }))); } /** * Test mapping a sequence group made of entire columns. * * @throws IOException */ @Test public void testMapSequenceGroup_columns() throws IOException { /* * Set up dna and protein Seq1/2/3 with mappings (held on the protein * viewport). */ AlignmentI cdna = loadAlignment( ">Seq1\nACGGCA\n>Seq2\nTGACAG\n>Seq3\nTACGTA\n", "FASTA"); cdna.setDataset(null); AlignmentI protein = loadAlignment(">Seq1\nKA\n>Seq2\nLQ\n>Seq3\nQV\n", "FASTA"); protein.setDataset(null); AlignedCodonFrame acf = new AlignedCodonFrame(); MapList map = new MapList(new int[] { 1, 6 }, new int[] { 1, 2 }, 3, 1); for (int seq = 0; seq < 3; seq++) { acf.addMap(cdna.getSequenceAt(seq).getDatasetSequence(), protein .getSequenceAt(seq).getDatasetSequence(), map); } Set acfList = Collections.singleton(acf); AlignViewportI dnaView = new AlignViewport(cdna); AlignViewportI proteinView = new AlignViewport(protein); protein.setCodonFrames(acfList); /* * Select all sequences, column 2 in the protein */ SequenceGroup sg = new SequenceGroup(); sg.setColourText(true); sg.setIdColour(Color.GREEN); sg.setOutlineColour(Color.LIGHT_GRAY); sg.addSequence(protein.getSequenceAt(0), false); sg.addSequence(protein.getSequenceAt(1), false); sg.addSequence(protein.getSequenceAt(2), false); sg.setStartRes(1); sg.setEndRes(1); /* * Verify the mapped sequence group in dna */ SequenceGroup mappedGroup = MappingUtils.mapSequenceGroup(sg, proteinView, dnaView); assertTrue(mappedGroup.getColourText()); assertSame(sg.getIdColour(), mappedGroup.getIdColour()); assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour()); assertEquals(3, mappedGroup.getSequences().size()); assertSame(cdna.getSequenceAt(0), mappedGroup.getSequences().get(0)); assertSame(cdna.getSequenceAt(1), mappedGroup.getSequences().get(1)); assertSame(cdna.getSequenceAt(2), mappedGroup.getSequences().get(2)); assertEquals(3, mappedGroup.getStartRes()); assertEquals(5, mappedGroup.getEndRes()); /* * Verify mapping sequence group from dna to protein */ sg.clear(); sg.addSequence(cdna.getSequenceAt(0), false); sg.addSequence(cdna.getSequenceAt(1), false); sg.addSequence(cdna.getSequenceAt(2), false); // select columns 2 and 3 in DNA which span protein columns 0 and 1 sg.setStartRes(2); sg.setEndRes(3); mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView); assertTrue(mappedGroup.getColourText()); assertSame(sg.getIdColour(), mappedGroup.getIdColour()); assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour()); assertEquals(3, mappedGroup.getSequences().size()); assertSame(protein.getSequenceAt(0), mappedGroup.getSequences().get(0)); assertSame(protein.getSequenceAt(1), mappedGroup.getSequences().get(1)); assertSame(protein.getSequenceAt(2), mappedGroup.getSequences().get(2)); assertEquals(0, mappedGroup.getStartRes()); assertEquals(1, mappedGroup.getEndRes()); } /** * Test mapping a sequence group made of a sequences/columns region. * * @throws IOException */ @Test public void testMapSequenceGroup_region() throws IOException { /* * Set up gapped dna and protein Seq1/2/3 with mappings (held on the protein * viewport). */ AlignmentI cdna = loadAlignment( ">Seq1\nA-CG-GC--AT-CA\n>Seq2\n-TG-AC-AG-T-AT\n>Seq3\n-T--ACG-TAAT-G\n", "FASTA"); cdna.setDataset(null); AlignmentI protein = loadAlignment( ">Seq1\n-KA-S\n>Seq2\n--L-QY\n>Seq3\nQ-V-M\n", "FASTA"); protein.setDataset(null); AlignedCodonFrame acf = new AlignedCodonFrame(); MapList map = new MapList(new int[] { 1, 9 }, new int[] { 1, 3 }, 3, 1); for (int seq = 0; seq < 3; seq++) { acf.addMap(cdna.getSequenceAt(seq).getDatasetSequence(), protein .getSequenceAt(seq).getDatasetSequence(), map); } Set acfList = Collections.singleton(acf); AlignViewportI dnaView = new AlignViewport(cdna); AlignViewportI proteinView = new AlignViewport(protein); protein.setCodonFrames(acfList); /* * Select Seq1 and Seq2 in the protein, column 1 (K/-). Expect mapped * sequence group to cover Seq1, columns 0-3 (ACG). Because the selection * only includes a gap in Seq2 there is no mappable selection region in the * corresponding DNA. */ SequenceGroup sg = new SequenceGroup(); sg.setColourText(true); sg.setIdColour(Color.GREEN); sg.setOutlineColour(Color.LIGHT_GRAY); sg.addSequence(protein.getSequenceAt(0), false); sg.addSequence(protein.getSequenceAt(1), false); sg.setStartRes(1); sg.setEndRes(1); /* * Verify the mapped sequence group in dna */ SequenceGroup mappedGroup = MappingUtils.mapSequenceGroup(sg, proteinView, dnaView); assertTrue(mappedGroup.getColourText()); assertSame(sg.getIdColour(), mappedGroup.getIdColour()); assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour()); assertEquals(1, mappedGroup.getSequences().size()); assertSame(cdna.getSequenceAt(0), mappedGroup.getSequences().get(0)); // Seq2 in protein has a gap in column 1 - ignored // Seq1 has K which should map to columns 0-3 in Seq1 assertEquals(0, mappedGroup.getStartRes()); assertEquals(3, mappedGroup.getEndRes()); /* * Now select cols 2-4 in protein. These cover Seq1:AS Seq2:LQ Seq3:VM which * extend over DNA columns 3-12, 1-7, 6-13 respectively, or 1-13 overall. */ sg.setStartRes(2); sg.setEndRes(4); mappedGroup = MappingUtils.mapSequenceGroup(sg, proteinView, dnaView); assertEquals(1, mappedGroup.getStartRes()); assertEquals(13, mappedGroup.getEndRes()); /* * Verify mapping sequence group from dna to protein */ sg.clear(); sg.addSequence(cdna.getSequenceAt(0), false); // select columns 4,5 - includes Seq1:codon2 (A) only sg.setStartRes(4); sg.setEndRes(5); mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView); assertEquals(2, mappedGroup.getStartRes()); assertEquals(2, mappedGroup.getEndRes()); // add Seq2 to dna selection cols 4-5 include codons 1 and 2 (LQ) sg.addSequence(cdna.getSequenceAt(1), false); mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView); assertEquals(2, mappedGroup.getStartRes()); assertEquals(4, mappedGroup.getEndRes()); // add Seq3 to dna selection cols 4-5 include codon 1 (Q) sg.addSequence(cdna.getSequenceAt(2), false); mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView); assertEquals(0, mappedGroup.getStartRes()); assertEquals(4, mappedGroup.getEndRes()); } @Test public void testFindMappingsForSequence() { SequenceI seq1 = new Sequence("Seq1", "ABC"); SequenceI seq2 = new Sequence("Seq2", "ABC"); SequenceI seq3 = new Sequence("Seq3", "ABC"); SequenceI seq4 = new Sequence("Seq4", "ABC"); seq1.createDatasetSequence(); seq2.createDatasetSequence(); seq3.createDatasetSequence(); seq4.createDatasetSequence(); /* * Create mappings from seq1 to seq2, seq2 to seq1, seq3 to seq1 */ AlignedCodonFrame acf1 = new AlignedCodonFrame(); MapList map = new MapList(new int[] { 1, 3 }, new int[] { 1, 3 },1, 1); acf1.addMap(seq1.getDatasetSequence(), seq2.getDatasetSequence(), map); AlignedCodonFrame acf2 = new AlignedCodonFrame(); acf2.addMap(seq2.getDatasetSequence(), seq1.getDatasetSequence(), map); AlignedCodonFrame acf3 = new AlignedCodonFrame(); acf3.addMap(seq3.getDatasetSequence(), seq1.getDatasetSequence(), map); Set mappings = new HashSet(); mappings.add(acf1); mappings.add(acf2); mappings.add(acf3); /* * Seq1 has three mappings */ List result = MappingUtils.findMappingsForSequence( seq1, mappings); assertEquals(3, result.size()); assertTrue(result.contains(acf1)); assertTrue(result.contains(acf2)); assertTrue(result.contains(acf3)); /* * Seq2 has two mappings */ result = MappingUtils.findMappingsForSequence(seq2, mappings); assertEquals(2, result.size()); assertTrue(result.contains(acf1)); assertTrue(result.contains(acf2)); /* * Seq3 has one mapping */ result = MappingUtils.findMappingsForSequence(seq3, mappings); assertEquals(1, result.size()); assertTrue(result.contains(acf3)); /* * Seq4 has no mappings */ result = MappingUtils.findMappingsForSequence(seq4, mappings); assertEquals(0, result.size()); result = MappingUtils.findMappingsForSequence(null, mappings); assertEquals(0, result.size()); result = MappingUtils.findMappingsForSequence(seq1, null); assertEquals(0, result.size()); result = MappingUtils.findMappingsForSequence(null, null); assertEquals(0, result.size()); } }