X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=test%2Fjalview%2Futil%2FMappingUtilsTest.java;fp=test%2Fjalview%2Futil%2FMappingUtilsTest.java;h=f0f3be7cae9d27e99495f41ad5cbdca51f1b9c54;hb=8647c805617f33b905c4f72ebe30aab3c21b56f4;hp=0000000000000000000000000000000000000000;hpb=ad15cff29620f960119f80176f1fd443da9f6763;p=jalview.git diff --git a/test/jalview/util/MappingUtilsTest.java b/test/jalview/util/MappingUtilsTest.java new file mode 100644 index 0000000..f0f3be7 --- /dev/null +++ b/test/jalview/util/MappingUtilsTest.java @@ -0,0 +1,400 @@ +package jalview.util; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertSame; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; +import jalview.api.AlignViewportI; +import jalview.datamodel.AlignedCodonFrame; +import jalview.datamodel.Alignment; +import jalview.datamodel.AlignmentI; +import jalview.datamodel.ColumnSelection; +import jalview.datamodel.SearchResults; +import jalview.datamodel.SearchResults.Match; +import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceGroup; +import jalview.gui.AlignViewport; +import jalview.io.AppletFormatAdapter; +import jalview.io.FormatAdapter; + +import java.awt.Color; +import java.io.IOException; +import java.util.Collections; +import java.util.Set; + +import org.junit.Test; + +public class MappingUtilsTest +{ + private AlignViewportI dnaView; + private AlignViewportI proteinView; + + /** + * Simple test of mapping with no intron involved. + */ + @Test + public void testBuildSearchResults() + { + final Sequence seq1 = new Sequence("Seq1", "C-G-TA-GC"); + seq1.createDatasetSequence(); + + final Sequence aseq1 = new Sequence("Seq1", "-P-R"); + aseq1.createDatasetSequence(); + + /* + * Map dna bases 1-6 to protein residues 1-2 + */ + AlignedCodonFrame acf = new AlignedCodonFrame(); + MapList map = new MapList(new int[] + { 1, 6 }, new int[] + { 1, 2 }, 3, 1); + acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map); + Set acfList = Collections.singleton(acf); + + /* + * Check protein residue 1 maps to codon 1-3, 2 to codon 4-6 + */ + SearchResults sr = MappingUtils.buildSearchResults(aseq1, 1, acfList); + assertEquals(1, sr.getResults().size()); + Match m = sr.getResults().get(0); + assertEquals(seq1.getDatasetSequence(), m.getSequence()); + assertEquals(1, m.getStart()); + assertEquals(3, m.getEnd()); + sr = MappingUtils.buildSearchResults(aseq1, 2, acfList); + assertEquals(1, sr.getResults().size()); + m = sr.getResults().get(0); + assertEquals(seq1.getDatasetSequence(), m.getSequence()); + assertEquals(4, m.getStart()); + assertEquals(6, m.getEnd()); + + /* + * Check inverse mappings, from codons 1-3, 4-6 to protein 1, 2 + */ + for (int i = 1; i < 7; i++) + { + sr = MappingUtils.buildSearchResults(seq1, i, acfList); + assertEquals(1, sr.getResults().size()); + m = sr.getResults().get(0); + assertEquals(aseq1.getDatasetSequence(), m.getSequence()); + int residue = i > 3 ? 2 : 1; + assertEquals(residue, m.getStart()); + assertEquals(residue, m.getEnd()); + } + } + + /** + * Simple test of mapping with introns involved. + */ + @Test + public void testBuildSearchResults_withIntro() + { + final Sequence seq1 = new Sequence("Seq1", "C-G-TAGA-GCAGCTT"); + seq1.createDatasetSequence(); + + final Sequence aseq1 = new Sequence("Seq1", "-P-R"); + aseq1.createDatasetSequence(); + + /* + * Map dna bases [2, 4, 5], [7, 9, 11] to protein residues 1 and 2 + */ + AlignedCodonFrame acf = new AlignedCodonFrame(); + MapList map = new MapList(new int[] + { 2, 2, 4, 5, 7, 7, 9, 9, 11, 11 }, new int[] + { 1, 2 }, 3, 1); + acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map); + Set acfList = Collections.singleton(acf); + + /* + * Check protein residue 1 maps to [2, 4, 5] + */ + SearchResults sr = MappingUtils.buildSearchResults(aseq1, 1, acfList); + assertEquals(2, sr.getResults().size()); + Match m = sr.getResults().get(0); + assertEquals(seq1.getDatasetSequence(), m.getSequence()); + assertEquals(2, m.getStart()); + assertEquals(2, m.getEnd()); + m = sr.getResults().get(1); + assertEquals(seq1.getDatasetSequence(), m.getSequence()); + assertEquals(4, m.getStart()); + assertEquals(5, m.getEnd()); + + /* + * Check protein residue 2 maps to [7, 9, 11] + */ + sr = MappingUtils.buildSearchResults(aseq1, 2, acfList); + assertEquals(3, sr.getResults().size()); + m = sr.getResults().get(0); + assertEquals(seq1.getDatasetSequence(), m.getSequence()); + assertEquals(7, m.getStart()); + assertEquals(7, m.getEnd()); + m = sr.getResults().get(1); + assertEquals(seq1.getDatasetSequence(), m.getSequence()); + assertEquals(9, m.getStart()); + assertEquals(9, m.getEnd()); + m = sr.getResults().get(2); + assertEquals(seq1.getDatasetSequence(), m.getSequence()); + assertEquals(11, m.getStart()); + assertEquals(11, m.getEnd()); + + /* + * Check inverse mappings, from codons to protein + */ + for (int i = 1; i < 14; i++) + { + sr = MappingUtils.buildSearchResults(seq1, i, acfList); + int residue = (i == 2 || i == 4 || i == 5) ? 1 : (i == 7 || i == 9 + || i == 11 ? 2 : 0); + if (residue == 0) + { + assertEquals(0, sr.getResults().size()); + continue; + } + assertEquals(1, sr.getResults().size()); + m = sr.getResults().get(0); + assertEquals(aseq1.getDatasetSequence(), m.getSequence()); + assertEquals(residue, m.getStart()); + assertEquals(residue, m.getEnd()); + } + } + + /** + * Test mapping a sequence group. + * + * @throws IOException + */ + @Test + public void testMapSequenceGroup() throws IOException + { + /* + * Set up dna and protein Seq1/2/3 with mappings (held on the protein + * viewport). + */ + AlignmentI cdna = loadAlignment(">Seq1\nACG\n>Seq2\nTGA\n>Seq3\nTAC\n", + "FASTA"); + cdna.setDataset(null); + AlignmentI protein = loadAlignment(">Seq1\nK\n>Seq2\nL\n>Seq3\nQ\n", + "FASTA"); + protein.setDataset(null); + AlignedCodonFrame acf = new AlignedCodonFrame(); + MapList map = new MapList(new int[] + { 1, 3 }, new int[] + { 1, 1 }, 3, 1); + for (int seq = 0; seq < 3; seq++) + { + acf.addMap(cdna.getSequenceAt(seq).getDatasetSequence(), protein + .getSequenceAt(seq).getDatasetSequence(), map); + } + Set acfList = Collections.singleton(acf); + + AlignViewportI dnaView = new AlignViewport(cdna); + AlignViewportI proteinView = new AlignViewport(protein); + protein.setCodonFrames(acfList); + + /* + * Select Seq1 and Seq3 in the protein + */ + SequenceGroup sg = new SequenceGroup(); + sg.setColourText(true); + sg.setIdColour(Color.GREEN); + sg.setOutlineColour(Color.LIGHT_GRAY); + sg.addSequence(protein.getSequenceAt(0), false); + sg.addSequence(protein.getSequenceAt(2), false); + + /* + * Verify the mapped sequence group in dna + */ + SequenceGroup mappedGroup = MappingUtils.mapSequenceGroup(sg, proteinView, dnaView); + assertTrue(mappedGroup.getColourText()); + assertSame(sg.getIdColour(), mappedGroup.getIdColour()); + assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour()); + assertEquals(2, mappedGroup.getSequences().size()); + assertSame(cdna.getSequenceAt(0), mappedGroup.getSequences().get(0)); + assertSame(cdna.getSequenceAt(2), mappedGroup.getSequences().get(1)); + + /* + * Verify mapping sequence group from dna to protein + */ + sg.clear(); + sg.addSequence(cdna.getSequenceAt(1), false); + sg.addSequence(cdna.getSequenceAt(0), false); + mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView); + assertTrue(mappedGroup.getColourText()); + assertSame(sg.getIdColour(), mappedGroup.getIdColour()); + assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour()); + assertEquals(2, mappedGroup.getSequences().size()); + assertSame(protein.getSequenceAt(1), mappedGroup.getSequences().get(0)); + assertSame(protein.getSequenceAt(0), mappedGroup.getSequences().get(1)); + } + + /** + * Helper method to load an alignment and ensure dataset sequences are set up. + * + * @param data + * @param format + * TODO + * @return + * @throws IOException + */ + protected AlignmentI loadAlignment(final String data, String format) + throws IOException + { + Alignment a = new FormatAdapter().readFile(data, + AppletFormatAdapter.PASTE, format); + a.setDataset(null); + return a; + } + + /** + * Test mapping a column selection in protein to its dna equivalent + * + * @throws IOException + */ + @Test + public void testMapColumnSelection_proteinToDna() throws IOException + { + setupMappedAlignments(); + + ColumnSelection colsel = new ColumnSelection(); + + /* + * Column 0 in protein picks up Seq2/L, Seq3/G which map to cols 0-4 and 0-3 + * in dna respectively, overall 0-4 + */ + colsel.addElement(0); + ColumnSelection cs = MappingUtils.mapColumnSelection(colsel, + proteinView, dnaView); + assertEquals("[0, 1, 2, 3, 4]", cs.getSelected().toString()); + + /* + * Column 1 in protein picks up Seq1/K which maps to cols 0-3 in dna + */ + colsel.clear(); + colsel.addElement(1); + cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView); + assertEquals("[0, 1, 2, 3]", cs.getSelected().toString()); + + /* + * Column 2 in protein picks up gaps only - no mapping + */ + colsel.clear(); + colsel.addElement(2); + cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView); + assertEquals("[]", cs.getSelected().toString()); + + /* + * Column 3 in protein picks up Seq1/P, Seq2/Q, Seq3/S which map to columns + * 6-9, 6-10, 5-8 respectively, overall to 5-10 + */ + colsel.clear(); + colsel.addElement(3); + cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView); + assertEquals("[5, 6, 7, 8, 9, 10]", cs.getSelected().toString()); + + /* + * Combine selection of columns 1 and 3 to get a discontiguous mapped + * selection + */ + colsel.clear(); + colsel.addElement(1); + colsel.addElement(3); + cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView); + assertEquals("[0, 1, 2, 3, 5, 6, 7, 8, 9, 10]", cs.getSelected() + .toString()); + } + + /** + * @throws IOException + */ + protected void setupMappedAlignments() throws IOException + { + /* + * Set up dna and protein Seq1/2/3 with mappings (held on the protein + * viewport). Lower case for introns. + */ + AlignmentI cdna = loadAlignment(">Seq1\nAC-GctGtC-T\n" + + ">Seq2\nTc-GA-G-T-Tc\n" + ">Seq3\nTtTT-AaCGg-\n", + "FASTA"); + cdna.setDataset(null); + AlignmentI protein = loadAlignment( + ">Seq1\n-K-P\n>Seq2\nL--Q\n>Seq3\nG--S\n", + "FASTA"); + protein.setDataset(null); + AlignedCodonFrame acf = new AlignedCodonFrame(); + MapList map = new MapList(new int[] + { 1, 3, 6, 6, 8, 9 }, new int[] + { 1, 2 }, 3, 1); + acf.addMap(cdna.getSequenceAt(0).getDatasetSequence(), protein + .getSequenceAt(0).getDatasetSequence(), map); + map = new MapList(new int[] + { 1, 1, 3, 4, 5, 7 }, new int[] + { 1, 2 }, 3, 1); + acf.addMap(cdna.getSequenceAt(1).getDatasetSequence(), protein + .getSequenceAt(1).getDatasetSequence(), map); + map = new MapList(new int[] + { 1, 1, 3, 4, 5, 5, 7, 8 }, new int[] + { 1, 2 }, 3, 1); + acf.addMap(cdna.getSequenceAt(2).getDatasetSequence(), protein + .getSequenceAt(2).getDatasetSequence(), map); + Set acfList = Collections.singleton(acf); + + dnaView = new AlignViewport(cdna); + proteinView = new AlignViewport(protein); + protein.setCodonFrames(acfList); + } + + /** + * Test mapping a column selection including hidden columns + * + * @throws IOException + */ + @Test + public void testMapColumnSelection_hiddenColumns() throws IOException + { + setupMappedAlignments(); + + ColumnSelection colsel = new ColumnSelection(); + + /* + * Column 0 in protein picks up Seq2/L, Seq3/G which map to cols 0-4 and 0-3 + * in dna respectively, overall 0-4 + */ + colsel.addElement(0); + ColumnSelection cs = MappingUtils.mapColumnSelection(colsel, + proteinView, dnaView); + assertEquals("[0, 1, 2, 3, 4]", cs.getSelected().toString()); + + fail("write me"); + } + + /** + * Test mapping a column selection in dna to its protein equivalent + * + * @throws IOException + */ + @Test + public void testMapColumnSelection_dnaToProtein() throws IOException + { + setupMappedAlignments(); + + ColumnSelection colsel = new ColumnSelection(); + + /* + * Column 0 in dna picks up first bases which map to residue 1, columns 0-1 + * in protein. + */ + colsel.addElement(0); + ColumnSelection cs = MappingUtils.mapColumnSelection(colsel, dnaView, + proteinView); + assertEquals("[0, 1]", cs.getSelected().toString()); + + /* + * Columns 3-5 in dna map to the first residues in protein Seq1, Seq2, and + * the first two in Seq3. Overall to columns 0, 1, 3 (col2 is all gaps). + */ + colsel.addElement(3); + colsel.addElement(4); + colsel.addElement(5); + cs = MappingUtils.mapColumnSelection(colsel, dnaView, proteinView); + assertEquals("[0, 1, 3]", cs.getSelected().toString()); + } +}