From 7f61a28aad5fe2ef3df8aa4024ea3e7d8fbe1548 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Wed, 1 Apr 2015 14:59:27 +0100 Subject: [PATCH] JAL-845 fixed start/end range of a mapped SequenceGroup --- src/jalview/datamodel/SequenceGroup.java | 12 +- src/jalview/util/MappingUtils.java | 92 +++++++++++-- test/jalview/util/MappingUtilsTest.java | 212 ++++++++++++++++++++++++++++-- 3 files changed, 286 insertions(+), 30 deletions(-) diff --git a/src/jalview/datamodel/SequenceGroup.java b/src/jalview/datamodel/SequenceGroup.java index feb714a..3439248 100755 --- a/src/jalview/datamodel/SequenceGroup.java +++ b/src/jalview/datamodel/SequenceGroup.java @@ -20,11 +20,6 @@ */ package jalview.datamodel; -import jalview.analysis.AAFrequency; -import jalview.analysis.Conservation; -import jalview.schemes.ColourSchemeI; -import jalview.schemes.ResidueProperties; - import java.awt.Color; import java.util.ArrayList; import java.util.Hashtable; @@ -32,6 +27,11 @@ import java.util.List; import java.util.Map; import java.util.Vector; +import jalview.analysis.AAFrequency; +import jalview.analysis.Conservation; +import jalview.schemes.ColourSchemeI; +import jalview.schemes.ResidueProperties; + /** * Collects a set contiguous ranges on a set of sequences * @@ -76,8 +76,10 @@ public class SequenceGroup implements AnnotatedCollectionI */ public ColourSchemeI cs; + // start column (base 0) int startRes = 0; + // end column (base 0) int endRes = 0; public Color outlineColour = Color.black; diff --git a/src/jalview/util/MappingUtils.java b/src/jalview/util/MappingUtils.java index 4cfb49e..ece1bac 100644 --- a/src/jalview/util/MappingUtils.java +++ b/src/jalview/util/MappingUtils.java @@ -1,5 +1,13 @@ package jalview.util; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + import jalview.analysis.AlignmentSorter; import jalview.api.AlignViewportI; import jalview.commands.CommandI; @@ -17,13 +25,6 @@ import jalview.datamodel.Sequence; import jalview.datamodel.SequenceGroup; import jalview.datamodel.SequenceI; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; - /** * Helper methods for manipulations involving sequence mappings. * @@ -270,8 +271,8 @@ public final class MappingUtils * @param mapTo * @return */ - public static SequenceGroup mapSequenceGroup(SequenceGroup sg, - AlignViewportI mapFrom, AlignViewportI mapTo) + public static SequenceGroup mapSequenceGroup(final SequenceGroup sg, + final AlignViewportI mapFrom, final AlignViewportI mapTo) { /* * Note the SequenceGroup holds aligned sequences, the mappings hold dataset @@ -281,18 +282,50 @@ public final class MappingUtils AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo; Set codonFrames = protein.getAlignment() .getCodonFrames(); - /* - * Copy group name, name colours, but not sequences or sequence colour - * scheme + * Copy group name, colours etc, but not sequences or sequence colour scheme */ SequenceGroup mappedGroup = new SequenceGroup(sg); mappedGroup.cs = mapTo.getGlobalColourScheme(); mappedGroup.clear(); - // TODO set width of mapped group + int minStartCol = -1; + int maxEndCol = -1; + final int selectionStartRes = sg.getStartRes(); + final int selectionEndRes = sg.getEndRes(); for (SequenceI selected : sg.getSequences()) { + /* + * Find the widest range of non-gapped positions in the selection range + */ + int firstUngappedPos = selectionStartRes; + while (firstUngappedPos <= selectionEndRes + && Comparison.isGap(selected.getCharAt(firstUngappedPos))) + { + firstUngappedPos++; + } + + /* + * If this sequence is only gaps in the selected range, skip it + */ + if (firstUngappedPos > selectionEndRes) + { + continue; + } + + int lastUngappedPos = selectionEndRes; + while (lastUngappedPos >= selectionStartRes + && Comparison.isGap(selected.getCharAt(lastUngappedPos))) + { + lastUngappedPos--; + } + + /* + * Find the selected start/end residue positions in sequence + */ + int startResiduePos = selected.findPosition(firstUngappedPos); + int endResiduePos = selected.findPosition(lastUngappedPos); + for (AlignedCodonFrame acf : codonFrames) { SequenceI mappedSequence = targetIsNucleotide ? acf @@ -301,8 +334,39 @@ public final class MappingUtils { for (SequenceI seq : mapTo.getAlignment().getSequences()) { + int mappedStartResidue = 0; + int mappedEndResidue = 0; if (seq.getDatasetSequence() == mappedSequence) { + /* + * Found a sequence mapping. Locate the start/end mapped residues. + */ + SearchResults sr = buildSearchResults(selected, + startResiduePos, Collections.singleton(acf)); + for (Match m : sr.getResults()) + { + mappedStartResidue = m.getStart(); + mappedEndResidue = m.getEnd(); + } + sr = buildSearchResults(selected, endResiduePos, + Collections.singleton(acf)); + for (Match m : sr.getResults()) + { + mappedStartResidue = Math.min(mappedStartResidue, + m.getStart()); + mappedEndResidue = Math.max(mappedEndResidue, m.getEnd()); + } + + /* + * Find the mapped aligned columns, save the range. Note findIndex + * returns a base 1 position, SequenceGroup uses base 0 + */ + int mappedStartCol = seq.findIndex(mappedStartResidue) - 1; + minStartCol = minStartCol == -1 ? mappedStartCol : Math.min( + minStartCol, mappedStartCol); + int mappedEndCol = seq.findIndex(mappedEndResidue) - 1; + maxEndCol = maxEndCol == -1 ? mappedEndCol : Math.max( + maxEndCol, mappedEndCol); mappedGroup.addSequence(seq, false); break; } @@ -310,6 +374,8 @@ public final class MappingUtils } } } + mappedGroup.setStartRes(minStartCol < 0 ? 0 : minStartCol); + mappedGroup.setEndRes(maxEndCol < 0 ? 0 : maxEndCol); return mappedGroup; } diff --git a/test/jalview/util/MappingUtilsTest.java b/test/jalview/util/MappingUtilsTest.java index f32e7ff..41efa73 100644 --- a/test/jalview/util/MappingUtilsTest.java +++ b/test/jalview/util/MappingUtilsTest.java @@ -3,7 +3,15 @@ package jalview.util; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; + +import java.awt.Color; +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.Set; + +import org.junit.Test; + import jalview.api.AlignViewportI; import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.Alignment; @@ -17,14 +25,6 @@ import jalview.gui.AlignViewport; import jalview.io.AppletFormatAdapter; import jalview.io.FormatAdapter; -import java.awt.Color; -import java.io.IOException; -import java.util.Arrays; -import java.util.Collections; -import java.util.Set; - -import org.junit.Test; - public class MappingUtilsTest { private AlignViewportI dnaView; @@ -159,12 +159,12 @@ public class MappingUtilsTest } /** - * Test mapping a sequence group. + * Test mapping a sequence group made of entire sequences. * * @throws IOException */ @Test - public void testMapSequenceGroup() throws IOException + public void testMapSequenceGroup_sequences() throws IOException { /* * Set up dna and protein Seq1/2/3 with mappings (held on the protein @@ -192,7 +192,7 @@ public class MappingUtilsTest protein.setCodonFrames(acfList); /* - * Select Seq1 and Seq3 in the protein + * Select Seq1 and Seq3 in the protein (startRes=endRes=0) */ SequenceGroup sg = new SequenceGroup(); sg.setColourText(true); @@ -211,6 +211,8 @@ public class MappingUtilsTest assertEquals(2, mappedGroup.getSequences().size()); assertSame(cdna.getSequenceAt(0), mappedGroup.getSequences().get(0)); assertSame(cdna.getSequenceAt(2), mappedGroup.getSequences().get(1)); + assertEquals(0, mappedGroup.getStartRes()); + assertEquals(2, mappedGroup.getEndRes()); /* * Verify mapping sequence group from dna to protein @@ -218,6 +220,8 @@ public class MappingUtilsTest sg.clear(); sg.addSequence(cdna.getSequenceAt(1), false); sg.addSequence(cdna.getSequenceAt(0), false); + sg.setStartRes(0); + sg.setEndRes(2); mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView); assertTrue(mappedGroup.getColourText()); assertSame(sg.getIdColour(), mappedGroup.getIdColour()); @@ -225,6 +229,8 @@ public class MappingUtilsTest assertEquals(2, mappedGroup.getSequences().size()); assertSame(protein.getSequenceAt(1), mappedGroup.getSequences().get(0)); assertSame(protein.getSequenceAt(0), mappedGroup.getSequences().get(1)); + assertEquals(0, mappedGroup.getStartRes()); + assertEquals(0, mappedGroup.getEndRes()); } /** @@ -399,4 +405,186 @@ public class MappingUtilsTest Arrays.toString(MappingUtils.flattenRanges(new int[] { 1, 4, 7, 9, 12, 12, 15 }))); } + + /** + * Test mapping a sequence group made of entire columns. + * + * @throws IOException + */ + @Test + public void testMapSequenceGroup_columns() throws IOException + { + /* + * Set up dna and protein Seq1/2/3 with mappings (held on the protein + * viewport). + */ + AlignmentI cdna = loadAlignment( + ">Seq1\nACGGCA\n>Seq2\nTGACAG\n>Seq3\nTACGTA\n", + "FASTA"); + cdna.setDataset(null); + AlignmentI protein = loadAlignment(">Seq1\nKA\n>Seq2\nLQ\n>Seq3\nQV\n", + "FASTA"); + protein.setDataset(null); + AlignedCodonFrame acf = new AlignedCodonFrame(); + MapList map = new MapList(new int[] + { 1, 6 }, new int[] + { 1, 2 }, 3, 1); + for (int seq = 0; seq < 3; seq++) + { + acf.addMap(cdna.getSequenceAt(seq).getDatasetSequence(), protein + .getSequenceAt(seq).getDatasetSequence(), map); + } + Set acfList = Collections.singleton(acf); + + AlignViewportI dnaView = new AlignViewport(cdna); + AlignViewportI proteinView = new AlignViewport(protein); + protein.setCodonFrames(acfList); + + /* + * Select all sequences, column 2 in the protein + */ + SequenceGroup sg = new SequenceGroup(); + sg.setColourText(true); + sg.setIdColour(Color.GREEN); + sg.setOutlineColour(Color.LIGHT_GRAY); + sg.addSequence(protein.getSequenceAt(0), false); + sg.addSequence(protein.getSequenceAt(1), false); + sg.addSequence(protein.getSequenceAt(2), false); + sg.setStartRes(1); + sg.setEndRes(1); + + /* + * Verify the mapped sequence group in dna + */ + SequenceGroup mappedGroup = MappingUtils.mapSequenceGroup(sg, proteinView, dnaView); + assertTrue(mappedGroup.getColourText()); + assertSame(sg.getIdColour(), mappedGroup.getIdColour()); + assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour()); + assertEquals(3, mappedGroup.getSequences().size()); + assertSame(cdna.getSequenceAt(0), mappedGroup.getSequences().get(0)); + assertSame(cdna.getSequenceAt(1), mappedGroup.getSequences().get(1)); + assertSame(cdna.getSequenceAt(2), mappedGroup.getSequences().get(2)); + assertEquals(3, mappedGroup.getStartRes()); + assertEquals(5, mappedGroup.getEndRes()); + + /* + * Verify mapping sequence group from dna to protein + */ + sg.clear(); + sg.addSequence(cdna.getSequenceAt(0), false); + sg.addSequence(cdna.getSequenceAt(1), false); + sg.addSequence(cdna.getSequenceAt(2), false); + // select columns 2 and 3 in DNA which span protein columns 0 and 1 + sg.setStartRes(2); + sg.setEndRes(3); + mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView); + assertTrue(mappedGroup.getColourText()); + assertSame(sg.getIdColour(), mappedGroup.getIdColour()); + assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour()); + assertEquals(3, mappedGroup.getSequences().size()); + assertSame(protein.getSequenceAt(0), mappedGroup.getSequences().get(0)); + assertSame(protein.getSequenceAt(1), mappedGroup.getSequences().get(1)); + assertSame(protein.getSequenceAt(2), mappedGroup.getSequences().get(2)); + assertEquals(0, mappedGroup.getStartRes()); + assertEquals(1, mappedGroup.getEndRes()); + } + + /** + * Test mapping a sequence group made of a sequences/columns region. + * + * @throws IOException + */ + @Test + public void testMapSequenceGroup_region() throws IOException + { + /* + * Set up gapped dna and protein Seq1/2/3 with mappings (held on the protein + * viewport). + */ + AlignmentI cdna = loadAlignment( + ">Seq1\nA-CG-GC--AT-CA\n>Seq2\n-TG-AC-AG-T-AT\n>Seq3\n-T--ACG-TAAT-G\n", + "FASTA"); + cdna.setDataset(null); + AlignmentI protein = loadAlignment( + ">Seq1\n-KA-S\n>Seq2\n--L-QY\n>Seq3\nQ-V-M\n", "FASTA"); + protein.setDataset(null); + AlignedCodonFrame acf = new AlignedCodonFrame(); + MapList map = new MapList(new int[] + { 1, 9 }, new int[] + { 1, 3 }, 3, 1); + for (int seq = 0; seq < 3; seq++) + { + acf.addMap(cdna.getSequenceAt(seq).getDatasetSequence(), protein + .getSequenceAt(seq).getDatasetSequence(), map); + } + Set acfList = Collections.singleton(acf); + + AlignViewportI dnaView = new AlignViewport(cdna); + AlignViewportI proteinView = new AlignViewport(protein); + protein.setCodonFrames(acfList); + + /* + * Select Seq1 and Seq2 in the protein, column 1 (K/-). Expect mapped + * sequence group to cover Seq1, columns 0-3 (ACG). Because the selection + * only includes a gap in Seq2 there is no mappable selection region in the + * corresponding DNA. + */ + SequenceGroup sg = new SequenceGroup(); + sg.setColourText(true); + sg.setIdColour(Color.GREEN); + sg.setOutlineColour(Color.LIGHT_GRAY); + sg.addSequence(protein.getSequenceAt(0), false); + sg.addSequence(protein.getSequenceAt(1), false); + sg.setStartRes(1); + sg.setEndRes(1); + + /* + * Verify the mapped sequence group in dna + */ + SequenceGroup mappedGroup = MappingUtils.mapSequenceGroup(sg, proteinView, dnaView); + assertTrue(mappedGroup.getColourText()); + assertSame(sg.getIdColour(), mappedGroup.getIdColour()); + assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour()); + assertEquals(1, mappedGroup.getSequences().size()); + assertSame(cdna.getSequenceAt(0), mappedGroup.getSequences().get(0)); + // Seq2 in protein has a gap in column 1 - ignored + // Seq1 has K which should map to columns 0-3 in Seq1 + assertEquals(0, mappedGroup.getStartRes()); + assertEquals(3, mappedGroup.getEndRes()); + + /* + * Now select cols 2-4 in protein. These cover Seq1:AS Seq2:LQ Seq3:VM which + * extend over DNA columns 3-12, 1-7, 6-13 respectively, or 1-13 overall. + */ + sg.setStartRes(2); + sg.setEndRes(4); + mappedGroup = MappingUtils.mapSequenceGroup(sg, proteinView, dnaView); + assertEquals(1, mappedGroup.getStartRes()); + assertEquals(13, mappedGroup.getEndRes()); + + /* + * Verify mapping sequence group from dna to protein + */ + sg.clear(); + sg.addSequence(cdna.getSequenceAt(0), false); + + // select columns 4,5 - includes Seq1:codon2 (A) only + sg.setStartRes(4); + sg.setEndRes(5); + mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView); + assertEquals(2, mappedGroup.getStartRes()); + assertEquals(2, mappedGroup.getEndRes()); + + // add Seq2 to dna selection cols 4-5 include codons 1 and 2 (LQ) + sg.addSequence(cdna.getSequenceAt(1), false); + mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView); + assertEquals(2, mappedGroup.getStartRes()); + assertEquals(4, mappedGroup.getEndRes()); + + // add Seq3 to dna selection cols 4-5 include codon 1 (Q) + sg.addSequence(cdna.getSequenceAt(2), false); + mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView); + assertEquals(0, mappedGroup.getStartRes()); + assertEquals(4, mappedGroup.getEndRes()); + } } -- 1.7.10.2