package jalview.util;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
import jalview.analysis.AlignmentSorter;
import jalview.api.AlignViewportI;
import jalview.commands.CommandI;
import jalview.datamodel.SequenceGroup;
import jalview.datamodel.SequenceI;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
/**
* Helper methods for manipulations involving sequence mappings.
*
* @param mapTo
* @return
*/
- public static SequenceGroup mapSequenceGroup(SequenceGroup sg,
- AlignViewportI mapFrom, AlignViewportI mapTo)
+ public static SequenceGroup mapSequenceGroup(final SequenceGroup sg,
+ final AlignViewportI mapFrom, final AlignViewportI mapTo)
{
/*
* Note the SequenceGroup holds aligned sequences, the mappings hold dataset
AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
Set<AlignedCodonFrame> codonFrames = protein.getAlignment()
.getCodonFrames();
-
/*
- * Copy group name, name colours, but not sequences or sequence colour
- * scheme
+ * Copy group name, colours etc, but not sequences or sequence colour scheme
*/
SequenceGroup mappedGroup = new SequenceGroup(sg);
mappedGroup.cs = mapTo.getGlobalColourScheme();
mappedGroup.clear();
- // TODO set width of mapped group
+ int minStartCol = -1;
+ int maxEndCol = -1;
+ final int selectionStartRes = sg.getStartRes();
+ final int selectionEndRes = sg.getEndRes();
for (SequenceI selected : sg.getSequences())
{
+ /*
+ * Find the widest range of non-gapped positions in the selection range
+ */
+ int firstUngappedPos = selectionStartRes;
+ while (firstUngappedPos <= selectionEndRes
+ && Comparison.isGap(selected.getCharAt(firstUngappedPos)))
+ {
+ firstUngappedPos++;
+ }
+
+ /*
+ * If this sequence is only gaps in the selected range, skip it
+ */
+ if (firstUngappedPos > selectionEndRes)
+ {
+ continue;
+ }
+
+ int lastUngappedPos = selectionEndRes;
+ while (lastUngappedPos >= selectionStartRes
+ && Comparison.isGap(selected.getCharAt(lastUngappedPos)))
+ {
+ lastUngappedPos--;
+ }
+
+ /*
+ * Find the selected start/end residue positions in sequence
+ */
+ int startResiduePos = selected.findPosition(firstUngappedPos);
+ int endResiduePos = selected.findPosition(lastUngappedPos);
+
for (AlignedCodonFrame acf : codonFrames)
{
SequenceI mappedSequence = targetIsNucleotide ? acf
{
for (SequenceI seq : mapTo.getAlignment().getSequences())
{
+ int mappedStartResidue = 0;
+ int mappedEndResidue = 0;
if (seq.getDatasetSequence() == mappedSequence)
{
+ /*
+ * Found a sequence mapping. Locate the start/end mapped residues.
+ */
+ SearchResults sr = buildSearchResults(selected,
+ startResiduePos, Collections.singleton(acf));
+ for (Match m : sr.getResults())
+ {
+ mappedStartResidue = m.getStart();
+ mappedEndResidue = m.getEnd();
+ }
+ sr = buildSearchResults(selected, endResiduePos,
+ Collections.singleton(acf));
+ for (Match m : sr.getResults())
+ {
+ mappedStartResidue = Math.min(mappedStartResidue,
+ m.getStart());
+ mappedEndResidue = Math.max(mappedEndResidue, m.getEnd());
+ }
+
+ /*
+ * Find the mapped aligned columns, save the range. Note findIndex
+ * returns a base 1 position, SequenceGroup uses base 0
+ */
+ int mappedStartCol = seq.findIndex(mappedStartResidue) - 1;
+ minStartCol = minStartCol == -1 ? mappedStartCol : Math.min(
+ minStartCol, mappedStartCol);
+ int mappedEndCol = seq.findIndex(mappedEndResidue) - 1;
+ maxEndCol = maxEndCol == -1 ? mappedEndCol : Math.max(
+ maxEndCol, mappedEndCol);
mappedGroup.addSequence(seq, false);
break;
}
}
}
}
+ mappedGroup.setStartRes(minStartCol < 0 ? 0 : minStartCol);
+ mappedGroup.setEndRes(maxEndCol < 0 ? 0 : maxEndCol);
return mappedGroup;
}
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertSame;
import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
+
+import java.awt.Color;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Set;
+
+import org.junit.Test;
+
import jalview.api.AlignViewportI;
import jalview.datamodel.AlignedCodonFrame;
import jalview.datamodel.Alignment;
import jalview.io.AppletFormatAdapter;
import jalview.io.FormatAdapter;
-import java.awt.Color;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Set;
-
-import org.junit.Test;
-
public class MappingUtilsTest
{
private AlignViewportI dnaView;
}
/**
- * Test mapping a sequence group.
+ * Test mapping a sequence group made of entire sequences.
*
* @throws IOException
*/
@Test
- public void testMapSequenceGroup() throws IOException
+ public void testMapSequenceGroup_sequences() throws IOException
{
/*
* Set up dna and protein Seq1/2/3 with mappings (held on the protein
protein.setCodonFrames(acfList);
/*
- * Select Seq1 and Seq3 in the protein
+ * Select Seq1 and Seq3 in the protein (startRes=endRes=0)
*/
SequenceGroup sg = new SequenceGroup();
sg.setColourText(true);
assertEquals(2, mappedGroup.getSequences().size());
assertSame(cdna.getSequenceAt(0), mappedGroup.getSequences().get(0));
assertSame(cdna.getSequenceAt(2), mappedGroup.getSequences().get(1));
+ assertEquals(0, mappedGroup.getStartRes());
+ assertEquals(2, mappedGroup.getEndRes());
/*
* Verify mapping sequence group from dna to protein
sg.clear();
sg.addSequence(cdna.getSequenceAt(1), false);
sg.addSequence(cdna.getSequenceAt(0), false);
+ sg.setStartRes(0);
+ sg.setEndRes(2);
mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView);
assertTrue(mappedGroup.getColourText());
assertSame(sg.getIdColour(), mappedGroup.getIdColour());
assertEquals(2, mappedGroup.getSequences().size());
assertSame(protein.getSequenceAt(1), mappedGroup.getSequences().get(0));
assertSame(protein.getSequenceAt(0), mappedGroup.getSequences().get(1));
+ assertEquals(0, mappedGroup.getStartRes());
+ assertEquals(0, mappedGroup.getEndRes());
}
/**
Arrays.toString(MappingUtils.flattenRanges(new int[]
{ 1, 4, 7, 9, 12, 12, 15 })));
}
+
+ /**
+ * Test mapping a sequence group made of entire columns: every sequence of
+ * the alignment is in the group, so the selection is characterised by its
+ * start/end column range. The mapped group's sequences and column range are
+ * verified in both directions (protein to dna, then dna to protein), using
+ * ungapped sequences.
+ *
+ * @throws IOException
+ */
+ @Test
+ public void testMapSequenceGroup_columns() throws IOException
+ {
+ /*
+ * Set up dna and protein Seq1/2/3 with mappings (held on the protein
+ * viewport). Each dna sequence has 6 bases and each protein sequence has 2
+ * residues; the same MapList (dna 1-6, protein 1-2, with values 3 and 1 -
+ * presumably the dna:protein ratio, i.e. one codon per residue) is added
+ * for each dna/protein dataset sequence pair.
+ */
+ AlignmentI cdna = loadAlignment(
+ ">Seq1\nACGGCA\n>Seq2\nTGACAG\n>Seq3\nTACGTA\n",
+ "FASTA");
+ cdna.setDataset(null);
+ AlignmentI protein = loadAlignment(">Seq1\nKA\n>Seq2\nLQ\n>Seq3\nQV\n",
+ "FASTA");
+ protein.setDataset(null);
+ AlignedCodonFrame acf = new AlignedCodonFrame();
+ MapList map = new MapList(new int[]
+ { 1, 6 }, new int[]
+ { 1, 2 }, 3, 1);
+ for (int seq = 0; seq < 3; seq++)
+ {
+ acf.addMap(cdna.getSequenceAt(seq).getDatasetSequence(), protein
+ .getSequenceAt(seq).getDatasetSequence(), map);
+ }
+ Set<AlignedCodonFrame> acfList = Collections.singleton(acf);
+
+ AlignViewportI dnaView = new AlignViewport(cdna);
+ AlignViewportI proteinView = new AlignViewport(protein);
+ protein.setCodonFrames(acfList);
+
+ /*
+ * Select all sequences, second column of the protein (startRes = endRes =
+ * 1; group columns are base 0)
+ */
+ SequenceGroup sg = new SequenceGroup();
+ sg.setColourText(true);
+ sg.setIdColour(Color.GREEN);
+ sg.setOutlineColour(Color.LIGHT_GRAY);
+ sg.addSequence(protein.getSequenceAt(0), false);
+ sg.addSequence(protein.getSequenceAt(1), false);
+ sg.addSequence(protein.getSequenceAt(2), false);
+ sg.setStartRes(1);
+ sg.setEndRes(1);
+
+ /*
+ * Verify the mapped sequence group in dna: group display properties are
+ * carried over, all three dna sequences are included in the same order, and
+ * the column range is 3-5 (the second codon of each ungapped sequence)
+ */
+ SequenceGroup mappedGroup = MappingUtils.mapSequenceGroup(sg, proteinView, dnaView);
+ assertTrue(mappedGroup.getColourText());
+ assertSame(sg.getIdColour(), mappedGroup.getIdColour());
+ assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour());
+ assertEquals(3, mappedGroup.getSequences().size());
+ assertSame(cdna.getSequenceAt(0), mappedGroup.getSequences().get(0));
+ assertSame(cdna.getSequenceAt(1), mappedGroup.getSequences().get(1));
+ assertSame(cdna.getSequenceAt(2), mappedGroup.getSequences().get(2));
+ assertEquals(3, mappedGroup.getStartRes());
+ assertEquals(5, mappedGroup.getEndRes());
+
+ /*
+ * Verify mapping sequence group from dna to protein. The same group object
+ * is reused: its sequences are cleared and replaced with the dna sequences.
+ */
+ sg.clear();
+ sg.addSequence(cdna.getSequenceAt(0), false);
+ sg.addSequence(cdna.getSequenceAt(1), false);
+ sg.addSequence(cdna.getSequenceAt(2), false);
+ // select columns 2 and 3 in DNA which span protein columns 0 and 1
+ sg.setStartRes(2);
+ sg.setEndRes(3);
+ mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView);
+ assertTrue(mappedGroup.getColourText());
+ assertSame(sg.getIdColour(), mappedGroup.getIdColour());
+ assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour());
+ assertEquals(3, mappedGroup.getSequences().size());
+ assertSame(protein.getSequenceAt(0), mappedGroup.getSequences().get(0));
+ assertSame(protein.getSequenceAt(1), mappedGroup.getSequences().get(1));
+ assertSame(protein.getSequenceAt(2), mappedGroup.getSequences().get(2));
+ assertEquals(0, mappedGroup.getStartRes());
+ assertEquals(1, mappedGroup.getEndRes());
+ }
+
+ /**
+ * Test mapping a sequence group made of a sequences/columns region, i.e. a
+ * subset of the sequences combined with a column range, in gapped
+ * alignments. Verifies that sequences which are all gaps within the selected
+ * columns are left out of the mapped group, and that the mapped column range
+ * is the widest range covered by any mapped sequence. All column numbers
+ * quoted in the comments below are base 0.
+ *
+ * @throws IOException
+ */
+ @Test
+ public void testMapSequenceGroup_region() throws IOException
+ {
+ /*
+ * Set up gapped dna and protein Seq1/2/3 with mappings (held on the protein
+ * viewport). The MapList relates dna positions 1-9 to protein positions 1-3
+ * (values 3 and 1 - presumably the dna:protein ratio, one codon per
+ * residue).
+ */
+ AlignmentI cdna = loadAlignment(
+ ">Seq1\nA-CG-GC--AT-CA\n>Seq2\n-TG-AC-AG-T-AT\n>Seq3\n-T--ACG-TAAT-G\n",
+ "FASTA");
+ cdna.setDataset(null);
+ AlignmentI protein = loadAlignment(
+ ">Seq1\n-KA-S\n>Seq2\n--L-QY\n>Seq3\nQ-V-M\n", "FASTA");
+ protein.setDataset(null);
+ AlignedCodonFrame acf = new AlignedCodonFrame();
+ MapList map = new MapList(new int[]
+ { 1, 9 }, new int[]
+ { 1, 3 }, 3, 1);
+ for (int seq = 0; seq < 3; seq++)
+ {
+ acf.addMap(cdna.getSequenceAt(seq).getDatasetSequence(), protein
+ .getSequenceAt(seq).getDatasetSequence(), map);
+ }
+ Set<AlignedCodonFrame> acfList = Collections.singleton(acf);
+
+ AlignViewportI dnaView = new AlignViewport(cdna);
+ AlignViewportI proteinView = new AlignViewport(protein);
+ protein.setCodonFrames(acfList);
+
+ /*
+ * Select Seq1 and Seq2 in the protein, column 1 (K/-). Expect mapped
+ * sequence group to cover Seq1, columns 0-3 (ACG). Because the selection
+ * only includes a gap in Seq2 there is no mappable selection region in the
+ * corresponding DNA.
+ */
+ SequenceGroup sg = new SequenceGroup();
+ sg.setColourText(true);
+ sg.setIdColour(Color.GREEN);
+ sg.setOutlineColour(Color.LIGHT_GRAY);
+ sg.addSequence(protein.getSequenceAt(0), false);
+ sg.addSequence(protein.getSequenceAt(1), false);
+ sg.setStartRes(1);
+ sg.setEndRes(1);
+
+ /*
+ * Verify the mapped sequence group in dna
+ */
+ SequenceGroup mappedGroup = MappingUtils.mapSequenceGroup(sg, proteinView, dnaView);
+ assertTrue(mappedGroup.getColourText());
+ assertSame(sg.getIdColour(), mappedGroup.getIdColour());
+ assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour());
+ assertEquals(1, mappedGroup.getSequences().size());
+ assertSame(cdna.getSequenceAt(0), mappedGroup.getSequences().get(0));
+ // Seq2 in protein has a gap in column 1 - ignored
+ // Seq1 has K which should map to columns 0-3 in Seq1
+ assertEquals(0, mappedGroup.getStartRes());
+ assertEquals(3, mappedGroup.getEndRes());
+
+ /*
+ * Now select cols 2-4 in protein; the group still holds Seq1 and Seq2 only.
+ * These cover Seq1:AS (codons 2-3) and Seq2:LQ (codons 1-2), which extend
+ * over DNA columns 5-13 and 1-8 respectively, or 1-13 overall.
+ */
+ sg.setStartRes(2);
+ sg.setEndRes(4);
+ mappedGroup = MappingUtils.mapSequenceGroup(sg, proteinView, dnaView);
+ assertEquals(1, mappedGroup.getStartRes());
+ assertEquals(13, mappedGroup.getEndRes());
+
+ /*
+ * Verify mapping sequence group from dna to protein. The group is emptied
+ * and dna sequences are then added one at a time, checking the mapped
+ * protein column range after each addition.
+ */
+ sg.clear();
+ sg.addSequence(cdna.getSequenceAt(0), false);
+
+ // select columns 4,5 - includes Seq1:codon2 (A) only
+ sg.setStartRes(4);
+ sg.setEndRes(5);
+ mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView);
+ assertEquals(2, mappedGroup.getStartRes());
+ assertEquals(2, mappedGroup.getEndRes());
+
+ // add Seq2 to dna selection cols 4-5 include codons 1 and 2 (LQ)
+ sg.addSequence(cdna.getSequenceAt(1), false);
+ mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView);
+ assertEquals(2, mappedGroup.getStartRes());
+ assertEquals(4, mappedGroup.getEndRes());
+
+ // add Seq3 to dna selection cols 4-5 include codon 1 (Q)
+ sg.addSequence(cdna.getSequenceAt(2), false);
+ mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView);
+ assertEquals(0, mappedGroup.getStartRes());
+ assertEquals(4, mappedGroup.getEndRes());
+ }
}