From 882ed2993331d2ce171a2bbdc7e8729f12ff603b Mon Sep 17 00:00:00 2001 From: gmungoc Date: Thu, 29 Oct 2020 11:30:56 +0000 Subject: [PATCH] JAL-3700 mapped sequence group includes short (or all gapped) sequences --- src/jalview/util/MappingUtils.java | 30 +++++++++++-------- test/jalview/util/MappingUtilsTest.java | 49 +++++++++++++++---------------- 2 files changed, 41 insertions(+), 38 deletions(-) diff --git a/src/jalview/util/MappingUtils.java b/src/jalview/util/MappingUtils.java index 6294ca1..177c54d 100644 --- a/src/jalview/util/MappingUtils.java +++ b/src/jalview/util/MappingUtils.java @@ -343,26 +343,23 @@ public final class MappingUtils firstUngappedPos++; } - /* - * If this sequence is only gaps in the selected range, skip it - */ - if (firstUngappedPos > selectionEndRes) - { - continue; - } + boolean allGapped = (firstUngappedPos > selectionEndRes); int lastUngappedPos = selectionEndRes; - while (lastUngappedPos >= selectionStartRes - && Comparison.isGap(selected.getCharAt(lastUngappedPos))) + if (!allGapped) { - lastUngappedPos--; + while (lastUngappedPos >= selectionStartRes + && Comparison.isGap(selected.getCharAt(lastUngappedPos))) + { + lastUngappedPos--; + } } /* * Find the selected start/end residue positions in sequence */ - int startResiduePos = selected.findPosition(firstUngappedPos); - int endResiduePos = selected.findPosition(lastUngappedPos); + int startResiduePos = allGapped ? 0 : selected.findPosition(firstUngappedPos); + int endResiduePos = allGapped ? 
0 : selected.findPosition(lastUngappedPos); for (AlignedCodonFrame acf : codonFrames) { @@ -376,6 +373,14 @@ public final class MappingUtils { continue; } + mappedGroup.addSequence(seq, false); + if (allGapped) + { + /* + * sequence is mapped but includes no mapped residues + */ + continue; + } int mappedStartResidue = 0; int mappedEndResidue = 0; List mapping = Arrays.asList(acf); @@ -403,7 +408,6 @@ public final class MappingUtils int mappedEndCol = seq.findIndex(mappedEndResidue) - 1; maxEndCol = maxEndCol == -1 ? mappedEndCol : Math.max(maxEndCol, mappedEndCol); - mappedGroup.addSequence(seq, false); break; } } diff --git a/test/jalview/util/MappingUtilsTest.java b/test/jalview/util/MappingUtilsTest.java index 0997fec..08673ae 100644 --- a/test/jalview/util/MappingUtilsTest.java +++ b/test/jalview/util/MappingUtilsTest.java @@ -26,7 +26,6 @@ import static org.testng.AssertJUnit.assertNull; import static org.testng.AssertJUnit.assertSame; import static org.testng.AssertJUnit.assertTrue; import static org.testng.AssertJUnit.fail; - import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals; import java.awt.Color; @@ -241,8 +240,8 @@ public class MappingUtilsTest .asList(new AlignedCodonFrame[] { acf }); - AlignViewportI dnaView = new AlignViewport(cdna); - AlignViewportI proteinView = new AlignViewport(protein); + AlignViewportI theDnaView = new AlignViewport(cdna); + AlignViewportI theProteinView = new AlignViewport(protein); protein.setCodonFrames(acfList); /* @@ -260,7 +259,7 @@ public class MappingUtilsTest * Verify the mapped sequence group in dna */ SequenceGroup mappedGroup = MappingUtils.mapSequenceGroup(sg, - proteinView, dnaView); + theProteinView, theDnaView); assertTrue(mappedGroup.getColourText()); assertSame(sg.getIdColour(), mappedGroup.getIdColour()); assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour()); @@ -278,7 +277,7 @@ public class MappingUtilsTest sg.addSequence(cdna.getSequenceAt(0), false); sg.setStartRes(0); 
sg.setEndRes(2); - mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView); + mappedGroup = MappingUtils.mapSequenceGroup(sg, theDnaView, theProteinView); assertTrue(mappedGroup.getColourText()); assertSame(sg.getIdColour(), mappedGroup.getIdColour()); assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour()); @@ -532,8 +531,8 @@ public class MappingUtilsTest .asList(new AlignedCodonFrame[] { acf }); - AlignViewportI dnaView = new AlignViewport(cdna); - AlignViewportI proteinView = new AlignViewport(protein); + AlignViewportI theDnaView = new AlignViewport(cdna); + AlignViewportI theProteinView = new AlignViewport(protein); protein.setCodonFrames(acfList); /* @@ -553,7 +552,7 @@ public class MappingUtilsTest * Verify the mapped sequence group in dna */ SequenceGroup mappedGroup = MappingUtils.mapSequenceGroup(sg, - proteinView, dnaView); + theProteinView, theDnaView); assertTrue(mappedGroup.getColourText()); assertSame(sg.getIdColour(), mappedGroup.getIdColour()); assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour()); @@ -574,7 +573,7 @@ public class MappingUtilsTest // select columns 2 and 3 in DNA which span protein columns 0 and 1 sg.setStartRes(2); sg.setEndRes(3); - mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView); + mappedGroup = MappingUtils.mapSequenceGroup(sg, theDnaView, theProteinView); assertTrue(mappedGroup.getColourText()); assertSame(sg.getIdColour(), mappedGroup.getIdColour()); assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour()); @@ -599,11 +598,11 @@ public class MappingUtilsTest * viewport). 
*/ AlignmentI cdna = loadAlignment( - ">Seq1\nA-CG-GC--AT-CA\n>Seq2\n-TG-AC-AG-T-AT\n>Seq3\n-T--ACG-TAAT-G\n", + ">Cds11\nA-CG-GC--AT-CA\n>Cds2\n-TG-AC-AG-T-AT\n>Cds3\n-T--ACG-TAAT-G\n", FileFormat.Fasta); cdna.setDataset(null); AlignmentI protein = loadAlignment( - ">Seq1\n-KA-S\n>Seq2\n--L-QY\n>Seq3\nQ-V-M\n", + ">Pep1\n-KA-S\n>Pep2\n--L-QY\n>Pep3\nQ-V-M\n", FileFormat.Fasta); protein.setDataset(null); AlignedCodonFrame acf = new AlignedCodonFrame(); @@ -617,15 +616,14 @@ public class MappingUtilsTest .asList(new AlignedCodonFrame[] { acf }); - AlignViewportI dnaView = new AlignViewport(cdna); - AlignViewportI proteinView = new AlignViewport(protein); + AlignViewportI theDnaView = new AlignViewport(cdna); + AlignViewportI theProteinView = new AlignViewport(protein); protein.setCodonFrames(acfList); /* - * Select Seq1 and Seq2 in the protein, column 1 (K/-). Expect mapped - * sequence group to cover Seq1, columns 0-3 (ACG). Because the selection - * only includes a gap in Seq2 there is no mappable selection region in the - * corresponding DNA. + * Select Pep1 and Pep2 in the protein, column 1 (K/-). Expect mapped + * sequence group to cover Cds1, columns 0-3 (ACG). 
Although the selection + * only includes a gap in Pep2, mapped Cds2 is included with 'no columns' */ SequenceGroup sg = new SequenceGroup(); sg.setColourText(true); @@ -640,14 +638,15 @@ public class MappingUtilsTest * Verify the mapped sequence group in dna */ SequenceGroup mappedGroup = MappingUtils.mapSequenceGroup(sg, - proteinView, dnaView); + theProteinView, theDnaView); assertTrue(mappedGroup.getColourText()); assertSame(sg.getIdColour(), mappedGroup.getIdColour()); assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour()); - assertEquals(1, mappedGroup.getSequences().size()); + assertEquals(2, mappedGroup.getSequences().size()); assertSame(cdna.getSequenceAt(0), mappedGroup.getSequences().get(0)); - // Seq2 in protein has a gap in column 1 - ignored - // Seq1 has K which should map to columns 0-3 in Seq1 + assertSame(cdna.getSequenceAt(1), mappedGroup.getSequences().get(1)); + // Pep2 in protein has a gap in column 1 - doesn't map to any column + // Pep1 has K which should map to columns 0-3 in Cds1 assertEquals(0, mappedGroup.getStartRes()); assertEquals(3, mappedGroup.getEndRes()); @@ -657,7 +656,7 @@ public class MappingUtilsTest */ sg.setStartRes(2); sg.setEndRes(4); - mappedGroup = MappingUtils.mapSequenceGroup(sg, proteinView, dnaView); + mappedGroup = MappingUtils.mapSequenceGroup(sg, theProteinView, theDnaView); assertEquals(1, mappedGroup.getStartRes()); assertEquals(13, mappedGroup.getEndRes()); @@ -670,19 +669,19 @@ public class MappingUtilsTest // select columns 4,5 - includes Seq1:codon2 (A) only sg.setStartRes(4); sg.setEndRes(5); - mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView); + mappedGroup = MappingUtils.mapSequenceGroup(sg, theDnaView, theProteinView); assertEquals(2, mappedGroup.getStartRes()); assertEquals(2, mappedGroup.getEndRes()); // add Seq2 to dna selection cols 4-5 include codons 1 and 2 (LQ) sg.addSequence(cdna.getSequenceAt(1), false); - mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, 
proteinView); + mappedGroup = MappingUtils.mapSequenceGroup(sg, theDnaView, theProteinView); assertEquals(2, mappedGroup.getStartRes()); assertEquals(4, mappedGroup.getEndRes()); // add Seq3 to dna selection cols 4-5 include codon 1 (Q) sg.addSequence(cdna.getSequenceAt(2), false); - mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView); + mappedGroup = MappingUtils.mapSequenceGroup(sg, theDnaView, theProteinView); assertEquals(0, mappedGroup.getStartRes()); assertEquals(4, mappedGroup.getEndRes()); } -- 1.7.10.2