From: Jim Procter Date: Wed, 26 Jan 2022 19:36:23 +0000 (+0000) Subject: Merge branch 'task/JAL-3763_newDatasetForCds' into merge/develop_task/JAL-3763_newDat... X-Git-Tag: Release_2_11_2_0~15^2~1^2~1 X-Git-Url: http://source.jalview.org/gitweb/?p=jalview.git;a=commitdiff_plain;h=27aed96d1aeae689818bd0a0028553186e3c3839;hp=7312fed439c2a32de989c6f5155203ebc0a7f854 Merge branch 'task/JAL-3763_newDatasetForCds' into merge/develop_task/JAL-3763_newDatasetForCds Conflicts: src/jalview/analysis/AlignmentUtils.java src/jalview/util/MapList.java test/jalview/util/MapListTest.java test/jalview/util/MappingUtilsTest.java --- diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index 23c5d64..f95ff73 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -22,6 +22,23 @@ package jalview.analysis; import java.util.Locale; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.NoSuchElementException; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; + +import jalview.bin.Cache; import jalview.commands.RemoveGapColCommand; import jalview.datamodel.AlignedCodon; import jalview.datamodel.AlignedCodonFrame; @@ -46,22 +63,6 @@ import jalview.util.IntRangeComparator; import jalview.util.MapList; import jalview.util.MappingUtils; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.NoSuchElementException; -import java.util.Set; -import java.util.SortedMap; -import java.util.TreeMap; - /** * grab bag of useful alignment manipulation operations Expect these to be * refactored elsewhere at some point. @@ -1997,45 +1998,31 @@ public class AlignmentUtils SequenceI newSeq = null; - final MapList maplist = mapping.getMap(); - if (maplist.isContiguous() && maplist.isFromForwardStrand()) - { - /* - * just a subsequence, keep same dataset sequence - */ - int start = maplist.getFromLowest(); - int end = maplist.getFromHighest(); - newSeq = seq.getSubSequence(start - 1, end); - newSeq.setName(seqId); - } - else - { - /* - * construct by splicing mapped from ranges - */ - char[] seqChars = seq.getSequence(); - List fromRanges = maplist.getFromRanges(); - int cdsWidth = MappingUtils.getLength(fromRanges); - char[] newSeqChars = new char[cdsWidth]; + /* + * construct CDS sequence by splicing mapped from ranges + */ + char[] seqChars = seq.getSequence(); + List fromRanges = mapping.getMap().getFromRanges(); + int cdsWidth = MappingUtils.getLength(fromRanges); + char[] newSeqChars = new char[cdsWidth]; - int newPos = 0; - for (int[] range : fromRanges) + int newPos = 0; + for (int[] range : fromRanges) + { + if (range[0] <= range[1]) { - if (range[0] <= range[1]) - { - // forward strand mapping - just copy the range - int length = range[1] - range[0] + 1; - System.arraycopy(seqChars, range[0] - 1, newSeqChars, newPos, - length); - newPos += length; - } - else + // forward strand mapping - just copy the range + int length = range[1] - range[0] + 1; + System.arraycopy(seqChars, range[0] - 1, newSeqChars, newPos, + length); + newPos += length; + } + else + { + // reverse strand mapping - copy and complement one by one + for (int i = range[0]; i >= range[1]; i--) { - // reverse strand mapping - copy and complement one by one - for (int i = range[0]; i >= range[1]; i--) - { - newSeqChars[newPos++] = Dna.getComplement(seqChars[i - 1]); - } + newSeqChars[newPos++] = Dna.getComplement(seqChars[i - 1]); } } @@ -2069,9 +2056,8 @@ public class AlignmentUtils } else { - System.err.println( - "JAL-2154 regression: warning - found (and ignnored a duplicate CDS sequence):" - + mtch.toString()); + Cache.log.error( + "JAL-2154 regression: warning - found (and ignored) a duplicate CDS sequence:" + mtch.toString()); } } } diff --git a/src/jalview/datamodel/AlignedCodonFrame.java b/src/jalview/datamodel/AlignedCodonFrame.java index fffa137..2f33e43 100644 --- a/src/jalview/datamodel/AlignedCodonFrame.java +++ b/src/jalview/datamodel/AlignedCodonFrame.java @@ -20,13 +20,13 @@ */ package jalview.datamodel; -import jalview.util.MapList; -import jalview.util.MappingUtils; - import java.util.AbstractList; import java.util.ArrayList; import java.util.List; +import jalview.util.MapList; +import jalview.util.MappingUtils; + /** * Stores mapping between the columns of a protein alignment and a DNA alignment * and a list of individual codon to amino acid mappings between sequences. @@ -107,6 +107,93 @@ public class AlignedCodonFrame { return mapping; } + + /** + * Returns true if the mapping covers the full length of the given sequence. + * This allows us to distinguish the CDS that codes for a protein from + * another overlapping CDS in the parent dna sequence. + * + * @param seq + * @return + */ + public boolean covers(SequenceI seq) + { + List mappedRanges = null; + MapList mapList = mapping.getMap(); + if (fromSeq == seq || fromSeq == seq.getDatasetSequence()) + { + mappedRanges = mapList.getFromRanges(); + } + else if (mapping.to == seq || mapping.to == seq.getDatasetSequence()) + { + mappedRanges = mapList.getToRanges(); + } + else + { + return false; + } + + /* + * check that each mapped range lies within the sequence range + * (necessary for circular CDS - example EMBL:J03321:AAA91567) + * and mapped length covers (at least) sequence length + */ + int length = 0; + for (int[] range : mappedRanges) + { + int from = Math.min(range[0], range[1]); + int to = Math.max(range[0], range[1]); + if (from < seq.getStart() || to > seq.getEnd()) + { + return false; + } + length += (to - from + 1); + } + // add 1 to mapped length to allow for a mapped stop codon + if (length + 1 < (seq.getEnd() - seq.getStart() + 1)) + { + return false; + } + return true; + } + + /** + * Adds any regions mapped to or from position {@code pos} in sequence + * {@code seq} to the given search results + * + * @param seq + * @param pos + * @param sr + */ + public void markMappedRegion(SequenceI seq, int pos, SearchResultsI sr) + { + int[] codon = null; + SequenceI mappedSeq = null; + SequenceI ds = seq.getDatasetSequence(); + if (ds == null) + { + ds = seq; + } + + if (this.fromSeq == seq || this.fromSeq == ds) + { + codon = this.mapping.map.locateInTo(pos, pos); + mappedSeq = this.mapping.to; + } + else if (this.mapping.to == seq || this.mapping.to == ds) + { + codon = this.mapping.map.locateInFrom(pos, pos); + mappedSeq = this.fromSeq; + } + + if (codon != null) + { + for (int i = 0; i < codon.length; i += 2) + { + sr.addResult(mappedSeq, codon[i], codon[i + 1]); + } + } + } } private List mappings; @@ -261,9 +348,12 @@ public class AlignedCodonFrame } /** + * Return the corresponding aligned or dataset dna sequence for given amino + * acid sequence, or null if not found. returns the sequence from the first + * mapping found that involves the protein sequence. * - * @param sequenceRef - * @return null or corresponding aaSeq entry for dnaSeq entry + * @param aaSeqRef + * @return */ public SequenceI getDnaForAaSeq(SequenceI aaSeqRef) { @@ -293,7 +383,8 @@ public class AlignedCodonFrame /** * Add search results for regions in other sequences that translate or are - * translated from a particular position in seq + * translated from a particular position in seq (which may be an aligned or + * dataset sequence) * * @param seq * @param index @@ -304,69 +395,15 @@ public class AlignedCodonFrame public void markMappedRegion(SequenceI seq, int index, SearchResultsI results) { - int[] codon; SequenceI ds = seq.getDatasetSequence(); - for (SequenceToSequenceMapping ssm : mappings) + if (ds == null) { - if (ssm.fromSeq == seq || ssm.fromSeq == ds) - { - codon = ssm.mapping.map.locateInTo(index, index); - if (codon != null) - { - for (int i = 0; i < codon.length; i += 2) - { - results.addResult(ssm.mapping.to, codon[i], codon[i + 1]); - } - } - } - else if (ssm.mapping.to == seq || ssm.mapping.to == ds) - { - { - codon = ssm.mapping.map.locateInFrom(index, index); - if (codon != null) - { - for (int i = 0; i < codon.length; i += 2) - { - results.addResult(ssm.fromSeq, codon[i], codon[i + 1]); - } - } - } - } + ds = seq; } - } - - /** - * Returns the DNA codon positions (base 1) for the given position (base 1) in - * a mapped protein sequence, or null if no mapping is found. - * - * Intended for use in aligning cDNA to match aligned protein. Only the first - * mapping found is returned, so not suitable for use if multiple protein - * sequences are mapped to the same cDNA (but aligning cDNA as protein is - * ill-defined for this case anyway). - * - * @param seq - * the DNA dataset sequence - * @param aaPos - * residue position (base 1) in a protein sequence - * @return - */ - public int[] getDnaPosition(SequenceI seq, int aaPos) - { - /* - * Adapted from markMappedRegion(). - */ - MapList ml = null; - int i = 0; for (SequenceToSequenceMapping ssm : mappings) { - if (ssm.fromSeq == seq) - { - ml = getdnaToProt()[i]; - break; - } - i++; + ssm.markMappedRegion(ds, index, results); } - return ml == null ? null : ml.locateInFrom(aaPos, aaPos); } /** @@ -767,7 +804,7 @@ public class AlignedCodonFrame * Two AlignedCodonFrame objects are equal if they hold the same ordered list * of mappings * - * @see SequenceToSequenceMapping# + * @see SequenceToSequenceMapping#equals */ @Override public boolean equals(Object obj) @@ -783,4 +820,55 @@ public class AlignedCodonFrame { return mappings; } + + /** + * Returns the first mapping found which is between the two given sequences, + * and covers the full extent of both. + * + * @param seq1 + * @param seq2 + * @return + */ + public SequenceToSequenceMapping getCoveringMapping(SequenceI seq1, + SequenceI seq2) + { + for (SequenceToSequenceMapping mapping : mappings) + { + if (mapping.covers(seq2) && mapping.covers(seq1)) + { + return mapping; + } + } + return null; + } + + /** + * Returns the first mapping found which is between the given dataset sequence + * and another, is a triplet mapping (3:1 or 1:3), and covers the full extent + * of both sequences involved + * + * @param seq + * @return + */ + public SequenceToSequenceMapping getCoveringCodonMapping(SequenceI seq) + { + for (SequenceToSequenceMapping mapping : mappings) + { + if (mapping.getMapping().getMap().isTripletMap() + && mapping.covers(seq)) + { + if (mapping.fromSeq == seq + && mapping.covers(mapping.getMapping().getTo())) + { + return mapping; + } + else if (mapping.getMapping().getTo() == seq + && mapping.covers(mapping.fromSeq)) + { + return mapping; + } + } + } + return null; + } } diff --git a/src/jalview/datamodel/SearchResults.java b/src/jalview/datamodel/SearchResults.java index 7c3bba7..5c929fc 100755 --- a/src/jalview/datamodel/SearchResults.java +++ b/src/jalview/datamodel/SearchResults.java @@ -349,8 +349,10 @@ public class SearchResults implements SearchResultsI } /** - * Two SearchResults are considered equal if they contain the same matches in - * the same order. + * Two SearchResults are considered equal if they contain the same matches + * (Sequence, start position, end position) in the same order + * + * @see Match#equals(Object) */ @Override public boolean equals(Object obj) diff --git a/src/jalview/gui/PopupMenu.java b/src/jalview/gui/PopupMenu.java index 6b32efa..6cd763b 100644 --- a/src/jalview/gui/PopupMenu.java +++ b/src/jalview/gui/PopupMenu.java @@ -840,8 +840,13 @@ public class PopupMenu extends JPopupMenu implements ColourChangeListener * show local rather than linked feature coordinates */ int[] beginRange = mf.getMappedPositions(start, start); - start = beginRange[0]; int[] endRange = mf.getMappedPositions(end, end); + if (beginRange == null || endRange == null) + { + // e.g. variant extending to stop codon so not mappable + return; + } + start = beginRange[0]; end = endRange[endRange.length - 1]; } StringBuilder desc = new StringBuilder(); diff --git a/src/jalview/util/MappingUtils.java b/src/jalview/util/MappingUtils.java index 4e07a08..c8b5190 100644 --- a/src/jalview/util/MappingUtils.java +++ b/src/jalview/util/MappingUtils.java @@ -29,12 +29,14 @@ import java.util.Map; import jalview.analysis.AlignmentSorter; import jalview.api.AlignViewportI; +import jalview.bin.Cache; import jalview.commands.CommandI; import jalview.commands.EditCommand; import jalview.commands.EditCommand.Action; import jalview.commands.EditCommand.Edit; import jalview.commands.OrderCommand; import jalview.datamodel.AlignedCodonFrame; +import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping; import jalview.datamodel.AlignmentI; import jalview.datamodel.AlignmentOrder; import jalview.datamodel.ColumnSelection; @@ -78,7 +80,7 @@ public final class MappingUtils action = action.getUndoAction(); } // TODO write this - System.err.println("MappingUtils.mapCutOrPaste not yet implemented"); + Cache.log.error("MappingUtils.mapCutOrPaste not yet implemented"); } /** @@ -364,52 +366,45 @@ public final class MappingUtils for (AlignedCodonFrame acf : codonFrames) { - SequenceI mappedSequence = targetIsNucleotide - ? acf.getDnaForAaSeq(selected) - : acf.getAaForDnaSeq(selected); - if (mappedSequence != null) + for (SequenceI seq : mapTo.getAlignment().getSequences()) { - for (SequenceI seq : mapTo.getAlignment().getSequences()) + SequenceI peptide = targetIsNucleotide ? selected : seq; + SequenceI cds = targetIsNucleotide ? seq : selected; + SequenceToSequenceMapping s2s = acf.getCoveringMapping(cds, + peptide); + if (s2s == null) { - int mappedStartResidue = 0; - int mappedEndResidue = 0; - if (seq.getDatasetSequence() == mappedSequence) - { - /* - * Found a sequence mapping. Locate the start/end mapped residues. - */ - List mapping = Arrays - .asList(new AlignedCodonFrame[] - { acf }); - SearchResultsI sr = buildSearchResults(selected, - startResiduePos, mapping); - for (SearchResultMatchI m : sr.getResults()) - { - mappedStartResidue = m.getStart(); - mappedEndResidue = m.getEnd(); - } - sr = buildSearchResults(selected, endResiduePos, mapping); - for (SearchResultMatchI m : sr.getResults()) - { - mappedStartResidue = Math.min(mappedStartResidue, - m.getStart()); - mappedEndResidue = Math.max(mappedEndResidue, m.getEnd()); - } - - /* - * Find the mapped aligned columns, save the range. Note findIndex - * returns a base 1 position, SequenceGroup uses base 0 - */ - int mappedStartCol = seq.findIndex(mappedStartResidue) - 1; - minStartCol = minStartCol == -1 ? mappedStartCol - : Math.min(minStartCol, mappedStartCol); - int mappedEndCol = seq.findIndex(mappedEndResidue) - 1; - maxEndCol = maxEndCol == -1 ? mappedEndCol - : Math.max(maxEndCol, mappedEndCol); - mappedGroup.addSequence(seq, false); - break; - } + continue; + } + int mappedStartResidue = 0; + int mappedEndResidue = 0; + List mapping = Arrays.asList(acf); + SearchResultsI sr = buildSearchResults(selected, startResiduePos, + mapping); + for (SearchResultMatchI m : sr.getResults()) + { + mappedStartResidue = m.getStart(); + mappedEndResidue = m.getEnd(); } + sr = buildSearchResults(selected, endResiduePos, mapping); + for (SearchResultMatchI m : sr.getResults()) + { + mappedStartResidue = Math.min(mappedStartResidue, m.getStart()); + mappedEndResidue = Math.max(mappedEndResidue, m.getEnd()); + } + + /* + * Find the mapped aligned columns, save the range. Note findIndex + * returns a base 1 position, SequenceGroup uses base 0 + */ + int mappedStartCol = seq.findIndex(mappedStartResidue) - 1; + minStartCol = minStartCol == -1 ? mappedStartCol + : Math.min(minStartCol, mappedStartCol); + int mappedEndCol = seq.findIndex(mappedEndResidue) - 1; + maxEndCol = maxEndCol == -1 ? mappedEndCol + : Math.max(maxEndCol, mappedEndCol); + mappedGroup.addSequence(seq, false); + break; } } } @@ -449,20 +444,23 @@ public final class MappingUtils { for (AlignedCodonFrame acf : mappings) { - SequenceI mappedSeq = mappingToNucleotide ? acf.getDnaForAaSeq(seq) - : acf.getAaForDnaSeq(seq); - if (mappedSeq != null) - { for (SequenceI seq2 : mapTo.getSequences()) { - if (seq2.getDatasetSequence() == mappedSeq) + /* + * the corresponding peptide / CDS is the one for which there is + * a complete ('covering') mapping to 'seq' + */ + SequenceI peptide = mappingToNucleotide ? seq2 : seq; + SequenceI cds = mappingToNucleotide ? seq : seq2; + SequenceToSequenceMapping s2s = acf.getCoveringMapping(cds, + peptide); + if (s2s != null) { mappedOrder.add(seq2); j++; break; } } - } } } @@ -524,7 +522,7 @@ public final class MappingUtils if (colsel == null) { - return; // mappedColumns; + return; } char fromGapChar = mapFrom.getAlignment().getGapCharacter(); @@ -548,7 +546,7 @@ public final class MappingUtils mapHiddenColumns(regions.next(), codonFrames, newHidden, fromSequences, toSequences, fromGapChar); } - return; // mappedColumns; + return; } /** @@ -666,7 +664,9 @@ public final class MappingUtils */ for (SequenceI toSeq : toSequences) { - if (toSeq.getDatasetSequence() == mappedSeq) + if (toSeq.getDatasetSequence() == mappedSeq + && mappedStartResidue >= toSeq.getStart() + && mappedEndResidue <= toSeq.getEnd()) { int mappedStartCol = toSeq.findIndex(mappedStartResidue); int mappedEndCol = toSeq.findIndex(mappedEndResidue); diff --git a/src/jalview/viewmodel/seqfeatures/FeatureRendererModel.java b/src/jalview/viewmodel/seqfeatures/FeatureRendererModel.java index 853bc3b..eb1030c 100644 --- a/src/jalview/viewmodel/seqfeatures/FeatureRendererModel.java +++ b/src/jalview/viewmodel/seqfeatures/FeatureRendererModel.java @@ -39,9 +39,9 @@ import jalview.api.AlignViewportI; import jalview.api.FeatureColourI; import jalview.api.FeaturesDisplayedI; import jalview.datamodel.AlignedCodonFrame; +import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping; import jalview.datamodel.AlignmentI; import jalview.datamodel.MappedFeatures; -import jalview.datamodel.Mapping; import jalview.datamodel.SearchResultMatchI; import jalview.datamodel.SearchResults; import jalview.datamodel.SearchResultsI; @@ -1233,18 +1233,18 @@ public abstract class FeatureRendererModel * todo: direct lookup of CDS for peptide and vice-versa; for now, * have to search through an unordered list of mappings for a candidate */ - Mapping mapping = null; + SequenceToSequenceMapping mapping = null; SequenceI mapFrom = null; for (AlignedCodonFrame acf : mappings) { - mapping = acf.getMappingForSequence(sequence); - if (mapping == null || !mapping.getMap().isTripletMap()) + mapping = acf.getCoveringCodonMapping(ds); + if (mapping == null) { - continue; // we are only looking for 3:1 or 1:3 mappings + continue; } SearchResultsI sr = new SearchResults(); - acf.markMappedRegion(ds, pos, sr); + mapping.markMappedRegion(ds, pos, sr); for (SearchResultMatchI match : sr.getResults()) { int fromRes = match.getStart(); @@ -1297,7 +1297,7 @@ public abstract class FeatureRendererModel } } - return new MappedFeatures(mapping, mapFrom, pos, residue, result); + return new MappedFeatures(mapping.getMapping(), mapFrom, pos, residue, result); } @Override diff --git a/test/jalview/datamodel/AlignedCodonFrameTest.java b/test/jalview/datamodel/AlignedCodonFrameTest.java index fb4073a..337ac1a 100644 --- a/test/jalview/datamodel/AlignedCodonFrameTest.java +++ b/test/jalview/datamodel/AlignedCodonFrameTest.java @@ -22,20 +22,22 @@ package jalview.datamodel; import static org.testng.AssertJUnit.assertEquals; import static org.testng.AssertJUnit.assertFalse; +import static org.testng.AssertJUnit.assertNotNull; import static org.testng.AssertJUnit.assertNull; import static org.testng.AssertJUnit.assertSame; import static org.testng.AssertJUnit.assertTrue; import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals; -import jalview.gui.JvOptionPane; -import jalview.util.MapList; - import java.util.Arrays; import java.util.List; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; +import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping; +import jalview.gui.JvOptionPane; +import jalview.util.MapList; + public class AlignedCodonFrameTest { @@ -98,52 +100,67 @@ public class AlignedCodonFrameTest public void testGetMappedRegion() { // introns lower case, exons upper case - final Sequence seq1 = new Sequence("Seq1", "c-G-TA-gC-gT-T"); - seq1.createDatasetSequence(); - final Sequence seq2 = new Sequence("Seq2", "-TA-gG-Gg-CG-a"); - seq2.createDatasetSequence(); + final Sequence dna1 = new Sequence("Seq1/10-18", "c-G-TA-gC-gT-T"); + dna1.createDatasetSequence(); + final Sequence dna2 = new Sequence("Seq2/20-28", "-TA-gG-Gg-CG-a"); + dna2.createDatasetSequence(); - final Sequence aseq1 = new Sequence("Seq1", "-P-R"); - aseq1.createDatasetSequence(); - final Sequence aseq2 = new Sequence("Seq2", "-LY-Q"); - aseq2.createDatasetSequence(); + final Sequence pep1 = new Sequence("Seq1/3-4", "-P-R"); + pep1.createDatasetSequence(); + final Sequence pep2 = new Sequence("Seq2/7-9", "-LY-Q"); + pep2.createDatasetSequence(); /* * First with no mappings */ AlignedCodonFrame acf = new AlignedCodonFrame(); - assertNull(acf.getMappedRegion(seq1, aseq1, 1)); + assertNull(acf.getMappedRegion(dna1, pep1, 3)); /* * Set up the mappings for the exons (upper-case bases) * Note residue Q is unmapped */ - MapList map = new MapList(new int[] { 2, 4, 6, 6, 8, 9 }, new int[] { - 1, 2 }, 3, 1); - acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map); - map = new MapList(new int[] { 1, 2, 4, 5, 7, 8 }, new int[] { 1, 2 }, + MapList map1 = new MapList(new int[] { 11, 13, 15, 15, 17, 18 }, new int[] { + 3, 4 }, 3, 1); + acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map1); + MapList map2 = new MapList(new int[] { 20, 21, 23, 24, 26, 27 }, new int[] { 7, 9 }, 3, 1); - acf.addMap(seq2.getDatasetSequence(), aseq2.getDatasetSequence(), map); + acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map2); - assertArrayEquals(new int[] { 2, 4 }, - acf.getMappedRegion(seq1, aseq1, 1)); - assertArrayEquals(new int[] { 6, 6, 8, 9 }, - acf.getMappedRegion(seq1, aseq1, 2)); - assertArrayEquals(new int[] { 1, 2, 4, 4 }, - acf.getMappedRegion(seq2, aseq2, 1)); - assertArrayEquals(new int[] { 5, 5, 7, 8 }, - acf.getMappedRegion(seq2, aseq2, 2)); + /* + * get codon positions for peptide position + */ + assertArrayEquals(new int[] { 11, 13 }, + acf.getMappedRegion(dna1, pep1, 3)); + assertArrayEquals(new int[] { 15, 15, 17, 18 }, + acf.getMappedRegion(dna1, pep1, 4)); + assertArrayEquals(new int[] { 20, 21, 23, 23 }, + acf.getMappedRegion(dna2, pep2, 7)); + assertArrayEquals(new int[] { 24, 24, 26, 27 }, + acf.getMappedRegion(dna2, pep2, 8)); /* - * No mapping from seq2 to Q + * No mapping from dna2 to Q */ - assertNull(acf.getMappedRegion(seq2, aseq2, 3)); + assertNull(acf.getMappedRegion(dna2, pep2, 9)); /* - * No mapping from sequence 1 to sequence 2 + * No mapping from dna1 to pep2 */ - assertNull(acf.getMappedRegion(seq1, aseq2, 1)); + assertNull(acf.getMappedRegion(dna1, pep2, 7)); + + /* + * get peptide position for codon position + */ + assertArrayEquals(new int[] { 3, 3 }, + acf.getMappedRegion(pep1, dna1, 11)); + assertArrayEquals(new int[] { 3, 3 }, + acf.getMappedRegion(pep1, dna1, 12)); + assertArrayEquals(new int[] { 3, 3 }, + acf.getMappedRegion(pep1, dna1, 13)); + assertNull(acf.getMappedRegion(pep1, dna1, 14)); // intron base, not mapped + } @Test(groups = { "Functional" }) @@ -486,4 +503,212 @@ public class AlignedCodonFrameTest assertEquals(1, acf.getMappingsFromSequence(seq1).size()); assertSame(before, acf.getMappingsFromSequence(seq1).get(0)); } + + @Test(groups = { "Functional" }) + public void testGetCoveringMapping() + { + SequenceI dna = new Sequence("dna", "acttcaATGGCGGACtaattt"); + SequenceI cds = new Sequence("cds/7-15", "ATGGCGGAC"); + cds.setDatasetSequence(dna); + SequenceI pep = new Sequence("pep", "MAD"); + + /* + * with null argument or no mappings + */ + AlignedCodonFrame acf = new AlignedCodonFrame(); + assertNull(acf.getCoveringMapping(null, null)); + assertNull(acf.getCoveringMapping(dna, null)); + assertNull(acf.getCoveringMapping(null, pep)); + assertNull(acf.getCoveringMapping(dna, pep)); + + /* + * with a non-covering mapping e.g. overlapping exon + */ + MapList map = new MapList(new int[] { 7, 9 }, new int[] { + 1, 1 }, 3, 1); + acf.addMap(dna, pep, map); + assertNull(acf.getCoveringMapping(dna, pep)); + + acf = new AlignedCodonFrame(); + MapList map2 = new MapList(new int[] { 13, 18 }, new int[] { + 2, 2 }, 3, 1); + acf.addMap(dna, pep, map2); + assertNull(acf.getCoveringMapping(dna, pep)); + + /* + * with a covering mapping from CDS (dataset) to protein + */ + acf = new AlignedCodonFrame(); + MapList map3 = new MapList(new int[] { 7, 15 }, new int[] { + 1, 3 }, 3, 1); + acf.addMap(dna, pep, map3); + assertNull(acf.getCoveringMapping(dna, pep)); + SequenceToSequenceMapping mapping = acf.getCoveringMapping(cds, pep); + assertNotNull(mapping); + + /* + * with a mapping that extends to stop codon + */ + acf = new AlignedCodonFrame(); + MapList map4 = new MapList(new int[] { 7, 18 }, new int[] { + 1, 3 }, 3, 1); + acf.addMap(dna, pep, map4); + assertNull(acf.getCoveringMapping(dna, pep)); + assertNull(acf.getCoveringMapping(cds, pep)); + SequenceI cds2 = new Sequence("cds/7-18", "ATGGCGGACtaa"); + cds2.setDatasetSequence(dna); + mapping = acf.getCoveringMapping(cds2, pep); + assertNotNull(mapping); + } + + /** + * Test the method that adds mapped positions to SearchResults + */ + @Test(groups = { "Functional" }) + public void testMarkMappedRegion() + { + // introns lower case, exons upper case + final Sequence dna1 = new Sequence("Seq1/10-18", "c-G-TA-gC-gT-T"); + dna1.createDatasetSequence(); + final Sequence dna2 = new Sequence("Seq2/20-28", "-TA-gG-Gg-CG-a"); + dna2.createDatasetSequence(); + + final Sequence pep1 = new Sequence("Seq1/3-4", "-P-R"); + pep1.createDatasetSequence(); + final Sequence pep2 = new Sequence("Seq2/7-9", "-LY-Q"); + pep2.createDatasetSequence(); + + /* + * First with no mappings + */ + AlignedCodonFrame acf = new AlignedCodonFrame(); + SearchResults sr = new SearchResults(); + acf.markMappedRegion(dna1, 12, sr); + assertTrue(sr.isEmpty()); + + /* + * Set up the mappings for the exons (upper-case bases) + * Note residue Q is unmapped + */ + MapList map1 = new MapList(new int[] { 11, 13, 15, 15, 17, 18 }, new int[] { + 3, 4 }, 3, 1); + acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map1); + MapList map2 = new MapList(new int[] { 20, 21, 23, 24, 26, 27 }, new int[] { 7, 8 }, + 3, 1); + acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map2); + + /* + * intron bases are not mapped + */ + acf.markMappedRegion(dna1, 10, sr); + assertTrue(sr.isEmpty()); + + /* + * Q is not mapped + */ + acf.markMappedRegion(pep2, 9, sr); + assertTrue(sr.isEmpty()); + + /* + * mark peptide position for exon position (of aligned sequence) + */ + acf.markMappedRegion(dna1, 11, sr); + SearchResults expected = new SearchResults(); + expected.addResult(pep1.getDatasetSequence(), 3, 3); + assertEquals(sr, expected); + + /* + * mark peptide position for exon position of dataset sequence - same result + */ + sr = new SearchResults(); + acf.markMappedRegion(dna1.getDatasetSequence(), 11, sr); + assertEquals(sr, expected); + + /* + * marking the same position a second time should not create a duplicate match + */ + acf.markMappedRegion(dna1.getDatasetSequence(), 12, sr); + assertEquals(sr, expected); + + /* + * mark exon positions for peptide position (of aligned sequence) + */ + sr = new SearchResults(); + acf.markMappedRegion(pep2, 7, sr); // codon positions 20, 21, 23 + expected = new SearchResults(); + expected.addResult(dna2.getDatasetSequence(), 20, 21); + expected.addResult(dna2.getDatasetSequence(), 23, 23); + assertEquals(sr, expected); + + /* + * add another codon to the same SearchResults + */ + acf.markMappedRegion(pep1.getDatasetSequence(), 4, sr); // codon positions 15, 17, 18 + expected.addResult(dna1.getDatasetSequence(), 15, 15); + expected.addResult(dna1.getDatasetSequence(), 17, 18); + assertEquals(sr, expected); + } + + @Test(groups = { "Functional" }) + public void testGetCoveringCodonMapping() + { + SequenceI dna = new Sequence("dna/10-30", "acttcaATGGCGGACtaattt"); + // CDS sequence with its own dataset sequence (JAL-3763) + SequenceI cds = new Sequence("cds/1-9", "-A--TGGC-GGAC"); + cds.createDatasetSequence(); + SequenceI pep = new Sequence("pep/1-3", "MAD"); + + /* + * with null argument or no mappings + */ + AlignedCodonFrame acf = new AlignedCodonFrame(); + assertNull(acf.getCoveringCodonMapping(null)); + assertNull(acf.getCoveringCodonMapping(dna)); + assertNull(acf.getCoveringCodonMapping(pep)); + + /* + * with a non-covering mapping e.g. overlapping exon + */ + MapList map = new MapList(new int[] { 16, 18 }, new int[] { + 1, 1 }, 3, 1); + acf.addMap(dna, pep, map); + assertNull(acf.getCoveringCodonMapping(dna)); + assertNull(acf.getCoveringCodonMapping(pep)); + + acf = new AlignedCodonFrame(); + MapList map2 = new MapList(new int[] { 13, 18 }, new int[] { + 2, 2 }, 3, 1); + acf.addMap(dna, pep, map2); + assertNull(acf.getCoveringCodonMapping(dna)); + assertNull(acf.getCoveringCodonMapping(pep)); + + /* + * with a covering mapping from CDS (dataset) to protein + */ + acf = new AlignedCodonFrame(); + MapList map3 = new MapList(new int[] { 1, 9 }, new int[] { + 1, 3 }, 3, 1); + acf.addMap(cds.getDatasetSequence(), pep, map3); + assertNull(acf.getCoveringCodonMapping(dna)); + SequenceToSequenceMapping mapping = acf.getCoveringCodonMapping(pep); + assertNotNull(mapping); + SequenceToSequenceMapping mapping2 = acf.getCoveringCodonMapping(cds.getDatasetSequence()); + assertSame(mapping, mapping2); + + /* + * with a mapping that extends to stop codon + * (EMBL CDS location often includes the stop codon) + * - getCoveringCodonMapping is lenient (doesn't require exact length match) + */ + SequenceI cds2 = new Sequence("cds/1-12", "-A--TGGC-GGACTAA"); + cds2.createDatasetSequence(); + acf = new AlignedCodonFrame(); + MapList map4 = new MapList(new int[] { 1, 12 }, new int[] { + 1, 3 }, 3, 1); + acf.addMap(cds2, pep, map4); + mapping = acf.getCoveringCodonMapping(cds2.getDatasetSequence()); + assertNotNull(mapping); + mapping2 = acf.getCoveringCodonMapping(pep); + assertSame(mapping, mapping2); + } } diff --git a/test/jalview/util/MapListTest.java b/test/jalview/util/MapListTest.java index cf10aba..3f1f7f3 100644 --- a/test/jalview/util/MapListTest.java +++ b/test/jalview/util/MapListTest.java @@ -46,7 +46,7 @@ public class MapListTest { Cache.initLogger(); } - + @BeforeClass(alwaysRun = true) public void setUpJvOptionPane() { @@ -827,11 +827,12 @@ public class MapListTest assertArrayEquals(new int[] { 5, 6 }, merged.get(1)); assertArrayEquals(new int[] { 12, 8 }, merged.get(2)); assertArrayEquals(new int[] { 8, 7 }, merged.get(3)); - + // 'subsumed' ranges are preserved ranges.clear(); ranges.add(new int[] { 10, 30 }); - ranges.add(new int[] { 15, 25 }); + ranges.add(new int[] { 15, 25 }); + merged = MapList.coalesceRanges(ranges); assertEquals(2, merged.size()); assertArrayEquals(new int[] { 10, 30 }, merged.get(0)); diff --git a/test/jalview/util/MappingUtilsTest.java b/test/jalview/util/MappingUtilsTest.java index 3418f3c..bd81d30 100644 --- a/test/jalview/util/MappingUtilsTest.java +++ b/test/jalview/util/MappingUtilsTest.java @@ -36,6 +36,16 @@ import java.util.List; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; +import java.awt.Color; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; + +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + import jalview.api.AlignViewportI; import jalview.bin.Cache; import jalview.commands.EditCommand; @@ -243,7 +253,7 @@ public class MappingUtilsTest protein.setCodonFrames(acfList); /* - * Select Seq1 and Seq3 in the protein (startRes=endRes=0) + * Select Seq1 and Seq3 in the protein */ SequenceGroup sg = new SequenceGroup(); sg.setColourText(true); @@ -251,6 +261,7 @@ public class MappingUtilsTest sg.setOutlineColour(Color.LIGHT_GRAY); sg.addSequence(protein.getSequenceAt(0), false); sg.addSequence(protein.getSequenceAt(2), false); + sg.setEndRes(protein.getWidth() - 1); /* * Verify the mapped sequence group in dna @@ -264,7 +275,7 @@ public class MappingUtilsTest assertSame(cdna.getSequenceAt(0), mappedGroup.getSequences().get(0)); assertSame(cdna.getSequenceAt(2), mappedGroup.getSequences().get(1)); assertEquals(0, mappedGroup.getStartRes()); - assertEquals(2, mappedGroup.getEndRes()); + assertEquals(2, mappedGroup.getEndRes()); // 3 columns (1 codon) /* * Verify mapping sequence group from dna to protein @@ -1356,4 +1367,105 @@ public class MappingUtilsTest // expected } } + + /** + * Test mapping a sequence group where sequences in and outside the group + * share a dataset sequence (e.g. alternative CDS for the same gene) + *

+ * This scenario doesn't arise after JAL-3763 changes, but test left as still valid + * @throws IOException + */ + @Test(groups = { "Functional" }) + public void testMapSequenceGroup_sharedDataset() throws IOException + { + /* + * Set up dna and protein Seq1/2/3 with mappings (held on the protein + * viewport). CDS sequences share the same 'gene' dataset sequence. + */ + SequenceI dna = new Sequence("dna", "aaatttgggcccaaatttgggccc"); + SequenceI cds1 = new Sequence("cds1/1-6", "aaattt"); + SequenceI cds2 = new Sequence("cds1/4-9", "tttggg"); + SequenceI cds3 = new Sequence("cds1/19-24", "gggccc"); + + cds1.setDatasetSequence(dna); + cds2.setDatasetSequence(dna); + cds3.setDatasetSequence(dna); + + SequenceI pep1 = new Sequence("pep1", "KF"); + SequenceI pep2 = new Sequence("pep2", "FG"); + SequenceI pep3 = new Sequence("pep3", "GP"); + pep1.createDatasetSequence(); + pep2.createDatasetSequence(); + pep3.createDatasetSequence(); + + /* + * add mappings from coding positions of dna to respective peptides + */ + AlignedCodonFrame acf = new AlignedCodonFrame(); + acf.addMap(dna, pep1, + new MapList(new int[] + { 1, 6 }, new int[] { 1, 2 }, 3, 1)); + acf.addMap(dna, pep2, + new MapList(new int[] + { 4, 9 }, new int[] { 1, 2 }, 3, 1)); + acf.addMap(dna, pep3, + new MapList(new int[] + { 19, 24 }, new int[] { 1, 2 }, 3, 1)); + + List acfList = Arrays + .asList(new AlignedCodonFrame[] + { acf }); + + AlignmentI cdna = new Alignment(new SequenceI[] { cds1, cds2, cds3 }); + AlignmentI protein = new Alignment( + new SequenceI[] + { pep1, pep2, pep3 }); + AlignViewportI cdnaView = new AlignViewport(cdna); + AlignViewportI peptideView = new AlignViewport(protein); + protein.setCodonFrames(acfList); + + /* + * Select pep1 and pep3 in the protein alignment + */ + SequenceGroup sg = new SequenceGroup(); + sg.setColourText(true); + sg.setIdColour(Color.GREEN); + sg.setOutlineColour(Color.LIGHT_GRAY); + sg.addSequence(pep1, false); + sg.addSequence(pep3, false); + sg.setEndRes(protein.getWidth() - 1); + + /* + * Verify the mapped sequence group in dna is cds1 and cds3 + */ + SequenceGroup mappedGroup = MappingUtils.mapSequenceGroup(sg, + peptideView, cdnaView); + assertTrue(mappedGroup.getColourText()); + assertSame(sg.getIdColour(), mappedGroup.getIdColour()); + assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour()); + assertEquals(2, mappedGroup.getSequences().size()); + assertSame(cds1, mappedGroup.getSequences().get(0)); + assertSame(cds3, mappedGroup.getSequences().get(1)); + // columns 1-6 selected (0-5 base zero) + assertEquals(0, mappedGroup.getStartRes()); + assertEquals(5, mappedGroup.getEndRes()); + + /* + * Select mapping sequence group from dna to protein + */ + sg.clear(); + sg.addSequence(cds2, false); + sg.addSequence(cds1, false); + sg.setStartRes(0); + sg.setEndRes(cdna.getWidth() - 1); + mappedGroup = MappingUtils.mapSequenceGroup(sg, cdnaView, peptideView); + assertTrue(mappedGroup.getColourText()); + assertSame(sg.getIdColour(), mappedGroup.getIdColour()); + assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour()); + assertEquals(2, mappedGroup.getSequences().size()); + assertSame(protein.getSequenceAt(1), mappedGroup.getSequences().get(0)); + assertSame(protein.getSequenceAt(0), mappedGroup.getSequences().get(1)); + assertEquals(0, mappedGroup.getStartRes()); + assertEquals(1, mappedGroup.getEndRes()); // two columns + } }