From 10bdbbde9cd0307e3d4c2b39c47062b59b305155 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Fri, 1 Apr 2016 21:00:26 +0100 Subject: [PATCH] JAL-1705 ensure the right mapping is used to align CDS to cDNA --- src/jalview/analysis/AlignmentUtils.java | 29 +++++++++-- src/jalview/datamodel/AlignedCodonFrame.java | 25 ++++++++++ src/jalview/gui/AlignFrame.java | 5 +- test/jalview/analysis/AlignmentUtilsTests.java | 61 +++++++++++++++++++++++- 4 files changed, 113 insertions(+), 7 deletions(-) diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index eb1ee4b..14e3907 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -1366,12 +1366,13 @@ public class AlignmentUtils * Constructs an alignment consisting of the mapped (CDS) regions in the given * nucleotide sequences, and updates mappings to match. The CDS sequences are * added to the original alignment's dataset, which is shared by the new - * alignment. + * alignment. Mappings from nucleotide to CDS, and from CDS to protein, are + * added to the alignment dataset. 
* * @param dna * aligned dna sequences * @param mappings - * from dna to protein; these are replaced with new mappings + * from dna to protein * @param al * @return an alignment whose sequences are the cds-only parts of the dna * sequences (or null if no mappings are found) @@ -2093,7 +2094,7 @@ public class AlignmentUtils Map<Integer, Map<SequenceI, Character>> map = new TreeMap<Integer, Map<SequenceI, Character>>(); /* - * report any sequences that have no mapping so can't be realigned + * report any sequences that have no mapping so can't be realigned */ unmapped.addAll(unaligned.getSequences()); @@ -2106,7 +2107,7 @@ public class AlignmentUtils SequenceI fromSeq = mapping.findAlignedSequence(seq, aligned); if (fromSeq != null) { - Mapping seqMap = mapping.getMappingForSequence(seq); + Mapping seqMap = mapping.getMappingBetween(fromSeq, seq); if (addMappedPositions(seq, fromSeq, seqMap, map)) { unmapped.remove(seq); @@ -2137,6 +2138,11 @@ public class AlignmentUtils static boolean addMappedPositions(SequenceI seq, SequenceI fromSeq, Mapping seqMap, Map<Integer, Map<SequenceI, Character>> map) { + if (seqMap == null) + { + return false; + } + char[] fromChars = fromSeq.getSequence(); int toStart = seq.getStart(); char[] toChars = seq.getSequence(); @@ -2193,4 +2199,19 @@ public class AlignmentUtils } return true; } + + // strictly temporary hack until proper criteria for aligning protein to cds + // are in place; this is so Ensembl -> fetch xrefs Uniprot aligns the Uniprot + public static boolean looksLikeEnsembl(AlignmentI alignment) + { + for (SequenceI seq : alignment.getSequences()) + { + String name = seq.getName(); + if (!name.startsWith("ENSG") && !name.startsWith("ENST")) + { + return false; + } + } + return true; + } } diff --git a/src/jalview/datamodel/AlignedCodonFrame.java b/src/jalview/datamodel/AlignedCodonFrame.java index a56f1d4..6d6cdb5 100644 --- a/src/jalview/datamodel/AlignedCodonFrame.java +++ b/src/jalview/datamodel/AlignedCodonFrame.java @@ -649,4 +649,29 @@ public class AlignedCodonFrame { return mappings == null ?
"null" : mappings.toString(); } + + /** + * Returns the first mapping found that is from 'fromSeq' to 'toSeq', or null + * if none found + * + * @param fromSeq + * aligned or dataset sequence + * @param toSeq + * aligned or dataset sequence + * @return the first matching mapping, or null if none is found + */ + public Mapping getMappingBetween(SequenceI fromSeq, SequenceI toSeq) + { + for (SequenceToSequenceMapping mapping : mappings) + { + SequenceI from = mapping.fromSeq; + SequenceI to = mapping.mapping.to; + if ((from == fromSeq || from == fromSeq.getDatasetSequence()) + && (to == toSeq || to == toSeq.getDatasetSequence())) + { + return mapping.mapping; + } + } + return null; + } } diff --git a/src/jalview/gui/AlignFrame.java b/src/jalview/gui/AlignFrame.java index d161cfd..d1f3421 100644 --- a/src/jalview/gui/AlignFrame.java +++ b/src/jalview/gui/AlignFrame.java @@ -4779,8 +4779,9 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, * pending getting Embl transcripts to 'align', * we are only doing this for Ensembl */ - // TODO want to do this also when fetching UNIPROT for Ensembl - if (DBRefSource.ENSEMBL.equalsIgnoreCase(source)) + // TODO proper criteria for 'can align as cdna' + if (DBRefSource.ENSEMBL.equalsIgnoreCase(source) + || AlignmentUtils.looksLikeEnsembl(alignment)) { copyAlignment.alignAs(alignment); copyAlignmentIsAligned = true; diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index 3d3736f..7ccbf97 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -50,6 +50,7 @@ import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.TreeMap; import org.testng.annotations.Test; @@ -1953,7 +1954,65 @@ public class AlignmentUtilsTests */ dna.addCodonFrame(acf); AlignmentUtils.alignAs(cds, dna); - assertEquals("---GGGTTT---", cds.getSequenceAt(0).getSequenceAsString()); + assertEquals("---GGGTTT",
cds.getSequenceAt(0).getSequenceAsString()); assertEquals("CCC------AAA", cds.getSequenceAt(1).getSequenceAsString()); } + + @Test(groups = { "Functional" }) + public void testAddMappedPositions() + { + SequenceI from = new Sequence("dna", "ggAA-ATcc-TT-g"); + SequenceI seq1 = new Sequence("cds", "AAATTT"); + from.createDatasetSequence(); + seq1.createDatasetSequence(); + Mapping mapping = new Mapping(seq1, new MapList( + new int[] { 3, 6, 9, 10 }, + new int[] { 1, 6 }, 1, 1)); + Map<Integer, Map<SequenceI, Character>> map = new TreeMap<Integer, Map<SequenceI, Character>>(); + AlignmentUtils.addMappedPositions(seq1, from, mapping, map); + + /* + * verify map has seq1 residues in columns 3,4,6,7,11,12 + */ + assertEquals(6, map.size()); + assertEquals('A', map.get(3).get(seq1).charValue()); + assertEquals('A', map.get(4).get(seq1).charValue()); + assertEquals('A', map.get(6).get(seq1).charValue()); + assertEquals('T', map.get(7).get(seq1).charValue()); + assertEquals('T', map.get(11).get(seq1).charValue()); + assertEquals('T', map.get(12).get(seq1).charValue()); + + /* + * + */ + } + + /** + * Test case where the mapping 'from' range includes a stop codon which is + * absent in the 'to' range + */ + @Test(groups = { "Functional" }) + public void testAddMappedPositions_withStopCodon() + { + SequenceI from = new Sequence("dna", "ggAA-ATcc-TT-g"); + SequenceI seq1 = new Sequence("cds", "AAATTT"); + from.createDatasetSequence(); + seq1.createDatasetSequence(); + Mapping mapping = new Mapping(seq1, new MapList( + new int[] { 3, 6, 9, 10 }, + new int[] { 1, 6 }, 1, 1)); + Map<Integer, Map<SequenceI, Character>> map = new TreeMap<Integer, Map<SequenceI, Character>>(); + AlignmentUtils.addMappedPositions(seq1, from, mapping, map); + + /* + * verify map has seq1 residues in columns 3,4,6,7,11,12 + */ + assertEquals(6, map.size()); + assertEquals('A', map.get(3).get(seq1).charValue()); + assertEquals('A', map.get(4).get(seq1).charValue()); + assertEquals('A', map.get(6).get(seq1).charValue()); + assertEquals('T', map.get(7).get(seq1).charValue()); + assertEquals('T', map.get(11).get(seq1).charValue()); +
assertEquals('T', map.get(12).get(seq1).charValue()); + } } -- 1.7.10.2