From e1435414d9938464ecb0f3e4d7920d4983e757f7 Mon Sep 17 00:00:00 2001
From: gmungoc
Date: Mon, 30 Nov 2015 09:30:20 +0000
Subject: [PATCH] JAL-653 (experimental) methods to resolve dummy to real sequence

---
 src/jalview/datamodel/AlignedCodonFrame.java      |  116 +++++++++++++++++++++
 test/jalview/datamodel/AlignedCodonFrameTest.java |   87 ++++++++++++++++
 2 files changed, 203 insertions(+)

diff --git a/src/jalview/datamodel/AlignedCodonFrame.java b/src/jalview/datamodel/AlignedCodonFrame.java
index 9c642cf..d24593a 100644
--- a/src/jalview/datamodel/AlignedCodonFrame.java
+++ b/src/jalview/datamodel/AlignedCodonFrame.java
@@ -461,4 +461,120 @@ public class AlignedCodonFrame
     }
     return result;
   }
+
+  /**
+   * Tests whether the given sequence could be substituted for one or more
+   * dummy sequences in this mapping. The mapping itself is not modified.
+   *
+   * @param seq
+   *          the real sequence; its dataset sequence is used if it has one
+   * @return true if at least one mapped dummy sequence could be replaced
+   */
+  public boolean isRealisableWith(SequenceI seq)
+  {
+    return realiseWith(seq, false) > 0;
+  }
+
+  /**
+   * Replaces any matchable mapped dummy sequences with the given real one.
+   *
+   * @param seq
+   *          the real sequence; its dataset sequence is used if it has one
+   * @return the count of sequence mappings instantiated
+   */
+  public int realiseWith(SequenceI seq)
+  {
+    return realiseWith(seq, true);
+  }
+
+  /**
+   * Counts, and optionally performs, the replacements of mapped dummy
+   * sequences by the given real sequence. Both the DNA ('map from') and the
+   * protein ('map to') sequence of each mapping are checked.
+   *
+   * @param seq
+   *          the real sequence; its dataset sequence is used if it has one
+   * @param doUpdate
+   *          if true, performs replacements, else only counts
+   * @return the number of mapped dummy sequences that could be (or were)
+   *         replaced
+   */
+  protected int realiseWith(SequenceI seq, boolean doUpdate)
+  {
+    SequenceI ds = seq.getDatasetSequence() != null ? seq
+            .getDatasetSequence() : seq;
+    int count = 0;
+    if (dnaSeqs == null || dnaToProt == null)
+    {
+      // no mappings held, so nothing can be replaced
+      return count;
+    }
+
+    for (int i = 0; i < dnaSeqs.length; i++)
+    {
+      Mapping mapping = dnaToProt[i];
+
+      /*
+       * check for replaceable DNA ('map from') sequence
+       */
+      if (couldReplaceSequence(dnaSeqs[i], ds, mapping.getMap()
+              .getFromLowest(), mapping.getMap().getFromHighest()))
+      {
+        count++;
+        if (doUpdate)
+        {
+          dnaSeqs[i] = ds;
+        }
+      }
+
+      /*
+       * check for replaceable protein ('map to') sequence
+       */
+      if (couldReplaceSequence(mapping.getTo(), ds, mapping.getMap()
+              .getToLowest(), mapping.getMap().getToHighest()))
+      {
+        count++;
+        if (doUpdate)
+        {
+          mapping.setTo(ds);
+        }
+      }
+    }
+    return count;
+  }
+
+  /**
+   * Helper method to test whether a 'real' sequence could replace a 'dummy'
+   * sequence in the map. The criteria are that the existing sequence is a
+   * SequenceDummy, that both sequences have the same name, and that the
+   * mapped region overlaps the candidate sequence - including the case where
+   * the mapped region extends beyond both ends of the candidate.
+   *
+   * @param existing
+   *          the currently mapped sequence
+   * @param replacement
+   *          the candidate real sequence
+   * @param mapStart
+   *          lowest mapped position (expected not to exceed mapEnd)
+   * @param mapEnd
+   *          highest mapped position
+   * @return true if replacement could be substituted for existing
+   */
+  protected static boolean couldReplaceSequence(SequenceI existing,
+          SequenceI replacement, int mapStart, int mapEnd)
+  {
+    if (!(existing instanceof SequenceDummy)
+            || !existing.getName().equals(replacement.getName()))
+    {
+      return false;
+    }
+
+    /*
+     * two closed ranges overlap if each starts no later than the other ends;
+     * testing only whether mapStart or mapEnd lies inside the sequence would
+     * miss a mapped region that wholly encloses the sequence
+     */
+    return mapStart <= replacement.getEnd()
+            && mapEnd >= replacement.getStart();
+  }
 }
diff --git a/test/jalview/datamodel/AlignedCodonFrameTest.java b/test/jalview/datamodel/AlignedCodonFrameTest.java
index a0757cc..69e584a 100644
--- a/test/jalview/datamodel/AlignedCodonFrameTest.java
+++ b/test/jalview/datamodel/AlignedCodonFrameTest.java
@@ -21,7 +21,9 @@
 package jalview.datamodel;
 
 import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertFalse;
 import static org.testng.AssertJUnit.assertNull;
+import static org.testng.AssertJUnit.assertTrue;
 
 import jalview.util.MapList;
 
@@ -176,4 +178,89 @@ public class AlignedCodonFrameTest
     assertEquals("[C, T, T]", Arrays.toString(acf.getMappedCodon(
             aseq1.getDatasetSequence(), 13)));
   }
+
+  @Test(groups = { "Functional" })
+  public void testCouldReplaceSequence()
+  {
+    SequenceI seq1 = new Sequence("Seq1/10-21", "aaacccgggttt");
+    SequenceI seq1proxy = new SequenceDummy("Seq1");
+
+    // map to region within sequence is ok
+    assertTrue(AlignedCodonFrame.couldReplaceSequence(seq1proxy, seq1, 12,
+            17));
+    // map to region overlapping sequence is ok
+    assertTrue(AlignedCodonFrame.couldReplaceSequence(seq1proxy, seq1, 5,
+            10));
+    assertTrue(AlignedCodonFrame.couldReplaceSequence(seq1proxy, seq1, 21,
+            26));
+    // map to region enclosing the whole sequence is ok
+    assertTrue(AlignedCodonFrame.couldReplaceSequence(seq1proxy, seq1, 5,
+            26));
+    // map to region before sequence is not ok
+    assertFalse(AlignedCodonFrame.couldReplaceSequence(seq1proxy, seq1, 4,
+            9));
+    // map to region after sequence is not ok
+    assertFalse(AlignedCodonFrame.couldReplaceSequence(seq1proxy, seq1, 22,
+            27));
+
+    /*
+     * test should fail if name doesn't match
+     */
+    seq1proxy.setName("Seq1a");
+    assertFalse(AlignedCodonFrame.couldReplaceSequence(seq1proxy, seq1, 12,
+            17));
+    seq1proxy.setName("Seq1");
+    seq1.setName("Seq1a");
+    assertFalse(AlignedCodonFrame.couldReplaceSequence(seq1proxy, seq1, 12,
+            17));
+  }
+
+  @Test(groups = { "Functional" })
+  public void testIsRealisableWith()
+  {
+    SequenceI seq1 = new Sequence("Seq1", "tttaaaCCCGGGtttaaa");
+    SequenceI seq2 = new Sequence("Seq2", "PG");
+    SequenceI seq1proxy = new SequenceDummy("Seq1");
+    seq1.createDatasetSequence();
+    seq2.createDatasetSequence();
+    MapList mapList = new MapList(new int[] { 7, 12 }, new int[] { 2, 3 },
+            3, 1);
+    AlignedCodonFrame acf = new AlignedCodonFrame();
+    acf.addMap(seq1proxy, seq2, mapList);
+
+    /*
+     * Seq2 is mapped to SequenceDummy seq1proxy bases 7-12
+     * This is 'realisable' from real sequence Seq1
+     */
+    assertTrue(acf.isRealisableWith(seq1));
+
+    /*
+     * test should fail if name doesn't match
+     */
+    seq1proxy.setName("Seq1a");
+    assertFalse(acf.isRealisableWith(seq1));
+    seq1proxy.setName("Seq1");
+    seq1.setName("Seq1a");
+    assertFalse(acf.isRealisableWith(seq1));
+    seq1.setName("Seq1");
+
+    /*
+     * test should fail if no sequence overlap with mapping of bases 7-12
+     * use artificial start/end values to test this
+     */
+    seq1.setStart(1);
+    seq1.setEnd(6);
+    // seq1 precedes mapped region:
+    assertFalse(acf.isRealisableWith(seq1));
+    seq1.setEnd(7);
+    // seq1 includes first mapped base:
+    assertTrue(acf.isRealisableWith(seq1));
+    seq1.setStart(13);
+    seq1.setEnd(18);
+    // seq1 follows mapped region:
+    assertFalse(acf.isRealisableWith(seq1));
+    seq1.setStart(12);
+    // seq1 includes last mapped base:
+    assertTrue(acf.isRealisableWith(seq1));
+  }
 }
-- 
1.7.10.2