From f6d5cf647066635879824367816e4717e07bbb9d Mon Sep 17 00:00:00 2001 From: gmungoc Date: Thu, 28 Jan 2016 16:43:51 +0000 Subject: [PATCH] JAL-1705 refine feature copying to including 5' and 3' overlaps --- src/jalview/analysis/AlignmentUtils.java | 53 +++++++++++-- src/jalview/ext/ensembl/EnsemblSeqProxy.java | 45 +---------- test/jalview/analysis/AlignmentUtilsTests.java | 96 ++++++++++++++++++++++-- 3 files changed, 139 insertions(+), 55 deletions(-) diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index fe95dca..41538eb 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -34,6 +34,7 @@ import jalview.datamodel.Sequence; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceGroup; import jalview.datamodel.SequenceI; +import jalview.io.gff.SequenceOntology; import jalview.schemes.ResidueProperties; import jalview.util.DBRefUtils; import jalview.util.MapList; @@ -1406,23 +1407,27 @@ public class AlignmentUtils /* * transfer any features on dna that overlap the CDS */ - transferFeatures(dnaSeq, cds, dnaToCds, "CDS" /* SequenceOntology.CDS */); + transferFeatures(dnaSeq, cds, dnaToCds, null, "CDS" /* SequenceOntology.CDS */); } return cdsSequences; } /** - * Transfers any co-located features on 'fromSeq' to 'toSeq', adjusting the + * Transfers co-located features on 'fromSeq' to 'toSeq', adjusting the * feature start/end ranges, optionally omitting specified feature types. + * Returns the number of features copied. * * @param fromSeq * @param toSeq + * @param select + * if not null, only features of this type are copied (including + * subtypes in the Sequence Ontology) * @param mapping * the mapping from 'fromSeq' to 'toSeq' * @param omitting */ - protected static void transferFeatures(SequenceI fromSeq, - SequenceI toSeq, MapList mapping, String... omitting) + public static int transferFeatures(SequenceI fromSeq, SequenceI toSeq, + MapList mapping, String select, String... omitting) { SequenceI copyTo = toSeq; while (copyTo.getDatasetSequence() != null) @@ -1430,12 +1435,18 @@ public class AlignmentUtils copyTo = copyTo.getDatasetSequence(); } + SequenceOntology so = SequenceOntology.getInstance(); + int count = 0; SequenceFeature[] sfs = fromSeq.getSequenceFeatures(); if (sfs != null) { for (SequenceFeature sf : sfs) { String type = sf.getType(); + if (select != null && !so.isA(type, select)) + { + continue; + } boolean omit = false; for (String toOmit : omitting) { @@ -1453,16 +1464,48 @@ public class AlignmentUtils * locate the mapped range - null if either start or end is * not mapped (no partial overlaps are calculated) */ - int[] mappedTo = mapping.locateInTo(sf.getBegin(), sf.getEnd()); + int start = sf.getBegin(); + int end = sf.getEnd(); + int[] mappedTo = mapping.locateInTo(start, end); + /* + * if whole exon range doesn't map, try interpreting it + * as 5' or 3' exon overlapping the CDS range + */ + if (mappedTo == null) + { + mappedTo = mapping.locateInTo(end, end); + if (mappedTo != null) + { + /* + * end of exon is in CDS range - 5' overlap + * to a range from the start of the peptide + */ + mappedTo[0] = 1; + } + } + if (mappedTo == null) + { + mappedTo = mapping.locateInTo(start, start); + if (mappedTo != null) + { + /* + * start of exon is in CDS range - 3' overlap + * to a range up to the end of the peptide + */ + mappedTo[1] = toSeq.getLength(); + } + } if (mappedTo != null) { SequenceFeature copy = new SequenceFeature(sf); copy.setBegin(Math.min(mappedTo[0], mappedTo[1])); copy.setEnd(Math.max(mappedTo[0], mappedTo[1])); copyTo.addSequenceFeature(copy); + count++; } } } + return count; } /** diff --git a/src/jalview/ext/ensembl/EnsemblSeqProxy.java b/src/jalview/ext/ensembl/EnsemblSeqProxy.java index e241874..cbeaae9 100644 --- a/src/jalview/ext/ensembl/EnsemblSeqProxy.java +++ b/src/jalview/ext/ensembl/EnsemblSeqProxy.java @@ -1,5 +1,6 @@ package jalview.ext.ensembl; +import jalview.analysis.AlignmentUtils; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; @@ -847,7 +848,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient peptide = peptide.getDatasetSequence(); } - mapExonFeaturesToProtein(dnaSeq, peptide, dnaToProtein); + AlignmentUtils.transferFeatures(dnaSeq, peptide, dnaToProtein, + SequenceOntology.EXON); LinkedHashMap variants = buildDnaVariantsMap( dnaSeq, dnaToProtein); @@ -878,47 +880,6 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient } /** - * Transfers exon features to the corresponding mapped regions of the protein - * sequence. This is useful because it allows visualisation of exon boundaries - * on the peptide (using 'colour by label' for the exon name). Returns the - * number of features written. - * - * @param dnaSeq - * @param peptide - * @param dnaToProtein - */ - static int mapExonFeaturesToProtein(SequenceI dnaSeq, SequenceI peptide, - MapList dnaToProtein) - { - SequenceFeature[] sfs = dnaSeq.getSequenceFeatures(); - if (sfs == null) - { - return 0; - } - - SequenceOntology so = SequenceOntology.getInstance(); - int count = 0; - - for (SequenceFeature sf : sfs) - { - if (so.isA(sf.getType(), SequenceOntology.EXON)) - { - int start = sf.getBegin(); - int end = sf.getEnd(); - int[] mapsTo = dnaToProtein.locateInTo(start, end); - if (mapsTo != null) - { - SequenceFeature copy = new SequenceFeature(SequenceOntology.EXON, - sf.getDescription(), mapsTo[0], mapsTo[1], 0f, null); - peptide.addSequenceFeature(copy); - count++; - } - } - } - return count; - } - - /** * Builds a map whose key is position in the protein sequence, and value is an * array of all variants for the coding codon positions * diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index 09bd64e..a82a881 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -1342,6 +1342,9 @@ public class AlignmentUtilsTests "AAA---CCCTTT---"); } + /** + * Tests for transferring features between mapped sequences + */ @Test(groups = { "Functional" }) public void testTransferFeatures() { @@ -1380,34 +1383,111 @@ public class AlignmentUtilsTests new int[] { 1, 6 }, 1, 1); /* - * behaviour of transferFeatures depends on MapList.locateInTo() - * if start and end positions are mapped, returns the mapped region - * if either is not mapped, does _not_ search for overlapped region + * transferFeatures() will build 'partial overlap' for regions + * that partially overlap 5' or 3' (start or end) of target sequence */ - AlignmentUtils.transferFeatures(dna, cds, map); + AlignmentUtils.transferFeatures(dna, cds, map, null); SequenceFeature[] sfs = cds.getSequenceFeatures(); - assertEquals(4, sfs.length); + assertEquals(6, sfs.length); SequenceFeature sf = sfs[0]; + assertEquals("type2", sf.getType()); + assertEquals("desc2", sf.getDescription()); + assertEquals(2f, sf.getScore()); + assertEquals(1, sf.getBegin()); + assertEquals(1, sf.getEnd()); + + sf = sfs[1]; assertEquals("type3", sf.getType()); assertEquals("desc3", sf.getDescription()); assertEquals(3f, sf.getScore()); assertEquals(1, sf.getBegin()); assertEquals(3, sf.getEnd()); - sf = sfs[1]; + sf = sfs[2]; assertEquals("type4", sf.getType()); assertEquals(2, sf.getBegin()); assertEquals(5, sf.getEnd()); - sf = sfs[2]; + sf = sfs[3]; assertEquals("type5", sf.getType()); assertEquals(1, sf.getBegin()); assertEquals(6, sf.getEnd()); - sf = sfs[3]; + sf = sfs[4]; assertEquals("type8", sf.getType()); assertEquals(6, sf.getBegin()); assertEquals(6, sf.getEnd()); + + sf = sfs[5]; + assertEquals("type9", sf.getType()); + assertEquals(6, sf.getBegin()); + assertEquals(6, sf.getEnd()); + } + + /** + * Tests for transferring features between mapped sequences + */ + @Test(groups = { "Functional" }) + public void testTransferFeatures_withOmit() + { + SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt"); + SequenceI cds = new Sequence("cds/10-15", "TAGGCC"); + + MapList map = new MapList(new int[] { 4, 6, 10, 12 }, + new int[] { 1, 6 }, 1, 1); + + // [5, 11] maps to [2, 5] + dna.addSequenceFeature(new SequenceFeature("type4", "desc4", 5, 11, 4f, + null)); + // [4, 12] maps to [1, 6] + dna.addSequenceFeature(new SequenceFeature("type5", "desc5", 4, 12, 5f, + null)); + // [12, 12] maps to [6, 6] + dna.addSequenceFeature(new SequenceFeature("type8", "desc8", 12, 12, + 8f, null)); + + // desc4 and desc8 are the 'omit these' varargs + AlignmentUtils.transferFeatures(dna, cds, map, null, "type4", "type8"); + SequenceFeature[] sfs = cds.getSequenceFeatures(); + assertEquals(1, sfs.length); + + SequenceFeature sf = sfs[0]; + assertEquals("type5", sf.getType()); + assertEquals(1, sf.getBegin()); + assertEquals(6, sf.getEnd()); + } + + /** + * Tests for transferring features between mapped sequences + */ + @Test(groups = { "Functional" }) + public void testTransferFeatures_withSelect() + { + SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt"); + SequenceI cds = new Sequence("cds/10-15", "TAGGCC"); + + MapList map = new MapList(new int[] { 4, 6, 10, 12 }, + new int[] { 1, 6 }, 1, 1); + + // [5, 11] maps to [2, 5] + dna.addSequenceFeature(new SequenceFeature("type4", "desc4", 5, 11, 4f, + null)); + // [4, 12] maps to [1, 6] + dna.addSequenceFeature(new SequenceFeature("type5", "desc5", 4, 12, 5f, + null)); + // [12, 12] maps to [6, 6] + dna.addSequenceFeature(new SequenceFeature("type8", "desc8", 12, 12, + 8f, null)); + + // "type5" is the 'select this type' argument + AlignmentUtils.transferFeatures(dna, cds, map, "type5"); + SequenceFeature[] sfs = cds.getSequenceFeatures(); + assertEquals(1, sfs.length); + + SequenceFeature sf = sfs[0]; + assertEquals("type5", sf.getType()); + assertEquals(1, sf.getBegin()); + assertEquals(6, sf.getEnd()); } } -- 1.7.10.2