From: gmungoc Date: Thu, 5 Oct 2017 14:16:49 +0000 (+0100) Subject: JAL-2755 transfer chromosomal reference from transcript to CDS X-Git-Tag: Release_2_11_0~207 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=5f9309d21b8fbdd907161aeaf9e6ca6622312cb6;p=jalview.git JAL-2755 transfer chromosomal reference from transcript to CDS --- diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index 9a46a91..228446b 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -29,6 +29,7 @@ import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; +import jalview.datamodel.GeneLociI; import jalview.datamodel.IncompleteCodonException; import jalview.datamodel.Mapping; import jalview.datamodel.Sequence; @@ -394,7 +395,7 @@ public class AlignmentUtils * Answers true if the mappings include one between the given (dataset) * sequences. */ - public static boolean mappingExists(List mappings, + protected static boolean mappingExists(List mappings, SequenceI aaSeq, SequenceI cdnaSeq) { if (mappings != null) @@ -1633,7 +1634,7 @@ public class AlignmentUtils AlignmentI dataset, SequenceI[] products) { if (dataset == null || dataset.getDataset() != null) - { + { throw new IllegalArgumentException( "IMPLEMENTATION ERROR: dataset.getDataset() must be null!"); } @@ -1645,10 +1646,10 @@ public class AlignmentUtils { productSeqs = new HashSet(); for (SequenceI seq : products) - { + { productSeqs.add(seq.getDatasetSequence() == null ? seq : seq .getDatasetSequence()); - } + } } /* @@ -1670,15 +1671,15 @@ public class AlignmentUtils List seqMappings = MappingUtils .findMappingsForSequence(dnaSeq, mappings); for (AlignedCodonFrame mapping : seqMappings) - { + { List mappingsFromSequence = mapping .getMappingsFromSequence(dnaSeq); for (Mapping aMapping : mappingsFromSequence) - { + { MapList mapList = aMapping.getMap(); if (mapList.getFromRatio() == 1) - { + { /* * not a dna-to-protein mapping (likely dna-to-cds) */ @@ -1704,15 +1705,15 @@ public class AlignmentUtils if (cdsSeq != null) { if (!foundSeqs.contains(cdsSeq)) - { + { foundSeqs.add(cdsSeq); SequenceI derivedSequence = cdsSeq.deriveSequence(); cdsSeqs.add(derivedSequence); if (!dataset.getSequences().contains(cdsSeq)) - { + { dataset.addSequence(cdsSeq); + } } - } continue; } @@ -1763,7 +1764,7 @@ public class AlignmentUtils * add another mapping from original 'from' range to CDS */ AlignedCodonFrame dnaToCdsMapping = new AlignedCodonFrame(); - MapList dnaToCdsMap = new MapList(mapList.getFromRanges(), + final MapList dnaToCdsMap = new MapList(mapList.getFromRanges(), cdsRange, 1, 1); dnaToCdsMapping.addMap(dnaSeq.getDatasetSequence(), cdsSeqDss, dnaToCdsMap); @@ -1773,6 +1774,13 @@ public class AlignmentUtils } /* + * transfer dna chromosomal loci (if known) to the CDS + * sequence (via the mapping) + */ + final MapList cdsToDnaMap = dnaToCdsMap.getInverse(); + transferGeneLoci(dnaSeq, cdsToDnaMap, cdsSeq); + + /* * add DBRef with mapping from protein to CDS * (this enables Get Cross-References from protein alignment) * This is tricky because we can't have two DBRefs with the @@ -1795,13 +1803,11 @@ public class AlignmentUtils * create a cross-reference from CDS to the source sequence's * primary reference and vice versa */ - String source = primRef.getSource(); String version = primRef.getVersion(); DBRefEntry cdsCrossRef = new DBRefEntry(source, source + ":" + version, primRef.getAccessionId()); - cdsCrossRef.setMap(new Mapping(dnaDss, new MapList(dnaToCdsMap - .getInverse()))); + cdsCrossRef.setMap(new Mapping(dnaDss, new MapList(cdsToDnaMap))); cdsSeqDss.addDBRef(cdsCrossRef); dnaSeq.addDBRef(new DBRefEntry(source, version, cdsSeq @@ -1818,16 +1824,16 @@ public class AlignmentUtils proteinToCdsRef.setMap(new Mapping(cdsSeqDss, cdsToProteinMap .getInverse())); proteinProduct.addDBRef(proteinToCdsRef); - } + } /* * transfer any features on dna that overlap the CDS */ transferFeatures(dnaSeq, cdsSeq, dnaToCdsMap, null, SequenceOntologyI.CDS); + } } } - } AlignmentI cds = new Alignment(cdsSeqs.toArray(new SequenceI[cdsSeqs .size()])); @@ -1837,6 +1843,38 @@ public class AlignmentUtils } /** + * Tries to transfer gene loci (dbref to chromosome positions) from fromSeq to + * toSeq, mediated by the given mapping between the sequences + * + * @param fromSeq + * @param targetToFrom + * Map + * @param targetSeq + */ + protected static void transferGeneLoci(SequenceI fromSeq, + MapList targetToFrom, SequenceI targetSeq) + { + if (targetSeq.getGeneLoci() != null) + { + // already have - don't override + return; + } + GeneLociI fromLoci = fromSeq.getGeneLoci(); + if (fromLoci == null) + { + return; + } + + MapList newMap = targetToFrom.traverse(fromLoci.getMap()); + + if (newMap != null) + { + targetSeq.setGeneLoci(fromLoci.getSpeciesId(), + fromLoci.getAssemblyId(), fromLoci.getChromosomeId(), newMap); + } + } + + /** * A helper method that finds a CDS sequence in the alignment dataset that is * mapped to the given protein sequence, and either is, or has a mapping from, * the given dna sequence. @@ -2004,19 +2042,19 @@ public class AlignmentUtils } /** - * add any DBRefEntrys to cdsSeq from contig that have a Mapping congruent to + * Adds any DBRefEntrys to cdsSeq from contig that have a Mapping congruent to * the given mapping. * * @param cdsSeq * @param contig + * @param proteinProduct * @param mapping - * @return list of DBRefEntrys added. + * @return list of DBRefEntrys added */ - public static List propagateDBRefsToCDS(SequenceI cdsSeq, + protected static List propagateDBRefsToCDS(SequenceI cdsSeq, SequenceI contig, SequenceI proteinProduct, Mapping mapping) { - - // gather direct refs from contig congrent with mapping + // gather direct refs from contig congruent with mapping List direct = new ArrayList(); HashSet directSources = new HashSet(); if (contig.getDBRefs() != null) @@ -2096,7 +2134,7 @@ public class AlignmentUtils * subtypes in the Sequence Ontology) * @param omitting */ - public static int transferFeatures(SequenceI fromSeq, SequenceI toSeq, + protected static int transferFeatures(SequenceI fromSeq, SequenceI toSeq, MapList mapping, String select, String... omitting) { SequenceI copyTo = toSeq; @@ -2234,7 +2272,7 @@ public class AlignmentUtils * @param dnaSeq * @return */ - public static List findCdsPositions(SequenceI dnaSeq) + protected static List findCdsPositions(SequenceI dnaSeq) { List result = new ArrayList(); diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index 7c64193..d229a39 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -34,6 +34,7 @@ import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.Annotation; import jalview.datamodel.DBRefEntry; +import jalview.datamodel.GeneLociI; import jalview.datamodel.Mapping; import jalview.datamodel.SearchResultMatchI; import jalview.datamodel.SearchResultsI; @@ -71,7 +72,7 @@ public class AlignmentUtilsTests JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION); } - public static Sequence ts = new Sequence("short", + private static Sequence ts = new Sequence("short", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm"); @Test(groups = { "Functional" }) @@ -2569,4 +2570,67 @@ public class AlignmentUtilsTests assertEquals(s_as3, uas3.getSequenceAsString()); } + @Test(groups = { "Functional" }) + public void testTransferGeneLoci() + { + SequenceI from = new Sequence("transcript", + "aaacccgggTTTAAACCCGGGtttaaacccgggttt"); + SequenceI to = new Sequence("CDS", "TTTAAACCCGGG"); + MapList map = new MapList(new int[] { 1, 12 }, new int[] { 10, 21 }, 1, + 1); + + /* + * first with nothing to transfer + */ + AlignmentUtils.transferGeneLoci(from, map, to); + assertNull(to.getGeneLoci()); + + /* + * next with gene loci set on 'from' sequence + */ + int[] exons = new int[] { 100, 105, 155, 164, 210, 229 }; + MapList geneMap = new MapList(new int[] { 1, 36 }, exons, 1, 1); + from.setGeneLoci("human", "GRCh38", "7", geneMap); + AlignmentUtils.transferGeneLoci(from, map, to); + + GeneLociI toLoci = to.getGeneLoci(); + assertNotNull(toLoci); + // DBRefEntry constructor upper-cases 'source' + assertEquals("HUMAN", toLoci.getSpeciesId()); + assertEquals("GRCh38", toLoci.getAssemblyId()); + assertEquals("7", toLoci.getChromosomeId()); + + /* + * transcript 'exons' are 1-6, 7-16, 17-36 + * CDS 1:12 is transcript 10-21 + * transcript 'CDS' is 10-16, 17-21 + * which is 'gene' 158-164, 210-214 + */ + MapList toMap = toLoci.getMap(); + assertEquals(1, toMap.getFromRanges().size()); + assertEquals(2, toMap.getFromRanges().get(0).length); + assertEquals(1, toMap.getFromRanges().get(0)[0]); + assertEquals(12, toMap.getFromRanges().get(0)[1]); + assertEquals(1, toMap.getToRanges().size()); + assertEquals(4, toMap.getToRanges().get(0).length); + assertEquals(158, toMap.getToRanges().get(0)[0]); + assertEquals(164, toMap.getToRanges().get(0)[1]); + assertEquals(210, toMap.getToRanges().get(0)[2]); + assertEquals(214, toMap.getToRanges().get(0)[3]); + // or summarised as (but toString might change in future): + assertEquals("[ [1, 12] ] 1:1 to [ [158, 164, 210, 214] ]", + toMap.toString()); + + /* + * an existing value is not overridden + */ + geneMap = new MapList(new int[] { 1, 36 }, new int[] { 36, 1 }, 1, 1); + from.setGeneLoci("inhuman", "GRCh37", "6", geneMap); + AlignmentUtils.transferGeneLoci(from, map, to); + assertEquals("GRCh38", toLoci.getAssemblyId()); + assertEquals("7", toLoci.getChromosomeId()); + toMap = toLoci.getMap(); + assertEquals("[ [1, 12] ] 1:1 to [ [158, 164, 210, 214] ]", + toMap.toString()); + } }