From 6fe36904fddf9ecb85e67974f48081bba373e8ab Mon Sep 17 00:00:00 2001 From: gmungoc Date: Tue, 26 Sep 2017 13:14:59 +0100 Subject: [PATCH] JAL-2738 JAL-2154 add DBRef from transcript to synthesized CDS and vice versa --- src/jalview/analysis/AlignmentUtils.java | 65 +++++++++++++----------- test/jalview/analysis/AlignmentUtilsTests.java | 42 +++++++++++++-- 2 files changed, 74 insertions(+), 33 deletions(-) diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index 6acac01..c88a462 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -1633,7 +1633,7 @@ public class AlignmentUtils AlignmentI dataset, SequenceI[] products) { if (dataset == null || dataset.getDataset() != null) - { + { throw new IllegalArgumentException( "IMPLEMENTATION ERROR: dataset.getDataset() must be null!"); } @@ -1645,10 +1645,10 @@ public class AlignmentUtils { productSeqs = new HashSet(); for (SequenceI seq : products) - { - productSeqs.add(seq.getDatasetSequence() == null ? seq - : seq.getDatasetSequence()); - } + { + productSeqs.add(seq.getDatasetSequence() == null ? seq : seq + .getDatasetSequence()); + } } /* @@ -1670,15 +1670,15 @@ public class AlignmentUtils List seqMappings = MappingUtils .findMappingsForSequence(dnaSeq, mappings); for (AlignedCodonFrame mapping : seqMappings) - { + { List mappingsFromSequence = mapping .getMappingsFromSequence(dnaSeq); for (Mapping aMapping : mappingsFromSequence) - { + { MapList mapList = aMapping.getMap(); if (mapList.getFromRatio() == 1) - { + { /* * not a dna-to-protein mapping (likely dna-to-cds) */ @@ -1704,15 +1704,15 @@ public class AlignmentUtils if (cdsSeq != null) { if (!foundSeqs.contains(cdsSeq)) - { + { foundSeqs.add(cdsSeq); SequenceI derivedSequence = cdsSeq.deriveSequence(); cdsSeqs.add(derivedSequence); if (!dataset.getSequences().contains(cdsSeq)) - { + { dataset.addSequence(cdsSeq); - } } + } continue; } @@ -1740,9 +1740,8 @@ public class AlignmentUtils /* * add a mapping from CDS to the (unchanged) mapped to range */ - List cdsRange = Collections - .singletonList(new int[] - { 1, cdsSeq.getLength() }); + List cdsRange = Collections.singletonList(new int[] { 1, + cdsSeq.getLength() }); MapList cdsToProteinMap = new MapList(cdsRange, mapList.getToRanges(), mapList.getFromRatio(), mapList.getToRatio()); @@ -1792,40 +1791,46 @@ public class AlignmentUtils for (DBRefEntry primRef : dnaDss.getPrimaryDBRefs()) { - // creates a complementary cross-reference to the source sequence's - // primary reference. - - DBRefEntry cdsCrossRef = new DBRefEntry(primRef.getSource(), - primRef.getSource() + ":" + primRef.getVersion(), - primRef.getAccessionId()); - cdsCrossRef - .setMap(new Mapping(dnaDss, new MapList(dnaToCdsMap))); + /* + * create a cross-reference from CDS to the source sequence's + * primary reference and vice versa + */ + + String source = primRef.getSource(); + String version = primRef.getVersion(); + DBRefEntry cdsCrossRef = new DBRefEntry(source, source + ":" + + version, primRef.getAccessionId()); + cdsCrossRef.setMap(new Mapping(dnaDss, new MapList(dnaToCdsMap + .getInverse()))); cdsSeqDss.addDBRef(cdsCrossRef); + dnaSeq.addDBRef(new DBRefEntry(source, version, cdsSeq + .getName(), new Mapping(cdsSeqDss, dnaToCdsMap))); + // problem here is that the cross-reference is synthesized - // cdsSeq.getName() may be like 'CDS|dnaaccession' or // 'CDS|emblcdsacc' // assuming cds version same as dna ?!? - DBRefEntry proteinToCdsRef = new DBRefEntry(primRef.getSource(), - primRef.getVersion(), cdsSeq.getName()); + DBRefEntry proteinToCdsRef = new DBRefEntry(source, version, + cdsSeq.getName()); // - proteinToCdsRef.setMap( - new Mapping(cdsSeqDss, cdsToProteinMap.getInverse())); + proteinToCdsRef.setMap(new Mapping(cdsSeqDss, cdsToProteinMap + .getInverse())); proteinProduct.addDBRef(proteinToCdsRef); - } + } /* * transfer any features on dna that overlap the CDS */ transferFeatures(dnaSeq, cdsSeq, dnaToCdsMap, null, SequenceOntologyI.CDS); - } } } + } - AlignmentI cds = new Alignment( - cdsSeqs.toArray(new SequenceI[cdsSeqs.size()])); + AlignmentI cds = new Alignment(cdsSeqs.toArray(new SequenceI[cdsSeqs + .size()])); cds.setDataset(dataset); return cds; diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index 4439bb9..7c64193 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -1044,14 +1044,18 @@ public class AlignmentUtilsTests dna.addCodonFrame(acf); /* - * In this case, mappings originally came from matching Uniprot accessions - so need an xref on dna involving those regions. These are normally constructed from CDS annotation + * In this case, mappings originally came from matching Uniprot accessions + * - so need an xref on dna involving those regions. + * These are normally constructed from CDS annotation */ DBRefEntry dna1xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep1", new Mapping(mapfordna1)); - dna1.getDatasetSequence().addDBRef(dna1xref); + dna1.addDBRef(dna1xref); + assertEquals(2, dna1.getDBRefs().length); // to self and to pep1 DBRefEntry dna2xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep2", new Mapping(mapfordna2)); - dna2.getDatasetSequence().addDBRef(dna2xref); + dna2.addDBRef(dna2xref); + assertEquals(2, dna2.getDBRefs().length); // to self and to pep2 /* * execute method under test: @@ -1106,6 +1110,38 @@ public class AlignmentUtilsTests assertEquals(cdsMapping.getInverse(), dbref.getMap().getMap()); /* + * verify cDNA has added a dbref with mapping to CDS + */ + assertEquals(3, dna1.getDBRefs().length); + DBRefEntry dbRefEntry = dna1.getDBRefs()[2]; + assertSame(cds1Dss, dbRefEntry.getMap().getTo()); + MapList dnaToCdsMapping = new MapList(new int[] { 4, 6, 10, 12 }, + new int[] { 1, 6 }, 1, 1); + assertEquals(dnaToCdsMapping, dbRefEntry.getMap().getMap()); + assertEquals(3, dna2.getDBRefs().length); + dbRefEntry = dna2.getDBRefs()[2]; + assertSame(cds2Dss, dbRefEntry.getMap().getTo()); + dnaToCdsMapping = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, + new int[] { 1, 9 }, 1, 1); + assertEquals(dnaToCdsMapping, dbRefEntry.getMap().getMap()); + + /* + * verify CDS has added a dbref with mapping to cDNA + */ + assertEquals(2, cds1Dss.getDBRefs().length); + dbRefEntry = cds1Dss.getDBRefs()[1]; + assertSame(dna1.getDatasetSequence(), dbRefEntry.getMap().getTo()); + MapList cdsToDnaMapping = new MapList(new int[] { 1, 6 }, new int[] { + 4, 6, 10, 12 }, 1, 1); + assertEquals(cdsToDnaMapping, dbRefEntry.getMap().getMap()); + assertEquals(2, cds2Dss.getDBRefs().length); + dbRefEntry = cds2Dss.getDBRefs()[1]; + assertSame(dna2.getDatasetSequence(), dbRefEntry.getMap().getTo()); + cdsToDnaMapping = new MapList(new int[] { 1, 9 }, new int[] { 1, 3, 7, + 9, 13, 15 }, 1, 1); + assertEquals(cdsToDnaMapping, dbRefEntry.getMap().getMap()); + + /* * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide * the mappings are on the shared alignment dataset * 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep) -- 1.7.10.2