From ea6b884f0d86a85e79bf4a6a229b52fc4a8f2892 Mon Sep 17 00:00:00 2001 From: Jim Procter Date: Tue, 23 Aug 2016 18:42:53 +0100 Subject: [PATCH] JAL-2154 propagate from contig to CDS after constructing CDS sequence, and only propagate refs with congruent mappings *and* corresponding mapping on protein product --- src/jalview/analysis/AlignmentUtils.java | 40 +++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index b57fbbf..c4e51b1 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -1708,6 +1708,8 @@ public class AlignmentUtils mappings.add(cdsToProteinMapping); } + propagateDBRefsToCDS(cdsSeqDss, dnaSeq.getDatasetSequence(), + proteinProduct, aMapping); /* * add another mapping from original 'from' range to CDS */ @@ -1879,8 +1881,6 @@ public class AlignmentUtils SequenceI newSeq = new Sequence(seqId, newSeqChars, 1, newPos); // newSeq.setDescription(mapFromId); - propagateDBRefsToCDS(newSeq, seq, mapping); - return newSeq; } @@ -1894,11 +1894,12 @@ public class AlignmentUtils * @return list of DBRefEntrys added. */ public static List propagateDBRefsToCDS(SequenceI cdsSeq, - SequenceI contig, Mapping mapping) + SequenceI contig, SequenceI proteinProduct, Mapping mapping) { // gather direct refs from contig congrent with mapping List direct = new ArrayList(); + HashSet directSources = new HashSet(); if (contig.getDBRefs() != null) { for (DBRefEntry dbr : contig.getDBRefs()) @@ -1910,24 +1911,51 @@ public class AlignmentUtils if (mapping.getMap().equals(map)) { direct.add(dbr); + directSources.add(dbr.getSource()); } } } } - + DBRefEntry[] onSource = DBRefUtils.selectRefs( + proteinProduct.getDBRefs(), + directSources.toArray(new String[0])); List propagated = new ArrayList(); // and generate appropriate mappings for (DBRefEntry cdsref : direct) { - Mapping cdsmap = cdsref.getMap(); + // clone maplist and mapping MapList cdsposmap = new MapList(Arrays.asList(new int[][] { new int[] - { cdsSeq.getStart(), cdsSeq.getEnd() } }), cdsmap.getMap() + { cdsSeq.getStart(), cdsSeq.getEnd() } }), cdsref.getMap().getMap() .getToRanges(), 3, 1); + Mapping cdsmap = new Mapping(cdsref.getMap().getTo(), cdsref.getMap() + .getMap()); + // create dbref DBRefEntry newref = new DBRefEntry(cdsref.getSource(), cdsref.getVersion(), cdsref.getAccessionId(), new Mapping( cdsmap.getTo(), cdsposmap)); + + // and see if we can map to the protein product for this mapping. + // onSource is the filtered set of accessions on protein that we are + // tranferring, so we assume accession is the same. + if (cdsmap.getTo() == null && onSource != null) + { + List sourceRefs = DBRefUtils.searchRefs(onSource, + cdsref.getAccessionId()); + if (sourceRefs != null) + { + for (DBRefEntry srcref : sourceRefs) + { + if (srcref.getSource().equalsIgnoreCase(cdsref.getSource())) + { + // we have found a complementary dbref on the protein product, so + // update mapping's getTo + newref.getMap().setTo(proteinProduct); + } + } + } + } cdsSeq.addDBRef(newref); propagated.add(newref); } -- 1.7.10.2