From 7b286499500acfd548f601f1aa92f3f9019caac1 Mon Sep 17 00:00:00 2001 From: Jim Procter Date: Fri, 26 Aug 2016 13:21:33 +0100 Subject: [PATCH] JAL-2154 check that xref is a coding xref before adding sequence to retrieved xrefs --- src/jalview/analysis/CrossRef.java | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/jalview/analysis/CrossRef.java b/src/jalview/analysis/CrossRef.java index 9932f2c..aefc6f8 100644 --- a/src/jalview/analysis/CrossRef.java +++ b/src/jalview/analysis/CrossRef.java @@ -222,6 +222,9 @@ public class CrossRef boolean found = false; DBRefEntry[] xrfs = DBRefUtils .selectDbRefs(!fromDna, dss.getDBRefs()); + // ENST & ENSP appear for both protein and nucleotide, so we need to + // filter them + // out later. if ((xrfs == null || xrfs.length == 0) && dataset != null) { /* @@ -249,11 +252,15 @@ public class CrossRef List sourceRefs = DBRefUtils.searchRefsForSource(xrfs, source); Iterator refIterator = sourceRefs.iterator(); + // At this point, if we are retrieving Ensembl, we still don't filter out + // ENST when looking for protein crossrefs. while (refIterator.hasNext()) { DBRefEntry xref = refIterator.next(); found = false; - if (xref.hasMap()) + // we're only interested in coding cross-references, not + // locus->transcript + if (xref.hasMap() && xref.getMap().getMap().isTripletMap()) { SequenceI mappedTo = xref.getMap().getTo(); if (mappedTo != null) @@ -332,7 +339,9 @@ public class CrossRef { SequenceI matchedSeq = matcher.findIdMatch(xref.getSource() + "|" + xref.getAccessionId()); - if (matchedSeq != null) + // if there was a match, check it's at least the right type of + // molecule! + if (matchedSeq != null && matchedSeq.isProtein() == fromDna) { if (constructMapping(seq, matchedSeq, xref, cf, fromDna)) { -- 1.7.10.2