From 639ac6c56cbbd596611ba7a2152380d6c9474090 Mon Sep 17 00:00:00 2001 From: Jim Procter Date: Sun, 2 Oct 2016 12:15:21 +0100 Subject: [PATCH] JAL-2210 discover candidate dataset sequences for an xref based on primaryDBRefs first, and only fall back to sequence name match if none are found --- src/jalview/analysis/CrossRef.java | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/src/jalview/analysis/CrossRef.java b/src/jalview/analysis/CrossRef.java index 6779b87..1ddeb20 100644 --- a/src/jalview/analysis/CrossRef.java +++ b/src/jalview/analysis/CrossRef.java @@ -637,9 +637,12 @@ public class CrossRef } /** - * Returns the first identical sequence in the dataset if any, else null + * Returns null or the first sequence in the dataset which is identical to + * xref.mapTo, and has a) a primary dbref matching xref, or if none found, the + * first one with an ID source|xrefacc * * @param xref + * with map and mapped-to sequence * @return */ SequenceI findInDataset(DBRefEntry xref) @@ -658,23 +661,37 @@ public class CrossRef { return dss; } - ; + DBRefEntry template = new DBRefEntry(xref.getSource(), null, + xref.getAccessionId()); + /** + * remember the first ID match - in case we don't find a match to template + */ + SequenceI firstIdMatch = null; for (SequenceI seq : dataset.getSequences()) { + // first check primary refs. + List match = DBRefUtils.searchRefs(seq.getPrimaryDBRefs() + .toArray(new DBRefEntry[0]), template); + if (match != null && match.size() == 1 && sameSequence(seq, dss)) + { + return seq; + } /* * clumsy alternative to using SequenceIdMatcher which currently * returns sequences with a dbref to the matched accession id * which we don't want */ - if (name.equals(seq.getName()) || seq.getName().startsWith(name2)) + if (firstIdMatch == null + && (name.equals(seq.getName()) || seq.getName().startsWith( + name2))) { if (sameSequence(seq, dss)) { - return seq; + firstIdMatch = seq; } } } - return null; + return firstIdMatch; } /** -- 1.7.10.2