From b9940c76e03cfc61550c94e5865019f5d6231daf Mon Sep 17 00:00:00 2001 From: Jim Procter Date: Tue, 27 Sep 2016 15:16:34 +0100 Subject: [PATCH] JAL-2210 promote direct dbrefs from verified sources to primary for a sequence --- src/jalview/datamodel/Sequence.java | 2 + src/jalview/util/DBRefUtils.java | 116 +++++++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+) diff --git a/src/jalview/datamodel/Sequence.java b/src/jalview/datamodel/Sequence.java index d4290a3..6b57ef7 100755 --- a/src/jalview/datamodel/Sequence.java +++ b/src/jalview/datamodel/Sequence.java @@ -994,6 +994,8 @@ public class Sequence extends ASequence implements SequenceI temp[temp.length - 1] = entry; dbrefs = temp; + + DBRefUtils.ensurePrimaries(this); } @Override diff --git a/src/jalview/util/DBRefUtils.java b/src/jalview/util/DBRefUtils.java index 405f6e6..f414a9c 100755 --- a/src/jalview/util/DBRefUtils.java +++ b/src/jalview/util/DBRefUtils.java @@ -26,6 +26,7 @@ import jalview.datamodel.PDBEntry; import jalview.datamodel.SequenceI; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.Hashtable; @@ -608,4 +609,119 @@ public class DBRefUtils return matches; } + /** + * promote direct database references to primary for nucleotide or protein + * sequences if they have an appropriate primary ref + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
Seq Type | Primary DB | Direct which will be promoted
peptides | Ensembl | Uniprot
peptides | Ensembl | Uniprot
dna | Ensembl | ENA
+ * + * @param sequence + */ + public static void ensurePrimaries(SequenceI sequence) + { + List pr = sequence.getPrimaryDBRefs(); + if (pr.size() == 0) + { + // nothing to do + return; + } + List selfs = new ArrayList(); + selfs.addAll(Arrays.asList(selectDbRefs(!sequence.isProtein(), + sequence.getDBRefs()))); + + // filter non-primary refs + for (DBRefEntry p : pr) + { + while (selfs.contains(p)) + { + selfs.remove(p); + } + } + List toPromote = new ArrayList(); + + for (DBRefEntry p : pr) + { + List promType = new ArrayList(); + if (sequence.isProtein()) + { + switch (getCanonicalName(p.getSource())) + { + case DBRefSource.UNIPROT: + // case DBRefSource.UNIPROTKB: + // case DBRefSource.UP_NAME: + // search for and promote ensembl + promType.add(DBRefSource.ENSEMBL); + break; + case DBRefSource.ENSEMBL: + // search for and promote Uniprot + promType.add(DBRefSource.UNIPROT); + break; + } + } + else + { + // TODO: promote transcript refs + } + + // collate candidates and promote them + DBRefEntry[] candidates = selectRefs( + selfs.toArray(new DBRefEntry[0]), + promType.toArray(new String[0])); + if (candidates != null) + { + for (DBRefEntry cand : candidates) + { + if (cand.hasMap()) + { + if (cand.getMap().getTo() != null + && cand.getMap().getTo() != sequence) + { + // can't promote refs with mappings to other sequences + continue; + } + if (cand.getMap().getMap().getFromLowest() != sequence + .getStart() + && cand.getMap().getMap().getFromHighest() != sequence + .getEnd()) + { + // can't promote refs with mappings from a region of this sequence + // - eg CDS + continue; + } + } + // and promote + cand.setVersion(p.getVersion() + " (promoted)"); + selfs.remove(cand); + toPromote.add(cand); + if (!cand.isPrimaryCandidate()) + { + System.out.println("Warning: Couldn't promote dbref " + + cand.toString() + " for sequence " + + sequence.toString()); + } + } + } + } + } + } -- 1.7.10.2