X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Futil%2FDBRefUtils.java;fp=src%2Fjalview%2Futil%2FDBRefUtils.java;h=04cb75eb60edc7b30c2e458a42ad85d7a6e91c4c;hb=7d67fb613ec026dc9a265e351e7fab542e3f1d61;hp=d5d0cf53632f0f271c69867b907c5d7997735de2;hpb=02e38bb826828ab2991584cf4b737c0138cb6c44;p=jalview.git diff --git a/src/jalview/util/DBRefUtils.java b/src/jalview/util/DBRefUtils.java index d5d0cf5..04cb75e 100755 --- a/src/jalview/util/DBRefUtils.java +++ b/src/jalview/util/DBRefUtils.java @@ -26,11 +26,12 @@ import jalview.datamodel.PDBEntry; import jalview.datamodel.SequenceI; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; -import java.util.Hashtable; import java.util.List; import java.util.Map; +import java.util.Set; import com.stevesoft.pat.Regex; @@ -59,6 +60,18 @@ public class DBRefUtils canonicalSourceNameLookup.put("pdb", DBRefSource.PDB); canonicalSourceNameLookup.put("ensembl", DBRefSource.ENSEMBL); + // Ensembl Gn and Tr are for Ensembl genomic and transcript IDs as served + // from ENA. 
+ canonicalSourceNameLookup.put("ensembl-tr", DBRefSource.ENSEMBL); + canonicalSourceNameLookup.put("ensembl-gn", DBRefSource.ENSEMBL); + + // Make sure we have lowercase entries for all canonical string lookups + Set keys = canonicalSourceNameLookup.keySet(); + for (String k : keys) + { + canonicalSourceNameLookup.put(k.toLowerCase(), + canonicalSourceNameLookup.get(k)); + } dasCoordinateSystemsLookup.put("pdbresnum", DBRefSource.PDB); dasCoordinateSystemsLookup.put("uniprot", DBRefSource.UNIPROT); @@ -87,14 +100,14 @@ public class DBRefUtils HashSet srcs = new HashSet(); for (String src : sources) { - srcs.add(src); + srcs.add(src.toUpperCase()); } List res = new ArrayList(); for (DBRefEntry dbr : dbrefs) { String source = getCanonicalName(dbr.getSource()); - if (srcs.contains(source)) + if (srcs.contains(source.toUpperCase())) { res.add(dbr); } @@ -235,7 +248,8 @@ public class DBRefUtils public boolean matches(DBRefEntry refa, DBRefEntry refb) { if (refa.getSource() == null - || refb.getSource().equals(refa.getSource())) + || DBRefUtils.getCanonicalName(refb.getSource()).equals( + DBRefUtils.getCanonicalName(refa.getSource()))) { if (refa.getVersion() == null || refb.getVersion().equals(refa.getVersion())) @@ -266,7 +280,7 @@ public class DBRefUtils @Override public boolean matches(DBRefEntry refa, DBRefEntry refb) { - if (nullOrEqual(refa.getSource(), refb.getSource()) + if (nullOrEqualSource(refa.getSource(), refb.getSource()) && nullOrEqual(refa.getVersion(), refb.getVersion()) && nullOrEqual(refa.getAccessionId(), refb.getAccessionId()) && nullOrEqual(refa.getMap(), refb.getMap())) @@ -287,8 +301,10 @@ public class DBRefUtils @Override public boolean matches(DBRefEntry refa, DBRefEntry refb) { - if (refa.getSource() != null && refb.getSource() != null - && refb.getSource().equals(refa.getSource())) + if (refa.getSource() != null + && refb.getSource() != null + && DBRefUtils.getCanonicalName(refb.getSource()).equals( + 
DBRefUtils.getCanonicalName(refa.getSource()))) { // We dont care about version if (refa.getAccessionId() != null && refb.getAccessionId() != null @@ -318,8 +334,10 @@ public class DBRefUtils @Override public boolean matches(DBRefEntry refa, DBRefEntry refb) { - if (refa.getSource() != null && refb.getSource() != null - && refb.getSource().equals(refa.getSource())) + if (refa.getSource() != null + && refb.getSource() != null + && DBRefUtils.getCanonicalName(refb.getSource()).equals( + DBRefUtils.getCanonicalName(refa.getSource()))) { // We dont care about version if (refa.getAccessionId() != null && refb.getAccessionId() != null @@ -354,8 +372,10 @@ public class DBRefUtils @Override public boolean matches(DBRefEntry refa, DBRefEntry refb) { - if (refa.getSource() != null && refb.getSource() != null - && refb.getSource().equals(refa.getSource())) + if (refa.getSource() != null + && refb.getSource() != null + && DBRefUtils.getCanonicalName(refb.getSource()).equals( + DBRefUtils.getCanonicalName(refa.getSource()))) { // We dont care about version // if ((refa.getVersion()==null || refb.getVersion()==null) @@ -393,8 +413,10 @@ public class DBRefUtils @Override public boolean matches(DBRefEntry refa, DBRefEntry refb) { - if (refa.getSource() != null && refb.getSource() != null - && refb.getSource().equals(refa.getSource())) + if (refa.getSource() != null + && refb.getSource() != null + && DBRefUtils.getCanonicalName(refb.getSource()).equals( + DBRefUtils.getCanonicalName(refa.getSource()))) { // We dont care about version @@ -485,9 +507,7 @@ public class DBRefUtils PDBEntry pdbr = new PDBEntry(); pdbr.setId(pdbid); pdbr.setType(PDBEntry.Type.PDB); - pdbr.setProperty(new Hashtable()); pdbr.setChainCode(chaincode); - // pdbr.getProperty().put("CHAIN", chaincode); seq.addPDBId(pdbr); } else @@ -521,7 +541,28 @@ public class DBRefUtils { return true; } - return (o1 == null ? 
o2.equals(o1) : o1.equals(o2)); + return o1.equals(o2); + } + + /** + * canonicalise source string before comparing. null is always wildcard + * + * @param o1 + * - null or source string to compare + * @param o2 + * - null or source string to compare + * @return true if either o1 or o2 are null, or o1 equals o2 under + * DBRefUtils.getCanonicalName + * (o1).equals(DBRefUtils.getCanonicalName(o2)) + */ + public static boolean nullOrEqualSource(String o1, String o2) + { + if (o1 == null || o2 == null) + { + return true; + } + return DBRefUtils.getCanonicalName(o1).equals( + DBRefUtils.getCanonicalName(o2)); } /** @@ -569,4 +610,127 @@ public class DBRefUtils return matches; } + /** + * promote direct database references to primary for nucleotide or protein + * sequences if they have an appropriate primary ref + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
Seq Type | Primary DB | Direct which will be promoted
peptides | Ensembl | Uniprot
peptides | Ensembl | Uniprot
dna | Ensembl | ENA
+ * + * @param sequence + */ + public static void ensurePrimaries(SequenceI sequence) + { + List pr = sequence.getPrimaryDBRefs(); + if (pr.size() == 0) + { + // nothing to do + return; + } + List selfs = new ArrayList(); + { + DBRefEntry[] selfArray = selectDbRefs(!sequence.isProtein(), + sequence.getDBRefs()); + if (selfArray == null || selfArray.length == 0) + { + // nothing to do + return; + } + selfs.addAll(Arrays.asList(selfArray)); + } + + // filter non-primary refs + for (DBRefEntry p : pr) + { + while (selfs.contains(p)) + { + selfs.remove(p); + } + } + List toPromote = new ArrayList(); + + for (DBRefEntry p : pr) + { + List promType = new ArrayList(); + if (sequence.isProtein()) + { + switch (getCanonicalName(p.getSource())) + { + case DBRefSource.UNIPROT: + // case DBRefSource.UNIPROTKB: + // case DBRefSource.UP_NAME: + // search for and promote ensembl + promType.add(DBRefSource.ENSEMBL); + break; + case DBRefSource.ENSEMBL: + // search for and promote Uniprot + promType.add(DBRefSource.UNIPROT); + break; + } + } + else + { + // TODO: promote transcript refs + } + + // collate candidates and promote them + DBRefEntry[] candidates = selectRefs( + selfs.toArray(new DBRefEntry[0]), + promType.toArray(new String[0])); + if (candidates != null) + { + for (DBRefEntry cand : candidates) + { + if (cand.hasMap()) + { + if (cand.getMap().getTo() != null + && cand.getMap().getTo() != sequence) + { + // can't promote refs with mappings to other sequences + continue; + } + if (cand.getMap().getMap().getFromLowest() != sequence + .getStart() + && cand.getMap().getMap().getFromHighest() != sequence + .getEnd()) + { + // can't promote refs with mappings from a region of this sequence + // - eg CDS + continue; + } + } + // and promote + cand.setVersion(p.getVersion() + " (promoted)"); + selfs.remove(cand); + toPromote.add(cand); + if (!cand.isPrimaryCandidate()) + { + System.out.println("Warning: Couldn't promote dbref " + + cand.toString() + " for sequence " + + 
sequence.toString()); + } + } + } + } + } + }