X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Futil%2FDBRefUtils.java;h=50a34fc9b5017cc4000f6d28c018b8d8a93f9a43;hb=d90895bf6eed41ff1b2d413306afae3cac458756;hp=c85a489990c185f9237d71d95df41a0dcab3d2b6;hpb=f35bd7373186cb3262858b97480b962a64b65ae1;p=jalview.git diff --git a/src/jalview/util/DBRefUtils.java b/src/jalview/util/DBRefUtils.java index c85a489..50a34fc 100755 --- a/src/jalview/util/DBRefUtils.java +++ b/src/jalview/util/DBRefUtils.java @@ -26,11 +26,12 @@ import jalview.datamodel.PDBEntry; import jalview.datamodel.SequenceI; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; -import java.util.Hashtable; import java.util.List; import java.util.Map; +import java.util.Set; import com.stevesoft.pat.Regex; @@ -52,8 +53,25 @@ public class DBRefUtils canonicalSourceNameLookup.put("uniprotkb/swiss-prot", DBRefSource.UNIPROT); canonicalSourceNameLookup.put("uniprotkb/trembl", DBRefSource.UNIPROT); + + // Ensembl values for dbname in xref REST service: + canonicalSourceNameLookup.put("uniprot/sptrembl", DBRefSource.UNIPROT); + canonicalSourceNameLookup.put("uniprot/swissprot", DBRefSource.UNIPROT); + canonicalSourceNameLookup.put("pdb", DBRefSource.PDB); canonicalSourceNameLookup.put("ensembl", DBRefSource.ENSEMBL); + // Ensembl Gn and Tr are for Ensembl genomic and transcript IDs as served + // from ENA. + canonicalSourceNameLookup.put("ensembl-tr", DBRefSource.ENSEMBL); + canonicalSourceNameLookup.put("ensembl-gn", DBRefSource.ENSEMBL); + + // Make sure we have lowercase entries for all canonical string lookups + Set keys = canonicalSourceNameLookup.keySet(); + for (String k : keys) + { + canonicalSourceNameLookup.put(k.toLowerCase(), + canonicalSourceNameLookup.get(k)); + } dasCoordinateSystemsLookup.put("pdbresnum", DBRefSource.PDB); dasCoordinateSystemsLookup.put("uniprot", DBRefSource.UNIPROT); @@ -62,11 +80,14 @@ public class DBRefUtils } /** + * Returns those DBRefEntry objects whose source identifier (once converted to + * Jalview's canonical form) is in the list of sources to search for. Returns + * null if no matches found. * * @param dbrefs - * array of DBRef objects to search + * DBRefEntry objects to search * @param sources - * String[] array of source DBRef IDs to retrieve + * array of sources to select * @return */ public static DBRefEntry[] selectRefs(DBRefEntry[] dbrefs, @@ -79,14 +100,14 @@ public class DBRefUtils HashSet srcs = new HashSet(); for (String src : sources) { - srcs.add(src); + srcs.add(src.toUpperCase()); } List res = new ArrayList(); for (DBRefEntry dbr : dbrefs) { String source = getCanonicalName(dbr.getSource()); - if (srcs.contains(source)) + if (srcs.contains(source.toUpperCase())) { res.add(dbr); } @@ -119,8 +140,8 @@ public class DBRefUtils return false; } String coordsys = dasCoordinateSystemsLookup.get(string.toLowerCase()); - return coordsys == null ? false : coordsys.equals(dBRefEntry - .getSource()); + return coordsys == null ? false + : coordsys.equals(dBRefEntry.getSource()); } /** @@ -143,8 +164,8 @@ public class DBRefUtils } /** - * Returns an array of those references that match the given entry, or null if - * no matches. Currently uses a comparator which matches if + * Returns a (possibly empty) list of those references that match the given + * entry. Currently uses a comparator which matches if *
    *
  • database sources are the same
  • *
  • accession ids are the same
  • @@ -157,15 +178,35 @@ public class DBRefUtils * pattern to match * @return */ - public static DBRefEntry[] searchRefs(DBRefEntry[] ref, DBRefEntry entry) + public static List searchRefs(DBRefEntry[] ref, + DBRefEntry entry) { return searchRefs(ref, entry, matchDbAndIdAndEitherMapOrEquivalentMapList); } /** - * Returns an array of those references that match the given entry, according - * to the given comparator. Returns null if no matches. + * Returns a list of those references that match the given accession id + *
      + *
    • database sources are the same
    • + *
    • accession ids are the same
    • + *
    • both have no mapping, or the mappings are the same
    • + *
    + * + * @param refs + * Set of references to search + * @param accId + * accession id to match + * @return + */ + public static List searchRefs(DBRefEntry[] refs, String accId) + { + return searchRefs(refs, new DBRefEntry("", "", accId), matchId); + } + + /** + * Returns a (possibly empty) list of those references that match the given + * entry, according to the given comparator. * * @param refs * an array of database references to search @@ -174,14 +215,14 @@ public class DBRefUtils * @param comparator * @return */ - static DBRefEntry[] searchRefs(DBRefEntry[] refs, DBRefEntry entry, + static List searchRefs(DBRefEntry[] refs, DBRefEntry entry, DbRefComp comparator) { + List rfs = new ArrayList(); if (refs == null || entry == null) { - return null; + return rfs; } - List rfs = new ArrayList(); for (int i = 0; i < refs.length; i++) { if (comparator.matches(entry, refs[i])) @@ -189,7 +230,7 @@ public class DBRefUtils rfs.add(refs[i]); } } - return rfs.size() == 0 ? null : rfs.toArray(new DBRefEntry[rfs.size()]); + return rfs; } interface DbRefComp @@ -207,7 +248,8 @@ public class DBRefUtils public boolean matches(DBRefEntry refa, DBRefEntry refb) { if (refa.getSource() == null - || refb.getSource().equals(refa.getSource())) + || DBRefUtils.getCanonicalName(refb.getSource()).equals( + DBRefUtils.getCanonicalName(refa.getSource()))) { if (refa.getVersion() == null || refb.getVersion().equals(refa.getVersion())) @@ -215,9 +257,8 @@ public class DBRefUtils if (refa.getAccessionId() == null || refb.getAccessionId().equals(refa.getAccessionId())) { - if (refa.getMap() == null - || (refb.getMap() != null && refb.getMap().equals( - refa.getMap()))) + if (refa.getMap() == null || (refb.getMap() != null + && refb.getMap().equals(refa.getMap()))) { return true; } @@ -238,7 +279,7 @@ public class DBRefUtils @Override public boolean matches(DBRefEntry refa, DBRefEntry refb) { - if (nullOrEqual(refa.getSource(), refb.getSource()) + if (nullOrEqualSource(refa.getSource(), refb.getSource()) && nullOrEqual(refa.getVersion(), refb.getVersion()) && nullOrEqual(refa.getAccessionId(), refb.getAccessionId()) && nullOrEqual(refa.getMap(), refb.getMap())) @@ -260,16 +301,17 @@ public class DBRefUtils public boolean matches(DBRefEntry refa, DBRefEntry refb) { if (refa.getSource() != null && refb.getSource() != null - && refb.getSource().equals(refa.getSource())) + && DBRefUtils.getCanonicalName(refb.getSource()).equals( + DBRefUtils.getCanonicalName(refa.getSource()))) { // We dont care about version if (refa.getAccessionId() != null && refb.getAccessionId() != null - // FIXME should be && not || here? + // FIXME should be && not || here? || refb.getAccessionId().equals(refa.getAccessionId())) { if ((refa.getMap() == null || refb.getMap() == null) - || (refa.getMap() != null && refb.getMap() != null && refb - .getMap().equals(refa.getMap()))) + || (refa.getMap() != null && refb.getMap() != null + && refb.getMap().equals(refa.getMap()))) { return true; } @@ -291,7 +333,8 @@ public class DBRefUtils public boolean matches(DBRefEntry refa, DBRefEntry refb) { if (refa.getSource() != null && refb.getSource() != null - && refb.getSource().equals(refa.getSource())) + && DBRefUtils.getCanonicalName(refb.getSource()).equals( + DBRefUtils.getCanonicalName(refa.getSource()))) { // We dont care about version if (refa.getAccessionId() != null && refb.getAccessionId() != null @@ -300,11 +343,12 @@ public class DBRefUtils if ((refa.getMap() == null && refb.getMap() == null) || (refa.getMap() != null && refb.getMap() != null)) { - if ((refb.getMap().getMap() == null && refa.getMap().getMap() == null) + if ((refb.getMap().getMap() == null + && refa.getMap().getMap() == null) || (refb.getMap().getMap() != null - && refa.getMap().getMap() != null && refb - .getMap().getMap().getInverse() - .equals(refa.getMap().getMap()))) + && refa.getMap().getMap() != null + && refb.getMap().getMap().getInverse() + .equals(refa.getMap().getMap()))) { return true; } @@ -327,7 +371,8 @@ public class DBRefUtils public boolean matches(DBRefEntry refa, DBRefEntry refb) { if (refa.getSource() != null && refb.getSource() != null - && refb.getSource().equals(refa.getSource())) + && DBRefUtils.getCanonicalName(refb.getSource()).equals( + DBRefUtils.getCanonicalName(refa.getSource()))) { // We dont care about version // if ((refa.getVersion()==null || refb.getVersion()==null) @@ -340,12 +385,13 @@ public class DBRefUtils { return true; } - if (refa.getMap() != null - && refb.getMap() != null - && ((refb.getMap().getMap() == null && refa.getMap() - .getMap() == null) || (refb.getMap().getMap() != null - && refa.getMap().getMap() != null && refb - .getMap().getMap().equals(refa.getMap().getMap())))) + if (refa.getMap() != null && refb.getMap() != null + && ((refb.getMap().getMap() == null + && refa.getMap().getMap() == null) + || (refb.getMap().getMap() != null + && refa.getMap().getMap() != null + && refb.getMap().getMap() + .equals(refa.getMap().getMap())))) { return true; } @@ -356,9 +402,9 @@ public class DBRefUtils }; /** - * accession ID and DB must be identical. Version is ignored. No map on either - * or map but no maplist on either or maplist of map on a is equivalent to the - * maplist of map on b. + * accession ID and DB must be identical, or null on a. Version is ignored. No + * map on either or map but no maplist on either or maplist of map on a is + * equivalent to the maplist of map on b. */ public static DbRefComp matchDbAndIdAndEitherMapOrEquivalentMapList = new DbRefComp() { @@ -366,23 +412,26 @@ public class DBRefUtils public boolean matches(DBRefEntry refa, DBRefEntry refb) { if (refa.getSource() != null && refb.getSource() != null - && refb.getSource().equals(refa.getSource())) + && DBRefUtils.getCanonicalName(refb.getSource()).equals( + DBRefUtils.getCanonicalName(refa.getSource()))) { // We dont care about version - if (refa.getAccessionId() != null && refb.getAccessionId() != null - && refb.getAccessionId().equals(refa.getAccessionId())) + + if (refa.getAccessionId() == null + || refa.getAccessionId().equals(refb.getAccessionId())) { if (refa.getMap() == null || refb.getMap() == null) { return true; } if ((refa.getMap() != null && refb.getMap() != null) - && (refb.getMap().getMap() == null && refa.getMap() - .getMap() == null) + && (refb.getMap().getMap() == null + && refa.getMap().getMap() == null) || (refb.getMap().getMap() != null - && refa.getMap().getMap() != null && (refb - .getMap().getMap().equals(refa.getMap().getMap())))) - { // getMap().getMap().containsEither(false,refa.getMap().getMap()) + && refa.getMap().getMap() != null + && (refb.getMap().getMap() + .equals(refa.getMap().getMap())))) + { return true; } } @@ -392,6 +441,23 @@ public class DBRefUtils }; /** + * accession ID only must be identical. + */ + public static DbRefComp matchId = new DbRefComp() + { + @Override + public boolean matches(DBRefEntry refa, DBRefEntry refb) + { + if (refa.getAccessionId() != null && refb.getAccessionId() != null + && refb.getAccessionId().equals(refa.getAccessionId())) + { + return true; + } + return false; + } + }; + + /** * Parses a DBRefEntry and adds it to the sequence, also a PDBEntry if the * database is PDB. *

    @@ -439,9 +505,7 @@ public class DBRefUtils PDBEntry pdbr = new PDBEntry(); pdbr.setId(pdbid); pdbr.setType(PDBEntry.Type.PDB); - pdbr.setProperty(new Hashtable()); pdbr.setChainCode(chaincode); - // pdbr.getProperty().put("CHAIN", chaincode); seq.addPDBId(pdbr); } else @@ -475,7 +539,195 @@ public class DBRefUtils { return true; } - return (o1 == null ? o2.equals(o1) : o1.equals(o2)); + return o1.equals(o2); + } + + /** + * canonicalise source string before comparing. null is always wildcard + * + * @param o1 + * - null or source string to compare + * @param o2 + * - null or source string to compare + * @return true if either o1 or o2 are null, or o1 equals o2 under + * DBRefUtils.getCanonicalName + * (o1).equals(DBRefUtils.getCanonicalName(o2)) + */ + public static boolean nullOrEqualSource(String o1, String o2) + { + if (o1 == null || o2 == null) + { + return true; + } + return DBRefUtils.getCanonicalName(o1) + .equals(DBRefUtils.getCanonicalName(o2)); + } + + /** + * Selects just the DNA or protein references from a set of references + * + * @param selectDna + * if true, select references to 'standard' DNA databases, else to + * 'standard' peptide databases + * @param refs + * a set of references to select from + * @return + */ + public static DBRefEntry[] selectDbRefs(boolean selectDna, + DBRefEntry[] refs) + { + return selectRefs(refs, + selectDna ? DBRefSource.DNACODINGDBS : DBRefSource.PROTEINDBS); + // could attempt to find other cross + // refs here - ie PDB xrefs + // (not dna, not protein seq) + } + + /** + * Returns the (possibly empty) list of those supplied dbrefs which have the + * specified source database, with a case-insensitive match of source name + * + * @param dbRefs + * @param source + * @return + */ + public static List searchRefsForSource(DBRefEntry[] dbRefs, + String source) + { + List matches = new ArrayList(); + if (dbRefs != null && source != null) + { + for (DBRefEntry dbref : dbRefs) + { + if (source.equalsIgnoreCase(dbref.getSource())) + { + matches.add(dbref); + } + } + } + return matches; + } + + /** + * promote direct database references to primary for nucleotide or protein + * sequences if they have an appropriate primary ref + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
    Seq TypePrimary DBDirect which will be promoted
    peptidesEnsemblUniprot
    peptidesEnsemblUniprot
    dnaEnsemblENA
    + * + * @param sequence + */ + public static void ensurePrimaries(SequenceI sequence) + { + List pr = sequence.getPrimaryDBRefs(); + if (pr.size() == 0) + { + // nothing to do + return; + } + List selfs = new ArrayList(); + { + DBRefEntry[] selfArray = selectDbRefs(!sequence.isProtein(), + sequence.getDBRefs()); + if (selfArray == null || selfArray.length == 0) + { + // nothing to do + return; + } + selfs.addAll(Arrays.asList(selfArray)); + } + + // filter non-primary refs + for (DBRefEntry p : pr) + { + while (selfs.contains(p)) + { + selfs.remove(p); + } + } + List toPromote = new ArrayList(); + + for (DBRefEntry p : pr) + { + List promType = new ArrayList(); + if (sequence.isProtein()) + { + switch (getCanonicalName(p.getSource())) + { + case DBRefSource.UNIPROT: + // case DBRefSource.UNIPROTKB: + // case DBRefSource.UP_NAME: + // search for and promote ensembl + promType.add(DBRefSource.ENSEMBL); + break; + case DBRefSource.ENSEMBL: + // search for and promote Uniprot + promType.add(DBRefSource.UNIPROT); + break; + } + } + else + { + // TODO: promote transcript refs + } + + // collate candidates and promote them + DBRefEntry[] candidates = selectRefs(selfs.toArray(new DBRefEntry[0]), + promType.toArray(new String[0])); + if (candidates != null) + { + for (DBRefEntry cand : candidates) + { + if (cand.hasMap()) + { + if (cand.getMap().getTo() != null + && cand.getMap().getTo() != sequence) + { + // can't promote refs with mappings to other sequences + continue; + } + if (cand.getMap().getMap().getFromLowest() != sequence + .getStart() + && cand.getMap().getMap().getFromHighest() != sequence + .getEnd()) + { + // can't promote refs with mappings from a region of this sequence + // - eg CDS + continue; + } + } + // and promote + cand.setVersion(p.getVersion() + " (promoted)"); + selfs.remove(cand); + toPromote.add(cand); + if (!cand.isPrimaryCandidate()) + { + System.out.println( + "Warning: Couldn't promote dbref " + cand.toString() + + " for sequence " + sequence.toString()); + } + } + } + } } }