X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Futil%2FDBRefUtils.java;h=381038c4752af5d0d36d33b90f55d3f3a66531b3;hb=7459ed95f5ad213bafb1c691b193283850889e52;hp=405f6e6f520601ebd8785c2d26be1e5081475347;hpb=14e9fd31857401af4f5547430330d3d557cbd277;p=jalview.git diff --git a/src/jalview/util/DBRefUtils.java b/src/jalview/util/DBRefUtils.java index 405f6e6..381038c 100755 --- a/src/jalview/util/DBRefUtils.java +++ b/src/jalview/util/DBRefUtils.java @@ -22,16 +22,16 @@ package jalview.util; import jalview.datamodel.DBRefEntry; import jalview.datamodel.DBRefSource; +import jalview.datamodel.Mapping; import jalview.datamodel.PDBEntry; import jalview.datamodel.SequenceI; import java.util.ArrayList; +import java.util.BitSet; import java.util.HashMap; import java.util.HashSet; -import java.util.Hashtable; import java.util.List; import java.util.Map; -import java.util.Set; import com.stevesoft.pat.Regex; @@ -40,6 +40,16 @@ import com.stevesoft.pat.Regex; */ public class DBRefUtils { + + public final static int DB_SOURCE = 1; + public final static int DB_VERSION = 2; + public final static int DB_ID = 4; + public final static int DB_MAP = 8; + + + public final static int SEARCH_MODE_NO_MAP_NO_VERSION = DB_SOURCE | DB_ID; + public final static int SEARCH_MODE_FULL = DB_SOURCE | DB_VERSION | DB_ID | DB_MAP; + /* * lookup from lower-case form of a name to its canonical (standardised) form */ @@ -66,12 +76,13 @@ public class DBRefUtils canonicalSourceNameLookup.put("ensembl-gn", DBRefSource.ENSEMBL); // Make sure we have lowercase entries for all canonical string lookups - Set keys = canonicalSourceNameLookup.keySet(); - for (String k : keys) - { - canonicalSourceNameLookup.put(k.toLowerCase(), - canonicalSourceNameLookup.get(k)); - } +// BH 2019.01.25 unnecessary -- they are all lower case already + //Set keys = canonicalSourceNameLookup.keySet(); +// for (String k : keys) +// { +// canonicalSourceNameLookup.put(k.toLowerCase(), +// canonicalSourceNameLookup.get(k)); +// } dasCoordinateSystemsLookup.put("pdbresnum", DBRefSource.PDB); dasCoordinateSystemsLookup.put("uniprot", DBRefSource.UNIPROT); @@ -90,24 +101,28 @@ public class DBRefUtils * array of sources to select * @return */ - public static DBRefEntry[] selectRefs(DBRefEntry[] dbrefs, + public static List selectRefs(List dbrefs, String[] sources) { if (dbrefs == null || sources == null) { return dbrefs; } + + // BH TODO HashSet srcs = new HashSet(); for (String src : sources) { - srcs.add(src); + srcs.add(src.toUpperCase()); } + int nrefs = dbrefs.size(); List res = new ArrayList(); - for (DBRefEntry dbr : dbrefs) + for (int ib = 0; ib < nrefs; ib++) { + DBRefEntry dbr = dbrefs.get(ib); String source = getCanonicalName(dbr.getSource()); - if (srcs.contains(source)) + if (srcs.contains(source.toUpperCase())) { res.add(dbr); } @@ -115,12 +130,29 @@ public class DBRefUtils if (res.size() > 0) { - DBRefEntry[] reply = new DBRefEntry[res.size()]; - return res.toArray(reply); + //List reply = new DBRefEntry[res.size()]; + return res;//.toArray(reply); } return null; } + private static boolean selectRefsBS(List dbrefs, String sourceKeys, BitSet bsSelect) { + if (dbrefs == null || sourceKeys == null) + { + return false; + } + for (int i = 0, n = dbrefs.size(); i < n; i++) + { + DBRefEntry dbr = dbrefs.get(i); + String sourceKey = dbr.getSourceKey(); + if (sourceKeys.indexOf(sourceKey) < 0) { + bsSelect.clear(i); + } + } + return !bsSelect.isEmpty(); + } + + /** * isDasCoordinateSystem * @@ -140,8 +172,8 @@ public class DBRefUtils return false; } String coordsys = dasCoordinateSystemsLookup.get(string.toLowerCase()); - return coordsys == null ? false : coordsys.equals(dBRefEntry - .getSource()); + return coordsys == null ? false + : coordsys.equals(dBRefEntry.getSource()); } /** @@ -176,13 +208,14 @@ public class DBRefUtils * Set of references to search * @param entry * pattern to match + * @param mode SEARCH_MODE_FULL for all; SEARCH_MODE_NO_MAP_NO_VERSION optional * @return */ - public static List searchRefs(DBRefEntry[] ref, - DBRefEntry entry) + public static List searchRefs(List ref, + DBRefEntry entry, int mode) { return searchRefs(ref, entry, - matchDbAndIdAndEitherMapOrEquivalentMapList); + matchDbAndIdAndEitherMapOrEquivalentMapList, mode); } /** @@ -199,11 +232,26 @@ public class DBRefUtils * accession id to match * @return */ - public static List searchRefs(DBRefEntry[] refs, String accId) + public static List searchRefs(List refs, String accId) { - return searchRefs(refs, new DBRefEntry("", "", accId), matchId); + List rfs = new ArrayList(); + if (refs == null || accId == null) + { + return rfs; + } + for (int i = 0, n = refs.size(); i < n; i++) + { + DBRefEntry e = refs.get(i); + if (accId.equals(e.getAccessionId())) + { + rfs.add(e); + } + } + return rfs; +// return searchRefs(refs, new DBRefEntry("", "", accId), matchId, SEARCH_MODE_FULL); } + /** * Returns a (possibly empty) list of those references that match the given * entry, according to the given comparator. @@ -213,21 +261,23 @@ public class DBRefUtils * @param entry * an entry to compare against * @param comparator + * @param mode SEARCH_MODE_FULL for all; SEARCH_MODE_NO_MAP_NO_VERSION optional * @return */ - static List searchRefs(DBRefEntry[] refs, DBRefEntry entry, - DbRefComp comparator) + static List searchRefs(List refs, DBRefEntry entry, + DbRefComp comparator, int mode) { List rfs = new ArrayList(); if (refs == null || entry == null) { return rfs; } - for (int i = 0; i < refs.length; i++) + for (int i = 0, n = refs.size(); i < n; i++) { - if (comparator.matches(entry, refs[i])) + DBRefEntry e = refs.get(i); + if (comparator.matches(entry, e, SEARCH_MODE_FULL)) { - rfs.add(refs[i]); + rfs.add(e); } } return rfs; @@ -235,8 +285,12 @@ public class DBRefUtils interface DbRefComp { - public boolean matches(DBRefEntry refa, DBRefEntry refb); - } + default public boolean matches(DBRefEntry refa, DBRefEntry refb) { + return matches(refa, refb, SEARCH_MODE_FULL); + }; + + public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode); +} /** * match on all non-null fields in refa @@ -244,22 +298,21 @@ public class DBRefUtils // TODO unused - remove? public static DbRefComp matchNonNullonA = new DbRefComp() { - @Override - public boolean matches(DBRefEntry refa, DBRefEntry refb) + @Override + public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) { - if (refa.getSource() == null + if ((mode & DB_SOURCE) != 0 && refa.getSource() == null || DBRefUtils.getCanonicalName(refb.getSource()).equals( DBRefUtils.getCanonicalName(refa.getSource()))) { - if (refa.getVersion() == null + if ((mode & DB_VERSION) != 0 && refa.getVersion() == null || refb.getVersion().equals(refa.getVersion())) { - if (refa.getAccessionId() == null + if ((mode & DB_ID) != 0 && refa.getAccessionId() == null || refb.getAccessionId().equals(refa.getAccessionId())) { - if (refa.getMap() == null - || (refb.getMap() != null && refb.getMap().equals( - refa.getMap()))) + if ((mode & DB_MAP) != 0 && refa.getMap() == null || (refb.getMap() != null + && refb.getMap().equals(refa.getMap()))) { return true; } @@ -278,7 +331,7 @@ public class DBRefUtils public static DbRefComp matchEitherNonNull = new DbRefComp() { @Override - public boolean matches(DBRefEntry refa, DBRefEntry refb) + public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) { if (nullOrEqualSource(refa.getSource(), refb.getSource()) && nullOrEqual(refa.getVersion(), refb.getVersion()) @@ -289,6 +342,7 @@ public class DBRefUtils } return false; } + }; /** @@ -299,7 +353,7 @@ public class DBRefUtils public static DbRefComp matchDbAndIdAndEitherMap = new DbRefComp() { @Override - public boolean matches(DBRefEntry refa, DBRefEntry refb) + public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) { if (refa.getSource() != null && refb.getSource() != null && DBRefUtils.getCanonicalName(refb.getSource()).equals( @@ -307,12 +361,12 @@ public class DBRefUtils { // We dont care about version if (refa.getAccessionId() != null && refb.getAccessionId() != null - // FIXME should be && not || here? + // FIXME should be && not || here? || refb.getAccessionId().equals(refa.getAccessionId())) { if ((refa.getMap() == null || refb.getMap() == null) - || (refa.getMap() != null && refb.getMap() != null && refb - .getMap().equals(refa.getMap()))) + || (refa.getMap() != null && refb.getMap() != null + && refb.getMap().equals(refa.getMap()))) { return true; } @@ -331,7 +385,7 @@ public class DBRefUtils public static DbRefComp matchDbAndIdAndComplementaryMapList = new DbRefComp() { @Override - public boolean matches(DBRefEntry refa, DBRefEntry refb) + public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) { if (refa.getSource() != null && refb.getSource() != null && DBRefUtils.getCanonicalName(refb.getSource()).equals( @@ -344,11 +398,12 @@ public class DBRefUtils if ((refa.getMap() == null && refb.getMap() == null) || (refa.getMap() != null && refb.getMap() != null)) { - if ((refb.getMap().getMap() == null && refa.getMap().getMap() == null) + if ((refb.getMap().getMap() == null + && refa.getMap().getMap() == null) || (refb.getMap().getMap() != null - && refa.getMap().getMap() != null && refb - .getMap().getMap().getInverse() - .equals(refa.getMap().getMap()))) + && refa.getMap().getMap() != null + && refb.getMap().getMap().getInverse() + .equals(refa.getMap().getMap()))) { return true; } @@ -368,7 +423,7 @@ public class DBRefUtils public static DbRefComp matchDbAndIdAndEquivalentMapList = new DbRefComp() { @Override - public boolean matches(DBRefEntry refa, DBRefEntry refb) + public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) { if (refa.getSource() != null && refb.getSource() != null && DBRefUtils.getCanonicalName(refb.getSource()).equals( @@ -385,12 +440,13 @@ public class DBRefUtils { return true; } - if (refa.getMap() != null - && refb.getMap() != null - && ((refb.getMap().getMap() == null && refa.getMap() - .getMap() == null) || (refb.getMap().getMap() != null - && refa.getMap().getMap() != null && refb - .getMap().getMap().equals(refa.getMap().getMap())))) + if (refa.getMap() != null && refb.getMap() != null + && ((refb.getMap().getMap() == null + && refa.getMap().getMap() == null) + || (refb.getMap().getMap() != null + && refa.getMap().getMap() != null + && refb.getMap().getMap() + .equals(refa.getMap().getMap())))) { return true; } @@ -408,7 +464,7 @@ public class DBRefUtils public static DbRefComp matchDbAndIdAndEitherMapOrEquivalentMapList = new DbRefComp() { @Override - public boolean matches(DBRefEntry refa, DBRefEntry refb) + public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) { if (refa.getSource() != null && refb.getSource() != null && DBRefUtils.getCanonicalName(refb.getSource()).equals( @@ -424,11 +480,12 @@ public class DBRefUtils return true; } if ((refa.getMap() != null && refb.getMap() != null) - && (refb.getMap().getMap() == null && refa.getMap() - .getMap() == null) + && (refb.getMap().getMap() == null + && refa.getMap().getMap() == null) || (refb.getMap().getMap() != null - && refa.getMap().getMap() != null && (refb - .getMap().getMap().equals(refa.getMap().getMap())))) + && refa.getMap().getMap() != null + && (refb.getMap().getMap() + .equals(refa.getMap().getMap())))) { return true; } @@ -444,7 +501,7 @@ public class DBRefUtils public static DbRefComp matchId = new DbRefComp() { @Override - public boolean matches(DBRefEntry refa, DBRefEntry refb) + public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) { if (refa.getAccessionId() != null && refb.getAccessionId() != null && refb.getAccessionId().equals(refa.getAccessionId())) @@ -503,9 +560,7 @@ public class DBRefUtils PDBEntry pdbr = new PDBEntry(); pdbr.setId(pdbid); pdbr.setType(PDBEntry.Type.PDB); - pdbr.setProperty(new Hashtable()); pdbr.setChainCode(chaincode); - // pdbr.getProperty().put("CHAIN", chaincode); seq.addPDBId(pdbr); } else @@ -559,8 +614,8 @@ public class DBRefUtils { return true; } - return DBRefUtils.getCanonicalName(o1).equals( - DBRefUtils.getCanonicalName(o2)); + return DBRefUtils.getCanonicalName(o1) + .equals(DBRefUtils.getCanonicalName(o2)); } /** @@ -573,11 +628,11 @@ public class DBRefUtils * a set of references to select from * @return */ - public static DBRefEntry[] selectDbRefs(boolean selectDna, - DBRefEntry[] refs) + public static List selectDbRefs(boolean selectDna, + List refs) { - return selectRefs(refs, selectDna ? DBRefSource.DNACODINGDBS - : DBRefSource.PROTEINDBS); + return selectRefs(refs, + selectDna ? DBRefSource.DNACODINGDBS : DBRefSource.PROTEINDBS); // could attempt to find other cross // refs here - ie PDB xrefs // (not dna, not protein seq) @@ -591,7 +646,7 @@ public class DBRefUtils * @param source * @return */ - public static List searchRefsForSource(DBRefEntry[] dbRefs, + public static List searchRefsForSource(List dbRefs, String source) { List matches = new ArrayList(); @@ -608,4 +663,142 @@ public class DBRefUtils return matches; } + /** + * promote direct database references to primary for nucleotide or protein + * sequences if they have an appropriate primary ref + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
Seq TypePrimary DBDirect which will be promoted
peptidesEnsemblUniprot
peptidesEnsemblUniprot
dnaEnsemblENA
+ * + * @param sequence + */ + public static void ensurePrimaries(SequenceI sequence, List pr) + { + if (pr.size() == 0) + { + // nothing to do + return; + } + int sstart = sequence.getStart(); + int send = sequence.getEnd(); + boolean isProtein = sequence.isProtein(); + BitSet bsSelect = new BitSet(); + +// List selfs = new ArrayList(); +// { + +// List selddfs = selectDbRefs(!isprot, sequence.getDBRefs()); +// if (selfs == null || selfs.size() == 0) +// { +// // nothing to do +// return; +// } + + List dbrefs = sequence.getDBRefs(); + bsSelect.set(0, dbrefs.size()); + + if (!selectRefsBS(dbrefs, isProtein ? DBRefSource.PROTEINDBSKEYS : DBRefSource.DNACODINGDBSKEYS, bsSelect)) + return; + +// selfs.addAll(selfArray); +// } + + // filter non-primary refs + for (int ip = pr.size(); --ip >= 0;) + { + DBRefEntry p = pr.get(ip); + for (int i = bsSelect.nextSetBit(0); i >= 0; i = bsSelect.nextSetBit(i + 1)) { + if (dbrefs.get(i) == p) + bsSelect.clear(i); + } +// while (selfs.contains(p)) +// { +// selfs.remove(p); +// } + } +// List toPromote = new ArrayList(); + + + + for (int ip = pr.size(), keys = 0; --ip >= 0 && keys != DBRefSource.ALL_MASKS;) + { + DBRefEntry p = pr.get(ip); + if (isProtein) + { + switch (getCanonicalName(p.getSource())) + { + case DBRefSource.UNIPROT: + keys |= DBRefSource.UNIPROT_MASK; + break; + case DBRefSource.ENSEMBL: + keys |= DBRefSource.ENSEMBL_MASK; + break; + } + } + else + { + // TODO: promote transcript refs ?? + } + if (keys == 0 || !selectRefsBS(dbrefs, DBRefSource.PROMTYPES[keys], bsSelect)) + return; +// if (candidates != null) + { + for (int ic = bsSelect.nextSetBit(0); ic >= 0; ic = bsSelect.nextSetBit(ic + 1)) +// for (int ic = 0, n = candidates.size(); ic < n; ic++) + { + DBRefEntry cand = dbrefs.get(ic);//candidates.get(ic); + if (cand.hasMap()) + { + Mapping map = cand.getMap(); + SequenceI cto = map.getTo(); + if (cto != null + && cto != sequence) + { + // can't promote refs with mappings to other sequences + continue; + } + MapList mlist = map.getMap(); + if (mlist.getFromLowest() != sstart + && mlist.getFromHighest() != send) + { + // can't promote refs with mappings from a region of this sequence + // - eg CDS + continue; + } + } + // and promote + cand.setVersion(p.getVersion() + " (promoted)"); + bsSelect.clear(ic); + //selfs.remove(cand); +// toPromote.add(cand); + if (!cand.isPrimaryCandidate()) + { + System.out.println( + "Warning: Couldn't promote dbref " + cand.toString() + + " for sequence " + sequence.toString()); + } + } + } + } + } + }