X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Futil%2FDBRefUtils.java;h=e17a336a59021fe19ed88d16e52bcabac7cf99b1;hb=5faac1104c41690e38e259fae5bb700236021360;hp=411a7b9830407aaaf05d94c660669096e4346285;hpb=0ff8400b045359dc745a49a92e2152fcb68ccdbd;p=jalview.git diff --git a/src/jalview/util/DBRefUtils.java b/src/jalview/util/DBRefUtils.java index 411a7b9..e17a336 100755 --- a/src/jalview/util/DBRefUtils.java +++ b/src/jalview/util/DBRefUtils.java @@ -1,121 +1,182 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Development Version 2.4.1) - * Copyright (C) 2009 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. + * This file is part of Jalview. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.util; -import java.util.*; +import java.util.ArrayList; +import java.util.BitSet; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Map; -import jalview.datamodel.*; +import com.stevesoft.pat.Regex; +import jalview.datamodel.DBRefEntry; +import jalview.datamodel.DBRefSource; +import jalview.datamodel.Mapping; +import jalview.datamodel.PDBEntry; +import jalview.datamodel.SequenceI; + +/** + * Utilities for handling DBRef objects and their collections. + */ public class DBRefUtils { - /** - * Utilities for handling DBRef objects and their collections. + /* + * lookup from lower-case form of a name to its canonical (standardised) form */ + private static Map canonicalSourceNameLookup = new HashMap<>(); + + public final static int DB_SOURCE = 1; + + public final static int DB_VERSION = 2; + + public final static int DB_ID = 4; + + public final static int DB_MAP = 8; + + public final static int SEARCH_MODE_NO_MAP_NO_VERSION = DB_SOURCE | DB_ID; + + public final static int SEARCH_MODE_FULL = DB_SOURCE | DB_VERSION | DB_ID + | DB_MAP; + + static + { + // TODO load these from a resource file? + canonicalSourceNameLookup.put("uniprotkb/swiss-prot", + DBRefSource.UNIPROT); + canonicalSourceNameLookup.put("uniprotkb/trembl", DBRefSource.UNIPROT); + + // Ensembl values for dbname in xref REST service: + canonicalSourceNameLookup.put("uniprot/sptrembl", DBRefSource.UNIPROT); + canonicalSourceNameLookup.put("uniprot/swissprot", DBRefSource.UNIPROT); + + canonicalSourceNameLookup.put("pdb", DBRefSource.PDB); + canonicalSourceNameLookup.put("ensembl", DBRefSource.ENSEMBL); + // Ensembl Gn and Tr are for Ensembl genomic and transcript IDs as served + // from ENA. + canonicalSourceNameLookup.put("ensembl-tr", DBRefSource.ENSEMBL); + canonicalSourceNameLookup.put("ensembl-gn", DBRefSource.ENSEMBL); + + // guarantee we always have lowercase entries for canonical string lookups + for (String k : canonicalSourceNameLookup.keySet()) + { + canonicalSourceNameLookup.put(k.toLowerCase(Locale.ROOT), + canonicalSourceNameLookup.get(k)); + } + } + /** + * Returns those DBRefEntry objects whose source identifier (once converted to + * Jalview's canonical form) is in the list of sources to search for. Returns + * null if no matches found. * * @param dbrefs - * Vector of DBRef objects to search + * DBRefEntry objects to search * @param sources - * String[] array of source DBRef IDs to retrieve - * @return Vector + * array of sources to select + * @return */ - public static DBRefEntry[] selectRefs(DBRefEntry[] dbrefs, + public static List selectRefs(List dbrefs, String[] sources) { - if (dbrefs == null) - { - return null; - } - if (sources == null) + if (dbrefs == null || sources == null) { return dbrefs; } - Hashtable srcs = new Hashtable(); - Vector res = new Vector(); - for (int i = 0; i < sources.length; i++) + // BH TODO (what?) + HashSet srcs = new HashSet(); + for (String src : sources) { - srcs.put(new String(sources[i]), new Integer(i)); + srcs.add(src.toUpperCase(Locale.ROOT)); } - for (int i = 0, j = dbrefs.length; i < j; i++) + + int nrefs = dbrefs.size(); + List res = new ArrayList(); + for (int ib = 0; ib < nrefs; ib++) { - if (srcs.containsKey(dbrefs[i].getSource())) + DBRefEntry dbr = dbrefs.get(ib); + String source = getCanonicalName(dbr.getSource()); + if (srcs.contains(source.toUpperCase(Locale.ROOT))) { - res.addElement(dbrefs[i]); + res.add(dbr); } } - if (res.size() > 0) { - DBRefEntry[] reply = new DBRefEntry[res.size()]; - for (int i = 0; i < res.size(); i++) + // List reply = new DBRefEntry[res.size()]; + return res;// .toArray(reply); + } + return null; + } + + private static boolean selectRefsBS(List dbrefs, + int sourceKeys, BitSet bsSelect) + { + if (dbrefs == null || sourceKeys == 0) + { + return false; + } + for (int i = 0, n = dbrefs.size(); i < n; i++) + { + DBRefEntry dbr = dbrefs.get(i); + if ((dbr.getSourceKey() & sourceKeys) != 0) { - reply[i] = (DBRefEntry) res.elementAt(i); + bsSelect.clear(i); } - return reply; } - res = null; - // there are probable memory leaks in the hashtable! - return null; + return !bsSelect.isEmpty(); } /** - * isDasCoordinateSystem + * Returns a (possibly empty) list of those references that match the given + * entry, according to the given comparator. * - * @param string - * String - * @param dBRefEntry - * DBRefEntry - * @return boolean true if Source DBRefEntry is compatible with DAS - * CoordinateSystem name + * @param refs + * an array of database references to search + * @param entry + * an entry to compare against + * @param comparator + * @return */ - public static Hashtable DasCoordinateSystemsLookup = null; - - public static boolean isDasCoordinateSystem(String string, - DBRefEntry dBRefEntry) + static List searchRefs(DBRefEntry[] refs, DBRefEntry entry, + DbRefComp comparator) { - if (DasCoordinateSystemsLookup == null) + List rfs = new ArrayList<>(); + if (refs == null || entry == null) { - // TODO: Make a DasCoordinateSystemsLookup properties resource - // Initialise - DasCoordinateSystemsLookup = new Hashtable(); - DasCoordinateSystemsLookup.put("pdbresnum", - jalview.datamodel.DBRefSource.PDB); - DasCoordinateSystemsLookup.put("uniprot", - jalview.datamodel.DBRefSource.UNIPROT); - DasCoordinateSystemsLookup.put("EMBL", - jalview.datamodel.DBRefSource.EMBL); - // DasCoordinateSystemsLookup.put("EMBL", - // jalview.datamodel.DBRefSource.EMBLCDS); + return rfs; } - - String coordsys = (String) DasCoordinateSystemsLookup.get(string - .toLowerCase()); - if (coordsys != null) + for (int i = 0; i < refs.length; i++) { - return coordsys.equals(dBRefEntry.getSource()); + if (comparator.matches(entry, refs[i])) + { + rfs.add(refs[i]); + } } - return false; + return rfs; } - public static Hashtable CanonicalSourceNameLookup = null; - /** * look up source in an internal list of database reference sources and return * the canonical jalview name for the source, or the original string if it has @@ -127,89 +188,138 @@ public class DBRefUtils */ public static String getCanonicalName(String source) { - if (CanonicalSourceNameLookup == null) - { - CanonicalSourceNameLookup = new Hashtable(); - CanonicalSourceNameLookup.put("uniprotkb/swiss-prot", - jalview.datamodel.DBRefSource.UNIPROT); - CanonicalSourceNameLookup.put("uniprotkb/trembl", - jalview.datamodel.DBRefSource.UNIPROT); - CanonicalSourceNameLookup.put("pdb", - jalview.datamodel.DBRefSource.PDB); - } - String canonical = (String) CanonicalSourceNameLookup.get(source - .toLowerCase()); - if (canonical == null) + if (source == null) { - return source; + return null; } - return canonical; + String canonical = canonicalSourceNameLookup + .get(source.toLowerCase(Locale.ROOT)); + return canonical == null ? source : canonical; } /** - * find RefEntry corresponding to a particular pattern the equals method of - * each entry is used, from String attributes right down to Mapping - * attributes. + * Returns a (possibly empty) list of those references that match the given + * entry. Currently uses a comparator which matches if + *
    + *
  • database sources are the same
  • + *
  • accession ids are the same
  • + *
  • both have no mapping, or the mappings are the same
  • + *
* * @param ref - * Set of references to search + * Set of references to search * @param entry - * pattern to collect - null any entry for wildcard match + * pattern to match + * @param mode + * SEARCH_MODE_FULL for all; SEARCH_MODE_NO_MAP_NO_VERSION optional * @return */ - public static DBRefEntry[] searchRefs(DBRefEntry[] ref, DBRefEntry entry) + public static List searchRefs(List ref, + DBRefEntry entry, int mode) { return searchRefs(ref, entry, - matchDbAndIdAndEitherMapOrEquivalentMapList); + matchDbAndIdAndEitherMapOrEquivalentMapList, mode); } - public static DBRefEntry[] searchRefs(DBRefEntry[] ref, DBRefEntry entry, - DbRefComp comparator) + /** + * Returns a list of those references that match the given accession id + *
    + *
  • database sources are the same
  • + *
  • accession ids are the same
  • + *
  • both have no mapping, or the mappings are the same
  • + *
+ * + * @param refs + * Set of references to search + * @param accId + * accession id to match + * @return + */ + public static List searchRefs(List refs, + String accId) { - if (ref == null || entry == null) - return null; - Vector rfs = new Vector(); - for (int i = 0; i < ref.length; i++) + List rfs = new ArrayList(); + if (refs == null || accId == null) { - if (comparator.matches(entry, ref[i])) + return rfs; + } + for (int i = 0, n = refs.size(); i < n; i++) + { + DBRefEntry e = refs.get(i); + if (accId.equals(e.getAccessionId())) { - rfs.addElement(ref[i]); + rfs.add(e); } } - // TODO Auto-generated method stub - if (rfs.size() > 0) + return rfs; + // return searchRefs(refs, new DBRefEntry("", "", accId), matchId, + // SEARCH_MODE_FULL); + } + + /** + * Returns a (possibly empty) list of those references that match the given + * entry, according to the given comparator. + * + * @param refs + * an array of database references to search + * @param entry + * an entry to compare against + * @param comparator + * @param mode + * SEARCH_MODE_FULL for all; SEARCH_MODE_NO_MAP_NO_VERSION optional + * @return + */ + static List searchRefs(List refs, + DBRefEntry entry, DbRefComp comparator, int mode) + { + List rfs = new ArrayList(); + if (refs == null || entry == null) { - DBRefEntry[] rf = new DBRefEntry[rfs.size()]; - rfs.copyInto(rf); - return rf; + return rfs; } - return null; + for (int i = 0, n = refs.size(); i < n; i++) + { + DBRefEntry e = refs.get(i); + if (comparator.matches(entry, e, SEARCH_MODE_FULL)) + { + rfs.add(e); + } + } + return rfs; } - public interface DbRefComp + interface DbRefComp { - public boolean matches(DBRefEntry refa, DBRefEntry refb); + default public boolean matches(DBRefEntry refa, DBRefEntry refb) + { + return matches(refa, refb, SEARCH_MODE_FULL); + }; + + public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode); } /** * match on all non-null fields in refa */ + // TODO unused - remove? would be broken by equating "" with null public static DbRefComp matchNonNullonA = new DbRefComp() { - public boolean matches(DBRefEntry refa, DBRefEntry refb) + @Override + public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) { - if (refa.getSource() == null - || refb.getSource().equals(refa.getSource())) + if ((mode & DB_SOURCE) != 0 && (refa.getSource() == null + || DBRefUtils.getCanonicalName(refb.getSource()).equals( + DBRefUtils.getCanonicalName(refa.getSource())))) { - if (refa.getVersion() == null - || refb.getVersion().equals(refa.getVersion())) + if ((mode & DB_VERSION) != 0 && (refa.getVersion() == null + || refb.getVersion().equals(refa.getVersion()))) { - if (refa.getAccessionId() == null - || refb.getAccessionId().equals(refa.getAccessionId())) + if ((mode & DB_ID) != 0 && (refa.getAccessionId() == null + || refb.getAccessionId().equals(refa.getAccessionId()))) { - if (refa.getMap() == null - || (refb.getMap() != null && refb.getMap().equals( - refa.getMap()))) + if ((mode & DB_MAP) != 0 + && (refa.getMap() == null || (refb.getMap() != null + && refb.getMap().equals(refa.getMap())))) { return true; } @@ -224,53 +334,115 @@ public class DBRefUtils * either field is null or field matches for all of source, version, accession * id and map. */ + // TODO unused - remove? public static DbRefComp matchEitherNonNull = new DbRefComp() { - public boolean matches(DBRefEntry refa, DBRefEntry refb) + @Override + public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) { - if ((refa.getSource() == null || refb.getSource() == null) - || refb.getSource().equals(refa.getSource())) + if (nullOrEqualSource(refa.getSource(), refb.getSource()) + && nullOrEqual(refa.getVersion(), refb.getVersion()) + && nullOrEqual(refa.getAccessionId(), refb.getAccessionId()) + && nullOrEqual(refa.getMap(), refb.getMap())) { - if ((refa.getVersion() == null || refb.getVersion() == null) - || refb.getVersion().equals(refa.getVersion())) + return true; + } + return false; + } + + }; + + /** + * Parses a DBRefEntry and adds it to the sequence, also a PDBEntry if the + * database is PDB. + *

+ * Used by file parsers to generate DBRefs from annotation within file (eg + * Stockholm) + * + * @param dbname + * @param version + * @param acn + * @param seq + * where to annotate with reference + * @return parsed version of entry that was added to seq (if any) + */ + public static DBRefEntry parseToDbRef(SequenceI seq, String dbname, + String version, String acn) + { + DBRefEntry ref = null; + if (dbname != null) + { + String locsrc = DBRefUtils.getCanonicalName(dbname); + if (locsrc.equals(DBRefSource.PDB)) + { + /* + * Check for PFAM style stockhom PDB accession id citation e.g. + * "1WRI A; 7-80;" + */ + Regex r = new com.stevesoft.pat.Regex( + "([0-9][0-9A-Za-z]{3})\\s*(.?)\\s*;\\s*([0-9]+)-([0-9]+)"); + if (r.search(acn.trim())) { - if ((refa.getAccessionId() == null || refb.getAccessionId() == null) - || refb.getAccessionId().equals(refa.getAccessionId())) + String pdbid = r.stringMatched(1); + String chaincode = r.stringMatched(2); + if (chaincode == null) { - if ((refa.getMap() == null || refb.getMap() == null) - || (refb.getMap() != null && refb.getMap().equals( - refa.getMap()))) - { - return true; - } + chaincode = " "; } + // String mapstart = r.stringMatched(3); + // String mapend = r.stringMatched(4); + if (chaincode.equals(" ")) + { + chaincode = "_"; + } + // construct pdb ref. + ref = new DBRefEntry(locsrc, version, pdbid + chaincode); + PDBEntry pdbr = new PDBEntry(); + pdbr.setId(pdbid); + pdbr.setType(PDBEntry.Type.PDB); + pdbr.setChainCode(chaincode); + seq.addPDBId(pdbr); + } + else + { + System.err.println("Malformed PDB DR line:" + acn); } } - return false; + else + { + // default: + ref = new DBRefEntry(locsrc, version, acn.trim()); + } } - }; + if (ref != null) + { + seq.addDBRef(ref); + } + return ref; + } /** * accession ID and DB must be identical. Version is ignored. Map is either * not defined or is a match (or is compatible?) */ + // TODO unused - remove? public static DbRefComp matchDbAndIdAndEitherMap = new DbRefComp() { - public boolean matches(DBRefEntry refa, DBRefEntry refb) + @Override + public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) { if (refa.getSource() != null && refb.getSource() != null - && refb.getSource().equals(refa.getSource())) + && DBRefUtils.getCanonicalName(refb.getSource()).equals( + DBRefUtils.getCanonicalName(refa.getSource()))) { // We dont care about version - // if ((refa.getVersion()==null || refb.getVersion()==null) - // || refb.getVersion().equals(refa.getVersion())) - // { if (refa.getAccessionId() != null && refb.getAccessionId() != null + // FIXME should be && not || here? || refb.getAccessionId().equals(refa.getAccessionId())) { if ((refa.getMap() == null || refb.getMap() == null) - || (refa.getMap() != null && refb.getMap() != null && refb - .getMap().equals(refa.getMap()))) + || (refa.getMap() != null && refb.getMap() != null + && refb.getMap().equals(refa.getMap()))) { return true; } @@ -285,30 +457,33 @@ public class DBRefUtils * or map but no maplist on either or maplist of map on a is the complement of * maplist of map on b. */ + // TODO unused - remove? public static DbRefComp matchDbAndIdAndComplementaryMapList = new DbRefComp() { - public boolean matches(DBRefEntry refa, DBRefEntry refb) + @Override + public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) { if (refa.getSource() != null && refb.getSource() != null - && refb.getSource().equals(refa.getSource())) + && DBRefUtils.getCanonicalName(refb.getSource()).equals( + DBRefUtils.getCanonicalName(refa.getSource()))) { // We dont care about version - // if ((refa.getVersion()==null || refb.getVersion()==null) - // || refb.getVersion().equals(refa.getVersion())) - // { if (refa.getAccessionId() != null && refb.getAccessionId() != null || refb.getAccessionId().equals(refa.getAccessionId())) { if ((refa.getMap() == null && refb.getMap() == null) || (refa.getMap() != null && refb.getMap() != null)) - if ((refb.getMap().getMap() == null && refa.getMap().getMap() == null) + { + if ((refb.getMap().getMap() == null + && refa.getMap().getMap() == null) || (refb.getMap().getMap() != null - && refa.getMap().getMap() != null && refb - .getMap().getMap().getInverse().equals( - refa.getMap().getMap()))) + && refa.getMap().getMap() != null + && refb.getMap().getMap().getInverse() + .equals(refa.getMap().getMap()))) { return true; } + } } } return false; @@ -320,12 +495,15 @@ public class DBRefUtils * or or map but no maplist on either or maplist of map on a is equivalent to * the maplist of map on b. */ + // TODO unused - remove? public static DbRefComp matchDbAndIdAndEquivalentMapList = new DbRefComp() { - public boolean matches(DBRefEntry refa, DBRefEntry refb) + @Override + public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) { if (refa.getSource() != null && refb.getSource() != null - && refb.getSource().equals(refa.getSource())) + && DBRefUtils.getCanonicalName(refb.getSource()).equals( + DBRefUtils.getCanonicalName(refa.getSource()))) { // We dont care about version // if ((refa.getVersion()==null || refb.getVersion()==null) @@ -338,12 +516,13 @@ public class DBRefUtils { return true; } - if (refa.getMap() != null - && refb.getMap() != null - && ((refb.getMap().getMap() == null && refa.getMap() - .getMap() == null) || (refb.getMap().getMap() != null - && refa.getMap().getMap() != null && refb - .getMap().getMap().equals(refa.getMap().getMap())))) + if (refa.getMap() != null && refb.getMap() != null + && ((refb.getMap().getMap() == null + && refa.getMap().getMap() == null) + || (refb.getMap().getMap() != null + && refa.getMap().getMap() != null + && refb.getMap().getMap() + .equals(refa.getMap().getMap())))) { return true; } @@ -354,37 +533,35 @@ public class DBRefUtils }; /** - * accession ID and DB must be identical. Version is ignored. No map on either - * or map but no maplist on either or maplist of map on a is equivalent to the - * maplist of map on b. + * accession ID and DB must be identical, or null on a. Version is ignored. No + * map on either or map but no maplist on either or maplist of map on a is + * equivalent to the maplist of map on b. */ public static DbRefComp matchDbAndIdAndEitherMapOrEquivalentMapList = new DbRefComp() { - public boolean matches(DBRefEntry refa, DBRefEntry refb) + @Override + public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) { -// System.err.println("Comparing A: "+refa.getSrcAccString()+(refa.hasMap()?" has map.":".")); -// System.err.println("Comparing B: "+refb.getSrcAccString()+(refb.hasMap()?" has map.":".")); if (refa.getSource() != null && refb.getSource() != null - && refb.getSource().equals(refa.getSource())) + && DBRefUtils.getCanonicalName(refb.getSource()).equals( + DBRefUtils.getCanonicalName(refa.getSource()))) { // We dont care about version - // if ((refa.getVersion()==null || refb.getVersion()==null) - // || refb.getVersion().equals(refa.getVersion())) - // { - if (refa.getAccessionId() != null && refb.getAccessionId() != null - && refb.getAccessionId().equals(refa.getAccessionId())) + if (refa.getAccessionId() == null + || refa.getAccessionId().equals(refb.getAccessionId())) { if (refa.getMap() == null || refb.getMap() == null) { return true; } if ((refa.getMap() != null && refb.getMap() != null) - && (refb.getMap().getMap() == null && refa.getMap() - .getMap() == null) + && (refb.getMap().getMap() == null + && refa.getMap().getMap() == null) || (refb.getMap().getMap() != null - && refa.getMap().getMap() != null && - (refb - .getMap().getMap().equals(refa.getMap().getMap())))) { // getMap().getMap().containsEither(false,refa.getMap().getMap()) + && refa.getMap().getMap() != null + && (refb.getMap().getMap() + .equals(refa.getMap().getMap())))) + { return true; } } @@ -394,57 +571,252 @@ public class DBRefUtils }; /** - * used by file parsers to generate DBRefs from annotation within file (eg - * stockholm) + * Returns the (possibly empty) list of those supplied dbrefs which have the + * specified source database, with a case-insensitive match of source name * - * @param dbname - * @param version - * @param acn - * @param seq - * where to anotate with reference - * @return parsed version of entry that was added to seq (if any) + * @param dbRefs + * @param source + * @return */ - public static DBRefEntry parseToDbRef(SequenceI seq, String dbname, - String version, String acn) + public static List searchRefsForSource(DBRefEntry[] dbRefs, + String source) { - DBRefEntry ref = null; - if (dbname != null) + List matches = new ArrayList<>(); + if (dbRefs != null && source != null) { - String locsrc = jalview.util.DBRefUtils.getCanonicalName(dbname); - if (locsrc.equals(jalview.datamodel.DBRefSource.PDB)) + for (DBRefEntry dbref : dbRefs) { - // check for chaincode and mapping - // PFAM style stockhom PDB citation - com.stevesoft.pat.Regex r = new com.stevesoft.pat.Regex( - "([0-9][0-9A-Za-z]{3})\\s*(.?)\\s*;([0-9]+)-([0-9]+)"); - if (r.search(acn.trim())) + if (source.equalsIgnoreCase(dbref.getSource())) { - String pdbid = r.stringMatched(1); - String chaincode = r.stringMatched(2); - String mapstart = r.stringMatched(3); - String mapend = r.stringMatched(4); - if (chaincode.equals(" ")) - { - chaincode = "_"; - } - // construct pdb ref. - ref = new DBRefEntry(locsrc, version, pdbid + chaincode); - PDBEntry pdbr = new PDBEntry(); - pdbr.setId(pdbid); - seq.addPDBId(pdbr); + matches.add(dbref); } } - else + } + return matches; + } + + /** + * Returns true if either object is null, or they are equal + * + * @param o1 + * @param o2 + * @return + */ + public static boolean nullOrEqual(Object o1, Object o2) + { + if (o1 == null || o2 == null) + { + return true; + } + return o1.equals(o2); + } + + /** + * canonicalise source string before comparing. null is always wildcard + * + * @param o1 + * - null or source string to compare + * @param o2 + * - null or source string to compare + * @return true if either o1 or o2 are null, or o1 equals o2 under + * DBRefUtils.getCanonicalName + * (o1).equals(DBRefUtils.getCanonicalName(o2)) + */ + public static boolean nullOrEqualSource(String o1, String o2) + { + if (o1 == null || o2 == null) + { + return true; + } + return DBRefUtils.getCanonicalName(o1) + .equals(DBRefUtils.getCanonicalName(o2)); + } + + /** + * Selects just the DNA or protein references from a set of references + * + * @param selectDna + * if true, select references to 'standard' DNA databases, else to + * 'standard' peptide databases + * @param refs + * a set of references to select from + * @return + */ + public static List selectDbRefs(boolean selectDna, + List refs) + { + return selectRefs(refs, + selectDna ? DBRefSource.DNACODINGDBS : DBRefSource.PROTEINDBS); + // could attempt to find other cross + // refs here - ie PDB xrefs + // (not dna, not protein seq) + } + + /** + * Returns the (possibly empty) list of those supplied dbrefs which have the + * specified source database, with a case-insensitive match of source name + * + * @param dbRefs + * @param source + * @return + */ + public static List searchRefsForSource( + List dbRefs, String source) + { + List matches = new ArrayList(); + if (dbRefs != null && source != null) + { + for (DBRefEntry dbref : dbRefs) { - // default: - ref = new DBRefEntry(locsrc, version, acn); + if (source.equalsIgnoreCase(dbref.getSource())) + { + matches.add(dbref); + } } } - if (ref != null) + return matches; + } + + /** + * promote direct database references to primary for nucleotide or protein + * sequences if they have an appropriate primary ref + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
Seq TypePrimary DBDirect which will be promoted
peptidesEnsemblUniprot
peptidesEnsemblUniprot
dnaEnsemblENA
+ * + * @param sequence + */ + public static void ensurePrimaries(SequenceI sequence, + List pr) + { + if (pr.size() == 0) { - seq.addDBRef(ref); + // nothing to do + return; + } + int sstart = sequence.getStart(); + int send = sequence.getEnd(); + boolean isProtein = sequence.isProtein(); + BitSet bsSelect = new BitSet(); + + // List selfs = new ArrayList(); + // { + + // List selddfs = selectDbRefs(!isprot, sequence.getDBRefs()); + // if (selfs == null || selfs.size() == 0) + // { + // // nothing to do + // return; + // } + + List dbrefs = sequence.getDBRefs(); + bsSelect.set(0, dbrefs.size()); + + if (!selectRefsBS(dbrefs, isProtein ? DBRefSource.PROTEIN_MASK + : DBRefSource.DNA_CODING_MASK, bsSelect)) + return; + + // selfs.addAll(selfArray); + // } + + // filter non-primary refs + for (int ip = pr.size(); --ip >= 0;) + { + DBRefEntry p = pr.get(ip); + for (int i = bsSelect.nextSetBit(0); i >= 0; i = bsSelect + .nextSetBit(i + 1)) + { + if (dbrefs.get(i) == p) + bsSelect.clear(i); + } + // while (selfs.contains(p)) + // { + // selfs.remove(p); + // } + } + // List toPromote = new ArrayList(); + + for (int ip = pr.size(), keys = 0; --ip >= 0 + && keys != DBRefSource.PRIMARY_MASK;) + { + DBRefEntry p = pr.get(ip); + if (isProtein) + { + switch (getCanonicalName(p.getSource())) + { + case DBRefSource.UNIPROT: + keys |= DBRefSource.UNIPROT_MASK; + break; + case DBRefSource.ENSEMBL: + keys |= DBRefSource.ENSEMBL_MASK; + break; + } + } + else + { + // TODO: promote transcript refs ?? + } + if (keys == 0 || !selectRefsBS(dbrefs, keys, bsSelect)) + return; + // if (candidates != null) + { + for (int ic = bsSelect.nextSetBit(0); ic >= 0; ic = bsSelect + .nextSetBit(ic + 1)) + // for (int ic = 0, n = candidates.size(); ic < n; ic++) + { + DBRefEntry cand = dbrefs.get(ic);// candidates.get(ic); + if (cand.hasMap()) + { + Mapping map = cand.getMap(); + SequenceI cto = map.getTo(); + if (cto != null && cto != sequence) + { + // can't promote refs with mappings to other sequences + continue; + } + MapList mlist = map.getMap(); + if (mlist.getFromLowest() != sstart + && mlist.getFromHighest() != send) + { + // can't promote refs with mappings from a region of this sequence + // - eg CDS + continue; + } + } + // and promote - not that version must be non-null here, + // as p must have passed isPrimaryCandidate() + cand.setVersion(cand.getVersion() + " (promoted)"); + bsSelect.clear(ic); + // selfs.remove(cand); + // toPromote.add(cand); + if (!cand.isPrimaryCandidate()) + { + System.out.println( + "Warning: Couldn't promote dbref " + cand.toString() + + " for sequence " + sequence.toString()); + } + } + } } - return ref; } }