*/
package jalview.util;
-import jalview.datamodel.DBRefEntry;
-import jalview.datamodel.DBRefSource;
-import jalview.datamodel.Mapping;
-import jalview.datamodel.PDBEntry;
-import jalview.datamodel.SequenceI;
+import java.util.Locale;
import java.util.ArrayList;
import java.util.BitSet;
import com.stevesoft.pat.Regex;
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.DBRefSource;
+import jalview.datamodel.Mapping;
+import jalview.datamodel.PDBEntry;
+import jalview.datamodel.SequenceI;
+
/**
* Utilities for handling DBRef objects and their collections.
*/
-public class DBRefUtils {
+public class DBRefUtils
+{
+ /*
+ * lookup from lower-case form of a name to its canonical (standardised) form
+ */
+ private static Map<String, String> canonicalSourceNameLookup = new HashMap<>();
public final static int DB_SOURCE = 1;
public final static int DB_VERSION = 2;
public final static int SEARCH_MODE_NO_MAP_NO_VERSION = DB_SOURCE | DB_ID;
public final static int SEARCH_MODE_FULL = DB_SOURCE | DB_VERSION | DB_ID | DB_MAP;
- /*
- * lookup from lower-case form of a name to its canonical (standardised) form
- */
- private static Map<String, String> canonicalSourceNameLookup = new HashMap<String, String>();
-
- private static Map<String, String> dasCoordinateSystemsLookup = new HashMap<String, String>();
-
- static {
+ static
+ {
// TODO load these from a resource file?
canonicalSourceNameLookup.put("uniprotkb/swiss-prot", DBRefSource.UNIPROT);
canonicalSourceNameLookup.put("uniprotkb/trembl", DBRefSource.UNIPROT);
canonicalSourceNameLookup.put("ensembl-tr", DBRefSource.ENSEMBL);
canonicalSourceNameLookup.put("ensembl-gn", DBRefSource.ENSEMBL);
- // Make sure we have lowercase entries for all canonical string lookups
-// BH 2019.01.25 unnecessary -- they are all lower case already
- // Set<String> keys = canonicalSourceNameLookup.keySet();
-// for (String k : keys)
-// {
-// canonicalSourceNameLookup.put(k.toLowerCase(),
-// canonicalSourceNameLookup.get(k));
-// }
-
- dasCoordinateSystemsLookup.put("pdbresnum", DBRefSource.PDB);
- dasCoordinateSystemsLookup.put("uniprot", DBRefSource.UNIPROT);
- dasCoordinateSystemsLookup.put("embl", DBRefSource.EMBL);
- // dasCoordinateSystemsLookup.put("embl", DBRefSource.EMBLCDS);
+ // Guarantee we always have lower-case entries for canonical string lookups.
+ // Iterate over a snapshot of the keys: putting a new (lower-cased) key into
+ // the map while iterating its live keySet() is a structural modification
+ // and would make the iterator throw ConcurrentModificationException.
+ for (String k : new ArrayList<>(canonicalSourceNameLookup.keySet()))
+ {
+   canonicalSourceNameLookup.put(k.toLowerCase(Locale.ROOT),
+           canonicalSourceNameLookup.get(k));
+ }
+ }
+
+ /**
+ * Returns those DBRefEntry objects whose source identifier (once converted to
+ * Jalview's canonical form) is in the list of sources to search for. Returns
+ * null if no matches found.
+ *
+ * @param dbrefs DBRefEntry objects to search
+ * @param sources array of sources to select
+ * @return
+ */
+ public static List<DBRefEntry> selectRefs(List<DBRefEntry> dbrefs, String[] sources)
+ {
+ if (dbrefs == null || sources == null)
+ {
+ return dbrefs;
}
- /**
- * Returns those DBRefEntry objects whose source identifier (once converted to
- * Jalview's canonical form) is in the list of sources to search for. Returns
- * null if no matches found.
- *
- * @param dbrefs DBRefEntry objects to search
- * @param sources array of sources to select
- * @return
- */
- public static List<DBRefEntry> selectRefs(List<DBRefEntry> dbrefs, String[] sources) {
- if (dbrefs == null || sources == null) {
- return dbrefs;
- }
-
- // BH TODO
- HashSet<String> srcs = new HashSet<String>();
- for (String src : sources) {
- srcs.add(src.toUpperCase());
- }
-
- int nrefs = dbrefs.size();
- List<DBRefEntry> res = new ArrayList<DBRefEntry>();
- for (int ib = 0; ib < nrefs; ib++) {
- DBRefEntry dbr = dbrefs.get(ib);
- String source = getCanonicalName(dbr.getSource());
- if (srcs.contains(source.toUpperCase())) {
- res.add(dbr);
- }
- }
-
- if (res.size() > 0) {
- // List<DBRefEntry> reply = new DBRefEntry[res.size()];
- return res;// .toArray(reply);
- }
- return null;
+ // BH TODO (what?)
+ HashSet<String> srcs = new HashSet<String>();
+ for (String src : sources)
+ {
+ srcs.add(src.toUpperCase(Locale.ROOT));
}
- private static boolean selectRefsBS(List<DBRefEntry> dbrefs, int sourceKeys, BitSet bsSelect) {
- if (dbrefs == null || sourceKeys == 0) {
- return false;
- }
- for (int i = 0, n = dbrefs.size(); i < n; i++) {
- DBRefEntry dbr = dbrefs.get(i);
- if ((dbr.getSourceKey() & sourceKeys) != 0) {
- bsSelect.clear(i);
- }
- }
- return !bsSelect.isEmpty();
+ int nrefs = dbrefs.size();
+ List<DBRefEntry> res = new ArrayList<DBRefEntry>();
+ for (int ib = 0; ib < nrefs; ib++)
+ {
+   DBRefEntry dbr = dbrefs.get(ib);
+   String source = getCanonicalName(dbr.getSource());
+   // getCanonicalName returns null for a null source; such a reference
+   // cannot match any requested source, so skip it instead of
+   // dereferencing null
+   if (source != null && srcs.contains(source.toUpperCase(Locale.ROOT)))
+   {
+     res.add(dbr);
+   }
+ }
-
- /**
- * isDasCoordinateSystem
- *
- * @param string String
- * @param dBRefEntry DBRefEntry
- * @return boolean true if Source DBRefEntry is compatible with DAS
- * CoordinateSystem name
- */
-
- public static boolean isDasCoordinateSystem(String string, DBRefEntry dBRefEntry) {
- if (string == null || dBRefEntry == null) {
- return false;
- }
- String coordsys = dasCoordinateSystemsLookup.get(string.toLowerCase());
- return coordsys == null ? false : coordsys.equals(dBRefEntry.getSource());
+ if (res.size() > 0)
+ {
+ // List<DBRefEntry> reply = new DBRefEntry[res.size()];
+ return res;// .toArray(reply);
}
+ return null;
+ }
+
+ /**
+  * Clears, in the supplied bit set, the bit at the list position of every
+  * reference whose source key has at least one bit in common with the given
+  * mask.
+  *
+  * @param dbrefs
+  *          references to inspect (may be null)
+  * @param sourceKeys
+  *          bit mask of source keys to test against
+  * @param bsSelect
+  *          bit set updated in place
+  * @return false if dbrefs is null or the mask is zero; otherwise true if
+  *         any bit is still set in bsSelect afterwards
+  */
+ private static boolean selectRefsBS(List<DBRefEntry> dbrefs, int sourceKeys, BitSet bsSelect)
+ {
+   boolean nothingToTest = (sourceKeys == 0);
+   if (dbrefs == null || nothingToTest)
+   {
+     return false;
+   }
+   int position = 0;
+   for (DBRefEntry candidate : dbrefs)
+   {
+     int sharedBits = candidate.getSourceKey() & sourceKeys;
+     if (sharedBits != 0)
+     {
+       bsSelect.clear(position);
+     }
+     position++;
+   }
+   boolean anyLeft = !bsSelect.isEmpty();
+   return anyLeft;
+ }
+
+ /**
+  * Collects the references that the comparator reports as matching the
+  * given entry. The result is never null; it is empty when either the
+  * array or the entry is null, or when nothing matches.
+  *
+  * @param refs
+  *          database references to search
+  * @param entry
+  *          the entry each reference is compared against
+  * @param comparator
+  *          decides whether a reference matches the entry
+  * @return the matching references, in array order
+  */
+ static List<DBRefEntry> searchRefs(DBRefEntry[] refs, DBRefEntry entry,
+         DbRefComp comparator)
+ {
+   List<DBRefEntry> matched = new ArrayList<>();
+   boolean searchable = (refs != null) && (entry != null);
+   if (searchable)
+   {
+     for (DBRefEntry candidate : refs)
+     {
+       if (comparator.matches(entry, candidate))
+       {
+         matched.add(candidate);
+       }
+     }
+   }
+   return matched;
+ }
/**
* look up source in an internal list of database reference sources and return
* @return canonical jalview source (one of jalview.datamodel.DBRefSource.*) or
* original source
*/
- public static String getCanonicalName(String source) {
- if (source == null) {
- return null;
- }
- String canonical = canonicalSourceNameLookup.get(source.toLowerCase());
- return canonical == null ? source : canonical;
+ public static String getCanonicalName(String source)
+ {
+   if (source != null)
+   {
+     // the lookup table is keyed on the lower-case form of the name
+     String lookupKey = source.toLowerCase(Locale.ROOT);
+     String standardised = canonicalSourceNameLookup.get(lookupKey);
+     if (standardised != null)
+     {
+       return standardised;
+     }
+   }
+   // null input, or no canonical form known: hand back what we were given
+   return source;
 }
/**
};
+ /*
+  * pattern used by parseToDbRef for PDB accession citations; created on
+  * first use by getParseRegex()
+  */
+ private static Regex PARSE_REGEX;
+
+ /**
+  * Returns the shared PDB citation pattern, creating it on the first call.
+  * <p>
+  * NOTE(review): every caller receives the same Regex instance, and
+  * parseToDbRef reads match results back from that instance after
+  * searching - confirm this is never used from more than one thread at once.
+  *
+  * @return the cached regex
+  */
+ private static Regex getParseRegex()
+ {
+   if (PARSE_REGEX == null)
+   {
+     PARSE_REGEX = Platform.newRegex(
+             "([0-9][0-9A-Za-z]{3})\\s*(.?)\\s*;\\s*([0-9]+)-([0-9]+)");
+   }
+   return PARSE_REGEX;
+ }
+
+ /**
+  * Parses a DBRefEntry and adds it to the sequence, also a PDBEntry if the
+  * database is PDB.
+  * <p>
+  * Used by file parsers to generate DBRefs from annotation within file (eg
+  * Stockholm)
+  *
+  * @param seq
+  *          where to annotate with reference
+  * @param dbname
+  *          source database name; if null, nothing is parsed or added
+  * @param version
+  * @param acn
+  *          accession text; leading and trailing whitespace is ignored. A
+  *          null value is reported as malformed for a PDB source, and is
+  *          passed through unchanged as the accession for any other source
+  * @return parsed version of entry that was added to seq (if any)
+  */
+ public static DBRefEntry parseToDbRef(SequenceI seq, String dbname,
+         String version, String acn)
+ {
+   DBRefEntry ref = null;
+   if (dbname != null)
+   {
+     String locsrc = DBRefUtils.getCanonicalName(dbname);
+     // trim once, null-safely, so that a missing accession cannot throw
+     String accession = (acn == null) ? null : acn.trim();
+     if (locsrc.equals(DBRefSource.PDB))
+     {
+       /*
+        * Check for PFAM style Stockholm PDB accession id citation e.g.
+        * "1WRI A; 7-80;"
+        */
+       Regex r = getParseRegex();
+       if (accession != null && r.search(accession))
+       {
+         String pdbid = r.stringMatched(1);
+         String chaincode = r.stringMatched(2);
+         // groups 3 and 4 (mapped start and end) are matched but not used
+         if (chaincode == null || chaincode.equals(" "))
+         {
+           // a missing or blank chain code is recorded as "_"
+           chaincode = "_";
+         }
+         // construct pdb ref.
+         ref = new DBRefEntry(locsrc, version, pdbid + chaincode);
+         PDBEntry pdbr = new PDBEntry();
+         pdbr.setId(pdbid);
+         pdbr.setType(PDBEntry.Type.PDB);
+         pdbr.setChainCode(chaincode);
+         seq.addPDBId(pdbr);
+       }
+       else
+       {
+         System.err.println("Malformed PDB DR line:" + acn);
+       }
+     }
+     else
+     {
+       // default:
+       ref = new DBRefEntry(locsrc, version, accession);
+     }
+   }
+   if (ref != null)
+   {
+     seq.addDBRef(ref);
+   }
+   return ref;
+ }
+
/**
* accession ID and DB must be identical. Version is ignored. Map is either not
* defined or is a match (or is compatible?)
* map on either or map but no maplist on either or maplist of map on a is
* equivalent to the maplist of map on b.
*/
- public static DbRefComp matchDbAndIdAndEitherMapOrEquivalentMapList = new DbRefComp() {
+ public static DbRefComp matchDbAndIdAndEitherMapOrEquivalentMapList = new DbRefComp()
+ {
@Override
- public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) {
+ public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode)
+ {
if (refa.getSource() != null && refb.getSource() != null && DBRefUtils.getCanonicalName(refb.getSource())
- .equals(DBRefUtils.getCanonicalName(refa.getSource()))) {
+ .equals(DBRefUtils.getCanonicalName(refa.getSource())))
+ {
// We dont care about version
-
- if (refa.getAccessionId() == null || refa.getAccessionId().equals(refb.getAccessionId())) {
- if (refa.getMap() == null || refb.getMap() == null) {
+ if (refa.getAccessionId() == null || refa.getAccessionId().equals(refb.getAccessionId()))
+ {
+ if (refa.getMap() == null || refb.getMap() == null)
+ {
return true;
}
if ((refa.getMap() != null && refb.getMap() != null)
&& (refb.getMap().getMap() == null && refa.getMap().getMap() == null)
|| (refb.getMap().getMap() != null && refa.getMap().getMap() != null
- && (refb.getMap().getMap().equals(refa.getMap().getMap())))) {
+ && (refb.getMap().getMap().equals(refa.getMap().getMap()))))
+ {
return true;
}
}
};
/**
- * accession ID only must be identical.
- */
- public static DbRefComp matchId = new DbRefComp() {
- @Override
- public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) {
- if (refa.getAccessionId() != null && refb.getAccessionId() != null
- && refb.getAccessionId().equals(refa.getAccessionId())) {
- return true;
- }
- return false;
- }
- };
-
- /**
- * Parses a DBRefEntry and adds it to the sequence, also a PDBEntry if the
- * database is PDB.
- * <p>
- * Used by file parsers to generate DBRefs from annotation within file (eg
- * Stockholm)
- *
- * @param dbname
- * @param version
- * @param acn
- * @param seq where to annotate with reference
- * @return parsed version of entry that was added to seq (if any)
- */
- public static DBRefEntry parseToDbRef(SequenceI seq, String dbname, String version, String acn) {
- DBRefEntry ref = null;
- if (dbname != null) {
- String locsrc = DBRefUtils.getCanonicalName(dbname);
- if (locsrc.equals(DBRefSource.PDB)) {
- /*
- * Check for PFAM style stockhom PDB accession id citation e.g. "1WRI A; 7-80;"
- */
- Regex r = new com.stevesoft.pat.Regex("([0-9][0-9A-Za-z]{3})\\s*(.?)\\s*;\\s*([0-9]+)-([0-9]+)");
- if (r.search(acn.trim())) {
- String pdbid = r.stringMatched(1);
- String chaincode = r.stringMatched(2);
- if (chaincode == null) {
- chaincode = " ";
- }
- // String mapstart = r.stringMatched(3);
- // String mapend = r.stringMatched(4);
- if (chaincode.equals(" ")) {
- chaincode = "_";
- }
- // construct pdb ref.
- ref = new DBRefEntry(locsrc, version, pdbid + chaincode);
- PDBEntry pdbr = new PDBEntry();
- pdbr.setId(pdbid);
- pdbr.setType(PDBEntry.Type.PDB);
- pdbr.setChainCode(chaincode);
- seq.addPDBId(pdbr);
- } else {
- System.err.println("Malformed PDB DR line:" + acn);
- }
- } else {
- // default:
- ref = new DBRefEntry(locsrc, version, acn);
- }
- }
- if (ref != null) {
- seq.addDBRef(ref);
- }
- return ref;
- }
+ * Returns the (possibly empty) list of those supplied dbrefs which have the
+ * specified source database, with a case-insensitive match of source name
+ *
+ * @param dbRefs
+ * @param source
+ * @return
+ */
+ public static List<DBRefEntry> searchRefsForSource(DBRefEntry[] dbRefs,
+         String source)
+ {
+   List<DBRefEntry> found = new ArrayList<>();
+   if (dbRefs == null || source == null)
+   {
+     // nothing to search, or nothing to search for
+     return found;
+   }
+   for (int i = 0; i < dbRefs.length; i++)
+   {
+     DBRefEntry candidate = dbRefs[i];
+     // source names are compared without regard to case
+     boolean sameSource = source.equalsIgnoreCase(candidate.getSource());
+     if (sameSource)
+     {
+       found.add(candidate);
+     }
+   }
+   return found;
+ }
/**
* Returns true if either object is null, or they are equal