*/
package jalview.util;
-import jalview.datamodel.DBRefEntry;
-import jalview.datamodel.DBRefSource;
-import jalview.datamodel.Mapping;
-import jalview.datamodel.PDBEntry;
-import jalview.datamodel.SequenceI;
-
import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
+import java.util.Locale;
import java.util.Map;
import com.stevesoft.pat.Regex;
+import jalview.bin.Console;
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.DBRefSource;
+import jalview.datamodel.Mapping;
+import jalview.datamodel.PDBEntry;
+import jalview.datamodel.SequenceI;
+
/**
* Utilities for handling DBRef objects and their collections.
*/
public class DBRefUtils
{
-
- public final static int DB_SOURCE = 1;
- public final static int DB_VERSION = 2;
- public final static int DB_ID = 4;
- public final static int DB_MAP = 8;
-
-
- public final static int SEARCH_MODE_NO_MAP_NO_VERSION = DB_SOURCE | DB_ID;
- public final static int SEARCH_MODE_FULL = DB_SOURCE | DB_VERSION | DB_ID | DB_MAP;
-
/*
* lookup from lower-case form of a name to its canonical (standardised) form
*/
- private static Map<String, String> canonicalSourceNameLookup = new HashMap<String, String>();
+ private static Map<String, String> canonicalSourceNameLookup = new HashMap<>();
+
+ public final static int DB_SOURCE = 1;
- private static Map<String, String> dasCoordinateSystemsLookup = new HashMap<String, String>();
+ public final static int DB_VERSION = 2;
+
+ public final static int DB_ID = 4;
+
+ public final static int DB_MAP = 8;
+
+ public final static int SEARCH_MODE_NO_MAP_NO_VERSION = DB_SOURCE | DB_ID;
+
+ public final static int SEARCH_MODE_FULL = DB_SOURCE | DB_VERSION | DB_ID
+ | DB_MAP;
static
{
canonicalSourceNameLookup.put("ensembl-tr", DBRefSource.ENSEMBL);
canonicalSourceNameLookup.put("ensembl-gn", DBRefSource.ENSEMBL);
- // Make sure we have lowercase entries for all canonical string lookups
-// BH 2019.01.25 unnecessary -- they are all lower case already
- //Set<String> keys = canonicalSourceNameLookup.keySet();
-// for (String k : keys)
-// {
-// canonicalSourceNameLookup.put(k.toLowerCase(),
-// canonicalSourceNameLookup.get(k));
-// }
-
- dasCoordinateSystemsLookup.put("pdbresnum", DBRefSource.PDB);
- dasCoordinateSystemsLookup.put("uniprot", DBRefSource.UNIPROT);
- dasCoordinateSystemsLookup.put("embl", DBRefSource.EMBL);
- // dasCoordinateSystemsLookup.put("embl", DBRefSource.EMBLCDS);
+ // guarantee we always have lowercase entries for canonical string lookups.
+ // Iterate over a snapshot of the keys: putting a new (lower-cased) key
+ // while iterating the live keySet() is a structural modification of the
+ // map and could throw ConcurrentModificationException during class
+ // initialisation as soon as one registered key is not already lower case.
+ for (String k : new ArrayList<>(canonicalSourceNameLookup.keySet()))
+ {
+   canonicalSourceNameLookup.put(k.toLowerCase(Locale.ROOT),
+           canonicalSourceNameLookup.get(k));
+ }
}
/**
return dbrefs;
}
- // BH TODO
+ // BH TODO (what?)
HashSet<String> srcs = new HashSet<String>();
for (String src : sources)
{
- srcs.add(src.toUpperCase());
+ srcs.add(src.toUpperCase(Locale.ROOT));
}
int nrefs = dbrefs.size();
{
DBRefEntry dbr = dbrefs.get(ib);
String source = getCanonicalName(dbr.getSource());
- if (srcs.contains(source.toUpperCase()))
+ if (srcs.contains(source.toUpperCase(Locale.ROOT)))
{
res.add(dbr);
}
}
-
if (res.size() > 0)
{
- //List<DBRefEntry> reply = new DBRefEntry[res.size()];
- return res;//.toArray(reply);
+ // List<DBRefEntry> reply = new DBRefEntry[res.size()];
+ return res;// .toArray(reply);
}
return null;
}
- private static boolean selectRefsBS(List<DBRefEntry> dbrefs, String sourceKeys, BitSet bsSelect) {
- if (dbrefs == null || sourceKeys == null)
- {
- return false;
- }
- for (int i = 0, n = dbrefs.size(); i < n; i++)
- {
- DBRefEntry dbr = dbrefs.get(i);
- String sourceKey = dbr.getSourceKey();
- if (sourceKeys.indexOf(sourceKey) < 0) {
- bsSelect.clear(i);
- }
- }
- return !bsSelect.isEmpty();
+ /**
+  * Narrows a selection of references to those whose source key is covered
+  * by the given bit mask. Bit i of bsSelect stands for dbrefs.get(i); the
+  * bit is cleared for every reference whose source key shares no bit with
+  * sourceKeys, and left untouched otherwise.
+  *
+  * @param dbrefs
+  *          the references the selection refers to (may be null)
+  * @param sourceKeys
+  *          bit mask of the acceptable source keys; 0 accepts nothing
+  * @param bsSelect
+  *          the selection, modified in place
+  * @return true if at least one bit is still set in bsSelect; false if
+  *         dbrefs is null, sourceKeys is 0, or nothing remains selected
+  */
+ private static boolean selectRefsBS(List<DBRefEntry> dbrefs,
+         int sourceKeys, BitSet bsSelect)
+ {
+   if (dbrefs == null || sourceKeys == 0)
+   {
+     return false;
+   }
+   for (int i = 0, n = dbrefs.size(); i < n; i++)
+   {
+     DBRefEntry dbr = dbrefs.get(i);
+     // deselect references whose source is NOT in the requested mask
+     if ((dbr.getSourceKey() & sourceKeys) == 0)
+     {
+       bsSelect.clear(i);
+     }
+   }
+   return !bsSelect.isEmpty();
}
-
/**
- * isDasCoordinateSystem
+ * Returns a (possibly empty) list of those references that match the given
+ * entry, according to the given comparator.
*
- * @param string
- * String
- * @param dBRefEntry
- * DBRefEntry
- * @return boolean true if Source DBRefEntry is compatible with DAS
- * CoordinateSystem name
+ * @param refs
+ * an array of database references to search
+ * @param entry
+ * an entry to compare against
+ * @param comparator
+ * the matching rule; it is called as comparator.matches(entry, ref)
+ * for each element of refs
+ * @return the matching references, in array order; an empty list if refs or
+ * entry is null or nothing matches
*/
-
- public static boolean isDasCoordinateSystem(String string,
- DBRefEntry dBRefEntry)
+ static List<DBRefEntry> searchRefs(DBRefEntry[] refs, DBRefEntry entry,
+ DbRefComp comparator)
{
- if (string == null || dBRefEntry == null)
+ List<DBRefEntry> rfs = new ArrayList<>();
+ if (refs == null || entry == null)
{
- return false;
+ return rfs;
}
- String coordsys = dasCoordinateSystemsLookup.get(string.toLowerCase());
- return coordsys == null ? false
- : coordsys.equals(dBRefEntry.getSource());
+ for (int i = 0; i < refs.length; i++)
+ {
+ // entry is always passed as the first argument, the candidate second
+ if (comparator.matches(entry, refs[i]))
+ {
+ rfs.add(refs[i]);
+ }
+ }
+ return rfs;
}
/**
{
return null;
}
- String canonical = canonicalSourceNameLookup.get(source.toLowerCase());
+ String canonical = canonicalSourceNameLookup
+ .get(source.toLowerCase(Locale.ROOT));
return canonical == null ? source : canonical;
}
* Set of references to search
* @param entry
* pattern to match
- * @param mode SEARCH_MODE_FULL for all; SEARCH_MODE_NO_MAP_NO_VERSION optional
+ * @param mode
+ * SEARCH_MODE_FULL for all; SEARCH_MODE_NO_MAP_NO_VERSION optional
* @return
*/
public static List<DBRefEntry> searchRefs(List<DBRefEntry> ref,
* accession id to match
* @return
*/
- public static List<DBRefEntry> searchRefs(List<DBRefEntry> refs, String accId)
+ public static List<DBRefEntry> searchRefs(List<DBRefEntry> refs,
+         String accId)
{
- List<DBRefEntry> rfs = new ArrayList<DBRefEntry>();
- if (refs == null || accId == null)
- {
- return rfs;
- }
- for (int i = 0, n = refs.size(); i < n; i++)
- {
- DBRefEntry e = refs.get(i);
- if (accId.equals(e.getAccessionId()))
- {
- rfs.add(e);
- }
- }
- return rfs;
-// return searchRefs(refs, new DBRefEntry("", "", accId), matchId, SEARCH_MODE_FULL);
+   // collect, in list order, every reference whose accession id equals accId;
+   // a null list or null accession id yields an empty result
+   List<DBRefEntry> found = new ArrayList<DBRefEntry>();
+   if (refs != null && accId != null)
+   {
+     for (DBRefEntry candidate : refs)
+     {
+       if (accId.equals(candidate.getAccessionId()))
+       {
+         found.add(candidate);
+       }
+     }
+   }
+   return found;
}
-
/**
* Returns a (possibly empty) list of those references that match the given
* entry, according to the given comparator.
* @param entry
* an entry to compare against
* @param comparator
- * @param mode SEARCH_MODE_FULL for all; SEARCH_MODE_NO_MAP_NO_VERSION optional
+ * @param mode
+ * SEARCH_MODE_FULL for all; SEARCH_MODE_NO_MAP_NO_VERSION optional
* @return
*/
- static List<DBRefEntry> searchRefs(List<DBRefEntry> refs, DBRefEntry entry,
- DbRefComp comparator, int mode)
+ static List<DBRefEntry> searchRefs(List<DBRefEntry> refs,
+ DBRefEntry entry, DbRefComp comparator, int mode)
{
List<DBRefEntry> rfs = new ArrayList<DBRefEntry>();
if (refs == null || entry == null)
}
for (int i = 0, n = refs.size(); i < n; i++)
{
- DBRefEntry e = refs.get(i);
+ DBRefEntry e = refs.get(i);
if (comparator.matches(entry, e, SEARCH_MODE_FULL))
{
rfs.add(e);
interface DbRefComp
{
- default public boolean matches(DBRefEntry refa, DBRefEntry refb) {
- return matches(refa, refb, SEARCH_MODE_FULL);
+ /**
+  * Convenience form of the comparison: delegates to
+  * matches(refa, refb, mode) with mode SEARCH_MODE_FULL.
+  */
+ default public boolean matches(DBRefEntry refa, DBRefEntry refb)
+ {
+ return matches(refa, refb, SEARCH_MODE_FULL);
};
+ /**
+  * Answers whether refb matches refa under this comparator's rule.
+  *
+  * @param mode
+  *          a bitwise combination of DB_SOURCE, DB_VERSION, DB_ID and
+  *          DB_MAP (for example SEARCH_MODE_FULL); whether and how the
+  *          flags are honoured is up to the implementation
+  */
public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode);
-}
+ }
/**
* match on all non-null fields in refa
*/
- // TODO unused - remove?
+ // TODO unused - remove? would be broken by equating "" with null
public static DbRefComp matchNonNullonA = new DbRefComp()
{
- @Override
+ @Override
public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode)
{
- if ((mode & DB_SOURCE) != 0 && refa.getSource() == null
+ if ((mode & DB_SOURCE) != 0 && (refa.getSource() == null
|| DBRefUtils.getCanonicalName(refb.getSource()).equals(
- DBRefUtils.getCanonicalName(refa.getSource())))
+ DBRefUtils.getCanonicalName(refa.getSource()))))
{
- if ((mode & DB_VERSION) != 0 && refa.getVersion() == null
- || refb.getVersion().equals(refa.getVersion()))
+ if ((mode & DB_VERSION) != 0 && (refa.getVersion() == null
+ || refb.getVersion().equals(refa.getVersion())))
{
- if ((mode & DB_ID) != 0 && refa.getAccessionId() == null
- || refb.getAccessionId().equals(refa.getAccessionId()))
+ if ((mode & DB_ID) != 0 && (refa.getAccessionId() == null
+ || refb.getAccessionId().equals(refa.getAccessionId())))
{
- if ((mode & DB_MAP) != 0 && refa.getMap() == null || (refb.getMap() != null
- && refb.getMap().equals(refa.getMap())))
+ if ((mode & DB_MAP) != 0
+ && (refa.getMap() == null || (refb.getMap() != null
+ && refb.getMap().equals(refa.getMap()))))
{
return true;
}
};
/**
+ * Builds a database reference from the given database name, version and
+ * accession text and adds it to the sequence. When the database is PDB, a
+ * PDBEntry is added to the sequence as well.
+ * <p>
+ * Used by file parsers to generate DBRefs from annotation within file (eg
+ * Stockholm)
+ *
+ * @param seq
+ *          where to annotate with reference
+ * @param dbname
+ *          database name; if null, nothing is added and null is returned
+ * @param version
+ * @param acn
+ *          accession text; surrounding whitespace is ignored
+ * @return parsed version of entry that was added to seq, or null if none
+ *         was added
+ */
+ public static DBRefEntry parseToDbRef(SequenceI seq, String dbname,
+         String version, String acn)
+ {
+   if (dbname == null)
+   {
+     return null;
+   }
+   final String canonicalDb = DBRefUtils.getCanonicalName(dbname);
+   DBRefEntry parsed = null;
+   if (!canonicalDb.equals(DBRefSource.PDB))
+   {
+     // any other database: the trimmed accession is used as it stands
+     parsed = new DBRefEntry(canonicalDb, version, acn.trim());
+   }
+   else
+   {
+     /*
+      * Check for PFAM style Stockholm PDB accession id citation e.g.
+      * "1WRI A; 7-80;"
+      */
+     Regex pdbCitation = new Regex(
+             "([0-9][0-9A-Za-z]{3})\\s*(.?)\\s*;\\s*([0-9]+)-([0-9]+)");
+     if (!pdbCitation.search(acn.trim()))
+     {
+       Console.errPrintln("Malformed PDB DR line:" + acn);
+     }
+     else
+     {
+       String pdbid = pdbCitation.stringMatched(1);
+       String chain = pdbCitation.stringMatched(2);
+       // a missing or blank chain code is recorded as "_"
+       if (chain == null || chain.equals(" "))
+       {
+         chain = "_";
+       }
+       // groups 3 and 4 (mapped start and end) are not used at present
+       parsed = new DBRefEntry(canonicalDb, version, pdbid + chain);
+       PDBEntry pdbEntry = new PDBEntry();
+       pdbEntry.setId(pdbid);
+       pdbEntry.setType(PDBEntry.Type.PDB);
+       pdbEntry.setChainCode(chain);
+       seq.addPDBId(pdbEntry);
+     }
+   }
+   if (parsed != null)
+   {
+     seq.addDBRef(parsed);
+   }
+   return parsed;
+ }
+
+ /**
* accession ID and DB must be identical. Version is ignored. Map is either
* not defined or is a match (or is compatible?)
*/
DBRefUtils.getCanonicalName(refa.getSource())))
{
// We dont care about version
-
if (refa.getAccessionId() == null
|| refa.getAccessionId().equals(refb.getAccessionId()))
{
};
/**
- * accession ID only must be identical.
- */
- public static DbRefComp matchId = new DbRefComp()
- {
- @Override
- public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode)
- {
- if (refa.getAccessionId() != null && refb.getAccessionId() != null
- && refb.getAccessionId().equals(refa.getAccessionId()))
- {
- return true;
- }
- return false;
- }
- };
-
- /**
- * Parses a DBRefEntry and adds it to the sequence, also a PDBEntry if the
- * database is PDB.
- * <p>
- * Used by file parsers to generate DBRefs from annotation within file (eg
- * Stockholm)
+ * Returns the (possibly empty) list of those supplied dbrefs which have the
+ * specified source database, with a case-insensitive match of source name
*
- * @param dbname
- * @param version
- * @param acn
- * @param seq
- * where to annotate with reference
- * @return parsed version of entry that was added to seq (if any)
+ * @param dbRefs
+ * the references to search (may be null)
+ * @param source
+ * the source name to look for (may be null); it is compared with each
+ * reference's getSource() value as given, ignoring case
+ * @return the matching references, in array order; an empty list if either
+ * argument is null or nothing matches
*/
- public static DBRefEntry parseToDbRef(SequenceI seq, String dbname,
- String version, String acn)
+ public static List<DBRefEntry> searchRefsForSource(DBRefEntry[] dbRefs,
+ String source)
{
- DBRefEntry ref = null;
- if (dbname != null)
+ List<DBRefEntry> matches = new ArrayList<>();
+ if (dbRefs != null && source != null)
{
- String locsrc = DBRefUtils.getCanonicalName(dbname);
- if (locsrc.equals(DBRefSource.PDB))
+ for (DBRefEntry dbref : dbRefs)
{
- /*
- * Check for PFAM style stockhom PDB accession id citation e.g.
- * "1WRI A; 7-80;"
- */
- Regex r = new com.stevesoft.pat.Regex(
- "([0-9][0-9A-Za-z]{3})\\s*(.?)\\s*;\\s*([0-9]+)-([0-9]+)");
- if (r.search(acn.trim()))
- {
- String pdbid = r.stringMatched(1);
- String chaincode = r.stringMatched(2);
- if (chaincode == null)
- {
- chaincode = " ";
- }
- // String mapstart = r.stringMatched(3);
- // String mapend = r.stringMatched(4);
- if (chaincode.equals(" "))
- {
- chaincode = "_";
- }
- // construct pdb ref.
- ref = new DBRefEntry(locsrc, version, pdbid + chaincode);
- PDBEntry pdbr = new PDBEntry();
- pdbr.setId(pdbid);
- pdbr.setType(PDBEntry.Type.PDB);
- pdbr.setChainCode(chaincode);
- seq.addPDBId(pdbr);
- }
- else
+ // equalsIgnoreCase returns false for a null argument, so a reference
+ // with a null source is simply not matched
+ if (source.equalsIgnoreCase(dbref.getSource()))
{
- System.err.println("Malformed PDB DR line:" + acn);
+ matches.add(dbref);
}
}
- else
- {
- // default:
- ref = new DBRefEntry(locsrc, version, acn);
- }
}
- if (ref != null)
- {
- seq.addDBRef(ref);
- }
- return ref;
+ return matches;
}
/**
* @param source
* @return
*/
- public static List<DBRefEntry> searchRefsForSource(List<DBRefEntry> dbRefs,
- String source)
+ public static List<DBRefEntry> searchRefsForSource(
+ List<DBRefEntry> dbRefs, String source)
{
List<DBRefEntry> matches = new ArrayList<DBRefEntry>();
if (dbRefs != null && source != null)
*
* @param sequence
*/
- public static void ensurePrimaries(SequenceI sequence, List<DBRefEntry> pr)
+ public static void ensurePrimaries(SequenceI sequence,
+ List<DBRefEntry> pr)
{
if (pr.size() == 0)
{
int send = sequence.getEnd();
boolean isProtein = sequence.isProtein();
BitSet bsSelect = new BitSet();
-
-// List<DBRefEntry> selfs = new ArrayList<DBRefEntry>();
-// {
-// List<DBRefEntry> selddfs = selectDbRefs(!isprot, sequence.getDBRefs());
-// if (selfs == null || selfs.size() == 0)
-// {
-// // nothing to do
-// return;
-// }
+ // List<DBRefEntry> selfs = new ArrayList<DBRefEntry>();
+ // {
+
+ // List<DBRefEntry> selddfs = selectDbRefs(!isprot, sequence.getDBRefs());
+ // if (selfs == null || selfs.size() == 0)
+ // {
+ // // nothing to do
+ // return;
+ // }
List<DBRefEntry> dbrefs = sequence.getDBRefs();
bsSelect.set(0, dbrefs.size());
-
- if (!selectRefsBS(dbrefs, isProtein ? DBRefSource.PROTEINDBSKEYS : DBRefSource.DNACODINGDBSKEYS, bsSelect))
- return;
-
-// selfs.addAll(selfArray);
-// }
+
+ if (!selectRefsBS(dbrefs, isProtein ? DBRefSource.PROTEIN_MASK
+ : DBRefSource.DNA_CODING_MASK, bsSelect))
+ return;
+
+ // selfs.addAll(selfArray);
+ // }
// filter non-primary refs
for (int ip = pr.size(); --ip >= 0;)
{
DBRefEntry p = pr.get(ip);
- for (int i = bsSelect.nextSetBit(0); i >= 0; i = bsSelect.nextSetBit(i + 1)) {
- if (dbrefs.get(i) == p)
- bsSelect.clear(i);
+ for (int i = bsSelect.nextSetBit(0); i >= 0; i = bsSelect
+ .nextSetBit(i + 1))
+ {
+ if (dbrefs.get(i) == p)
+ bsSelect.clear(i);
}
-// while (selfs.contains(p))
-// {
-// selfs.remove(p);
-// }
+ // while (selfs.contains(p))
+ // {
+ // selfs.remove(p);
+ // }
}
-// List<DBRefEntry> toPromote = new ArrayList<DBRefEntry>();
+ // List<DBRefEntry> toPromote = new ArrayList<DBRefEntry>();
-
-
- for (int ip = pr.size(), keys = 0; --ip >= 0 && keys != DBRefSource.ALL_MASKS;)
+ for (int ip = pr.size(), keys = 0; --ip >= 0
+ && keys != DBRefSource.PRIMARY_MASK;)
{
DBRefEntry p = pr.get(ip);
if (isProtein)
switch (getCanonicalName(p.getSource()))
{
case DBRefSource.UNIPROT:
- keys |= DBRefSource.UNIPROT_MASK;
+ keys |= DBRefSource.UNIPROT_MASK;
break;
case DBRefSource.ENSEMBL:
- keys |= DBRefSource.ENSEMBL_MASK;
+ keys |= DBRefSource.ENSEMBL_MASK;
break;
}
}
{
// TODO: promote transcript refs ??
}
- if (keys == 0 || !selectRefsBS(dbrefs, DBRefSource.PROMTYPES[keys], bsSelect))
- return;
-// if (candidates != null)
+ if (keys == 0 || !selectRefsBS(dbrefs, keys, bsSelect))
+ return;
+ // if (candidates != null)
{
- for (int ic = bsSelect.nextSetBit(0); ic >= 0; ic = bsSelect.nextSetBit(ic + 1))
-// for (int ic = 0, n = candidates.size(); ic < n; ic++)
+ for (int ic = bsSelect.nextSetBit(0); ic >= 0; ic = bsSelect
+ .nextSetBit(ic + 1))
+ // for (int ic = 0, n = candidates.size(); ic < n; ic++)
{
- DBRefEntry cand = dbrefs.get(ic);//candidates.get(ic);
+ DBRefEntry cand = dbrefs.get(ic);// candidates.get(ic);
if (cand.hasMap())
{
- Mapping map = cand.getMap();
- SequenceI cto = map.getTo();
- if (cto != null
- && cto != sequence)
+ Mapping map = cand.getMap();
+ SequenceI cto = map.getTo();
+ if (cto != null && cto != sequence)
{
// can't promote refs with mappings to other sequences
continue;
continue;
}
}
- // and promote
- cand.setVersion(p.getVersion() + " (promoted)");
+ // and promote: the candidate takes over the version of the primary
+ // ref p, marked as promoted - note that this version must be non-null
+ // here, as p must have passed isPrimaryCandidate()
+ cand.setVersion(p.getVersion() + " (promoted)");
bsSelect.clear(ic);
- //selfs.remove(cand);
-// toPromote.add(cand);
+ // selfs.remove(cand);
+ // toPromote.add(cand);
if (!cand.isPrimaryCandidate())
{
- System.out.println(
- "Warning: Couldn't promote dbref " + cand.toString()
- + " for sequence " + sequence.toString());
+ if (Console.isDebugEnabled())
+ {
+ Console.debug(
+ "Warning: Couldn't promote dbref " + cand.toString()
+ + " for sequence " + sequence.toString());
+ }
}
}
}