import jalview.datamodel.DBRefEntry;
import jalview.datamodel.DBRefSource;
+import jalview.datamodel.Mapping;
import jalview.datamodel.PDBEntry;
import jalview.datamodel.SequenceI;
import java.util.ArrayList;
-import java.util.Arrays;
+import java.util.BitSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
-import java.util.Set;
import com.stevesoft.pat.Regex;
*/
public class DBRefUtils
{
+
+ public final static int DB_SOURCE = 1; // bit flag: the reference's source
+ public final static int DB_VERSION = 2; // bit flag: the reference's version
+ public final static int DB_ID = 4; // bit flag: the reference's accession id
+ public final static int DB_MAP = 8; // bit flag: the reference's map
+ // The DB_* values above are distinct single bits, so they can be OR-ed together
+ // into a search mode and tested individually with '&'.
+ public final static int SEARCH_MODE_NO_MAP_NO_VERSION = DB_SOURCE | DB_ID; // source and id flags only
+ public final static int SEARCH_MODE_FULL = DB_SOURCE | DB_VERSION | DB_ID | DB_MAP; // all four flags
+
/*
* lookup from lower-case form of a name to its canonical (standardised) form
*/
canonicalSourceNameLookup.put("ensembl-gn", DBRefSource.ENSEMBL);
// Make sure we have lowercase entries for all canonical string lookups
- Set<String> keys = canonicalSourceNameLookup.keySet();
- for (String k : keys)
- {
- canonicalSourceNameLookup.put(k.toLowerCase(),
- canonicalSourceNameLookup.get(k));
- }
+// BH 2019.01.25 unnecessary -- they are all lower case already
+ //Set<String> keys = canonicalSourceNameLookup.keySet();
+// for (String k : keys)
+// {
+// canonicalSourceNameLookup.put(k.toLowerCase(),
+// canonicalSourceNameLookup.get(k));
+// }
dasCoordinateSystemsLookup.put("pdbresnum", DBRefSource.PDB);
dasCoordinateSystemsLookup.put("uniprot", DBRefSource.UNIPROT);
* array of sources to select
* @return
*/
- public static DBRefEntry[] selectRefs(DBRefEntry[] dbrefs,
+ public static List<DBRefEntry> selectRefs(List<DBRefEntry> dbrefs,
String[] sources)
{
if (dbrefs == null || sources == null)
{
return dbrefs;
}
+
+ // BH TODO
HashSet<String> srcs = new HashSet<String>();
for (String src : sources)
{
srcs.add(src.toUpperCase());
}
+ int nrefs = dbrefs.size();
List<DBRefEntry> res = new ArrayList<DBRefEntry>();
- for (DBRefEntry dbr : dbrefs)
+ for (int ib = 0; ib < nrefs; ib++)
{
+ DBRefEntry dbr = dbrefs.get(ib);
String source = getCanonicalName(dbr.getSource());
if (srcs.contains(source.toUpperCase()))
{
if (res.size() > 0)
{
- DBRefEntry[] reply = new DBRefEntry[res.size()];
- return res.toArray(reply);
+ //List<DBRefEntry> reply = new DBRefEntry[res.size()];
+ return res;//.toArray(reply);
}
return null;
}
+ /**
+  * Narrows a selection of database references to those whose source key
+  * occurs in the given key string. Bit i of bsSelect corresponds to
+  * dbrefs.get(i); the bit is cleared for every reference whose source key is
+  * null or is not found within sourceKeys. Bits are never set here, so
+  * repeated calls can only shrink the selection.
+  *
+  * @param dbrefs
+  *          references to test; if null, bsSelect is left untouched
+  * @param sourceKeys
+  *          string searched (substring match) for each reference's source
+  *          key; if null, bsSelect is left untouched
+  * @param bsSelect
+  *          selection to narrow in place
+  * @return false if dbrefs or sourceKeys is null, otherwise true if at least
+  *         one bit remains set
+  */
+ private static boolean selectRefsBS(List<DBRefEntry> dbrefs,
+         String sourceKeys, BitSet bsSelect)
+ {
+   if (dbrefs == null || sourceKeys == null)
+   {
+     return false;
+   }
+   for (int i = 0, n = dbrefs.size(); i < n; i++)
+   {
+     String sourceKey = dbrefs.get(i).getSourceKey();
+     // a null key can never match, and String.indexOf(null) would throw
+     if (sourceKey == null || sourceKeys.indexOf(sourceKey) < 0)
+     {
+       bsSelect.clear(i);
+     }
+   }
+   return !bsSelect.isEmpty();
+ }
+
+
/**
* isDasCoordinateSystem
*
* Set of references to search
* @param entry
* pattern to match
+ * @param mode bit mask of DB_* flags: SEARCH_MODE_FULL contains source, version, id and map; SEARCH_MODE_NO_MAP_NO_VERSION contains source and id only
* @return
*/
- public static List<DBRefEntry> searchRefs(DBRefEntry[] ref,
- DBRefEntry entry)
+ public static List<DBRefEntry> searchRefs(List<DBRefEntry> ref,
+ DBRefEntry entry, int mode)
{
// Delegates to the comparator-based overload, using the
// matchDbAndIdAndEitherMapOrEquivalentMapList comparator and forwarding mode.
return searchRefs(ref, entry,
- matchDbAndIdAndEitherMapOrEquivalentMapList);
+ matchDbAndIdAndEitherMapOrEquivalentMapList, mode);
}
/**
* accession id to match
* @return
*/
- public static List<DBRefEntry> searchRefs(DBRefEntry[] refs, String accId)
+ public static List<DBRefEntry> searchRefs(List<DBRefEntry> refs, String accId)
{
- return searchRefs(refs, new DBRefEntry("", "", accId), matchId);
+ List<DBRefEntry> rfs = new ArrayList<DBRefEntry>();
+ if (refs == null || accId == null)
+ {
+ return rfs;
+ }
+ for (int i = 0, n = refs.size(); i < n; i++)
+ {
+ DBRefEntry e = refs.get(i);
+ if (accId.equals(e.getAccessionId()))
+ {
+ rfs.add(e);
+ }
+ }
+ return rfs;
+// return searchRefs(refs, new DBRefEntry("", "", accId), matchId, SEARCH_MODE_FULL);
}
+
/**
* Returns a (possibly empty) list of those references that match the given
* entry, according to the given comparator.
* @param entry
* an entry to compare against
* @param comparator
+ * @param mode bit mask of DB_* flags: SEARCH_MODE_FULL contains source, version, id and map; SEARCH_MODE_NO_MAP_NO_VERSION contains source and id only
* @return
*/
- static List<DBRefEntry> searchRefs(DBRefEntry[] refs, DBRefEntry entry,
- DbRefComp comparator)
+ static List<DBRefEntry> searchRefs(List<DBRefEntry> refs, DBRefEntry entry,
+ DbRefComp comparator, int mode)
{
// result is never null: a null list or null entry yields an empty list
List<DBRefEntry> rfs = new ArrayList<DBRefEntry>();
if (refs == null || entry == null)
{
return rfs;
}
- for (int i = 0; i < refs.length; i++)
+ for (int i = 0, n = refs.size(); i < n; i++)
{
- if (comparator.matches(entry, refs[i]))
+ DBRefEntry e = refs.get(i);
+ // hand the caller's mode to the comparator; it was previously discarded
+ // in favour of a hard-coded SEARCH_MODE_FULL
+ if (comparator.matches(entry, e, mode))
{
- rfs.add(refs[i]);
+ rfs.add(e);
}
}
return rfs;
interface DbRefComp
{
- public boolean matches(DBRefEntry refa, DBRefEntry refb);
- }
+ default public boolean matches(DBRefEntry refa, DBRefEntry refb) {
+ return matches(refa, refb, SEARCH_MODE_FULL);
+ };
+
+ public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode);
+}
/**
* match on all non-null fields in refa
// TODO unused - remove?
public static DbRefComp matchNonNullonA = new DbRefComp()
{
- @Override
- public boolean matches(DBRefEntry refa, DBRefEntry refb)
+ @Override
+ public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode)
{
- if (refa.getSource() == null
+ if ((mode & DB_SOURCE) != 0 && refa.getSource() == null
|| DBRefUtils.getCanonicalName(refb.getSource()).equals(
DBRefUtils.getCanonicalName(refa.getSource())))
{
- if (refa.getVersion() == null
+ if ((mode & DB_VERSION) != 0 && refa.getVersion() == null
|| refb.getVersion().equals(refa.getVersion()))
{
- if (refa.getAccessionId() == null
+ if ((mode & DB_ID) != 0 && refa.getAccessionId() == null
|| refb.getAccessionId().equals(refa.getAccessionId()))
{
- if (refa.getMap() == null || (refb.getMap() != null
+ if ((mode & DB_MAP) != 0 && refa.getMap() == null || (refb.getMap() != null
&& refb.getMap().equals(refa.getMap())))
{
return true;
public static DbRefComp matchEitherNonNull = new DbRefComp()
{
@Override
- public boolean matches(DBRefEntry refa, DBRefEntry refb)
+ public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode)
{
if (nullOrEqualSource(refa.getSource(), refb.getSource())
&& nullOrEqual(refa.getVersion(), refb.getVersion())
}
return false;
}
+
};
/**
public static DbRefComp matchDbAndIdAndEitherMap = new DbRefComp()
{
@Override
- public boolean matches(DBRefEntry refa, DBRefEntry refb)
+ public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode)
{
if (refa.getSource() != null && refb.getSource() != null
&& DBRefUtils.getCanonicalName(refb.getSource()).equals(
public static DbRefComp matchDbAndIdAndComplementaryMapList = new DbRefComp()
{
@Override
- public boolean matches(DBRefEntry refa, DBRefEntry refb)
+ public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode)
{
if (refa.getSource() != null && refb.getSource() != null
&& DBRefUtils.getCanonicalName(refb.getSource()).equals(
public static DbRefComp matchDbAndIdAndEquivalentMapList = new DbRefComp()
{
@Override
- public boolean matches(DBRefEntry refa, DBRefEntry refb)
+ public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode)
{
if (refa.getSource() != null && refb.getSource() != null
&& DBRefUtils.getCanonicalName(refb.getSource()).equals(
public static DbRefComp matchDbAndIdAndEitherMapOrEquivalentMapList = new DbRefComp()
{
@Override
- public boolean matches(DBRefEntry refa, DBRefEntry refb)
+ public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode)
{
if (refa.getSource() != null && refb.getSource() != null
&& DBRefUtils.getCanonicalName(refb.getSource()).equals(
public static DbRefComp matchId = new DbRefComp()
{
@Override
- public boolean matches(DBRefEntry refa, DBRefEntry refb)
+ public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode)
{
if (refa.getAccessionId() != null && refb.getAccessionId() != null
&& refb.getAccessionId().equals(refa.getAccessionId()))
* a set of references to select from
* @return
*/
- public static DBRefEntry[] selectDbRefs(boolean selectDna,
- DBRefEntry[] refs)
+ public static List<DBRefEntry> selectDbRefs(boolean selectDna,
+ List<DBRefEntry> refs)
{
return selectRefs(refs,
selectDna ? DBRefSource.DNACODINGDBS : DBRefSource.PROTEINDBS);
* @param source
* @return
*/
- public static List<DBRefEntry> searchRefsForSource(DBRefEntry[] dbRefs,
+ public static List<DBRefEntry> searchRefsForSource(List<DBRefEntry> dbRefs,
String source)
{
List<DBRefEntry> matches = new ArrayList<DBRefEntry>();
*
* @param sequence
*/
- public static void ensurePrimaries(SequenceI sequence)
+ public static void ensurePrimaries(SequenceI sequence, List<DBRefEntry> pr)
{
- List<DBRefEntry> pr = sequence.getPrimaryDBRefs();
if (pr.size() == 0)
{
// nothing to do
return;
}
- List<DBRefEntry> selfs = new ArrayList<DBRefEntry>();
- {
- DBRefEntry[] selfArray = selectDbRefs(!sequence.isProtein(),
- sequence.getDBRefs());
- if (selfArray == null || selfArray.length == 0)
- {
- // nothing to do
- return;
- }
- selfs.addAll(Arrays.asList(selfArray));
- }
+ int sstart = sequence.getStart();
+ int send = sequence.getEnd();
+ boolean isProtein = sequence.isProtein();
+ BitSet bsSelect = new BitSet();
+
+// List<DBRefEntry> selfs = new ArrayList<DBRefEntry>();
+// {
+
+// List<DBRefEntry> selddfs = selectDbRefs(!isprot, sequence.getDBRefs());
+// if (selfs == null || selfs.size() == 0)
+// {
+// // nothing to do
+// return;
+// }
+
+ List<DBRefEntry> dbrefs = sequence.getDBRefs();
+ bsSelect.set(0, dbrefs.size());
+
+ if (!selectRefsBS(dbrefs, isProtein ? DBRefSource.PROTEINDBSKEYS : DBRefSource.DNACODINGDBSKEYS, bsSelect))
+ return;
+
+// selfs.addAll(selfArray);
+// }
// filter non-primary refs
- for (DBRefEntry p : pr)
+ for (int ip = pr.size(); --ip >= 0;)
{
- while (selfs.contains(p))
- {
- selfs.remove(p);
+ DBRefEntry p = pr.get(ip);
+ for (int i = bsSelect.nextSetBit(0); i >= 0; i = bsSelect.nextSetBit(i + 1)) {
+ if (dbrefs.get(i) == p)
+ bsSelect.clear(i);
}
+// while (selfs.contains(p))
+// {
+// selfs.remove(p);
+// }
}
- List<DBRefEntry> toPromote = new ArrayList<DBRefEntry>();
+// List<DBRefEntry> toPromote = new ArrayList<DBRefEntry>();
- for (DBRefEntry p : pr)
+
+
+ for (int ip = pr.size(), keys = 0; --ip >= 0 && keys != DBRefSource.ALL_MASKS;)
{
- List<String> promType = new ArrayList<String>();
- if (sequence.isProtein())
+ DBRefEntry p = pr.get(ip);
+ if (isProtein)
{
switch (getCanonicalName(p.getSource()))
{
case DBRefSource.UNIPROT:
- // case DBRefSource.UNIPROTKB:
- // case DBRefSource.UP_NAME:
- // search for and promote ensembl
- promType.add(DBRefSource.ENSEMBL);
+ keys |= DBRefSource.UNIPROT_MASK;
break;
case DBRefSource.ENSEMBL:
- // search for and promote Uniprot
- promType.add(DBRefSource.UNIPROT);
+ keys |= DBRefSource.ENSEMBL_MASK;
break;
}
}
else
{
- // TODO: promote transcript refs
+ // TODO: promote transcript refs ??
}
-
- // collate candidates and promote them
- DBRefEntry[] candidates = selectRefs(selfs.toArray(new DBRefEntry[0]),
- promType.toArray(new String[0]));
- if (candidates != null)
+ if (keys == 0 || !selectRefsBS(dbrefs, DBRefSource.PROMTYPES[keys], bsSelect))
+ return;
+// if (candidates != null)
{
- for (DBRefEntry cand : candidates)
+ for (int ic = bsSelect.nextSetBit(0); ic >= 0; ic = bsSelect.nextSetBit(ic + 1))
+// for (int ic = 0, n = candidates.size(); ic < n; ic++)
{
+ DBRefEntry cand = dbrefs.get(ic);//candidates.get(ic);
if (cand.hasMap())
{
- if (cand.getMap().getTo() != null
- && cand.getMap().getTo() != sequence)
+ Mapping map = cand.getMap();
+ SequenceI cto = map.getTo();
+ if (cto != null
+ && cto != sequence)
{
// can't promote refs with mappings to other sequences
continue;
}
- if (cand.getMap().getMap().getFromLowest() != sequence
- .getStart()
- && cand.getMap().getMap().getFromHighest() != sequence
- .getEnd())
+ MapList mlist = map.getMap();
+ if (mlist.getFromLowest() != sstart
+ && mlist.getFromHighest() != send)
{
// can't promote refs with mappings from a region of this sequence
// - eg CDS
}
// and promote
cand.setVersion(p.getVersion() + " (promoted)");
- selfs.remove(cand);
- toPromote.add(cand);
+ bsSelect.clear(ic);
+ //selfs.remove(cand);
+// toPromote.add(cand);
if (!cand.isPrimaryCandidate())
{
System.out.println(