matches = new ArrayList<>();
if (dbRefs != null && source != null)
{
for (DBRefEntry dbref : dbRefs)
{
if (source.equalsIgnoreCase(dbref.getSource()))
{
matches.add(dbref);
}
}
}
return matches;
}
/**
 * Builds a DBRefEntry from the given database name, version and accession and
 * adds it to the sequence; if the database is PDB, a PDBEntry is added to the
 * sequence as well.
 *
 * Used by file parsers to generate DBRefs from annotation within file (eg
 * Stockholm)
 *
 * For PDB, the accession has to look like a PFAM style Stockholm citation,
 * e.g. "1WRI A; 7-80;". A null or non-matching accession is reported on
 * stderr and nothing is added to the sequence.
 *
 * @param seq     where to annotate with reference
 * @param dbname  database name, canonicalised with
 *                DBRefUtils.getCanonicalName; if null nothing is done
 * @param version version passed on to the new DBRefEntry
 * @param acn     accession; for PDB it is parsed as described above, otherwise
 *                it is passed on unchanged to the new DBRefEntry
 * @return parsed version of entry that was added to seq, or null if nothing
 *         was added
 */
public static DBRefEntry parseToDbRef(SequenceI seq, String dbname, String version, String acn) {
  if (dbname == null) {
    return null;
  }
  String locsrc = DBRefUtils.getCanonicalName(dbname);
  DBRefEntry ref = null;
  if (locsrc.equals(DBRefSource.PDB)) {
    /*
     * Check for PFAM style Stockholm PDB accession id citation e.g. "1WRI A; 7-80;"
     */
    Regex r = new com.stevesoft.pat.Regex("([0-9][0-9A-Za-z]{3})\\s*(.?)\\s*;\\s*([0-9]+)-([0-9]+)");
    // a null accession cannot be parsed: report it as malformed instead of
    // failing with a NullPointerException on trim()
    if (acn != null && r.search(acn.trim())) {
      String pdbid = r.stringMatched(1);
      String chaincode = r.stringMatched(2);
      // a missing or blank chain code is stored as "_"
      if (chaincode == null || chaincode.equals(" ")) {
        chaincode = "_";
      }
      // groups 3 and 4 (the numeric range after the ';') are matched but unused
      // construct pdb ref.
      ref = new DBRefEntry(locsrc, version, pdbid + chaincode);
      PDBEntry pdbr = new PDBEntry();
      pdbr.setId(pdbid);
      pdbr.setType(PDBEntry.Type.PDB);
      pdbr.setChainCode(chaincode);
      seq.addPDBId(pdbr);
    } else {
      System.err.println("Malformed PDB DR line:" + acn);
    }
  } else {
    // default: use the accession as given
    ref = new DBRefEntry(locsrc, version, acn);
  }
  if (ref != null) {
    seq.addDBRef(ref);
  }
  return ref;
}
/**
 * Null-tolerant equality test: a null on either side matches anything.
 *
 * @param o1 first object, may be null
 * @param o2 second object, may be null
 * @return true if o1 or o2 is null, otherwise the result of o1.equals(o2)
 */
public static boolean nullOrEqual(Object o1, Object o2) {
  return o1 == null || o2 == null || o1.equals(o2);
}
/**
 * Compares two source names after canonicalising both with
 * DBRefUtils.getCanonicalName. A null on either side acts as a wildcard.
 *
 * @param o1 - null or source string to compare
 * @param o2 - null or source string to compare
 * @return true if either o1 or o2 is null, or if the canonical name of o1
 *         equals the canonical name of o2
 */
public static boolean nullOrEqualSource(String o1, String o2) {
  if (o1 != null && o2 != null) {
    String canonical1 = DBRefUtils.getCanonicalName(o1);
    String canonical2 = DBRefUtils.getCanonicalName(o2);
    return canonical1.equals(canonical2);
  }
  // null is always a wildcard
  return true;
}
/**
 * Filters a set of references down to either the DNA or the protein ones by
 * delegating to selectRefs with DBRefSource.DNACODINGDBS or
 * DBRefSource.PROTEINDBS respectively.
 *
 * @param selectDna if true, select references to 'standard' DNA databases, else
 *                  to 'standard' peptide databases
 * @param refs      a set of references to select from
 * @return whatever selectRefs returns for the chosen database set
 */
public static List selectDbRefs(boolean selectDna, List refs) {
  // could attempt to find other cross refs here - ie PDB xrefs
  // (not dna, not protein seq)
  if (selectDna) {
    return selectRefs(refs, DBRefSource.DNACODINGDBS);
  }
  return selectRefs(refs, DBRefSource.PROTEINDBS);
}
/**
 * Returns the (possibly empty) list of those supplied dbrefs which have the
 * specified source database, with a case-insensitive match of source name
 *
 * @param dbRefs references to search; may be null
 * @param source source database name to match; may be null
 * @return a new list holding the matching entries in their original order;
 *         empty (never null) if dbRefs or source is null or nothing matches
 */
public static List<DBRefEntry> searchRefsForSource(List<DBRefEntry> dbRefs, String source) {
  List<DBRefEntry> matches = new ArrayList<>();
  if (dbRefs == null || source == null) {
    return matches;
  }
  for (DBRefEntry dbref : dbRefs) {
    if (source.equalsIgnoreCase(dbref.getSource())) {
      matches.add(dbref);
    }
  }
  return matches;
}
/**
 * Promotes direct database references to primary for nucleotide or protein
 * sequences if they have an appropriate primary ref.
 *
 * Intended promotions, as originally documented:
 *
 * <pre>
 * Seq Type | Primary DB | Direct which will be promoted
 * ---------+------------+------------------------------
 * peptides | Ensembl    | Uniprot
 * dna      | Ensembl    | ENA
 * </pre>
 *
 * As implemented here, only protein sequences accumulate a source mask (from
 * Uniprot and Ensembl entries of pr); for a non-protein sequence the mask
 * stays zero and the method returns without promoting anything.
 *
 * A candidate is promoted by setting its version to the version of the entry
 * of pr being processed, followed by " (promoted)".
 *
 * @param sequence the sequence whose database references are examined
 * @param pr       presumably the existing primary references of the sequence
 *                 (TODO confirm against callers); if empty nothing is done
 */
public static void ensurePrimaries(SequenceI sequence, List<DBRefEntry> pr) {
  if (pr.isEmpty()) {
    // nothing to do
    return;
  }
  int sstart = sequence.getStart();
  int send = sequence.getEnd();
  boolean isProtein = sequence.isProtein();
  BitSet bsSelect = new BitSet();
  List<DBRefEntry> dbrefs = sequence.getDBRefs();
  // start with every dbref selected
  bsSelect.set(0, dbrefs.size());
  // NOTE(review): selectRefsBS presumably narrows bsSelect to refs matching
  // the mask and reports whether any remain - confirm against its definition
  if (!selectRefsBS(dbrefs, isProtein ? DBRefSource.PROTEIN_MASK : DBRefSource.DNA_CODING_MASK, bsSelect)) {
    return;
  }
  // deselect every dbref that is the same object as an entry of pr
  for (int ip = pr.size(); --ip >= 0;) {
    DBRefEntry p = pr.get(ip);
    for (int i = bsSelect.nextSetBit(0); i >= 0; i = bsSelect.nextSetBit(i + 1)) {
      if (dbrefs.get(i) == p) {
        bsSelect.clear(i);
      }
    }
  }
  // walk pr from last to first, accumulating source masks in keys, until
  // keys equals DBRefSource.PRIMARY_MASK
  for (int ip = pr.size(), keys = 0; --ip >= 0 && keys != DBRefSource.PRIMARY_MASK;) {
    DBRefEntry p = pr.get(ip);
    if (isProtein) {
      switch (getCanonicalName(p.getSource())) {
      case DBRefSource.UNIPROT:
        keys |= DBRefSource.UNIPROT_MASK;
        break;
      case DBRefSource.ENSEMBL:
        keys |= DBRefSource.ENSEMBL_MASK;
        break;
      }
    } else {
      // TODO: promote transcript refs ??
    }
    if (keys == 0 || !selectRefsBS(dbrefs, keys, bsSelect)) {
      return;
    }
    for (int ic = bsSelect.nextSetBit(0); ic >= 0; ic = bsSelect.nextSetBit(ic + 1)) {
      DBRefEntry cand = dbrefs.get(ic);
      if (cand.hasMap()) {
        Mapping map = cand.getMap();
        SequenceI cto = map.getTo();
        if (cto != null && cto != sequence) {
          // can't promote refs with mappings to other sequences
          continue;
        }
        MapList mlist = map.getMap();
        // NOTE(review): with && a mapping is only skipped when BOTH ends differ
        // from the sequence range; the comment below suggests || may have been
        // intended - confirm before changing
        if (mlist.getFromLowest() != sstart && mlist.getFromHighest() != send) {
          // can't promote refs with mappings from a region of this sequence
          // - eg CDS
          continue;
        }
      }
      // and promote - note that version must be non-null here,
      // as p must have passed isPrimaryCandidate()
      cand.setVersion(p.getVersion() + " (promoted)");
      bsSelect.clear(ic);
      if (!cand.isPrimaryCandidate()) {
        System.out.println("Warning: Couldn't promote dbref " + cand.toString() + " for sequence "
            + sequence.toString());
      }
    }
  }
}
}