import jalview.datamodel.SequenceI;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
-import java.util.Hashtable;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import com.stevesoft.pat.Regex;
canonicalSourceNameLookup.put("ensembl-tr", DBRefSource.ENSEMBL);
canonicalSourceNameLookup.put("ensembl-gn", DBRefSource.ENSEMBL);
+ // Make sure we have lowercase entries for all canonical string lookups.
+ // Iterate over a snapshot of the keys rather than the live keySet() view:
+ // adding a new (lowercased) key to the map while walking its own key view
+ // makes a fail-fast iterator (e.g. HashMap's) throw
+ // ConcurrentModificationException as soon as any key is not already
+ // lowercase.
+ Set<String> keys = new HashSet<String>(
+         canonicalSourceNameLookup.keySet());
+ for (String k : keys)
+ {
+   canonicalSourceNameLookup.put(k.toLowerCase(),
+           canonicalSourceNameLookup.get(k));
+ }
dasCoordinateSystemsLookup.put("pdbresnum", DBRefSource.PDB);
dasCoordinateSystemsLookup.put("uniprot", DBRefSource.UNIPROT);
public boolean matches(DBRefEntry refa, DBRefEntry refb)
{
if (refa.getSource() == null
- || refb.getSource().equals(refa.getSource()))
+ || DBRefUtils.getCanonicalName(refb.getSource()).equals(
+ DBRefUtils.getCanonicalName(refa.getSource())))
{
if (refa.getVersion() == null
|| refb.getVersion().equals(refa.getVersion()))
@Override
public boolean matches(DBRefEntry refa, DBRefEntry refb)
{
- if (nullOrEqual(refa.getSource(), refb.getSource())
+ if (nullOrEqualSource(refa.getSource(), refb.getSource())
&& nullOrEqual(refa.getVersion(), refb.getVersion())
&& nullOrEqual(refa.getAccessionId(), refb.getAccessionId())
&& nullOrEqual(refa.getMap(), refb.getMap()))
@Override
public boolean matches(DBRefEntry refa, DBRefEntry refb)
{
- if (refa.getSource() != null && refb.getSource() != null
- && refb.getSource().equals(refa.getSource()))
+ if (refa.getSource() != null
+ && refb.getSource() != null
+ && DBRefUtils.getCanonicalName(refb.getSource()).equals(
+ DBRefUtils.getCanonicalName(refa.getSource())))
{
// We dont care about version
if (refa.getAccessionId() != null && refb.getAccessionId() != null
@Override
public boolean matches(DBRefEntry refa, DBRefEntry refb)
{
- if (refa.getSource() != null && refb.getSource() != null
- && refb.getSource().equals(refa.getSource()))
+ if (refa.getSource() != null
+ && refb.getSource() != null
+ && DBRefUtils.getCanonicalName(refb.getSource()).equals(
+ DBRefUtils.getCanonicalName(refa.getSource())))
{
// We dont care about version
if (refa.getAccessionId() != null && refb.getAccessionId() != null
@Override
public boolean matches(DBRefEntry refa, DBRefEntry refb)
{
- if (refa.getSource() != null && refb.getSource() != null
- && refb.getSource().equals(refa.getSource()))
+ if (refa.getSource() != null
+ && refb.getSource() != null
+ && DBRefUtils.getCanonicalName(refb.getSource()).equals(
+ DBRefUtils.getCanonicalName(refa.getSource())))
{
// We dont care about version
// if ((refa.getVersion()==null || refb.getVersion()==null)
@Override
public boolean matches(DBRefEntry refa, DBRefEntry refb)
{
- if (refa.getSource() != null && refb.getSource() != null
- && refb.getSource().equals(refa.getSource()))
+ if (refa.getSource() != null
+ && refb.getSource() != null
+ && DBRefUtils.getCanonicalName(refb.getSource()).equals(
+ DBRefUtils.getCanonicalName(refa.getSource())))
{
// We dont care about version
PDBEntry pdbr = new PDBEntry();
pdbr.setId(pdbid);
pdbr.setType(PDBEntry.Type.PDB);
- pdbr.setProperty(new Hashtable());
pdbr.setChainCode(chaincode);
- // pdbr.getProperty().put("CHAIN", chaincode);
seq.addPDBId(pdbr);
}
else
{
return true;
}
- return (o1 == null ? o2.equals(o1) : o1.equals(o2));
+ return o1.equals(o2);
+ }
+
+ /**
+  * Compares two database source names after canonicalising them. A null
+  * argument is always treated as a wildcard.
+  *
+  * @param o1
+  *          null, or a source string to compare
+  * @param o2
+  *          null, or a source string to compare
+  * @return true if either o1 or o2 is null, or if
+  *         DBRefUtils.getCanonicalName(o1) equals
+  *         DBRefUtils.getCanonicalName(o2)
+  */
+ public static boolean nullOrEqualSource(String o1, String o2)
+ {
+   boolean eitherIsWildcard = (o1 == null) || (o2 == null);
+   if (!eitherIsWildcard)
+   {
+     // both present: compare the canonical forms (o1 is resolved first)
+     String canonicalFirst = DBRefUtils.getCanonicalName(o1);
+     String canonicalSecond = DBRefUtils.getCanonicalName(o2);
+     return canonicalFirst.equals(canonicalSecond);
+   }
+   return true;
}
/**
return matches;
}
+ /**
+  * Promotes direct database references of a sequence when it already has an
+  * appropriate primary ref. A promoted ref has its version replaced by the
+  * version of the primary ref it was matched against followed by
+  * " (promoted)"; a warning is printed if the ref still does not report
+  * itself as a primary candidate afterwards.
+  * <table>
+  * <tr>
+  * <th>Seq Type</th>
+  * <th>Primary DB</th>
+  * <th>Direct which will be promoted</th>
+  * </tr>
+  * <tr align=center>
+  * <td>peptides</td>
+  * <td>Ensembl</td>
+  * <td>Uniprot</td>
+  * </tr>
+  * <tr align=center>
+  * <td>peptides</td>
+  * <td>Uniprot</td>
+  * <td>Ensembl</td>
+  * </tr>
+  * <tr align=center>
+  * <td>dna</td>
+  * <td>Ensembl</td>
+  * <td>ENA (not yet implemented - see TODO below)</td>
+  * </tr>
+  * </table>
+  *
+  * @param sequence
+  *          the sequence whose database references are inspected and updated
+  *          in place
+  */
+ public static void ensurePrimaries(SequenceI sequence)
+ {
+   List<DBRefEntry> pr = sequence.getPrimaryDBRefs();
+   if (pr.isEmpty())
+   {
+     // nothing to do
+     return;
+   }
+
+   // presumably selects the refs matching the sequence's own type (dna or
+   // protein) - confirm against selectDbRefs
+   DBRefEntry[] selfArray = selectDbRefs(!sequence.isProtein(),
+           sequence.getDBRefs());
+   if (selfArray == null || selfArray.length == 0)
+   {
+     // nothing to do
+     return;
+   }
+   List<DBRefEntry> selfs = new ArrayList<DBRefEntry>(
+           Arrays.asList(selfArray));
+
+   // drop every occurrence of a primary ref, leaving only non-primary refs
+   for (DBRefEntry p : pr)
+   {
+     while (selfs.contains(p))
+     {
+       selfs.remove(p);
+     }
+   }
+
+   for (DBRefEntry p : pr)
+   {
+     List<String> promType = new ArrayList<String>();
+     if (sequence.isProtein())
+     {
+       // switch on a null String throws NullPointerException, and sources
+       // are treated as nullable elsewhere in this class - so guard first
+       String canonicalSource = getCanonicalName(p.getSource());
+       if (canonicalSource != null)
+       {
+         switch (canonicalSource)
+         {
+         case DBRefSource.UNIPROT:
+           // case DBRefSource.UNIPROTKB:
+           // case DBRefSource.UP_NAME:
+           // search for and promote ensembl
+           promType.add(DBRefSource.ENSEMBL);
+           break;
+         case DBRefSource.ENSEMBL:
+           // search for and promote Uniprot
+           promType.add(DBRefSource.UNIPROT);
+           break;
+         }
+       }
+     }
+     else
+     {
+       // TODO: promote transcript refs
+     }
+
+     // collate candidates and promote them
+     DBRefEntry[] candidates = selectRefs(
+             selfs.toArray(new DBRefEntry[0]),
+             promType.toArray(new String[0]));
+     if (candidates == null)
+     {
+       continue;
+     }
+     for (DBRefEntry cand : candidates)
+     {
+       if (cand.hasMap())
+       {
+         if (cand.getMap().getTo() != null
+                 && cand.getMap().getTo() != sequence)
+         {
+           // can't promote refs with mappings to other sequences
+           continue;
+         }
+         // NOTE(review): with '&&' a mapping is only rejected when BOTH
+         // ends differ from the sequence bounds; a region sharing one end
+         // with the sequence is still promoted - confirm this is intended
+         if (cand.getMap().getMap().getFromLowest() != sequence
+                 .getStart()
+                 && cand.getMap().getMap().getFromHighest() != sequence
+                         .getEnd())
+         {
+           // can't promote refs with mappings from a region of this sequence
+           // - eg CDS
+           continue;
+         }
+       }
+       // and promote; the candidate is removed from selfs so that it is
+       // not offered again for a later primary ref
+       cand.setVersion(p.getVersion() + " (promoted)");
+       selfs.remove(cand);
+       if (!cand.isPrimaryCandidate())
+       {
+         System.out.println("Warning: Couldn't promote dbref "
+                 + cand.toString() + " for sequence "
+                 + sequence.toString());
+       }
+     }
+   }
+ }
+
}