import jalview.datamodel.SequenceI;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
-import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import java.util.Set;
/*
* lookup from lower-case form of a name to its canonical (standardised) form
*/
- private static Map<String, String> canonicalSourceNameLookup = new HashMap<String, String>();
+ private static Map<String, String> canonicalSourceNameLookup = new HashMap<>();
- private static Map<String, String> dasCoordinateSystemsLookup = new HashMap<String, String>();
static
{
canonicalSourceNameLookup.get(k));
}
- dasCoordinateSystemsLookup.put("pdbresnum", DBRefSource.PDB);
- dasCoordinateSystemsLookup.put("uniprot", DBRefSource.UNIPROT);
- dasCoordinateSystemsLookup.put("embl", DBRefSource.EMBL);
- // dasCoordinateSystemsLookup.put("embl", DBRefSource.EMBLCDS);
}
/**
{
return dbrefs;
}
- HashSet<String> srcs = new HashSet<String>();
+ HashSet<String> srcs = new HashSet<>();
for (String src : sources)
{
- srcs.add(src);
+ srcs.add(src.toUpperCase());
}
- List<DBRefEntry> res = new ArrayList<DBRefEntry>();
+ List<DBRefEntry> res = new ArrayList<>();
for (DBRefEntry dbr : dbrefs)
{
String source = getCanonicalName(dbr.getSource());
- if (srcs.contains(source))
+ if (srcs.contains(source.toUpperCase()))
{
res.add(dbr);
}
}
/**
- * isDasCoordinateSystem
- *
- * @param string
- * String
- * @param dBRefEntry
- * DBRefEntry
- * @return boolean true if Source DBRefEntry is compatible with DAS
- * CoordinateSystem name
- */
-
- public static boolean isDasCoordinateSystem(String string,
- DBRefEntry dBRefEntry)
- {
- if (string == null || dBRefEntry == null)
- {
- return false;
- }
- String coordsys = dasCoordinateSystemsLookup.get(string.toLowerCase());
- return coordsys == null ? false : coordsys.equals(dBRefEntry
- .getSource());
- }
-
- /**
* look up source in an internal list of database reference sources and return
* the canonical jalview name for the source, or the original string if it has
* no canonical form.
static List<DBRefEntry> searchRefs(DBRefEntry[] refs, DBRefEntry entry,
DbRefComp comparator)
{
- List<DBRefEntry> rfs = new ArrayList<DBRefEntry>();
+ List<DBRefEntry> rfs = new ArrayList<>();
if (refs == null || entry == null)
{
return rfs;
public boolean matches(DBRefEntry refa, DBRefEntry refb)
{
if (refa.getSource() == null
- || refb.getSource().equals(refa.getSource()))
+ || DBRefUtils.getCanonicalName(refb.getSource()).equals(
+ DBRefUtils.getCanonicalName(refa.getSource())))
{
if (refa.getVersion() == null
|| refb.getVersion().equals(refa.getVersion()))
if (refa.getAccessionId() == null
|| refb.getAccessionId().equals(refa.getAccessionId()))
{
- if (refa.getMap() == null
- || (refb.getMap() != null && refb.getMap().equals(
- refa.getMap())))
+ if (refa.getMap() == null || (refb.getMap() != null
+ && refb.getMap().equals(refa.getMap())))
{
return true;
}
@Override
public boolean matches(DBRefEntry refa, DBRefEntry refb)
{
- if (nullOrEqual(refa.getSource(), refb.getSource())
+ if (nullOrEqualSource(refa.getSource(), refb.getSource())
&& nullOrEqual(refa.getVersion(), refb.getVersion())
&& nullOrEqual(refa.getAccessionId(), refb.getAccessionId())
&& nullOrEqual(refa.getMap(), refb.getMap()))
public boolean matches(DBRefEntry refa, DBRefEntry refb)
{
if (refa.getSource() != null && refb.getSource() != null
- && refb.getSource().equals(refa.getSource()))
+ && DBRefUtils.getCanonicalName(refb.getSource()).equals(
+ DBRefUtils.getCanonicalName(refa.getSource())))
{
// We dont care about version
if (refa.getAccessionId() != null && refb.getAccessionId() != null
- // FIXME should be && not || here?
+ // FIXME should be && not || here?
|| refb.getAccessionId().equals(refa.getAccessionId()))
{
if ((refa.getMap() == null || refb.getMap() == null)
- || (refa.getMap() != null && refb.getMap() != null && refb
- .getMap().equals(refa.getMap())))
+ || (refa.getMap() != null && refb.getMap() != null
+ && refb.getMap().equals(refa.getMap())))
{
return true;
}
public boolean matches(DBRefEntry refa, DBRefEntry refb)
{
if (refa.getSource() != null && refb.getSource() != null
- && refb.getSource().equals(refa.getSource()))
+ && DBRefUtils.getCanonicalName(refb.getSource()).equals(
+ DBRefUtils.getCanonicalName(refa.getSource())))
{
// We dont care about version
if (refa.getAccessionId() != null && refb.getAccessionId() != null
if ((refa.getMap() == null && refb.getMap() == null)
|| (refa.getMap() != null && refb.getMap() != null))
{
- if ((refb.getMap().getMap() == null && refa.getMap().getMap() == null)
+ if ((refb.getMap().getMap() == null
+ && refa.getMap().getMap() == null)
|| (refb.getMap().getMap() != null
- && refa.getMap().getMap() != null && refb
- .getMap().getMap().getInverse()
- .equals(refa.getMap().getMap())))
+ && refa.getMap().getMap() != null
+ && refb.getMap().getMap().getInverse()
+ .equals(refa.getMap().getMap())))
{
return true;
}
public boolean matches(DBRefEntry refa, DBRefEntry refb)
{
if (refa.getSource() != null && refb.getSource() != null
- && refb.getSource().equals(refa.getSource()))
+ && DBRefUtils.getCanonicalName(refb.getSource()).equals(
+ DBRefUtils.getCanonicalName(refa.getSource())))
{
// We dont care about version
// if ((refa.getVersion()==null || refb.getVersion()==null)
{
return true;
}
- if (refa.getMap() != null
- && refb.getMap() != null
- && ((refb.getMap().getMap() == null && refa.getMap()
- .getMap() == null) || (refb.getMap().getMap() != null
- && refa.getMap().getMap() != null && refb
- .getMap().getMap().equals(refa.getMap().getMap()))))
+ if (refa.getMap() != null && refb.getMap() != null
+ && ((refb.getMap().getMap() == null
+ && refa.getMap().getMap() == null)
+ || (refb.getMap().getMap() != null
+ && refa.getMap().getMap() != null
+ && refb.getMap().getMap()
+ .equals(refa.getMap().getMap()))))
{
return true;
}
public boolean matches(DBRefEntry refa, DBRefEntry refb)
{
if (refa.getSource() != null && refb.getSource() != null
- && refb.getSource().equals(refa.getSource()))
+ && DBRefUtils.getCanonicalName(refb.getSource()).equals(
+ DBRefUtils.getCanonicalName(refa.getSource())))
{
// We dont care about version
return true;
}
if ((refa.getMap() != null && refb.getMap() != null)
- && (refb.getMap().getMap() == null && refa.getMap()
- .getMap() == null)
+ && (refb.getMap().getMap() == null
+ && refa.getMap().getMap() == null)
|| (refb.getMap().getMap() != null
- && refa.getMap().getMap() != null && (refb
- .getMap().getMap().equals(refa.getMap().getMap()))))
+ && refa.getMap().getMap() != null
+ && (refb.getMap().getMap()
+ .equals(refa.getMap().getMap()))))
{
return true;
}
PDBEntry pdbr = new PDBEntry();
pdbr.setId(pdbid);
pdbr.setType(PDBEntry.Type.PDB);
- pdbr.setProperty(new Hashtable());
pdbr.setChainCode(chaincode);
- // pdbr.getProperty().put("CHAIN", chaincode);
seq.addPDBId(pdbr);
}
else
{
return true;
}
- return (o1 == null ? o2.equals(o1) : o1.equals(o2));
+ return o1.equals(o2);
+ }
+
+ /**
+  * Compares two database source names after canonicalising each of them
+  * with {@code DBRefUtils.getCanonicalName}. A null argument is always
+  * treated as a wildcard.
+  *
+  * @param o1
+  *          - null or source string to compare
+  * @param o2
+  *          - null or source string to compare
+  * @return true if either o1 or o2 is null, or if
+  *         DBRefUtils.getCanonicalName(o1).equals(DBRefUtils.getCanonicalName(o2))
+  */
+ public static boolean nullOrEqualSource(String o1, String o2)
+ {
+   if (o1 != null && o2 != null)
+   {
+     // both sides present: compare their canonical forms
+     String canonicalFirst = DBRefUtils.getCanonicalName(o1);
+     String canonicalSecond = DBRefUtils.getCanonicalName(o2);
+     return canonicalFirst.equals(canonicalSecond);
+   }
+   // at least one side is null, which acts as a wildcard
+   return true;
}
/**
public static DBRefEntry[] selectDbRefs(boolean selectDna,
DBRefEntry[] refs)
{
- return selectRefs(refs, selectDna ? DBRefSource.DNACODINGDBS
- : DBRefSource.PROTEINDBS);
+ return selectRefs(refs,
+ selectDna ? DBRefSource.DNACODINGDBS : DBRefSource.PROTEINDBS);
// could attempt to find other cross
// refs here - ie PDB xrefs
// (not dna, not protein seq)
public static List<DBRefEntry> searchRefsForSource(DBRefEntry[] dbRefs,
String source)
{
- List<DBRefEntry> matches = new ArrayList<DBRefEntry>();
+ List<DBRefEntry> matches = new ArrayList<>();
if (dbRefs != null && source != null)
{
for (DBRefEntry dbref : dbRefs)
return matches;
}
+ /**
+  * Promotes direct database references to primary for nucleotide or protein
+  * sequences if they have an appropriate primary ref.
+  * <table>
+  * <tr>
+  * <th>Seq Type</th>
+  * <th>Primary DB</th>
+  * <th>Direct which will be promoted</th>
+  * </tr>
+  * <tr align=center>
+  * <td>peptides</td>
+  * <td>Ensembl</td>
+  * <td>Uniprot</td>
+  * </tr>
+  * <tr align=center>
+  * <td>peptides</td>
+  * <td>Uniprot</td>
+  * <td>Ensembl</td>
+  * </tr>
+  * <tr align=center>
+  * <td>dna</td>
+  * <td>Ensembl</td>
+  * <td>ENA (not yet implemented - see the TODO in the method body)</td>
+  * </tr>
+  * </table>
+  * <p>
+  * A promoted reference has its version replaced by the version of the
+  * primary reference that triggered the promotion, followed by
+  * " (promoted)". A warning is printed to standard output if the reference
+  * still does not report itself as a primary candidate afterwards.
+  *
+  * @param sequence
+  *          the sequence whose database references are examined and, where
+  *          eligible, modified in place
+  */
+ public static void ensurePrimaries(SequenceI sequence)
+ {
+   List<DBRefEntry> pr = sequence.getPrimaryDBRefs();
+   if (pr.isEmpty())
+   {
+     // nothing to do
+     return;
+   }
+
+   // refs of the same molecule type as the sequence (protein refs for a
+   // protein sequence, coding DNA refs otherwise)
+   DBRefEntry[] selfArray = selectDbRefs(!sequence.isProtein(),
+           sequence.getDBRefs());
+   if (selfArray == null || selfArray.length == 0)
+   {
+     // nothing to do
+     return;
+   }
+   List<DBRefEntry> selfs = new ArrayList<>(Arrays.asList(selfArray));
+
+   // drop every ref that is already primary, leaving only the non-primary
+   // refs as promotion candidates
+   for (DBRefEntry p : pr)
+   {
+     while (selfs.remove(p))
+     {
+       // keep removing until no entry equal to p remains
+     }
+   }
+
+   for (DBRefEntry p : pr)
+   {
+     // sources whose refs may be promoted on the strength of primary ref p
+     List<String> promType = new ArrayList<>();
+     if (sequence.isProtein())
+     {
+       switch (getCanonicalName(p.getSource()))
+       {
+       case DBRefSource.UNIPROT:
+         // case DBRefSource.UNIPROTKB:
+         // case DBRefSource.UP_NAME:
+         // search for and promote ensembl
+         promType.add(DBRefSource.ENSEMBL);
+         break;
+       case DBRefSource.ENSEMBL:
+         // search for and promote Uniprot
+         promType.add(DBRefSource.UNIPROT);
+         break;
+       }
+     }
+     else
+     {
+       // TODO: promote transcript refs
+     }
+
+     // collate candidates and promote them
+     DBRefEntry[] candidates = selectRefs(selfs.toArray(new DBRefEntry[0]),
+             promType.toArray(new String[0]));
+     if (candidates == null)
+     {
+       continue;
+     }
+     for (DBRefEntry cand : candidates)
+     {
+       if (cand.hasMap())
+       {
+         if (cand.getMap().getTo() != null
+                 && cand.getMap().getTo() != sequence)
+         {
+           // can't promote refs with mappings to other sequences
+           continue;
+         }
+         // NOTE(review): with '&&' a mapping is only rejected when BOTH
+         // ends differ from the sequence's start/end; the comment below
+         // suggests '||' may have been intended - confirm before changing
+         if (cand.getMap().getMap().getFromLowest() != sequence.getStart()
+                 && cand.getMap().getMap().getFromHighest() != sequence
+                         .getEnd())
+         {
+           // can't promote refs with mappings from a region of this sequence
+           // - eg CDS
+           continue;
+         }
+       }
+       // and promote
+       cand.setVersion(p.getVersion() + " (promoted)");
+       // a promoted ref is no longer a candidate for later primary refs
+       selfs.remove(cand);
+       if (!cand.isPrimaryCandidate())
+       {
+         System.out.println(
+                 "Warning: Couldn't promote dbref " + cand.toString()
+                         + " for sequence " + sequence.toString());
+       }
+     }
+   }
+ }
+
}