import jalview.datamodel.SequenceI;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
-import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import java.util.Set;
canonicalSourceNameLookup.put("ensembl-tr", DBRefSource.ENSEMBL);
canonicalSourceNameLookup.put("ensembl-gn", DBRefSource.ENSEMBL);
+ canonicalSourceNameLookup.put("ensemblgenomes",
+ DBRefSource.ENSEMBLGENOMES);
+
// Make sure we have lowercase entries for all canonical string lookups
Set<String> keys = canonicalSourceNameLookup.keySet();
for (String k : keys)
HashSet<String> srcs = new HashSet<String>();
for (String src : sources)
{
- srcs.add(src);
+ srcs.add(src.toUpperCase());
}
List<DBRefEntry> res = new ArrayList<DBRefEntry>();
for (DBRefEntry dbr : dbrefs)
{
String source = getCanonicalName(dbr.getSource());
- if (srcs.contains(source))
+ if (srcs.contains(source.toUpperCase()))
{
res.add(dbr);
}
return false;
}
String coordsys = dasCoordinateSystemsLookup.get(string.toLowerCase());
- return coordsys == null ? false : coordsys.equals(dBRefEntry
- .getSource());
+ return coordsys == null ? false
+ : coordsys.equals(dBRefEntry.getSource());
}
/**
return null;
}
String canonical = canonicalSourceNameLookup.get(source.toLowerCase());
+ if (canonical==null)
+ {
+ if (source.toLowerCase().startsWith("ensembl"))
+ {
+ canonical = DBRefSource.ENSEMBL;
+ for (String ensembls: new String[] { "Protists","Plants","Bacteria","Fungi","Metazoa"})
+ {
+ if (source.toLowerCase().endsWith(ensembls.toLowerCase()))
+ {
+ canonical = DBRefSource.ENSEMBLGENOMES;
+ }
+ }
+ }
+ }
return canonical == null ? source : canonical;
}
if (refa.getAccessionId() == null
|| refb.getAccessionId().equals(refa.getAccessionId()))
{
- if (refa.getMap() == null
- || (refb.getMap() != null && refb.getMap().equals(
- refa.getMap())))
+ if (refa.getMap() == null || (refb.getMap() != null
+ && refb.getMap().equals(refa.getMap())))
{
return true;
}
{
// We dont care about version
if (refa.getAccessionId() != null && refb.getAccessionId() != null
- // FIXME should be && not || here?
+ // FIXME should be && not || here?
|| refb.getAccessionId().equals(refa.getAccessionId()))
{
if ((refa.getMap() == null || refb.getMap() == null)
- || (refa.getMap() != null && refb.getMap() != null && refb
- .getMap().equals(refa.getMap())))
+ || (refa.getMap() != null && refb.getMap() != null
+ && refb.getMap().equals(refa.getMap())))
{
return true;
}
if ((refa.getMap() == null && refb.getMap() == null)
|| (refa.getMap() != null && refb.getMap() != null))
{
- if ((refb.getMap().getMap() == null && refa.getMap().getMap() == null)
+ if ((refb.getMap().getMap() == null
+ && refa.getMap().getMap() == null)
|| (refb.getMap().getMap() != null
- && refa.getMap().getMap() != null && refb
- .getMap().getMap().getInverse()
- .equals(refa.getMap().getMap())))
+ && refa.getMap().getMap() != null
+ && refb.getMap().getMap().getInverse()
+ .equals(refa.getMap().getMap())))
{
return true;
}
{
return true;
}
- if (refa.getMap() != null
- && refb.getMap() != null
- && ((refb.getMap().getMap() == null && refa.getMap()
- .getMap() == null) || (refb.getMap().getMap() != null
- && refa.getMap().getMap() != null && refb
- .getMap().getMap().equals(refa.getMap().getMap()))))
+ if (refa.getMap() != null && refb.getMap() != null
+ && ((refb.getMap().getMap() == null
+ && refa.getMap().getMap() == null)
+ || (refb.getMap().getMap() != null
+ && refa.getMap().getMap() != null
+ && refb.getMap().getMap()
+ .equals(refa.getMap().getMap()))))
{
return true;
}
return true;
}
if ((refa.getMap() != null && refb.getMap() != null)
- && (refb.getMap().getMap() == null && refa.getMap()
- .getMap() == null)
+ && (refb.getMap().getMap() == null
+ && refa.getMap().getMap() == null)
|| (refb.getMap().getMap() != null
- && refa.getMap().getMap() != null && (refb
- .getMap().getMap().equals(refa.getMap().getMap()))))
+ && refa.getMap().getMap() != null
+ && (refb.getMap().getMap()
+ .equals(refa.getMap().getMap()))))
{
return true;
}
PDBEntry pdbr = new PDBEntry();
pdbr.setId(pdbid);
pdbr.setType(PDBEntry.Type.PDB);
- pdbr.setProperty(new Hashtable());
pdbr.setChainCode(chaincode);
- // pdbr.getProperty().put("CHAIN", chaincode);
seq.addPDBId(pdbr);
}
else
{
return true;
}
- return DBRefUtils.getCanonicalName(o1).equals(
- DBRefUtils.getCanonicalName(o2));
+ return DBRefUtils.getCanonicalName(o1)
+ .equals(DBRefUtils.getCanonicalName(o2));
}
/**
public static DBRefEntry[] selectDbRefs(boolean selectDna,
DBRefEntry[] refs)
{
- return selectRefs(refs, selectDna ? DBRefSource.DNACODINGDBS
- : DBRefSource.PROTEINDBS);
+ return selectRefs(refs,
+ selectDna ? DBRefSource.DNACODINGDBS : DBRefSource.PROTEINDBS);
// could attempt to find other cross
// refs here - ie PDB xrefs
// (not dna, not protein seq)
return matches;
}
+ /**
+  * Promotes direct database references of a sequence to primary status when
+  * the sequence already carries a suitable primary reference.
+  * <p>
+  * Candidates are the sequence's own references selected by
+  * {@link #selectDbRefs(boolean, DBRefEntry[])} for its type (DNA coding
+  * sources for nucleotide, protein sources for peptides) that are not already
+  * primary. A candidate with a mapping is skipped if the mapping targets a
+  * different sequence, or if its 'from' range shares neither its lowest
+  * position with the sequence start nor its highest position with the
+  * sequence end. A promoted reference has its version replaced by the
+  * version of the primary reference followed by " (promoted)".
+  * <table>
+  * <tr>
+  * <th>Seq Type</th>
+  * <th>Primary DB</th>
+  * <th>Direct which will be promoted</th>
+  * </tr>
+  * <tr align=center>
+  * <td>peptides</td>
+  * <td>Uniprot</td>
+  * <td>Ensembl</td>
+  * </tr>
+  * <tr align=center>
+  * <td>peptides</td>
+  * <td>Ensembl</td>
+  * <td>Uniprot</td>
+  * </tr>
+  * </table>
+  * Nucleotide sequences (e.g. Ensembl primary with ENA direct references) are
+  * not promoted yet; see the TODO in the method body.
+  *
+  * @param sequence
+  *          the sequence whose database references are examined; promoted
+  *          references are modified in place
+  */
+ public static void ensurePrimaries(SequenceI sequence)
+ {
+   List<DBRefEntry> pr = sequence.getPrimaryDBRefs();
+   if (pr.isEmpty())
+   {
+     // nothing to do
+     return;
+   }
+
+   // the sequence type cannot change while we work, so query it once
+   final boolean isProtein = sequence.isProtein();
+
+   DBRefEntry[] selfArray = selectDbRefs(!isProtein, sequence.getDBRefs());
+   if (selfArray == null || selfArray.length == 0)
+   {
+     // nothing to do
+     return;
+   }
+   // mutable copy: Arrays.asList alone is fixed-size and cannot be shrunk
+   List<DBRefEntry> selfs = new ArrayList<DBRefEntry>(
+           Arrays.asList(selfArray));
+
+   // drop every occurrence of a reference that is already primary
+   for (DBRefEntry p : pr)
+   {
+     while (selfs.contains(p))
+     {
+       selfs.remove(p);
+     }
+   }
+
+   for (DBRefEntry p : pr)
+   {
+     // canonical source name of the refs this primary allows us to promote
+     String promoteSource = null;
+     if (isProtein)
+     {
+       String primarySource = getCanonicalName(p.getSource());
+       // switching on a null String throws NullPointerException
+       if (primarySource != null)
+       {
+         switch (primarySource)
+         {
+         case DBRefSource.UNIPROT:
+           // case DBRefSource.UNIPROTKB:
+           // case DBRefSource.UP_NAME:
+           // search for and promote ensembl
+           promoteSource = DBRefSource.ENSEMBL;
+           break;
+         case DBRefSource.ENSEMBL:
+           // search for and promote Uniprot
+           promoteSource = DBRefSource.UNIPROT;
+           break;
+         default:
+           break;
+         }
+       }
+     }
+     else
+     {
+       // TODO: promote transcript refs
+     }
+
+     if (promoteSource == null)
+     {
+       // this primary ref does not license any promotion
+       continue;
+     }
+
+     // collate candidates and promote them; selfs shrinks as refs are
+     // promoted, so the array has to be rebuilt for each primary ref
+     DBRefEntry[] candidates = selectRefs(selfs.toArray(new DBRefEntry[0]),
+             new String[]
+             { promoteSource });
+     if (candidates == null)
+     {
+       continue;
+     }
+     for (DBRefEntry cand : candidates)
+     {
+       if (cand.hasMap())
+       {
+         if (cand.getMap().getTo() != null
+                 && cand.getMap().getTo() != sequence)
+         {
+           // can't promote refs with mappings to other sequences
+           continue;
+         }
+         // NOTE(review): with '&&' a mapping is only rejected when BOTH
+         // ends differ from the sequence extent; a region sharing one end
+         // is still promoted - confirm whether '||' was intended
+         if (cand.getMap().getMap().getFromLowest() != sequence.getStart()
+                 && cand.getMap().getMap().getFromHighest() != sequence
+                         .getEnd())
+         {
+           // can't promote refs with mappings from a region of this sequence
+           // - eg CDS
+           continue;
+         }
+       }
+       // and promote
+       cand.setVersion(p.getVersion() + " (promoted)");
+       selfs.remove(cand);
+       if (!cand.isPrimaryCandidate())
+       {
+         System.out.println(
+                 "Warning: Couldn't promote dbref " + cand.toString()
+                         + " for sequence " + sequence.toString());
+       }
+     }
+   }
+ }
+
}