X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Futil%2FDBRefUtils.java;h=4d5a0250b24c403591ae80f5b6b6e6bddb1924f0;hb=c17981672620e0b780a2338bd0c74e55cf9ddec2;hp=405f6e6f520601ebd8785c2d26be1e5081475347;hpb=14e9fd31857401af4f5547430330d3d557cbd277;p=jalview.git diff --git a/src/jalview/util/DBRefUtils.java b/src/jalview/util/DBRefUtils.java index 405f6e6..4d5a025 100755 --- a/src/jalview/util/DBRefUtils.java +++ b/src/jalview/util/DBRefUtils.java @@ -26,9 +26,9 @@ import jalview.datamodel.PDBEntry; import jalview.datamodel.SequenceI; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; -import java.util.Hashtable; import java.util.List; import java.util.Map; import java.util.Set; @@ -43,9 +43,8 @@ public class DBRefUtils /* * lookup from lower-case form of a name to its canonical (standardised) form */ - private static Map canonicalSourceNameLookup = new HashMap(); + private static Map canonicalSourceNameLookup = new HashMap<>(); - private static Map dasCoordinateSystemsLookup = new HashMap(); static { @@ -73,10 +72,6 @@ public class DBRefUtils canonicalSourceNameLookup.get(k)); } - dasCoordinateSystemsLookup.put("pdbresnum", DBRefSource.PDB); - dasCoordinateSystemsLookup.put("uniprot", DBRefSource.UNIPROT); - dasCoordinateSystemsLookup.put("embl", DBRefSource.EMBL); - // dasCoordinateSystemsLookup.put("embl", DBRefSource.EMBLCDS); } /** @@ -97,17 +92,17 @@ public class DBRefUtils { return dbrefs; } - HashSet srcs = new HashSet(); + HashSet srcs = new HashSet<>(); for (String src : sources) { - srcs.add(src); + srcs.add(src.toUpperCase()); } - List res = new ArrayList(); + List res = new ArrayList<>(); for (DBRefEntry dbr : dbrefs) { String source = getCanonicalName(dbr.getSource()); - if (srcs.contains(source)) + if (srcs.contains(source.toUpperCase())) { res.add(dbr); } @@ -122,29 +117,6 @@ public class DBRefUtils } /** - * isDasCoordinateSystem - * - * @param string - * String - * @param dBRefEntry - * DBRefEntry - * @return boolean true if Source DBRefEntry is compatible with DAS - * CoordinateSystem name - */ - - public static boolean isDasCoordinateSystem(String string, - DBRefEntry dBRefEntry) - { - if (string == null || dBRefEntry == null) - { - return false; - } - String coordsys = dasCoordinateSystemsLookup.get(string.toLowerCase()); - return coordsys == null ? false : coordsys.equals(dBRefEntry - .getSource()); - } - - /** * look up source in an internal list of database reference sources and return * the canonical jalview name for the source, or the original string if it has * no canonical form. @@ -218,7 +190,7 @@ public class DBRefUtils static List searchRefs(DBRefEntry[] refs, DBRefEntry entry, DbRefComp comparator) { - List rfs = new ArrayList(); + List rfs = new ArrayList<>(); if (refs == null || entry == null) { return rfs; @@ -257,9 +229,8 @@ public class DBRefUtils if (refa.getAccessionId() == null || refb.getAccessionId().equals(refa.getAccessionId())) { - if (refa.getMap() == null - || (refb.getMap() != null && refb.getMap().equals( - refa.getMap()))) + if (refa.getMap() == null || (refb.getMap() != null + && refb.getMap().equals(refa.getMap()))) { return true; } @@ -307,12 +278,12 @@ public class DBRefUtils { // We dont care about version if (refa.getAccessionId() != null && refb.getAccessionId() != null - // FIXME should be && not || here? + // FIXME should be && not || here? || refb.getAccessionId().equals(refa.getAccessionId())) { if ((refa.getMap() == null || refb.getMap() == null) - || (refa.getMap() != null && refb.getMap() != null && refb - .getMap().equals(refa.getMap()))) + || (refa.getMap() != null && refb.getMap() != null + && refb.getMap().equals(refa.getMap()))) { return true; } @@ -344,11 +315,12 @@ public class DBRefUtils if ((refa.getMap() == null && refb.getMap() == null) || (refa.getMap() != null && refb.getMap() != null)) { - if ((refb.getMap().getMap() == null && refa.getMap().getMap() == null) + if ((refb.getMap().getMap() == null + && refa.getMap().getMap() == null) || (refb.getMap().getMap() != null - && refa.getMap().getMap() != null && refb - .getMap().getMap().getInverse() - .equals(refa.getMap().getMap()))) + && refa.getMap().getMap() != null + && refb.getMap().getMap().getInverse() + .equals(refa.getMap().getMap()))) { return true; } @@ -385,12 +357,13 @@ public class DBRefUtils { return true; } - if (refa.getMap() != null - && refb.getMap() != null - && ((refb.getMap().getMap() == null && refa.getMap() - .getMap() == null) || (refb.getMap().getMap() != null - && refa.getMap().getMap() != null && refb - .getMap().getMap().equals(refa.getMap().getMap())))) + if (refa.getMap() != null && refb.getMap() != null + && ((refb.getMap().getMap() == null + && refa.getMap().getMap() == null) + || (refb.getMap().getMap() != null + && refa.getMap().getMap() != null + && refb.getMap().getMap() + .equals(refa.getMap().getMap())))) { return true; } @@ -424,11 +397,12 @@ public class DBRefUtils return true; } if ((refa.getMap() != null && refb.getMap() != null) - && (refb.getMap().getMap() == null && refa.getMap() - .getMap() == null) + && (refb.getMap().getMap() == null + && refa.getMap().getMap() == null) || (refb.getMap().getMap() != null - && refa.getMap().getMap() != null && (refb - .getMap().getMap().equals(refa.getMap().getMap())))) + && refa.getMap().getMap() != null + && (refb.getMap().getMap() + .equals(refa.getMap().getMap())))) { return true; } @@ -503,9 +477,7 @@ public class DBRefUtils PDBEntry pdbr = new PDBEntry(); pdbr.setId(pdbid); pdbr.setType(PDBEntry.Type.PDB); - pdbr.setProperty(new Hashtable()); pdbr.setChainCode(chaincode); - // pdbr.getProperty().put("CHAIN", chaincode); seq.addPDBId(pdbr); } else @@ -516,7 +488,7 @@ public class DBRefUtils else { // default: - ref = new DBRefEntry(locsrc, version, acn); + ref = new DBRefEntry(locsrc, version, acn.trim()); } } if (ref != null) @@ -559,8 +531,8 @@ public class DBRefUtils { return true; } - return DBRefUtils.getCanonicalName(o1).equals( - DBRefUtils.getCanonicalName(o2)); + return DBRefUtils.getCanonicalName(o1) + .equals(DBRefUtils.getCanonicalName(o2)); } /** @@ -576,8 +548,8 @@ public class DBRefUtils public static DBRefEntry[] selectDbRefs(boolean selectDna, DBRefEntry[] refs) { - return selectRefs(refs, selectDna ? DBRefSource.DNACODINGDBS - : DBRefSource.PROTEINDBS); + return selectRefs(refs, + selectDna ? DBRefSource.DNACODINGDBS : DBRefSource.PROTEINDBS); // could attempt to find other cross // refs here - ie PDB xrefs // (not dna, not protein seq) @@ -594,7 +566,7 @@ public class DBRefUtils public static List searchRefsForSource(DBRefEntry[] dbRefs, String source) { - List matches = new ArrayList(); + List matches = new ArrayList<>(); if (dbRefs != null && source != null) { for (DBRefEntry dbref : dbRefs) @@ -608,4 +580,126 @@ public class DBRefUtils return matches; } + /** + * promote direct database references to primary for nucleotide or protein + * sequences if they have an appropriate primary ref + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
Seq TypePrimary DBDirect which will be promoted
peptidesEnsemblUniprot
peptidesEnsemblUniprot
dnaEnsemblENA
+ * + * @param sequence + */ + public static void ensurePrimaries(SequenceI sequence) + { + List pr = sequence.getPrimaryDBRefs(); + if (pr.size() == 0) + { + // nothing to do + return; + } + List selfs = new ArrayList<>(); + { + DBRefEntry[] selfArray = selectDbRefs(!sequence.isProtein(), + sequence.getDBRefs()); + if (selfArray == null || selfArray.length == 0) + { + // nothing to do + return; + } + selfs.addAll(Arrays.asList(selfArray)); + } + + // filter non-primary refs + for (DBRefEntry p : pr) + { + while (selfs.contains(p)) + { + selfs.remove(p); + } + } + List toPromote = new ArrayList<>(); + + for (DBRefEntry p : pr) + { + List promType = new ArrayList<>(); + if (sequence.isProtein()) + { + switch (getCanonicalName(p.getSource())) + { + case DBRefSource.UNIPROT: + // case DBRefSource.UNIPROTKB: + // case DBRefSource.UP_NAME: + // search for and promote ensembl + promType.add(DBRefSource.ENSEMBL); + break; + case DBRefSource.ENSEMBL: + // search for and promote Uniprot + promType.add(DBRefSource.UNIPROT); + break; + } + } + else + { + // TODO: promote transcript refs + } + + // collate candidates and promote them + DBRefEntry[] candidates = selectRefs(selfs.toArray(new DBRefEntry[0]), + promType.toArray(new String[0])); + if (candidates != null) + { + for (DBRefEntry cand : candidates) + { + if (cand.hasMap()) + { + if (cand.getMap().getTo() != null + && cand.getMap().getTo() != sequence) + { + // can't promote refs with mappings to other sequences + continue; + } + if (cand.getMap().getMap().getFromLowest() != sequence + .getStart() + && cand.getMap().getMap().getFromHighest() != sequence + .getEnd()) + { + // can't promote refs with mappings from a region of this sequence + // - eg CDS + continue; + } + } + // and promote + cand.setVersion(p.getVersion() + " (promoted)"); + selfs.remove(cand); + toPromote.add(cand); + if (!cand.isPrimaryCandidate()) + { + System.out.println( + "Warning: Couldn't promote dbref " + cand.toString() + + " for sequence " + sequence.toString()); + } + } + } + } + } + }