From beff7ac7ab7be2c24ccb179be16b8816d2c18610 Mon Sep 17 00:00:00 2001 From: hansonr Date: Mon, 4 Feb 2019 08:51:36 -0600 Subject: [PATCH] refactoring for DBMODList --- src/jalview/analysis/CrossRef.java | 6 +- src/jalview/datamodel/DBRefEntry.java | 110 +-- src/jalview/datamodel/DBRefSource.java | 255 ++--- src/jalview/datamodel/Sequence.java | 48 +- src/jalview/datamodel/SequenceGroup.java | 3 +- src/jalview/datamodel/SequenceI.java | 11 +- src/jalview/io/SequenceAnnotationReport.java | 36 +- src/jalview/util/DBRefUtils.java | 1302 ++++++++++++-------------- 8 files changed, 813 insertions(+), 958 deletions(-) diff --git a/src/jalview/analysis/CrossRef.java b/src/jalview/analysis/CrossRef.java index 4f01cea..00bb63a 100644 --- a/src/jalview/analysis/CrossRef.java +++ b/src/jalview/analysis/CrossRef.java @@ -485,7 +485,7 @@ public class CrossRef { List dbrSourceSet = new ArrayList(sourceRefs); List dsSeqs = dataset.getSequences(); - for (int ids = dsSeqs.size(); --ids >= 0;) + for (int ids = 0, nds = dsSeqs.size(); ids < nds; ids++) { SequenceI sq = dsSeqs.get(ids); boolean dupeFound = false; @@ -494,11 +494,11 @@ public class CrossRef if (sq.isProtein() == fromDna) { List sqdbrefs = sq.getPrimaryDBRefs(); - for (int idb = sqdbrefs.size(); --idb >= 0;) + for (int idb = 0, ndb = sqdbrefs.size(); idb < ndb; idb++) { DBRefEntry dbr = sqdbrefs.get(idb); List searchrefs = DBRefUtils.searchRefs(dbrSourceSet, dbr, DBRefUtils.SEARCH_MODE_FULL); - for (int isr = searchrefs.size(); --isr >= 0;) + for (int isr = 0, nsr = searchrefs.size(); isr < nsr; isr++) { sourceRefs.remove(searchrefs.get(isr)); dupeFound = true; diff --git a/src/jalview/datamodel/DBRefEntry.java b/src/jalview/datamodel/DBRefEntry.java index 8520e5c..54e8379 100755 --- a/src/jalview/datamodel/DBRefEntry.java +++ b/src/jalview/datamodel/DBRefEntry.java @@ -21,6 +21,7 @@ package jalview.datamodel; import jalview.api.DBRefEntryI; +import jalview.io.vamsas.Dbref; import jalview.util.DBRefUtils; import jalview.util.MapList; @@ -37,15 +38,15 @@ public class DBRefEntry implements DBRefEntryI */ public static final String CHROMOSOME = "chromosome"; - String source = ""; + private String source = ""; - String version = ""; + private String version = ""; private String ucversion; - String accessionId = ""; + private String accessionId = ""; - String sourceKey; + int sourceKey = Integer.MIN_VALUE; String canonicalSourceName; @@ -59,7 +60,12 @@ public class DBRefEntry implements DBRefEntryI { } - +/** + * + * @param source may not be null + * @param version may be null + * @param accessionId may be null + */ public DBRefEntry(String source, String version, String accessionId) { this(source, version, accessionId, null); @@ -68,36 +74,37 @@ public class DBRefEntry implements DBRefEntryI /** * * @param source - * canonical source (uppercase only) + * canonical source (turned to uppercase; cannot be null) * @param version - * (source dependent version string) + * (source dependent version string or null) * @param accessionId - * (source dependent accession number string) + * (source dependent accession number string or null) * @param map * (mapping from local sequence numbering to source accession - * numbering) + * numbering or null) */ public DBRefEntry(String source, String version, String accessionId, Mapping map) { - // BH 2019.01.25 made these always non-null. - // Is there a difference between "" and null for version? - // evidence is that source CANNOT be null. - setSource(source); + + this.source = source.toUpperCase(); setVersion(version); - setAccessionId(accessionId); + this.accessionId = accessionId; this.map = map; } + /** + * Clone an entry, this time not allowing any null fields except map. + * + */ public DBRefEntry(DBRefEntryI entry) { - this(entry.getSource(), entry.getVersion(), entry.getAccessionId(), entry.getMap() == null ? null : new Mapping(entry.getMap())); -// this((entry.getSource() == null ? "" : new String(entry.getSource())), -// (entry.getVersion() == null ? "" -// : new String(entry.getVersion())), -// (entry.getAccessionId() == null ? "" -// : new String(entry.getAccessionId())), -// (entry.getMap() == null ? null : new Mapping(entry.getMap()))); + this((entry.getSource() == null ? "" : new String(entry.getSource())), + (entry.getVersion() == null ? "" + : new String(entry.getVersion())), + (entry.getAccessionId() == null ? "" + : new String(entry.getAccessionId())), + (entry.getMap() == null ? null : new Mapping(entry.getMap()))); } @Override @@ -228,20 +235,16 @@ public class DBRefEntry implements DBRefEntryI return true; } - // BH 2019.01.25 source, accessionId, and version cannot be null. + // BH 2019.01.25/2019.02.04 source cannot/should not be null. // for example, StructureChooser has dbRef.getSource().equalsIgnoreCase... - if (entry != null + return (entry != null && (source != null && entry.getSource() != null && source.equalsIgnoreCase(entry.getSource())) && (accessionId != null && entry.getAccessionId() != null && accessionId.equalsIgnoreCase(entry.getAccessionId())) && (version != null && entry.getVersion() != null - && version.equalsIgnoreCase(entry.getVersion()))) - { - return true; - } - return false; + && version.equalsIgnoreCase(entry.getVersion()))); } @Override @@ -250,17 +253,23 @@ public class DBRefEntry implements DBRefEntryI return source; } - public String getSourceKey() + public int getSourceKey() { - return sourceKey; + return (sourceKey == Integer.MIN_VALUE ? (sourceKey = DBRefSource.getSourceKey(getCanonicalSourceName())) : sourceKey); } + /** + * can be null + */ @Override public String getVersion() { return version; } + /** + * can be null + */ @Override public String getAccessionId() { @@ -270,22 +279,28 @@ public class DBRefEntry implements DBRefEntryI @Override public void setAccessionId(String accessionId) { - this.accessionId = (accessionId == null ? "" : accessionId).toUpperCase(); + this.accessionId = accessionId; +// this.accessionId = (accessionId == null ? "" : accessionId).toUpperCase(); } + /** + * CAUTION! allows setting source null or not uppercase! + */ @Override public void setSource(String source) { - this.source = (source == null ? "" : source).toUpperCase(); - this.canonicalSourceName = DBRefUtils.getCanonicalName(this.source); - this.sourceKey = ";" + canonicalSourceName + ";"; + this.source = source; + +// this.source = (source == null ? "" : source).toUpperCase(); +// this.canonicalSourceName = DBRefUtils.getCanonicalName(this.source); +// this.sourceKey = DBRefSource.getSourceKey(this.canonicalSourceName); } @Override public void setVersion(String version) { - this.version = (version == null ? "" : version); - this.ucversion = this.version.toUpperCase(); + this.version = version; + this.ucversion = (version == null ? null : version.toUpperCase()); } @Override @@ -355,26 +370,13 @@ public class DBRefEntry implements DBRefEntryI return false; } } - if (version == null || version == "") + if (version == null) { // no version string implies the reference has not been verified at all. return false; } - // tricky - this test really needs to search the sequence's set of dbrefs to - // see if there is a primary reference that derived this reference. - String[] sources = DBRefSource.allSources(); - for (int i = sources.length; --i >= 0;) - { - if (ucversion.startsWith(sources[i])) // BH 2019.01.25 .toUpperCase() unnecessary here for allSources - { - // by convention, many secondary references inherit the primary - // reference's - // source string as a prefix for any version information from the - // secondary reference. - return false; - } - } - return true; + + return DBRefSource.isPrimaryCandidate(ucversion); } /** @@ -395,8 +397,8 @@ public class DBRefEntry implements DBRefEntryI * * @return */ - public Object getCanonicalSourceName() { - return canonicalSourceName; + public String getCanonicalSourceName() { + return (canonicalSourceName == null ? (canonicalSourceName = DBRefUtils.getCanonicalName(this.source)) : canonicalSourceName); } diff --git a/src/jalview/datamodel/DBRefSource.java b/src/jalview/datamodel/DBRefSource.java index f8039bb..c1e1741 100755 --- a/src/jalview/datamodel/DBRefSource.java +++ b/src/jalview/datamodel/DBRefSource.java @@ -20,10 +20,6 @@ */ package jalview.datamodel; -import java.lang.reflect.Field; -import java.util.ArrayList; -import java.util.List; - /** * BH 2018 SwingJS note: If additional final static Strings are added to this * file, they should be added to public static final String[] allTypes. @@ -45,145 +41,158 @@ public class DBRefSource - /** - * UNIPROT Accession Number - */ public static final String UNIPROT = "UNIPROT"; - - /** - * UNIPROT Entry Name - */ public static final String UP_NAME = "UNIPROT_NAME".toUpperCase(); - /** * Uniprot Knowledgebase/TrEMBL as served from EMBL protein products. */ public static final String UNIPROTKB = "UniProtKB/TrEMBL".toUpperCase(); - public static final String EMBLCDSProduct = "EMBLCDSProtein".toUpperCase(); - - - /** - * PDB Entry Code - */ - public static final String PDB = "PDB"; - - /** - * EMBL ID - */ - public static final String EMBL = "EMBL"; - - /** - * EMBLCDS ID - */ - public static final String EMBLCDS = "EMBLCDS"; - + public static final String ENSEMBL = "ENSEMBL"; + public static final String ENSEMBLGENOMES = "ENSEMBLGENOMES"; - /** - * PFAM ID - */ - public static final String PFAM = "PFAM"; - - /** - * RFAM ID - */ - public static final String RFAM = "RFAM"; + public static final String EMBL = "EMBL"; + public static final String EMBLCDS = "EMBLCDS"; + public static final String EMBLCDSProduct = "EMBLCDSProtein".toUpperCase(); - /** - * GeneDB ID - */ + public static final String PDB = "PDB"; + public static final String PFAM = "PFAM"; + public static final String RFAM = "RFAM"; public static final String GENEDB = "GeneDB".toUpperCase(); - - /** - * Ensembl - */ - public static final String ENSEMBL = "ENSEMBL"; + public static final String PDB_CANONICAL_NAME = PDB; + + + public static final String[] allSources = new String[] { + UNIPROT, + UP_NAME, UNIPROTKB, + ENSEMBL, ENSEMBLGENOMES, + EMBL, EMBLCDS, EMBLCDSProduct, + PDB, PFAM, RFAM, GENEDB + }; + + public static final int UNIPROT_MASK = 1<<0; + public static final int UP_NAME_MASK = 1<<1; + public static final int UNIPROT_KB_MASK = 1<<2; + public static final int ENSEMBL_MASK = 1<<3; + public static final int ENSEMBL_GENOMES_MASK = 1<<4; + public static final int EMBL_MASK = 1<<5; + public static final int EMBL_CDS_MASK = 1<<6; + public static final int EMBL_CDS_PRODUCT_MASK = 1<<7; + public static final int PDB_MASK = 1<<8; + public static final int PFAM_MASK = 1<<9; + public static final int RFAM_MASK = 1<<10; + public static final int GENE_DB_MASK = 1<<11; + + public static final int MASK_COUNT = 12; + + public static final int ALL_MASKS = (1 << MASK_COUNT) - 1; + + public static int getSourceKey(String name) { + for (int i = 0; i < MASK_COUNT; i++) { + if (name.equals(allSources[i])) + return 1<= 0;) - s += PROTEINDBS[i] + ";"; - PROTEINDBSKEYS = s; - - s = ";"; - for (int i = DNACODINGDBS.length; --i >= 0;) - s += DNACODINGDBS[i] + ";"; - DNACODINGDBSKEYS = s; - - PROMTYPES = new String[] { null, ";" + UNIPROT + ";", ";" + ENSEMBL + ";" , ";" + UNIPROT + ";" + ENSEMBL + ";" }; -} - - public static String[] allSourcesFromReflection; - - public static String[] allSources() + public static final String[] PROTEINDBS = { + UNIPROT, UNIPROTKB, + ENSEMBL, EMBLCDSProduct }; // Ensembl ENSP* entries are protein + + public static final int PROTEIN_MASK = + UNIPROT_MASK | UNIPROT_KB_MASK + | ENSEMBL_MASK | EMBL_CDS_PRODUCT_MASK ; + + + // for SequenceAnnotationReport only + +// public static final String[][] PRIMARY_SOURCES = new String[][] { +// CODINGDBS, DNACODINGDBS, PROTEINDBS }; +// + public static final int PRIMARY_SOURCES_MASK = CODING_MASK | DNA_CODING_MASK | PROTEIN_MASK; + + public static boolean isPrimarySource(String source) + { + return ((PRIMARY_SOURCES_MASK & getSourceKey(source)) != 0); + } - { - /** - * @j2sNative - * - * return C$.allTypes; - * - */ +// public static String[] allSourcesFromReflection; +// +// public static String[] allSources() +// +// { +// /** +// * @j2sNative +// * +// * return C$.allTypes; +// * +// */ +// +// { +// if (allSourcesFromReflection == null) +// { +// List src = new ArrayList<>(); +// for (Field f : DBRefSource.class.getFields()) +// { +// if (String.class.equals(f.getType())) +// { +// try +// { +// src.add((String) f.get(null)); +// } catch (Exception x) +// { +// x.printStackTrace(); +// } +// } +// } +// allSourcesFromReflection = src.toArray(new String[0]); +// } +// return allSourcesFromReflection; +// } +// } + + public static boolean isPrimaryCandidate(String ucversion) { + // tricky - this test really needs to search the sequence's set of dbrefs to + // see if there is a primary reference that derived this reference. + for (int i = allSources.length; --i >= 0;) { - if (allSourcesFromReflection == null) + if (ucversion.startsWith(allSources[i])) // BH 2019.01.25 .toUpperCase() unnecessary here for allSources { - List src = new ArrayList<>(); - for (Field f : DBRefSource.class.getFields()) - { - if (String.class.equals(f.getType())) - { - try - { - src.add((String) f.get(null)); - } catch (Exception x) - { - x.printStackTrace(); - } - } - } - allSourcesFromReflection = src.toArray(new String[0]); + // by convention, many secondary references inherit the primary + // reference's + // source string as a prefix for any version information from the + // secondary reference. + return false; } - return allSourcesFromReflection; } - } - + return true; +} + + + } diff --git a/src/jalview/datamodel/Sequence.java b/src/jalview/datamodel/Sequence.java index 0acf701..7549ef5 100755 --- a/src/jalview/datamodel/Sequence.java +++ b/src/jalview/datamodel/Sequence.java @@ -27,7 +27,6 @@ import jalview.util.Comparison; import jalview.util.DBRefUtils; import jalview.util.MapList; import jalview.util.StringUtils; -import jalview.ws.params.InvalidArgumentException; import java.util.ArrayList; import java.util.Arrays; @@ -56,7 +55,7 @@ public class Sequence extends ASequence implements SequenceI * @param */ @SuppressWarnings("serial") - protected class DBModList extends ArrayList { + public class DBModList extends ArrayList { protected int getModCount() { return modCount; @@ -66,19 +65,19 @@ public class Sequence extends ASequence implements SequenceI SequenceI datasetSequence; - String name; + private String name; private char[] sequence; - String description; + private String description; - int start; + private int start; - int end; + private int end; - Vector pdbIds; + private Vector pdbIds; - String vamsasId; + private String vamsasId; private DBModList dbrefs; // controlled access @@ -89,7 +88,7 @@ SequenceI datasetSequence; */ private int refModCount = 0; - RNA rna; + private RNA rna; /** * This annotation is displayed below the alignment but the positions are tied @@ -97,7 +96,7 @@ SequenceI datasetSequence; * * TODO: change to List<> */ - Vector annotation; + private Vector annotation; private SequenceFeaturesI sequenceFeatureStore; @@ -724,6 +723,9 @@ SequenceI datasetSequence; @Override public String getAssemblyId() { + // DEV NOTE: DBRefEntry is reused here to hold chromosomal locus of a gene sequence. + // source=species, version=assemblyId, accession=chromosome, map = positions. + return ref.getVersion(); } @@ -1410,35 +1412,22 @@ SequenceI datasetSequence; vamsasId = id; } - @SuppressWarnings("deprecation") -@Override - public void setDBRefs(List newDBrefs) throws InvalidArgumentException + @Deprecated + @Override + public void setDBRefs(DBModList newDBrefs) { if (dbrefs == null && datasetSequence != null && this != datasetSequence) { - datasetSequence.setDBRefs((DBModList)newDBrefs); + datasetSequence.setDBRefs(newDBrefs); return; } - if (newDBrefs != null && !(newDBrefs instanceof DBModList)) - throw new InvalidArgumentException("DBrefs must have DBModList class"); - - dbrefs = (DBModList)newDBrefs; + dbrefs = newDBrefs; refModCount = 0; } @Override - public void getDBRefsFrom(SequenceI seq) { - try { - setDBRefs(seq.getDBRefs()); - } catch (InvalidArgumentException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } - - @Override - public List getDBRefs() + public DBModList getDBRefs() { if (dbrefs == null && datasetSequence != null && this != datasetSequence) @@ -1945,6 +1934,7 @@ private List primaryRefs; primaries.add(ref); } + // version must be not null, as otherwise it will not be a candidate, above DBRefUtils.ensurePrimaries(this, primaries); return primaries; } diff --git a/src/jalview/datamodel/SequenceGroup.java b/src/jalview/datamodel/SequenceGroup.java index 1e579ec..62f9fd8 100755 --- a/src/jalview/datamodel/SequenceGroup.java +++ b/src/jalview/datamodel/SequenceGroup.java @@ -25,7 +25,6 @@ import jalview.analysis.Conservation; import jalview.renderer.ResidueShader; import jalview.renderer.ResidueShaderI; import jalview.schemes.ColourSchemeI; -import jalview.ws.params.InvalidArgumentException; import java.awt.Color; import java.beans.PropertyChangeListener; @@ -262,7 +261,7 @@ public class SequenceGroup implements AnnotatedCollectionI if (seqipos != null) { seqipos.setDescription(seq.getDescription()); - seqipos.getDBRefsFrom(seq); + seqipos.setDBRefs(seq.getDBRefs()); seqipos.setSequenceFeatures(seq.getSequenceFeatures()); if (seq.getDatasetSequence() != null) { diff --git a/src/jalview/datamodel/SequenceI.java b/src/jalview/datamodel/SequenceI.java index 9e9758c..5a3aafd 100755 --- a/src/jalview/datamodel/SequenceI.java +++ b/src/jalview/datamodel/SequenceI.java @@ -20,6 +20,7 @@ */ package jalview.datamodel; +import jalview.datamodel.Sequence.DBModList; import jalview.datamodel.features.SequenceFeaturesI; import jalview.util.MapList; import jalview.ws.params.InvalidArgumentException; @@ -354,20 +355,17 @@ public interface SequenceI extends ASequenceI /** * set the array of Database references for the sequence. * - * BH 2019.01.25 added throw + * BH 2019.02.04 changes param to DBModlist * * @param dbs - * @throws InvalidArgumentException * @deprecated - use is discouraged since side-effects may occur if DBRefEntry * set are not normalised. * @throws InvalidArgumentException if the is not one created by Sequence itself */ @Deprecated - public void setDBRefs(List dbs) throws InvalidArgumentException; + public void setDBRefs(DBModList dbs); - public List getDBRefs(); - - public void getDBRefsFrom(SequenceI seq); + public DBModList getDBRefs(); /** * add the given entry to the list of DBRefs for this sequence, or replace a @@ -584,5 +582,6 @@ public interface SequenceI extends ASequenceI */ public int firstResidueOutsideIterator(Iterator it); + } diff --git a/src/jalview/io/SequenceAnnotationReport.java b/src/jalview/io/SequenceAnnotationReport.java index 29b9942..27e4da2 100644 --- a/src/jalview/io/SequenceAnnotationReport.java +++ b/src/jalview/io/SequenceAnnotationReport.java @@ -52,9 +52,7 @@ public class SequenceAnnotationReport private static final int MAX_SOURCES = 40; - private static final String[][] PRIMARY_SOURCES = new String[][] { - DBRefSource.CODINGDBS, DBRefSource.DNACODINGDBS, - DBRefSource.PROTEINDBS }; + // public static final String[][] PRIMARY_SOURCES moved to DBRefSource.java final String linkImageURL; @@ -78,8 +76,8 @@ public class SequenceAnnotationReport } String s1 = ref1.getSource(); String s2 = ref2.getSource(); - boolean s1Primary = isPrimarySource(s1); - boolean s2Primary = isPrimarySource(s2); + boolean s1Primary = DBRefSource.isPrimarySource(s1); + boolean s2Primary = DBRefSource.isPrimarySource(s2); if (s1Primary && !s2Primary) { return -1; @@ -100,20 +98,20 @@ public class SequenceAnnotationReport return comp; } - private boolean isPrimarySource(String source) - { - for (String[] primary : PRIMARY_SOURCES) - { - for (String s : primary) - { - if (source.equals(s)) - { - return true; - } - } - } - return false; - } +// private boolean isPrimarySource(String source) +// { +// for (String[] primary : DBRefSource.PRIMARY_SOURCES) +// { +// for (String s : primary) +// { +// if (source.equals(s)) +// { +// return true; +// } +// } +// } +// return false; +// } }; public SequenceAnnotationReport(String linkURL) diff --git a/src/jalview/util/DBRefUtils.java b/src/jalview/util/DBRefUtils.java index 381038c..485d98c 100755 --- a/src/jalview/util/DBRefUtils.java +++ b/src/jalview/util/DBRefUtils.java @@ -38,671 +38,543 @@ import com.stevesoft.pat.Regex; /** * Utilities for handling DBRef objects and their collections. */ -public class DBRefUtils -{ - - public final static int DB_SOURCE = 1; +public class DBRefUtils { + + public final static int DB_SOURCE = 1; public final static int DB_VERSION = 2; - public final static int DB_ID = 4; - public final static int DB_MAP = 8; - - + public final static int DB_ID = 4; + public final static int DB_MAP = 8; + public final static int SEARCH_MODE_NO_MAP_NO_VERSION = DB_SOURCE | DB_ID; public final static int SEARCH_MODE_FULL = DB_SOURCE | DB_VERSION | DB_ID | DB_MAP; - /* - * lookup from lower-case form of a name to its canonical (standardised) form - */ - private static Map canonicalSourceNameLookup = new HashMap(); + /* + * lookup from lower-case form of a name to its canonical (standardised) form + */ + private static Map canonicalSourceNameLookup = new HashMap(); - private static Map dasCoordinateSystemsLookup = new HashMap(); + private static Map dasCoordinateSystemsLookup = new HashMap(); - static - { - // TODO load these from a resource file? - canonicalSourceNameLookup.put("uniprotkb/swiss-prot", - DBRefSource.UNIPROT); - canonicalSourceNameLookup.put("uniprotkb/trembl", DBRefSource.UNIPROT); + static { + // TODO load these from a resource file? + canonicalSourceNameLookup.put("uniprotkb/swiss-prot", DBRefSource.UNIPROT); + canonicalSourceNameLookup.put("uniprotkb/trembl", DBRefSource.UNIPROT); - // Ensembl values for dbname in xref REST service: - canonicalSourceNameLookup.put("uniprot/sptrembl", DBRefSource.UNIPROT); - canonicalSourceNameLookup.put("uniprot/swissprot", DBRefSource.UNIPROT); + // Ensembl values for dbname in xref REST service: + canonicalSourceNameLookup.put("uniprot/sptrembl", DBRefSource.UNIPROT); + canonicalSourceNameLookup.put("uniprot/swissprot", DBRefSource.UNIPROT); - canonicalSourceNameLookup.put("pdb", DBRefSource.PDB); - canonicalSourceNameLookup.put("ensembl", DBRefSource.ENSEMBL); - // Ensembl Gn and Tr are for Ensembl genomic and transcript IDs as served - // from ENA. - canonicalSourceNameLookup.put("ensembl-tr", DBRefSource.ENSEMBL); - canonicalSourceNameLookup.put("ensembl-gn", DBRefSource.ENSEMBL); + canonicalSourceNameLookup.put("pdb", DBRefSource.PDB); + canonicalSourceNameLookup.put("ensembl", DBRefSource.ENSEMBL); + // Ensembl Gn and Tr are for Ensembl genomic and transcript IDs as served + // from ENA. + canonicalSourceNameLookup.put("ensembl-tr", DBRefSource.ENSEMBL); + canonicalSourceNameLookup.put("ensembl-gn", DBRefSource.ENSEMBL); - // Make sure we have lowercase entries for all canonical string lookups + // Make sure we have lowercase entries for all canonical string lookups // BH 2019.01.25 unnecessary -- they are all lower case already - //Set keys = canonicalSourceNameLookup.keySet(); + // Set keys = canonicalSourceNameLookup.keySet(); // for (String k : keys) // { // canonicalSourceNameLookup.put(k.toLowerCase(), // canonicalSourceNameLookup.get(k)); // } - dasCoordinateSystemsLookup.put("pdbresnum", DBRefSource.PDB); - dasCoordinateSystemsLookup.put("uniprot", DBRefSource.UNIPROT); - dasCoordinateSystemsLookup.put("embl", DBRefSource.EMBL); - // dasCoordinateSystemsLookup.put("embl", DBRefSource.EMBLCDS); - } - - /** - * Returns those DBRefEntry objects whose source identifier (once converted to - * Jalview's canonical form) is in the list of sources to search for. Returns - * null if no matches found. - * - * @param dbrefs - * DBRefEntry objects to search - * @param sources - * array of sources to select - * @return - */ - public static List selectRefs(List dbrefs, - String[] sources) - { - if (dbrefs == null || sources == null) - { - return dbrefs; - } - - // BH TODO - HashSet srcs = new HashSet(); - for (String src : sources) - { - srcs.add(src.toUpperCase()); - } - - int nrefs = dbrefs.size(); - List res = new ArrayList(); - for (int ib = 0; ib < nrefs; ib++) - { - DBRefEntry dbr = dbrefs.get(ib); - String source = getCanonicalName(dbr.getSource()); - if (srcs.contains(source.toUpperCase())) - { - res.add(dbr); - } - } - - if (res.size() > 0) - { - //List reply = new DBRefEntry[res.size()]; - return res;//.toArray(reply); - } - return null; - } - - private static boolean selectRefsBS(List dbrefs, String sourceKeys, BitSet bsSelect) { - if (dbrefs == null || sourceKeys == null) - { - return false; - } - for (int i = 0, n = dbrefs.size(); i < n; i++) - { - DBRefEntry dbr = dbrefs.get(i); - String sourceKey = dbr.getSourceKey(); - if (sourceKeys.indexOf(sourceKey) < 0) { - bsSelect.clear(i); - } - } - return !bsSelect.isEmpty(); - } - - - /** - * isDasCoordinateSystem - * - * @param string - * String - * @param dBRefEntry - * DBRefEntry - * @return boolean true if Source DBRefEntry is compatible with DAS - * CoordinateSystem name - */ - - public static boolean isDasCoordinateSystem(String string, - DBRefEntry dBRefEntry) - { - if (string == null || dBRefEntry == null) - { - return false; - } - String coordsys = dasCoordinateSystemsLookup.get(string.toLowerCase()); - return coordsys == null ? false - : coordsys.equals(dBRefEntry.getSource()); - } - - /** - * look up source in an internal list of database reference sources and return - * the canonical jalview name for the source, or the original string if it has - * no canonical form. - * - * @param source - * @return canonical jalview source (one of jalview.datamodel.DBRefSource.*) - * or original source - */ - public static String getCanonicalName(String source) - { - if (source == null) - { - return null; - } - String canonical = canonicalSourceNameLookup.get(source.toLowerCase()); - return canonical == null ? source : canonical; - } - - /** - * Returns a (possibly empty) list of those references that match the given - * entry. Currently uses a comparator which matches if - *
    - *
  • database sources are the same
  • - *
  • accession ids are the same
  • - *
  • both have no mapping, or the mappings are the same
  • - *
- * - * @param ref - * Set of references to search - * @param entry - * pattern to match - * @param mode SEARCH_MODE_FULL for all; SEARCH_MODE_NO_MAP_NO_VERSION optional - * @return - */ - public static List searchRefs(List ref, - DBRefEntry entry, int mode) - { - return searchRefs(ref, entry, - matchDbAndIdAndEitherMapOrEquivalentMapList, mode); - } - - /** - * Returns a list of those references that match the given accession id - *
    - *
  • database sources are the same
  • - *
  • accession ids are the same
  • - *
  • both have no mapping, or the mappings are the same
  • - *
- * - * @param refs - * Set of references to search - * @param accId - * accession id to match - * @return - */ - public static List searchRefs(List refs, String accId) - { - List rfs = new ArrayList(); - if (refs == null || accId == null) - { - return rfs; - } - for (int i = 0, n = refs.size(); i < n; i++) - { - DBRefEntry e = refs.get(i); - if (accId.equals(e.getAccessionId())) - { - rfs.add(e); - } - } - return rfs; + dasCoordinateSystemsLookup.put("pdbresnum", DBRefSource.PDB); + dasCoordinateSystemsLookup.put("uniprot", DBRefSource.UNIPROT); + dasCoordinateSystemsLookup.put("embl", DBRefSource.EMBL); + // dasCoordinateSystemsLookup.put("embl", DBRefSource.EMBLCDS); + } + + /** + * Returns those DBRefEntry objects whose source identifier (once converted to + * Jalview's canonical form) is in the list of sources to search for. Returns + * null if no matches found. + * + * @param dbrefs DBRefEntry objects to search + * @param sources array of sources to select + * @return + */ + public static List selectRefs(List dbrefs, String[] sources) { + if (dbrefs == null || sources == null) { + return dbrefs; + } + + // BH TODO + HashSet srcs = new HashSet(); + for (String src : sources) { + srcs.add(src.toUpperCase()); + } + + int nrefs = dbrefs.size(); + List res = new ArrayList(); + for (int ib = 0; ib < nrefs; ib++) { + DBRefEntry dbr = dbrefs.get(ib); + String source = getCanonicalName(dbr.getSource()); + if (srcs.contains(source.toUpperCase())) { + res.add(dbr); + } + } + + if (res.size() > 0) { + // List reply = new DBRefEntry[res.size()]; + return res;// .toArray(reply); + } + return null; + } + + private static boolean selectRefsBS(List dbrefs, int sourceKeys, BitSet bsSelect) { + if (dbrefs == null || sourceKeys == 0) { + return false; + } + for (int i = 0, n = dbrefs.size(); i < n; i++) { + DBRefEntry dbr = dbrefs.get(i); + if ((dbr.getSourceKey() & sourceKeys) != 0) { + bsSelect.clear(i); + } + } + return !bsSelect.isEmpty(); + } + + /** + * isDasCoordinateSystem + * + * @param string String + * @param dBRefEntry DBRefEntry + * @return boolean true if Source DBRefEntry is compatible with DAS + * CoordinateSystem name + */ + + public static boolean isDasCoordinateSystem(String string, DBRefEntry dBRefEntry) { + if (string == null || dBRefEntry == null) { + return false; + } + String coordsys = dasCoordinateSystemsLookup.get(string.toLowerCase()); + return coordsys == null ? false : coordsys.equals(dBRefEntry.getSource()); + } + + /** + * look up source in an internal list of database reference sources and return + * the canonical jalview name for the source, or the original string if it has + * no canonical form. + * + * @param source + * @return canonical jalview source (one of jalview.datamodel.DBRefSource.*) or + * original source + */ + public static String getCanonicalName(String source) { + if (source == null) { + return null; + } + String canonical = canonicalSourceNameLookup.get(source.toLowerCase()); + return canonical == null ? source : canonical; + } + + /** + * Returns a (possibly empty) list of those references that match the given + * entry. Currently uses a comparator which matches if + *
    + *
  • database sources are the same
  • + *
  • accession ids are the same
  • + *
  • both have no mapping, or the mappings are the same
  • + *
+ * + * @param ref Set of references to search + * @param entry pattern to match + * @param mode SEARCH_MODE_FULL for all; SEARCH_MODE_NO_MAP_NO_VERSION optional + * @return + */ + public static List searchRefs(List ref, DBRefEntry entry, int mode) { + return searchRefs(ref, entry, matchDbAndIdAndEitherMapOrEquivalentMapList, mode); + } + + /** + * Returns a list of those references that match the given accession id + *
    + *
  • database sources are the same
  • + *
  • accession ids are the same
  • + *
  • both have no mapping, or the mappings are the same
  • + *
+ * + * @param refs Set of references to search + * @param accId accession id to match + * @return + */ + public static List searchRefs(List refs, String accId) { + List rfs = new ArrayList(); + if (refs == null || accId == null) { + return rfs; + } + for (int i = 0, n = refs.size(); i < n; i++) { + DBRefEntry e = refs.get(i); + if (accId.equals(e.getAccessionId())) { + rfs.add(e); + } + } + return rfs; // return searchRefs(refs, new DBRefEntry("", "", accId), matchId, SEARCH_MODE_FULL); - } - - - /** - * Returns a (possibly empty) list of those references that match the given - * entry, according to the given comparator. - * - * @param refs - * an array of database references to search - * @param entry - * an entry to compare against - * @param comparator - * @param mode SEARCH_MODE_FULL for all; SEARCH_MODE_NO_MAP_NO_VERSION optional - * @return - */ - static List searchRefs(List refs, DBRefEntry entry, - DbRefComp comparator, int mode) - { - List rfs = new ArrayList(); - if (refs == null || entry == null) - { - return rfs; - } - for (int i = 0, n = refs.size(); i < n; i++) - { - DBRefEntry e = refs.get(i); - if (comparator.matches(entry, e, SEARCH_MODE_FULL)) - { - rfs.add(e); - } - } - return rfs; - } - - interface DbRefComp - { - default public boolean matches(DBRefEntry refa, DBRefEntry refb) { - return matches(refa, refb, SEARCH_MODE_FULL); - }; - - public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode); -} + } + + /** + * Returns a (possibly empty) list of those references that match the given + * entry, according to the given comparator. + * + * @param refs an array of database references to search + * @param entry an entry to compare against + * @param comparator + * @param mode SEARCH_MODE_FULL for all; SEARCH_MODE_NO_MAP_NO_VERSION + * optional + * @return + */ + static List searchRefs(List refs, DBRefEntry entry, DbRefComp comparator, int mode) { + List rfs = new ArrayList(); + if (refs == null || entry == null) { + return rfs; + } + for (int i = 0, n = refs.size(); i < n; i++) { + DBRefEntry e = refs.get(i); + if (comparator.matches(entry, e, SEARCH_MODE_FULL)) { + rfs.add(e); + } + } + return rfs; + } + + interface DbRefComp { + default public boolean matches(DBRefEntry refa, DBRefEntry refb) { + return matches(refa, refb, SEARCH_MODE_FULL); + }; + + public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode); + } + + /** + * match on all non-null fields in refa + */ + // TODO unused - remove? would be broken by equating "" with null + public static DbRefComp matchNonNullonA = new DbRefComp() { + @Override + public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) { + if ((mode & DB_SOURCE) != 0 && + (refa.getSource() == null || DBRefUtils.getCanonicalName(refb.getSource()) + .equals(DBRefUtils.getCanonicalName(refa.getSource())))) { + if ((mode & DB_VERSION) != 0 && + (refa.getVersion() == null || refb.getVersion().equals(refa.getVersion()))) { + if ((mode & DB_ID) != 0 && + (refa.getAccessionId() == null || refb.getAccessionId().equals(refa.getAccessionId()))) { + if ((mode & DB_MAP) != 0 && + (refa.getMap() == null || (refb.getMap() != null && refb.getMap().equals(refa.getMap())))) { + return true; + } + } + } + } + return false; + } + }; + + /** + * either field is null or field matches for all of source, version, accession + * id and map. + */ + // TODO unused - remove? + public static DbRefComp matchEitherNonNull = new DbRefComp() { + @Override + public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) { + if (nullOrEqualSource(refa.getSource(), refb.getSource()) + && nullOrEqual(refa.getVersion(), refb.getVersion()) + && nullOrEqual(refa.getAccessionId(), refb.getAccessionId()) + && nullOrEqual(refa.getMap(), refb.getMap())) { + return true; + } + return false; + } + + }; + + /** + * accession ID and DB must be identical. Version is ignored. Map is either not + * defined or is a match (or is compatible?) + */ + // TODO unused - remove? + public static DbRefComp matchDbAndIdAndEitherMap = new DbRefComp() { + @Override + public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) { + if (refa.getSource() != null && refb.getSource() != null && DBRefUtils.getCanonicalName(refb.getSource()) + .equals(DBRefUtils.getCanonicalName(refa.getSource()))) { + // We dont care about version + if (refa.getAccessionId() != null && refb.getAccessionId() != null + // FIXME should be && not || here? + || refb.getAccessionId().equals(refa.getAccessionId())) { + if ((refa.getMap() == null || refb.getMap() == null) || (refa.getMap() != null + && refb.getMap() != null && refb.getMap().equals(refa.getMap()))) { + return true; + } + } + } + return false; + } + }; + + /** + * accession ID and DB must be identical. Version is ignored. No map on either + * or map but no maplist on either or maplist of map on a is the complement of + * maplist of map on b. + */ + // TODO unused - remove? + public static DbRefComp matchDbAndIdAndComplementaryMapList = new DbRefComp() { + @Override + public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) { + if (refa.getSource() != null && refb.getSource() != null && DBRefUtils.getCanonicalName(refb.getSource()) + .equals(DBRefUtils.getCanonicalName(refa.getSource()))) { + // We dont care about version + if (refa.getAccessionId() != null && refb.getAccessionId() != null + || refb.getAccessionId().equals(refa.getAccessionId())) { + if ((refa.getMap() == null && refb.getMap() == null) + || (refa.getMap() != null && refb.getMap() != null)) { + if ((refb.getMap().getMap() == null && refa.getMap().getMap() == null) + || (refb.getMap().getMap() != null && refa.getMap().getMap() != null + && refb.getMap().getMap().getInverse().equals(refa.getMap().getMap()))) { + return true; + } + } + } + } + return false; + } + }; + + /** + * accession ID and DB must be identical. Version is ignored. No map on both or + * or map but no maplist on either or maplist of map on a is equivalent to the + * maplist of map on b. + */ + // TODO unused - remove? + public static DbRefComp matchDbAndIdAndEquivalentMapList = new DbRefComp() { + @Override + public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) { + if (refa.getSource() != null && refb.getSource() != null && DBRefUtils.getCanonicalName(refb.getSource()) + .equals(DBRefUtils.getCanonicalName(refa.getSource()))) { + // We dont care about version + // if ((refa.getVersion()==null || refb.getVersion()==null) + // || refb.getVersion().equals(refa.getVersion())) + // { + if (refa.getAccessionId() != null && refb.getAccessionId() != null + || refb.getAccessionId().equals(refa.getAccessionId())) { + if (refa.getMap() == null && refb.getMap() == null) { + return true; + } + if (refa.getMap() != null && refb.getMap() != null + && ((refb.getMap().getMap() == null && refa.getMap().getMap() == null) + || (refb.getMap().getMap() != null && refa.getMap().getMap() != null + && refb.getMap().getMap().equals(refa.getMap().getMap())))) { + return true; + } + } + } + return false; + } + }; + + /** + * accession ID and DB must be identical, or null on a. Version is ignored. No + * map on either or map but no maplist on either or maplist of map on a is + * equivalent to the maplist of map on b. + */ + public static DbRefComp matchDbAndIdAndEitherMapOrEquivalentMapList = new DbRefComp() { + @Override + public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) { + if (refa.getSource() != null && refb.getSource() != null && DBRefUtils.getCanonicalName(refb.getSource()) + .equals(DBRefUtils.getCanonicalName(refa.getSource()))) { + // We dont care about version + + if (refa.getAccessionId() == null || refa.getAccessionId().equals(refb.getAccessionId())) { + if (refa.getMap() == null || refb.getMap() == null) { + return true; + } + if ((refa.getMap() != null && refb.getMap() != null) + && (refb.getMap().getMap() == null && refa.getMap().getMap() == null) + || (refb.getMap().getMap() != null && refa.getMap().getMap() != null + && (refb.getMap().getMap().equals(refa.getMap().getMap())))) { + return true; + } + } + } + return false; + } + }; + + /** + * accession ID only must be identical. + */ + public static DbRefComp matchId = new DbRefComp() { + @Override + public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) { + if (refa.getAccessionId() != null && refb.getAccessionId() != null + && refb.getAccessionId().equals(refa.getAccessionId())) { + return true; + } + return false; + } + }; + + /** + * Parses a DBRefEntry and adds it to the sequence, also a PDBEntry if the + * database is PDB. + *

+ * Used by file parsers to generate DBRefs from annotation within file (eg + * Stockholm) + * + * @param dbname + * @param version + * @param acn + * @param seq where to annotate with reference + * @return parsed version of entry that was added to seq (if any) + */ + public static DBRefEntry parseToDbRef(SequenceI seq, String dbname, String version, String acn) { + DBRefEntry ref = null; + if (dbname != null) { + String locsrc = DBRefUtils.getCanonicalName(dbname); + if (locsrc.equals(DBRefSource.PDB)) { + /* + * Check for PFAM style stockhom PDB accession id citation e.g. "1WRI A; 7-80;" + */ + Regex r = new com.stevesoft.pat.Regex("([0-9][0-9A-Za-z]{3})\\s*(.?)\\s*;\\s*([0-9]+)-([0-9]+)"); + if (r.search(acn.trim())) { + String pdbid = r.stringMatched(1); + String chaincode = r.stringMatched(2); + if (chaincode == null) { + chaincode = " "; + } + // String mapstart = r.stringMatched(3); + // String mapend = r.stringMatched(4); + if (chaincode.equals(" ")) { + chaincode = "_"; + } + // construct pdb ref. + ref = new DBRefEntry(locsrc, version, pdbid + chaincode); + PDBEntry pdbr = new PDBEntry(); + pdbr.setId(pdbid); + pdbr.setType(PDBEntry.Type.PDB); + pdbr.setChainCode(chaincode); + seq.addPDBId(pdbr); + } else { + System.err.println("Malformed PDB DR line:" + acn); + } + } else { + // default: + ref = new DBRefEntry(locsrc, version, acn); + } + } + if (ref != null) { + seq.addDBRef(ref); + } + return ref; + } + + /** + * Returns true if either object is null, or they are equal + * + * @param o1 + * @param o2 + * @return + */ + public static boolean nullOrEqual(Object o1, Object o2) { + if (o1 == null || o2 == null) { + return true; + } + return o1.equals(o2); + } + + /** + * canonicalise source string before comparing. null is always wildcard + * + * @param o1 - null or source string to compare + * @param o2 - null or source string to compare + * @return true if either o1 or o2 are null, or o1 equals o2 under + * DBRefUtils.getCanonicalName + * (o1).equals(DBRefUtils.getCanonicalName(o2)) + */ + public static boolean nullOrEqualSource(String o1, String o2) { + if (o1 == null || o2 == null) { + return true; + } + return DBRefUtils.getCanonicalName(o1).equals(DBRefUtils.getCanonicalName(o2)); + } + + /** + * Selects just the DNA or protein references from a set of references + * + * @param selectDna if true, select references to 'standard' DNA databases, else + * to 'standard' peptide databases + * @param refs a set of references to select from + * @return + */ + public static List selectDbRefs(boolean selectDna, List refs) { + return selectRefs(refs, selectDna ? DBRefSource.DNACODINGDBS : DBRefSource.PROTEINDBS); + // could attempt to find other cross + // refs here - ie PDB xrefs + // (not dna, not protein seq) + } + + /** + * Returns the (possibly empty) list of those supplied dbrefs which have the + * specified source database, with a case-insensitive match of source name + * + * @param dbRefs + * @param source + * @return + */ + public static List searchRefsForSource(List dbRefs, String source) { + List matches = new ArrayList(); + if (dbRefs != null && source != null) { + for (DBRefEntry dbref : dbRefs) { + if (source.equalsIgnoreCase(dbref.getSource())) { + matches.add(dbref); + } + } + } + return matches; + } + + /** + * promote direct database references to primary for nucleotide or protein + * sequences if they have an appropriate primary ref + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
Seq TypePrimary DBDirect which will be promoted
peptidesEnsemblUniprot
peptidesEnsemblUniprot
dnaEnsemblENA
+ * + * @param sequence + */ + public static void ensurePrimaries(SequenceI sequence, List pr) { + if (pr.size() == 0) { + // nothing to do + return; + } + int sstart = sequence.getStart(); + int send = sequence.getEnd(); + boolean isProtein = sequence.isProtein(); + BitSet bsSelect = new BitSet(); - /** - * match on all non-null fields in refa - */ - // TODO unused - remove? - public static DbRefComp matchNonNullonA = new DbRefComp() - { - @Override - public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) - { - if ((mode & DB_SOURCE) != 0 && refa.getSource() == null - || DBRefUtils.getCanonicalName(refb.getSource()).equals( - DBRefUtils.getCanonicalName(refa.getSource()))) - { - if ((mode & DB_VERSION) != 0 && refa.getVersion() == null - || refb.getVersion().equals(refa.getVersion())) - { - if ((mode & DB_ID) != 0 && refa.getAccessionId() == null - || refb.getAccessionId().equals(refa.getAccessionId())) - { - if ((mode & DB_MAP) != 0 && refa.getMap() == null || (refb.getMap() != null - && refb.getMap().equals(refa.getMap()))) - { - return true; - } - } - } - } - return false; - } - }; - - /** - * either field is null or field matches for all of source, version, accession - * id and map. - */ - // TODO unused - remove? - public static DbRefComp matchEitherNonNull = new DbRefComp() - { - @Override - public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) - { - if (nullOrEqualSource(refa.getSource(), refb.getSource()) - && nullOrEqual(refa.getVersion(), refb.getVersion()) - && nullOrEqual(refa.getAccessionId(), refb.getAccessionId()) - && nullOrEqual(refa.getMap(), refb.getMap())) - { - return true; - } - return false; - } - - }; - - /** - * accession ID and DB must be identical. Version is ignored. Map is either - * not defined or is a match (or is compatible?) - */ - // TODO unused - remove? - public static DbRefComp matchDbAndIdAndEitherMap = new DbRefComp() - { - @Override - public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) - { - if (refa.getSource() != null && refb.getSource() != null - && DBRefUtils.getCanonicalName(refb.getSource()).equals( - DBRefUtils.getCanonicalName(refa.getSource()))) - { - // We dont care about version - if (refa.getAccessionId() != null && refb.getAccessionId() != null - // FIXME should be && not || here? - || refb.getAccessionId().equals(refa.getAccessionId())) - { - if ((refa.getMap() == null || refb.getMap() == null) - || (refa.getMap() != null && refb.getMap() != null - && refb.getMap().equals(refa.getMap()))) - { - return true; - } - } - } - return false; - } - }; - - /** - * accession ID and DB must be identical. Version is ignored. No map on either - * or map but no maplist on either or maplist of map on a is the complement of - * maplist of map on b. - */ - // TODO unused - remove? - public static DbRefComp matchDbAndIdAndComplementaryMapList = new DbRefComp() - { - @Override - public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) - { - if (refa.getSource() != null && refb.getSource() != null - && DBRefUtils.getCanonicalName(refb.getSource()).equals( - DBRefUtils.getCanonicalName(refa.getSource()))) - { - // We dont care about version - if (refa.getAccessionId() != null && refb.getAccessionId() != null - || refb.getAccessionId().equals(refa.getAccessionId())) - { - if ((refa.getMap() == null && refb.getMap() == null) - || (refa.getMap() != null && refb.getMap() != null)) - { - if ((refb.getMap().getMap() == null - && refa.getMap().getMap() == null) - || (refb.getMap().getMap() != null - && refa.getMap().getMap() != null - && refb.getMap().getMap().getInverse() - .equals(refa.getMap().getMap()))) - { - return true; - } - } - } - } - return false; - } - }; - - /** - * accession ID and DB must be identical. Version is ignored. No map on both - * or or map but no maplist on either or maplist of map on a is equivalent to - * the maplist of map on b. - */ - // TODO unused - remove? - public static DbRefComp matchDbAndIdAndEquivalentMapList = new DbRefComp() - { - @Override - public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) - { - if (refa.getSource() != null && refb.getSource() != null - && DBRefUtils.getCanonicalName(refb.getSource()).equals( - DBRefUtils.getCanonicalName(refa.getSource()))) - { - // We dont care about version - // if ((refa.getVersion()==null || refb.getVersion()==null) - // || refb.getVersion().equals(refa.getVersion())) - // { - if (refa.getAccessionId() != null && refb.getAccessionId() != null - || refb.getAccessionId().equals(refa.getAccessionId())) - { - if (refa.getMap() == null && refb.getMap() == null) - { - return true; - } - if (refa.getMap() != null && refb.getMap() != null - && ((refb.getMap().getMap() == null - && refa.getMap().getMap() == null) - || (refb.getMap().getMap() != null - && refa.getMap().getMap() != null - && refb.getMap().getMap() - .equals(refa.getMap().getMap())))) - { - return true; - } - } - } - return false; - } - }; - - /** - * accession ID and DB must be identical, or null on a. Version is ignored. No - * map on either or map but no maplist on either or maplist of map on a is - * equivalent to the maplist of map on b. - */ - public static DbRefComp matchDbAndIdAndEitherMapOrEquivalentMapList = new DbRefComp() - { - @Override - public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) - { - if (refa.getSource() != null && refb.getSource() != null - && DBRefUtils.getCanonicalName(refb.getSource()).equals( - DBRefUtils.getCanonicalName(refa.getSource()))) - { - // We dont care about version - - if (refa.getAccessionId() == null - || refa.getAccessionId().equals(refb.getAccessionId())) - { - if (refa.getMap() == null || refb.getMap() == null) - { - return true; - } - if ((refa.getMap() != null && refb.getMap() != null) - && (refb.getMap().getMap() == null - && refa.getMap().getMap() == null) - || (refb.getMap().getMap() != null - && refa.getMap().getMap() != null - && (refb.getMap().getMap() - .equals(refa.getMap().getMap())))) - { - return true; - } - } - } - return false; - } - }; - - /** - * accession ID only must be identical. - */ - public static DbRefComp matchId = new DbRefComp() - { - @Override - public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) - { - if (refa.getAccessionId() != null && refb.getAccessionId() != null - && refb.getAccessionId().equals(refa.getAccessionId())) - { - return true; - } - return false; - } - }; - - /** - * Parses a DBRefEntry and adds it to the sequence, also a PDBEntry if the - * database is PDB. - *

- * Used by file parsers to generate DBRefs from annotation within file (eg - * Stockholm) - * - * @param dbname - * @param version - * @param acn - * @param seq - * where to annotate with reference - * @return parsed version of entry that was added to seq (if any) - */ - public static DBRefEntry parseToDbRef(SequenceI seq, String dbname, - String version, String acn) - { - DBRefEntry ref = null; - if (dbname != null) - { - String locsrc = DBRefUtils.getCanonicalName(dbname); - if (locsrc.equals(DBRefSource.PDB)) - { - /* - * Check for PFAM style stockhom PDB accession id citation e.g. - * "1WRI A; 7-80;" - */ - Regex r = new com.stevesoft.pat.Regex( - "([0-9][0-9A-Za-z]{3})\\s*(.?)\\s*;\\s*([0-9]+)-([0-9]+)"); - if (r.search(acn.trim())) - { - String pdbid = r.stringMatched(1); - String chaincode = r.stringMatched(2); - if (chaincode == null) - { - chaincode = " "; - } - // String mapstart = r.stringMatched(3); - // String mapend = r.stringMatched(4); - if (chaincode.equals(" ")) - { - chaincode = "_"; - } - // construct pdb ref. - ref = new DBRefEntry(locsrc, version, pdbid + chaincode); - PDBEntry pdbr = new PDBEntry(); - pdbr.setId(pdbid); - pdbr.setType(PDBEntry.Type.PDB); - pdbr.setChainCode(chaincode); - seq.addPDBId(pdbr); - } - else - { - System.err.println("Malformed PDB DR line:" + acn); - } - } - else - { - // default: - ref = new DBRefEntry(locsrc, version, acn); - } - } - if (ref != null) - { - seq.addDBRef(ref); - } - return ref; - } - - /** - * Returns true if either object is null, or they are equal - * - * @param o1 - * @param o2 - * @return - */ - public static boolean nullOrEqual(Object o1, Object o2) - { - if (o1 == null || o2 == null) - { - return true; - } - return o1.equals(o2); - } - - /** - * canonicalise source string before comparing. null is always wildcard - * - * @param o1 - * - null or source string to compare - * @param o2 - * - null or source string to compare - * @return true if either o1 or o2 are null, or o1 equals o2 under - * DBRefUtils.getCanonicalName - * (o1).equals(DBRefUtils.getCanonicalName(o2)) - */ - public static boolean nullOrEqualSource(String o1, String o2) - { - if (o1 == null || o2 == null) - { - return true; - } - return DBRefUtils.getCanonicalName(o1) - .equals(DBRefUtils.getCanonicalName(o2)); - } - - /** - * Selects just the DNA or protein references from a set of references - * - * @param selectDna - * if true, select references to 'standard' DNA databases, else to - * 'standard' peptide databases - * @param refs - * a set of references to select from - * @return - */ - public static List selectDbRefs(boolean selectDna, - List refs) - { - return selectRefs(refs, - selectDna ? DBRefSource.DNACODINGDBS : DBRefSource.PROTEINDBS); - // could attempt to find other cross - // refs here - ie PDB xrefs - // (not dna, not protein seq) - } - - /** - * Returns the (possibly empty) list of those supplied dbrefs which have the - * specified source database, with a case-insensitive match of source name - * - * @param dbRefs - * @param source - * @return - */ - public static List searchRefsForSource(List dbRefs, - String source) - { - List matches = new ArrayList(); - if (dbRefs != null && source != null) - { - for (DBRefEntry dbref : dbRefs) - { - if (source.equalsIgnoreCase(dbref.getSource())) - { - matches.add(dbref); - } - } - } - return matches; - } - - /** - * promote direct database references to primary for nucleotide or protein - * sequences if they have an appropriate primary ref - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - *
Seq TypePrimary DBDirect which will be promoted
peptidesEnsemblUniprot
peptidesEnsemblUniprot
dnaEnsemblENA
- * - * @param sequence - */ - public static void ensurePrimaries(SequenceI sequence, List pr) - { - if (pr.size() == 0) - { - // nothing to do - return; - } - int sstart = sequence.getStart(); - int send = sequence.getEnd(); - boolean isProtein = sequence.isProtein(); - BitSet bsSelect = new BitSet(); - // List selfs = new ArrayList(); // { @@ -713,92 +585,78 @@ public class DBRefUtils // return; // } - List dbrefs = sequence.getDBRefs(); - bsSelect.set(0, dbrefs.size()); - - if (!selectRefsBS(dbrefs, isProtein ? DBRefSource.PROTEINDBSKEYS : DBRefSource.DNACODINGDBSKEYS, bsSelect)) - return; - + List dbrefs = sequence.getDBRefs(); + bsSelect.set(0, dbrefs.size()); + + if (!selectRefsBS(dbrefs, isProtein ? DBRefSource.PROTEIN_MASK : DBRefSource.DNA_CODING_MASK, bsSelect)) + return; + // selfs.addAll(selfArray); // } - // filter non-primary refs - for (int ip = pr.size(); --ip >= 0;) - { - DBRefEntry p = pr.get(ip); - for (int i = bsSelect.nextSetBit(0); i >= 0; i = bsSelect.nextSetBit(i + 1)) { - if (dbrefs.get(i) == p) - bsSelect.clear(i); - } + // filter non-primary refs + for (int ip = pr.size(); --ip >= 0;) { + DBRefEntry p = pr.get(ip); + for (int i = bsSelect.nextSetBit(0); i >= 0; i = bsSelect.nextSetBit(i + 1)) { + if (dbrefs.get(i) == p) + bsSelect.clear(i); + } // while (selfs.contains(p)) // { // selfs.remove(p); // } - } + } // List toPromote = new ArrayList(); - - - for (int ip = pr.size(), keys = 0; --ip >= 0 && keys != DBRefSource.ALL_MASKS;) - { - DBRefEntry p = pr.get(ip); - if (isProtein) - { - switch (getCanonicalName(p.getSource())) - { - case DBRefSource.UNIPROT: - keys |= DBRefSource.UNIPROT_MASK; - break; - case DBRefSource.ENSEMBL: - keys |= DBRefSource.ENSEMBL_MASK; - break; - } - } - else - { - // TODO: promote transcript refs ?? - } - if (keys == 0 || !selectRefsBS(dbrefs, DBRefSource.PROMTYPES[keys], bsSelect)) - return; + for (int ip = pr.size(), keys = 0; --ip >= 0 && keys != DBRefSource.PRIMARY_MASK;) { + DBRefEntry p = pr.get(ip); + if (isProtein) { + switch (getCanonicalName(p.getSource())) { + case DBRefSource.UNIPROT: + keys |= DBRefSource.UNIPROT_MASK; + break; + case DBRefSource.ENSEMBL: + keys |= DBRefSource.ENSEMBL_MASK; + break; + } + } else { + // TODO: promote transcript refs ?? + } + if (keys == 0 || !selectRefsBS(dbrefs, keys, bsSelect)) + return; // if (candidates != null) - { - for (int ic = bsSelect.nextSetBit(0); ic >= 0; ic = bsSelect.nextSetBit(ic + 1)) + { + for (int ic = bsSelect.nextSetBit(0); ic >= 0; ic = bsSelect.nextSetBit(ic + 1)) // for (int ic = 0, n = candidates.size(); ic < n; ic++) - { - DBRefEntry cand = dbrefs.get(ic);//candidates.get(ic); - if (cand.hasMap()) - { - Mapping map = cand.getMap(); - SequenceI cto = map.getTo(); - if (cto != null - && cto != sequence) - { - // can't promote refs with mappings to other sequences - continue; - } - MapList mlist = map.getMap(); - if (mlist.getFromLowest() != sstart - && mlist.getFromHighest() != send) - { - // can't promote refs with mappings from a region of this sequence - // - eg CDS - continue; - } - } - // and promote - cand.setVersion(p.getVersion() + " (promoted)"); - bsSelect.clear(ic); - //selfs.remove(cand); + { + DBRefEntry cand = dbrefs.get(ic);// candidates.get(ic); + if (cand.hasMap()) { + Mapping map = cand.getMap(); + SequenceI cto = map.getTo(); + if (cto != null && cto != sequence) { + // can't promote refs with mappings to other sequences + continue; + } + MapList mlist = map.getMap(); + if (mlist.getFromLowest() != sstart && mlist.getFromHighest() != send) { + // can't promote refs with mappings from a region of this sequence + // - eg CDS + continue; + } + } + // and promote - not that version must be non-null here, + // as p must have passed isPrimaryCandidate() + cand.setVersion(p.getVersion() + " (promoted)"); + bsSelect.clear(ic); + // selfs.remove(cand); // toPromote.add(cand); - if (!cand.isPrimaryCandidate()) - { - System.out.println( - "Warning: Couldn't promote dbref " + cand.toString() - + " for sequence " + sequence.toString()); - } - } - } - } - } + if (!cand.isPrimaryCandidate()) { + System.out.println("Warning: Couldn't promote dbref " + cand.toString() + " for sequence " + + sequence.toString()); + } + } + } + } + } } -- 1.7.10.2