X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FDBRefSource.java;h=f384b1e1109498b3d4678f560b86a50be6c84d0a;hb=57738a1f3c19b1c3a00bd3ac5108f8cd0af32f99;hp=91b49ebef4b33ded9060d790cae8beec80f5bfc2;hpb=4da7d6ec8ef5ff030c6d06d37a099da2d92d7246;p=jalview.git diff --git a/src/jalview/datamodel/DBRefSource.java b/src/jalview/datamodel/DBRefSource.java index 91b49eb..f384b1e 100755 --- a/src/jalview/datamodel/DBRefSource.java +++ b/src/jalview/datamodel/DBRefSource.java @@ -21,81 +21,156 @@ package jalview.datamodel; /** + * BH 2018 SwingJS note: If additional final static Strings are added to this + * file, they should be added to public static final String[] allTypes. + * * Defines internal constants for unambiguous annotation of DbRefEntry source * strings and describing the data retrieved from external database sources (see - * jalview.ws.DbSourcProxy) + * jalview.ws.DbSourcProxy)
+ * TODO: replace with ontology to allow recognition of particular attributes + * (e.g. protein coding, alignment (ortholog db, paralog db, domain db), + * genomic, transcriptomic, 3D structure providing (PDB, MODBASE, etc) ..). + * + * * * @author JimP * */ +import java.util.Locale; + public class DBRefSource { - /** - * UNIPROT Accession Number - */ - public static String UNIPROT = "UNIPROT"; - /** - * UNIPROT Entry Name - */ - public static String UP_NAME = "UNIPROT_NAME".toUpperCase(); + public static final String UNIPROT = "UNIPROT"; + + public static final String UP_NAME = "UNIPROT_NAME" + .toUpperCase(Locale.ROOT); /** * Uniprot Knowledgebase/TrEMBL as served from EMBL protein products. */ - public static final String UNIPROTKB = "UniProtKB/TrEMBL".toUpperCase(); + public static final String UNIPROTKB = "UniProtKB/TrEMBL" + .toUpperCase(Locale.ROOT); + + public static final String ENSEMBL = "ENSEMBL"; + + public static final String ENSEMBLGENOMES = "ENSEMBLGENOMES"; + + public static final String EMBL = "EMBL"; + + public static final String EMBLCDS = "EMBLCDS"; public static final String EMBLCDSProduct = "EMBLCDSProtein" - .toUpperCase(); + .toUpperCase(Locale.ROOT); - /** - * PDB Entry Code - */ - public static String PDB = "PDB"; + public static final String PDB = "PDB"; - /** - * mmCIF Entry Code - */ - public static String MMCIF = "mmCIF"; + public static final String PFAM = "PFAM"; - /** - * EMBL ID - */ - public static String EMBL = "EMBL"; + public static final String RFAM = "RFAM"; - /** - * EMBLCDS ID - */ - public static String EMBLCDS = "EMBLCDS"; + public static final String GENEDB = "GeneDB".toUpperCase(Locale.ROOT); - /** - * PFAM ID - */ - public static String PFAM = "PFAM"; + public static final String PDB_CANONICAL_NAME = PDB; - /** - * RFAM ID - */ - public static String RFAM = "RFAM"; + public static final String[] allSources = new String[] { UNIPROT, UP_NAME, + UNIPROTKB, ENSEMBL, ENSEMBLGENOMES, EMBL, EMBLCDS, EMBLCDSProduct, + PDB, PFAM, RFAM, GENEDB }; - /** - * GeneDB ID - */ - public static final String GENEDB = "GeneDB".toUpperCase(); + public static final int UNIPROT_MASK = 1 << 0; - /** - * Ensembl - */ - public static final String ENSEMBL = "ENSEMBL"; + public static final int UP_NAME_MASK = 1 << 1; + + public static final int UNIPROT_KB_MASK = 1 << 2; + + public static final int ENSEMBL_MASK = 1 << 3; + + public static final int ENSEMBL_GENOMES_MASK = 1 << 4; + + public static final int EMBL_MASK = 1 << 5; + + public static final int EMBL_CDS_MASK = 1 << 6; + + public static final int EMBL_CDS_PRODUCT_MASK = 1 << 7; + + public static final int PDB_MASK = 1 << 8; + + public static final int PFAM_MASK = 1 << 9; + + public static final int RFAM_MASK = 1 << 10; + + public static final int GENE_DB_MASK = 1 << 11; + + public static final int MASK_COUNT = 12; + + public static final int ALL_MASKS = (1 << MASK_COUNT) - 1; + + public static int getSourceKey(String name) + { + for (int i = 0; i < MASK_COUNT; i++) + { + if (name.equals(allSources[i])) + { + return 1 << i; + } + } + return 0; + } + + public static final int PRIMARY_MASK = UNIPROT_MASK | ENSEMBL_MASK; /** * List of databases whose sequences might have coding regions annotated */ - public static final String[] DNACODINGDBS = { EMBL, EMBLCDS, GENEDB, - ENSEMBL }; + public static final String[] DNACODINGDBS = { ENSEMBL, ENSEMBLGENOMES, + EMBL, EMBLCDS, GENEDB }; + + public static final int DNA_CODING_MASK = ENSEMBL_MASK + | ENSEMBL_GENOMES_MASK | EMBL_MASK | EMBL_CDS_MASK | GENE_DB_MASK; public static final String[] CODINGDBS = { EMBLCDS, GENEDB, ENSEMBL }; - public static final String[] PROTEINDBS = { UNIPROT, PDB, UNIPROTKB, - EMBLCDSProduct, ENSEMBL }; // Ensembl ENSP* entries are protein + public static final int CODING_MASK = EMBL_CDS_MASK | GENE_DB_MASK + | ENSEMBL_MASK; + + public static final String[] PROTEINDBS = { UNIPROT, UNIPROTKB, ENSEMBL, + EMBLCDSProduct }; // Ensembl ENSP* entries are protein + + public static final int PROTEIN_MASK = UNIPROT_MASK | UNIPROT_KB_MASK + | ENSEMBL_MASK | EMBL_CDS_PRODUCT_MASK; + + // for SequenceAnnotationReport only + + // public static final String[][] PRIMARY_SOURCES = new String[][] { + // CODINGDBS, DNACODINGDBS, PROTEINDBS }; + // + public static final int PRIMARY_SOURCES_MASK = CODING_MASK + | DNA_CODING_MASK | PROTEIN_MASK; + + public static boolean isPrimarySource(String source) + { + return ((PRIMARY_SOURCES_MASK & getSourceKey(source)) != 0); + } + + public static boolean isPrimaryCandidate(String ucversion) + { + // tricky - this test really needs to search the sequence's set of dbrefs to + // see if there is a primary reference that derived this reference. + for (int i = allSources.length; --i >= 0;) + { + if (ucversion.startsWith(allSources[i])) // BH 2019.01.25 + // .toUpperCase(Locale.ROOT) + // unnecessary here for + // allSources + { + // by convention, many secondary references inherit the primary + // reference's + // source string as a prefix for any version information from the + // secondary reference. + return false; + } + } + return true; + } + }