X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FDBRefSource.java;h=cb853419154542c2a41d4df6beb24c0dcd531f71;hb=HEAD;hp=0552b2ce675b391bc9a59f958b203a10c57f2e59;hpb=3448f766a6dfe127f1d01e533c14d10aede962fa;p=jalview.git diff --git a/src/jalview/datamodel/DBRefSource.java b/src/jalview/datamodel/DBRefSource.java index 0552b2c..cb85341 100755 --- a/src/jalview/datamodel/DBRefSource.java +++ b/src/jalview/datamodel/DBRefSource.java @@ -1,102 +1,181 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer - * Copyright (C) 2007 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * along with Jalview. 
If not, see <http://www.gnu.org/licenses/>. + * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.datamodel; + /** - * Defines internal constants for unambiguous annotation - * of DbRefEntry source strings and describing the data - * retrieved from external database sources (see jalview.ws.DbSourcProxy) + * BH 2018 SwingJS note: If additional final static Strings are added to this + * file, they should be added to public static final String[] allSources. + * + * Defines internal constants for unambiguous annotation of DbRefEntry source + * strings and describing the data retrieved from external database sources (see + * jalview.ws.DbSourcProxy)
+ * TODO: replace with ontology to allow recognition of particular attributes + * (e.g. protein coding, alignment (ortholog db, paralog db, domain db), + * genomic, transcriptomic, 3D structure providing (PDB, MODBASE, etc) ..). + * + * + * * @author JimP - * + * */ +import java.util.Locale; + public class DBRefSource { + + public static final String UNIPROT = "UNIPROT"; + + public static final String UP_NAME = "UNIPROT_NAME" + .toUpperCase(Locale.ROOT); + /** - * UNIPROT Accession Number - */ - public static String UNIPROT = "UNIPROT"; - /** - * UNIPROT Entry Name - */ - public static String UP_NAME = "UNIPROT_NAME"; - /** - * Uniprot Knowledgebase/TrEMBL - * as served from EMBL protein products. - */ - public static final String UNIPROTKB = "UniProtKB/TrEMBL"; - /** - * PDB Entry Code - */ - public static String PDB = "PDB"; - /** - * EMBL ID - */ - public static String EMBL = "EMBL"; - /** - * EMBLCDS ID - */ - public static String EMBLCDS = "EMBLCDS"; - /** - * PFAM ID - */ - public static String PFAM = "PFAM"; - /** - * GeneDB ID + * Uniprot Knowledgebase/TrEMBL as served from EMBL protein products. 
*/ - public static final String GENEDB = "GeneDB"; + public static final String UNIPROTKB = "UniProtKB/TrEMBL" + .toUpperCase(Locale.ROOT); + + public static final String ENSEMBL = "ENSEMBL"; + + public static final String ENSEMBLGENOMES = "ENSEMBLGENOMES"; + + public static final String EMBL = "EMBL"; + + public static final String EMBLCDS = "EMBLCDS"; + + public static final String EMBLCDSProduct = "EMBLCDSProtein" + .toUpperCase(Locale.ROOT); + + public static final String PDB = "PDB"; + + public static final String PFAM = "PFAM"; + + public static final String RFAM = "RFAM"; + + public static final String GENEDB = "GeneDB".toUpperCase(Locale.ROOT); + + public static final String PDB_CANONICAL_NAME = PDB; + + public static final String[] allSources = new String[] { UNIPROT, UP_NAME, + UNIPROTKB, ENSEMBL, ENSEMBLGENOMES, EMBL, EMBLCDS, EMBLCDSProduct, + PDB, PFAM, RFAM, GENEDB }; + + public static final int UNIPROT_MASK = 1 << 0; + + public static final int UP_NAME_MASK = 1 << 1; + + public static final int UNIPROT_KB_MASK = 1 << 2; + + public static final int ENSEMBL_MASK = 1 << 3; + + public static final int ENSEMBL_GENOMES_MASK = 1 << 4; + + public static final int EMBL_MASK = 1 << 5; + + public static final int EMBL_CDS_MASK = 1 << 6; + + public static final int EMBL_CDS_PRODUCT_MASK = 1 << 7; + + public static final int PDB_MASK = 1 << 8; + + public static final int PFAM_MASK = 1 << 9; + + public static final int RFAM_MASK = 1 << 10; + + public static final int GENE_DB_MASK = 1 << 11; + + public static final int MASK_COUNT = 12; + + public static final int ALL_MASKS = (1 << MASK_COUNT) - 1; + + public static int getSourceKey(String name) + { + for (int i = 0; i < MASK_COUNT; i++) + { + if (name.equals(allSources[i])) + { + return 1 << i; + } + } + return 0; + } + + public static final int PRIMARY_MASK = UNIPROT_MASK | ENSEMBL_MASK; /** * List of databases whose sequences might have coding regions annotated */ - public static final String[] DNACODINGDBS = { 
EMBL, EMBLCDS, GENEDB}; - public static final String[] CODINGDBS = { EMBLCDS, GENEDB}; - public static final String[] PROTEINDBS = { UNIPROT, PDB, UNIPROTKB}; - public static final String[] PROTEINSEQ = { UNIPROT, UNIPROTKB}; - public static final String[] PROTEINSTR = { PDB }; - public static final String[] DOMAINDBS = { PFAM }; - /** - * set of unique DBRefSource property constants. - * These could be used to reconstruct the above groupings - */ - public static final Object SEQDB = "SQ"; - /** - * database of nucleic acid sequences - */ - public static final Object DNASEQDB = "NASQ"; - /** - * database of amino acid sequences - */ - public static final Object PROTSEQDB = "PROTSQ"; - /** - * database of cDNA sequences - */ - public static final Object CODINGSEQDB = "CODING"; - /** - * database of na sequences with exon annotation - */ - public static final Object DNACODINGSEQDB = "XONCODING"; - /** - * DB returns several sequences associated with a protein domain - */ - public static final Object DOMAINDB = "DOMAIN"; - /** - * DB query can take multiple accession codes concatenated - * by a separator. Value of property indicates maximum number of accession codes to send at a time. 
- */ - public static final Object MULTIACC = "MULTIACC"; + public static final String[] DNACODINGDBS = { ENSEMBL, ENSEMBLGENOMES, + EMBL, EMBLCDS, GENEDB }; + + public static final int DNA_CODING_MASK = ENSEMBL_MASK + | ENSEMBL_GENOMES_MASK | EMBL_MASK | EMBL_CDS_MASK | GENE_DB_MASK; + + public static final String[] CODINGDBS = { EMBLCDS, GENEDB, ENSEMBL }; + + public static final int CODING_MASK = EMBL_CDS_MASK | GENE_DB_MASK + | ENSEMBL_MASK; + + public static final String[] PROTEINDBS = { UNIPROT, UNIPROTKB, ENSEMBL, + EMBLCDSProduct }; // Ensembl ENSP* entries are protein + + public static final int PROTEIN_MASK = UNIPROT_MASK | UNIPROT_KB_MASK + | ENSEMBL_MASK | EMBL_CDS_PRODUCT_MASK; + + // for SequenceAnnotationReport only + + // public static final String[][] PRIMARY_SOURCES = new String[][] { + // CODINGDBS, DNACODINGDBS, PROTEINDBS }; + // + public static final int PRIMARY_SOURCES_MASK = CODING_MASK + | DNA_CODING_MASK | PROTEIN_MASK; + + public static boolean isPrimarySource(String source) + { + return ((PRIMARY_SOURCES_MASK & getSourceKey(source)) != 0); + } + + public static boolean isPrimaryCandidate(String ucversion) + { + if (ucversion == null) + { + // Null/empty version is not a real reference ? + return false; + } + // tricky - this test really needs to search the sequence's set of dbrefs to + // see if there is a primary reference that derived this reference. + for (int i = allSources.length; --i >= 0;) + { + if (ucversion.startsWith(allSources[i])) // BH 2019.01.25 + // .toUpperCase(Locale.ROOT) + // unnecessary here for + // allSources + { + // by convention, many secondary references inherit the primary + // reference's + // source string as a prefix for any version information from the + // secondary reference. + return false; + } + } + return true; + } + }