/**
 * Defines internal constants for unambiguous annotation of DbRefEntry source
 * strings and describing the data retrieved from external database sources
 * (see jalview.ws.DbSourceProxy).
 * <p>
 * BH 2018 SwingJS note: if additional source name constants are added to this
 * class, they must also be appended to {@link #allSources}, given a matching
 * {@code *_MASK} bit at the same index, and {@link #MASK_COUNT} must be
 * increased accordingly. {@link #getSourceKey(String)} relies on bit
 * {@code i} corresponding to {@code allSources[i]}.
 * <p>
 * TODO: replace with ontology to allow recognition of particular attributes
 * (e.g. protein coding, alignment (ortholog db, paralog db, domain db),
 * genomic, transcriptomic, 3D structure providing (PDB, MODBASE, etc) ..).
 *
 * @author JimP
 */
public class DBRefSource
{

  /**
   * UNIPROT Accession Number
   */
  public static final String UNIPROT = "UNIPROT";

  /**
   * UNIPROT Entry Name
   */
  public static final String UP_NAME = canonical("UNIPROT_NAME");

  /**
   * Uniprot Knowledgebase/TrEMBL as served from EMBL protein products.
   */
  public static final String UNIPROTKB = canonical("UniProtKB/TrEMBL");

  public static final String ENSEMBL = "ENSEMBL";

  public static final String ENSEMBLGENOMES = "ENSEMBLGENOMES";

  /**
   * EMBL ID
   */
  public static final String EMBL = "EMBL";

  /**
   * EMBLCDS ID
   */
  public static final String EMBLCDS = "EMBLCDS";

  public static final String EMBLCDSProduct = canonical("EMBLCDSProtein");

  /**
   * PDB Entry Code
   */
  public static final String PDB = "PDB";

  /**
   * PFAM ID
   */
  public static final String PFAM = "PFAM";

  /**
   * RFAM ID
   */
  public static final String RFAM = "RFAM";

  /**
   * GeneDB ID
   */
  public static final String GENEDB = canonical("GeneDB");

  /**
   * Alias for {@link #PDB}.
   */
  public static final String PDB_CANONICAL_NAME = PDB;

  /**
   * Every source name declared above. The position of a name in this array is
   * the bit position of its {@code *_MASK} constant, so the order must not be
   * changed independently of the masks.
   */
  public static final String[] allSources = new String[] { UNIPROT, UP_NAME,
      UNIPROTKB, ENSEMBL, ENSEMBLGENOMES, EMBL, EMBLCDS, EMBLCDSProduct,
      PDB, PFAM, RFAM, GENEDB };

  public static final int UNIPROT_MASK = 1 << 0;

  public static final int UP_NAME_MASK = 1 << 1;

  public static final int UNIPROT_KB_MASK = 1 << 2;

  public static final int ENSEMBL_MASK = 1 << 3;

  public static final int ENSEMBL_GENOMES_MASK = 1 << 4;

  public static final int EMBL_MASK = 1 << 5;

  public static final int EMBL_CDS_MASK = 1 << 6;

  public static final int EMBL_CDS_PRODUCT_MASK = 1 << 7;

  public static final int PDB_MASK = 1 << 8;

  public static final int PFAM_MASK = 1 << 9;

  public static final int RFAM_MASK = 1 << 10;

  public static final int GENE_DB_MASK = 1 << 11;

  /**
   * Number of mask bits defined above (one per entry of {@link #allSources}).
   */
  public static final int MASK_COUNT = 12;

  /**
   * All {@link #MASK_COUNT} mask bits set.
   */
  public static final int ALL_MASKS = (1 << MASK_COUNT) - 1;

  public static final int PRIMARY_MASK = UNIPROT_MASK | ENSEMBL_MASK;

  /**
   * List of databases whose sequences might have coding regions annotated
   */
  public static final String[] DNACODINGDBS = { ENSEMBL, ENSEMBLGENOMES,
      EMBL, EMBLCDS, GENEDB };

  /**
   * Bit-mask equivalent of {@link #DNACODINGDBS}.
   */
  public static final int DNA_CODING_MASK = ENSEMBL_MASK
          | ENSEMBL_GENOMES_MASK | EMBL_MASK | EMBL_CDS_MASK | GENE_DB_MASK;

  public static final String[] CODINGDBS = { EMBLCDS, GENEDB, ENSEMBL };

  /**
   * Bit-mask equivalent of {@link #CODINGDBS}.
   */
  public static final int CODING_MASK = EMBL_CDS_MASK | GENE_DB_MASK
          | ENSEMBL_MASK;

  /**
   * Protein databases. Ensembl ENSP* entries are protein.
   */
  public static final String[] PROTEINDBS = { UNIPROT, UNIPROTKB, ENSEMBL,
      EMBLCDSProduct };

  /**
   * Bit-mask equivalent of {@link #PROTEINDBS}.
   */
  public static final int PROTEIN_MASK = UNIPROT_MASK | UNIPROT_KB_MASK
          | ENSEMBL_MASK | EMBL_CDS_PRODUCT_MASK;

  /**
   * Union of {@link #CODING_MASK}, {@link #DNA_CODING_MASK} and
   * {@link #PROTEIN_MASK}; replaces the former PRIMARY_SOURCES array of
   * arrays. For SequenceAnnotationReport only.
   */
  public static final int PRIMARY_SOURCES_MASK = CODING_MASK
          | DNA_CODING_MASK | PROTEIN_MASK;

  /**
   * Returns the single-bit mask for a source name.
   * 
   * @param name
   *          source name; compared with {@code equals}, so it must match an
   *          entry of {@link #allSources} exactly (including case)
   * @return {@code 1 << i} where {@code i} is the index of {@code name} in
   *         {@link #allSources}, or 0 if {@code name} is null or not a
   *         known source
   */
  public static int getSourceKey(String name)
  {
    if (name == null)
    {
      // an absent source matches nothing, rather than throwing
      return 0;
    }
    for (int i = 0; i < MASK_COUNT; i++)
    {
      if (name.equals(allSources[i]))
      {
        return 1 << i;
      }
    }
    return 0;
  }

  /**
   * Tests whether a source name is one of the sources covered by
   * {@link #PRIMARY_SOURCES_MASK}.
   * 
   * @param source
   *          source name, matched as for {@link #getSourceKey(String)}
   * @return true if the source's bit is set in {@link #PRIMARY_SOURCES_MASK};
   *         false otherwise, including for null or unknown names
   */
  public static boolean isPrimarySource(String source)
  {
    return (PRIMARY_SOURCES_MASK & getSourceKey(source)) != 0;
  }

  /**
   * Tests whether a database reference's version string is compatible with
   * the reference being a primary one.
   * <p>
   * By convention, many secondary references inherit the primary reference's
   * source string as a prefix for any version information from the secondary
   * reference, so a version starting with any name in {@link #allSources} is
   * rejected.
   * <p>
   * Tricky - this test really needs to search the sequence's set of dbrefs to
   * see if there is a primary reference that derived this reference.
   * 
   * @param ucversion
   *          version string; the comparison is case-sensitive against the
   *          upper-case names in {@link #allSources}, so the caller is
   *          expected to supply it in upper case
   * @return false if {@code ucversion} is null or starts with a known source
   *         name; true otherwise (note that an empty string yields true)
   */
  public static boolean isPrimaryCandidate(String ucversion)
  {
    if (ucversion == null)
    {
      // a null version is not treated as a real reference
      return false;
    }
    for (String source : allSources)
    {
      // allSources entries are already upper case, so no conversion needed
      if (ucversion.startsWith(source))
      {
        return false;
      }
    }
    return true;
  }

  /**
   * Converts a database's display name to the locale-independent upper-case
   * form used for the source name constants.
   */
  private static String canonical(String displayName)
  {
    return displayName.toUpperCase(java.util.Locale.ROOT);
  }

}