2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel;
/**
 * BH 2018 SwingJS note: If additional final static Strings are added to this
 * file, they should be added to public static final String[] allSources
 * (keeping the *_MASK constants and MASK_COUNT in step with that array).
 *
 * Defines internal constants for unambiguous annotation of DbRefEntry source
 * strings and describing the data retrieved from external database sources (see
 * jalview.ws.DbSourceProxy) <br/>
 * TODO: replace with ontology to allow recognition of particular attributes
 * (e.g. protein coding, alignment (ortholog db, paralog db, domain db),
 * genomic, transcriptomic, 3D structure providing (PDB, MODBASE, etc) ..).
 */
39 public class DBRefSource
44 public static final String UNIPROT = "UNIPROT";
46 public static final String UP_NAME = "UNIPROT_NAME";
48 * Uniprot Knowledgebase/TrEMBL as served from EMBL protein products.
50 public static final String UNIPROTKB = "UniProtKB/TrEMBL".toUpperCase();
52 public static final String ENSEMBL = "ENSEMBL";
53 public static final String ENSEMBLGENOMES = "ENSEMBLGENOMES";
55 public static final String EMBL = "EMBL";
56 public static final String EMBLCDS = "EMBLCDS";
57 public static final String EMBLCDSProduct = "EMBLCDSProtein".toUpperCase();
59 public static final String PDB = "PDB";
61 public static final String PFAM = "PFAM";
63 public static final String RFAM = "RFAM";
64 public static final String GENEDB = "GeneDB".toUpperCase();
66 public static final String PFAM_FULL = "PFAM (Full)";
68 public static final String PFAM_SEED = "PFAM (Seed)";
70 public static final String RFAM_SEED = "RFAM (Seed)";
72 public static final String PDB_CANONICAL_NAME = PDB;
75 public static final String[] allSources = new String[] {
78 ENSEMBL, ENSEMBLGENOMES,
79 EMBL, EMBLCDS, EMBLCDSProduct,
80 PDB, PFAM, RFAM, GENEDB
83 public static final int UNIPROT_MASK = 1<<0;
84 public static final int UP_NAME_MASK = 1<<1;
85 public static final int UNIPROT_KB_MASK = 1<<2;
86 public static final int ENSEMBL_MASK = 1<<3;
87 public static final int ENSEMBL_GENOMES_MASK = 1<<4;
88 public static final int EMBL_MASK = 1<<5;
89 public static final int EMBL_CDS_MASK = 1<<6;
90 public static final int EMBL_CDS_PRODUCT_MASK = 1<<7;
91 public static final int PDB_MASK = 1<<8;
92 public static final int PFAM_MASK = 1<<9;
93 public static final int RFAM_MASK = 1<<10;
94 public static final int GENE_DB_MASK = 1<<11;
96 public static final int MASK_COUNT = 12;
98 public static final int ALL_MASKS = (1 << MASK_COUNT) - 1;
100 public static int getSourceKey(String name) {
101 for (int i = 0; i < MASK_COUNT; i++) {
102 if (name.equals(allSources[i]))
110 public static final int PRIMARY_MASK = UNIPROT_MASK | ENSEMBL_MASK;
113 * List of databases whose sequences might have coding regions annotated
115 public static final String[] DNACODINGDBS = {
116 ENSEMBL, ENSEMBLGENOMES,
117 EMBL, EMBLCDS, GENEDB
120 public static final int DNA_CODING_MASK =
121 ENSEMBL_MASK | ENSEMBL_GENOMES_MASK
122 | EMBL_MASK | EMBL_CDS_MASK | GENE_DB_MASK;
126 public static final String[] CODINGDBS = { EMBLCDS, GENEDB, ENSEMBL };
128 public static final int CODING_MASK = EMBL_CDS_MASK | GENE_DB_MASK | ENSEMBL_MASK;
132 public static final String[] PROTEINDBS = {
134 ENSEMBL, EMBLCDSProduct }; // Ensembl ENSP* entries are protein
136 public static final int PROTEIN_MASK =
137 UNIPROT_MASK | UNIPROT_KB_MASK
138 | ENSEMBL_MASK | EMBL_CDS_PRODUCT_MASK ;
141 // for SequenceAnnotationReport only
143 // public static final String[][] PRIMARY_SOURCES = new String[][] {
144 // CODINGDBS, DNACODINGDBS, PROTEINDBS };
146 public static final int PRIMARY_SOURCES_MASK = CODING_MASK | DNA_CODING_MASK | PROTEIN_MASK;
148 public static boolean isPrimarySource(String source)
150 return ((PRIMARY_SOURCES_MASK & getSourceKey(source)) != 0);
153 public static boolean isPrimaryCandidate(String ucversion) {
154 // tricky - this test really needs to search the sequence's set of dbrefs to
155 // see if there is a primary reference that derived this reference.
156 for (int i = allSources.length; --i >= 0;)
158 if (ucversion.startsWith(allSources[i])) // BH 2019.01.25 .toUpperCase() unnecessary here for allSources
160 // by convention, many secondary references inherit the primary
162 // source string as a prefix for any version information from the
163 // secondary reference.