-/*\r
- * Jalview - A Sequence Alignment Editor and Viewer (Version 2.4)\r
- * Copyright (C) 2008 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle\r
- * \r
- * This program is free software; you can redistribute it and/or\r
- * modify it under the terms of the GNU General Public License\r
- * as published by the Free Software Foundation; either version 2\r
- * of the License, or (at your option) any later version.\r
- * \r
- * This program is distributed in the hope that it will be useful,\r
- * but WITHOUT ANY WARRANTY; without even the implied warranty of\r
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
- * GNU General Public License for more details.\r
- * \r
- * You should have received a copy of the GNU General Public License\r
- * along with this program; if not, write to the Free Software\r
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA\r
- */\r
-package jalview.datamodel;\r
-\r
-/**\r
- * Defines internal constants for unambiguous annotation of DbRefEntry source\r
- * strings and describing the data retrieved from external database sources (see\r
- * jalview.ws.DbSourcProxy)\r
- * \r
- * @author JimP\r
- * \r
- */\r
-public class DBRefSource\r
-{\r
- /**\r
- * UNIPROT Accession Number\r
- */\r
- public static String UNIPROT = "UNIPROT";\r
-\r
- /**\r
- * UNIPROT Entry Name\r
- */\r
- public static String UP_NAME = "UNIPROT_NAME";\r
-\r
- /**\r
- * Uniprot Knowledgebase/TrEMBL as served from EMBL protein products.\r
- */\r
- public static final String UNIPROTKB = "UniProtKB/TrEMBL";\r
-\r
- /**\r
- * PDB Entry Code\r
- */\r
- public static String PDB = "PDB";\r
-\r
- /**\r
- * EMBL ID\r
- */\r
- public static String EMBL = "EMBL";\r
-\r
- /**\r
- * EMBLCDS ID\r
- */\r
- public static String EMBLCDS = "EMBLCDS";\r
-\r
- /**\r
- * PFAM ID\r
- */\r
- public static String PFAM = "PFAM";\r
-\r
- /**\r
- * GeneDB ID\r
- */\r
- public static final String GENEDB = "GeneDB";\r
-\r
- /**\r
- * List of databases whose sequences might have coding regions annotated\r
- */\r
- public static final String[] DNACODINGDBS =\r
- { EMBL, EMBLCDS, GENEDB };\r
-\r
- public static final String[] CODINGDBS =\r
- { EMBLCDS, GENEDB };\r
-\r
- public static final String[] PROTEINDBS =\r
- { UNIPROT, PDB, UNIPROTKB };\r
-\r
- public static final String[] PROTEINSEQ =\r
- { UNIPROT, UNIPROTKB };\r
-\r
- public static final String[] PROTEINSTR =\r
- { PDB };\r
-\r
- public static final String[] DOMAINDBS =\r
- { PFAM };\r
-\r
- /**\r
- * set of unique DBRefSource property constants. These could be used to\r
- * reconstruct the above groupings\r
- */\r
- public static final Object SEQDB = "SQ";\r
-\r
- /**\r
- * database of nucleic acid sequences\r
- */\r
- public static final Object DNASEQDB = "NASQ";\r
-\r
- /**\r
- * database of amino acid sequences\r
- */\r
- public static final Object PROTSEQDB = "PROTSQ";\r
-\r
- /**\r
- * database of cDNA sequences\r
- */\r
- public static final Object CODINGSEQDB = "CODING";\r
-\r
- /**\r
- * database of na sequences with exon annotation\r
- */\r
- public static final Object DNACODINGSEQDB = "XONCODING";\r
-\r
- /**\r
- * DB returns several sequences associated with a protein domain\r
- */\r
- public static final Object DOMAINDB = "DOMAIN";\r
-\r
- /**\r
- * DB query can take multiple accession codes concatenated by a separator.\r
- * Value of property indicates maximum number of accession codes to send at a\r
- * time.\r
- */\r
- public static final Object MULTIACC = "MULTIACC";\r
-}\r
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.datamodel;
+
+import java.util.Locale;
+
+/**
+ * Defines internal constants for unambiguous annotation of DbRefEntry source
+ * strings and describing the data retrieved from external database sources (see
+ * jalview.ws.DbSourceProxy). <br/>
+ * All source names defined here are upper case, and lookups against them are
+ * case sensitive. <br/>
+ * BH 2018 SwingJS note: if additional source name Strings are added to this
+ * class, they must also be appended to {@link #allSources}, given a matching
+ * <code>*_MASK</code> bit at the same position, and counted in
+ * {@link #MASK_COUNT}. <br/>
+ * TODO: replace with ontology to allow recognition of particular attributes
+ * (e.g. protein coding, alignment (ortholog db, paralog db, domain db),
+ * genomic, transcriptomic, 3D structure providing (PDB, MODBASE, etc) ..).
+ *
+ * @author JimP
+ */
+public class DBRefSource
+{
+
+  /**
+   * UNIPROT accession number
+   */
+  public static final String UNIPROT = "UNIPROT";
+
+  /**
+   * UNIPROT entry name
+   */
+  public static final String UP_NAME = "UNIPROT_NAME"
+          .toUpperCase(Locale.ROOT);
+
+  /**
+   * Uniprot Knowledgebase/TrEMBL as served from EMBL protein products.
+   */
+  public static final String UNIPROTKB = "UniProtKB/TrEMBL"
+          .toUpperCase(Locale.ROOT);
+
+  /**
+   * ENSEMBL source name
+   */
+  public static final String ENSEMBL = "ENSEMBL";
+
+  /**
+   * ENSEMBLGENOMES source name
+   */
+  public static final String ENSEMBLGENOMES = "ENSEMBLGENOMES";
+
+  /**
+   * EMBL ID
+   */
+  public static final String EMBL = "EMBL";
+
+  /**
+   * EMBLCDS ID
+   */
+  public static final String EMBLCDS = "EMBLCDS";
+
+  /**
+   * Source name used for EMBLCDS products (listed among the protein databases
+   * in {@link #PROTEINDBS})
+   */
+  public static final String EMBLCDSProduct = "EMBLCDSProtein"
+          .toUpperCase(Locale.ROOT);
+
+  /**
+   * PDB entry code
+   */
+  public static final String PDB = "PDB";
+
+  /**
+   * PFAM ID
+   */
+  public static final String PFAM = "PFAM";
+
+  /**
+   * RFAM source name
+   */
+  public static final String RFAM = "RFAM";
+
+  /**
+   * GeneDB ID
+   */
+  public static final String GENEDB = "GeneDB".toUpperCase(Locale.ROOT);
+
+  /**
+   * Alias for {@link #PDB}
+   */
+  public static final String PDB_CANONICAL_NAME = PDB;
+
+  /**
+   * Every source name defined by this class. The position of a name in this
+   * array is the bit position of its <code>*_MASK</code> constant, which is
+   * what {@link #getSourceKey(String)} relies on - keep the two in step.
+   */
+  public static final String[] allSources = new String[] { UNIPROT, UP_NAME,
+      UNIPROTKB, ENSEMBL, ENSEMBLGENOMES, EMBL, EMBLCDS, EMBLCDSProduct, PDB,
+      PFAM, RFAM, GENEDB };
+
+  /*
+   * One bit per source, in the same order as allSources.
+   */
+  public static final int UNIPROT_MASK = 1 << 0;
+
+  public static final int UP_NAME_MASK = 1 << 1;
+
+  public static final int UNIPROT_KB_MASK = 1 << 2;
+
+  public static final int ENSEMBL_MASK = 1 << 3;
+
+  public static final int ENSEMBL_GENOMES_MASK = 1 << 4;
+
+  public static final int EMBL_MASK = 1 << 5;
+
+  public static final int EMBL_CDS_MASK = 1 << 6;
+
+  public static final int EMBL_CDS_PRODUCT_MASK = 1 << 7;
+
+  public static final int PDB_MASK = 1 << 8;
+
+  public static final int PFAM_MASK = 1 << 9;
+
+  public static final int RFAM_MASK = 1 << 10;
+
+  public static final int GENE_DB_MASK = 1 << 11;
+
+  /**
+   * Number of <code>*_MASK</code> bits defined above
+   */
+  public static final int MASK_COUNT = 12;
+
+  /**
+   * All {@link #MASK_COUNT} source bits set
+   */
+  public static final int ALL_MASKS = (1 << MASK_COUNT) - 1;
+
+  /**
+   * Returns the single-bit mask for a source name.
+   * 
+   * @param name
+   *          source name to look up; compared with <code>equals</code>, so
+   *          it must match the (upper case) constant exactly
+   * @return the <code>*_MASK</code> value for the matching entry of
+   *         {@link #allSources}, or 0 if name is null or not a known source
+   */
+  public static int getSourceKey(String name)
+  {
+    if (name == null)
+    {
+      // an absent source matches nothing, rather than failing the lookup
+      return 0;
+    }
+    // bounded by the array itself so that a stale MASK_COUNT cannot cause an
+    // out-of-bounds read or hide trailing entries
+    for (int i = 0; i < allSources.length; i++)
+    {
+      if (name.equals(allSources[i]))
+      {
+        return 1 << i;
+      }
+    }
+    return 0;
+  }
+
+  /**
+   * Bits for UNIPROT and ENSEMBL
+   */
+  public static final int PRIMARY_MASK = UNIPROT_MASK | ENSEMBL_MASK;
+
+  /**
+   * List of databases whose sequences might have coding regions annotated
+   */
+  public static final String[] DNACODINGDBS = { ENSEMBL, ENSEMBLGENOMES, EMBL,
+      EMBLCDS, GENEDB };
+
+  /**
+   * Bit mask equivalent of {@link #DNACODINGDBS}
+   */
+  public static final int DNA_CODING_MASK = ENSEMBL_MASK
+          | ENSEMBL_GENOMES_MASK | EMBL_MASK | EMBL_CDS_MASK | GENE_DB_MASK;
+
+  public static final String[] CODINGDBS = { EMBLCDS, GENEDB, ENSEMBL };
+
+  /**
+   * Bit mask equivalent of {@link #CODINGDBS}
+   */
+  public static final int CODING_MASK = EMBL_CDS_MASK | GENE_DB_MASK
+          | ENSEMBL_MASK;
+
+  // Ensembl ENSP* entries are protein
+  public static final String[] PROTEINDBS = { UNIPROT, UNIPROTKB, ENSEMBL,
+      EMBLCDSProduct };
+
+  /**
+   * Bit mask equivalent of {@link #PROTEINDBS}
+   */
+  public static final int PROTEIN_MASK = UNIPROT_MASK | UNIPROT_KB_MASK
+          | ENSEMBL_MASK | EMBL_CDS_PRODUCT_MASK;
+
+  /**
+   * Union of the coding, DNA coding and protein masks. For
+   * SequenceAnnotationReport only. Stands in for the former
+   * <code>String[][] PRIMARY_SOURCES = { CODINGDBS, DNACODINGDBS, PROTEINDBS }</code>.
+   */
+  public static final int PRIMARY_SOURCES_MASK = CODING_MASK | DNA_CODING_MASK
+          | PROTEIN_MASK;
+
+  /**
+   * Answers whether a source name is one of the coding, DNA coding or protein
+   * databases.
+   * 
+   * @param source
+   *          source name, matched exactly against the upper case constants
+   * @return true if the bit for source is set in
+   *         {@link #PRIMARY_SOURCES_MASK}; false otherwise, including when
+   *         source is null or unknown
+   */
+  public static boolean isPrimarySource(String source)
+  {
+    return (PRIMARY_SOURCES_MASK & getSourceKey(source)) != 0;
+  }
+
+  /**
+   * Answers false if the given string starts with any of the names in
+   * {@link #allSources}, else true.
+   * 
+   * @param ucversion
+   *          the string to test; the parameter name suggests it is expected
+   *          to be upper-cased by the caller already - no case conversion is
+   *          done here
+   * @return true if no known source name is a prefix of ucversion
+   * @throws NullPointerException
+   *           if ucversion is null
+   */
+  public static boolean isPrimaryCandidate(String ucversion)
+  {
+    // tricky - this test really needs to search the sequence's set of dbrefs to
+    // see if there is a primary reference that derived this reference.
+    for (String knownSource : allSources)
+    {
+      // BH 2019.01.25 .toUpperCase(Locale.ROOT) unnecessary here for allSources
+      if (ucversion.startsWith(knownSource))
+      {
+        // by convention, many secondary references inherit the primary
+        // reference's source string as a prefix for any version information
+        // from the secondary reference.
+        return false;
+      }
+    }
+    return true;
+  }
+
+}