/*
* Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
* Copyright (C) $$Year-Rel$$ The Jalview Authors
*
* This file is part of Jalview.
*
* Jalview is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* Jalview is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Jalview. If not, see .
* The Jalview Authors are detailed in the 'AUTHORS' file.
*/
package jalview.datamodel;
/**
* BH 2018 SwingJS note: If additional final static Strings are added to this
* file, they should be added to public static final String[] allTypes.
*
* Defines internal constants for unambiguous annotation of DbRefEntry source
* strings and describing the data retrieved from external database sources (see
* jalview.ws.DbSourcProxy)
* TODO: replace with ontology to allow recognition of particular attributes
* (e.g. protein coding, alignment (ortholog db, paralog db, domain db),
* genomic, transcriptomic, 3D structure providing (PDB, MODBASE, etc) ..).
*
*
*
* @author JimP
*
*/
import java.util.Locale;
public class DBRefSource
{
public static final String UNIPROT = "UNIPROT";
public static final String UP_NAME = "UNIPROT_NAME"
.toUpperCase(Locale.ROOT);
/**
* Uniprot Knowledgebase/TrEMBL as served from EMBL protein products.
*/
public static final String UNIPROTKB = "UniProtKB/TrEMBL"
.toUpperCase(Locale.ROOT);
public static final String ENSEMBL = "ENSEMBL";
public static final String ENSEMBLGENOMES = "ENSEMBLGENOMES";
public static final String EMBL = "EMBL";
public static final String EMBLCDS = "EMBLCDS";
public static final String EMBLCDSProduct = "EMBLCDSProtein"
.toUpperCase(Locale.ROOT);
public static final String PDB = "PDB";
public static final String PFAM = "PFAM";
public static final String RFAM = "RFAM";
public static final String GENEDB = "GeneDB".toUpperCase(Locale.ROOT);
public static final String PDB_CANONICAL_NAME = PDB;
public static final String[] allSources = new String[] { UNIPROT, UP_NAME,
UNIPROTKB, ENSEMBL, ENSEMBLGENOMES, EMBL, EMBLCDS, EMBLCDSProduct,
PDB, PFAM, RFAM, GENEDB };
public static final int UNIPROT_MASK = 1 << 0;
public static final int UP_NAME_MASK = 1 << 1;
public static final int UNIPROT_KB_MASK = 1 << 2;
public static final int ENSEMBL_MASK = 1 << 3;
public static final int ENSEMBL_GENOMES_MASK = 1 << 4;
public static final int EMBL_MASK = 1 << 5;
public static final int EMBL_CDS_MASK = 1 << 6;
public static final int EMBL_CDS_PRODUCT_MASK = 1 << 7;
public static final int PDB_MASK = 1 << 8;
public static final int PFAM_MASK = 1 << 9;
public static final int RFAM_MASK = 1 << 10;
public static final int GENE_DB_MASK = 1 << 11;
public static final int MASK_COUNT = 12;
public static final int ALL_MASKS = (1 << MASK_COUNT) - 1;
public static int getSourceKey(String name)
{
for (int i = 0; i < MASK_COUNT; i++)
{
if (name.equals(allSources[i]))
{
return 1 << i;
}
}
return 0;
}
public static final int PRIMARY_MASK = UNIPROT_MASK | ENSEMBL_MASK;
/**
* List of databases whose sequences might have coding regions annotated
*/
public static final String[] DNACODINGDBS = { ENSEMBL, ENSEMBLGENOMES,
EMBL, EMBLCDS, GENEDB };
public static final int DNA_CODING_MASK = ENSEMBL_MASK
| ENSEMBL_GENOMES_MASK | EMBL_MASK | EMBL_CDS_MASK | GENE_DB_MASK;
public static final String[] CODINGDBS = { EMBLCDS, GENEDB, ENSEMBL };
public static final int CODING_MASK = EMBL_CDS_MASK | GENE_DB_MASK
| ENSEMBL_MASK;
public static final String[] PROTEINDBS = { UNIPROT, UNIPROTKB, ENSEMBL,
EMBLCDSProduct }; // Ensembl ENSP* entries are protein
public static final int PROTEIN_MASK = UNIPROT_MASK | UNIPROT_KB_MASK
| ENSEMBL_MASK | EMBL_CDS_PRODUCT_MASK;
// for SequenceAnnotationReport only
// public static final String[][] PRIMARY_SOURCES = new String[][] {
// CODINGDBS, DNACODINGDBS, PROTEINDBS };
//
public static final int PRIMARY_SOURCES_MASK = CODING_MASK
| DNA_CODING_MASK | PROTEIN_MASK;
public static boolean isPrimarySource(String source)
{
return ((PRIMARY_SOURCES_MASK & getSourceKey(source)) != 0);
}
public static boolean isPrimaryCandidate(String ucversion)
{
// tricky - this test really needs to search the sequence's set of dbrefs to
// see if there is a primary reference that derived this reference.
for (int i = allSources.length; --i >= 0;)
{
if (ucversion.startsWith(allSources[i])) // BH 2019.01.25
// .toUpperCase(Locale.ROOT)
// unnecessary here for
// allSources
{
// by convention, many secondary references inherit the primary
// reference's
// source string as a prefix for any version information from the
// secondary reference.
return false;
}
}
return true;
}
}