X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FDBRefSource.java;h=3b1757bf18ad52cfe2269c0e6413f16f1e1d7a32;hb=4a0c50aa36148389daaddaf22281f459ffd0cc20;hp=f8039bb6341844f2a7a994845f913d67e45c7854;hpb=26b115b0a77d521da92a06572d9b7819c2d0d49a;p=jalview.git diff --git a/src/jalview/datamodel/DBRefSource.java b/src/jalview/datamodel/DBRefSource.java index f8039bb..3b1757b 100755 --- a/src/jalview/datamodel/DBRefSource.java +++ b/src/jalview/datamodel/DBRefSource.java @@ -20,10 +20,6 @@ */ package jalview.datamodel; -import java.lang.reflect.Field; -import java.util.ArrayList; -import java.util.List; - /** * BH 2018 SwingJS note: If additional final static Strings are added to this * file, they should be added to public static final String[] allTypes. @@ -40,150 +36,146 @@ import java.util.List; * @author JimP * */ +import java.util.Locale; + public class DBRefSource { - - - - /** - * UNIPROT Accession Number - */ + public static final String UNIPROT = "UNIPROT"; - /** - * UNIPROT Entry Name - */ - public static final String UP_NAME = "UNIPROT_NAME".toUpperCase(); + public static final String UP_NAME = "UNIPROT_NAME" + .toUpperCase(Locale.ROOT); /** * Uniprot Knowledgebase/TrEMBL as served from EMBL protein products. */ - public static final String UNIPROTKB = "UniProtKB/TrEMBL".toUpperCase(); + public static final String UNIPROTKB = "UniProtKB/TrEMBL" + .toUpperCase(Locale.ROOT); - public static final String EMBLCDSProduct = "EMBLCDSProtein".toUpperCase(); + public static final String ENSEMBL = "ENSEMBL"; - - /** - * PDB Entry Code - */ - public static final String PDB = "PDB"; + public static final String ENSEMBLGENOMES = "ENSEMBLGENOMES"; - /** - * EMBL ID - */ public static final String EMBL = "EMBL"; - /** - * EMBLCDS ID - */ public static final String EMBLCDS = "EMBLCDS"; - - /** - * PFAM ID - */ + public static final String EMBLCDSProduct = "EMBLCDSProtein" + .toUpperCase(Locale.ROOT); + + public static final String PDB = "PDB"; + public static final String PFAM = "PFAM"; - /** - * RFAM ID - */ public static final String RFAM = "RFAM"; - /** - * GeneDB ID - */ - public static final String GENEDB = "GeneDB".toUpperCase(); + public static final String GENEDB = "GeneDB".toUpperCase(Locale.ROOT); - - /** - * Ensembl - */ - public static final String ENSEMBL = "ENSEMBL"; + public static final String PDB_CANONICAL_NAME = PDB; - public static final String ENSEMBLGENOMES = "ENSEMBLGENOMES"; + public static final String[] allSources = new String[] { UNIPROT, UP_NAME, + UNIPROTKB, ENSEMBL, ENSEMBLGENOMES, EMBL, EMBLCDS, EMBLCDSProduct, + PDB, PFAM, RFAM, GENEDB }; - - /** - * List of databases whose sequences might have coding regions annotated - */ - public static final String[] DNACODINGDBS = { EMBL, EMBLCDS, GENEDB, - ENSEMBL, ENSEMBLGENOMES }; + public static final int UNIPROT_MASK = 1 << 0; - public static final String[] CODINGDBS = { EMBLCDS, GENEDB, ENSEMBL }; + public static final int UP_NAME_MASK = 1 << 1; - public static final String[] PROTEINDBS = { UNIPROT, UNIPROTKB, - EMBLCDSProduct, ENSEMBL }; // Ensembl ENSP* entries are protein + public static final int UNIPROT_KB_MASK = 1 << 2; - - public static final String[] allTypes = new String[] { - UNIPROT, UP_NAME, UNIPROTKB, - EMBLCDSProduct, PDB, EMBL, - EMBLCDS, PFAM, RFAM, - GENEDB, ENSEMBL, ENSEMBLGENOMES - }; + public static final int ENSEMBL_MASK = 1 << 3; -public static final String PROTEINDBSKEYS, DNACODINGDBSKEYS; + public static final int ENSEMBL_GENOMES_MASK = 1 << 4; -public static final String[] PROMTYPES; + public static final int EMBL_MASK = 1 << 5; + public static final int EMBL_CDS_MASK = 1 << 6; -public static final int UNIPROT_MASK = 1; + public static final int EMBL_CDS_PRODUCT_MASK = 1 << 7; -public static final int ENSEMBL_MASK = 2; + public static final int PDB_MASK = 1 << 8; -public static final int ALL_MASKS = UNIPROT_MASK | ENSEMBL_MASK; + public static final int PFAM_MASK = 1 << 9; -public static final String PDB_CANONICAL_NAME = PDB; + public static final int RFAM_MASK = 1 << 10; -static { - // BH 2019.01.25 trying to speed this up - String s = ";"; - for (int i = PROTEINDBS.length; --i >= 0;) - s += PROTEINDBS[i] + ";"; - PROTEINDBSKEYS = s; - - s = ";"; - for (int i = DNACODINGDBS.length; --i >= 0;) - s += DNACODINGDBS[i] + ";"; - DNACODINGDBSKEYS = s; - - PROMTYPES = new String[] { null, ";" + UNIPROT + ";", ";" + ENSEMBL + ";" , ";" + UNIPROT + ";" + ENSEMBL + ";" }; -} + public static final int GENE_DB_MASK = 1 << 11; - public static String[] allSourcesFromReflection; + public static final int MASK_COUNT = 12; - public static String[] allSources() + public static final int ALL_MASKS = (1 << MASK_COUNT) - 1; + public static int getSourceKey(String name) { - /** - * @j2sNative - * - * return C$.allTypes; - * - */ + for (int i = 0; i < MASK_COUNT; i++) + { + if (name.equals(allSources[i])) + { + return 1 << i; + } + } + return 0; + } + + public static final int PRIMARY_MASK = UNIPROT_MASK | ENSEMBL_MASK; + + /** + * List of databases whose sequences might have coding regions annotated + */ + public static final String[] DNACODINGDBS = { ENSEMBL, ENSEMBLGENOMES, + EMBL, EMBLCDS, GENEDB }; + + public static final int DNA_CODING_MASK = ENSEMBL_MASK + | ENSEMBL_GENOMES_MASK | EMBL_MASK | EMBL_CDS_MASK | GENE_DB_MASK; + public static final String[] CODINGDBS = { EMBLCDS, GENEDB, ENSEMBL }; + + public static final int CODING_MASK = EMBL_CDS_MASK | GENE_DB_MASK + | ENSEMBL_MASK; + + public static final String[] PROTEINDBS = { UNIPROT, UNIPROTKB, ENSEMBL, + EMBLCDSProduct }; // Ensembl ENSP* entries are protein + + public static final int PROTEIN_MASK = UNIPROT_MASK | UNIPROT_KB_MASK + | ENSEMBL_MASK | EMBL_CDS_PRODUCT_MASK; + + // for SequenceAnnotationReport only + + // public static final String[][] PRIMARY_SOURCES = new String[][] { + // CODINGDBS, DNACODINGDBS, PROTEINDBS }; + // + public static final int PRIMARY_SOURCES_MASK = CODING_MASK + | DNA_CODING_MASK | PROTEIN_MASK; + + public static boolean isPrimarySource(String source) + { + return ((PRIMARY_SOURCES_MASK & getSourceKey(source)) != 0); + } + + public static boolean isPrimaryCandidate(String ucversion) + { + if (ucversion==null) { - if (allSourcesFromReflection == null) + // Null/empty version is not a real reference ? + return false; + } + // tricky - this test really needs to search the sequence's set of dbrefs to + // see if there is a primary reference that derived this reference. + for (int i = allSources.length; --i >= 0;) + { + if (ucversion.startsWith(allSources[i])) // BH 2019.01.25 + // .toUpperCase(Locale.ROOT) + // unnecessary here for + // allSources { - List src = new ArrayList<>(); - for (Field f : DBRefSource.class.getFields()) - { - if (String.class.equals(f.getType())) - { - try - { - src.add((String) f.get(null)); - } catch (Exception x) - { - x.printStackTrace(); - } - } - } - allSourcesFromReflection = src.toArray(new String[0]); + // by convention, many secondary references inherit the primary + // reference's + // source string as a prefix for any version information from the + // secondary reference. + return false; } - return allSourcesFromReflection; } + return true; } - - + }