X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fext%2Fensembl%2FEnsemblSequenceFetcher.java;fp=src%2Fjalview%2Fext%2Fensembl%2FEnsemblSequenceFetcher.java;h=9a4952e9082d87642432f920f6c208752fd1bea9;hb=eccba41980e4a52d4b009101049ae7518053362e;hp=0000000000000000000000000000000000000000;hpb=bd2e76b5eb4a16994d6ebc6344900184e590d552;p=jalview.git diff --git a/src/jalview/ext/ensembl/EnsemblSequenceFetcher.java b/src/jalview/ext/ensembl/EnsemblSequenceFetcher.java new file mode 100644 index 0000000..9a4952e --- /dev/null +++ b/src/jalview/ext/ensembl/EnsemblSequenceFetcher.java @@ -0,0 +1,93 @@ +package jalview.ext.ensembl; + +import jalview.datamodel.DBRefSource; +import jalview.ws.seqfetcher.DbSourceProxyImpl; + +import com.stevesoft.pat.Regex; + +/** + * A base class for Ensembl sequence fetchers + * + * @author gmcarstairs + */ +abstract class EnsemblSequenceFetcher extends DbSourceProxyImpl +{ + /* + * accepts ENSG/T/E/P with 11 digits + * or ENSMUSP or similar for other species + * or CCDSnnnnn.nn with at least 3 digits + */ + private static final Regex ACCESSION_REGEX = new Regex( + "(ENS([A-Z]{3}|)[GTEP]{1}[0-9]{11}$)" + "|" + "(CCDS[0-9.]{3,}$)"); + + /* + * possible values for the 'feature' parameter of the /overlap REST service + * @see http://rest.ensembl.org/documentation/info/overlap_id + */ + protected enum EnsemblFeatureType + { + gene, transcript, cds, exon, repeat, simple, misc, variation, + somatic_variation, structural_variation, somatic_structural_variation, + constrained, regulatory + } + + @Override + public String getDbSource() + { + // NB ensure Uniprot xrefs are canonicalised from "Ensembl" to "ENSEMBL" + return DBRefSource.ENSEMBL; // "ENSEMBL" + } + + @Override + public String getDbVersion() + { + return "0"; + } + + @Override + public String getAccessionSeparator() + { + return " "; + } + + /** + * Ensembl accession are ENST + 11 digits for human transcript, ENSG for human + * gene. Other species insert 3 letters e.g. ENSMUST..., ENSMUSG... + * + * @see http://www.ensembl.org/Help/View?id=151 + */ + @Override + public Regex getAccessionValidator() + { + return ACCESSION_REGEX; + } + + @Override + public boolean isValidReference(String accession) + { + return getAccessionValidator().search(accession); + } + + @Override + public int getTier() + { + return 0; + } + + /** + * Default test query is a transcript + */ + @Override + public String getTestQuery() + { + // has CDS on reverse strand: + return "ENST00000288602"; + // ENST00000461457 // forward strand + } + + @Override + public boolean isDnaCoding() + { + return true; + } +}