--- /dev/null
+package jalview.ext.ensembl;
+
+import jalview.datamodel.DBRefSource;
+import jalview.ws.seqfetcher.DbSourceProxyImpl;
+
+import com.stevesoft.pat.Regex;
+
+/**
+ * A base class for Ensembl sequence fetchers. It supplies the properties that
+ * are the same for every fetcher in this class: the database source name, the
+ * accession format and validation, and a default test query.
+ *
+ * @author gmcarstairs
+ */
+abstract class EnsemblSequenceFetcher extends DbSourceProxyImpl
+{
+  /*
+   * accepts ENSG/T/E/P with 11 digits
+   * or ENSMUSP or similar for other species
+   * or CCDS followed by at least 3 characters, each a digit or '.'
+   * (e.g. CCDSnnnnn.nn)
+   *
+   * NOTE(review): both alternatives are anchored at the end ($) but not at
+   * the start; presumably Regex.search() may match anywhere in the input, so
+   * an accession with leading text would also be accepted - confirm whether
+   * that is intended.
+   */
+  private static final Regex ACCESSION_REGEX = new Regex(
+          "(ENS([A-Z]{3}|)[GTEP]{1}[0-9]{11}$)" + "|" + "(CCDS[0-9.]{3,}$)");
+
+  /*
+   * possible values for the 'feature' parameter of the /overlap REST service
+   * @see http://rest.ensembl.org/documentation/info/overlap_id
+   */
+  protected enum EnsemblFeatureType
+  {
+    gene, transcript, cds, exon, repeat, simple, misc, variation,
+    somatic_variation, structural_variation, somatic_structural_variation,
+    constrained, regulatory
+  }
+
+  /**
+   * Returns the database source name for Ensembl
+   *
+   * @return the value of DBRefSource.ENSEMBL
+   */
+  @Override
+  public String getDbSource()
+  {
+    // NB ensure Uniprot xrefs are canonicalised from "Ensembl" to "ENSEMBL"
+    return DBRefSource.ENSEMBL; // "ENSEMBL"
+  }
+
+  /**
+   * Returns a fixed placeholder version string
+   *
+   * @return "0"
+   */
+  @Override
+  public String getDbVersion()
+  {
+    return "0";
+  }
+
+  /**
+   * Returns the separator used between accessions in a query
+   *
+   * @return a single space
+   */
+  @Override
+  public String getAccessionSeparator()
+  {
+    return " ";
+  }
+
+  /**
+   * Ensembl accessions are ENST + 11 digits for human transcript, ENSG for
+   * human gene. Other species insert 3 letters e.g. ENSMUST..., ENSMUSG...
+   *
+   * @return the shared regular expression used to validate accessions
+   * @see http://www.ensembl.org/Help/View?id=151
+   */
+  @Override
+  public Regex getAccessionValidator()
+  {
+    return ACCESSION_REGEX;
+  }
+
+  /**
+   * Answers true if the given accession is matched by the accession validator
+   * (see {@link #getAccessionValidator()}), else false. A null accession is
+   * never valid.
+   *
+   * @param accession
+   *          the identifier to check; may be null
+   * @return true if the validator finds a match in the accession
+   */
+  @Override
+  public boolean isValidReference(String accession)
+  {
+    // guard first: searching a null string would fail with an exception
+    // rather than reporting the accession as invalid
+    if (accession == null)
+    {
+      return false;
+    }
+    return getAccessionValidator().search(accession);
+  }
+
+  /**
+   * Returns the tier of this source
+   *
+   * @return 0
+   */
+  @Override
+  public int getTier()
+  {
+    return 0;
+  }
+
+  /**
+   * Default test query is a transcript
+   *
+   * @return an Ensembl transcript identifier
+   */
+  @Override
+  public String getTestQuery()
+  {
+    // has CDS on reverse strand:
+    return "ENST00000288602";
+    // ENST00000461457 // forward strand
+  }
+
+  /**
+   * Answers true, i.e. this source is reported as DNA coding
+   *
+   * @return true
+   */
+  @Override
+  public boolean isDnaCoding()
+  {
+    return true;
+  }
+}