2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.ext.ensembl;
23 import jalview.datamodel.DBRefSource;
24 import jalview.ws.seqfetcher.DbSourceProxyImpl;
26 import com.stevesoft.pat.Regex;
29 * A base class for Ensembl sequence fetchers
33 abstract class EnsemblSequenceFetcher extends DbSourceProxyImpl
36 * accepts ENSG/T/E/P with 11 digits
37 * or ENSMUSP or similar for other species
38 * or CCDSnnnnn.nn with at least 3 digits
40 private static final Regex ACCESSION_REGEX = new Regex(
41 "(ENS([A-Z]{3}|)[GTEP]{1}[0-9]{11}$)" + "|"
42 + "(CCDS[0-9.]{3,}$)");
44 protected static final String ENSEMBL_GENOMES_REST = "http://rest.ensemblgenomes.org";
46 protected static final String ENSEMBL_REST = "http://rest.ensembl.org";
49 * possible values for the 'feature' parameter of the /overlap REST service
50 * @see http://rest.ensembl.org/documentation/info/overlap_id
52 protected enum EnsemblFeatureType
54 gene, transcript, cds, exon, repeat, simple, misc, variation,
55 somatic_variation, structural_variation, somatic_structural_variation,
56 constrained, regulatory
59 private String domain = ENSEMBL_REST;
62 public String getDbSource()
64 // NB ensure Uniprot xrefs are canonicalised from "Ensembl" to "ENSEMBL"
65 if (ENSEMBL_GENOMES_REST.equals(getDomain()))
67 return DBRefSource.ENSEMBLGENOMES;
69 return DBRefSource.ENSEMBL;
73 public String getAccessionSeparator()
79 * Ensembl accession are ENST + 11 digits for human transcript, ENSG for human
80 * gene. Other species insert 3 letters e.g. ENSMUST..., ENSMUSG...
82 * @see http://www.ensembl.org/Help/View?id=151
85 public Regex getAccessionValidator()
87 return ACCESSION_REGEX;
91 public boolean isValidReference(String accession)
93 return getAccessionValidator().search(accession);
103 * Default test query is a transcript
106 public String getTestQuery()
108 // has CDS on reverse strand:
109 return "ENST00000288602";
110 // ENST00000461457 // forward strand
114 public boolean isDnaCoding()
120 * Returns the domain name to query e.g. http://rest.ensembl.org or
121 * http://rest.ensemblgenomes.org
125 protected String getDomain()
130 protected void setDomain(String d)