2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.ext.ensembl;
23 import jalview.analysis.AlignmentUtils;
24 import jalview.bin.Cache;
25 import jalview.datamodel.DBRefSource;
26 import jalview.util.Platform;
27 import jalview.ws.seqfetcher.DbSourceProxyImpl;
29 import com.stevesoft.pat.Regex;
32 * A base class for Ensembl sequence fetchers
36 abstract class EnsemblSequenceFetcher extends DbSourceProxyImpl
38 // domain properties lookup keys:
39 protected static final String ENSEMBL_BASEURL = "ENSEMBL_BASEURL";
41 protected static final String ENSEMBL_GENOMES_BASEURL = "ENSEMBL_GENOMES_BASEURL";
43 // domain properties default values:
44 protected static final String DEFAULT_ENSEMBL_BASEURL = "https://rest.ensembl.org";
46 // ensemblgenomes REST service merged to ensembl 9th April 2019
47 protected static final String DEFAULT_ENSEMBL_GENOMES_BASEURL = DEFAULT_ENSEMBL_BASEURL;
49 private static Regex ACCESSION_REGEX;
51 protected final String ensemblGenomesDomain;
53 protected final String ensemblDomain;
55 protected static final String OBJECT_TYPE_TRANSLATION = "Translation";
57 protected static final String OBJECT_TYPE_TRANSCRIPT = "Transcript";
59 protected static final String OBJECT_TYPE_GENE = "Gene";
61 protected static final String PARENT = "Parent";
63 protected static final String JSON_ID = AlignmentUtils.VARIANT_ID; // "id";
65 protected static final String OBJECT_TYPE = "object_type";
68 * possible values for the 'feature' parameter of the /overlap REST service
69 * @see http://rest.ensembl.org/documentation/info/overlap_id
71 protected enum EnsemblFeatureType
73 gene, transcript, cds, exon, repeat, simple, misc, variation,
74 somatic_variation, structural_variation, somatic_structural_variation,
75 constrained, regulatory
78 private String domain;
83 public EnsemblSequenceFetcher()
86 * the default domain names may be overridden in .jalview_properties;
87 * this allows an easy change from http to https in future if needed
89 ensemblDomain = Cache.getDefault(ENSEMBL_BASEURL,
90 DEFAULT_ENSEMBL_BASEURL).trim();
91 ensemblGenomesDomain = Cache.getDefault(ENSEMBL_GENOMES_BASEURL,
92 DEFAULT_ENSEMBL_GENOMES_BASEURL).trim();
93 domain = ensemblDomain;
97 public String getDbSource()
99 // NB ensure Uniprot xrefs are canonicalised from "Ensembl" to "ENSEMBL"
100 return DBRefSource.ENSEMBL;
104 public String getAccessionSeparator()
110 * Ensembl accession are ENST + 11 digits for human transcript, ENSG for human
111 * gene. Other species insert 3 letters e.g. ENSMUST..., ENSMUSG...
113 * @see http://www.ensembl.org/Help/View?id=151
116 public Regex getAccessionValidator()
118 if (ACCESSION_REGEX == null)
121 * accepts ENSG/T/E/P with 11 digits
122 * or ENSMUSP or similar for other species
123 * or CCDSnnnnn.nn with at least 3 digits
125 ACCESSION_REGEX = Platform
126 .newRegex("(ENS([A-Z]{3}|)[GTEP]{1}[0-9]{11}$)" + "|"
127 + "(CCDS[0-9.]{3,}$)", null);
129 return ACCESSION_REGEX;
133 public boolean isValidReference(String accession)
135 return getAccessionValidator().search(accession);
145 * Default test query is a transcript
148 public String getTestQuery()
150 // has CDS on reverse strand:
151 return "ENST00000288602";
152 // ENST00000461457 // forward strand
156 public boolean isDnaCoding()
162 * Returns the base URL of the REST service to query, e.g.
163 * https://rest.ensembl.org (the former ensemblgenomes REST service was
163 * merged into the ensembl service in April 2019)
167 protected String getDomain()
172 protected void setDomain(String d)
174 domain = d == null ? null : d.trim();