2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.ext.ensembl;
23 import jalview.datamodel.AlignmentI;
24 import jalview.datamodel.SequenceFeature;
25 import jalview.datamodel.SequenceI;
26 import jalview.util.Platform;
28 import java.util.ArrayList;
29 import java.util.List;
31 import com.stevesoft.pat.Regex;
34 * A client to fetch protein translated sequence for an Ensembl identifier
39 public class EnsemblProtein extends EnsemblSeqProxy
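/*
 * Accepted accession formats:
 *  - ENSP followed by 11 digits
 *  - ENSMUSP or similar (a three-letter species code before the P),
 *    followed by 11 digits
 *  - CCDS followed by at least 3 characters that are digits or dots,
 *    e.g. CCDSnnnnn.nn
 */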
46 private static Regex ACCESSION_REGEX;
49 * Default constructor (to use rest.ensembl.org)
51 public EnsemblProtein()
57 * Constructor given the target domain to fetch data from
61 public EnsemblProtein(String d)
67 public String getDbName()
69 return "ENSEMBL (Protein)";
73 protected EnsemblSeqType getSourceEnsemblType()
75 return EnsemblSeqType.PROTEIN;
79 * Returns false, as this fetcher does not retrieve DNA sequences.
82 public boolean isDnaCoding()
88 * Test query is to the protein translation of transcript ENST00000288602
91 public String getTestQuery()
93 return "ENSP00000288602";
97 * Overrides base class method to do nothing - genomic features are not
98 * applicable to the protein product sequence
101 protected void addFeaturesAndProduct(String accId, AlignmentI alignment)
106 protected EnsemblFeatureType[] getFeaturesToFetch()
108 // not applicable - can't fetch genomic features for a protein sequence
113 protected List<SequenceFeature> getIdentifyingFeatures(SequenceI seq,
116 return new ArrayList<>();
120 public Regex getAccessionValidator()
122 if (ACCESSION_REGEX == null)
124 ACCESSION_REGEX = Platform.newRegex(
125 "(ENS([A-Z]{3}|)P[0-9]{11}$)" + "|" + "(CCDS[0-9.]{3,}$)", null);
127 return ACCESSION_REGEX;
131 * Returns an accession id for a query, including conversion of ENST* to
132 * ENSP*. This supports querying for the protein sequence for a transcript
133 * (ENST identifier) and returning the ENSP identifier.
136 public String getAccessionIdFromQuery(String query)
138 String accId = super.getAccessionIdFromQuery(query);
141 * ensure last character before (11) digits is P
142 * ENST00000288602 -> ENSP00000288602
143 * ENSMUST00000288602 -> ENSMUSP00000288602
145 if (accId != null && accId.length() >= 12)
147 char[] chars = accId.toCharArray();
148 chars[chars.length - 12] = 'P';
149 accId = new String(chars);