1 package jalview.ext.ensembl;
3 import jalview.datamodel.AlignmentI;
4 import jalview.datamodel.SequenceFeature;
6 import java.util.Arrays;
9 import com.stevesoft.pat.Regex;
12 * A client to fetch protein translated sequence for an Ensembl identifier
17 public class EnsemblProtein extends EnsemblSeqProxy
20 * accepts ENSP with 11 digits
21 * or ENSMUSP or similar for other species
22 * or CCDSnnnnn.nn with at least 3 digits
24 private static final Regex ACCESSION_REGEX = new Regex(
25 "(ENS([A-Z]{3}|)P[0-9]{11}$)" + "|" + "(CCDS[0-9.]{3,}$)");
27 private static final List<String> CROSSREFS = Arrays.asList(new String[] {
28 "PDB", "Uniprot/SPTREMBL", "Uniprot/SWISSPROT" });
31 * Default constructor (to use rest.ensembl.org)
33 public EnsemblProtein()
39 * Constructor given the target domain to fetch data from
43 public EnsemblProtein(String d)
49 public String getDbName()
51 return "ENSEMBL (Protein)";
55 protected EnsemblSeqType getSourceEnsemblType()
57 return EnsemblSeqType.PROTEIN;
61 * Returns false, as this fetcher does not retrieve DNA sequences.
64 public boolean isDnaCoding()
70 * Test query is to the protein translation of transcript ENST00000288602
73 public String getTestQuery()
75 return "ENSP00000288602";
79 * Overrides base class method to do nothing - genomic features are not
80 * applicable to the protein product sequence
83 protected void addFeaturesAndProduct(String accId, AlignmentI alignment)
88 protected EnsemblFeatureType[] getFeaturesToFetch()
90 // not applicable - can't fetch genomic features for a protein sequence
95 protected boolean identifiesSequence(SequenceFeature sf, String accId)
97 // not applicable - protein sequence is not a 'subset' of genomic sequence
102 protected List<String> getCrossReferenceDatabases()
108 public Regex getAccessionValidator()
110 return ACCESSION_REGEX;
114 * Returns an accession id for a query, including conversion of ENST* to
115 * ENSP*. This supports querying for the protein sequence for a transcript
116 * (ENST identifier) and returning the ENSP identifier.
119 public String getAccessionIdFromQuery(String query)
121 String accId = super.getAccessionIdFromQuery(query);
124 * ensure last character before (11) digits is P
125 * ENST00000288602 -> ENSP00000288602
126 * ENSMUST00000288602 -> ENSMUSP00000288602
128 if (accId != null && accId.length() >= 12)
130 char[] chars = accId.toCharArray();
131 chars[chars.length - 12] = 'P';
132 accId = new String(chars);