/*
* Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
* Copyright (C) $$Year-Rel$$ The Jalview Authors
*
* This file is part of Jalview.
*
* Jalview is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* Jalview is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Jalview. If not, see .
* The Jalview Authors are detailed in the 'AUTHORS' file.
*/
package jalview.ext.ensembl;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import java.util.ArrayList;
import java.util.List;
import com.stevesoft.pat.Regex;
/**
* A client to fetch protein translated sequence for an Ensembl identifier
*
* @author gmcarstairs
*
*/
public class EnsemblProtein extends EnsemblSeqProxy
{
/*
* accepts ENSP with 11 digits
* or ENSMUSP or similar for other species
* or CCDSnnnnn.nn with at least 3 digits
*/
private static final Regex ACCESSION_REGEX = new Regex(
"(ENS([A-Z]{3}|)P[0-9]{11}$)" + "|" + "(CCDS[0-9.]{3,}$)");
/**
* Default constructor (to use rest.ensembl.org)
*/
public EnsemblProtein()
{
super();
}
/**
* Constructor given the target domain to fetch data from
*
* @param d
*/
public EnsemblProtein(String d)
{
super(d);
}
@Override
public String getDbName()
{
return "ENSEMBL (Protein)";
}
@Override
protected EnsemblSeqType getSourceEnsemblType()
{
return EnsemblSeqType.PROTEIN;
}
/**
* Returns false, as this fetcher does not retrieve DNA sequences.
*/
@Override
public boolean isDnaCoding()
{
return false;
}
/**
* Test query is to the protein translation of transcript ENST00000288602
*/
@Override
public String getTestQuery()
{
return "ENSP00000288602";
}
/**
* Overrides base class method to do nothing - genomic features are not
* applicable to the protein product sequence
*/
@Override
protected void addFeaturesAndProduct(String accId, AlignmentI alignment)
{
}
@Override
protected EnsemblFeatureType[] getFeaturesToFetch()
{
// not applicable - can't fetch genomic features for a protein sequence
return null;
}
@Override
protected List getIdentifyingFeatures(SequenceI seq,
String accId)
{
return new ArrayList<>();
}
@Override
public Regex getAccessionValidator()
{
return ACCESSION_REGEX;
}
/**
* Returns an accession id for a query, including conversion of ENST* to
* ENSP*. This supports querying for the protein sequence for a transcript
* (ENST identifier) and returning the ENSP identifier.
*/
@Override
public String getAccessionIdFromQuery(String query)
{
String accId = super.getAccessionIdFromQuery(query);
/*
* ensure last character before (11) digits is P
* ENST00000288602 -> ENSP00000288602
* ENSMUST00000288602 -> ENSMUSP00000288602
*/
if (accId != null && accId.length() >= 12)
{
char[] chars = accId.toCharArray();
chars[chars.length - 12] = 'P';
accId = new String(chars);
}
return accId;
}
}