import jalview.io.gff.SequenceOntologyFactory;
import jalview.io.gff.SequenceOntologyI;
-import java.util.HashMap;
-import java.util.Map;
-
import com.stevesoft.pat.Regex;
/**
private static final Regex ACCESSION_REGEX = new Regex(
"(ENS([A-Z]{3}|)[TG][0-9]{11}$)" + "|" + "(CCDS[0-9.]{3,}$)");
- private static Map<String, String> params = new HashMap<String, String>();
-
- static
- {
- params.put("object_type", "transcript");
- }
-
/*
* fetch exon features on genomic sequence (to identify the cdna regions)
* and cds and variation features (to retain)
}
/**
- * Parameter object_type=cdna added to ensure cdna and not peptide is returned
- * (JAL-2529)
+ * Parameter object_type=Transcript added to ensure cdna and not peptide is
+ * returned (JAL-2529)
*/
@Override
- protected Map<String, String> getAdditionalParameters()
+ protected String getObjectType()
{
- return params;
+ return OBJECT_TYPE_TRANSCRIPT;
}
}
return EnsemblSeqType.GENOMIC;
}
+ @Override
+ protected String getObjectType()
+ {
+ return OBJECT_TYPE_GENE;
+ }
+
/**
* Returns an alignment containing the gene(s) for the given gene or
* transcript identifier, or external identifier (e.g. Uniprot id). If given a
*/
List<String> getGeneIds(String accessions)
{
- List<String> geneIds = new ArrayList<String>();
+ List<String> geneIds = new ArrayList<>();
for (String acc : accessions.split(getAccessionSeparator()))
{
int transcriptLength = 0;
final char[] geneChars = gene.getSequence();
int offset = gene.getStart(); // to convert to 0-based positions
- List<int[]> mappedFrom = new ArrayList<int[]>();
+ List<int[]> mappedFrom = new ArrayList<>();
for (SequenceFeature sf : splices)
{
* transfer features to the new sequence; we use EnsemblCdna to do this,
* to filter out unwanted features types (see method retainFeature)
*/
- List<int[]> mapTo = new ArrayList<int[]>();
+ List<int[]> mapTo = new ArrayList<>();
mapTo.add(new int[] { 1, transcriptLength });
MapList mapping = new MapList(mappedFrom, mapTo, 1, 1);
EnsemblCdna cdna = new EnsemblCdna(getDomain());
protected List<SequenceFeature> getTranscriptFeatures(String accId,
SequenceI geneSequence)
{
- List<SequenceFeature> transcriptFeatures = new ArrayList<SequenceFeature>();
+ List<SequenceFeature> transcriptFeatures = new ArrayList<>();
String parentIdentifier = GENE_PREFIX + accId;
if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
SequenceOntologyI.GENE))
{
- String id = (String) sf.getValue(ID);
+ // NB features as gff use 'ID'; rest services return as 'id'
+ String id = (String) sf.getValue("ID");
if ((GENE_PREFIX + accId).equals(id))
{
return true;
{
if (isTranscript(sf.getType()))
{
- String id = (String) sf.getValue(ID);
+ String id = (String) sf.getValue("ID");
if (("transcript:" + accId).equals(id))
{
return true;
import org.json.simple.parser.ParseException;
/**
- * A client for the Ensembl lookup REST endpoint; used to find the Parent gene
- * identifier given a transcript identifier.
+ * A client for the Ensembl lookup REST endpoint, used to find the gene
+ * identifier given a gene, transcript or protein identifier.
*
* @author gmcarstairs
- *
*/
public class EnsemblLookup extends EnsemblRestClient
{
-
- private static final String OBJECT_TYPE_TRANSLATION = "Translation";
- private static final String PARENT = "Parent";
- private static final String OBJECT_TYPE_TRANSCRIPT = "Transcript";
- private static final String ID = "id";
- private static final String OBJECT_TYPE_GENE = "Gene";
- private static final String OBJECT_TYPE = "object_type";
-
/**
* Default constructor (to use rest.ensembl.org)
*/
protected URL getUrl(List<String> ids) throws MalformedURLException
{
String identifier = ids.get(0);
- return getUrl(identifier);
+ return getUrl(identifier, null);
}
/**
+ * Gets the url for lookup of the given identifier, optionally with objectType
+ * also specified in the request
+ *
* @param identifier
+ * @param objectType
* @return
*/
- protected URL getUrl(String identifier)
+ protected URL getUrl(String identifier, String objectType)
{
String url = getDomain() + "/lookup/id/" + identifier
+ CONTENT_TYPE_JSON;
+ if (objectType != null)
+ {
+ url += "&" + OBJECT_TYPE + "=" + objectType;
+ }
+
try
{
return new URL(url);
}
/**
+ * Returns the gene id related to the given identifier, which may be for a
+ * gene, transcript or protein
+ *
+ * @param identifier
+ * @return
+ */
+ public String getGeneId(String identifier)
+ {
+ return getGeneId(identifier, null);
+ }
+
+ /**
* Calls the Ensembl lookup REST endpoint and retrieves the 'Parent' for the
* given identifier, or null if not found
*
* @param identifier
+ * @param objectType
+ * (optional)
* @return
*/
- public String getGeneId(String identifier)
+ public String getGeneId(String identifier, String objectType)
{
List<String> ids = Arrays.asList(new String[] { identifier });
BufferedReader br = null;
try
{
- URL url = getUrl(identifier);
+ URL url = getUrl(identifier, objectType);
if (url != null)
{
br = getHttpResponse(url, ids);
String type = val.get(OBJECT_TYPE).toString();
if (OBJECT_TYPE_GENE.equalsIgnoreCase(type))
{
+ // got the gene - just return its id
geneId = val.get(ID).toString();
}
else if (OBJECT_TYPE_TRANSCRIPT.equalsIgnoreCase(type))
{
+ // got the transcript - return its (Gene) Parent
geneId = val.get(PARENT).toString();
}
else if (OBJECT_TYPE_TRANSLATION.equalsIgnoreCase(type))
{
+ // got the protein - get its Parent, restricted to type Transcript
String transcriptId = val.get(PARENT).toString();
- try
- {
- geneId = getGeneId(transcriptId);
- } catch (StackOverflowError e)
- {
- /*
- * unlikely data condition error!
- */
- System.err
- .println("** Ensembl lookup "
- + getUrl(transcriptId).toString()
- + " looping on Parent!");
- }
+ geneId = getGeneId(transcriptId, OBJECT_TYPE_TRANSCRIPT);
}
} catch (ParseException e)
{
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
/**
* Base class for Ensembl sequence fetchers
{
private static final String ALLELES = "alleles";
- protected static final String PARENT = "Parent";
-
- protected static final String ID = "ID";
-
protected static final String NAME = "Name";
protected static final String DESCRIPTION = "description";
urlstring.append("?type=").append(getSourceEnsemblType().getType());
urlstring.append(("&Accept=text/x-fasta"));
- Map<String, String> params = getAdditionalParameters();
- if (params != null)
+ String objectType = getObjectType();
+ if (objectType != null)
{
- for (Entry<String, String> entry : params.entrySet())
- {
- urlstring.append("&").append(entry.getKey()).append("=")
- .append(entry.getValue());
- }
+ urlstring.append("&").append(OBJECT_TYPE).append("=")
+ .append(objectType);
}
URL url = new URL(urlstring.toString());
}
/**
- * Override this method to add any additional x=y URL parameters needed
+ * Override this method to specify object_type request parameter
*
* @return
*/
- protected Map<String, String> getAdditionalParameters()
+ protected String getObjectType()
{
return null;
}
protected MapList getGenomicRangesFromFeatures(SequenceI sourceSequence,
String accId, int start)
{
- // SequenceFeature[] sfs = sourceSequence.getSequenceFeatures();
List<SequenceFeature> sfs = sourceSequence.getFeatures()
.getPositionalFeatures();
if (sfs.isEmpty())
* generously initial size for number of cds regions
* (worst case titin Q8WZ42 has c. 313 exons)
*/
- List<int[]> regions = new ArrayList<int[]>(100);
+ List<int[]> regions = new ArrayList<>(100);
int mappedLength = 0;
int direction = 1; // forward
boolean directionSet = false;
protected List<SequenceFeature> findFeatures(SequenceI sequence,
String term, String parentId)
{
- List<SequenceFeature> result = new ArrayList<SequenceFeature>();
+ List<SequenceFeature> result = new ArrayList<>();
List<SequenceFeature> sfs = sequence.getFeatures()
.getFeaturesByOntology(term);
protected static final String ENSEMBL_REST = "http://rest.ensembl.org";
+ protected static final String OBJECT_TYPE_TRANSLATION = "Translation";
+
+ protected static final String OBJECT_TYPE_TRANSCRIPT = "Transcript";
+
+ protected static final String OBJECT_TYPE_GENE = "Gene";
+
+ protected static final String PARENT = "Parent";
+
+ protected static final String ID = "id";
+
+ protected static final String OBJECT_TYPE = "object_type";
+
/*
* possible values for the 'feature' parameter of the /overlap REST service
* @see http://rest.ensembl.org/documentation/info/overlap_id
{
private static final String GENE = "gene";
private static final String TYPE = "type";
- private static final String ID = "id";
-
/**
* Constructor given the target domain to fetch data from
*