/*
* Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
* Copyright (C) $$Year-Rel$$ The Jalview Authors
*
* This file is part of Jalview.
*
* Jalview is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* Jalview is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Jalview. If not, see <http://www.gnu.org/licenses/>.
* The Jalview Authors are detailed in the 'AUTHORS' file.
*/
package jalview.ext.ensembl;
import jalview.bin.Cache;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.GeneLociI;
import jalview.util.MapList;
import java.io.BufferedReader;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.function.Function;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
/**
* A client for the Ensembl lookup REST endpoint
*
* @author gmcarstairs
*/
public class EnsemblLookup extends EnsemblRestClient
{
private static final String SPECIES = "species";
private static final String PARENT = "Parent";
private static final String OBJECT_TYPE_TRANSLATION = "Translation";
private static final String OBJECT_TYPE_TRANSCRIPT = "Transcript";
private static final String ID = "id";
private static final String OBJECT_TYPE_GENE = "Gene";
private static final String OBJECT_TYPE = "object_type";
/**
* Default constructor (to use rest.ensembl.org)
*/
public EnsemblLookup()
{
super();
}
/**
* Constructor given the target domain to fetch data from
*
* @param
*/
public EnsemblLookup(String d)
{
super(d);
}
@Override
public String getDbName()
{
return "ENSEMBL";
}
@Override
public AlignmentI getSequenceRecords(String queries) throws Exception
{
return null;
}
@Override
protected URL getUrl(List ids) throws MalformedURLException
{
String identifier = ids.get(0);
return getUrl(identifier);
}
/**
* @param identifier
* @return
*/
protected URL getUrl(String identifier)
{
String url = getDomain() + "/lookup/id/" + identifier
+ CONTENT_TYPE_JSON;
try
{
return new URL(url);
} catch (MalformedURLException e)
{
return null;
}
}
@Override
protected boolean useGetRequest()
{
return true;
}
@Override
protected String getRequestMimeType(boolean multipleIds)
{
return "application/json";
}
@Override
protected String getResponseMimeType()
{
return "application/json";
}
/**
* Calls the Ensembl lookup REST endpoint and returns
*
* - the 'id' for the identifier if its type is "Gene"
* - the 'Parent' if its type is 'Transcript'
*
* If the type is 'Translation', does a recursive call to this method, passing
* in the 'Parent' (transcript id).
*
* @param identifier
* @return
*/
public String getGeneId(String identifier)
{
return (String) getResult(identifier, br -> parseGeneId(br));
}
/**
* Calls the Ensembl lookup REST endpoint and retrieves the 'species' for the
* given identifier, or null if not found
*
* @param identifier
* @return
*/
public String getSpecies(String identifier)
{
return (String) getResult(identifier, br -> getAttribute(br, SPECIES));
}
/**
* Calls the /lookup/id rest service and delegates parsing of the JSON
* response to the supplied parser
*
* @param identifier
* @param parser
* @return
*/
protected Object getResult(String identifier,
Function parser)
{
List ids = Arrays.asList(new String[] { identifier });
BufferedReader br = null;
try
{
URL url = getUrl(identifier);
if (url != null)
{
br = getHttpResponse(url, ids);
}
return br == null ? null : parser.apply(br);
} catch (IOException e)
{
// ignore
return null;
} finally
{
if (br != null)
{
try
{
br.close();
} catch (IOException e)
{
// ignore
}
}
}
}
/**
* Answers the value of 'attribute' from the JSON response, or null if not
* found
*
* @param br
* @param attribute
* @return
*/
protected String getAttribute(BufferedReader br, String attribute)
{
String value = null;
JSONParser jp = new JSONParser();
try
{
JSONObject val = (JSONObject) jp.parse(br);
value = val.get(attribute).toString();
} catch (ParseException | NullPointerException | IOException e)
{
// ignore
}
return value;
}
/**
* Parses the JSON response and returns the gene identifier, or null if not
* found. If the returned object_type is Gene, returns the id, if Transcript
* returns the Parent. If it is Translation (peptide identifier), then the
* Parent is the transcript identifier, so we redo the search with this value.
*
* @param br
* @return
*/
protected String parseGeneId(BufferedReader br)
{
String geneId = null;
JSONParser jp = new JSONParser();
try
{
JSONObject val = (JSONObject) jp.parse(br);
String type = val.get(OBJECT_TYPE).toString();
if (OBJECT_TYPE_GENE.equalsIgnoreCase(type))
{
geneId = val.get(ID).toString();
}
else if (OBJECT_TYPE_TRANSCRIPT.equalsIgnoreCase(type))
{
geneId = val.get(PARENT).toString();
}
else if (OBJECT_TYPE_TRANSLATION.equalsIgnoreCase(type))
{
String transcriptId = val.get(PARENT).toString();
try
{
geneId = getGeneId(transcriptId);
} catch (StackOverflowError e)
{
/*
* unlikely data condition error!
*/
System.err
.println("** Ensembl lookup "
+ getUrl(transcriptId).toString()
+ " looping on Parent!");
}
}
} catch (ParseException | IOException e)
{
// ignore
}
return geneId;
}
/**
* Calls the /lookup/id rest service for the given id, and if successful,
* parses and returns the gene's chromosomal coordinates
*
* @param geneId
* @return
*/
public GeneLociI getGeneLoci(String geneId)
{
return (GeneLociI) getResult(geneId, br -> parseGeneLoci(br));
}
/**
* Parses the /lookup/id response for species, asssembly_name,
* seq_region_name, start, end and returns an object that wraps them, or null
* if unsuccessful
*
* @param br
* @return
*/
GeneLociI parseGeneLoci(BufferedReader br)
{
JSONParser jp = new JSONParser();
try
{
JSONObject val = (JSONObject) jp.parse(br);
final String species = val.get("species").toString();
final String assembly = val.get("assembly_name").toString();
final String chromosome = val.get("seq_region_name").toString();
String strand = val.get("strand").toString();
int start = Integer.parseInt(val.get("start").toString());
int end = Integer.parseInt(val.get("end").toString());
int fromEnd = end - start + 1;
boolean reverseStrand = "-1".equals(strand);
int toStart = reverseStrand ? end : start;
int toEnd = reverseStrand ? start : end;
List fromRange = Collections.singletonList(new int[] { 1,
fromEnd });
List toRange = Collections.singletonList(new int[] { toStart,
toEnd });
final MapList map = new MapList(fromRange, toRange, 1, 1);
return new GeneLociI()
{
@Override
public String getSpeciesId()
{
return species == null ? "" : species;
}
@Override
public String getAssemblyId()
{
return assembly;
}
@Override
public String getChromosomeId()
{
return chromosome;
}
@Override
public MapList getMap()
{
return map;
}
};
} catch (ParseException | NullPointerException | IOException
| NumberFormatException | ClassCastException e)
{
Cache.log.error("Error looking up gene loci: " + e.getMessage());
}
return null;
}
}