*/
package jalview.ext.ensembl;
+import jalview.bin.Console;
import jalview.datamodel.AlignmentI;
+import jalview.datamodel.GeneLociI;
+import jalview.datamodel.GeneLocus;
+import jalview.datamodel.Mapping;
+import jalview.util.MapList;
-import java.io.BufferedReader;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
+import java.util.Collections;
import java.util.List;
+import java.util.Map;
-import org.json.simple.JSONObject;
-import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
/**
- * A client for the Ensembl lookup REST endpoint, used to find the gene
- * identifier given a gene, transcript or protein identifier.
+ * A client for the Ensembl /lookup REST endpoint, used to find the gene
+ * identifier given a gene, transcript or protein identifier, or to extract the
+ * species or chromosomal coordinates from the same service response
*
* @author gmcarstairs
*/
public class EnsemblLookup extends EnsemblRestClient
{
-
- private static final String OBJECT_TYPE_TRANSLATION = "Translation";
- private static final String PARENT = "Parent";
- private static final String OBJECT_TYPE_TRANSCRIPT = "Transcript";
- private static final String ID = "id";
- private static final String OBJECT_TYPE_GENE = "Gene";
- private static final String OBJECT_TYPE = "object_type";
-
- /**
- * keep track of last identifier retrieved to break loops
- */
- private String lastId;
+ private static final String SPECIES = "species";
/**
* Default constructor (to use rest.ensembl.org)
return true;
}
- @Override
- protected String getRequestMimeType(boolean multipleIds)
- {
- return "application/json";
- }
-
- @Override
- protected String getResponseMimeType()
- {
- return "application/json";
- }
-
/**
- * Returns the gene id related to the given identifier, which may be for a
- * gene, transcript or protein
+ * Returns the gene id related to the given identifier (which may be for a
+ * gene, transcript or protein), or null if none is found
*
* @param identifier
* @return
}
/**
- * Calls the Ensembl lookup REST endpoint and retrieves the 'Parent' for the
- * given identifier, or null if not found
+ * Returns the gene id related to the given identifier (which may be for a
+ * gene, transcript or protein), or null if none is found
*
* @param identifier
* @param objectType
- * (optional)
* @return
*/
public String getGeneId(String identifier, String objectType)
{
- List<String> ids = Arrays.asList(new String[] { identifier });
+ return parseGeneId(getResult(identifier, objectType));
+ }
- BufferedReader br = null;
- try
+ /**
+ * Parses the JSON response and returns the gene identifier, or null if not
+ * found. If the returned object_type is Gene, returns the id, if Transcript
+ * returns the Parent. If it is Translation (peptide identifier), then the
+ * Parent is the transcript identifier, so we redo the search with this value.
+ *
+ * @param val the parsed lookup response (may be null)
+ * @return
+ */
+ protected String parseGeneId(Map<String, Object> val)
+ {
+ if (val == null)
+ {
+ return null;
+ }
+ String geneId = null;
+ String type = val.get(OBJECT_TYPE).toString();
+ if (OBJECT_TYPE_GENE.equalsIgnoreCase(type))
+ {
+ // got the gene - just return its id
+ geneId = val.get(JSON_ID).toString();
+ }
+ else if (OBJECT_TYPE_TRANSCRIPT.equalsIgnoreCase(type))
+ {
+ // got the transcript - return its (Gene) Parent
+ geneId = val.get(PARENT).toString();
+ }
+ else if (OBJECT_TYPE_TRANSLATION.equalsIgnoreCase(type))
{
+ // got the protein - get its Parent, restricted to type Transcript
+ String transcriptId = val.get(PARENT).toString();
+ geneId = getGeneId(transcriptId, OBJECT_TYPE_TRANSCRIPT);
+ }
- URL url = getUrl(identifier, objectType);
+ return geneId;
+ }
- if (identifier.equals(lastId))
+ /**
+ * Calls the Ensembl lookup REST endpoint and retrieves the 'species' for the
+ * given identifier, or null if not found
+ *
+ * @param identifier
+ * @return
+ */
+ public String getSpecies(String identifier)
+ {
+ String species = null;
+ Map<String, Object> json = getResult(identifier, null);
+ if (json != null)
+ {
+ Object o = json.get(SPECIES);
+ if (o != null)
{
- System.err.println("** Ensembl lookup " + url.toString()
- + " looping on Parent!");
- return null;
+ species = o.toString();
}
+ }
+ return species;
+ }
- lastId = identifier;
+ /**
+ * Calls the /lookup/id rest service and returns the response as a
+ * {@code Map<String, Object>}, or null if an error occurs
+ *
+ * @param identifier
+ * @param objectType
+ * (optional)
+ * @return
+ */
+ @SuppressWarnings("unchecked")
+ protected Map<String, Object> getResult(String identifier,
+ String objectType)
+ {
+ List<String> ids = Arrays.asList(new String[] { identifier });
- if (url != null)
- {
- br = getHttpResponse(url, ids);
- }
- return br == null ? null : parseResponse(br);
- } catch (IOException e)
+ try
{
- // ignore
- return null;
- } finally
+ return (Map<String, Object>) getJSON(getUrl(identifier, objectType),
+ ids, -1, MODE_MAP, null);
+ } catch (IOException | ParseException e)
{
- if (br != null)
- {
- try
- {
- br.close();
- } catch (IOException e)
- {
- // ignore
- }
- }
+ jalview.bin.Console.errPrintln("Error parsing " + identifier + " lookup response "
+ + e.getMessage());
+ return null;
}
}
/**
- * Parses the JSON response and returns the gene identifier, or null if not
- * found. If the returned object_type is Gene, returns the id, if Transcript
- * returns the Parent. If it is Translation (peptide identifier), then the
- * Parent is the transcript identifier, so we redo the search with this value.
+ * Calls the /lookup/id rest service for the given id, and if successful,
+ * parses and returns the gene's chromosomal coordinates
*
- * @param br
+ * @param geneId
* @return
- * @throws IOException
*/
- protected String parseResponse(BufferedReader br) throws IOException
+ public GeneLociI getGeneLoci(String geneId)
{
- String geneId = null;
- JSONParser jp = new JSONParser();
+ return parseGeneLoci(getResult(geneId, OBJECT_TYPE_GENE));
+ }
+
+ /**
+ * Parses the /lookup/id response for species, assembly_name,
+ * seq_region_name, strand, start and end, and returns an object that wraps
+ * them, or null if unsuccessful
+ *
+ * @param json
+ * @return
+ */
+ GeneLociI parseGeneLoci(Map<String, Object> json)
+ {
+ if (json == null)
+ {
+ return null;
+ }
+
try
{
- JSONObject val = (JSONObject) jp.parse(br);
- String type = val.get(OBJECT_TYPE).toString();
- if (OBJECT_TYPE_GENE.equalsIgnoreCase(type))
- {
- // got the gene - just returns its id
- geneId = val.get(ID).toString();
- }
- else if (OBJECT_TYPE_TRANSCRIPT.equalsIgnoreCase(type))
- {
- // got the transcript - return its (Gene) Parent
- geneId = val.get(PARENT).toString();
- }
- else if (OBJECT_TYPE_TRANSLATION.equalsIgnoreCase(type))
- {
- // got the protein - get its Parent, restricted to type Transcript
- String transcriptId = val.get(PARENT).toString();
- geneId = getGeneId(transcriptId, OBJECT_TYPE_TRANSCRIPT);
- }
- } catch (ParseException e)
+ final String species = json.get("species").toString();
+ final String assembly = json.get("assembly_name").toString();
+ final String chromosome = json.get("seq_region_name").toString();
+ String strand = json.get("strand").toString();
+ int start = Integer.parseInt(json.get("start").toString());
+ int end = Integer.parseInt(json.get("end").toString());
+ int fromEnd = end - start + 1;
+ boolean reverseStrand = "-1".equals(strand);
+ int toStart = reverseStrand ? end : start;
+ int toEnd = reverseStrand ? start : end;
+ List<int[]> fromRange = Collections
+ .singletonList(new int[]
+ { 1, fromEnd });
+ List<int[]> toRange = Collections
+ .singletonList(new int[]
+ { toStart, toEnd });
+ final Mapping map = new Mapping(
+ new MapList(fromRange, toRange, 1, 1));
+ return new GeneLocus(species == null ? "" : species, assembly,
+ chromosome, map);
+ } catch (NullPointerException | NumberFormatException e)
{
- // ignore
+ Console.error("Error looking up gene loci: " + e.getMessage());
+ e.printStackTrace();
}
- return geneId;
+ return null;
}
}