package jalview.ext.ensembl;

import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefEntry;
import jalview.util.DBRefUtils;

import java.io.BufferedReader;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;

/**
 * A class to fetch cross-references from Ensembl by calling the /xrefs REST
 * service
 *
 * @author gmcarstairs
 * @see <a href="http://rest.ensembl.org/documentation/info/xref_id">Ensembl
 *      REST xrefs/id endpoint documentation</a>
 */
class EnsemblXref extends EnsemblRestClient {

  /** The "dbname" value whose entries are skipped when parsing the response. */
  private static final String GO_GENE_ONTOLOGY = "GO";

  /**
   * Constructor given the target domain to fetch data from
   *
   * @param d
   *          the domain, passed straight through to the superclass
   */
  public EnsemblXref(String d) {
    super(d);
  }

  @Override
  public String getDbName() {
    return "ENSEMBL (xref)";
  }

  /**
   * Not supported by this client: always returns null. Use
   * {@link #getCrossReferences(String)} to fetch cross-references.
   */
  @Override
  public AlignmentI getSequenceRecords(String queries) throws Exception {
    return null;
  }

  /**
   * Returns the xrefs URL for the first identifier in the list; any further
   * identifiers are not used.
   *
   * @param ids
   *          identifiers; only element 0 is read
   * @return the URL built by {@link #getUrl(String)}, which may be null
   */
  @Override
  protected URL getUrl(List<String> ids) throws MalformedURLException {
    return getUrl(ids.get(0));
  }

  @Override
  protected boolean useGetRequest() {
    return true;
  }

  @Override
  protected String getRequestMimeType(boolean multipleIds) {
    return "application/json";
  }

  @Override
  protected String getResponseMimeType() {
    return "application/json";
  }

  /**
   * Calls the Ensembl xrefs REST endpoint and retrieves any cross-references
   * ("primary_id") for the given identifier (Ensembl accession id). The
   * "dbname" returned by Ensembl is canonicalised to Jalview's standard
   * version, and a DBRefEntry constructed. Currently takes all identifiers
   * apart from GO terms and synonyms.
   *
   * @param identifier
   *          an Ensembl stable identifier
   * @return the cross-references found; an empty (never null) list if no URL
   *         could be built, no response was obtained, or an I/O error occurred
   */
  public List<DBRefEntry> getCrossReferences(String identifier) {
    List<DBRefEntry> result = new ArrayList<DBRefEntry>();
    List<String> ids = new ArrayList<String>();
    ids.add(identifier);

    BufferedReader br = null;
    try {
      URL url = getUrl(identifier);
      if (url != null) {
        br = getHttpResponse(url, ids);
      }
      // br is still null when no URL could be built; only parse a real reader
      if (br != null) {
        result = parseResponse(br);
      }
    } catch (IOException e) {
      // deliberate best effort: fall through and return the (empty) result
    } finally {
      if (br != null) {
        try {
          br.close();
        } catch (IOException e) {
          // nothing useful can be done if closing the reader fails
        }
      }
    }
    return result;
  }

  /**
   * Parses "primary_id" and "dbname" values from the JSON response and
   * constructs a DBRefEntry for each. Returns a list of the DBRefEntry
   * created. Entries whose "dbname" is "GO" are skipped, as are entries that
   * are not JSON objects or that lack either field. Note we don't parse
   * "synonyms" as they appear to be either redirected or obsolete in Uniprot.
   *
   * @param br
   *          reader positioned at the start of the JSON response; if null an
   *          empty list is returned
   * @return the parsed cross-references; empty (never null) if the response is
   *         not valid JSON or is not a JSON array
   * @throws IOException
   *           if reading from {@code br} fails
   */
  protected List<DBRefEntry> parseResponse(BufferedReader br)
          throws IOException {
    List<DBRefEntry> result = new ArrayList<DBRefEntry>();
    if (br == null) {
      return result;
    }

    JSONParser jp = new JSONParser();
    try {
      Object parsed = jp.parse(br);
      if (!(parsed instanceof JSONArray)) {
        // anything other than an array holds no xref entries to read
        return result;
      }

      Iterator<?> rvals = ((JSONArray) parsed).iterator();
      while (rvals.hasNext()) {
        Object next = rvals.next();
        if (!(next instanceof JSONObject)) {
          continue;
        }
        JSONObject val = (JSONObject) next;

        // read both fields null-safely before using them
        Object dbNameValue = val.get("dbname");
        Object idValue = val.get("primary_id");
        if (dbNameValue == null || idValue == null) {
          continue;
        }

        String dbName = dbNameValue.toString();
        if (GO_GENE_ONTOLOGY.equals(dbName)) {
          continue;
        }

        dbName = DBRefUtils.getCanonicalName(dbName);
        result.add(new DBRefEntry(dbName, "0", idValue.toString()));
      }
    } catch (ParseException e) {
      // malformed JSON: return whatever was collected (nothing, in practice)
    }
    return result;
  }

  /**
   * Returns the URL for the REST endpoint to fetch all cross-references for an
   * identifier. Note this may return protein cross-references for nucleotide.
   * Filter the returned list as required.
   *
   * @param identifier
   *          the identifier appended to the "/xrefs/id/" path
   * @return the URL, or null if the assembled string is not a valid URL
   */
  protected URL getUrl(String identifier) {
    String url = getDomain() + "/xrefs/id/" + identifier
            + "?content-type=application/json&all_levels=1";
    try {
      return new URL(url);
    } catch (MalformedURLException e) {
      return null;
    }
  }
}