2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.ext.ensembl;
23 import java.io.BufferedReader;
24 import java.io.IOException;
25 import java.net.MalformedURLException;
27 import java.util.ArrayList;
28 import java.util.Collections;
29 import java.util.Iterator;
30 import java.util.List;
32 import org.json.simple.JSONArray;
33 import org.json.simple.JSONObject;
34 import org.json.simple.parser.JSONParser;
35 import org.json.simple.parser.ParseException;
38 * A client for the Ensembl xrefs/symbol REST service;
40 * @see http://rest.ensembl.org/documentation/info/xref_external
44 public class EnsemblSymbol extends EnsemblXref
46 private static final String COLON = ":";
47 private static final String GENE = "gene";
48 private static final String TYPE = "type";
49 private static final String ID = "id";
52 * Constructor given the target domain to fetch data from
58 public EnsemblSymbol(String domain, String dbName, String dbVersion)
60 super(domain, dbName, dbVersion);
64 * Returns the first "id" value in gene identifier format from the JSON
65 * response, or null if none found
71 protected String parseSymbolResponse(BufferedReader br) throws IOException
73 JSONParser jp = new JSONParser();
77 JSONArray responses = (JSONArray) jp.parse(br);
78 Iterator rvals = responses.iterator();
79 while (rvals.hasNext())
81 JSONObject val = (JSONObject) rvals.next();
82 String id = val.get(ID).toString();
83 String type = val.get(TYPE).toString();
84 if (id != null && GENE.equals(type))
90 } catch (ParseException e)
98 * Constructs the URL for the REST symbol endpoint
101 * the accession id (Ensembl or external)
103 * a species name recognisable by Ensembl
105 * an optional type to filter the response (gene, transcript,
109 protected URL getUrl(String id, String species, String... type)
111 StringBuilder sb = new StringBuilder();
112 sb.append(getDomain()).append("/xrefs/symbol/").append(species)
113 .append("/").append(id).append(CONTENT_TYPE_JSON);
114 for (String t : type)
116 sb.append("&object_type=").append(t);
120 String url = sb.toString();
122 } catch (MalformedURLException e)
129 * Calls the Ensembl xrefs REST 'symbol' endpoint and retrieves gene id(s) for
130 * the given identifier. If the identifier has the format species:symbol then
131 * the gene id for the specified species is returned, else any matched gene ids
132 * for model organisms. If lookup fails, the returned list is empty.
137 public List<String> getGeneIds(String identifier)
139 List<String> result = new ArrayList<>();
140 List<String> ids = Collections.<String> emptyList();
141 List<String> species = getSpecies(identifier);
143 String symbol = identifier.substring(identifier.indexOf(COLON) + 1);
144 BufferedReader br = null;
147 for (String taxon : species)
149 URL url = getUrl(symbol, taxon, GENE);
152 br = getHttpResponse(url, ids);
155 String geneId = parseSymbolResponse(br);
156 if (geneId != null && !result.contains(geneId))
163 } catch (IOException e)
173 } catch (IOException e)
183 * Answers a list of species names which is
185 * <li>the species in the identifier if it is of the form species:symbol</li>
186 * <li>else a fixed list of 'model organism' species</li>
192 private List<String> getSpecies(String identifier)
194 int pos = identifier.indexOf(COLON);
197 return Collections.singletonList(identifier.substring(0, pos));
201 return Species.getModelOrganisms();