+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
package jalview.ext.ensembl;
import java.io.BufferedReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
+/**
+ * A client for the Ensembl xrefs/symbol REST service.
+ *
+ * @see <a href="http://rest.ensembl.org/documentation/info/xref_external">Ensembl REST documentation: xref_external</a>
+ * @author gmcarstairs
+ *
+ */
public class EnsemblSymbol extends EnsemblXref
{
+ private static final String COLON = ":";
+ private static final String GENE = "gene";
+ private static final String TYPE = "type";
+ private static final String ID = "id";
+
+ /**
+ * Constructs a client for the Ensembl xrefs/symbol REST service. All three
+ * arguments are passed unchanged to the EnsemblXref superclass constructor.
+ *
+ * @param domain
+ * the target domain to fetch data from
+ * @param dbName
+ * presumably the name of the Ensembl database; forwarded to the
+ * superclass, whose use of it is not visible here
+ * @param dbVersion
+ * presumably the version of that database; forwarded to the
+ * superclass, whose use of it is not visible here
+ */
+ public EnsemblSymbol(String domain, String dbName, String dbVersion)
+ {
+ super(domain, dbName, dbVersion);
+ }
+
/**
* Returns the first "id" value whose "type" is "gene" in the JSON
* response, or null if none found
* @return
* @throws IOException
*/
- protected String parseResponse(BufferedReader br)
- throws IOException
+ protected String parseSymbolResponse(BufferedReader br) throws IOException
{
JSONParser jp = new JSONParser();
String result = null;
while (rvals.hasNext())
{
JSONObject val = (JSONObject) rvals.next();
- String id = val.get("id").toString();
- if (id != null && isGeneIdentifier(id))
+ String id = val.get(ID).toString();
+ String type = val.get(TYPE).toString();
+ if (id != null && GENE.equals(type))
{
result = id;
break;
return result;
}
- protected URL getUrl(String id, Species species)
+ /**
+ * Constructs the URL for the REST symbol endpoint
+ *
+ * @param id
+ * the accession id (Ensembl or external)
+ * @param species
+ * a species name recognisable by Ensembl
+ * @param type
+ * an optional type to filter the response (gene, transcript,
+ * translation)
+ * @return
+ */
+ protected URL getUrl(String id, String species, String... type)
{
- String url = ENSEMBL_REST + "/xrefs/symbol/" + species.toString() + "/"
- + id
- + "?content-type=application/json";
+ StringBuilder sb = new StringBuilder();
+ sb.append(getDomain()).append("/xrefs/symbol/").append(species)
+ .append("/").append(id).append(CONTENT_TYPE_JSON);
+ for (String t : type)
+ {
+ sb.append("&object_type=").append(t);
+ }
try
{
+ String url = sb.toString();
return new URL(url);
} catch (MalformedURLException e)
{
}
/**
- * Calls the Ensembl xrefs REST 'symbol' endpoint and retrieves any gene ids
- * for the given identifier, for any known model organisms
+ * Calls the Ensembl xrefs REST 'symbol' endpoint and retrieves gene id(s) for
+ * the given identifier. If the identifier has the format species:symbol then
+ * the gene id for the specified species is returned, else any matched gene ids
+ * for model organisms. If lookup fails, the returned list is empty.
*
* @param identifier
* @return
*/
- public List<String> getIds(String identifier)
+ public List<String> getGeneIds(String identifier)
{
- List<String> result = new ArrayList<String>();
- List<String> ids = new ArrayList<String>();
- ids.add(identifier);
-
- String[] queries = identifier.split(getAccessionSeparator());
+ List<String> result = new ArrayList<>();
+ List<String> ids = Collections.<String> emptyList();
+ List<String> species = getSpecies(identifier);
+
+ String symbol = identifier.substring(identifier.indexOf(COLON) + 1);
BufferedReader br = null;
try
{
- for (String query : queries)
- {
- for (Species taxon : Species.values())
+ for (String taxon : species)
{
- if (taxon.isModelOrganism())
+ URL url = getUrl(symbol, taxon, GENE);
+ if (url != null)
{
- URL url = getUrl(query, taxon);
- if (url != null)
+ br = getHttpResponse(url, ids);
+ if (br != null)
{
- br = getHttpResponse(url, ids);
- }
- String geneId = parseResponse(br);
- if (geneId != null)
- {
- result.add(geneId);
+ String geneId = parseSymbolResponse(br);
+ if (geneId != null && !result.contains(geneId))
+ {
+ result.add(geneId);
+ }
}
}
}
- }
} catch (IOException e)
{
// ignore
return result;
}
+ /**
+ * Works out which species to query for an identifier. An identifier with a
+ * colon after its first character (the form species:symbol) yields a
+ * one-element list holding the text before the first colon; any other
+ * identifier yields the list supplied by Species.getModelOrganisms().
+ *
+ * @param identifier
+ *          a symbol, optionally prefixed with a species name and a colon
+ * @return the species name(s) to search
+ */
+ private List<String> getSpecies(String identifier)
+ {
+   int colonAt = identifier.indexOf(COLON);
+   if (colonAt <= 0)
+   {
+     // no colon, or nothing in front of it: no species was specified
+     return Species.getModelOrganisms();
+   }
+   String speciesName = identifier.substring(0, colonAt);
+   return Collections.singletonList(speciesName);
+ }
+
}