*/
List<String> getGeneIds(String accessions)
{
- List<String> geneIds = new ArrayList<String>();
+ List<String> geneIds = new ArrayList<>();
for (String acc : accessions.split(getAccessionSeparator()))
{
+ String geneId = null;
+
/*
* First try lookup as an Ensembl (gene or transcript) identifier
+ * unless query is of the form species:symbol e.g. human:braf
*/
- String geneId = new EnsemblLookup(getDomain()).getGeneId(acc);
- if (geneId != null)
+ if (acc.indexOf(":") == -1)
{
- if (!geneIds.contains(geneId))
+ geneId = new EnsemblLookup(getDomain()).getGeneId(acc);
+ if (geneId != null)
{
- geneIds.add(geneId);
+ if (!geneIds.contains(geneId))
+ {
+ geneIds.add(geneId);
+ }
}
}
- else
+ if (geneId == null)
{
/*
- * if given a gene or other external name, lookup and fetch
- * the corresponding gene for all model organisms
+ * if given a gene or other external name, lookup and fetch the gene
+ * for the species (if specified in the query), else for all model organisms
*/
List<String> ids = new EnsemblSymbol(getDomain(), getDbSource(),
getDbVersion()).getGeneIds(acc);
int transcriptLength = 0;
final char[] geneChars = gene.getSequence();
int offset = gene.getStart(); // to convert to 0-based positions
- List<int[]> mappedFrom = new ArrayList<int[]>();
+ List<int[]> mappedFrom = new ArrayList<>();
for (SequenceFeature sf : splices)
{
* transfer features to the new sequence; we use EnsemblCdna to do this,
* to filter out unwanted features types (see method retainFeature)
*/
- List<int[]> mapTo = new ArrayList<int[]>();
+ List<int[]> mapTo = new ArrayList<>();
mapTo.add(new int[] { 1, transcriptLength });
MapList mapping = new MapList(mappedFrom, mapTo, 1, 1);
EnsemblCdna cdna = new EnsemblCdna(getDomain());
protected List<SequenceFeature> getTranscriptFeatures(String accId,
SequenceI geneSequence)
{
- List<SequenceFeature> transcriptFeatures = new ArrayList<SequenceFeature>();
+ List<SequenceFeature> transcriptFeatures = new ArrayList<>();
String parentIdentifier = GENE_PREFIX + accId;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.Iterator;
import java.util.List;
*/
public class EnsemblSymbol extends EnsemblXref
{
+ private static final String COLON = ":";
private static final String GENE = "gene";
private static final String TYPE = "type";
private static final String ID = "id";
* translation)
* @return
*/
- protected URL getUrl(String id, Species species, String... type)
+ protected URL getUrl(String id, String species, String... type)
{
StringBuilder sb = new StringBuilder();
- sb.append(getDomain()).append("/xrefs/symbol/")
- .append(species.toString()).append("/").append(id)
- .append(CONTENT_TYPE_JSON);
+ sb.append(getDomain()).append("/xrefs/symbol/").append(species)
+ .append("/").append(id).append(CONTENT_TYPE_JSON);
for (String t : type)
{
sb.append("&object_type=").append(t);
}
/**
- * Calls the Ensembl xrefs REST 'symbol' endpoint and retrieves any gene ids
- * for the given identifier, for any known model organisms
+ * Calls the Ensembl xrefs REST 'symbol' endpoint and retrieves gene id(s) for
+ * the given identifier. If the identifier has the format species:symbol then
+ * the gene id for the specified species is returned, else any matched gene ids
+ * for model organisms. If lookup fails, the returned list is empty.
*
* @param identifier
* @return
*/
public List<String> getGeneIds(String identifier)
{
- List<String> result = new ArrayList<String>();
- List<String> ids = new ArrayList<String>();
- ids.add(identifier);
+ List<String> result = new ArrayList<>();
+ List<String> ids = Collections.<String> emptyList();
+ List<String> species = getSpecies(identifier);
- String[] queries = identifier.split(getAccessionSeparator());
+ String symbol = identifier.substring(identifier.indexOf(COLON) + 1);
BufferedReader br = null;
try
{
- for (String query : queries)
- {
- for (Species taxon : Species.getModelOrganisms())
+ for (String taxon : species)
{
- URL url = getUrl(query, taxon, GENE);
+ URL url = getUrl(symbol, taxon, GENE);
if (url != null)
{
br = getHttpResponse(url, ids);
if (br != null)
{
String geneId = parseSymbolResponse(br);
- System.out.println(url + " returned " + geneId);
if (geneId != null && !result.contains(geneId))
{
result.add(geneId);
}
}
}
- }
} catch (IOException e)
{
// ignore
return result;
}
+ /**
+ * Answers the list of species names to query for the given identifier, which is
+ * <ul>
+ * <li>a one-element list holding the text before the first colon, if the identifier is of the form species:symbol</li>
+ * <li>else the list supplied by Species.getModelOrganisms()</li>
+ * </ul>
+ *
+ * @param identifier a query of the form symbol or species:symbol; must not be null
+ * @return the species name(s) to use for the lookup, as described above
+ */
+ private List<String> getSpecies(String identifier)
+ {
+ int pos = identifier.indexOf(COLON);
+ if (pos > 0) // a leading colon (pos == 0) is handled like no colon at all (pos == -1)
+ {
+ return Collections.singletonList(identifier.substring(0, pos)); // species prefix only, colon excluded
+ }
+ else
+ {
+ return Species.getModelOrganisms();
+ }
+ }
+
}
*/
package jalview.ext.ensembl;
-import java.util.HashSet;
-import java.util.Set;
+import java.util.ArrayList;
+import java.util.List;
/**
* Selected species identifiers used by Ensembl
chimpanzee(false), cat(false), zebrafish(true), chicken(true),
dmelanogaster(true);
- static Set<Species> modelOrganisms = new HashSet<>();
+ static List<String> modelOrganisms = new ArrayList<>();
static
{
{
if (s.isModelOrganism())
{
- modelOrganisms.add(s);
+ modelOrganisms.add(s.name());
}
}
}
return modelOrganism;
}
- public static Set<Species> getModelOrganisms()
+ public static List<String> getModelOrganisms() // returns the static modelOrganisms list itself, not a copy
{
return modelOrganisms;
}
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;
-import java.util.Set;
+import java.util.List;
import org.testng.annotations.Test;
@Test
public void testGetModelOrganisms()
{
- Set<Species> models = Species.getModelOrganisms();
- assertTrue(models.contains(Species.human));
- assertFalse(models.contains(Species.horse));
+ List<String> models = Species.getModelOrganisms(); // list holds enum names (Species.name()), not enum constants
+ assertTrue(models.contains(Species.human.name())); // spot check: human is expected in the list
+ assertFalse(models.contains(Species.horse.name())); // spot check: horse is expected to be absent
for (Species s : Species.values())
{
if (s.isModelOrganism())
{
- assertTrue(models.contains(s));
+ assertTrue(models.contains(s.name())); // every flagged species must be listed by name
}
else
{
- assertFalse(models.contains(s));
+ assertFalse(models.contains(s.name())); // and no unflagged species may appear
}
}
}