From 916c073172d5e374f7375ec718420455472dfcb3 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Thu, 7 Dec 2017 10:40:33 +0000 Subject: [PATCH] JAL-2783 accept Ensembl query of the form "human:braf" --- src/jalview/ext/ensembl/EnsemblGene.java | 28 +++++++++------ src/jalview/ext/ensembl/EnsemblSymbol.java | 54 +++++++++++++++++++--------- src/jalview/ext/ensembl/Species.java | 10 +++--- test/jalview/ext/ensembl/SpeciesTest.java | 12 +++---- 4 files changed, 66 insertions(+), 38 deletions(-) diff --git a/src/jalview/ext/ensembl/EnsemblGene.java b/src/jalview/ext/ensembl/EnsemblGene.java index 50dfa90..7e04b95 100644 --- a/src/jalview/ext/ensembl/EnsemblGene.java +++ b/src/jalview/ext/ensembl/EnsemblGene.java @@ -170,26 +170,32 @@ public class EnsemblGene extends EnsemblSeqProxy */ List getGeneIds(String accessions) { - List geneIds = new ArrayList(); + List geneIds = new ArrayList<>(); for (String acc : accessions.split(getAccessionSeparator())) { + String geneId = null; + /* * First try lookup as an Ensembl (gene or transcript) identifier + * unless query is of the form species:symbol e.g. 
human:braf */ - String geneId = new EnsemblLookup(getDomain()).getGeneId(acc); - if (geneId != null) + if (acc.indexOf(":") == -1) { - if (!geneIds.contains(geneId)) + geneId = new EnsemblLookup(getDomain()).getGeneId(acc); + if (geneId != null) { - geneIds.add(geneId); + if (!geneIds.contains(geneId)) + { + geneIds.add(geneId); + } } } - else + if (geneId == null) { /* - * if given a gene or other external name, lookup and fetch - * the corresponding gene for all model organisms + * if given a gene or other external name, lookup and fetch the gene + * for the species (if specified in the query), else for all model organisms */ List ids = new EnsemblSymbol(getDomain(), getDbSource(), getDbVersion()).getGeneIds(acc); @@ -305,7 +311,7 @@ public class EnsemblGene extends EnsemblSeqProxy int transcriptLength = 0; final char[] geneChars = gene.getSequence(); int offset = gene.getStart(); // to convert to 0-based positions - List mappedFrom = new ArrayList(); + List mappedFrom = new ArrayList<>(); for (SequenceFeature sf : splices) { @@ -347,7 +353,7 @@ public class EnsemblGene extends EnsemblSeqProxy * transfer features to the new sequence; we use EnsemblCdna to do this, * to filter out unwanted features types (see method retainFeature) */ - List mapTo = new ArrayList(); + List mapTo = new ArrayList<>(); mapTo.add(new int[] { 1, transcriptLength }); MapList mapping = new MapList(mappedFrom, mapTo, 1, 1); EnsemblCdna cdna = new EnsemblCdna(getDomain()); @@ -395,7 +401,7 @@ public class EnsemblGene extends EnsemblSeqProxy protected List getTranscriptFeatures(String accId, SequenceI geneSequence) { - List transcriptFeatures = new ArrayList(); + List transcriptFeatures = new ArrayList<>(); String parentIdentifier = GENE_PREFIX + accId; diff --git a/src/jalview/ext/ensembl/EnsemblSymbol.java b/src/jalview/ext/ensembl/EnsemblSymbol.java index 75598a0..411a244 100644 --- a/src/jalview/ext/ensembl/EnsemblSymbol.java +++ b/src/jalview/ext/ensembl/EnsemblSymbol.java @@ -25,6 
+25,7 @@ import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; +import java.util.Collections; import java.util.Iterator; import java.util.List; @@ -42,6 +43,7 @@ import org.json.simple.parser.ParseException; */ public class EnsemblSymbol extends EnsemblXref { + private static final String COLON = ":"; private static final String GENE = "gene"; private static final String TYPE = "type"; private static final String ID = "id"; @@ -104,12 +106,11 @@ public class EnsemblSymbol extends EnsemblXref * translation) * @return */ - protected URL getUrl(String id, Species species, String... type) + protected URL getUrl(String id, String species, String... type) { StringBuilder sb = new StringBuilder(); - sb.append(getDomain()).append("/xrefs/symbol/") - .append(species.toString()).append("/").append(id) - .append(CONTENT_TYPE_JSON); + sb.append(getDomain()).append("/xrefs/symbol/").append(species) + .append("/").append(id).append(CONTENT_TYPE_JSON); for (String t : type) { sb.append("&object_type=").append(t); @@ -125,34 +126,33 @@ public class EnsemblSymbol extends EnsemblXref } /** - * Calls the Ensembl xrefs REST 'symbol' endpoint and retrieves any gene ids - * for the given identifier, for any known model organisms + * Calls the Ensembl xrefs REST 'symbol' endpoint and retrieves gene id(s) for + * the given identifier. If the identifier has the format species:symbol then + * the gene id for the specified species is returned, else any matched gene ids + * for model organisms. If lookup fails, the returned list is empty. * * @param identifier * @return */ public List getGeneIds(String identifier) { - List result = new ArrayList(); - List ids = new ArrayList(); - ids.add(identifier); + List result = new ArrayList<>(); + List ids = Collections. 
emptyList(); + List species = getSpecies(identifier); - String[] queries = identifier.split(getAccessionSeparator()); + String symbol = identifier.substring(identifier.indexOf(COLON) + 1); BufferedReader br = null; try { - for (String query : queries) - { - for (Species taxon : Species.getModelOrganisms()) + for (String taxon : species) { - URL url = getUrl(query, taxon, GENE); + URL url = getUrl(symbol, taxon, GENE); if (url != null) { br = getHttpResponse(url, ids); if (br != null) { String geneId = parseSymbolResponse(br); - System.out.println(url + " returned " + geneId); if (geneId != null && !result.contains(geneId)) { result.add(geneId); @@ -160,7 +160,6 @@ public class EnsemblSymbol extends EnsemblXref } } } - } } catch (IOException e) { // ignore @@ -180,4 +179,27 @@ public class EnsemblSymbol extends EnsemblXref return result; } + /** + * Answers a list of species names which is + *
<ul> + * <li>the species in the identifier if it is of the form species:symbol</li> + * <li>else a fixed list of 'model organism' species</li> + * </ul>
+ * + * @param identifier + * @return + */ + private List getSpecies(String identifier) + { + int pos = identifier.indexOf(COLON); + if (pos > 0) + { + return Collections.singletonList(identifier.substring(0, pos)); + } + else + { + return Species.getModelOrganisms(); + } + } + } diff --git a/src/jalview/ext/ensembl/Species.java b/src/jalview/ext/ensembl/Species.java index cc5465e..e29dcd7 100644 --- a/src/jalview/ext/ensembl/Species.java +++ b/src/jalview/ext/ensembl/Species.java @@ -20,8 +20,8 @@ */ package jalview.ext.ensembl; -import java.util.HashSet; -import java.util.Set; +import java.util.ArrayList; +import java.util.List; /** * Selected species identifiers used by Ensembl @@ -41,7 +41,7 @@ enum Species chimpanzee(false), cat(false), zebrafish(true), chicken(true), dmelanogaster(true); - static Set modelOrganisms = new HashSet<>(); + static List modelOrganisms = new ArrayList<>(); static { @@ -49,7 +49,7 @@ enum Species { if (s.isModelOrganism()) { - modelOrganisms.add(s); + modelOrganisms.add(s.name()); } } } @@ -65,7 +65,7 @@ enum Species return modelOrganism; } - public static Set getModelOrganisms() + public static List getModelOrganisms() { return modelOrganisms; } diff --git a/test/jalview/ext/ensembl/SpeciesTest.java b/test/jalview/ext/ensembl/SpeciesTest.java index 44658e7..2afa159 100644 --- a/test/jalview/ext/ensembl/SpeciesTest.java +++ b/test/jalview/ext/ensembl/SpeciesTest.java @@ -3,7 +3,7 @@ package jalview.ext.ensembl; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; -import java.util.Set; +import java.util.List; import org.testng.annotations.Test; @@ -12,18 +12,18 @@ public class SpeciesTest @Test public void testGetModelOrganisms() { - Set models = Species.getModelOrganisms(); - assertTrue(models.contains(Species.human)); - assertFalse(models.contains(Species.horse)); + List models = Species.getModelOrganisms(); + assertTrue(models.contains(Species.human.name())); + 
assertFalse(models.contains(Species.horse.name())); for (Species s : Species.values()) { if (s.isModelOrganism()) { - assertTrue(models.contains(s)); + assertTrue(models.contains(s.name())); } else { - assertFalse(models.contains(s)); + assertFalse(models.contains(s.name())); } } } -- 1.7.10.2