From f4f47d1979c146145d8fc3fc1272bb76a7286b16 Mon Sep 17 00:00:00 2001
From: gmungoc
Date: Tue, 28 Nov 2017 09:27:21 +0000
Subject: [PATCH] JAL-2087 /info/species client, alias cache, type-assist demo

---
 src/jalview/ext/ensembl/EnsemblInfo.java         |  112 ++++++++++++++
 src/jalview/ext/ensembl/EnsemblSpecies.java      |  191 ++++++++++++++++++++++++
 test/jalview/ext/ensembl/EnsemblInfoTest.java    |   14 ++
 test/jalview/ext/ensembl/EnsemblSpeciesDemo.java |  114 ++++++++++++++
 test/jalview/ext/ensembl/EnsemblSpeciesTest.java |   62 ++++++++
 5 files changed, 493 insertions(+)
 create mode 100644 src/jalview/ext/ensembl/EnsemblSpecies.java
 create mode 100644 test/jalview/ext/ensembl/EnsemblInfoTest.java
 create mode 100644 test/jalview/ext/ensembl/EnsemblSpeciesDemo.java
 create mode 100644 test/jalview/ext/ensembl/EnsemblSpeciesTest.java

diff --git a/src/jalview/ext/ensembl/EnsemblInfo.java b/src/jalview/ext/ensembl/EnsemblInfo.java
index de55a53..f5f201b 100644
--- a/src/jalview/ext/ensembl/EnsemblInfo.java
+++ b/src/jalview/ext/ensembl/EnsemblInfo.java
@@ -2,11 +2,13 @@ package jalview.ext.ensembl;
 
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.DBRefSource;
+import jalview.ext.ensembl.EnsemblSpecies.EnsemblTaxon;
 
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
+import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
@@ -14,6 +16,7 @@ import java.util.Map;
 import java.util.Set;
 
 import org.json.simple.JSONArray;
+import org.json.simple.JSONObject;
 import org.json.simple.parser.JSONParser;
 import org.json.simple.parser.ParseException;
 
@@ -37,6 +40,16 @@ public class EnsemblInfo extends EnsemblRestClient
    */
   private static Map divisions;
 
+  public EnsemblInfo()
+  {
+    super();
+  }
+
+  public EnsemblInfo(String domain)
+  {
+    super(domain);
+  }
+
   @Override
   public String getDbName()
   {
@@ -182,4 +195,103 @@ public class EnsemblInfo extends EnsemblRestClient
 
     return divisions.keySet();
   }
+
+  /**
+   * Calls the REST /info/species endpoint for the current domain and returns
+   * the result as a list of EnsemblTaxon objects. Returns null if the request
+   * throws an IOException or yields no reader; returns an empty list if the
+   * reply cannot be parsed.
+   *
+   * @return the species in the reply, or null if the request failed
+   */
+  public List<EnsemblTaxon> getSpecies()
+  {
+    BufferedReader br = null;
+    try
+    {
+      URL url = getSpeciesUrl(getDomain());
+      br = getHttpResponse(url, null);
+      if (br == null)
+      {
+        // nothing to parse (the parser cannot read from a null reader)
+        return null;
+      }
+      return parseSpeciesResponse(br);
+    } catch (IOException e)
+    {
+      // URL or request failure - reported to the caller as a null return
+    } finally
+    {
+      if (br != null)
+      {
+        try
+        {
+          br.close();
+        } catch (IOException e)
+        {
+          // ignore - nothing further is done with the reader
+        }
+      }
+    }
+    return null;
+  }
+
+  /**
+   * Parses the JSON reply from /info/species into a list of taxons. If the
+   * reply cannot be read or parsed, or has no "species" member, an empty list
+   * is returned. An entry with no "aliases" member gets an empty alias list.
+   *
+   * @param br
+   * @return a list of the taxons parsed (may be empty, never null)
+   */
+  private List<EnsemblTaxon> parseSpeciesResponse(BufferedReader br)
+  {
+    List<EnsemblTaxon> result = new ArrayList<>();
+
+    JSONParser jp = new JSONParser();
+    try
+    {
+      JSONObject reply = (JSONObject) jp.parse(br);
+      JSONArray parsed = (JSONArray) reply.get("species");
+      if (parsed == null)
+      {
+        return result;
+      }
+
+      Iterator<?> rvals = parsed.iterator();
+      while (rvals.hasNext())
+      {
+        JSONObject taxon = (JSONObject) rvals.next();
+        String id = (String) taxon.get("taxon_id");
+        String name = (String) taxon.get("name");
+        String commonName = (String) taxon.get("common_name");
+        String displayName = (String) taxon.get("display_name");
+        JSONArray aliases = (JSONArray) taxon.get("aliases");
+        List<String> aka = new ArrayList<>();
+        if (aliases != null)
+        {
+          aliases.forEach(alias -> aka.add(alias.toString()));
+        }
+        result.add(
+                new EnsemblTaxon(id, name, displayName, commonName, aka));
+      }
+    } catch (IOException | ParseException | NumberFormatException e)
+    {
+      // thrown only while parsing the reply, so the list is still empty
+    }
+
+    return result;
+  }
+
+  /**
+   * Builds the URL of the /info/species endpoint, requesting a JSON reply
+   *
+   * @param domain
+   * @return the given domain followed by the endpoint path and content type
+   * @throws MalformedURLException
+   */
+  private URL getSpeciesUrl(String domain) throws MalformedURLException
+  {
+    return new URL(domain + "/info/species?content-type=application/json");
+  }
 }
diff --git a/src/jalview/ext/ensembl/EnsemblSpecies.java b/src/jalview/ext/ensembl/EnsemblSpecies.java
new file mode 100644
index 0000000..77b87a3
--- /dev/null
+++ b/src/jalview/ext/ensembl/EnsemblSpecies.java
@@ -0,0 +1,191 @@
+package jalview.ext.ensembl;
+
+import jalview.bin.Cache;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+/**
+ * A lookup from taxon id, name, display name, common name or alias (compared
+ * ignoring case) to the species reported by an Ensembl REST /info/species call
+ */
+public class EnsemblSpecies
+{
+  /**
+   * Holds the fields read from one entry of the /info/species reply
+   */
+  public static class EnsemblTaxon
+  {
+    public final String ncbiId;
+
+    public final String name;
+
+    public final String displayName;
+
+    public final String commonName;
+
+    public final List<String> aliases;
+
+    public EnsemblTaxon(String id, String theName, String theDisplayName,
+            String theCommonName, List<String> aka)
+    {
+      ncbiId = id;
+      name = theName;
+      displayName = theDisplayName;
+      commonName = theCommonName;
+      aliases = aka;
+    }
+  }
+
+  private static EnsemblSpecies forEnsembl;
+
+  private static EnsemblSpecies forEnsemblGenomes;
+
+  private String domain;
+
+  /*
+   * a map whose key is a species id or alias, and value the Taxon
+   */
+  private SortedMap<String, EnsemblTaxon> lookup;
+
+  private EnsemblSpecies(String dom)
+  {
+    domain = dom;
+    lookup = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
+  }
+
+  /**
+   * Returns the species lookup for Ensembl (if the argument is true) or for
+   * EnsemblGenomes (if false). Each lookup is created and populated on first
+   * request, and the same instance is returned by every later request.
+   *
+   * @param ensembl
+   *          true for the Ensembl domain, false for EnsemblGenomes
+   * @return the lookup for the requested domain (never null)
+   */
+  public static EnsemblSpecies getSpecies(boolean ensembl)
+  {
+    if (ensembl)
+    {
+      if (forEnsembl == null)
+      {
+        forEnsembl = new EnsemblSpecies(
+                EnsemblSequenceFetcher.ENSEMBL_REST);
+        forEnsembl.fetchSpecies();
+      }
+      return forEnsembl;
+    }
+
+    if (forEnsemblGenomes == null)
+    {
+      forEnsemblGenomes = new EnsemblSpecies(
+              EnsemblSequenceFetcher.ENSEMBL_GENOMES_REST);
+      forEnsemblGenomes.fetchSpecies();
+    }
+    // return the cached instance on every call, not only on the call that
+    // created it
+    return forEnsemblGenomes;
+  }
+
+  /**
+   * Fetches species data via a REST service and caches it in the lookup map.
+   * If no species list is returned, the lookup is left as it was.
+   */
+  void fetchSpecies()
+  {
+    EnsemblInfo info = new EnsemblInfo(domain);
+    List<EnsemblTaxon> taxons = info.getSpecies();
+    if (taxons == null)
+    {
+      // getSpecies() returns null if the request failed
+      return;
+    }
+    SortedMap<String, EnsemblTaxon> aka = lookup;
+    for (EnsemblTaxon taxon : taxons)
+    {
+      String id = taxon.ncbiId;
+      aka.put(id, taxon);
+      aka.put(taxon.name, taxon);
+      String value = taxon.commonName;
+      addAlias(aka, taxon, value);
+      if (taxon.displayName != null)
+      {
+        aka.put(taxon.displayName, taxon);
+      }
+      taxon.aliases.forEach(a -> aka.put(a, taxon));
+    }
+  }
+
+  /**
+   * Maps the alias (if not null) to the taxon, replacing any existing mapping
+   * for that alias. A warning is logged if the alias was previously mapped to
+   * a taxon with a different id.
+   *
+   * @param map
+   * @param taxon
+   * @param alias
+   */
+  private void addAlias(SortedMap<String, EnsemblTaxon> map,
+          EnsemblTaxon taxon, String alias)
+  {
+    if (alias != null)
+    {
+      EnsemblTaxon entry = map.get(alias);
+      if (entry != null && !entry.ncbiId.equals(taxon.ncbiId))
+      {
+        Cache.log.warn(String.format(
+                "Ambiguous species alias %s (%s:%s / %s:%s)", alias,
+                taxon.ncbiId, taxon.name, entry.ncbiId, entry.name));
+      }
+      map.put(alias, taxon);
+    }
+  }
+
+  /**
+   * Returns a list of all taxons with an id, name, display name, common name or
+   * alias that starts with the given string (ignoring case), in order of the
+   * first matching key. Returns an empty list if name is null.
+   *
+   * @param name
+   * @return the matching taxons, each listed once (may be empty, never null)
+   */
+  public List<EnsemblTaxon> getNameMatches(String name)
+  {
+    List<EnsemblTaxon> matches = new ArrayList<>();
+    if (name == null)
+    {
+      return matches;
+    }
+
+    /*
+     * keys are ordered ignoring case, so any key that starts with name sorts
+     * at or after name: begin the scan there rather than at the first key
+     */
+    int len = name.length();
+    for (String alias : lookup.tailMap(name).keySet())
+    {
+      if (alias.length() < len)
+      {
+        continue;
+      }
+      String startsWith = alias.substring(0, len);
+      int compared = startsWith.compareToIgnoreCase(name);
+      if (compared > 0)
+      {
+        break; // gone past all matches
+      }
+      if (compared == 0)
+      {
+        EnsemblTaxon entry = lookup.get(alias);
+        if (!matches.contains(entry))
+        {
+          matches.add(entry); // partial or exact match
+        }
+      }
+    }
+
+    return matches;
+  }
+}
diff --git a/test/jalview/ext/ensembl/EnsemblInfoTest.java b/test/jalview/ext/ensembl/EnsemblInfoTest.java
new file mode 100644
index 0000000..6d632fa
--- /dev/null
+++ b/test/jalview/ext/ensembl/EnsemblInfoTest.java
@@ -0,0 +1,14 @@
+package jalview.ext.ensembl;
+
+import org.testng.annotations.Test;
+
+public class EnsemblInfoTest
+{
+  @Test(groups = "Network")
+  public void testGetSpecies()
+  {
+    EnsemblInfo info = new EnsemblInfo();
+    info.getSpecies();
+  }
+
+}
diff --git a/test/jalview/ext/ensembl/EnsemblSpeciesDemo.java b/test/jalview/ext/ensembl/EnsemblSpeciesDemo.java
new file mode 100644
index 0000000..456783b
--- /dev/null
+++ b/test/jalview/ext/ensembl/EnsemblSpeciesDemo.java
@@ -0,0 +1,114 @@
+package jalview.ext.ensembl;
+
+import jalview.ext.ensembl.EnsemblSpecies.EnsemblTaxon;
+
+import java.awt.Dimension;
+import java.awt.FlowLayout;
+import java.util.Arrays;
+import java.util.List;
+
+import javax.swing.DefaultComboBoxModel;
+import javax.swing.JComboBox;
+import javax.swing.JComponent;
+import javax.swing.JFrame;
+import javax.swing.JLabel;
+import javax.swing.JPanel;
+import javax.swing.event.DocumentEvent;
+import javax.swing.event.DocumentListener;
+import javax.swing.text.AbstractDocument;
+import javax.swing.text.JTextComponent;
+
+public class EnsemblSpeciesDemo
+{
+  /**
+   * Main method may be run interactively to explore a dynamic drop-down list that
+   * populates with matches of Ensembl taxon ids, names or aliases
+   *
+   * @param args
+   */
+  public static void main(String[] args)
+  {
+    // Schedule a job for the event dispatch thread:
+    // creating and showing this application's GUI.
+    javax.swing.SwingUtilities.invokeLater(new Runnable()
+    {
+      @Override
+      public void run()
+      {
+        createAndShowGUI();
+      }
+    });
+  }
+
+  /**
+   * Create the GUI and show it. For thread safety, this method should be invoked
+   * from the event dispatch thread.
+   */
+  private static void createAndShowGUI()
+  {
+    JFrame frame = new JFrame("Taxon drop-down demo");
+    frame.setDefaultCloseOperation(JFrame.DISPOSE_ON_CLOSE);
+
+    JComponent newContentPane = new JPanel(new FlowLayout());
+    newContentPane.setOpaque(true);
+    newContentPane.setPreferredSize(new Dimension(400, 300));
+    frame.setContentPane(newContentPane);
+
+    JLabel label = new JLabel("Taxon:");
+    newContentPane.add(label);
+
+    JComboBox<String> combo = new JComboBox<>();
+    combo.setEditable(true);
+    combo.setPreferredSize(new Dimension(200, 20));
+    newContentPane.add(combo);
+    AbstractDocument document = (AbstractDocument) ((JTextComponent) combo
+            .getEditor().getEditorComponent()).getDocument();
+    document.addDocumentListener(new DocumentListener()
+    {
+      @Override
+      public void insertUpdate(DocumentEvent e)
+      {
+        refreshComboList(combo, document);
+      }
+
+      @Override
+      public void removeUpdate(DocumentEvent e)
+      {
+        refreshComboList(combo, document);
+      }
+
+      @Override
+      public void changedUpdate(DocumentEvent e)
+      {
+      }
+    });
+
+    frame.pack();
+    frame.setVisible(true);
+  }
+
+  /**
+   * Replaces the combo box list with the Ensembl taxons whose id, name or
+   * alias starts with what the user has typed (if more than one character)
+   *
+   * @param combo
+   * @param document
+   */
+  protected static void refreshComboList(JComboBox<String> combo, AbstractDocument document)
+  {
+    String typed = (String) combo.getEditor().getItem();
+    if (typed.length() > 1)
+    {
+      List<EnsemblTaxon> matches = EnsemblSpecies.getSpecies(true)
+              .getNameMatches(typed);
+      String[] items = new String[matches.size()];
+      int i = 0;
+      for (EnsemblTaxon m : matches)
+      {
+        items[i++] = String.format("%s (%s)", m.displayName, m.ncbiId);
+      }
+      Arrays.sort(items, String.CASE_INSENSITIVE_ORDER);
+      combo.setModel(new DefaultComboBoxModel<>(items));
+    }
+  }
+}
diff --git a/test/jalview/ext/ensembl/EnsemblSpeciesTest.java b/test/jalview/ext/ensembl/EnsemblSpeciesTest.java
new file mode 100644
index 0000000..3ca5ba1
--- /dev/null
+++ b/test/jalview/ext/ensembl/EnsemblSpeciesTest.java
@@ -0,0 +1,62 @@
+package jalview.ext.ensembl;
+
+import static org.testng.Assert.assertEquals;
+
+import jalview.bin.Cache;
+import jalview.ext.ensembl.EnsemblSpecies.EnsemblTaxon;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.log4j.Logger;
+import org.testng.annotations.Test;
+
+import junit.extensions.PA;
+
+public class EnsemblSpeciesTest
+{
+  @Test(groups = "Network")
+  public void testGetSpecies_ensembl()
+  {
+    Cache.log = Logger.getRootLogger();
+    EnsemblSpecies species = EnsemblSpecies.getSpecies(true);
+    System.out.println("Ensembllookup map size = "
+            + ((Map<?, ?>) PA.getValue(species, "lookup")).size());
+
+    List<EnsemblTaxon> matches = species.getNameMatches("homo sapiens");
+    assertEquals(matches.size(), 1);
+    EnsemblTaxon match = matches.get(0);
+    assertEquals(match.ncbiId, "9606");
+
+    matches = species.getNameMatches("mus ");
+    assertEquals(matches.size(), 6);
+    match = matches.get(0);
+    assertEquals(match.ncbiId, "10089");
+    assertEquals(match.name, "mus_caroli");
+    match = matches.get(1);
+    assertEquals(match.ncbiId, "10090");
+    assertEquals(match.commonName, "house mouse");
+    match = matches.get(2);
+    assertEquals(match.ncbiId, "10091");
+    assertEquals(match.displayName, "Mouse CAST/EiJ");
+    assertEquals(match.commonName, "south eastern house mouse");
+  }
+
+  @Test(groups = "Network")
+  public void testGetSpecies_ensemblGenomes()
+  {
+    Cache.log = Logger.getRootLogger();
+    EnsemblSpecies species = EnsemblSpecies.getSpecies(false);
+    System.out.println("EnsemblGenomes lookup map size = "
+            + ((Map<?, ?>) PA.getValue(species, "lookup")).size());
+
+    List<EnsemblTaxon> matches = species.getNameMatches("streptomyces al");
+    assertEquals(matches.size(), 12);
+    assertEquals(matches.get(0).ncbiId, "132473");
+    assertEquals(matches.get(0).name, "streptomyces_alboniger");
+    assertEquals(matches.get(1).ncbiId, "68570");
+    assertEquals(matches.get(1).displayName, "Streptomyces albulus");
+    assertEquals(matches.get(2).ncbiId, "68570");
+    assertEquals(matches.get(2).name, "streptomyces_albulus_gca_001646665");
+  }
+}
-- 
1.7.10.2