import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefSource;
+import jalview.ext.ensembl.EnsemblSpecies.EnsemblTaxon;
import java.io.BufferedReader;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
+import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.json.simple.JSONArray;
+import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
*/
private static Map<String, String> divisions;
+ /**
+  * Constructs an instance by delegating to the superclass no-argument
+  * constructor.
+  */
+ public EnsemblInfo()
+ {
+ super();
+ }
+
+ /**
+  * Constructs an instance by passing the given domain to the superclass
+  * constructor.
+  * 
+  * @param domain
+  *          handed unchanged to the superclass; presumably the REST base URL
+  *          later returned by getDomain() - TODO confirm against superclass
+  */
+ public EnsemblInfo(String domain)
+ {
+ super(domain);
+ }
+
@Override
public String getDbName()
{
return divisions.keySet();
}
+
+ /**
+  * Calls the REST /info/species endpoint for the current domain and returns the
+  * result as a list of EnsemblTaxon objects. Returns null if the request fails:
+  * that is, if no response reader is obtained or an IOException (including a
+  * malformed URL) is raised. A reply that cannot be fully parsed is not treated
+  * as a request failure; it yields the entries parsed so far, possibly none.
+  * 
+  * @return the species found, or null if the request failed
+  */
+ public List<EnsemblTaxon> getSpecies()
+ {
+   BufferedReader br = null;
+   try
+   {
+     URL url = getSpeciesUrl(getDomain());
+     if (url != null)
+     {
+       br = getHttpResponse(url, null);
+     }
+     if (br == null)
+     {
+       /*
+        * nothing to read: report failure as documented, rather than passing
+        * a null reader on to the JSON parser
+        */
+       return null;
+     }
+     return parseSpeciesResponse(br);
+   } catch (IOException e)
+   {
+     // best-effort lookup: failure is signalled to the caller as null
+     return null;
+   } finally
+   {
+     if (br != null)
+     {
+       try
+       {
+         br.close();
+       } catch (IOException e)
+       {
+         // nothing useful can be done if close fails
+       }
+     }
+   }
+ }
+
+ /**
+  * Parses the JSON reply of the /info/species service into a list of
+  * EnsemblTaxon objects. The reply is expected to be an object holding a
+  * "species" array, each element being an object with "taxon_id", "name",
+  * "common_name", "display_name" and "aliases" members. Missing members give
+  * null fields (or an empty alias list); elements that are not JSON objects are
+  * skipped. If reading or parsing fails part way, the entries built so far are
+  * returned.
+  * 
+  * @param br
+  *          reader positioned at the start of the JSON reply; may be null
+  * @return the parsed taxa, never null, possibly empty
+  */
+ private List<EnsemblTaxon> parseSpeciesResponse(BufferedReader br)
+ {
+   List<EnsemblTaxon> result = new ArrayList<>();
+   if (br == null)
+   {
+     return result;
+   }
+
+   try
+   {
+     Object reply = new JSONParser().parse(br);
+     Object species = reply instanceof JSONObject
+             ? ((JSONObject) reply).get("species")
+             : null;
+     if (!(species instanceof JSONArray))
+     {
+       // not the expected reply shape: nothing to report
+       return result;
+     }
+
+     for (Object entry : (JSONArray) species)
+     {
+       if (!(entry instanceof JSONObject))
+       {
+         continue;
+       }
+       JSONObject taxon = (JSONObject) entry;
+
+       List<String> aka = new ArrayList<>();
+       Object aliases = taxon.get("aliases");
+       if (aliases instanceof JSONArray)
+       {
+         for (Object alias : (JSONArray) aliases)
+         {
+           if (alias != null)
+           {
+             aka.add(alias.toString());
+           }
+         }
+       }
+
+       result.add(new EnsemblTaxon(jsonText(taxon, "taxon_id"),
+               jsonText(taxon, "name"), jsonText(taxon, "display_name"),
+               jsonText(taxon, "common_name"), aka));
+     }
+   } catch (IOException | ParseException | NumberFormatException e)
+   {
+     // best-effort: fall through and return whatever was parsed
+   }
+
+   return result;
+ }
+
+ /**
+  * Returns the text of the named member of a JSON object, or null if the member
+  * is absent or JSON null. Non-string values are converted with toString()
+  * rather than cast, so a numeric value does not cause a ClassCastException.
+  * 
+  * @param obj
+  *          the JSON object to read
+  * @param key
+  *          the member name
+  * @return the member's value as a string, or null
+  */
+ private static String jsonText(JSONObject obj, String key)
+ {
+   Object value = obj.get(key);
+   return value == null ? null : value.toString();
+ }
+
+ /**
+  * Builds the URL of the /info/species service, requesting a JSON reply, by
+  * appending the service path and query to the given domain.
+  * 
+  * @param domain
+  *          the prefix the service path is appended to
+  * @return the service URL
+  * @throws MalformedURLException
+  *           if the resulting string is not a valid URL
+  */
+ private URL getSpeciesUrl(String domain) throws MalformedURLException
+ {
+   String endpoint = domain + "/info/species?content-type=application/json";
+   return new URL(endpoint);
+ }
}
--- /dev/null
+package jalview.ext.ensembl;
+
+import jalview.bin.Cache;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+/**
+ * Holds, for one REST domain, a case-insensitive lookup from species id, name,
+ * display name, common name or alias to the corresponding taxon. One instance
+ * per domain is created on first request by {@link #getSpecies(boolean)} and
+ * then reused. No synchronisation is applied to the lazy creation.
+ */
+public class EnsemblSpecies
+{
+  /**
+   * Holder for the details of one species entry. The alias list is stored as
+   * passed in (it is not copied).
+   */
+  public static class EnsemblTaxon
+  {
+    public final String ncbiId;
+
+    public final String name;
+
+    public final String displayName;
+
+    public final String commonName;
+
+    public final List<String> aliases;
+
+    public EnsemblTaxon(String id, String theName, String theDisplayName,
+            String theCommonName, List<String> aka)
+    {
+      ncbiId = id;
+      name = theName;
+      displayName = theDisplayName;
+      commonName = theCommonName;
+      aliases = aka;
+    }
+  }
+
+  private static EnsemblSpecies forEnsembl;
+
+  private static EnsemblSpecies forEnsemblGenomes;
+
+  private String domain;
+
+  /*
+   * a map whose key is a species id or alias, and value the Taxon
+   */
+  private SortedMap<String, EnsemblTaxon> lookup;
+
+  private EnsemblSpecies(String dom)
+  {
+    domain = dom;
+    lookup = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
+  }
+
+  /**
+   * Returns the species lookup for the requested domain, creating it and
+   * fetching its data on the first call. Later calls return the same instance.
+   * 
+   * @param ensembl
+   *          if true use EnsemblSequenceFetcher.ENSEMBL_REST, else
+   *          EnsemblSequenceFetcher.ENSEMBL_GENOMES_REST
+   * @return the lookup for that domain, never null
+   */
+  public static EnsemblSpecies getSpecies(boolean ensembl)
+  {
+    if (ensembl)
+    {
+      if (forEnsembl == null)
+      {
+        forEnsembl = new EnsemblSpecies(
+                EnsemblSequenceFetcher.ENSEMBL_REST);
+        forEnsembl.fetchSpecies();
+      }
+      return forEnsembl;
+    }
+
+    if (forEnsemblGenomes == null)
+    {
+      forEnsemblGenomes = new EnsemblSpecies(
+              EnsemblSequenceFetcher.ENSEMBL_GENOMES_REST);
+      forEnsemblGenomes.fetchSpecies();
+    }
+    /*
+     * returned whether just created or already cached (previously only the
+     * first call returned the instance and later calls returned null)
+     */
+    return forEnsemblGenomes;
+  }
+
+  /**
+   * Fetches species data via a REST service and caches it in lookup tables. If
+   * the service returns no data the lookup is left unchanged.
+   */
+  void fetchSpecies()
+  {
+    List<EnsemblTaxon> taxons = new EnsemblInfo(domain).getSpecies();
+    if (taxons == null)
+    {
+      // the fetch failed; there is nothing to add
+      return;
+    }
+
+    for (EnsemblTaxon taxon : taxons)
+    {
+      // order matters: a later key overwrites an earlier identical one
+      addKey(taxon.ncbiId, taxon);
+      addKey(taxon.name, taxon);
+      addAlias(lookup, taxon, taxon.commonName);
+      addKey(taxon.displayName, taxon);
+      if (taxon.aliases != null)
+      {
+        for (String alias : taxon.aliases)
+        {
+          addKey(alias, taxon);
+        }
+      }
+    }
+  }
+
+  /**
+   * Adds a lookup entry for the taxon under the given key, replacing any
+   * existing entry. A null key is ignored, since the lookup's comparator cannot
+   * order null.
+   * 
+   * @param key
+   * @param taxon
+   */
+  private void addKey(String key, EnsemblTaxon taxon)
+  {
+    if (key != null)
+    {
+      lookup.put(key, taxon);
+    }
+  }
+
+  /**
+   * Adds an entry for the taxon under the given alias, if not null, replacing
+   * any existing entry. A warning is logged if the alias was already mapped to
+   * a taxon with a different id.
+   * 
+   * @param map
+   * @param taxon
+   * @param alias
+   */
+  private void addAlias(SortedMap<String, EnsemblTaxon> map,
+          EnsemblTaxon taxon, String alias)
+  {
+    if (alias != null)
+    {
+      EnsemblTaxon entry = map.get(alias);
+      if (entry != null && !entry.ncbiId.equals(taxon.ncbiId))
+      {
+        Cache.log.warn(String.format(
+                "Ambiguous species alias %s (%s:%s / %s:%s)", alias,
+                taxon.ncbiId, taxon.name, entry.ncbiId, entry.name));
+      }
+      map.put(alias, taxon);
+    }
+  }
+
+  /**
+   * Returns a list of all taxons with an id, name, display name, common name or
+   * alias that starts with the given string (ignoring case). Each taxon appears
+   * once, in the order in which its first matching key occurs in the sorted
+   * lookup.
+   * 
+   * @param name
+   *          the text to match; null gives an empty result
+   * @return the matching taxons, possibly empty, never null
+   */
+  public List<EnsemblTaxon> getNameMatches(String name)
+  {
+    List<EnsemblTaxon> matches = new ArrayList<>();
+    if (name == null)
+    {
+      return matches;
+    }
+
+    /*
+     * any key starting with 'name' sorts at or after 'name' itself, so the
+     * scan can start there instead of at the first key of the map
+     */
+    int len = name.length();
+    for (String alias : lookup.tailMap(name).keySet())
+    {
+      if (alias.length() < len)
+      {
+        continue;
+      }
+      String startsWith = alias.substring(0, len);
+      int compared = startsWith.compareToIgnoreCase(name);
+      if (compared > 0)
+      {
+        break; // gone past all matches
+      }
+      if (compared == 0)
+      {
+        EnsemblTaxon entry = lookup.get(alias);
+        if (!matches.contains(entry))
+        {
+          matches.add(entry); // partial or exact match
+        }
+      }
+    }
+
+    return matches;
+  }
+}
--- /dev/null
+package jalview.ext.ensembl;
+
+import org.testng.annotations.Test;
+
+public class EnsemblInfoTest
+{
+  /**
+   * Calls getSpecies() on a default-constructed EnsemblInfo. Nothing is
+   * asserted about the result; the test fails only if the call throws.
+   */
+  @Test(groups = "Network")
+  public void testGetSpecies()
+  {
+    new EnsemblInfo().getSpecies();
+  }
+
+}
--- /dev/null
+package jalview.ext.ensembl;
+
+import jalview.ext.ensembl.EnsemblSpecies.EnsemblTaxon;
+
+import java.awt.Dimension;
+import java.awt.FlowLayout;
+import java.util.Arrays;
+import java.util.List;
+
+import javax.swing.DefaultComboBoxModel;
+import javax.swing.JComboBox;
+import javax.swing.JComponent;
+import javax.swing.JFrame;
+import javax.swing.JLabel;
+import javax.swing.JPanel;
+import javax.swing.event.DocumentEvent;
+import javax.swing.event.DocumentListener;
+import javax.swing.text.AbstractDocument;
+import javax.swing.text.JTextComponent;
+
+public class EnsemblSpeciesDemo
+{
+  /**
+   * Run interactively to try out an editable drop-down list whose entries are
+   * the Ensembl taxa whose id, name or alias matches the text typed so far
+   * 
+   * @param args
+   *          not used
+   */
+  public static void main(String[] args)
+  {
+    // build and show the GUI on the event dispatch thread
+    javax.swing.SwingUtilities
+            .invokeLater(EnsemblSpeciesDemo::createAndShowGUI);
+  }
+
+  /**
+   * Builds the demo window (a label and an editable combo box) and shows it.
+   * For thread safety, call this on the event dispatch thread.
+   */
+  private static void createAndShowGUI()
+  {
+    JFrame window = new JFrame("Taxon drop-down demo");
+    window.setDefaultCloseOperation(JFrame.DISPOSE_ON_CLOSE);
+
+    JComponent pane = new JPanel(new FlowLayout());
+    pane.setOpaque(true);
+    pane.setPreferredSize(new Dimension(400, 300));
+    window.setContentPane(pane);
+
+    pane.add(new JLabel("Taxon:"));
+
+    JComboBox<String> taxonCombo = new JComboBox<>();
+    taxonCombo.setEditable(true);
+    taxonCombo.setPreferredSize(new Dimension(200, 20));
+    pane.add(taxonCombo);
+
+    // listen for edits to the text of the combo box's editor field
+    JTextComponent editorField = (JTextComponent) taxonCombo.getEditor()
+            .getEditorComponent();
+    AbstractDocument editorDoc = (AbstractDocument) editorField
+            .getDocument();
+    editorDoc.addDocumentListener(new DocumentListener()
+    {
+      @Override
+      public void insertUpdate(DocumentEvent evt)
+      {
+        refreshComboList(taxonCombo, editorDoc);
+      }
+
+      @Override
+      public void removeUpdate(DocumentEvent evt)
+      {
+        refreshComboList(taxonCombo, editorDoc);
+      }
+
+      @Override
+      public void changedUpdate(DocumentEvent evt)
+      {
+        // no action on this event
+      }
+    });
+
+    window.pack();
+    window.setVisible(true);
+  }
+
+  /**
+   * If the combo box editor holds more than one character, replaces the combo
+   * box model with one entry per Ensembl taxon whose id, name or alias starts
+   * with that text. Entries are formatted as "displayName (ncbiId)" and sorted
+   * ignoring case. Otherwise the model is left as it is.
+   * 
+   * @param combo
+   *          the combo box to refresh
+   * @param document
+   *          the editor's document (not used here)
+   */
+  protected static void refreshComboList(JComboBox<String> combo, AbstractDocument document)
+  {
+    String typed = (String) combo.getEditor().getItem();
+    if (typed.length() <= 1)
+    {
+      return;
+    }
+
+    List<EnsemblTaxon> found = EnsemblSpecies.getSpecies(true)
+            .getNameMatches(typed);
+    String[] labels = found.stream()
+            .map(t -> String.format("%s (%s)", t.displayName, t.ncbiId))
+            .sorted(String.CASE_INSENSITIVE_ORDER)
+            .toArray(String[]::new);
+    combo.setModel(new DefaultComboBoxModel<>(labels));
+  }
+}
--- /dev/null
+package jalview.ext.ensembl;
+
+import static org.testng.Assert.assertEquals;
+
+import jalview.bin.Cache;
+import jalview.ext.ensembl.EnsemblSpecies.EnsemblTaxon;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.log4j.Logger;
+import org.testng.annotations.Test;
+
+import junit.extensions.PA;
+
+public class EnsemblSpeciesTest
+{
+ @Test(groups = "Network")
+ public void testGetSpecies_ensembl()
+ {
+ Cache.log = Logger.getRootLogger();
+ EnsemblSpecies species = EnsemblSpecies.getSpecies(true);
+ System.out.println("Ensembllookup map size = "
+ + ((Map) PA.getValue(species, "lookup")).size());
+
+ List<EnsemblTaxon> matches = species.getNameMatches("homo sapiens");
+ assertEquals(matches.size(), 1);
+ EnsemblTaxon match = matches.get(0);
+ assertEquals(match.ncbiId, "9606");
+
+ matches = species.getNameMatches("mus ");
+ assertEquals(matches.size(), 6);
+ match = matches.get(0);
+ assertEquals(match.ncbiId, "10089");
+ assertEquals(match.name, "mus_caroli");
+ match = matches.get(1);
+ assertEquals(match.ncbiId, "10090");
+ assertEquals(match.commonName, "house mouse");
+ match = matches.get(2);
+ assertEquals(match.ncbiId, "10091");
+ assertEquals(match.displayName, "Mouse CAST/EiJ");
+ assertEquals(match.commonName, "south eastern house mouse");
+ }
+
+ @Test(groups = "Network")
+ public void testGetSpecies_ensemblGenomes()
+ {
+ Cache.log = Logger.getRootLogger();
+ EnsemblSpecies species = EnsemblSpecies.getSpecies(false);
+ System.out.println("EnsemblGenomes lookup map size = "
+ + ((Map) PA.getValue(species, "lookup")).size());
+
+ List<EnsemblTaxon> matches = species.getNameMatches("streptomyces al");
+ assertEquals(matches.size(), 12);
+ assertEquals(matches.get(0).ncbiId, "132473");
+ assertEquals(matches.get(0).name, "streptomyces_alboniger");
+ assertEquals(matches.get(1).ncbiId, "68570");
+ assertEquals(matches.get(1).displayName, "Streptomyces albulus");
+ assertEquals(matches.get(2).ncbiId, "68570");
+ assertEquals(matches.get(2).name, "streptomyces_albulus_gca_001646665");
+ }
+}