From: Jim Procter Date: Wed, 18 Sep 2013 13:11:01 +0000 (+0100) Subject: JAL-1375 initial import of EntrezClient code from Cytoscape3 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=dbabbe48a455467de19b953d098a9cd943f9c133;p=jalview.git JAL-1375 initial import of EntrezClient code from Cytoscape3 --- diff --git a/src/jalview/ws/dbsources/EntrezRestClient.java b/src/jalview/ws/dbsources/EntrezRestClient.java new file mode 100644 index 0000000..d5ad673 --- /dev/null +++ b/src/jalview/ws/dbsources/EntrezRestClient.java @@ -0,0 +1,207 @@ +/** + * Downloaded from http://code.cytoscape.org/redmine/projects/cy3-impl/repository/revisions/171d9affb3499807b14db17702176b0b82d16878/raw/webservice-ncbi-client-impl/src/main/java/org/cytoscape/webservice/ncbi/rest/EntrezRestClient.java + * Originally in package org.cytoscape.webservice.ncbi.rest; + * Cytoscape3 code + * License and author attribution currently unknown. + */ + +import java.io.IOException; +import java.io.InputStream; +import java.net.URL; +import java.util.Date; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; + +import org.apache.commons.logging.LogFactory; +import org.apache.log4j.spi.LoggerFactory; +import org.w3c.dom.Document; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.xml.sax.SAXException; + + +public class EntrezRestClient { + private static final org.apache.commons.logging.Log logger = LogFactory.getLog(EntrezRestClient.class); + private static final String BASE_URL = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/"; + public static final String FETCH_URL = BASE_URL + "efetch.fcgi?db=gene&retmode=xml&id="; + private static final String SEARCH_URL = BASE_URL + "esearch.fcgi?db=gene&retmax=100000&term="; + private final String regex = "\\s+"; + private static final String ID = "Id"; + + private final CyTableFactory tableFactory; + private final CyNetworkFactory networkFactory; + private final CyTableManager tableManager; + + public EntrezRestClient(final CyNetworkFactory networkFactory, + final CyTableFactory tableFactory, final CyTableManager tableManager) + { + this.networkFactory = networkFactory; + this.tableFactory = tableFactory; + this.tableManager = tableManager; + } + + public Set search(final String queryString) throws IOException, ParserConfigurationException, SAXException { + final URL url = createURL(SEARCH_URL, queryString); + + final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + final DocumentBuilder builder = factory.newDocumentBuilder(); + InputStream is = url.openStream(); + + final Document result = builder.parse(is); + + final Set idSet = new HashSet(); + final NodeList ids = result.getElementsByTagName(ID); + final int dataSize = ids.getLength(); + + for (int i = 0; i < dataSize; i++) { + Node id = ids.item(i); + idSet.add(id.getTextContent()); + } + + is.close(); + is = null; + + return idSet; + } + + public CyNetwork importNetwork(final Set idList) { + + long startTime = System.currentTimeMillis(); + + final ExecutorService executer = Executors.newFixedThreadPool(4); + + logger.debug("Executor initialized."); + // final CyNetwork newNetwork = networkFactory.getInstance(); + + + final ConcurrentMap nodeName2CyNodeMap = new ConcurrentHashMap(); + + int group = 0; + int buketNum = 10; + String[] box = new String[buketNum]; + + + for (String entrezID : idList) { + box[group] = entrezID; + group++; + + if (group == buketNum) { + executer.submit(new EntryProcessor(new ImportNetworkTask(box, newNetwork, nodeName2CyNodeMap))); + group = 0; + box = new String[buketNum]; + } + } + + String[] newbox = new String[group]; + + for (int i = 0; i < group; i++) + newbox[i] = box[i]; + + executer.submit(new EntryProcessor(new ImportNetworkTask(box, newNetwork, nodeName2CyNodeMap))); + + try { + executer.shutdown(); + executer.awaitTermination(1000, TimeUnit.SECONDS); + + long endTime = System.currentTimeMillis(); + double sec = (endTime - startTime) / (1000.0); + System.out.println("Finished in " + sec + " sec."); + +// if ((canceled != null) && canceled) { +// canceled = null; +// +// return null; +// } + } catch( Exception ex) { + ex.printStackTrace(); + } + + return newNetwork; + } + + + + public CyTable importDataTable(final Set idList, final Set category) { + if(idList == null || idList.size() == 0) + throw new IllegalArgumentException("ID list is null."); + + long startTime = System.currentTimeMillis(); + final ExecutorService executer = Executors.newFixedThreadPool(4); + + logger.debug("Table Import Executor initialized."); + final Date currentDate = new Date(); + final CyTable table = tableFactory.createTable("NCBI Global Table: " + currentDate.toString(), CyTableEntry.NAME, String.class, true, true); + + int group = 0; + int buketNum = 10; + String[] box = new String[buketNum]; + + + for (String entrezID : idList) { + box[group] = entrezID; + group++; + + if (group == buketNum) { + executer.submit(new ImportTableTask(box, category, table)); + group = 0; + box = new String[buketNum]; + } + } + + String[] newbox = new String[group]; + + for (int i = 0; i < group; i++) + newbox[i] = box[i]; + + executer.submit(new ImportTableTask(box, category, table)); + + try { + executer.shutdown(); + executer.awaitTermination(1000, TimeUnit.SECONDS); + + long endTime = System.currentTimeMillis(); + double sec = (endTime - startTime) / (1000.0); + System.out.println("Table Import Finished in " + sec + " sec."); + +// if ((canceled != null) && canceled) { +// canceled = null; +// +// return null; +// } + } catch( Exception ex) { + ex.printStackTrace(); + } + + tableManager.addTable(table); + + return table; + } + + private URL createURL(final String base, final String queryString) throws IOException { + final String[] parts = queryString.split(regex); + final StringBuilder builder = new StringBuilder(); + + if (parts.length != 0) { + for (String dTerm : parts) { + final String trimed = dTerm.trim(); + builder.append(trimed + "+"); + } + } + + String urlString = builder.toString(); + urlString = urlString.substring(0, urlString.length() - 1); + final URL url = new URL(base + urlString); + logger.debug("Query URL = " + url.toString()); + return url; + } +}