2 * Downloaded from http://code.cytoscape.org/redmine/projects/cy3-impl/repository/revisions/171d9affb3499807b14db17702176b0b82d16878/raw/webservice-ncbi-client-impl/src/main/java/org/cytoscape/webservice/ncbi/rest/EntrezRestClient.java
3 * Originally in package org.cytoscape.webservice.ncbi.rest;
5 * License and author attribution currently unknown.
8 import java.io.IOException;
9 import java.io.InputStream;
11 import java.util.Date;
12 import java.util.HashSet;
15 import java.util.concurrent.ConcurrentHashMap;
16 import java.util.concurrent.ConcurrentMap;
17 import java.util.concurrent.ExecutorService;
18 import java.util.concurrent.Executors;
19 import java.util.concurrent.TimeUnit;
21 import javax.xml.parsers.DocumentBuilder;
22 import javax.xml.parsers.DocumentBuilderFactory;
23 import javax.xml.parsers.ParserConfigurationException;
25 import org.apache.commons.logging.LogFactory;
26 import org.apache.log4j.spi.LoggerFactory;
27 import org.w3c.dom.Document;
28 import org.w3c.dom.Node;
29 import org.w3c.dom.NodeList;
30 import org.xml.sax.SAXException;
33 public class EntrezRestClient {
34 private static final org.apache.commons.logging.Log logger = LogFactory.getLog(EntrezRestClient.class);
35 private static final String BASE_URL = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/";
36 public static final String FETCH_URL = BASE_URL + "efetch.fcgi?db=gene&retmode=xml&id=";
37 private static final String SEARCH_URL = BASE_URL + "esearch.fcgi?db=gene&retmax=100000&term=";
38 private final String regex = "\\s+";
39 private static final String ID = "Id";
41 private final CyTableFactory tableFactory;
42 private final CyNetworkFactory networkFactory;
43 private final CyTableManager tableManager;
/**
 * Builds a client around the three Cytoscape services it delegates to.
 * The references are stored as given; nothing else happens at construction time.
 *
 * @param networkFactory factory kept for network creation
 * @param tableFactory   factory kept for table creation
 * @param tableManager   manager kept for registering created tables
 */
public EntrezRestClient(final CyNetworkFactory networkFactory,
        final CyTableFactory tableFactory, final CyTableManager tableManager) {
    this.tableManager = tableManager;
    this.tableFactory = tableFactory;
    this.networkFactory = networkFactory;
}
53 public Set<String> search(final String queryString) throws IOException, ParserConfigurationException, SAXException {
54 final URL url = createURL(SEARCH_URL, queryString);
56 final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
57 final DocumentBuilder builder = factory.newDocumentBuilder();
58 InputStream is = url.openStream();
60 final Document result = builder.parse(is);
62 final Set<String> idSet = new HashSet<String>();
63 final NodeList ids = result.getElementsByTagName(ID);
64 final int dataSize = ids.getLength();
66 for (int i = 0; i < dataSize; i++) {
67 Node id = ids.item(i);
68 idSet.add(id.getTextContent());
/**
 * Imports network data for a set of Entrez IDs using a pool of worker threads.
 *
 * The IDs are copied into String arrays of length buketNum; each filled array
 * is wrapped in an ImportNetworkTask / EntryProcessor and submitted to a
 * four-thread executor. All tasks receive the same network object and the same
 * concurrent name-to-node map. The method then waits up to 1000 seconds for
 * the executor and prints the elapsed time to standard output.
 *
 * @param idList Entrez IDs to import
 * @return presumably the network handed to the tasks — TODO confirm against the return statements
 */
77 public CyNetwork importNetwork(final Set<String> idList) {
// Wall-clock start, used only for the elapsed-time message below.
79 long startTime = System.currentTimeMillis();
81 final ExecutorService executer = Executors.newFixedThreadPool(4);
83 logger.debug("Executor initialized.");
84 // final CyNetwork newNetwork = networkFactory.getInstance();
// One map instance is passed to every task submitted below.
87 final ConcurrentMap<String, CyNode> nodeName2CyNodeMap = new ConcurrentHashMap<String, CyNode>();
// Current batch of IDs; "group" is the write index into it.
91 String[] box = new String[buketNum];
94 for (String entrezID : idList) {
95 box[group] = entrezID;
// A full batch is submitted and a fresh array is started for the next one.
98 if (group == buketNum) {
99 executer.submit(new EntryProcessor<String>(new ImportNetworkTask<String>(box, newNetwork, nodeName2CyNodeMap)));
101 box = new String[buketNum];
// Array sized to the number of IDs left over in the last, partially filled batch.
105 String[] newbox = new String[group];
107 for (int i = 0; i < group; i++)
// NOTE(review): newbox is sized to the leftover count above, yet the full-length
// "box" (whose unused slots are null) is what gets submitted here — confirm
// which array was intended.
110 executer.submit(new EntryProcessor<String>(new ImportNetworkTask<String>(box, newNetwork, nodeName2CyNodeMap)));
// Waits at most 1000 seconds for the executor to terminate.
114 executer.awaitTermination(1000, TimeUnit.SECONDS);
116 long endTime = System.currentTimeMillis();
117 double sec = (endTime - startTime) / (1000.0);
// NOTE(review): timing goes to standard output rather than the class logger.
118 System.out.println("Finished in " + sec + " sec.");
120 // if ((canceled != null) && canceled) {
// NOTE(review): every exception type is caught and only its stack trace is printed.
125 } catch( Exception ex) {
126 ex.printStackTrace();
/**
 * Imports annotation data for a set of Entrez IDs into a newly created table.
 *
 * A table titled "NCBI Global Table: " plus the current date is created through
 * the table factory. The IDs are copied into String arrays of length buketNum,
 * and each filled array is submitted to a four-thread executor as an
 * ImportTableTask together with the requested categories and the table. The
 * method waits up to 1000 seconds for the executor, prints the elapsed time to
 * standard output, and registers the table with the table manager.
 *
 * @param idList   Entrez IDs to import; must be non-null and non-empty
 * @param category annotation categories passed through to each ImportTableTask
 * @return presumably the table created here — TODO confirm against the return statements
 * @throws IllegalArgumentException if idList is null or empty
 */
134 public CyTable importDataTable(final Set<String> idList, final Set<AnnotationCategory> category) {
// NOTE(review): the message says "null" but the check also rejects an empty set.
135 if(idList == null || idList.size() == 0)
136 throw new IllegalArgumentException("ID list is null.");
// Wall-clock start, used only for the elapsed-time message below.
138 long startTime = System.currentTimeMillis();
139 final ExecutorService executer = Executors.newFixedThreadPool(4);
141 logger.debug("Table Import Executor initialized.");
// The creation date is embedded in the table title.
142 final Date currentDate = new Date();
143 final CyTable table = tableFactory.createTable("NCBI Global Table: " + currentDate.toString(), CyTableEntry.NAME, String.class, true, true);
// Current batch of IDs; "group" is the write index into it.
147 String[] box = new String[buketNum];
150 for (String entrezID : idList) {
151 box[group] = entrezID;
// A full batch is submitted and a fresh array is started for the next one.
154 if (group == buketNum) {
155 executer.submit(new ImportTableTask(box, category, table));
157 box = new String[buketNum];
// Array sized to the number of IDs left over in the last, partially filled batch.
161 String[] newbox = new String[group];
163 for (int i = 0; i < group; i++)
// NOTE(review): newbox is sized to the leftover count above, yet the full-length
// "box" (whose unused slots are null) is what gets submitted here — confirm
// which array was intended.
166 executer.submit(new ImportTableTask(box, category, table));
// Waits at most 1000 seconds for the executor to terminate.
170 executer.awaitTermination(1000, TimeUnit.SECONDS);
172 long endTime = System.currentTimeMillis();
173 double sec = (endTime - startTime) / (1000.0);
// NOTE(review): timing goes to standard output rather than the class logger.
174 System.out.println("Table Import Finished in " + sec + " sec.");
176 // if ((canceled != null) && canceled) {
// NOTE(review): every exception type is caught and only its stack trace is printed.
181 } catch( Exception ex) {
182 ex.printStackTrace();
// Makes the new table known to the table manager.
185 tableManager.addTable(table);
190 private URL createURL(final String base, final String queryString) throws IOException {
191 final String[] parts = queryString.split(regex);
192 final StringBuilder builder = new StringBuilder();
194 if (parts.length != 0) {
195 for (String dTerm : parts) {
196 final String trimed = dTerm.trim();
197 builder.append(trimed + "+");
201 String urlString = builder.toString();
202 urlString = urlString.substring(0, urlString.length() - 1);
203 final URL url = new URL(base + urlString);
204 logger.debug("Query URL = " + url.toString());