JAL-1375 initial import of EntrezClient code from Cytoscape3
[jalview.git] / src / jalview / ws / dbsources / EntrezRestClient.java
1 /**
2  * Downloaded from http://code.cytoscape.org/redmine/projects/cy3-impl/repository/revisions/171d9affb3499807b14db17702176b0b82d16878/raw/webservice-ncbi-client-impl/src/main/java/org/cytoscape/webservice/ncbi/rest/EntrezRestClient.java
3  * Originally in package org.cytoscape.webservice.ncbi.rest;
4  * Cytoscape3 code
5  * License and author attribution currently unknown.
6  */
7
8 import java.io.IOException;
9 import java.io.InputStream;
10 import java.net.URL;
11 import java.util.Date;
12 import java.util.HashSet;
13 import java.util.Map;
14 import java.util.Set;
15 import java.util.concurrent.ConcurrentHashMap;
16 import java.util.concurrent.ConcurrentMap;
17 import java.util.concurrent.ExecutorService;
18 import java.util.concurrent.Executors;
19 import java.util.concurrent.TimeUnit;
20
21 import javax.xml.parsers.DocumentBuilder;
22 import javax.xml.parsers.DocumentBuilderFactory;
23 import javax.xml.parsers.ParserConfigurationException;
24
25 import org.apache.commons.logging.LogFactory;
26 import org.apache.log4j.spi.LoggerFactory;
27 import org.w3c.dom.Document;
28 import org.w3c.dom.Node;
29 import org.w3c.dom.NodeList;
30 import org.xml.sax.SAXException;
31
32
33 public class EntrezRestClient {
34         private static final org.apache.commons.logging.Log logger = LogFactory.getLog(EntrezRestClient.class);
35         private static final String BASE_URL = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/";
36         public static final String FETCH_URL = BASE_URL + "efetch.fcgi?db=gene&retmode=xml&id=";
37         private static final String SEARCH_URL = BASE_URL + "esearch.fcgi?db=gene&retmax=100000&term=";
38         private final String regex = "\\s+";
39         private static final String ID = "Id";
40
41         private final CyTableFactory tableFactory;
42         private final CyNetworkFactory networkFactory;
43         private final CyTableManager tableManager;
44
45         public EntrezRestClient(final CyNetworkFactory networkFactory,
46                                 final CyTableFactory tableFactory, final CyTableManager tableManager)
47         {
48                 this.networkFactory = networkFactory;
49                 this.tableFactory   = tableFactory;
50                 this.tableManager   = tableManager;
51         }
52
53         public Set<String> search(final String queryString) throws IOException, ParserConfigurationException, SAXException {
54                 final URL url = createURL(SEARCH_URL, queryString);
55
56                 final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
57                 final DocumentBuilder builder = factory.newDocumentBuilder();
58                 InputStream is = url.openStream();
59
60                 final Document result = builder.parse(is);
61
62                 final Set<String> idSet = new HashSet<String>();
63                 final NodeList ids = result.getElementsByTagName(ID);
64                 final int dataSize = ids.getLength();
65
66                 for (int i = 0; i < dataSize; i++) {
67                         Node id = ids.item(i);
68                         idSet.add(id.getTextContent());
69                 }
70
71                 is.close();
72                 is = null;
73
74                 return idSet;
75         }
76
77         public CyNetwork importNetwork(final Set<String> idList) {
78
79                 long startTime = System.currentTimeMillis();
80
81                 final ExecutorService executer = Executors.newFixedThreadPool(4);
82
83                 logger.debug("Executor initialized.");
84                 // final CyNetwork newNetwork = networkFactory.getInstance();
85
86
87                 final ConcurrentMap<String, CyNode> nodeName2CyNodeMap = new ConcurrentHashMap<String, CyNode>();
88
89                 int group = 0;
90                 int buketNum = 10;
91                 String[] box = new String[buketNum];
92
93
94                 for (String entrezID : idList) {
95                         box[group] = entrezID;
96                         group++;
97
98                         if (group == buketNum) {
99                                 executer.submit(new EntryProcessor<String>(new ImportNetworkTask<String>(box, newNetwork, nodeName2CyNodeMap)));
100                                 group = 0;
101                                 box = new String[buketNum];
102                         }
103                 }
104
105                 String[] newbox = new String[group];
106
107                 for (int i = 0; i < group; i++)
108                         newbox[i] = box[i];
109
110                 executer.submit(new EntryProcessor<String>(new ImportNetworkTask<String>(box, newNetwork, nodeName2CyNodeMap)));
111
112                 try {
113                         executer.shutdown();
114                         executer.awaitTermination(1000, TimeUnit.SECONDS);
115
116                         long endTime = System.currentTimeMillis();
117                         double sec = (endTime - startTime) / (1000.0);
118                         System.out.println("Finished in " + sec + " sec.");
119
120 //                      if ((canceled != null) && canceled) {
121 //                              canceled = null;
122 //
123 //                              return null;
124 //                      }
125                 } catch( Exception ex) {
126                         ex.printStackTrace();
127                 }
128
129                 return newNetwork;
130         }
131
132
133
134         public CyTable importDataTable(final Set<String> idList, final Set<AnnotationCategory> category) {
135                 if(idList == null || idList.size() == 0)
136                         throw new IllegalArgumentException("ID list is null.");
137
138                 long startTime = System.currentTimeMillis();
139                 final ExecutorService executer = Executors.newFixedThreadPool(4);
140
141                 logger.debug("Table Import Executor initialized.");
142                 final Date currentDate = new Date();
143                 final CyTable table = tableFactory.createTable("NCBI Global Table: " + currentDate.toString(), CyTableEntry.NAME, String.class, true, true);
144
145                 int group = 0;
146                 int buketNum = 10;
147                 String[] box = new String[buketNum];
148
149
150                 for (String entrezID : idList) {
151                         box[group] = entrezID;
152                         group++;
153
154                         if (group == buketNum) {
155                                 executer.submit(new ImportTableTask(box, category, table));
156                                 group = 0;
157                                 box = new String[buketNum];
158                         }
159                 }
160
161                 String[] newbox = new String[group];
162
163                 for (int i = 0; i < group; i++)
164                         newbox[i] = box[i];
165
166                 executer.submit(new ImportTableTask(box, category, table));
167
168                 try {
169                         executer.shutdown();
170                         executer.awaitTermination(1000, TimeUnit.SECONDS);
171
172                         long endTime = System.currentTimeMillis();
173                         double sec = (endTime - startTime) / (1000.0);
174                         System.out.println("Table Import Finished in " + sec + " sec.");
175
176 //                      if ((canceled != null) && canceled) {
177 //                              canceled = null;
178 //
179 //                              return null;
180 //                      }
181                 } catch( Exception ex) {
182                         ex.printStackTrace();
183                 }
184
185                 tableManager.addTable(table);
186
187                 return table;
188         }
189
190         private URL createURL(final String base, final String queryString) throws IOException {
191                 final String[] parts = queryString.split(regex);
192                 final StringBuilder builder = new StringBuilder();
193
194                 if (parts.length != 0) {
195                         for (String dTerm : parts) {
196                                 final String trimed = dTerm.trim();
197                                 builder.append(trimed + "+");
198                         }
199                 }
200
201                 String urlString = builder.toString();
202                 urlString = urlString.substring(0, urlString.length() - 1);
203                 final URL url = new URL(base + urlString);
204                 logger.debug("Query URL = " + url.toString());
205                 return url;
206         }
207 }