// forester -- software libraries and applications
// for genomics and evolutionary biology research.
// Copyright (C) 2010 Christian M Zmasek
// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
// Contact: phylosoft @ gmail . com
// WWW: www.phylosoft.org/forester
26 package org.forester.ws.uniprot;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;

import org.forester.util.ForesterUtil;
40 public final class UniProtWsTools {
42 public final static String BASE_URL = "http://www.uniprot.org/";
43 private final static String URL_ENC = "UTF-8";
44 private final static boolean DEBUG = false;
46 synchronized private static String encode( final String str ) throws UnsupportedEncodingException {
47 return URLEncoder.encode( str.trim(), URL_ENC );
50 synchronized public static List<UniProtTaxonomy> getTaxonomiesFromCommonName( final String cn,
51 final int max_taxonomies_return )
53 final List<String> result = getTaxonomyStringFromCommonName( cn, max_taxonomies_return );
54 if ( result.size() > 0 ) {
55 return parseUniProtTaxonomy( result );
60 synchronized public static List<UniProtTaxonomy> getTaxonomiesFromCommonNameStrict( final String cn,
61 final int max_taxonomies_return )
63 final List<UniProtTaxonomy> taxonomies = getTaxonomiesFromCommonName( cn, max_taxonomies_return );
64 if ( ( taxonomies != null ) && ( taxonomies.size() > 0 ) ) {
65 final List<UniProtTaxonomy> filtered_taxonomies = new ArrayList<UniProtTaxonomy>();
66 for( final UniProtTaxonomy taxonomy : taxonomies ) {
67 if ( taxonomy.getCommonName().equalsIgnoreCase( cn ) ) {
68 filtered_taxonomies.add( taxonomy );
71 return filtered_taxonomies;
76 synchronized public static List<UniProtTaxonomy> getTaxonomiesFromId( final String id,
77 final int max_taxonomies_return )
79 final List<String> result = getTaxonomyStringFromId( id, max_taxonomies_return );
80 if ( result.size() > 0 ) {
81 return parseUniProtTaxonomy( result );
86 synchronized public static List<UniProtTaxonomy> getTaxonomiesFromScientificName( final String sn,
87 final int max_taxonomies_return )
90 if ( sn.equals( "Drosophila" ) ) {
91 return hack( UniProtTaxonomy.DROSOPHILA_GENUS );
93 else if ( sn.equals( "Xenopus" ) ) {
94 return hack( UniProtTaxonomy.XENOPUS_GENUS );
96 final List<String> result = getTaxonomyStringFromScientificName( sn, max_taxonomies_return );
97 if ( result.size() > 0 ) {
98 return parseUniProtTaxonomy( result );
104 * Does not return "sub-types".
105 * For example, for "Mus musculus" only returns "Mus musculus"
106 * and not "Mus musculus", "Mus musculus bactrianus", ...
109 synchronized public static List<UniProtTaxonomy> getTaxonomiesFromScientificNameStrict( final String sn,
110 final int max_taxonomies_return )
112 final List<UniProtTaxonomy> taxonomies = getTaxonomiesFromScientificName( sn, max_taxonomies_return );
113 if ( ( taxonomies != null ) && ( taxonomies.size() > 0 ) ) {
114 final List<UniProtTaxonomy> filtered_taxonomies = new ArrayList<UniProtTaxonomy>();
115 for( final UniProtTaxonomy taxonomy : taxonomies ) {
116 if ( taxonomy.getScientificName().equalsIgnoreCase( sn ) ) {
117 filtered_taxonomies.add( taxonomy );
120 return filtered_taxonomies;
// Looks up taxonomy entries by taxonomy code; the code is sent as the
// "mnemonic" query field by getTaxonomyStringFromTaxonomyCode.
// max_taxonomies_return is forwarded as the maximum number of response lines to read.
// NOTE(review): several lines of this method (throws clause, closing braces, the
// FUGRU branch body and the final return) are not visible in this excerpt.
125 synchronized public static List<UniProtTaxonomy> getTaxonomiesFromTaxonomyCode( final String code,
126 final int max_taxonomies_return )
// Local copy of the argument. It is non-final, so it is presumably reassigned in
// the FUGRU branch below -- confirm against the complete source.
128 String my_code = new String( code );
// Special case for the code "FUGRU"; the body of this branch is not visible here.
130 if ( my_code.equals( "FUGRU" ) ) {
// Special case for the code "CAP": answered from a predefined constant via hack(),
// without sending a query.
133 else if ( my_code.equals( "CAP" ) ) {
134 return hack( UniProtTaxonomy.CAPITELLA_TELATA_SPECIES );
// Regular path: query with the (possibly adjusted) code and parse the response
// when at least one line came back.
136 final List<String> result = getTaxonomyStringFromTaxonomyCode( my_code, max_taxonomies_return );
137 if ( result.size() > 0 ) {
138 return parseUniProtTaxonomy( result );
143 synchronized private static List<String> getTaxonomyStringFromCommonName( final String cn,
144 final int max_lines_to_return )
146 return queryUniprot( "taxonomy/?query=common%3a%22" + encode( cn ) + "%22&format=tab", max_lines_to_return );
149 synchronized private static List<String> getTaxonomyStringFromId( final String id, final int max_lines_to_return )
151 return queryUniprot( "taxonomy/?query=id%3a%22" + encode( id ) + "%22&format=tab", max_lines_to_return );
154 synchronized private static List<String> getTaxonomyStringFromScientificName( final String sn,
155 final int max_lines_to_return )
157 return queryUniprot( "taxonomy/?query=scientific%3a%22" + encode( sn ) + "%22&format=tab", max_lines_to_return );
160 synchronized private static List<String> getTaxonomyStringFromTaxonomyCode( final String code,
161 final int max_lines_to_return )
163 return queryUniprot( "taxonomy/?query=mnemonic%3a%22" + encode( code ) + "%22&format=tab", max_lines_to_return );
166 synchronized private static List<UniProtTaxonomy> hack( final UniProtTaxonomy tax ) {
167 final List<UniProtTaxonomy> l = new ArrayList<UniProtTaxonomy>();
172 synchronized private static List<UniProtTaxonomy> parseUniProtTaxonomy( final List<String> result )
174 final List<UniProtTaxonomy> taxonomies = new ArrayList<UniProtTaxonomy>();
175 for( final String line : result ) {
176 if ( ForesterUtil.isEmpty( line ) ) {
177 // Ignore empty lines.
179 else if ( line.startsWith( "Taxon" ) ) {
180 //TODO next the check format FIXME
183 if ( line.split( "\t" ).length > 4 ) {
184 taxonomies.add( new UniProtTaxonomy( line ) );
191 synchronized public static List<String> queryUniprot( final String query, int max_lines_to_return )
193 if ( ForesterUtil.isEmpty( query ) ) {
194 throw new IllegalArgumentException( "illegal attempt to use empty query " );
196 if ( max_lines_to_return < 1 ) {
197 max_lines_to_return = 1;
199 final URL url = new URL( BASE_URL + query );
201 System.out.println( "url: " + url.toString() );
203 final URLConnection urlc = url.openConnection();
204 final BufferedReader in = new BufferedReader( new InputStreamReader( urlc.getInputStream() ) );
206 final List<String> result = new ArrayList<String>();
207 while ( ( line = in.readLine() ) != null ) {
209 if ( result.size() > max_lines_to_return ) {