From 4248d371380d24a929b2b5ed2d29dfdae6329963 Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Wed, 9 Feb 2011 01:15:29 +0000 Subject: [PATCH] initial commit --- .../org/forester/ws/uniprot/UniProtTaxonomy.java | 158 +++++++++ .../org/forester/ws/uniprot/UniProtWsTools.java | 216 ++++++++++++ .../java/src/org/forester/ws/wabi/RestUtil.java | 100 ++++++ .../java/src/org/forester/ws/wabi/TxSearch.java | 369 ++++++++++++++++++++ .../java/src/org/forester/ws/wabi/WabiTools.java | 94 +++++ 5 files changed, 937 insertions(+) create mode 100644 forester/java/src/org/forester/ws/uniprot/UniProtTaxonomy.java create mode 100644 forester/java/src/org/forester/ws/uniprot/UniProtWsTools.java create mode 100644 forester/java/src/org/forester/ws/wabi/RestUtil.java create mode 100644 forester/java/src/org/forester/ws/wabi/TxSearch.java create mode 100644 forester/java/src/org/forester/ws/wabi/WabiTools.java diff --git a/forester/java/src/org/forester/ws/uniprot/UniProtTaxonomy.java b/forester/java/src/org/forester/ws/uniprot/UniProtTaxonomy.java new file mode 100644 index 0000000..d4b0b75 --- /dev/null +++ b/forester/java/src/org/forester/ws/uniprot/UniProtTaxonomy.java @@ -0,0 +1,158 @@ +// $Id: +// forester -- software libraries and applications +// for genomics and evolutionary biology research. +// +// Copyright (C) 2010 Christian M Zmasek +// Copyright (C) 2010 Sanford-Burnham Medical Research Institute +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.ws.uniprot;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.forester.util.ForesterUtil;
+
+/**
+ * Value holder for one taxonomy record: lineage, code, scientific name,
+ * common name, synonym, rank and id.
+ * <p>
+ * An instance is created either from one tab-separated text line or from
+ * already separated values. The lineage array is stored and returned as is
+ * (no defensive copy), so callers share it with the instance.
+ */
+public final class UniProtTaxonomy {
+
+    // Positions of the two optional columns read by the line-parsing constructor.
+    private final static int            RANK_COLUMN              = 7;
+    private final static int            LINEAGE_COLUMN           = 8;
+    private final String[]              _lineage;
+    private final String                _code;
+    private final String                _scientific_name;
+    private final String                _common_name;
+    private final String                _synonym;
+    private final String                _rank;
+    private final String                _id;
+    public final static UniProtTaxonomy DROSOPHILA_GENUS         = new UniProtTaxonomy( new String[] { "Eukaryota",
+            "Metazoa", "Arthropoda", "Hexapoda", "Insecta", "Pterygota", "Neoptera", "Endopterygota", "Diptera",
+            "Brachycera", "Muscomorpha", "Ephydroidea", "Drosophilidae" },
+                                                                                        "",
+                                                                                        "fruit flies",
+                                                                                        "Drosophila",
+                                                                                        "",
+                                                                                        "genus",
+                                                                                        "7215" );
+    public final static UniProtTaxonomy XENOPUS_GENUS            = new UniProtTaxonomy( new String[] { "Eukaryota",
+            "Metazoa", "Chordata", "Craniata", "Vertebrata", "Euteleostomi", "Amphibia", "Batrachia", "Anura",
+            "Mesobatrachia", "Pipoidea", "Pipidae", "Xenopodinae" }, "", "", "Xenopus", "", "genus", "8353" );
+    public final static UniProtTaxonomy CAPITELLA_TELATA_SPECIES = new UniProtTaxonomy( new String[] { "Eukaryota",
+            "Metazoa", "Annelida", "Polychaeta", "Scolecida", "Capitellida", "Capitellidae", "Capitella" },
+                                                                                        "",
+                                                                                        "",
+                                                                                        "Capitella teleta",
+                                                                                        "Capitella sp. I",
+                                                                                        "species",
+                                                                                        "283909" );
+
+    /**
+     * Parses one tab-separated line. Columns 0 to 4 (id, code, scientific
+     * name, common name, synonym) are mandatory; column 7 (rank) and
+     * column 8 (lineage, elements separated by "; ") are used when present.
+     * A missing rank becomes the empty string, a missing lineage an empty array.
+     *
+     * @param line the tab-separated record
+     * @throws IllegalArgumentException if the line has fewer than five columns
+     */
+    public UniProtTaxonomy( final String line ) {
+        final String[] items = line.split( "\t" );
+        if ( items.length < 5 ) {
+            throw new IllegalArgumentException( "cannot parse uniprot taxonomy from: " + line );
+        }
+        _id = items[ 0 ].trim();
+        _code = items[ 1 ].trim();
+        _scientific_name = items[ 2 ].trim();
+        _common_name = items[ 3 ].trim();
+        _synonym = items[ 4 ].trim();
+        // Guard on the index that is actually read. The former checks
+        // (length > 6 for index 7, length > 7 for index 8) let rows with
+        // exactly 7 or 8 columns run into an ArrayIndexOutOfBoundsException.
+        _rank = ( items.length > RANK_COLUMN ) ? items[ RANK_COLUMN ].trim() : "";
+        final List<String> lineage = new ArrayList<String>();
+        if ( items.length > LINEAGE_COLUMN ) {
+            for( final String t : items[ LINEAGE_COLUMN ].split( "; " ) ) {
+                if ( !ForesterUtil.isEmpty( t ) ) {
+                    lineage.add( t.trim() );
+                }
+            }
+        }
+        _lineage = lineage.toArray( new String[ lineage.size() ] );
+    }
+
+    /**
+     * Creates a record from separate values. Note the parameter order:
+     * common name comes before scientific name.
+     *
+     * @param lineage stored by reference, not copied
+     */
+    public UniProtTaxonomy( final String[] lineage,
+                            final String code,
+                            final String common_name,
+                            final String scientific_name,
+                            final String synonym,
+                            final String rank,
+                            final String id ) {
+        _lineage = lineage;
+        _code = code;
+        _scientific_name = scientific_name;
+        _common_name = common_name;
+        _synonym = synonym;
+        _rank = rank;
+        _id = id;
+    }
+
+    /**
+     * Returns a new instance with the same field values. The lineage array is
+     * shared with this instance, not cloned. The String fields are immutable,
+     * so they are handed over directly instead of being duplicated.
+     *
+     * @return a copy of this record
+     */
+    public UniProtTaxonomy copy() {
+        return new UniProtTaxonomy( getLineage(),
+                                    getCode(),
+                                    getCommonName(),
+                                    getScientificName(),
+                                    getSynonym(),
+                                    getRank(),
+                                    getId() );
+    }
+
+    public String getCode() {
+        return _code;
+    }
+
+    public String getCommonName() {
+        return _common_name;
+    }
+
+    public String getId() {
+        return _id;
+    }
+
+    /**
+     * @return the internal lineage array itself (not a copy)
+     */
+    public String[] getLineage() {
+        return _lineage;
+    }
+
+    public String getRank() {
+        return _rank;
+    }
+
+    public String getScientificName() {
+        return _scientific_name;
+    }
+
+    public String getSynonym() {
+        return _synonym;
+    }
+}
diff --git a/forester/java/src/org/forester/ws/uniprot/UniProtWsTools.java b/forester/java/src/org/forester/ws/uniprot/UniProtWsTools.java
new file mode 100644
index 0000000..623f837
--- /dev/null
+++ b/forester/java/src/org/forester/ws/uniprot/UniProtWsTools.java
@@ -0,0 +1,216 @@
+// $Id:
+// forester -- software libraries and applications
+// for genomics and evolutionary biology research.
+//
+// Copyright (C) 2010 Christian M Zmasek
+// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
+// All rights reserved
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . 
com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.ws.uniprot;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.UnsupportedEncodingException;
+import java.net.URL;
+import java.net.URLConnection;
+import java.net.URLEncoder;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.forester.util.ForesterUtil;
+
+/**
+ * Static helpers that send taxonomy queries to the service at
+ * {@link #BASE_URL} (tab-separated output requested via "format=tab") and
+ * turn the returned lines into {@link UniProtTaxonomy} objects.
+ * <p>
+ * All lookup methods return <code>null</code> - not an empty list - when the
+ * service sends back no lines at all.
+ */
+public final class UniProtWsTools {
+
+    public final static String   BASE_URL = "http://www.uniprot.org/";
+    private final static String  URL_ENC  = "UTF-8";
+    private final static boolean DEBUG    = false;
+
+    /** URL-encodes the trimmed argument as UTF-8. */
+    synchronized private static String encode( final String str ) throws UnsupportedEncodingException {
+        return URLEncoder.encode( str.trim(), URL_ENC );
+    }
+
+    /** Keeps the taxonomies whose common name equals <code>cn</code>, ignoring case. */
+    private static List<UniProtTaxonomy> filterByCommonName( final List<UniProtTaxonomy> taxonomies,
+                                                             final String cn ) {
+        final List<UniProtTaxonomy> filtered = new ArrayList<UniProtTaxonomy>();
+        for( final UniProtTaxonomy taxonomy : taxonomies ) {
+            if ( taxonomy.getCommonName().equalsIgnoreCase( cn ) ) {
+                filtered.add( taxonomy );
+            }
+        }
+        return filtered;
+    }
+
+    /** Keeps the taxonomies whose scientific name equals <code>sn</code>, ignoring case. */
+    private static List<UniProtTaxonomy> filterByScientificName( final List<UniProtTaxonomy> taxonomies,
+                                                                 final String sn ) {
+        final List<UniProtTaxonomy> filtered = new ArrayList<UniProtTaxonomy>();
+        for( final UniProtTaxonomy taxonomy : taxonomies ) {
+            if ( taxonomy.getScientificName().equalsIgnoreCase( sn ) ) {
+                filtered.add( taxonomy );
+            }
+        }
+        return filtered;
+    }
+
+    /**
+     * Looks up taxonomies by common name.
+     *
+     * @return the parsed taxonomies, or <code>null</code> if the reply was empty
+     * @throws IOException if the query fails
+     */
+    synchronized public static List<UniProtTaxonomy> getTaxonomiesFromCommonName( final String cn,
+                                                                                  final int max_taxonomies_return )
+            throws IOException {
+        return parseOrNull( getTaxonomyStringFromCommonName( cn, max_taxonomies_return ) );
+    }
+
+    /**
+     * Like {@link #getTaxonomiesFromCommonName(String, int)}, but keeps only
+     * entries whose common name equals <code>cn</code> (case-insensitive).
+     *
+     * @return the filtered (possibly empty) list, or <code>null</code> if the
+     *         unfiltered lookup returned <code>null</code> or nothing
+     */
+    synchronized public static List<UniProtTaxonomy> getTaxonomiesFromCommonNameStrict( final String cn,
+                                                                                        final int max_taxonomies_return )
+            throws IOException {
+        final List<UniProtTaxonomy> taxonomies = getTaxonomiesFromCommonName( cn, max_taxonomies_return );
+        if ( ( taxonomies == null ) || taxonomies.isEmpty() ) {
+            return null;
+        }
+        return filterByCommonName( taxonomies, cn );
+    }
+
+    /**
+     * Looks up taxonomies by id.
+     *
+     * @return the parsed taxonomies, or <code>null</code> if the reply was empty
+     */
+    synchronized public static List<UniProtTaxonomy> getTaxonomiesFromId( final String id,
+                                                                          final int max_taxonomies_return )
+            throws IOException {
+        return parseOrNull( getTaxonomyStringFromId( id, max_taxonomies_return ) );
+    }
+
+    /**
+     * Looks up taxonomies by scientific name. The names "Drosophila" and
+     * "Xenopus" are answered from hard-wired constants without contacting the
+     * service (marked as a hack in the original source; reason not recorded).
+     *
+     * @return the parsed taxonomies, or <code>null</code> if the reply was empty
+     */
+    synchronized public static List<UniProtTaxonomy> getTaxonomiesFromScientificName( final String sn,
+                                                                                      final int max_taxonomies_return )
+            throws IOException {
+        // Hack! Craniata? ..
+        if ( sn.equals( "Drosophila" ) ) {
+            return hack( UniProtTaxonomy.DROSOPHILA_GENUS );
+        }
+        else if ( sn.equals( "Xenopus" ) ) {
+            return hack( UniProtTaxonomy.XENOPUS_GENUS );
+        }
+        return parseOrNull( getTaxonomyStringFromScientificName( sn, max_taxonomies_return ) );
+    }
+
+    /**
+     * Does not return "sub-types".
+     * For example, for "Mus musculus" only returns "Mus musculus"
+     * and not "Mus musculus", "Mus musculus bactrianus", ...
+     *
+     * @return the filtered (possibly empty) list, or <code>null</code> if the
+     *         unfiltered lookup returned <code>null</code> or nothing
+     */
+    synchronized public static List<UniProtTaxonomy> getTaxonomiesFromScientificNameStrict( final String sn,
+                                                                                            final int max_taxonomies_return )
+            throws IOException {
+        final List<UniProtTaxonomy> taxonomies = getTaxonomiesFromScientificName( sn, max_taxonomies_return );
+        if ( ( taxonomies == null ) || taxonomies.isEmpty() ) {
+            return null;
+        }
+        return filterByScientificName( taxonomies, sn );
+    }
+
+    /**
+     * Looks up taxonomies by taxonomy code. Two hard-wired special cases:
+     * "FUGRU" is queried as "TAKRU", and "CAP" is answered from a constant
+     * without contacting the service.
+     *
+     * @return the parsed taxonomies, or <code>null</code> if the reply was empty
+     */
+    synchronized public static List<UniProtTaxonomy> getTaxonomiesFromTaxonomyCode( final String code,
+                                                                                    final int max_taxonomies_return )
+            throws IOException {
+        String my_code = code;
+        // Hacks!
+        if ( my_code.equals( "FUGRU" ) ) {
+            my_code = "TAKRU";
+        }
+        else if ( my_code.equals( "CAP" ) ) {
+            return hack( UniProtTaxonomy.CAPITELLA_TELATA_SPECIES );
+        }
+        return parseOrNull( getTaxonomyStringFromTaxonomyCode( my_code, max_taxonomies_return ) );
+    }
+
+    synchronized private static List<String> getTaxonomyStringFromCommonName( final String cn,
+                                                                              final int max_lines_to_return )
+            throws IOException {
+        return queryUniprot( "taxonomy/?query=common%3a%22" + encode( cn ) + "%22&format=tab", max_lines_to_return );
+    }
+
+    synchronized private static List<String> getTaxonomyStringFromId( final String id, final int max_lines_to_return )
+            throws IOException {
+        return queryUniprot( "taxonomy/?query=id%3a%22" + encode( id ) + "%22&format=tab", max_lines_to_return );
+    }
+
+    synchronized private static List<String> getTaxonomyStringFromScientificName( final String sn,
+                                                                                  final int max_lines_to_return )
+            throws IOException {
+        return queryUniprot( "taxonomy/?query=scientific%3a%22" + encode( sn ) + "%22&format=tab", max_lines_to_return );
+    }
+
+    synchronized private static List<String> getTaxonomyStringFromTaxonomyCode( final String code,
+                                                                                final int max_lines_to_return )
+            throws IOException {
+        return queryUniprot( "taxonomy/?query=mnemonic%3a%22" + encode( code ) + "%22&format=tab", max_lines_to_return );
+    }
+
+    /** Wraps a single hard-wired taxonomy in a new list. */
+    synchronized private static List<UniProtTaxonomy> hack( final UniProtTaxonomy tax ) {
+        final List<UniProtTaxonomy> l = new ArrayList<UniProtTaxonomy>();
+        l.add( tax );
+        return l;
+    }
+
+    /** Parses the reply lines, or returns <code>null</code> when there are none. */
+    private static List<UniProtTaxonomy> parseOrNull( final List<String> lines ) {
+        if ( lines.size() > 0 ) {
+            return parseUniProtTaxonomy( lines );
+        }
+        return null;
+    }
+
+    /**
+     * Converts reply lines into taxonomies. Empty lines, lines starting with
+     * "Taxon" (presumably the column-header row) and lines with fewer than
+     * five tab-separated columns are skipped.
+     */
+    synchronized private static List<UniProtTaxonomy> parseUniProtTaxonomy( final List<String> result ) {
+        final List<UniProtTaxonomy> taxonomies = new ArrayList<UniProtTaxonomy>();
+        for( final String line : result ) {
+            if ( ForesterUtil.isEmpty( line ) ) {
+                // Ignore empty lines.
+                continue;
+            }
+            if ( line.startsWith( "Taxon" ) ) {
+                //TODO next the check format FIXME
+                continue;
+            }
+            if ( line.split( "\t" ).length > 4 ) {
+                taxonomies.add( new UniProtTaxonomy( line ) );
+            }
+        }
+        return taxonomies;
+    }
+
+    /**
+     * Opens <code>BASE_URL + query</code> and returns the first lines of the
+     * reply. Reading stops once more than <code>max_lines_to_return</code>
+     * lines have been collected, so up to <code>max_lines_to_return + 1</code>
+     * lines are returned. Values below 1 are treated as 1.
+     *
+     * @param query the part of the URL after {@link #BASE_URL}
+     * @return the collected lines, possibly empty
+     * @throws IllegalArgumentException if the query is empty
+     * @throws IOException if connecting or reading fails
+     */
+    synchronized public static List<String> queryUniprot( final String query, int max_lines_to_return )
+            throws IOException {
+        if ( ForesterUtil.isEmpty( query ) ) {
+            throw new IllegalArgumentException( "illegal attempt to use empty query " );
+        }
+        if ( max_lines_to_return < 1 ) {
+            max_lines_to_return = 1;
+        }
+        final URL url = new URL( BASE_URL + query );
+        if ( DEBUG ) {
+            System.out.println( "url: " + url.toString() );
+        }
+        final URLConnection urlc = url.openConnection();
+        final BufferedReader in = new BufferedReader( new InputStreamReader( urlc.getInputStream() ) );
+        final List<String> result = new ArrayList<String>();
+        try {
+            String line;
+            while ( ( line = in.readLine() ) != null ) {
+                result.add( line );
+                if ( result.size() > max_lines_to_return ) {
+                    break;
+                }
+            }
+        }
+        finally {
+            // Release the connection's stream even if reading fails.
+            in.close();
+        }
+        return result;
+    }
+}
diff --git a/forester/java/src/org/forester/ws/wabi/RestUtil.java b/forester/java/src/org/forester/ws/wabi/RestUtil.java
new file mode 100644
index 0000000..b4f723d
--- /dev/null
+++ b/forester/java/src/org/forester/ws/wabi/RestUtil.java
@@ -0,0 +1,100 @@
+// $Id:
+// forester -- software libraries and applications
+// for genomics and evolutionary biology research.
+//
+// Copyright (C) 2010 Christian M Zmasek
+// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
+// All rights reserved
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.ws.wabi;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.PrintStream;
+import java.io.UnsupportedEncodingException;
+import java.net.URL;
+import java.net.URLConnection;
+import java.net.URLEncoder;
+import java.util.List;
+
+/**
+ *
+ * This is to access the Web API for Biology (WABI) at DDBJ.
+ * See: http://xml.nig.ac.jp/
+ *
+ */
+public final class RestUtil {
+
+    final static String         LIST_SEPARATOR = "%0A";
+    final static String         LINE_SEPARATOR = "\n";
+    private final static String BASE_URL       = "http://xml.nig.ac.jp/rest/Invoke";
+    private final static String SERVICE        = "service";
+    private final static String METHOD         = "method";
+    private final static String URL_ENC        = "UTF-8";
+
+    /**
+     * URL-encodes the trimmed argument as UTF-8.
+     */
+    static String encode( final String str ) throws UnsupportedEncodingException {
+        return URLEncoder.encode( str.trim(), URL_ENC );
+    }
+
+    /**
+     * Sends the query as the request body to the REST endpoint and returns
+     * the reply.
+     *
+     * @param query
+     *            service name, method name and parameters, already encoded
+     * @return the reply with lines joined by "\n", trimmed
+     * @throws IOException if connecting, writing or reading fails
+     */
+    public static String getResult( final String query ) throws IOException {
+        final URL url = new URL( BASE_URL );
+        final URLConnection urlc = url.openConnection();
+        urlc.setDoOutput( true );
+        urlc.setAllowUserInteraction( false );
+        final PrintStream ps = new PrintStream( urlc.getOutputStream() );
+        try {
+            ps.print( query.trim() );
+        }
+        finally {
+            ps.close();
+        }
+        final BufferedReader br = new BufferedReader( new InputStreamReader( urlc.getInputStream() ) );
+        final StringBuilder sb = new StringBuilder();
+        try {
+            String line;
+            while ( ( line = br.readLine() ) != null ) {
+                sb.append( line ).append( LINE_SEPARATOR );
+            }
+        }
+        finally {
+            // Close the reply stream even if reading fails.
+            br.close();
+        }
+        return sb.toString().trim();
+    }
+
+    /**
+     * Builds "service=...&method=...&parameters" and sends it.
+     *
+     * @param service_name encoded by this method
+     * @param method_name encoded by this method
+     * @param parameters appended as given (only trimmed), so the caller must
+     *            encode the values
+     * @return the reply, see {@link #getResult(String)}
+     * @throws IOException if the request fails
+     */
+    public static String getResult( final String service_name, final String method_name, final String parameters )
+            throws IOException {
+        final String service = SERVICE + '=' + encode( service_name );
+        final String method = METHOD + '=' + encode( method_name );
+        return getResult( service + '&' + method + '&' + parameters.trim() );
+    }
+
+    /**
+     * Encodes every element and joins them with {@link #LIST_SEPARATOR}.
+     * A separator is written between every pair of elements, including
+     * elements that encode to the empty string, so the number of list
+     * positions is preserved.
+     *
+     * @return the joined string; empty for an empty list
+     */
+    static String listAsString( final List<String> l ) throws UnsupportedEncodingException {
+        final StringBuilder sb = new StringBuilder();
+        boolean first = true;
+        for( final String s : l ) {
+            if ( !first ) {
+                sb.append( LIST_SEPARATOR );
+            }
+            first = false;
+            sb.append( encode( s ) );
+        }
+        return sb.toString();
+    }
+}
diff --git a/forester/java/src/org/forester/ws/wabi/TxSearch.java b/forester/java/src/org/forester/ws/wabi/TxSearch.java
new file mode 100644
index 0000000..ed3bc29
--- /dev/null
+++ b/forester/java/src/org/forester/ws/wabi/TxSearch.java
@@ -0,0 +1,369 @@
+// $Id:
+// forester -- software libraries and applications
+// for genomics and evolutionary biology research.
+//
+// Copyright (C) 2010 Christian M Zmasek
+// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
+// All rights reserved
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.ws.wabi;
+
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ *
+ * This is to access the Web API for Biology (WABI) at DDBJ.
+ * See: http://xml.nig.ac.jp/
+ *
+ * Service Description:
+ * TXSearch is a retrieval system for a Taxonomy Database which
+ * was unified by DDBJ, GenBank and EMBL, which is developed by DDBJ.
+ * See: http://xml.nig.ac.jp/wabi/Method?serviceName=TxSearch&mode=methodList
+ *
+ */
+public final class TxSearch {
+
+    private static final String TAXONOMIC_RANK                         = "Taxonomic rank: ";
+    private static final String FULL_LINEAGE                           = "Full lineage: ";
+    private static final String SEARCH_LINEAGE_QUERY_PARAM_NAME        = "query";
+    private static final String SEARCH_LINEAGE_RANKS_PARAM_NAME        = "ranks";
+    private static final String SEARCH_LINEAGE_SUPERKINGDOM_PARAM_NAME = "superkingdom";
+    private final static String GET_TX_ID_METHOD_NAME                  = "getTxId";
+    private final static String GET_TX_NAME_METHOD_NAME                = "getTxName";
+    private final static String SEARCH_SIMPLE_METHOD_NAME              = "searchSimple";
+    private final static String TX_SEARCH_SERVICE_NAME                 = "TxSearch";
+    private final static String TX_NAME_PARAM_NAME                     = "tx_Name";
+    private final static String TX_ID_PARAM_NAME                       = "tx_Id";
+    private final static String SEARCH_LINEAGE_NAME_METHOD_NAME        = "searchLineage";
+    private final static String SEARCH_PARAM_METHOD_NAME               = "searchParam";
+
+    /**
+     * Extracts the lineage from a search reply: the text after
+     * "Full lineage: " is split at ';' and every element is trimmed (the
+     * untrimmed split left a leading blank on all elements but the first
+     * when elements are separated by "; ").
+     *
+     * @param result a multi-line search reply
+     * @return the lineage elements, or <code>null</code> if the reply has no
+     *         "Full lineage: " line
+     * @throws IOException if more than one "Full lineage: " line is present
+     */
+    public static String[] getLineage( final String result ) throws IOException {
+        String[] lineage = null;
+        for( String line : result.split( RestUtil.LINE_SEPARATOR ) ) {
+            line = line.trim();
+            if ( line.startsWith( FULL_LINEAGE ) ) {
+                if ( lineage != null ) {
+                    throw new IOException( "search result is not unique" );
+                }
+                lineage = line.substring( FULL_LINEAGE.length() ).split( ";" );
+                for( int i = 0; i < lineage.length; ++i ) {
+                    lineage[ i ] = lineage[ i ].trim();
+                }
+            }
+        }
+        return lineage;
+    }
+
+    /**
+     * Extracts the text after "Taxonomic rank: " from a search reply.
+     *
+     * @param result a multi-line search reply
+     * @return the trimmed rank, or <code>null</code> if the reply has no
+     *         "Taxonomic rank: " line
+     * @throws IOException if more than one "Taxonomic rank: " line is present
+     */
+    public static String getTaxonomicRank( final String result ) throws IOException {
+        String rank = null;
+        for( String line : result.split( RestUtil.LINE_SEPARATOR ) ) {
+            line = line.trim();
+            if ( line.startsWith( TAXONOMIC_RANK ) ) {
+                if ( rank != null ) {
+                    throw new IOException( "search result is not unique" );
+                }
+                rank = line.substring( TAXONOMIC_RANK.length() ).trim();
+            }
+        }
+        return rank;
+    }
+
+    /** Calls the service method "getTxId" with the given name; returns the trimmed reply. */
+    public static String getTxId( final String tx_name ) throws IOException {
+        return RestUtil.getResult( TX_SEARCH_SERVICE_NAME,
+                                   GET_TX_ID_METHOD_NAME,
+                                   TX_NAME_PARAM_NAME + "=" + RestUtil.encode( tx_name ) ).trim();
+    }
+
+    /** Calls the service method "getTxName" with the given id; returns the trimmed reply. */
+    public static String getTxName( final String tx_id ) throws IOException {
+        return RestUtil.getResult( TX_SEARCH_SERVICE_NAME,
+                                   GET_TX_NAME_METHOD_NAME,
+                                   TX_ID_PARAM_NAME + "=" + RestUtil.encode( tx_id ) ).trim();
+    }
+
+    /**
+     * Demo helper for {@link #main(String[])}: runs searchParam with a limit
+     * of 10 and scientific-name display; on failure prints the stack trace
+     * and returns <code>fallback</code> (the previously printed result).
+     */
+    private static String demoSearchParam( final String tx_name,
+                                           final TAX_NAME_CLASS tx_name_class,
+                                           final TAX_RANK tx_rank,
+                                           final String fallback ) {
+        try {
+            return searchParam( tx_name, tx_name_class, tx_rank, 10, true );
+        }
+        catch ( final IOException e ) {
+            e.printStackTrace();
+            return fallback;
+        }
+    }
+
+    /**
+     * Manual smoke test: runs a fixed series of live queries and prints the
+     * replies. A failing query prints its stack trace and the previous
+     * result is printed again.
+     */
+    public static void main( final String[] args ) throws IOException {
+        final String separator = "---------------";
+        String result = "";
+        try {
+            result = searchSimple( "SAMSA" );
+        }
+        catch ( final IOException e ) {
+            e.printStackTrace();
+        }
+        System.out.println( result );
+        System.out.println( separator );
+        try {
+            result = searchSimple( "nematostella" );
+        }
+        catch ( final IOException e ) {
+            e.printStackTrace();
+        }
+        System.out.println( result );
+        final String[] lineage = getLineage( result );
+        // getLineage returns null when the reply has no "Full lineage: " line,
+        // e.g. after a failed query.
+        if ( lineage != null ) {
+            for( final String element : lineage ) {
+                System.out.println( element );
+            }
+        }
+        System.out.println( getTaxonomicRank( result ) );
+        System.out.println( separator );
+        try {
+            result = getTxId( "nematostella" );
+        }
+        catch ( final IOException e ) {
+            e.printStackTrace();
+        }
+        System.out.println( result );
+        System.out.println( separator );
+        try {
+            result = getTxName( "45350" );
+        }
+        catch ( final IOException e ) {
+            e.printStackTrace();
+        }
+        System.out.println( result );
+        System.out.println( separator );
+        final List<String> queries = new ArrayList<String>();
+        queries.add( "Campylobacter coli" );
+        queries.add( "Escherichia coli" );
+        queries.add( "Arabidopsis" );
+        queries.add( "Trichoplax" );
+        queries.add( "Samanea saman" );
+        queries.add( "Kluyveromyces marxianus" );
+        queries.add( "Bacillus subtilis subsp. subtilis str. N170" );
+        queries.add( "Bornavirus parrot/PDD/2008" );
+        final List<RANKS> ranks = new ArrayList<RANKS>();
+        ranks.add( RANKS.ALL );
+        try {
+            result = searchLineage( queries, ranks );
+        }
+        catch ( final IOException e ) {
+            e.printStackTrace();
+        }
+        System.out.println( result );
+        System.out.println( separator );
+        result = demoSearchParam( "Homo sapiens", TAX_NAME_CLASS.ALL, TAX_RANK.SPECIES, result );
+        System.out.println( result );
+        System.out.println( separator );
+        result = demoSearchParam( "Samanea saman", TAX_NAME_CLASS.SCIENTIFIC_NAME, TAX_RANK.ALL, result );
+        System.out.println( result );
+        System.out.println( separator );
+        result = demoSearchParam( "cow", TAX_NAME_CLASS.COMMON_NAME, TAX_RANK.ALL, result );
+        System.out.println( result );
+        System.out.println( separator );
+        result = demoSearchParam( "Helicogloea lagerheimii", TAX_NAME_CLASS.SCIENTIFIC_NAME, TAX_RANK.ALL, result );
+        System.out.println( result );
+        System.out.println( separator );
+        result = demoSearchParam( "Cronartium ribicola", TAX_NAME_CLASS.SCIENTIFIC_NAME, TAX_RANK.ALL, result );
+        System.out.println( result );
+        System.out.println( separator );
+        result = demoSearchParam( "Peridermium harknessii", TAX_NAME_CLASS.SCIENTIFIC_NAME, TAX_RANK.ALL, result );
+        System.out.println( result );
+        System.out.println( separator );
+        result = demoSearchParam( "Eukaryota", TAX_NAME_CLASS.SCIENTIFIC_NAME, TAX_RANK.ALL, result );
+        System.out.println( result );
+    }
+
+    /** Encodes every rank's string form and joins them with the list separator. */
+    private static String ranksAsString( final List<RANKS> l ) throws UnsupportedEncodingException {
+        final StringBuilder sb = new StringBuilder();
+        for( final RANKS r : l ) {
+            if ( sb.length() > 0 ) {
+                sb.append( RestUtil.LIST_SEPARATOR );
+            }
+            sb.append( RestUtil.encode( r.toString() ) );
+        }
+        return sb.toString();
+    }
+
+    /** Same as {@link #searchLineage(List, List, String)} with an empty superkingdom. */
+    public static String searchLineage( final List<String> queries, final List<RANKS> ranks ) throws IOException {
+        return searchLineage( queries, ranks, "" );
+    }
+
+    /**
+     * Calls the service method "searchLineage".
+     *
+     * @param queries names to look up, sent as one separator-joined list
+     * @param ranks ranks to request, sent as one separator-joined list
+     * @param superkingdom value of the "superkingdom" parameter; may be empty
+     * @return the trimmed reply
+     * @throws IOException if the request fails
+     */
+    public static String searchLineage( final List<String> queries,
+                                        final List<RANKS> ranks,
+                                        final String superkingdom ) throws IOException {
+        return RestUtil.getResult( TX_SEARCH_SERVICE_NAME,
+                                   SEARCH_LINEAGE_NAME_METHOD_NAME,
+                                   SEARCH_LINEAGE_QUERY_PARAM_NAME + "=" + RestUtil.listAsString( queries ) + "&"
+                                           + SEARCH_LINEAGE_RANKS_PARAM_NAME + "=" + ranksAsString( ranks ) + "&"
+                                           + SEARCH_LINEAGE_SUPERKINGDOM_PARAM_NAME + "="
+                                           + RestUtil.encode( superkingdom ) ).trim();
+    }
+
+    /**
+     * Calls the service method "searchParam".
+     *
+     * @param tx_name sent as "tx_Name"
+     * @param tx_name_class sent as "tx_Clas"
+     * @param tx_rank sent as "tx_Rank"
+     * @param tx_rmax sent as "tx_Rmax"; values below 1 are replaced by 1
+     * @param as_scientific_name sent as "tx_Dcls" ("yes" or "no")
+     * @return the trimmed reply
+     * @throws IOException if the request fails
+     */
+    public static String searchParam( final String tx_name,
+                                      final TAX_NAME_CLASS tx_name_class,
+                                      final TAX_RANK tx_rank,
+                                      int tx_rmax,
+                                      final boolean as_scientific_name ) throws IOException {
+        final String as_scientific_name_str = as_scientific_name ? "yes" : "no";
+        if ( tx_rmax < 1 ) {
+            tx_rmax = 1;
+        }
+        return RestUtil.getResult( TX_SEARCH_SERVICE_NAME,
+                                   SEARCH_PARAM_METHOD_NAME,
+                                   TX_NAME_PARAM_NAME + "=" + RestUtil.encode( tx_name ) + "&tx_Clas="
+                                           + RestUtil.encode( tx_name_class.toString() ) + "&tx_Rank="
+                                           + RestUtil.encode( tx_rank.toString() ) + "&tx_Rmax=" + tx_rmax
+                                           + "&tx_Dcls=" + as_scientific_name_str ).trim();
+    }
+
+    /** Calls the service method "searchSimple" with the given name; returns the trimmed reply. */
+    public static String searchSimple( final String tx_name ) throws IOException {
+        return RestUtil.getResult( TX_SEARCH_SERVICE_NAME,
+                                   SEARCH_SIMPLE_METHOD_NAME,
+                                   TX_NAME_PARAM_NAME + "=" + RestUtil.encode( tx_name ) ).trim();
+    }
+
+    /** Rank values for searchLineage; toString() yields the value sent to the service. */
+    public enum RANKS {
+        ALL( "all" ),
+        SUPERKINGDOM( "superkingdom" ),
+        KINGDOM( "kingdom" ),
+        SUBKINGDOM( "subkingdom" ),
+        SUPERPHYLUM( "superphylum" ),
+        PHYLUM( "phylum" ),
+        SUBPHYLUM( "subphylum" ),
+        SUPERCLASS( "superclass" ),
+        CLASS( "class" ),
+        SUBCLASS( "subclass" ),
+        INFRACLASS( "infraclass" ),
+        SUPERORDER( "superorder" ),
+        ORDER( "order" ),
+        SUBORDER( "suborder" ),
+        INFRAORDER( "infraorder" ),
+        PARVORDER( "parvorder" ),
+        SUPERFAMILY( "superfamily" ),
+        FAMILY( "family" ),
+        SUBFAMILY( "subfamily" ),
+        TRIBE( "tribe" ),
+        SUBTRIBE( "subtribe" ),
+        GENUS( "genus" ),
+        SPECIES( "species" );
+
+        private final String _str;
+
+        private RANKS( final String name ) {
+            _str = name;
+        }
+
+        @Override
+        public String toString() {
+            return _str;
+        }
+    }
+
+    /**
+     * Name classes for searchParam; toString() yields the value sent to the
+     * service. The constant name PREFFERED_COMMON_NAME is misspelled but is
+     * part of the public API; its string value is spelled correctly.
+     */
+    public enum TAX_NAME_CLASS {
+        ALL( "all" ),
+        SCIENTIFIC_NAME( "scientific name" ),
+        PREFFERED_COMMON_NAME( "preferred common name" ),
+        COMMON_NAME( "common name" ),
+        SYNONYM( "synonym" );
+
+        private final String _str;
+
+        private TAX_NAME_CLASS( final String name ) {
+            _str = name;
+        }
+
+        @Override
+        public String toString() {
+            return _str;
+        }
+    }
+
+    /** Rank values for searchParam; toString() yields the value sent to the service. */
+    public enum TAX_RANK {
+        ALL( "All" ),
+        NO_RANK( "no rank" ),
+        SUPERKINGDOM( "superkingdom" ),
+        KINGDOM( "kingdom" ),
+        SUBKINGDOM( "subkingdom" ),
+        SUPERPHYLUM( "superphylum" ),
+        PHYLUM( "phylum" ),
+        SUBPHYLUM( "subphylum" ),
+        SUPERCLASS( "superclass" ),
+        CLASS( "class" ),
+        SUBCLASS( "subclass" ),
+        INFRACLASS( "infraclass" ),
+        SUPERORDER( "superorder" ),
+        ORDER( "order" ),
+        SUBORDER( "suborder" ),
+        INFRAORDER( "infraorder" ),
+        PARVORDER( "parvorder" ),
+        SUPERFAMILY( "superfamily" ),
+        FAMILY( "family" ),
+        SUBFAMILY( "subfamily" ),
+        TRIBE( "tribe" ),
+        SUBTRIBE( "subtribe" ),
+        GENUS( "genus" ),
+        SUBGENUS( "subgenus" ),
+        SPECIES_GROUP( "species group" ),
+        SPECIES_SUBGROUP( "species subgroup" ),
+        SPECIES( "species" ),
+        SUBSPECIES( "subspecies" ),
+        VARIETAS( "varietas" ),
+        FORMA( "forma" );
+
+        private final String _str;
+
+        private TAX_RANK( final String name ) {
+            _str = name;
+        }
+
+        @Override
+        public String toString() {
+            return _str;
+        }
+    }
+}
diff --git a/forester/java/src/org/forester/ws/wabi/WabiTools.java b/forester/java/src/org/forester/ws/wabi/WabiTools.java
new file mode 100644
index 0000000..c9445f4
--- /dev/null
+++ b/forester/java/src/org/forester/ws/wabi/WabiTools.java
@@ -0,0 +1,94 @@
+// $Id:
+// forester -- software libraries and applications
+// for genomics and evolutionary biology research.
+//
+// Copyright (C) 2010 Christian M Zmasek
+// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
+// All rights reserved
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . 
com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.ws.wabi;
+
+import java.io.IOException;
+
+import org.forester.phylogeny.PhylogenyMethods;
+import org.forester.phylogeny.data.Taxonomy;
+import org.forester.util.ForesterUtil;
+import org.forester.ws.wabi.TxSearch.TAX_NAME_CLASS;
+import org.forester.ws.wabi.TxSearch.TAX_RANK;
+
+/**
+ * Convenience lookups that resolve a {@link Taxonomy} to a lineage or a rank
+ * by way of {@link TxSearch}.
+ */
+public final class WabiTools {
+
+    /**
+     * Picks a name to search with: the scientific name if it is not empty,
+     * otherwise the common name if it is not empty, otherwise the name
+     * obtained from TxSearch.getTxName for the taxonomy's identifier - but
+     * only when the identifier's provider is accepted for "uniprot" or "ncbi".
+     *
+     * @return the name, or <code>null</code> if none could be determined
+     */
+    private static String getATxName( final Taxonomy tax ) throws IOException {
+        if ( !ForesterUtil.isEmpty( tax.getScientificName() ) ) {
+            return tax.getScientificName();
+        }
+        if ( !ForesterUtil.isEmpty( tax.getCommonName() ) ) {
+            return tax.getCommonName();
+        }
+        final String[] providers = new String[] { "uniprot", "ncbi" };
+        if ( !PhylogenyMethods.isTaxonomyHasIdentifierOfGivenProvider( tax, providers ) ) {
+            return null;
+        }
+        final String id_value = tax.getIdentifier().getValue();
+        if ( ForesterUtil.isEmpty( id_value ) ) {
+            return null;
+        }
+        return TxSearch.getTxName( id_value );
+    }
+
+    /**
+     * Looks up the lineage of the taxonomy and appends the name that was
+     * searched for as the last element.
+     *
+     * @return lineage plus own name, or <code>null</code> if no name could be
+     *         determined, the reply was empty, or it held no lineage line
+     * @throws IOException if a request fails or the reply is ambiguous
+     */
+    public static String[] obtainLineage( final Taxonomy tax ) throws IOException {
+        final String name = getATxName( tax );
+        if ( ForesterUtil.isEmpty( name ) ) {
+            return null;
+        }
+        final String reply = TxSearch.searchParam( name, TAX_NAME_CLASS.ALL, TAX_RANK.ALL, 2, true );
+        if ( ForesterUtil.isEmpty( reply ) ) {
+            return null;
+        }
+        final String[] ancestors = TxSearch.getLineage( reply );
+        if ( ancestors == null ) {
+            return null;
+        }
+        final String[] with_self = new String[ ancestors.length + 1 ];
+        System.arraycopy( ancestors, 0, with_self, 0, ancestors.length );
+        with_self[ ancestors.length ] = name;
+        return with_self;
+    }
+
+    /**
+     * Looks up the taxonomic rank of the taxonomy.
+     *
+     * @return the rank as extracted by TxSearch.getTaxonomicRank, or
+     *         <code>null</code> if the search gave no reply
+     * @throws IOException if a request fails or the reply is ambiguous
+     */
+    public static String obtainRank( final Taxonomy tax ) throws IOException {
+        final String reply = searchParam( tax );
+        return ForesterUtil.isEmpty( reply ) ? null : TxSearch.getTaxonomicRank( reply );
+    }
+
+    /**
+     * Runs TxSearch.searchParam (all name classes, all ranks, limit 2) for the
+     * name chosen by getATxName.
+     *
+     * @return the reply, or <code>null</code> if no name could be determined
+     */
+    private static String searchParam( final Taxonomy tax ) throws IOException {
+        final String name = getATxName( tax );
+        if ( ForesterUtil.isEmpty( name ) ) {
+            return null;
+        }
+        return TxSearch.searchParam( name, TAX_NAME_CLASS.ALL, TAX_RANK.ALL, 2, true );
+    }
+}
-- 
1.7.10.2