X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fws%2Funiprot%2FUniProtWsTools.java;h=8bef1112a774416611a084e8bdcaa81dee9cebe7;hb=e4f95087154c70d569fe54d8f08af7d4f218df0c;hp=1ecf5ead3e791158c965a0b87e22e3dd60e1e9ef;hpb=eee996a6476a1e3d84c07f8f690dcde3ff4b2ef5;p=jalview.git diff --git a/forester/java/src/org/forester/ws/uniprot/UniProtWsTools.java b/forester/java/src/org/forester/ws/uniprot/UniProtWsTools.java index 1ecf5ea..8bef111 100644 --- a/forester/java/src/org/forester/ws/uniprot/UniProtWsTools.java +++ b/forester/java/src/org/forester/ws/uniprot/UniProtWsTools.java @@ -34,21 +34,50 @@ import java.net.URLConnection; import java.net.URLEncoder; import java.util.ArrayList; import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.forester.util.ForesterUtil; public final class UniProtWsTools { - public final static String BASE_URL = "http://www.uniprot.org/"; - private final static String URL_ENC = "UTF-8"; - private final static boolean DEBUG = false; + public enum Db { + UNKNOWN, UNIPROT; + } + public final static String BASE_URL = "http://www.uniprot.org/"; + public final static String BASE_EMBL_DB_URL = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/embl/"; + private final static String URL_ENC = "UTF-8"; + // uniprot/expasy accession number format (6 chars): + // letter digit letter-or-digit letter-or-digit letter-or-digit digit + // ?: => no back-reference + // \A => begin of String + // \Z => end of String + private final static Pattern UNIPROT_AC_PATTERN = Pattern + .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]\\d[A-Z0-9]{3}\\d)(?:[^a-zA-Z0-9]|\\Z)" ); + private final static boolean DEBUG = false; - synchronized private static String encode( final String str ) throws UnsupportedEncodingException { + private static String encode( final String str ) throws UnsupportedEncodingException { return URLEncoder.encode( str.trim(), URL_ENC ); } - synchronized public static List getTaxonomiesFromCommonName( final String cn, - final int max_taxonomies_return ) + /** + * Returns null if no match. + * + * @param query + * @param db + * @return + */ + static public String parseUniProtAccessor( final String query ) { + final Matcher m = UNIPROT_AC_PATTERN.matcher( query ); + if ( m.lookingAt() ) { + return m.group( 1 ); + } + else { + return null; + } + } + + public static List getTaxonomiesFromCommonName( final String cn, final int max_taxonomies_return ) throws IOException { final List result = getTaxonomyStringFromCommonName( cn, max_taxonomies_return ); if ( result.size() > 0 ) { @@ -57,8 +86,8 @@ public final class UniProtWsTools { return null; } - synchronized public static List getTaxonomiesFromCommonNameStrict( final String cn, - final int max_taxonomies_return ) + public static List getTaxonomiesFromCommonNameStrict( final String cn, + final int max_taxonomies_return ) throws IOException { final List taxonomies = getTaxonomiesFromCommonName( cn, max_taxonomies_return ); if ( ( taxonomies != null ) && ( taxonomies.size() > 0 ) ) { @@ -73,8 +102,7 @@ public final class UniProtWsTools { return null; } - synchronized public static List getTaxonomiesFromId( final String id, - final int max_taxonomies_return ) + public static List getTaxonomiesFromId( final String id, final int max_taxonomies_return ) throws IOException { final List result = getTaxonomyStringFromId( id, max_taxonomies_return ); if ( result.size() > 0 ) { @@ -83,8 +111,8 @@ public final class UniProtWsTools { return null; } - synchronized public static List getTaxonomiesFromScientificName( final String sn, - final int max_taxonomies_return ) + public static List getTaxonomiesFromScientificName( final String sn, + final int max_taxonomies_return ) throws IOException { // Hack! Craniata? .. if ( sn.equals( "Drosophila" ) ) { @@ -106,8 +134,8 @@ public final class UniProtWsTools { * and not "Mus musculus", "Mus musculus bactrianus", ... * */ - synchronized public static List getTaxonomiesFromScientificNameStrict( final String sn, - final int max_taxonomies_return ) + public static List getTaxonomiesFromScientificNameStrict( final String sn, + final int max_taxonomies_return ) throws IOException { final List taxonomies = getTaxonomiesFromScientificName( sn, max_taxonomies_return ); if ( ( taxonomies != null ) && ( taxonomies.size() > 0 ) ) { @@ -122,8 +150,8 @@ public final class UniProtWsTools { return null; } - synchronized public static List getTaxonomiesFromTaxonomyCode( final String code, - final int max_taxonomies_return ) + public static List getTaxonomiesFromTaxonomyCode( final String code, + final int max_taxonomies_return ) throws IOException { String my_code = new String( code ); // Hacks! @@ -140,37 +168,33 @@ public final class UniProtWsTools { return null; } - synchronized private static List getTaxonomyStringFromCommonName( final String cn, - final int max_lines_to_return ) + private static List getTaxonomyStringFromCommonName( final String cn, final int max_lines_to_return ) throws IOException { return queryUniprot( "taxonomy/?query=common%3a%22" + encode( cn ) + "%22&format=tab", max_lines_to_return ); } - synchronized private static List getTaxonomyStringFromId( final String id, final int max_lines_to_return ) + private static List getTaxonomyStringFromId( final String id, final int max_lines_to_return ) throws IOException { return queryUniprot( "taxonomy/?query=id%3a%22" + encode( id ) + "%22&format=tab", max_lines_to_return ); } - synchronized private static List getTaxonomyStringFromScientificName( final String sn, - final int max_lines_to_return ) + private static List getTaxonomyStringFromScientificName( final String sn, final int max_lines_to_return ) throws IOException { return queryUniprot( "taxonomy/?query=scientific%3a%22" + encode( sn ) + "%22&format=tab", max_lines_to_return ); } - synchronized private static List getTaxonomyStringFromTaxonomyCode( final String code, - final int max_lines_to_return ) + private static List getTaxonomyStringFromTaxonomyCode( final String code, final int max_lines_to_return ) throws IOException { return queryUniprot( "taxonomy/?query=mnemonic%3a%22" + encode( code ) + "%22&format=tab", max_lines_to_return ); } - synchronized private static List hack( final UniProtTaxonomy tax ) { + private static List hack( final UniProtTaxonomy tax ) { final List l = new ArrayList(); l.add( tax ); return l; } - synchronized private static List parseUniProtTaxonomy( final List result ) - throws IOException { + private static List parseUniProtTaxonomy( final List result ) throws IOException { final List taxonomies = new ArrayList(); for( final String line : result ) { if ( ForesterUtil.isEmpty( line ) ) { @@ -188,7 +212,15 @@ public final class UniProtWsTools { return taxonomies; } - synchronized public static List queryUniprot( final String query, int max_lines_to_return ) + public static List queryEmblDb( final String query, final int max_lines_to_return ) throws IOException { + return queryDb( query, max_lines_to_return, BASE_EMBL_DB_URL ); + } + + public static List queryUniprot( final String query, final int max_lines_to_return ) throws IOException { + return queryDb( query, max_lines_to_return, BASE_URL ); + } + + public static List queryDb( final String query, int max_lines_to_return, final String base_url ) throws IOException { if ( ForesterUtil.isEmpty( query ) ) { throw new IllegalArgumentException( "illegal attempt to use empty query " ); @@ -196,7 +228,7 @@ public final class UniProtWsTools { if ( max_lines_to_return < 1 ) { max_lines_to_return = 1; } - final URL url = new URL( BASE_URL + query ); + final URL url = new URL( base_url + query ); if ( DEBUG ) { System.out.println( "url: " + url.toString() ); } @@ -213,4 +245,16 @@ public final class UniProtWsTools { in.close(); return result; } + + public static SequenceDatabaseEntry obtainUniProtEntry( final String query, final int max_lines_to_return ) + throws IOException { + final List lines = queryUniprot( "uniprot/" + query + ".txt", max_lines_to_return ); + return UniProtEntry.createInstanceFromPlainText( lines ); + } + + public static SequenceDatabaseEntry obtainEmblEntry( final String query, final int max_lines_to_return ) + throws IOException { + final List lines = queryEmblDb( "query", max_lines_to_return ); + return EbiDbEntry.createInstanceFromPlainText( lines ); + } }