X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fws%2Funiprot%2FUniProtWsTools.java;h=d24b171071a36d01d8e06dd16270ab2351ddc176;hb=7c8aef9df85f80605041ba4d065c798a3cd78d5a;hp=96aeb9da79ccb7b06002fa1aa1167e99432f0e86;hpb=e47fb4d682262076a44ebb802276ae237ed4cf96;p=jalview.git diff --git a/forester/java/src/org/forester/ws/uniprot/UniProtWsTools.java b/forester/java/src/org/forester/ws/uniprot/UniProtWsTools.java index 96aeb9d..d24b171 100644 --- a/forester/java/src/org/forester/ws/uniprot/UniProtWsTools.java +++ b/forester/java/src/org/forester/ws/uniprot/UniProtWsTools.java @@ -45,14 +45,16 @@ public final class UniProtWsTools { UNKNOWN, UNIPROT; } public final static String BASE_URL = "http://www.uniprot.org/"; - public final static String BASE_EMBL_DB_URL = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/embl/"; private final static String URL_ENC = "UTF-8"; // uniprot/expasy accession number format (6 chars): // letter digit letter-or-digit letter-or-digit letter-or-digit digit + // ?: => no back-reference + // \A => begin of String + // \Z => end of String private final static Pattern UNIPROT_AC_PATTERN = Pattern - .compile( "^.*[a-zA-Z0-9]?([A-NR-ZOPQ]\\d[A-Z0-9]{3}\\d)[^a-zA-Z0-9]?" ); - private final static boolean DEBUG = false; + .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]\\d[A-Z0-9]{3}\\d)(?:[^a-zA-Z0-9]|\\Z)" ); + private final static boolean DEBUG = true; private static String encode( final String str ) throws UnsupportedEncodingException { return URLEncoder.encode( str.trim(), URL_ENC ); @@ -74,8 +76,6 @@ public final class UniProtWsTools { return null; } } - - public static List getTaxonomiesFromCommonName( final String cn, final int max_taxonomies_return ) throws IOException { @@ -212,26 +212,16 @@ public final class UniProtWsTools { return taxonomies; } - - public static List queryEmblDb( final String query, int max_lines_to_return ) throws IOException { - return queryDb( query, - max_lines_to_return, - BASE_EMBL_DB_URL ) ; + public static List queryEmblDb( final String query, final int max_lines_to_return ) throws IOException { + return queryDb( query, max_lines_to_return, BASE_EMBL_DB_URL ); } - - - - public static List queryUniprot( final String query, int max_lines_to_return ) throws IOException { - return queryDb( query, - max_lines_to_return, - BASE_URL ) ; - - + + public static List queryUniprot( final String query, final int max_lines_to_return ) throws IOException { + return queryDb( query, max_lines_to_return, BASE_URL ); } - public static List queryDb( final String query, - int max_lines_to_return, - final String base_url ) throws IOException { + public static List queryDb( final String query, int max_lines_to_return, final String base_url ) + throws IOException { if ( ForesterUtil.isEmpty( query ) ) { throw new IllegalArgumentException( "illegal attempt to use empty query " ); } @@ -247,6 +237,7 @@ public final class UniProtWsTools { String line; final List result = new ArrayList(); while ( ( line = in.readLine() ) != null ) { + System.out.println( line ); result.add( line ); if ( result.size() > max_lines_to_return ) { break; @@ -255,16 +246,16 @@ public final class UniProtWsTools { in.close(); return result; } - - + public static SequenceDatabaseEntry obtainUniProtEntry( final String query, final int max_lines_to_return ) throws IOException { final List lines = queryUniprot( "uniprot/" + query + ".txt", max_lines_to_return ); return UniProtEntry.createInstanceFromPlainText( lines ); } - public static SequenceDatabaseEntry obtainEmblEntry( String query, int max_lines_to_return ) throws IOException { - final List lines = queryEmblDb( "query", max_lines_to_return ); + public static SequenceDatabaseEntry obtainEmblEntry( final String query, final int max_lines_to_return ) + throws IOException { + final List lines = queryEmblDb( query, max_lines_to_return ); return EbiDbEntry.createInstanceFromPlainText( lines ); } }