X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fws%2Funiprot%2FUniProtWsTools.java;h=d24b171071a36d01d8e06dd16270ab2351ddc176;hb=7c8aef9df85f80605041ba4d065c798a3cd78d5a;hp=cd8ab4315780f936cf0c1376e548e3dd1676605d;hpb=c78e0c6ccc1b8c7f4e77db43be8d09e2d7c5b78e;p=jalview.git diff --git a/forester/java/src/org/forester/ws/uniprot/UniProtWsTools.java b/forester/java/src/org/forester/ws/uniprot/UniProtWsTools.java index cd8ab43..d24b171 100644 --- a/forester/java/src/org/forester/ws/uniprot/UniProtWsTools.java +++ b/forester/java/src/org/forester/ws/uniprot/UniProtWsTools.java @@ -45,19 +45,23 @@ public final class UniProtWsTools { UNKNOWN, UNIPROT; } public final static String BASE_URL = "http://www.uniprot.org/"; + public final static String BASE_EMBL_DB_URL = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/embl/"; private final static String URL_ENC = "UTF-8"; // uniprot/expasy accession number format (6 chars): // letter digit letter-or-digit letter-or-digit letter-or-digit digit + // ?: => no back-reference + // \A => begin of String + // \Z => end of String private final static Pattern UNIPROT_AC_PATTERN = Pattern - .compile( "^.*[^a-zA-Z0-9]?([A-NR-ZOPQ]\\d[A-Z0-9]{3}\\d)[^a-zA-Z0-9]?" ); - private final static boolean DEBUG = false; + .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]\\d[A-Z0-9]{3}\\d)(?:[^a-zA-Z0-9]|\\Z)" ); + private final static boolean DEBUG = true; private static String encode( final String str ) throws UnsupportedEncodingException { return URLEncoder.encode( str.trim(), URL_ENC ); } /** - * Return null if no match. + * Returns null if no match. * * @param query * @param db @@ -208,14 +212,23 @@ public final class UniProtWsTools { return taxonomies; } - public static List queryUniprot( final String query, int max_lines_to_return ) throws IOException { + public static List queryEmblDb( final String query, final int max_lines_to_return ) throws IOException { + return queryDb( query, max_lines_to_return, BASE_EMBL_DB_URL ); + } + + public static List queryUniprot( final String query, final int max_lines_to_return ) throws IOException { + return queryDb( query, max_lines_to_return, BASE_URL ); + } + + public static List queryDb( final String query, int max_lines_to_return, final String base_url ) + throws IOException { if ( ForesterUtil.isEmpty( query ) ) { throw new IllegalArgumentException( "illegal attempt to use empty query " ); } if ( max_lines_to_return < 1 ) { max_lines_to_return = 1; } - final URL url = new URL( BASE_URL + query ); + final URL url = new URL( base_url + query ); if ( DEBUG ) { System.out.println( "url: " + url.toString() ); } @@ -224,6 +237,7 @@ public final class UniProtWsTools { String line; final List result = new ArrayList(); while ( ( line = in.readLine() ) != null ) { + System.out.println( line ); result.add( line ); if ( result.size() > max_lines_to_return ) { break; @@ -238,4 +252,10 @@ public final class UniProtWsTools { final List lines = queryUniprot( "uniprot/" + query + ".txt", max_lines_to_return ); return UniProtEntry.createInstanceFromPlainText( lines ); } + + public static SequenceDatabaseEntry obtainEmblEntry( final String query, final int max_lines_to_return ) + throws IOException { + final List lines = queryEmblDb( query, max_lines_to_return ); + return EbiDbEntry.createInstanceFromPlainText( lines ); + } }