X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fws%2Funiprot%2FDatabaseTools.java;h=77b317d6694f6fd36ac912fcc72c23615cbfc6cf;hb=e4f95087154c70d569fe54d8f08af7d4f218df0c;hp=3826e89e2424e058661fdbfa5eb0ef6dc99fe6b0;hpb=e47fb4d682262076a44ebb802276ae237ed4cf96;p=jalview.git diff --git a/forester/java/src/org/forester/ws/uniprot/DatabaseTools.java b/forester/java/src/org/forester/ws/uniprot/DatabaseTools.java index 3826e89..77b317d 100644 --- a/forester/java/src/org/forester/ws/uniprot/DatabaseTools.java +++ b/forester/java/src/org/forester/ws/uniprot/DatabaseTools.java @@ -1,27 +1,22 @@ + package org.forester.ws.uniprot; import java.util.regex.Matcher; import java.util.regex.Pattern; - public class DatabaseTools { + //The format for GenBank Accession numbers are: //Nucleotide: 1 letter + 5 numerals OR 2 letters + 6 numerals //Protein: 3 letters + 5 numerals //http://www.ncbi.nlm.nih.gov/Sequin/acc.html - private final static Pattern GENBANK_NUCLEOTIDE_AC_PATTERN_1 = Pattern - .compile( "^.*[^a-zA-Z0-9]?([A-Z]\\d{5})[^a-zA-Z0-9]?" ); - + .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]\\d{5})(?:[^a-zA-Z0-9]|\\Z)" ); private final static Pattern GENBANK_NUCLEOTIDE_AC_PATTERN_2 = Pattern - .compile( "^.*[^a-zA-Z0-9]?([A-Z]{2}\\d{6})[^a-zA-Z0-9]?" ); - - private final static Pattern GENBANK_PROTEIN_AC_PATTERN = Pattern - .compile( "^.*[^a-zA-Z0-9]?([A-Z]{3}\\d{5})[^a-zA-Z0-9]?" ); - - - - private final static boolean DEBUG = false; + .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]{2}\\d{6})(?:[^a-zA-Z0-9]|\\Z)" ); + private final static Pattern GENBANK_PROTEIN_AC_PATTERN = Pattern + .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]{3}\\d{5})(?:[^a-zA-Z0-9]|\\Z)" ); + private final static boolean DEBUG = false; /** * Returns null if no match. @@ -36,10 +31,10 @@ public class DatabaseTools { return m.group( 1 ); } else { - m = GENBANK_NUCLEOTIDE_AC_PATTERN_2.matcher( query ); + m = GENBANK_NUCLEOTIDE_AC_PATTERN_2.matcher( query ); if ( m.lookingAt() ) { return m.group( 1 ); - } + } else { m = GENBANK_PROTEIN_AC_PATTERN.matcher( query ); if ( m.lookingAt() ) { @@ -62,11 +57,8 @@ public class DatabaseTools { return target.substring( i_a + a.length(), i_b ).trim(); } - - static String extract( final String target, final String a ) { final int i_a = target.indexOf( a ); return target.substring( i_a + a.length() ).trim(); } - }