in progress
[jalview.git] / forester / java / src / org / forester / ws / uniprot / DatabaseTools.java
index 3826e89..77b317d 100644 (file)
@@ -1,27 +1,22 @@
+
 package org.forester.ws.uniprot;
 
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-
 public class DatabaseTools {
+
     //The formats for GenBank accession numbers are:
     //Nucleotide: 1 letter + 5 numerals OR 2 letters + 6 numerals
     //Protein:    3 letters + 5 numerals
     //http://www.ncbi.nlm.nih.gov/Sequin/acc.html
-    
     private final static Pattern GENBANK_NUCLEOTIDE_AC_PATTERN_1 = Pattern
-    .compile( "^.*[^a-zA-Z0-9]?([A-Z]\\d{5})[^a-zA-Z0-9]?" );
-    
+                                                                         .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]\\d{5})(?:[^a-zA-Z0-9]|\\Z)" );
     private final static Pattern GENBANK_NUCLEOTIDE_AC_PATTERN_2 = Pattern
-    .compile( "^.*[^a-zA-Z0-9]?([A-Z]{2}\\d{6})[^a-zA-Z0-9]?" );
-
-    private final static Pattern GENBANK_PROTEIN_AC_PATTERN = Pattern
-    .compile( "^.*[^a-zA-Z0-9]?([A-Z]{3}\\d{5})[^a-zA-Z0-9]?" );
-
-    
-    
-    private final static boolean DEBUG              = false;
+                                                                         .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]{2}\\d{6})(?:[^a-zA-Z0-9]|\\Z)" );
+    private final static Pattern GENBANK_PROTEIN_AC_PATTERN      = Pattern
+                                                                         .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]{3}\\d{5})(?:[^a-zA-Z0-9]|\\Z)" );
+    private final static boolean DEBUG                           = false;
 
     /**
      * Returns null if no match.
@@ -36,10 +31,10 @@ public class DatabaseTools {
             return m.group( 1 );
         }
         else {
-             m = GENBANK_NUCLEOTIDE_AC_PATTERN_2.matcher( query );
+            m = GENBANK_NUCLEOTIDE_AC_PATTERN_2.matcher( query );
             if ( m.lookingAt() ) {
                 return m.group( 1 );
-            } 
+            }
             else {
                 m = GENBANK_PROTEIN_AC_PATTERN.matcher( query );
                 if ( m.lookingAt() ) {
@@ -62,11 +57,8 @@ public class DatabaseTools {
         return target.substring( i_a + a.length(), i_b ).trim();
     }
 
-
-
     /**
      * Returns the trimmed remainder of target that follows the first
      * occurrence of the marker a.
      * 
      * Note: the result of indexOf is not checked. If a does not occur
      * in target, indexOf yields -1 and the returned text starts at index
      * a.length() - 1 instead of signalling the miss; substring throws
      * StringIndexOutOfBoundsException only if that index lies beyond the end
      * of target.
      * 
      * @param target the text to search in
      * @param a the marker whose first occurrence precedes the wanted text
      * @return the part of target after the marker, with leading and
      *         trailing whitespace removed by trim()
      */
     static String extract( final String target, final String a ) {
         // Position of the first occurrence of the marker (-1 if absent).
         final int i_a = target.indexOf( a );
         // Skip past the marker itself and take everything up to the end.
         return target.substring( i_a + a.length() ).trim();
     }
-
 }