in progress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Mon, 25 Apr 2011 21:34:07 +0000 (21:34 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Mon, 25 Apr 2011 21:34:07 +0000 (21:34 +0000)
forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java
forester/java/src/org/forester/development/Hello3d.java
forester/java/src/org/forester/test/Test.java
forester/java/src/org/forester/ws/uniprot/DatabaseTools.java
forester/java/src/org/forester/ws/uniprot/EbiDbEntry.java
forester/java/src/org/forester/ws/uniprot/UniProtEntry.java
forester/java/src/org/forester/ws/uniprot/UniProtWsTools.java

index 168a163..5b454e3 100644 (file)
@@ -174,21 +174,20 @@ public final class SequenceDataRetriver implements Runnable {
                 query = node.getNodeData().getSequence().getAccession().getValue();
                 db = Db.UNIPROT;
             }
-            else if ( node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getAccession() != null )
+            else if ( node.getNodeData().isHasSequence()
+                    && ( node.getNodeData().getSequence().getAccession() != null )
                     && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getSource() )
                     && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getValue() )
-                    && ( node.getNodeData().getSequence().getAccession().getValue().toLowerCase().startsWith( "embl" ) 
-                     || node.getNodeData().getSequence().getAccession().getValue().toLowerCase().startsWith( "ebi" )      
-                    ) ) {
+                    && ( node.getNodeData().getSequence().getAccession().getValue().toLowerCase().startsWith( "embl" ) || node
+                            .getNodeData().getSequence().getAccession().getValue().toLowerCase().startsWith( "ebi" ) ) ) {
                 query = node.getNodeData().getSequence().getAccession().getValue();
                 db = Db.EMBL;
             }
             else if ( !ForesterUtil.isEmpty( node.getName() ) ) {
-               
-                if (  (query = UniProtWsTools.parseUniProtAccessor( node.getName() ))!=null  ) {
+                if ( ( query = UniProtWsTools.parseUniProtAccessor( node.getName() ) ) != null ) {
                     db = Db.UNIPROT;
                 }
-                else if (( query = DatabaseTools.parseGenbankAccessor( node.getName())) !=null ) {
+                else if ( ( query = DatabaseTools.parseGenbankAccessor( node.getName() ) ) != null ) {
                     db = Db.EMBL;
                 }
             }
index dc0d099..0cf0112 100644 (file)
@@ -2,30 +2,31 @@
 // http://download.java.net/media/java3d/builds/release/1.5.1/README-download.html
 //
 // /usr/lib/jvm/java-6-sun-1.6.0.24/jre
-//  lib/ext/vecmath.jar
-//  lib/ext/j3dcore.jar
-//  lib/ext/j3dutils.jar
+// lib/ext/vecmath.jar
+// lib/ext/j3dcore.jar
+// lib/ext/j3dutils.jar
 //
 
 package org.forester.development;
-import com.sun.j3d.utils.universe.SimpleUniverse;
-import com.sun.j3d.utils.geometry.ColorCube;
-import com.sun.j3d.utils.geometry.Cylinder;
-import javax.media.j3d.BranchGroup;
+
+// import com.sun.j3d.utils.universe.SimpleUniverse;
+// import com.sun.j3d.utils.geometry.ColorCube;
+// import com.sun.j3d.utils.geometry.Cylinder;
+// import javax.media.j3d.BranchGroup;
 public class Hello3d {
 
     public Hello3d() {
-        SimpleUniverse universe = new SimpleUniverse();
-        BranchGroup group = new BranchGroup();
-        //ColorCube cc0 = new ColorCube( 0.1);
-       // Appearance a = new Appearance();
-        group.addChild( new Cylinder( 0,1));
-        group.addChild( new ColorCube( 0.3 ) );
-        universe.getViewingPlatform().setNominalViewingTransform();
-        universe.addBranchGraph( group );
+        //        SimpleUniverse universe = new SimpleUniverse();
+        //        BranchGroup group = new BranchGroup();
+        //        //ColorCube cc0 = new ColorCube( 0.1);
+        //       // Appearance a = new Appearance();
+        //        group.addChild( new Cylinder( 0,1));
+        //        group.addChild( new ColorCube( 0.3 ) );
+        //        universe.getViewingPlatform().setNominalViewingTransform();
+        //        universe.addBranchGraph( group );
     }
 
-    public static void main( String[] args ) {
+    public static void main( final String[] args ) {
         new Hello3d();
     }
 } // end of class Hello3d
index f01c864..8533d18 100644 (file)
@@ -647,7 +647,6 @@ public final class Test {
             System.out.println( "failed." );
             failed++;
         }
-        
         System.out.print( "EMBL Entry Retrieval: " );
         if ( Test.testEmblEntryRetrieval() ) {
             System.out.println( "OK." );
@@ -657,7 +656,6 @@ public final class Test {
             System.out.println( "failed." );
             failed++;
         }
-        
         System.out.print( "Uniprot Entry Retrieval: " );
         if ( Test.testUniprotEntryRetrieval() ) {
             System.out.println( "OK." );
@@ -7744,13 +7742,52 @@ public final class Test {
     }
 
     private static boolean testEmblEntryRetrieval() {
+        //The format for GenBank Accession numbers are:
+        //Nucleotide: 1 letter + 5 numerals OR 2 letters + 6 numerals
+        //Protein:    3 letters + 5 numerals
+        //http://www.ncbi.nlm.nih.gov/Sequin/acc.html
         if ( !DatabaseTools.parseGenbankAccessor( "AY423861" ).equals( "AY423861" ) ) {
-            System.out.println( DatabaseTools.parseGenbankAccessor( "AY423861" ));
+            return false;
+        }
+        if ( !DatabaseTools.parseGenbankAccessor( ".AY423861." ).equals( "AY423861" ) ) {
+            return false;
+        }
+        if ( DatabaseTools.parseGenbankAccessor( "AAY423861" ) != null ) {
+            return false;
+        }
+        if ( DatabaseTools.parseGenbankAccessor( "AY4238612" ) != null ) {
+            return false;
+        }
+        if ( DatabaseTools.parseGenbankAccessor( "AAY4238612" ) != null ) {
+            return false;
+        }
+        if ( DatabaseTools.parseGenbankAccessor( "Y423861" ) != null ) {
+            return false;
+        }
+        if ( !DatabaseTools.parseGenbankAccessor( "S12345" ).equals( "S12345" ) ) {
+            return false;
+        }
+        if ( !DatabaseTools.parseGenbankAccessor( "|S12345|" ).equals( "S12345" ) ) {
+            return false;
+        }
+        if ( DatabaseTools.parseGenbankAccessor( "|S123456" ) != null ) {
+            return false;
+        }
+        if ( DatabaseTools.parseGenbankAccessor( "ABC123456" ) != null ) {
+            return false;
+        }
+        if ( !DatabaseTools.parseGenbankAccessor( "ABC12345" ).equals( "ABC12345" ) ) {
+            return false;
+        }
+        if ( !DatabaseTools.parseGenbankAccessor( "&ABC12345&" ).equals( "ABC12345" ) ) {
+            return false;
+        }
+        if ( DatabaseTools.parseGenbankAccessor( "ABCD12345" ) != null ) {
             return false;
         }
         return true;
     }
-    
+
     private static boolean testUniprotEntryRetrieval() {
         if ( !UniProtWsTools.parseUniProtAccessor( "P12345" ).equals( "P12345" ) ) {
             return false;
@@ -7758,6 +7795,21 @@ public final class Test {
         if ( UniProtWsTools.parseUniProtAccessor( "EP12345" ) != null ) {
             return false;
         }
+        if ( UniProtWsTools.parseUniProtAccessor( "3 4P12345" ) != null ) {
+            return false;
+        }
+        if ( UniProtWsTools.parseUniProtAccessor( "P12345E" ) != null ) {
+            return false;
+        }
+        if ( UniProtWsTools.parseUniProtAccessor( "P123455" ) != null ) {
+            return false;
+        }
+        if ( UniProtWsTools.parseUniProtAccessor( "EP12345E" ) != null ) {
+            return false;
+        }
+        if ( UniProtWsTools.parseUniProtAccessor( "AY423861" ) != null ) {
+            return false;
+        }
         if ( !UniProtWsTools.parseUniProtAccessor( "P1DDD5" ).equals( "P1DDD5" ) ) {
             return false;
         }
@@ -7767,6 +7819,9 @@ public final class Test {
         if ( !UniProtWsTools.parseUniProtAccessor( "P1234X/P12345/12-42" ).equals( "P12345" ) ) {
             return false;
         }
+        if ( !UniProtWsTools.parseUniProtAccessor( "P1234X P12345 12-42" ).equals( "P12345" ) ) {
+            return false;
+        }
         if ( !UniProtWsTools.parseUniProtAccessor( "P12345/12-42" ).equals( "P12345" ) ) {
             return false;
         }
index 3826e89..77b317d 100644 (file)
@@ -1,27 +1,22 @@
+
 package org.forester.ws.uniprot;
 
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-
 public class DatabaseTools {
+
     //The format for GenBank Accession numbers are:
     //Nucleotide: 1 letter + 5 numerals OR 2 letters + 6 numerals
     //Protein:    3 letters + 5 numerals
     //http://www.ncbi.nlm.nih.gov/Sequin/acc.html
-    
     private final static Pattern GENBANK_NUCLEOTIDE_AC_PATTERN_1 = Pattern
-    .compile( "^.*[^a-zA-Z0-9]?([A-Z]\\d{5})[^a-zA-Z0-9]?" );
-    
+                                                                         .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]\\d{5})(?:[^a-zA-Z0-9]|\\Z)" );
     private final static Pattern GENBANK_NUCLEOTIDE_AC_PATTERN_2 = Pattern
-    .compile( "^.*[^a-zA-Z0-9]?([A-Z]{2}\\d{6})[^a-zA-Z0-9]?" );
-
-    private final static Pattern GENBANK_PROTEIN_AC_PATTERN = Pattern
-    .compile( "^.*[^a-zA-Z0-9]?([A-Z]{3}\\d{5})[^a-zA-Z0-9]?" );
-
-    
-    
-    private final static boolean DEBUG              = false;
+                                                                         .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]{2}\\d{6})(?:[^a-zA-Z0-9]|\\Z)" );
+    private final static Pattern GENBANK_PROTEIN_AC_PATTERN      = Pattern
+                                                                         .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]{3}\\d{5})(?:[^a-zA-Z0-9]|\\Z)" );
+    private final static boolean DEBUG                           = false;
 
     /**
      * Returns null if no match.
@@ -36,10 +31,10 @@ public class DatabaseTools {
             return m.group( 1 );
         }
         else {
-             m = GENBANK_NUCLEOTIDE_AC_PATTERN_2.matcher( query );
+            m = GENBANK_NUCLEOTIDE_AC_PATTERN_2.matcher( query );
             if ( m.lookingAt() ) {
                 return m.group( 1 );
-            } 
+            }
             else {
                 m = GENBANK_PROTEIN_AC_PATTERN.matcher( query );
                 if ( m.lookingAt() ) {
@@ -62,11 +57,8 @@ public class DatabaseTools {
         return target.substring( i_a + a.length(), i_b ).trim();
     }
 
-
-
     static String extract( final String target, final String a ) {
         final int i_a = target.indexOf( a );
         return target.substring( i_a + a.length() ).trim();
     }
-
 }
index 4f7b779..8adda22 100644 (file)
@@ -30,9 +30,8 @@ import java.util.List;
 import sun.reflect.generics.reflectiveObjects.NotImplementedException;
 
 public final class EbiDbEntry implements SequenceDatabaseEntry {
-//http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/emb/AAR37336/
-    
-    
+
+    //http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/emb/AAR37336/
     private String _pa;
     private String _de;
     private String _os;
@@ -42,11 +41,11 @@ public final class EbiDbEntry implements SequenceDatabaseEntry {
     private EbiDbEntry() {
     }
 
-    
+    @Override
     public Object clone() {
         throw new NotImplementedException();
     }
-    
+
     public static SequenceDatabaseEntry createInstanceFromPlainText( final List<String> lines ) {
         final EbiDbEntry e = new EbiDbEntry();
         for( final String line : lines ) {
@@ -54,15 +53,15 @@ public final class EbiDbEntry implements SequenceDatabaseEntry {
                 e.setPA( DatabaseTools.extract( line, "PA", ";" ) );
             }
             else if ( line.startsWith( "DE" ) ) {
-               // if ( ( line.indexOf( "RecName:" ) > 0 ) && ( line.indexOf( "Full=" ) > 0 ) ) {
-                    e.setDe( DatabaseTools.extract( line, "DE" ) );
+                // if ( ( line.indexOf( "RecName:" ) > 0 ) && ( line.indexOf( "Full=" ) > 0 ) ) {
+                e.setDe( DatabaseTools.extract( line, "DE" ) );
                 //}
             }
-          //  else if ( line.startsWith( "GN" ) ) {
-          //      if ( ( line.indexOf( "Name=" ) > 0 ) ) {
-          //          e.setSymbol( extract( line, "Name=", ";" ) );
-          //      }
-          //  }
+            //  else if ( line.startsWith( "GN" ) ) {
+            //      if ( ( line.indexOf( "Name=" ) > 0 ) ) {
+            //          e.setSymbol( extract( line, "Name=", ";" ) );
+            //      }
+            //  }
             else if ( line.startsWith( "OS" ) ) {
                 if ( line.indexOf( "(" ) > 0 ) {
                     e.setOs( DatabaseTools.extract( line, "OS", "(" ) );
@@ -87,7 +86,7 @@ public final class EbiDbEntry implements SequenceDatabaseEntry {
 
     private void setPA( final String pa ) {
         if ( _pa == null ) {
-            _pa= pa;
+            _pa = pa;
         }
     }
 
@@ -108,7 +107,7 @@ public final class EbiDbEntry implements SequenceDatabaseEntry {
     }
 
     private void setOs( final String os ) {
-        if ( _os== null ) {
+        if ( _os == null ) {
             _os = os;
         }
     }
index 2b74883..222df5b 100644 (file)
@@ -71,8 +71,6 @@ public final class UniProtEntry implements SequenceDatabaseEntry {
         return e;
     }
 
-   
-
     @Override
     public String getAccession() {
         return _ac;
index 96aeb9d..8bef111 100644 (file)
@@ -45,13 +45,15 @@ public final class UniProtWsTools {
         UNKNOWN, UNIPROT;
     }
     public final static String   BASE_URL           = "http://www.uniprot.org/";
-    
     public final static String   BASE_EMBL_DB_URL   = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/embl/";
     private final static String  URL_ENC            = "UTF-8";
     // uniprot/expasy accession number format (6 chars):
     // letter digit letter-or-digit letter-or-digit letter-or-digit digit
+    // ?: => no back-reference
+    // \A => begin of String
+    // \Z => end of String
     private final static Pattern UNIPROT_AC_PATTERN = Pattern
-                                                            .compile( "^.*[a-zA-Z0-9]?([A-NR-ZOPQ]\\d[A-Z0-9]{3}\\d)[^a-zA-Z0-9]?" );
+                                                            .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]\\d[A-Z0-9]{3}\\d)(?:[^a-zA-Z0-9]|\\Z)" );
     private final static boolean DEBUG              = false;
 
     private static String encode( final String str ) throws UnsupportedEncodingException {
@@ -74,8 +76,6 @@ public final class UniProtWsTools {
             return null;
         }
     }
-    
-  
 
     public static List<UniProtTaxonomy> getTaxonomiesFromCommonName( final String cn, final int max_taxonomies_return )
             throws IOException {
@@ -212,26 +212,16 @@ public final class UniProtWsTools {
         return taxonomies;
     }
 
-    
-    public static List<String> queryEmblDb( final String query, int max_lines_to_return ) throws IOException {
-        return queryDb( query,
-                        max_lines_to_return,
-                        BASE_EMBL_DB_URL ) ;
+    public static List<String> queryEmblDb( final String query, final int max_lines_to_return ) throws IOException {
+        return queryDb( query, max_lines_to_return, BASE_EMBL_DB_URL );
     }
-    
-    
-    
-    public static List<String> queryUniprot( final String query, int max_lines_to_return ) throws IOException {
-        return queryDb( query,
-                max_lines_to_return,
-                BASE_URL ) ;
-        
-       
+
+    public static List<String> queryUniprot( final String query, final int max_lines_to_return ) throws IOException {
+        return queryDb( query, max_lines_to_return, BASE_URL );
     }
 
-    public static List<String> queryDb( final String query,
-                                        int max_lines_to_return,
-                                        final String base_url ) throws IOException {
+    public static List<String> queryDb( final String query, int max_lines_to_return, final String base_url )
+            throws IOException {
         if ( ForesterUtil.isEmpty( query ) ) {
             throw new IllegalArgumentException( "illegal attempt to use empty query " );
         }
@@ -255,15 +245,15 @@ public final class UniProtWsTools {
         in.close();
         return result;
     }
-    
-    
+
     public static SequenceDatabaseEntry obtainUniProtEntry( final String query, final int max_lines_to_return )
             throws IOException {
         final List<String> lines = queryUniprot( "uniprot/" + query + ".txt", max_lines_to_return );
         return UniProtEntry.createInstanceFromPlainText( lines );
     }
 
-    public static SequenceDatabaseEntry obtainEmblEntry( String query, int max_lines_to_return ) throws IOException {
+    public static SequenceDatabaseEntry obtainEmblEntry( final String query, final int max_lines_to_return )
+            throws IOException {
         final List<String> lines = queryEmblDb( "query", max_lines_to_return );
         return EbiDbEntry.createInstanceFromPlainText( lines );
     }