From: cmzmasek@gmail.com Date: Mon, 25 Apr 2011 21:34:07 +0000 (+0000) Subject: in progress X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=e4f95087154c70d569fe54d8f08af7d4f218df0c;p=jalview.git in progress --- diff --git a/forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java b/forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java index 168a163..5b454e3 100644 --- a/forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java +++ b/forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java @@ -174,21 +174,20 @@ public final class SequenceDataRetriver implements Runnable { query = node.getNodeData().getSequence().getAccession().getValue(); db = Db.UNIPROT; } - else if ( node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getAccession() != null ) + else if ( node.getNodeData().isHasSequence() + && ( node.getNodeData().getSequence().getAccession() != null ) && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getSource() ) && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getValue() ) - && ( node.getNodeData().getSequence().getAccession().getValue().toLowerCase().startsWith( "embl" ) - || node.getNodeData().getSequence().getAccession().getValue().toLowerCase().startsWith( "ebi" ) - ) ) { + && ( node.getNodeData().getSequence().getAccession().getValue().toLowerCase().startsWith( "embl" ) || node + .getNodeData().getSequence().getAccession().getValue().toLowerCase().startsWith( "ebi" ) ) ) { query = node.getNodeData().getSequence().getAccession().getValue(); db = Db.EMBL; } else if ( !ForesterUtil.isEmpty( node.getName() ) ) { - - if ( (query = UniProtWsTools.parseUniProtAccessor( node.getName() ))!=null ) { + if ( ( query = UniProtWsTools.parseUniProtAccessor( node.getName() ) ) != null ) { db = Db.UNIPROT; } - else if (( query = DatabaseTools.parseGenbankAccessor( node.getName())) !=null ) { + else if ( ( query = DatabaseTools.parseGenbankAccessor( node.getName() ) ) != null ) { db = Db.EMBL; } } diff --git a/forester/java/src/org/forester/development/Hello3d.java b/forester/java/src/org/forester/development/Hello3d.java index dc0d099..0cf0112 100644 --- a/forester/java/src/org/forester/development/Hello3d.java +++ b/forester/java/src/org/forester/development/Hello3d.java @@ -2,30 +2,31 @@ // http://download.java.net/media/java3d/builds/release/1.5.1/README-download.html // // /usr/lib/jvm/java-6-sun-1.6.0.24/jre -// lib/ext/vecmath.jar -// lib/ext/j3dcore.jar -// lib/ext/j3dutils.jar +// lib/ext/vecmath.jar +// lib/ext/j3dcore.jar +// lib/ext/j3dutils.jar // package org.forester.development; -import com.sun.j3d.utils.universe.SimpleUniverse; -import com.sun.j3d.utils.geometry.ColorCube; -import com.sun.j3d.utils.geometry.Cylinder; -import javax.media.j3d.BranchGroup; + +// import com.sun.j3d.utils.universe.SimpleUniverse; +// import com.sun.j3d.utils.geometry.ColorCube; +// import com.sun.j3d.utils.geometry.Cylinder; +// import javax.media.j3d.BranchGroup; public class Hello3d { public Hello3d() { - SimpleUniverse universe = new SimpleUniverse(); - BranchGroup group = new BranchGroup(); - //ColorCube cc0 = new ColorCube( 0.1); - // Appearance a = new Appearance(); - group.addChild( new Cylinder( 0,1)); - group.addChild( new ColorCube( 0.3 ) ); - universe.getViewingPlatform().setNominalViewingTransform(); - universe.addBranchGraph( group ); + // SimpleUniverse universe = new SimpleUniverse(); + // BranchGroup group = new BranchGroup(); + // //ColorCube cc0 = new ColorCube( 0.1); + // // Appearance a = new Appearance(); + // group.addChild( new Cylinder( 0,1)); + // group.addChild( new ColorCube( 0.3 ) ); + // universe.getViewingPlatform().setNominalViewingTransform(); + // universe.addBranchGraph( group ); } - public static void main( String[] args ) { + public static void main( final String[] args ) { new Hello3d(); } } // end of class Hello3d diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index f01c864..8533d18 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -647,7 +647,6 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "EMBL Entry Retrieval: " ); if ( Test.testEmblEntryRetrieval() ) { System.out.println( "OK." ); @@ -657,7 +656,6 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "Uniprot Entry Retrieval: " ); if ( Test.testUniprotEntryRetrieval() ) { System.out.println( "OK." ); @@ -7744,13 +7742,52 @@ public final class Test { } private static boolean testEmblEntryRetrieval() { + //The format for GenBank Accession numbers are: + //Nucleotide: 1 letter + 5 numerals OR 2 letters + 6 numerals + //Protein: 3 letters + 5 numerals + //http://www.ncbi.nlm.nih.gov/Sequin/acc.html if ( !DatabaseTools.parseGenbankAccessor( "AY423861" ).equals( "AY423861" ) ) { - System.out.println( DatabaseTools.parseGenbankAccessor( "AY423861" )); + return false; + } + if ( !DatabaseTools.parseGenbankAccessor( ".AY423861." ).equals( "AY423861" ) ) { + return false; + } + if ( DatabaseTools.parseGenbankAccessor( "AAY423861" ) != null ) { + return false; + } + if ( DatabaseTools.parseGenbankAccessor( "AY4238612" ) != null ) { + return false; + } + if ( DatabaseTools.parseGenbankAccessor( "AAY4238612" ) != null ) { + return false; + } + if ( DatabaseTools.parseGenbankAccessor( "Y423861" ) != null ) { + return false; + } + if ( !DatabaseTools.parseGenbankAccessor( "S12345" ).equals( "S12345" ) ) { + return false; + } + if ( !DatabaseTools.parseGenbankAccessor( "|S12345|" ).equals( "S12345" ) ) { + return false; + } + if ( DatabaseTools.parseGenbankAccessor( "|S123456" ) != null ) { + return false; + } + if ( DatabaseTools.parseGenbankAccessor( "ABC123456" ) != null ) { + return false; + } + if ( !DatabaseTools.parseGenbankAccessor( "ABC12345" ).equals( "ABC12345" ) ) { + return false; + } + if ( !DatabaseTools.parseGenbankAccessor( "&ABC12345&" ).equals( "ABC12345" ) ) { + return false; + } + if ( DatabaseTools.parseGenbankAccessor( "ABCD12345" ) != null ) { return false; } return true; } - + private static boolean testUniprotEntryRetrieval() { if ( !UniProtWsTools.parseUniProtAccessor( "P12345" ).equals( "P12345" ) ) { return false; @@ -7758,6 +7795,21 @@ public final class Test { if ( UniProtWsTools.parseUniProtAccessor( "EP12345" ) != null ) { return false; } + if ( UniProtWsTools.parseUniProtAccessor( "3 4P12345" ) != null ) { + return false; + } + if ( UniProtWsTools.parseUniProtAccessor( "P12345E" ) != null ) { + return false; + } + if ( UniProtWsTools.parseUniProtAccessor( "P123455" ) != null ) { + return false; + } + if ( UniProtWsTools.parseUniProtAccessor( "EP12345E" ) != null ) { + return false; + } + if ( UniProtWsTools.parseUniProtAccessor( "AY423861" ) != null ) { + return false; + } if ( !UniProtWsTools.parseUniProtAccessor( "P1DDD5" ).equals( "P1DDD5" ) ) { return false; } @@ -7767,6 +7819,9 @@ public final class Test { if ( !UniProtWsTools.parseUniProtAccessor( "P1234X/P12345/12-42" ).equals( "P12345" ) ) { return false; } + if ( !UniProtWsTools.parseUniProtAccessor( "P1234X P12345 12-42" ).equals( "P12345" ) ) { + return false; + } if ( !UniProtWsTools.parseUniProtAccessor( "P12345/12-42" ).equals( "P12345" ) ) { return false; } diff --git a/forester/java/src/org/forester/ws/uniprot/DatabaseTools.java b/forester/java/src/org/forester/ws/uniprot/DatabaseTools.java index 3826e89..77b317d 100644 --- a/forester/java/src/org/forester/ws/uniprot/DatabaseTools.java +++ b/forester/java/src/org/forester/ws/uniprot/DatabaseTools.java @@ -1,27 +1,22 @@ + package org.forester.ws.uniprot; import java.util.regex.Matcher; import java.util.regex.Pattern; - public class DatabaseTools { + //The format for GenBank Accession numbers are: //Nucleotide: 1 letter + 5 numerals OR 2 letters + 6 numerals //Protein: 3 letters + 5 numerals //http://www.ncbi.nlm.nih.gov/Sequin/acc.html - private final static Pattern GENBANK_NUCLEOTIDE_AC_PATTERN_1 = Pattern - .compile( "^.*[^a-zA-Z0-9]?([A-Z]\\d{5})[^a-zA-Z0-9]?" ); - + .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]\\d{5})(?:[^a-zA-Z0-9]|\\Z)" ); private final static Pattern GENBANK_NUCLEOTIDE_AC_PATTERN_2 = Pattern - .compile( "^.*[^a-zA-Z0-9]?([A-Z]{2}\\d{6})[^a-zA-Z0-9]?" ); - - private final static Pattern GENBANK_PROTEIN_AC_PATTERN = Pattern - .compile( "^.*[^a-zA-Z0-9]?([A-Z]{3}\\d{5})[^a-zA-Z0-9]?" ); - - - - private final static boolean DEBUG = false; + .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]{2}\\d{6})(?:[^a-zA-Z0-9]|\\Z)" ); + private final static Pattern GENBANK_PROTEIN_AC_PATTERN = Pattern + .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]{3}\\d{5})(?:[^a-zA-Z0-9]|\\Z)" ); + private final static boolean DEBUG = false; /** * Returns null if no match. @@ -36,10 +31,10 @@ public class DatabaseTools { return m.group( 1 ); } else { - m = GENBANK_NUCLEOTIDE_AC_PATTERN_2.matcher( query ); + m = GENBANK_NUCLEOTIDE_AC_PATTERN_2.matcher( query ); if ( m.lookingAt() ) { return m.group( 1 ); - } + } else { m = GENBANK_PROTEIN_AC_PATTERN.matcher( query ); if ( m.lookingAt() ) { @@ -62,11 +57,8 @@ public class DatabaseTools { return target.substring( i_a + a.length(), i_b ).trim(); } - - static String extract( final String target, final String a ) { final int i_a = target.indexOf( a ); return target.substring( i_a + a.length() ).trim(); } - } diff --git a/forester/java/src/org/forester/ws/uniprot/EbiDbEntry.java b/forester/java/src/org/forester/ws/uniprot/EbiDbEntry.java index 4f7b779..8adda22 100644 --- a/forester/java/src/org/forester/ws/uniprot/EbiDbEntry.java +++ b/forester/java/src/org/forester/ws/uniprot/EbiDbEntry.java @@ -30,9 +30,8 @@ import java.util.List; import sun.reflect.generics.reflectiveObjects.NotImplementedException; public final class EbiDbEntry implements SequenceDatabaseEntry { -//http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/emb/AAR37336/ - - + + //http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/emb/AAR37336/ private String _pa; private String _de; private String _os; @@ -42,11 +41,11 @@ public final class EbiDbEntry implements SequenceDatabaseEntry { private EbiDbEntry() { } - + @Override public Object clone() { throw new NotImplementedException(); } - + public static SequenceDatabaseEntry createInstanceFromPlainText( final List lines ) { final EbiDbEntry e = new EbiDbEntry(); for( final String line : lines ) { @@ -54,15 +53,15 @@ public final class EbiDbEntry implements SequenceDatabaseEntry { e.setPA( DatabaseTools.extract( line, "PA", ";" ) ); } else if ( line.startsWith( "DE" ) ) { - // if ( ( line.indexOf( "RecName:" ) > 0 ) && ( line.indexOf( "Full=" ) > 0 ) ) { - e.setDe( DatabaseTools.extract( line, "DE" ) ); + // if ( ( line.indexOf( "RecName:" ) > 0 ) && ( line.indexOf( "Full=" ) > 0 ) ) { + e.setDe( DatabaseTools.extract( line, "DE" ) ); //} } - // else if ( line.startsWith( "GN" ) ) { - // if ( ( line.indexOf( "Name=" ) > 0 ) ) { - // e.setSymbol( extract( line, "Name=", ";" ) ); - // } - // } + // else if ( line.startsWith( "GN" ) ) { + // if ( ( line.indexOf( "Name=" ) > 0 ) ) { + // e.setSymbol( extract( line, "Name=", ";" ) ); + // } + // } else if ( line.startsWith( "OS" ) ) { if ( line.indexOf( "(" ) > 0 ) { e.setOs( DatabaseTools.extract( line, "OS", "(" ) ); @@ -87,7 +86,7 @@ public final class EbiDbEntry implements SequenceDatabaseEntry { private void setPA( final String pa ) { if ( _pa == null ) { - _pa= pa; + _pa = pa; } } @@ -108,7 +107,7 @@ public final class EbiDbEntry implements SequenceDatabaseEntry { } private void setOs( final String os ) { - if ( _os== null ) { + if ( _os == null ) { _os = os; } } diff --git a/forester/java/src/org/forester/ws/uniprot/UniProtEntry.java b/forester/java/src/org/forester/ws/uniprot/UniProtEntry.java index 2b74883..222df5b 100644 --- a/forester/java/src/org/forester/ws/uniprot/UniProtEntry.java +++ b/forester/java/src/org/forester/ws/uniprot/UniProtEntry.java @@ -71,8 +71,6 @@ public final class UniProtEntry implements SequenceDatabaseEntry { return e; } - - @Override public String getAccession() { return _ac; diff --git a/forester/java/src/org/forester/ws/uniprot/UniProtWsTools.java b/forester/java/src/org/forester/ws/uniprot/UniProtWsTools.java index 96aeb9d..8bef111 100644 --- a/forester/java/src/org/forester/ws/uniprot/UniProtWsTools.java +++ b/forester/java/src/org/forester/ws/uniprot/UniProtWsTools.java @@ -45,13 +45,15 @@ public final class UniProtWsTools { UNKNOWN, UNIPROT; } public final static String BASE_URL = "http://www.uniprot.org/"; - public final static String BASE_EMBL_DB_URL = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/embl/"; private final static String URL_ENC = "UTF-8"; // uniprot/expasy accession number format (6 chars): // letter digit letter-or-digit letter-or-digit letter-or-digit digit + // ?: => no back-reference + // \A => begin of String + // \Z => end of String private final static Pattern UNIPROT_AC_PATTERN = Pattern - .compile( "^.*[a-zA-Z0-9]?([A-NR-ZOPQ]\\d[A-Z0-9]{3}\\d)[^a-zA-Z0-9]?" ); + .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]\\d[A-Z0-9]{3}\\d)(?:[^a-zA-Z0-9]|\\Z)" ); private final static boolean DEBUG = false; private static String encode( final String str ) throws UnsupportedEncodingException { @@ -74,8 +76,6 @@ public final class UniProtWsTools { return null; } } - - public static List getTaxonomiesFromCommonName( final String cn, final int max_taxonomies_return ) throws IOException { @@ -212,26 +212,16 @@ public final class UniProtWsTools { return taxonomies; } - - public static List queryEmblDb( final String query, int max_lines_to_return ) throws IOException { - return queryDb( query, - max_lines_to_return, - BASE_EMBL_DB_URL ) ; + public static List queryEmblDb( final String query, final int max_lines_to_return ) throws IOException { + return queryDb( query, max_lines_to_return, BASE_EMBL_DB_URL ); } - - - - public static List queryUniprot( final String query, int max_lines_to_return ) throws IOException { - return queryDb( query, - max_lines_to_return, - BASE_URL ) ; - - + + public static List queryUniprot( final String query, final int max_lines_to_return ) throws IOException { + return queryDb( query, max_lines_to_return, BASE_URL ); } - public static List queryDb( final String query, - int max_lines_to_return, - final String base_url ) throws IOException { + public static List queryDb( final String query, int max_lines_to_return, final String base_url ) + throws IOException { if ( ForesterUtil.isEmpty( query ) ) { throw new IllegalArgumentException( "illegal attempt to use empty query " ); } @@ -255,15 +245,15 @@ public final class UniProtWsTools { in.close(); return result; } - - + public static SequenceDatabaseEntry obtainUniProtEntry( final String query, final int max_lines_to_return ) throws IOException { final List lines = queryUniprot( "uniprot/" + query + ".txt", max_lines_to_return ); return UniProtEntry.createInstanceFromPlainText( lines ); } - public static SequenceDatabaseEntry obtainEmblEntry( String query, int max_lines_to_return ) throws IOException { + public static SequenceDatabaseEntry obtainEmblEntry( final String query, final int max_lines_to_return ) + throws IOException { final List lines = queryEmblDb( "query", max_lines_to_return ); return EbiDbEntry.createInstanceFromPlainText( lines ); }