query = node.getNodeData().getSequence().getAccession().getValue();
db = Db.UNIPROT;
}
- else if ( node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getAccession() != null )
+ else if ( node.getNodeData().isHasSequence()
+ && ( node.getNodeData().getSequence().getAccession() != null )
&& !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getSource() )
&& !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getValue() )
- && ( node.getNodeData().getSequence().getAccession().getValue().toLowerCase().startsWith( "embl" )
- || node.getNodeData().getSequence().getAccession().getValue().toLowerCase().startsWith( "ebi" )
- ) ) {
+ && ( node.getNodeData().getSequence().getAccession().getValue().toLowerCase().startsWith( "embl" ) || node
+ .getNodeData().getSequence().getAccession().getValue().toLowerCase().startsWith( "ebi" ) ) ) {
query = node.getNodeData().getSequence().getAccession().getValue();
db = Db.EMBL;
}
else if ( !ForesterUtil.isEmpty( node.getName() ) ) {
-
- if ( (query = UniProtWsTools.parseUniProtAccessor( node.getName() ))!=null ) {
+ if ( ( query = UniProtWsTools.parseUniProtAccessor( node.getName() ) ) != null ) {
db = Db.UNIPROT;
}
- else if (( query = DatabaseTools.parseGenbankAccessor( node.getName())) !=null ) {
+ else if ( ( query = DatabaseTools.parseGenbankAccessor( node.getName() ) ) != null ) {
db = Db.EMBL;
}
}
// http://download.java.net/media/java3d/builds/release/1.5.1/README-download.html
//
// /usr/lib/jvm/java-6-sun-1.6.0.24/jre
-// lib/ext/vecmath.jar
-// lib/ext/j3dcore.jar
-// lib/ext/j3dutils.jar
+// lib/ext/vecmath.jar
+// lib/ext/j3dcore.jar
+// lib/ext/j3dutils.jar
//
package org.forester.development;
-import com.sun.j3d.utils.universe.SimpleUniverse;
-import com.sun.j3d.utils.geometry.ColorCube;
-import com.sun.j3d.utils.geometry.Cylinder;
-import javax.media.j3d.BranchGroup;
+
+// import com.sun.j3d.utils.universe.SimpleUniverse;
+// import com.sun.j3d.utils.geometry.ColorCube;
+// import com.sun.j3d.utils.geometry.Cylinder;
+// import javax.media.j3d.BranchGroup;
/**
 * Small Java 3D experiment class.
 * The constructor body holds the scene set-up statements (SimpleUniverse,
 * BranchGroup, Cylinder, ColorCube); main only constructs an instance.
 */
public class Hello3d {
// Constructor: contains the Java 3D scene set-up statements.
public Hello3d() {
- SimpleUniverse universe = new SimpleUniverse();
- BranchGroup group = new BranchGroup();
- //ColorCube cc0 = new ColorCube( 0.1);
- // Appearance a = new Appearance();
- group.addChild( new Cylinder( 0,1));
- group.addChild( new ColorCube( 0.3 ) );
- universe.getViewingPlatform().setNominalViewingTransform();
- universe.addBranchGraph( group );
+ // SimpleUniverse universe = new SimpleUniverse();
+ // BranchGroup group = new BranchGroup();
+ // //ColorCube cc0 = new ColorCube( 0.1);
+ // // Appearance a = new Appearance();
+ // group.addChild( new Cylinder( 0,1));
+ // group.addChild( new ColorCube( 0.3 ) );
+ // universe.getViewingPlatform().setNominalViewingTransform();
+ // universe.addBranchGraph( group );
}
// Entry point: command-line arguments are not used.
- public static void main( String[] args ) {
+ public static void main( final String[] args ) {
new Hello3d();
}
} // end of class Hello3d
System.out.println( "failed." );
failed++;
}
-
System.out.print( "EMBL Entry Retrieval: " );
if ( Test.testEmblEntryRetrieval() ) {
System.out.println( "OK." );
System.out.println( "failed." );
failed++;
}
-
System.out.print( "Uniprot Entry Retrieval: " );
if ( Test.testUniprotEntryRetrieval() ) {
System.out.println( "OK." );
}
private static boolean testEmblEntryRetrieval() {
+ //The format for GenBank Accession numbers are:
+ //Nucleotide: 1 letter + 5 numerals OR 2 letters + 6 numerals
+ //Protein: 3 letters + 5 numerals
+ //http://www.ncbi.nlm.nih.gov/Sequin/acc.html
if ( !DatabaseTools.parseGenbankAccessor( "AY423861" ).equals( "AY423861" ) ) {
- System.out.println( DatabaseTools.parseGenbankAccessor( "AY423861" ));
+ return false;
+ }
+ if ( !DatabaseTools.parseGenbankAccessor( ".AY423861." ).equals( "AY423861" ) ) {
+ return false;
+ }
+ if ( DatabaseTools.parseGenbankAccessor( "AAY423861" ) != null ) {
+ return false;
+ }
+ if ( DatabaseTools.parseGenbankAccessor( "AY4238612" ) != null ) {
+ return false;
+ }
+ if ( DatabaseTools.parseGenbankAccessor( "AAY4238612" ) != null ) {
+ return false;
+ }
+ if ( DatabaseTools.parseGenbankAccessor( "Y423861" ) != null ) {
+ return false;
+ }
+ if ( !DatabaseTools.parseGenbankAccessor( "S12345" ).equals( "S12345" ) ) {
+ return false;
+ }
+ if ( !DatabaseTools.parseGenbankAccessor( "|S12345|" ).equals( "S12345" ) ) {
+ return false;
+ }
+ if ( DatabaseTools.parseGenbankAccessor( "|S123456" ) != null ) {
+ return false;
+ }
+ if ( DatabaseTools.parseGenbankAccessor( "ABC123456" ) != null ) {
+ return false;
+ }
+ if ( !DatabaseTools.parseGenbankAccessor( "ABC12345" ).equals( "ABC12345" ) ) {
+ return false;
+ }
+ if ( !DatabaseTools.parseGenbankAccessor( "&ABC12345&" ).equals( "ABC12345" ) ) {
+ return false;
+ }
+ if ( DatabaseTools.parseGenbankAccessor( "ABCD12345" ) != null ) {
return false;
}
return true;
}
-
+
private static boolean testUniprotEntryRetrieval() {
if ( !UniProtWsTools.parseUniProtAccessor( "P12345" ).equals( "P12345" ) ) {
return false;
if ( UniProtWsTools.parseUniProtAccessor( "EP12345" ) != null ) {
return false;
}
+ if ( UniProtWsTools.parseUniProtAccessor( "3 4P12345" ) != null ) {
+ return false;
+ }
+ if ( UniProtWsTools.parseUniProtAccessor( "P12345E" ) != null ) {
+ return false;
+ }
+ if ( UniProtWsTools.parseUniProtAccessor( "P123455" ) != null ) {
+ return false;
+ }
+ if ( UniProtWsTools.parseUniProtAccessor( "EP12345E" ) != null ) {
+ return false;
+ }
+ if ( UniProtWsTools.parseUniProtAccessor( "AY423861" ) != null ) {
+ return false;
+ }
if ( !UniProtWsTools.parseUniProtAccessor( "P1DDD5" ).equals( "P1DDD5" ) ) {
return false;
}
if ( !UniProtWsTools.parseUniProtAccessor( "P1234X/P12345/12-42" ).equals( "P12345" ) ) {
return false;
}
+ if ( !UniProtWsTools.parseUniProtAccessor( "P1234X P12345 12-42" ).equals( "P12345" ) ) {
+ return false;
+ }
if ( !UniProtWsTools.parseUniProtAccessor( "P12345/12-42" ).equals( "P12345" ) ) {
return false;
}
+
package org.forester.ws.uniprot;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-
public class DatabaseTools {
+
//The formats for GenBank accession numbers are:
//Nucleotide: 1 letter + 5 numerals OR 2 letters + 6 numerals
//Protein: 3 letters + 5 numerals
//http://www.ncbi.nlm.nih.gov/Sequin/acc.html
-
private final static Pattern GENBANK_NUCLEOTIDE_AC_PATTERN_1 = Pattern
- .compile( "^.*[^a-zA-Z0-9]?([A-Z]\\d{5})[^a-zA-Z0-9]?" );
-
+ .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]\\d{5})(?:[^a-zA-Z0-9]|\\Z)" );
private final static Pattern GENBANK_NUCLEOTIDE_AC_PATTERN_2 = Pattern
- .compile( "^.*[^a-zA-Z0-9]?([A-Z]{2}\\d{6})[^a-zA-Z0-9]?" );
-
- private final static Pattern GENBANK_PROTEIN_AC_PATTERN = Pattern
- .compile( "^.*[^a-zA-Z0-9]?([A-Z]{3}\\d{5})[^a-zA-Z0-9]?" );
-
-
-
- private final static boolean DEBUG = false;
+ .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]{2}\\d{6})(?:[^a-zA-Z0-9]|\\Z)" );
+ private final static Pattern GENBANK_PROTEIN_AC_PATTERN = Pattern
+ .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]{3}\\d{5})(?:[^a-zA-Z0-9]|\\Z)" );
+ private final static boolean DEBUG = false;
/**
* Returns null if no match.
return m.group( 1 );
}
else {
- m = GENBANK_NUCLEOTIDE_AC_PATTERN_2.matcher( query );
+ m = GENBANK_NUCLEOTIDE_AC_PATTERN_2.matcher( query );
if ( m.lookingAt() ) {
return m.group( 1 );
- }
+ }
else {
m = GENBANK_PROTEIN_AC_PATTERN.matcher( query );
if ( m.lookingAt() ) {
return target.substring( i_a + a.length(), i_b ).trim();
}
-
-
/**
 * Returns the trimmed text of target that follows the first occurrence of a.
 *
 * @param target the text to search
 * @param a the marker that precedes the wanted text
 * @return everything in target after the first occurrence of a, trimmed
 * @throws IllegalArgumentException if target does not contain a
 */
static String extract( final String target, final String a ) {
    final int i_a = target.indexOf( a );
    if ( i_a < 0 ) {
        // Without this check the -1 shifts the start index and unrelated text is returned.
        throw new IllegalArgumentException( "\"" + a + "\" not found in \"" + target + "\"" );
    }
    return target.substring( i_a + a.length() ).trim();
}
-
}
import sun.reflect.generics.reflectiveObjects.NotImplementedException;
public final class EbiDbEntry implements SequenceDatabaseEntry {
-//http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/emb/AAR37336/
-
-
+
+ //http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/emb/AAR37336/
private String _pa;
private String _de;
private String _os;
// Not publicly constructible; createInstanceFromPlainText creates the instances.
private EbiDbEntry() {
}
-
+ @Override
public Object clone() {
throw new NotImplementedException();
}
-
+
public static SequenceDatabaseEntry createInstanceFromPlainText( final List<String> lines ) {
final EbiDbEntry e = new EbiDbEntry();
for( final String line : lines ) {
e.setPA( DatabaseTools.extract( line, "PA", ";" ) );
}
else if ( line.startsWith( "DE" ) ) {
- // if ( ( line.indexOf( "RecName:" ) > 0 ) && ( line.indexOf( "Full=" ) > 0 ) ) {
- e.setDe( DatabaseTools.extract( line, "DE" ) );
+ // if ( ( line.indexOf( "RecName:" ) > 0 ) && ( line.indexOf( "Full=" ) > 0 ) ) {
+ e.setDe( DatabaseTools.extract( line, "DE" ) );
//}
}
- // else if ( line.startsWith( "GN" ) ) {
- // if ( ( line.indexOf( "Name=" ) > 0 ) ) {
- // e.setSymbol( extract( line, "Name=", ";" ) );
- // }
- // }
+ // else if ( line.startsWith( "GN" ) ) {
+ // if ( ( line.indexOf( "Name=" ) > 0 ) ) {
+ // e.setSymbol( extract( line, "Name=", ";" ) );
+ // }
+ // }
else if ( line.startsWith( "OS" ) ) {
if ( line.indexOf( "(" ) > 0 ) {
e.setOs( DatabaseTools.extract( line, "OS", "(" ) );
/**
 * Assigns pa to _pa only while _pa is still null; otherwise leaves it untouched.
 *
 * @param pa the value to store
 */
private void setPA( final String pa ) {
    if ( _pa != null ) {
        return;
    }
    _pa = pa;
}
}
private void setOs( final String os ) {
- if ( _os== null ) {
+ if ( _os == null ) {
_os = os;
}
}
return e;
}
-
-
@Override
public String getAccession() {
return _ac;
UNKNOWN, UNIPROT;
}
public final static String BASE_URL = "http://www.uniprot.org/";
-
public final static String BASE_EMBL_DB_URL = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/embl/";
private final static String URL_ENC = "UTF-8";
// uniprot/expasy accession number format (6 chars):
// letter digit letter-or-digit letter-or-digit letter-or-digit digit
+ // ?: => no back-reference
+ // \A => begin of String
+ // \Z => end of String
private final static Pattern UNIPROT_AC_PATTERN = Pattern
- .compile( "^.*[a-zA-Z0-9]?([A-NR-ZOPQ]\\d[A-Z0-9]{3}\\d)[^a-zA-Z0-9]?" );
+ .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]\\d[A-Z0-9]{3}\\d)(?:[^a-zA-Z0-9]|\\Z)" );
private final static boolean DEBUG = false;
private static String encode( final String str ) throws UnsupportedEncodingException {
return null;
}
}
-
-
public static List<UniProtTaxonomy> getTaxonomiesFromCommonName( final String cn, final int max_taxonomies_return )
throws IOException {
return taxonomies;
}
-
- public static List<String> queryEmblDb( final String query, int max_lines_to_return ) throws IOException {
- return queryDb( query,
- max_lines_to_return,
- BASE_EMBL_DB_URL ) ;
+ public static List<String> queryEmblDb( final String query, final int max_lines_to_return ) throws IOException {
+ return queryDb( query, max_lines_to_return, BASE_EMBL_DB_URL );
}
-
-
-
- public static List<String> queryUniprot( final String query, int max_lines_to_return ) throws IOException {
- return queryDb( query,
- max_lines_to_return,
- BASE_URL ) ;
-
-
+
+ public static List<String> queryUniprot( final String query, final int max_lines_to_return ) throws IOException {
+ return queryDb( query, max_lines_to_return, BASE_URL );
}
- public static List<String> queryDb( final String query,
- int max_lines_to_return,
- final String base_url ) throws IOException {
+ public static List<String> queryDb( final String query, int max_lines_to_return, final String base_url )
+ throws IOException {
if ( ForesterUtil.isEmpty( query ) ) {
throw new IllegalArgumentException( "illegal attempt to use empty query " );
}
in.close();
return result;
}
-
-
+
/**
 * Fetches the text for "uniprot/" + query + ".txt" through queryUniprot and
 * builds an entry from the returned lines.
 *
 * @param query the identifier inserted into the request path
 * @param max_lines_to_return passed through to queryUniprot
 * @return the entry created by UniProtEntry.createInstanceFromPlainText
 * @throws IOException propagated from queryUniprot
 */
public static SequenceDatabaseEntry obtainUniProtEntry( final String query, final int max_lines_to_return )
        throws IOException {
    final String path = "uniprot/" + query + ".txt";
    return UniProtEntry.createInstanceFromPlainText( queryUniprot( path, max_lines_to_return ) );
}
- public static SequenceDatabaseEntry obtainEmblEntry( String query, int max_lines_to_return ) throws IOException {
+ public static SequenceDatabaseEntry obtainEmblEntry( final String query, final int max_lines_to_return )
+ throws IOException {
final List<String> lines = queryEmblDb( "query", max_lines_to_return );
return EbiDbEntry.createInstanceFromPlainText( lines );
}