import org.forester.phylogeny.data.Taxonomy;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
import org.forester.util.ForesterUtil;
+import org.forester.ws.uniprot.DatabaseTools;
import org.forester.ws.uniprot.SequenceDatabaseEntry;
import org.forester.ws.uniprot.UniProtWsTools;
private final static boolean DEBUG = true;
private enum Db {
- UNKNOWN, UNIPROT;
+ UNKNOWN, UNIPROT, EMBL;
}
public SequenceDataRetriver( final MainFrameApplication mf, final TreePanel treepanel, final Phylogeny phy ) {
query = node.getNodeData().getSequence().getAccession().getValue();
db = Db.UNIPROT;
}
+ else if ( node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getAccession() != null )
+ && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getSource() )
+ && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getValue() )
+ && ( node.getNodeData().getSequence().getAccession().getValue().toLowerCase().startsWith( "embl" )
+ || node.getNodeData().getSequence().getAccession().getValue().toLowerCase().startsWith( "ebi" )
+ ) ) {
+ query = node.getNodeData().getSequence().getAccession().getValue();
+ db = Db.EMBL;
+ }
else if ( !ForesterUtil.isEmpty( node.getName() ) ) {
- query = UniProtWsTools.parseUniProtAccessor( node.getName() );
- if ( !ForesterUtil.isEmpty( query ) ) {
+
+ if ( (query = UniProtWsTools.parseUniProtAccessor( node.getName() ))!=null ) {
db = Db.UNIPROT;
}
+ else if (( query = DatabaseTools.parseGenbankAccessor( node.getName())) !=null ) {
+ db = Db.EMBL;
+ }
}
if ( !ForesterUtil.isEmpty( query ) ) {
SequenceDatabaseEntry db_entry = null;
// Ignore.
}
}
+ else if ( db == Db.EMBL ) {
+ if ( DEBUG ) {
+ System.out.println( "embl: " + query );
+ }
+ try {
+ db_entry = UniProtWsTools.obtainEmblEntry( query, 200 );
+ }
+ catch ( final FileNotFoundException e ) {
+ // Ignore.
+ }
+ }
if ( db_entry != null ) {
if ( !ForesterUtil.isEmpty( db_entry.getAccession() ) ) {
seq.setAccession( new Accession( db_entry.getAccession(), "uniprot" ) );
--- /dev/null
+// http://www.java3d.org/tutorial.html
+// http://download.java.net/media/java3d/builds/release/1.5.1/README-download.html
+//
+// /usr/lib/jvm/java-6-sun-1.6.0.24/jre
+// lib/ext/vecmath.jar
+// lib/ext/j3dcore.jar
+// lib/ext/j3dutils.jar
+//
+
+package org.forester.development;
+import com.sun.j3d.utils.universe.SimpleUniverse;
+import com.sun.j3d.utils.geometry.ColorCube;
+import com.sun.j3d.utils.geometry.Cylinder;
+import javax.media.j3d.BranchGroup;
+public class Hello3d {
+
+ public Hello3d() {
+ SimpleUniverse universe = new SimpleUniverse();
+ BranchGroup group = new BranchGroup();
+ //ColorCube cc0 = new ColorCube( 0.1);
+ // Appearance a = new Appearance();
+ group.addChild( new Cylinder( 0,1));
+ group.addChild( new ColorCube( 0.3 ) );
+ universe.getViewingPlatform().setNominalViewingTransform();
+ universe.addBranchGraph( group );
+ }
+
+ public static void main( String[] args ) {
+ new Hello3d();
+ }
+} // end of class Hello3d
import org.forester.util.ForesterConstants;
import org.forester.util.ForesterUtil;
import org.forester.util.GeneralTable;
+import org.forester.ws.uniprot.DatabaseTools;
import org.forester.ws.uniprot.SequenceDatabaseEntry;
import org.forester.ws.uniprot.UniProtTaxonomy;
import org.forester.ws.uniprot.UniProtWsTools;
System.out.println( "failed." );
failed++;
}
+
+ System.out.print( "EMBL Entry Retrieval: " );
+ if ( Test.testEmblEntryRetrieval() ) {
+ System.out.println( "OK." );
+ succeeded++;
+ }
+ else {
+ System.out.println( "failed." );
+ failed++;
+ }
+
System.out.print( "Uniprot Entry Retrieval: " );
if ( Test.testUniprotEntryRetrieval() ) {
System.out.println( "OK." );
return true;
}
+ private static boolean testEmblEntryRetrieval() {
+ if ( !DatabaseTools.parseGenbankAccessor( "AY423861" ).equals( "AY423861" ) ) {
+ System.out.println( DatabaseTools.parseGenbankAccessor( "AY423861" ));
+ return false;
+ }
+ return true;
+ }
+
private static boolean testUniprotEntryRetrieval() {
if ( !UniProtWsTools.parseUniProtAccessor( "P12345" ).equals( "P12345" ) ) {
return false;
}
+ if ( UniProtWsTools.parseUniProtAccessor( "EP12345" ) != null ) {
+ return false;
+ }
if ( !UniProtWsTools.parseUniProtAccessor( "P1DDD5" ).equals( "P1DDD5" ) ) {
return false;
}
--- /dev/null
+package org.forester.ws.uniprot;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+
/**
 * Static helpers for recognizing GenBank-style accessions in free text and
 * for cutting fields out of single lines of text.
 */
public class DatabaseTools {

    // GenBank accession formats (http://www.ncbi.nlm.nih.gov/Sequin/acc.html):
    //   Nucleotide: 1 letter + 5 numerals OR 2 letters + 6 numerals
    //   Protein:    3 letters + 5 numerals
    //
    // Every pattern requires the accession to be delimited on both sides by
    // either the start/end of the input or a non-alphanumeric character.
    // Without mandatory delimiters "AY423861" would be reported as "Y42386"
    // by the 1-letter pattern.
    private final static Pattern   GENBANK_NUCLEOTIDE_AC_PATTERN_1 = Pattern
                                                                           .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]\\d{5})(?:[^a-zA-Z0-9]|\\Z)" );
    private final static Pattern   GENBANK_NUCLEOTIDE_AC_PATTERN_2 = Pattern
                                                                           .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]{2}\\d{6})(?:[^a-zA-Z0-9]|\\Z)" );
    private final static Pattern   GENBANK_PROTEIN_AC_PATTERN      = Pattern
                                                                           .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]{3}\\d{5})(?:[^a-zA-Z0-9]|\\Z)" );
    // Patterns in the order in which they are tried.
    private final static Pattern[] GENBANK_AC_PATTERNS             = { GENBANK_NUCLEOTIDE_AC_PATTERN_1,
            GENBANK_NUCLEOTIDE_AC_PATTERN_2, GENBANK_PROTEIN_AC_PATTERN };

    /**
     * Extracts a GenBank-style accession from the given text.
     *
     * @param query the text to search; may be null
     * @return the accession, or null if query is null or contains no
     *         delimited accession
     */
    static public String parseGenbankAccessor( final String query ) {
        if ( query == null ) {
            return null;
        }
        for( final Pattern pattern : GENBANK_AC_PATTERNS ) {
            final Matcher m = pattern.matcher( query );
            if ( m.lookingAt() ) {
                return m.group( 1 );
            }
        }
        return null;
    }

    /**
     * Returns the trimmed text between the first occurrence of a and the
     * first occurrence of b that follows it.
     *
     * @param target the text to cut from
     * @param a the start marker
     * @param b the end marker
     * @return the trimmed text between the two markers
     * @throws IllegalArgumentException if a is not found, or b is not found
     *         after a
     */
    static String extract( final String target, final String a, final String b ) {
        final int i_a = target.indexOf( a );
        final int start = ( i_a < 0 ) ? -1 : i_a + a.length();
        // Look for the end marker only behind the start marker, so that an
        // earlier occurrence of b cannot hide the one that closes the field.
        final int i_b = ( start < 0 ) ? -1 : target.indexOf( b, start );
        if ( i_b < 0 ) {
            throw new IllegalArgumentException( "attempt to extract from [" + target + "] between [" + a + "] and ["
                    + b + "]" );
        }
        return target.substring( start, i_b ).trim();
    }

    /**
     * Returns the trimmed text following the first occurrence of a.
     *
     * @param target the text to cut from
     * @param a the start marker
     * @return the trimmed remainder of target after the marker
     * @throws IllegalArgumentException if a is not found
     */
    static String extract( final String target, final String a ) {
        final int i_a = target.indexOf( a );
        if ( i_a < 0 ) {
            throw new IllegalArgumentException( "attempt to extract from [" + target + "] after [" + a + "]" );
        }
        return target.substring( i_a + a.length() ).trim();
    }
}
--- /dev/null
+// $Id:
+// forester -- software libraries and applications
+// for genomics and evolutionary biology research.
+//
+// Copyright (C) 2010 Christian M Zmasek
+// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
+// All rights reserved
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.ws.uniprot;
+
+import java.util.List;
+
+import sun.reflect.generics.reflectiveObjects.NotImplementedException;
+
+public final class EbiDbEntry implements SequenceDatabaseEntry {
+//http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/emb/AAR37336/
+
+
+ private String _pa;
+ private String _de;
+ private String _os;
+ private String _tax_id;
+ private String _symbol;
+
+ private EbiDbEntry() {
+ }
+
+
+ public Object clone() {
+ throw new NotImplementedException();
+ }
+
+ public static SequenceDatabaseEntry createInstanceFromPlainText( final List<String> lines ) {
+ final EbiDbEntry e = new EbiDbEntry();
+ for( final String line : lines ) {
+ if ( line.startsWith( "PA" ) ) {
+ e.setPA( DatabaseTools.extract( line, "PA", ";" ) );
+ }
+ else if ( line.startsWith( "DE" ) ) {
+ // if ( ( line.indexOf( "RecName:" ) > 0 ) && ( line.indexOf( "Full=" ) > 0 ) ) {
+ e.setDe( DatabaseTools.extract( line, "DE" ) );
+ //}
+ }
+ // else if ( line.startsWith( "GN" ) ) {
+ // if ( ( line.indexOf( "Name=" ) > 0 ) ) {
+ // e.setSymbol( extract( line, "Name=", ";" ) );
+ // }
+ // }
+ else if ( line.startsWith( "OS" ) ) {
+ if ( line.indexOf( "(" ) > 0 ) {
+ e.setOs( DatabaseTools.extract( line, "OS", "(" ) );
+ }
+ else {
+ e.setOs( DatabaseTools.extract( line, "OS" ) );
+ }
+ }
+ else if ( line.startsWith( "OX" ) ) {
+ if ( line.indexOf( "NCBI_TaxID=" ) > 0 ) {
+ e.setTaxId( DatabaseTools.extract( line, "NCBI_TaxID=", ";" ) );
+ }
+ }
+ }
+ return e;
+ }
+
+ @Override
+ public String getAccession() {
+ return _pa;
+ }
+
+ private void setPA( final String pa ) {
+ if ( _pa == null ) {
+ _pa= pa;
+ }
+ }
+
+ @Override
+ public String getSequenceName() {
+ return _de;
+ }
+
+ private void setDe( final String rec_name ) {
+ if ( _de == null ) {
+ _de = rec_name;
+ }
+ }
+
+ @Override
+ public String getTaxonomyScientificName() {
+ return _os;
+ }
+
+ private void setOs( final String os ) {
+ if ( _os== null ) {
+ _os = os;
+ }
+ }
+
+ @Override
+ public String getTaxonomyIdentifier() {
+ return _tax_id;
+ }
+
+ private void setTaxId( final String tax_id ) {
+ if ( _tax_id == null ) {
+ _tax_id = tax_id;
+ }
+ }
+
+ @Override
+ public String getSequenceSymbol() {
+ return _symbol;
+ }
+
+ private void setSymbol( final String symbol ) {
+ if ( _symbol == null ) {
+ _symbol = symbol;
+ }
+ }
+}
final UniProtEntry e = new UniProtEntry();
for( final String line : lines ) {
if ( line.startsWith( "AC" ) ) {
- e.setAc( extract( line, "AC", ";" ) );
+ e.setAc( DatabaseTools.extract( line, "AC", ";" ) );
}
else if ( line.startsWith( "DE" ) ) {
if ( ( line.indexOf( "RecName:" ) > 0 ) && ( line.indexOf( "Full=" ) > 0 ) ) {
- e.setRecName( extract( line, "Full=", ";" ) );
+ e.setRecName( DatabaseTools.extract( line, "Full=", ";" ) );
}
}
else if ( line.startsWith( "GN" ) ) {
if ( ( line.indexOf( "Name=" ) > 0 ) ) {
- e.setSymbol( extract( line, "Name=", ";" ) );
+ e.setSymbol( DatabaseTools.extract( line, "Name=", ";" ) );
}
}
else if ( line.startsWith( "OS" ) ) {
if ( line.indexOf( "(" ) > 0 ) {
- e.setOsScientificName( extract( line, "OS", "(" ) );
+ e.setOsScientificName( DatabaseTools.extract( line, "OS", "(" ) );
}
else {
- e.setOsScientificName( extract( line, "OS", "." ) );
+ e.setOsScientificName( DatabaseTools.extract( line, "OS", "." ) );
}
}
else if ( line.startsWith( "OX" ) ) {
if ( line.indexOf( "NCBI_TaxID=" ) > 0 ) {
- e.setTaxId( extract( line, "NCBI_TaxID=", ";" ) );
+ e.setTaxId( DatabaseTools.extract( line, "NCBI_TaxID=", ";" ) );
}
}
}
return e;
}
- private static String extract( final String target, final String a, final String b ) {
- final int i_a = target.indexOf( a );
- final int i_b = target.indexOf( b );
- if ( ( i_a < 0 ) || ( i_b < i_a ) ) {
- throw new IllegalArgumentException( "attempt to extract from [" + target + "] between [" + a + "] and ["
- + b + "]" );
- }
- return target.substring( i_a + a.length(), i_b ).trim();
- }
+
@Override
public String getAccession() {
UNKNOWN, UNIPROT;
}
public final static String BASE_URL = "http://www.uniprot.org/";
+
+ public final static String BASE_EMBL_DB_URL = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/embl/";
private final static String URL_ENC = "UTF-8";
// uniprot/expasy accession number format (6 chars):
// letter digit letter-or-digit letter-or-digit letter-or-digit digit
}
/**
- * Return null if no match.
+ * Returns null if no match.
*
* @param query
* @param db
return null;
}
}
+
+
public static List<UniProtTaxonomy> getTaxonomiesFromCommonName( final String cn, final int max_taxonomies_return )
throws IOException {
return taxonomies;
}
+
+ public static List<String> queryEmblDb( final String query, int max_lines_to_return ) throws IOException {
+ return queryDb( query,
+ max_lines_to_return,
+ BASE_EMBL_DB_URL ) ;
+ }
+
+
+
public static List<String> queryUniprot( final String query, int max_lines_to_return ) throws IOException {
+ return queryDb( query,
+ max_lines_to_return,
+ BASE_URL ) ;
+
+
+ }
+
+ public static List<String> queryDb( final String query,
+ int max_lines_to_return,
+ final String base_url ) throws IOException {
if ( ForesterUtil.isEmpty( query ) ) {
throw new IllegalArgumentException( "illegal attempt to use empty query " );
}
if ( max_lines_to_return < 1 ) {
max_lines_to_return = 1;
}
- final URL url = new URL( BASE_URL + query );
+ final URL url = new URL( base_url + query );
if ( DEBUG ) {
System.out.println( "url: " + url.toString() );
}
in.close();
return result;
}
-
+
+
/**
 * Fetches the record "uniprot/&lt;query&gt;.txt" via {@code queryUniprot} and
 * turns its lines into an entry.
 *
 * @param query the identifier placed between "uniprot/" and ".txt"
 * @param max_lines_to_return passed through to queryUniprot unchanged
 * @return the entry built by UniProtEntry.createInstanceFromPlainText
 * @throws IOException if the query fails
 */
public static SequenceDatabaseEntry obtainUniProtEntry( final String query, final int max_lines_to_return )
        throws IOException {
    final String path = "uniprot/" + query + ".txt";
    return UniProtEntry.createInstanceFromPlainText( queryUniprot( path, max_lines_to_return ) );
}
+
+ public static SequenceDatabaseEntry obtainEmblEntry( String query, int max_lines_to_return ) throws IOException {
+ final List<String> lines = queryEmblDb( "query", max_lines_to_return );
+ return EbiDbEntry.createInstanceFromPlainText( lines );
+ }
}