import org.forester.archaeopteryx.tools.PhyloInferenceDialog;
import org.forester.archaeopteryx.tools.PhylogeneticInferenceOptions;
import org.forester.archaeopteryx.tools.PhylogeneticInferrer;
+import org.forester.archaeopteryx.tools.SequenceDataRetriver;
import org.forester.archaeopteryx.tools.TaxonomyDataObtainer;
-import org.forester.archaeopteryx.tools.UniProtSequenceObtainer;
import org.forester.archaeopteryx.webservices.PhylogeniesWebserviceClient;
import org.forester.archaeopteryx.webservices.WebservicesManager;
import org.forester.io.parsers.FastaParser;
if ( getCurrentTreePanel() != null ) {
final Phylogeny phy = getCurrentTreePanel().getPhylogeny();
if ( ( phy != null ) && !phy.isEmpty() ) {
- final UniProtSequenceObtainer u = new UniProtSequenceObtainer( this,
- _mainpanel.getCurrentTreePanel(),
- phy.copy() );
+ final SequenceDataRetriver u = new SequenceDataRetriver( this,
+ _mainpanel.getCurrentTreePanel(),
+ phy.copy() );
new Thread( u ).start();
}
}
package org.forester.archaeopteryx.tools;
+import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.UnknownHostException;
-import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;
+import java.util.regex.Pattern;
import javax.swing.JOptionPane;
import org.forester.phylogeny.data.Taxonomy;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
import org.forester.util.ForesterUtil;
-import org.forester.ws.uniprot.UniProtEntry;
+import org.forester.ws.uniprot.SequenceDatabaseEntry;
import org.forester.ws.uniprot.UniProtWsTools;
-public class UniProtSequenceObtainer implements Runnable {
+public final class SequenceDataRetriver implements Runnable {
+ // uniprot/expasy accession number format (6 chars):
+ // letter digit letter-or-digit letter-or-digit letter-or-digit digit
+ private final static Pattern UNIPROT_AC_PATTERN = Pattern.compile( "[A-NR-ZOPQ]\\d[A-Z0-9]{3}\\d" );
private final Phylogeny _phy;
private final MainFrameApplication _mf;
private final TreePanel _treepanel;
+ private final static boolean DEBUG = true;
- public UniProtSequenceObtainer( final MainFrameApplication mf, final TreePanel treepanel, final Phylogeny phy ) {
+ private enum Db { // Database a lookup query is attributed to.
+ UNKNOWN, UNIPROT; // UNKNOWN is the per-node starting value; UNIPROT is assigned when the node's existing accession identifies the query as a UniProt one.
+ }
+
+ public SequenceDataRetriver( final MainFrameApplication mf, final TreePanel treepanel, final Phylogeny phy ) {
_phy = phy;
_mf = mf;
_treepanel = treepanel;
final SortedSet<String> not_found = new TreeSet<String>();
for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) {
final PhylogenyNode node = iter.next();
+ Sequence seq = null;
+ Taxonomy tax = null;
if ( node.getNodeData().isHasSequence() ) {
- //TODO Do something
+ seq = node.getNodeData().getSequence();
+ }
+ else {
+ seq = new Sequence();
+ }
+ if ( node.getNodeData().isHasTaxonomy() ) {
+ tax = node.getNodeData().getTaxonomy();
+ }
+ else {
+ tax = new Taxonomy();
+ }
+ String query = null;
+ Db db = Db.UNKNOWN;
+ if ( node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getAccession() != null )
+ && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getSource() )
+ && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getValue() )
+ && node.getNodeData().getSequence().getAccession().getValue().toLowerCase().startsWith( "uniprot" ) ) {
+ query = node.getNodeData().getSequence().getAccession().getValue();
+ db = Db.UNIPROT;
}
- // else if ( !ForesterUtil.isEmpty( node.getName() ) ) {
- // not_found.add( node.getName() );
- // }
else if ( !ForesterUtil.isEmpty( node.getName() ) ) {
- String query = node.getName();
+ query = node.getName();
+ }
+ if ( !ForesterUtil.isEmpty( query ) ) {
if ( query.indexOf( '/' ) > 0 ) {
query = query.substring( 0, query.indexOf( '/' ) );
}
if ( query.indexOf( '_' ) > 0 ) {
query = query.substring( 0, query.indexOf( '_' ) );
}
- final UniProtEntry upe = obtainUniProtEntry( query );
- if ( upe != null ) {
- final Sequence seq = new Sequence();
- final Taxonomy tax = new Taxonomy();
- if ( !ForesterUtil.isEmpty( upe.getAc() ) ) {
- seq.setAccession( new Accession( upe.getAc(), "uniprot" ) );
+ SequenceDatabaseEntry db_entry = null;
+ if ( ( db == Db.UNIPROT ) || UNIPROT_AC_PATTERN.matcher( query ).matches() ) {
+ if ( DEBUG ) {
+ System.out.println( "uniprot: " + query );
+ }
+ try {
+ db_entry = UniProtWsTools.obtainUniProtEntry( query, 200 );
}
- if ( !ForesterUtil.isEmpty( upe.getRecName() ) ) {
- seq.setName( upe.getRecName() );
+ catch ( final FileNotFoundException e ) {
+ // Ignore.
+ }
+ }
+ if ( db_entry != null ) {
+ if ( !ForesterUtil.isEmpty( db_entry.getAccession() ) ) {
+ seq.setAccession( new Accession( db_entry.getAccession(), "uniprot" ) );
}
- if ( !ForesterUtil.isEmpty( upe.getSymbol() ) ) {
- seq.setSymbol( upe.getSymbol() );
+ if ( !ForesterUtil.isEmpty( db_entry.getSequenceName() ) ) {
+ seq.setName( db_entry.getSequenceName() );
}
- if ( !ForesterUtil.isEmpty( upe.getOsScientificName() ) ) {
- tax.setScientificName( upe.getOsScientificName() );
+ if ( !ForesterUtil.isEmpty( db_entry.getSequenceSymbol() ) ) {
+ seq.setSymbol( db_entry.getSequenceSymbol() );
}
- if ( !ForesterUtil.isEmpty( upe.getTaxId() ) ) {
- tax.setIdentifier( new Identifier( upe.getTaxId(), "uniprot" ) );
+ if ( !ForesterUtil.isEmpty( db_entry.getTaxonomyScientificName() ) ) {
+ tax.setScientificName( db_entry.getTaxonomyScientificName() );
+ }
+ if ( !ForesterUtil.isEmpty( db_entry.getTaxonomyIdentifier() ) ) {
+ tax.setIdentifier( new Identifier( db_entry.getTaxonomyIdentifier(), "uniprot" ) );
}
node.getNodeData().setTaxonomy( tax );
node.getNodeData().setSequence( seq );
else {
not_found.add( node.getName() );
}
- //}
}
}
return not_found;
}
- static UniProtEntry obtainUniProtEntry( final String query ) throws IOException {
- final List<String> lines = UniProtWsTools.queryUniprot( "uniprot/" + query + ".txt", 200 );
- return UniProtEntry.createInstanceFromPlainText( lines );
- }
-
@Override
public void run() {
execute();
import org.forester.util.ForesterConstants;
import org.forester.util.ForesterUtil;
import org.forester.util.GeneralTable;
+import org.forester.ws.uniprot.SequenceDatabaseEntry;
import org.forester.ws.uniprot.UniProtTaxonomy;
import org.forester.ws.uniprot.UniProtWsTools;
import org.forester.ws.wabi.TxSearch;
System.out.println( "failed." );
failed++;
}
+ System.out.print( "Uniprot Entry Retrieval: " );
+ if ( Test.testUniprotEntryRetrieval() ) {
+ System.out.println( "OK." );
+ succeeded++;
+ }
+ else {
+ System.out.println( "failed." );
+ failed++;
+ }
System.out.print( "Uniprot Taxonomy Search: " );
if ( Test.testUniprotTaxonomySearch() ) {
System.out.println( "OK." );
succeeded++;
}
else {
- System.out
- .println( "failed [will not count towards failed tests since it might be due to absence internet connection]" );
+ System.out.println( "failed." );
+ failed++;
}
if ( Mafft.isInstalled() ) {
System.out.print( "MAFFT (external program): " );
return false;
}
}
+ catch ( final IOException e ) {
+ System.out.println();
+ System.out.println( "the following might be due to absence internet connection:" );
+ e.printStackTrace( System.out );
+ return true;
+ }
catch ( final Exception e ) {
+ return false;
+ }
+ return true;
+ }
+
+ private static boolean testUniprotEntryRetrieval() {
+ try {
+ final SequenceDatabaseEntry entry = UniProtWsTools.obtainUniProtEntry( "P12345", 200 );
+ if ( !entry.getAccession().equals( "P12345" ) ) {
+ return false;
+ }
+ if ( !entry.getTaxonomyScientificName().equals( "Oryctolagus cuniculus" ) ) {
+ return false;
+ }
+ if ( !entry.getSequenceName().equals( "Aspartate aminotransferase, mitochondrial" ) ) {
+ return false;
+ }
+ if ( !entry.getSequenceSymbol().equals( "GOT2" ) ) {
+ return false;
+ }
+ if ( !entry.getTaxonomyIdentifier().equals( "9986" ) ) {
+ return false;
+ }
+ }
+ catch ( final IOException e ) {
System.out.println();
System.out.println( "the following might be due to absence internet connection:" );
e.printStackTrace( System.out );
+ return true;
+ }
+ catch ( final Exception e ) {
return false;
}
return true;
--- /dev/null
+// $Id:
+// forester -- software libraries and applications
+// for genomics and evolutionary biology research.
+//
+// Copyright (C) 2010 Christian M Zmasek
+// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
+// All rights reserved
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.ws.uniprot;
+/**
+ * Read-only view of a single record obtained from a sequence database.
+ * UniProtEntry (same package) implements it. Every accessor returns a
+ * plain String; SequenceDataRetriver tests each value with
+ * ForesterUtil.isEmpty before using it, so implementations presumably
+ * may return null or an empty string for a missing field (TODO confirm).
+ */
+public interface SequenceDatabaseEntry {
+ /**
+ * @return the accession of the record, e.g. "P12345"
+ */
+ public String getAccession();
+ /**
+ * @return the name of the sequence, e.g.
+ * "Aspartate aminotransferase, mitochondrial" for P12345
+ */
+ public String getSequenceName();
+ /**
+ * @return the scientific name of the source organism, e.g.
+ * "Oryctolagus cuniculus" for P12345
+ */
+ public String getTaxonomyScientificName();
+ /**
+ * @return the taxonomy identifier of the source organism as text,
+ * e.g. "9986" for P12345
+ */
+ public String getTaxonomyIdentifier();
+ /**
+ * @return the symbol of the sequence, e.g. "GOT2" for P12345
+ */
+ public String getSequenceSymbol();
+}
\ No newline at end of file
import java.util.List;
-public final class UniProtEntry {
+public final class UniProtEntry implements SequenceDatabaseEntry {
private String _ac;
private String _rec_name;
private UniProtEntry() {
}
- public static UniProtEntry createInstanceFromPlainText( final List<String> lines ) {
+ public static SequenceDatabaseEntry createInstanceFromPlainText( final List<String> lines ) {
final UniProtEntry e = new UniProtEntry();
for( final String line : lines ) {
if ( line.startsWith( "AC" ) ) {
return target.substring( i_a + a.length(), i_b ).trim();
}
- public String getAc() {
+ /* (non-Javadoc)
+ * Returns the value stored in the _ac field.
+ * @see org.forester.ws.uniprot.SequenceDatabaseEntry#getAccession()
+ */
+ @Override
+ public String getAccession() {
return _ac;
}
}
}
- public String getRecName() {
+ /* (non-Javadoc)
+ * Returns the value stored in the _rec_name field.
+ * @see org.forester.ws.uniprot.SequenceDatabaseEntry#getSequenceName()
+ */
+ @Override
+ public String getSequenceName() {
return _rec_name;
}
}
}
- public String getOsScientificName() {
+ /* (non-Javadoc)
+ * Returns the value stored in the _os_scientific_name field.
+ * @see org.forester.ws.uniprot.SequenceDatabaseEntry#getTaxonomyScientificName()
+ */
+ @Override
+ public String getTaxonomyScientificName() {
return _os_scientific_name;
}
}
}
- public String getTaxId() {
+ /* (non-Javadoc)
+ * Returns the value stored in the _tax_id field.
+ * @see org.forester.ws.uniprot.SequenceDatabaseEntry#getTaxonomyIdentifier()
+ */
+ @Override
+ public String getTaxonomyIdentifier() {
return _tax_id;
}
}
}
- public String getSymbol() {
+ /* (non-Javadoc)
+ * Returns the value stored in the _symbol field.
+ * @see org.forester.ws.uniprot.SequenceDatabaseEntry#getSequenceSymbol()
+ */
+ @Override
+ public String getSequenceSymbol() {
return _symbol;
}
in.close();
return result;
}
+ /**
+ * Requests the plain-text record "uniprot/" + query + ".txt" through
+ * queryUniprot and hands the returned lines to
+ * UniProtEntry.createInstanceFromPlainText.
+ *
+ * @param query identifier placed verbatim into the request path; no
+ * escaping or validation happens in this method
+ * @param max_lines_to_return forwarded unchanged to queryUniprot
+ * (presumably an upper bound on the lines read; confirm there)
+ * @return the entry built from the returned lines
+ * @throws IOException declared for the underlying query;
+ * SequenceDataRetriver catches FileNotFoundException from this
+ * call and ignores it
+ */
+ public static SequenceDatabaseEntry obtainUniProtEntry( final String query, final int max_lines_to_return )
+ throws IOException {
+ final List<String> lines = queryUniprot( "uniprot/" + query + ".txt", max_lines_to_return );
+ return UniProtEntry.createInstanceFromPlainText( lines );
+ }
}