import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
import org.forester.util.ForesterUtil;
import org.forester.util.SequenceIdParser;
-import org.forester.ws.uniprot.SequenceDatabaseEntry;
-import org.forester.ws.uniprot.SequenceDbWsTools;
+import org.forester.ws.seqdb.SequenceDatabaseEntry;
+import org.forester.ws.seqdb.SequenceDbWsTools;
public final class SequenceDataRetriver extends RunnableProcess {
+ public final static int DEFAULT_LINES_TO_RETURN = 50; // Passed as the second argument to the SequenceDbWsTools lookups in obtainSeqInformation (replaces the former literal 200); presumably caps the number of response lines read per entry -- confirm against SequenceDbWsTools.
private final Phylogeny _phy;
private final MainFrameApplication _mf;
private final TreePanel _treepanel;
- private final static boolean DEBUG = true;
+ private final static boolean DEBUG = false;
private enum Db {
UNIPROT, EMBL, NCBI, NONE, REFSEQ;
start( _mf, "sequence data" );
SortedSet<String> not_found = null;
try {
- not_found = obtainSeqInformation( _phy );
+ not_found = obtainSeqInformation( _phy, false, true );
}
catch ( final UnknownHostException e ) {
final String what = "_"; //TODO FIXME
}
}
- public static SortedSet<String> obtainSeqInformation( final Phylogeny phy ) throws IOException {
+ public static SortedSet<String> obtainSeqInformation( final Phylogeny phy,
+ final boolean ext_nodes_only,
+ final boolean allow_to_set_taxonomic_data )
+ throws IOException {
final SortedSet<String> not_found = new TreeSet<String>();
for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) {
final PhylogenyNode node = iter.next();
- final Sequence seq = node.getNodeData().isHasSequence() ? node.getNodeData().getSequence() : new Sequence() ;
- final Taxonomy tax = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy() : new Taxonomy() ;
-
-
+ if ( ext_nodes_only && node.isInternal() ) {
+ continue;
+ }
+ final Sequence seq = node.getNodeData().isHasSequence() ? node.getNodeData().getSequence() : new Sequence();
+ final Taxonomy tax = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy() : new Taxonomy();
String query = null;
Identifier id = null;
Db db = Db.NONE;
db = Db.UNIPROT;
}
else if ( ( id = SequenceIdParser.parse( node.getName() ) ) != null ) {
-
if ( id.getProvider().equalsIgnoreCase( Identifier.NCBI ) ) {
db = Db.NCBI;
}
else if ( id.getProvider().equalsIgnoreCase( Identifier.REFSEQ ) ) {
db = Db.REFSEQ;
}
-
}
}
+ if ( db == Db.NONE ) {
+ not_found.add( node.getName() );
+ }
SequenceDatabaseEntry db_entry = null;
if ( !ForesterUtil.isEmpty( query ) ) {
if ( db == Db.UNIPROT ) {
if ( DEBUG ) {
System.out.println( "uniprot: " + query );
}
- db_entry = SequenceDbWsTools.obtainUniProtEntry( query, 200 );
+ db_entry = SequenceDbWsTools.obtainUniProtEntry( query, DEFAULT_LINES_TO_RETURN );
}
if ( ( db == Db.EMBL ) || ( ( db == Db.UNIPROT ) && ( db_entry == null ) ) ) {
if ( DEBUG ) {
System.out.println( "embl: " + query );
}
- db_entry = SequenceDbWsTools.obtainEmblEntry( new Identifier( query ), 200 );
+ db_entry = SequenceDbWsTools.obtainEmblEntry( new Identifier( query ), DEFAULT_LINES_TO_RETURN );
if ( ( db == Db.UNIPROT ) && ( db_entry != null ) ) {
db = Db.EMBL;
}
}
}
else if ( ( db == Db.REFSEQ ) && ( id != null ) ) {
- db_entry = SequenceDbWsTools.obtainRefSeqEntryFromEmbl( id, 200 );
+ db_entry = SequenceDbWsTools.obtainRefSeqEntryFromEmbl( id, DEFAULT_LINES_TO_RETURN );
}
else if ( ( db == Db.NCBI ) && ( id != null ) ) {
- db_entry = SequenceDbWsTools.obtainEmblEntry( id, 200 );
+ db_entry = SequenceDbWsTools.obtainEmblEntry( id, DEFAULT_LINES_TO_RETURN );
}
if ( ( db_entry != null ) && !db_entry.isEmpty() ) {
if ( !ForesterUtil.isEmpty( db_entry.getAccession() ) ) {
if ( !ForesterUtil.isEmpty( db_entry.getTaxonomyScientificName() ) ) {
tax.setScientificName( db_entry.getTaxonomyScientificName() );
}
- if ( !ForesterUtil.isEmpty( db_entry.getTaxonomyIdentifier() ) ) {
+ if ( allow_to_set_taxonomic_data && !ForesterUtil.isEmpty( db_entry.getTaxonomyIdentifier() ) ) { // NOTE(review): only the taxonomy identifier is gated by allow_to_set_taxonomic_data; the scientific name is still set, and the Taxonomy attached to the node, regardless of the flag -- confirm this is intended.
tax.setIdentifier( new Identifier( db_entry.getTaxonomyIdentifier(), "uniprot" ) );
}
node.getNodeData().setTaxonomy( tax );
else if ( db != Db.NONE ) {
not_found.add( node.getName() );
}
+ try {
+ Thread.sleep( 10 );// Sleep for 10 ms between nodes (presumably to avoid hammering the remote sequence databases -- confirm)
+ }
+ catch ( final InterruptedException ie ) {
+ Thread.currentThread().interrupt(); // Do not swallow the interrupt: restore the flag so the caller/owning thread can observe the cancellation request.
+ }
}
return not_found;
}