package org.forester.application;
+import java.io.BufferedWriter;
import java.io.File;
+import java.io.FileWriter;
import java.io.IOException;
import java.util.SortedSet;
final static private String HELP_OPTION_1 = "help";
final static private String HELP_OPTION_2 = "h";
- final static private String FROM_OPTION = "f";
- final static private String TO_OPTION = "t";
- final static private String STEP_OPTION = "s";
- final static private String WINDOW_OPTION = "w";
final static private String PRG_NAME = "gene_tree_preprocess";
final static private String PRG_DESC = "gene tree preprocessing for SDI analysis";
final static private String PRG_VERSION = "1.00";
ForesterUtil.fatalError( PRG_NAME, "phylogeny has " + phy.getNumberOfExternalNodes()
+ " external node(s), aborting" );
}
- final SortedSet<String> not_found = SequenceDataRetriver.obtainSeqInformation( phy );
+ final SortedSet<String> not_found = SequenceDataRetriver.obtainSeqInformation( phy, true );
for( final String remove_me : not_found ) {
- System.out.println( " not found: " + not_found );
+ System.out.println( " not found: " + remove_me );
PhylogenyMethods.removeNode( phy.getNode( remove_me ), phy );
}
if ( phy.getNumberOfExternalNodes() < 2 ) {
catch ( final IOException e ) {
ForesterUtil.fatalError( PRG_NAME, "failed to write to [" + outtree + "]: " + e.getLocalizedMessage() );
}
- // ForesterUtil.programMessage( PRG_NAME, "wrote output to: [" + outfile + "]" );
+ ForesterUtil.programMessage( PRG_NAME, "wrote output phylogeny to: " + outtree );
+ // Write the labels of the removed external nodes, one label per line.
+ // The writer is closed in a finally block so that the file handle is
+ // released even when a write fails part-way through; an IOException
+ // raised by close() itself is reported by the same catch clause below.
+ try {
+ final BufferedWriter out = new BufferedWriter( new FileWriter( removed_nodes ) );
+ try {
+ for( final String remove_me : not_found ) {
+ out.write( remove_me );
+ out.newLine();
+ }
+ }
+ finally {
+ out.close();
+ }
+ }
+ catch ( final IOException e ) {
+ ForesterUtil.fatalError( PRG_NAME,
+ "failed to write to [" + removed_nodes + "]: " + e.getLocalizedMessage() );
+ }
+ ForesterUtil.programMessage( PRG_NAME, "wrote removed external nodes labels to: " + removed_nodes );
ForesterUtil.programMessage( PRG_NAME, "OK" );
- System.out.println();
}
catch ( final Exception e ) {
ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
E_MAIL,
WWW,
ForesterUtil.getForesterLibraryInformation() );
- System.out.println( "Usage:" );
- System.out.println();
- System.out.println( PRG_NAME + " <options> <msa input file>" );
- System.out.println();
- System.out.println( " options: " );
- System.out.println();
- System.out.println( " -" + FROM_OPTION + "=<integer>: from (msa column)" );
- System.out.println( " -" + TO_OPTION + "=<integer>: to (msa column)" );
- System.out.println( " or" );
- System.out.println( " -" + WINDOW_OPTION + "=<integer>: window size (msa columns)" );
- System.out.println( " -" + STEP_OPTION + "=<integer>: step size (msa columns)" );
- System.out.println();
- System.out.println();
+ System.out.print( "Usage: " );
+ System.out.println( PRG_NAME + " <input phylogeny file>" );
System.out.println();
}
}
import org.forester.ws.seqdb.SequenceDbWsTools;
public final class SequenceDataRetriver extends RunnableProcess {
-
- public final static int DEFAULT_LINES_TO_RETURN = 50;
+ public final static int DEFAULT_LINES_TO_RETURN = 50;
private final Phylogeny _phy;
private final MainFrameApplication _mf;
private final TreePanel _treepanel;
- private final static boolean DEBUG = true;
+ private final static boolean DEBUG = false;
private enum Db {
UNIPROT, EMBL, NCBI, NONE, REFSEQ;
start( _mf, "sequence data" );
SortedSet<String> not_found = null;
try {
- not_found = obtainSeqInformation( _phy );
+ not_found = obtainSeqInformation( _phy, false );
}
catch ( final UnknownHostException e ) {
final String what = "_"; //TODO FIXME
}
}
- public static SortedSet<String> obtainSeqInformation( final Phylogeny phy ) throws IOException {
+ public static SortedSet<String> obtainSeqInformation( final Phylogeny phy, final boolean ext_nodes_only ) throws IOException {
final SortedSet<String> not_found = new TreeSet<String>();
for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) {
final PhylogenyNode node = iter.next();
- final Sequence seq = node.getNodeData().isHasSequence() ? node.getNodeData().getSequence() : new Sequence() ;
- final Taxonomy tax = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy() : new Taxonomy() ;
-
-
+ if ( ext_nodes_only && node.isInternal() ) {
+ continue;
+ }
+ final Sequence seq = node.getNodeData().isHasSequence() ? node.getNodeData().getSequence() : new Sequence();
+ final Taxonomy tax = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy() : new Taxonomy();
String query = null;
Identifier id = null;
Db db = Db.NONE;
db = Db.UNIPROT;
}
else if ( ( id = SequenceIdParser.parse( node.getName() ) ) != null ) {
-
if ( id.getProvider().equalsIgnoreCase( Identifier.NCBI ) ) {
db = Db.NCBI;
}
else if ( id.getProvider().equalsIgnoreCase( Identifier.REFSEQ ) ) {
db = Db.REFSEQ;
}
-
}
}
+
+ if ( db == Db.NONE ) {
+ not_found.add( node.getName() );
+ }
+
SequenceDatabaseEntry db_entry = null;
if ( !ForesterUtil.isEmpty( query ) ) {
if ( db == Db.UNIPROT ) {
if ( DEBUG ) {
System.out.println( "embl: " + query );
}
- db_entry = SequenceDbWsTools.obtainEmblEntry( new Identifier( query ), DEFAULT_LINES_TO_RETURN );
+ db_entry = SequenceDbWsTools.obtainEmblEntry( new Identifier( query ), DEFAULT_LINES_TO_RETURN );
if ( ( db == Db.UNIPROT ) && ( db_entry != null ) ) {
db = Db.EMBL;
}
else if ( ( db == Db.NCBI ) && ( id != null ) ) {
db_entry = SequenceDbWsTools.obtainEmblEntry( id, DEFAULT_LINES_TO_RETURN );
}
+
if ( ( db_entry != null ) && !db_entry.isEmpty() ) {
if ( !ForesterUtil.isEmpty( db_entry.getAccession() ) ) {
String type = null;