import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
import java.util.SortedSet;
+import java.util.TreeSet;
import org.forester.archaeopteryx.tools.SequenceDataRetriver;
import org.forester.io.parsers.util.ParserUtils;
import org.forester.io.writers.PhylogenyWriter;
import org.forester.phylogeny.Phylogeny;
-import org.forester.phylogeny.PhylogenyMethods;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
import org.forester.phylogeny.factories.PhylogenyFactory;
final static private String HELP_OPTION_2 = "h";
final static private String PRG_NAME = "gene_tree_preprocess";
final static private String PRG_DESC = "gene tree preprocessing for SDI analysis";
- final static private String PRG_VERSION = "1.00";
+ final static private String PRG_VERSION = "1.01";
final static private String PRG_DATE = "2012.06.07";
final static private String E_MAIL = "phylosoft@gmail.com";
final static private String WWW = "www.phylosoft.org/forester/";
}
final SortedSet<String> not_found = SequenceDataRetriver.obtainSeqInformation( phy, true );
for( final String remove_me : not_found ) {
- // System.out.println( " not found: " + remove_me );
- PhylogenyMethods.removeNode( phy.getNode( remove_me ), phy );
+ //PhylogenyMethods.removeNode( phy.getNode( remove_me ), phy );
+ phy.deleteSubtree( phy.getNode( remove_me ), true );
}
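                // The loop above prunes every external node whose sequence data could not be
                // retrieved; the second argument to deleteSubtree presumably also collapses a
                // parent that is left with a single descendant after pruning (an assumption
                // about the forester API, not stated in this diff).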
if ( phy.getNumberOfExternalNodes() < 2 ) {
ForesterUtil.fatalError( PRG_NAME,
ForesterUtil.fatalError( PRG_NAME, "failed to write to [" + outtree + "]: " + e.getLocalizedMessage() );
}
ForesterUtil.programMessage( PRG_NAME, "wrote output phylogeny to: " + outtree );
- final Set<String> species_found = new HashSet<String>();
+ final SortedSet<String> species_set = new TreeSet<String>();
+ for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
+ final PhylogenyNode node = iter.next();
+ if ( node.getNodeData().isHasTaxonomy() ) {
+ final String sn = node.getNodeData().getTaxonomy().getScientificName();
+ if ( !ForesterUtil.isEmpty( sn ) ) {
+ species_set.add( sn );
+ }
+ }
+ }
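            // The SortedSet filled above holds each scientific name at most once and, being a
            // TreeSet, iterates the names in alphabetical order, so the species file written
            // below no longer depends on the traversal order of the gene tree (this replaces
            // the HashSet-based bookkeeping in the removed lines further down).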
try {
final BufferedWriter out = new BufferedWriter( new FileWriter( present_species ) );
- for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
- final PhylogenyNode node = iter.next();
- if ( node.getNodeData().isHasTaxonomy() ) {
- final String sn = node.getNodeData().getTaxonomy().getScientificName();
- if ( !ForesterUtil.isEmpty( sn ) ) {
- if ( !species_found.contains( sn ) ) {
- species_found.add( sn );
- out.write( node.getNodeData().getTaxonomy().getScientificName() );
- out.newLine();
- }
- }
- }
+ for( final String species : species_set ) {
+ out.write( species );
+ out.newLine();
}
out.close();
}
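// A minimal standalone sketch (not part of forester; the class name and the file name
// "present_species.txt" are illustrative assumptions) showing the same idea as the two
// hunks above: collect scientific names into a TreeSet so they come out unique and
// alphabetically sorted, then write them one per line. It uses try-with-resources
// (Java 7+) so the writer is closed even if writing fails.
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.SortedSet;
import java.util.TreeSet;

public class SpeciesListSketch {

    public static void main( final String[] args ) throws IOException {
        final SortedSet<String> species_set = new TreeSet<String>();
        // Duplicates are ignored by the set; iteration order is alphabetical.
        species_set.add( "Mus musculus" );
        species_set.add( "Arabidopsis thaliana" );
        species_set.add( "Mus musculus" );
        try ( final BufferedWriter out = new BufferedWriter( new FileWriter( "present_species.txt" ) ) ) {
            for( final String species : species_set ) {
                out.write( species );
                out.newLine();
            }
        } // writer is closed here, even on exception
    }
}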
System.out.println( "Running time (excluding I/O): " + ( new Date().getTime() - start_time ) + "ms" );
try {
final PhylogenyWriter writer = new PhylogenyWriter();
- writer.toPhyloXML( out_file, gene_tree, 1 );
+ writer.toPhyloXML( out_file, gene_tree, 0 );
}
catch ( final IOException e ) {
ForesterUtil.fatalError( PRG_NAME, "Failed to write to \"" + out_file + "\" [" + e.getMessage() + "]" );
System.out.println( "Number speciations : " + ( ( GSDI ) sdi ).getSpeciationsSum() );
}
System.out.println();
- } // main( final String args[] )
+ }
private static void print_help() {
System.out.println( "Usage: \"" + gsdi.PRG_NAME
System.out.println( "Options:" );
System.out.println( " -" + gsdi.STRIP_OPTION + ": to strip the species tree prior to duplication inference" );
System.out.println( " -" + gsdi.GSDI_OPTION
- + ": to use GSDI algorithm instead of SDIse algorithm (under development, not recommended)" );
+ + ": to use GSDI algorithm instead of SDIse algorithm" );
System.out
.println( " -" + gsdi.MOST_PARSIMONIOUS_OPTION + ": use most parimonious duplication model for GSDI: " );
System.out.println( " assign nodes as speciations which would otherwise be assiged" );