cleanup
[jalview.git] / forester / java / src / org / forester / application / gene_tree_preprocess.java
index e9804a3..d1fd58e 100644 (file)
@@ -30,14 +30,16 @@ import java.io.File;
 import java.io.FileWriter;
 import java.io.IOException;
 import java.util.SortedSet;
+import java.util.TreeSet;
 
 import org.forester.archaeopteryx.tools.SequenceDataRetriver;
 import org.forester.io.parsers.util.ParserUtils;
 import org.forester.io.writers.PhylogenyWriter;
 import org.forester.phylogeny.Phylogeny;
-import org.forester.phylogeny.PhylogenyMethods;
+import org.forester.phylogeny.PhylogenyNode;
 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
 import org.forester.phylogeny.factories.PhylogenyFactory;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
 import org.forester.util.CommandLineArguments;
 import org.forester.util.ForesterUtil;
 
@@ -47,10 +49,10 @@ public class gene_tree_preprocess {
     final static private String HELP_OPTION_2 = "h";
     final static private String PRG_NAME      = "gene_tree_preprocess";
     final static private String PRG_DESC      = "gene tree preprocessing for SDI analysis";
-    final static private String PRG_VERSION   = "1.00";
+    final static private String PRG_VERSION   = "1.01";
     final static private String PRG_DATE      = "2012.06.07";
     final static private String E_MAIL        = "phylosoft@gmail.com";
-    final static private String WWW           = "www.phylosoft.org/forester/";
+    final static private String WWW           = "www.phylosoft.org/forester";
 
     public static void main( final String[] args ) {
         try {
@@ -67,22 +69,22 @@ public class gene_tree_preprocess {
             }
             catch ( final IOException e ) {
                 ForesterUtil.fatalError( PRG_NAME,
-                                         "failed to read target phylogenies from [" + in + "]: "
-                                                 + e.getLocalizedMessage() );
+                                         "failed to read phylogeny from [" + in + "]: " + e.getLocalizedMessage() );
             }
             final File outtree = new File( ForesterUtil.removeSuffix( in.toString() )
                     + "_preprocessed_gene_tree.phylo.xml" );
             final File removed_nodes = new File( ForesterUtil.removeSuffix( in.toString() ) + "_removed_nodes.txt" );
+            final File present_species = new File( ForesterUtil.removeSuffix( in.toString() ) + "_species_present.txt" );
             checkForOutputFileWriteability( outtree );
             checkForOutputFileWriteability( removed_nodes );
+            checkForOutputFileWriteability( present_species );
             if ( phy.getNumberOfExternalNodes() < 2 ) {
                 ForesterUtil.fatalError( PRG_NAME, "phylogeny has " + phy.getNumberOfExternalNodes()
                         + " external node(s), aborting" );
             }
-            final SortedSet<String> not_found = SequenceDataRetriver.obtainSeqInformation( phy, true );
+            final SortedSet<String> not_found = SequenceDataRetriver.obtainSeqInformation( phy, true, false );
             for( final String remove_me : not_found ) {
-                System.out.println( " not found: " + remove_me );
-                PhylogenyMethods.removeNode( phy.getNode( remove_me ), phy );
+                phy.deleteSubtree( phy.getNode( remove_me ), true );
             }
             if ( phy.getNumberOfExternalNodes() < 2 ) {
                 ForesterUtil.fatalError( PRG_NAME,
@@ -97,6 +99,29 @@ public class gene_tree_preprocess {
                 ForesterUtil.fatalError( PRG_NAME, "failed to write to [" + outtree + "]: " + e.getLocalizedMessage() );
             }
             ForesterUtil.programMessage( PRG_NAME, "wrote output phylogeny to: " + outtree );
+            final SortedSet<String> species_set = new TreeSet<String>();
+            for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
+                final PhylogenyNode node = iter.next();
+                if ( node.getNodeData().isHasTaxonomy() ) {
+                    final String sn = node.getNodeData().getTaxonomy().getScientificName();
+                    if ( !ForesterUtil.isEmpty( sn ) ) {
+                        species_set.add( sn );
+                    }
+                }
+            }
+            try {
+                final BufferedWriter out = new BufferedWriter( new FileWriter( present_species ) );
+                for( final String species : species_set ) {
+                    out.write( species );
+                    out.newLine();
+                }
+                out.close();
+            }
+            catch ( final IOException e ) {
+                ForesterUtil.fatalError( PRG_NAME,
+                                         "failed to write to [" + present_species + "]: " + e.getLocalizedMessage() );
+            }
+            ForesterUtil.programMessage( PRG_NAME, "wrote present species to: " + present_species );
             try {
                 final BufferedWriter out = new BufferedWriter( new FileWriter( removed_nodes ) );
                 for( final String remove_me : not_found ) {