phylotastic hackathon at NESCENT 120607
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Thu, 7 Jun 2012 20:41:24 +0000 (20:41 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Thu, 7 Jun 2012 20:41:24 +0000 (20:41 +0000)
forester/java/src/org/forester/application/gene_tree_preprocess.java
forester/java/src/org/forester/application/gsdi.java

index 525a3fc..41f6a47 100644 (file)
@@ -29,15 +29,13 @@ import java.io.BufferedWriter;
 import java.io.File;
 import java.io.FileWriter;
 import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
 import java.util.SortedSet;
+import java.util.TreeSet;
 
 import org.forester.archaeopteryx.tools.SequenceDataRetriver;
 import org.forester.io.parsers.util.ParserUtils;
 import org.forester.io.writers.PhylogenyWriter;
 import org.forester.phylogeny.Phylogeny;
-import org.forester.phylogeny.PhylogenyMethods;
 import org.forester.phylogeny.PhylogenyNode;
 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
 import org.forester.phylogeny.factories.PhylogenyFactory;
@@ -51,7 +49,7 @@ public class gene_tree_preprocess {
     final static private String HELP_OPTION_2 = "h";
     final static private String PRG_NAME      = "gene_tree_preprocess";
     final static private String PRG_DESC      = "gene tree preprocessing for SDI analysis";
-    final static private String PRG_VERSION   = "1.00";
+    final static private String PRG_VERSION   = "1.01";
     final static private String PRG_DATE      = "2012.06.07";
     final static private String E_MAIL        = "phylosoft@gmail.com";
     final static private String WWW           = "www.phylosoft.org/forester/";
@@ -86,8 +84,8 @@ public class gene_tree_preprocess {
             }
             final SortedSet<String> not_found = SequenceDataRetriver.obtainSeqInformation( phy, true );
             for( final String remove_me : not_found ) {
-               // System.out.println( " not found: " + remove_me );
-                PhylogenyMethods.removeNode( phy.getNode( remove_me ), phy );
+                //PhylogenyMethods.removeNode( phy.getNode( remove_me ), phy );
+                phy.deleteSubtree( phy.getNode( remove_me ), true );
             }
             if ( phy.getNumberOfExternalNodes() < 2 ) {
                 ForesterUtil.fatalError( PRG_NAME,
@@ -102,21 +100,21 @@ public class gene_tree_preprocess {
                 ForesterUtil.fatalError( PRG_NAME, "failed to write to [" + outtree + "]: " + e.getLocalizedMessage() );
             }
             ForesterUtil.programMessage( PRG_NAME, "wrote output phylogeny to: " + outtree );
-            final Set<String> species_found = new HashSet<String>();
+            final SortedSet<String> species_set = new TreeSet<String>();
+            for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
+                final PhylogenyNode node = iter.next();
+                if ( node.getNodeData().isHasTaxonomy() ) {
+                    final String sn = node.getNodeData().getTaxonomy().getScientificName();
+                    if ( !ForesterUtil.isEmpty( sn ) ) {
+                        species_set.add( sn );
+                    }
+                }
+            }
             try {
                 final BufferedWriter out = new BufferedWriter( new FileWriter( present_species ) );
-                for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
-                    final PhylogenyNode node = iter.next();
-                    if ( node.getNodeData().isHasTaxonomy() ) {
-                        final String sn = node.getNodeData().getTaxonomy().getScientificName();
-                        if ( !ForesterUtil.isEmpty( sn ) ) {
-                            if ( !species_found.contains( sn ) ) {
-                                species_found.add( sn );
-                                out.write( node.getNodeData().getTaxonomy().getScientificName() );
-                                out.newLine();
-                            }
-                        }
-                    }
+                for( final String species : species_set ) {
+                    out.write( species );
+                    out.newLine();
                 }
                 out.close();
             }
index 323d20d..3763e1b 100644 (file)
@@ -198,7 +198,7 @@ public final class gsdi {
         System.out.println( "Running time (excluding I/O): " + ( new Date().getTime() - start_time ) + "ms" );
         try {
             final PhylogenyWriter writer = new PhylogenyWriter();
-            writer.toPhyloXML( out_file, gene_tree, 1 );
+            writer.toPhyloXML( out_file, gene_tree, 0 );
         }
         catch ( final IOException e ) {
             ForesterUtil.fatalError( PRG_NAME, "Failed to write to \"" + out_file + "\" [" + e.getMessage() + "]" );
@@ -219,7 +219,7 @@ public final class gsdi {
             System.out.println( "Number speciations              : " + ( ( GSDI ) sdi ).getSpeciationsSum() );
         }
         System.out.println();
-    } // main( final String args[] )
+    } 
 
     private static void print_help() {
         System.out.println( "Usage: \"" + gsdi.PRG_NAME
@@ -228,7 +228,7 @@ public final class gsdi {
         System.out.println( "Options:" );
         System.out.println( " -" + gsdi.STRIP_OPTION + ": to strip the species tree prior to duplication inference" );
         System.out.println( " -" + gsdi.GSDI_OPTION
-                + ": to use GSDI algorithm instead of SDIse algorithm (under development, not recommended)" );
+                + ": to use GSDI algorithm instead of SDIse algorithm" );
         System.out
                 .println( " -" + gsdi.MOST_PARSIMONIOUS_OPTION + ": use most parimonious duplication model for GSDI: " );
         System.out.println( "     assign nodes as speciations which would otherwise be assiged" );