From f0c947ea4c0fba2ef9b688b9e6523952ca6c6110 Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Thu, 7 Jun 2012 20:41:24 +0000 Subject: [PATCH] phylotastic hackathon at NESCENT 120607 --- .../forester/application/gene_tree_preprocess.java | 36 +++++++++----------- .../java/src/org/forester/application/gsdi.java | 6 ++-- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/forester/java/src/org/forester/application/gene_tree_preprocess.java b/forester/java/src/org/forester/application/gene_tree_preprocess.java index 525a3fc..41f6a47 100644 --- a/forester/java/src/org/forester/application/gene_tree_preprocess.java +++ b/forester/java/src/org/forester/application/gene_tree_preprocess.java @@ -29,15 +29,13 @@ import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; import java.io.IOException; -import java.util.HashSet; -import java.util.Set; import java.util.SortedSet; +import java.util.TreeSet; import org.forester.archaeopteryx.tools.SequenceDataRetriver; import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; -import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; @@ -51,7 +49,7 @@ public class gene_tree_preprocess { final static private String HELP_OPTION_2 = "h"; final static private String PRG_NAME = "gene_tree_preprocess"; final static private String PRG_DESC = "gene tree preprocessing for SDI analysis"; - final static private String PRG_VERSION = "1.00"; + final static private String PRG_VERSION = "1.01"; final static private String PRG_DATE = "2012.06.07"; final static private String E_MAIL = "phylosoft@gmail.com"; final static private String WWW = "www.phylosoft.org/forester/"; @@ -86,8 +84,8 @@ public class gene_tree_preprocess { } final SortedSet not_found = SequenceDataRetriver.obtainSeqInformation( phy, true ); for( final String remove_me : not_found ) { - // System.out.println( " not found: " + remove_me ); - PhylogenyMethods.removeNode( phy.getNode( remove_me ), phy ); + //PhylogenyMethods.removeNode( phy.getNode( remove_me ), phy ); + phy.deleteSubtree( phy.getNode( remove_me ), true ); } if ( phy.getNumberOfExternalNodes() < 2 ) { ForesterUtil.fatalError( PRG_NAME, @@ -102,21 +100,21 @@ public class gene_tree_preprocess { ForesterUtil.fatalError( PRG_NAME, "failed to write to [" + outtree + "]: " + e.getLocalizedMessage() ); } ForesterUtil.programMessage( PRG_NAME, "wrote output phylogeny to: " + outtree ); - final Set species_found = new HashSet(); + final SortedSet species_set = new TreeSet(); + for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + if ( node.getNodeData().isHasTaxonomy() ) { + final String sn = node.getNodeData().getTaxonomy().getScientificName(); + if ( !ForesterUtil.isEmpty( sn ) ) { + species_set.add( sn ); + } + } + } try { final BufferedWriter out = new BufferedWriter( new FileWriter( present_species ) ); - for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { - final PhylogenyNode node = iter.next(); - if ( node.getNodeData().isHasTaxonomy() ) { - final String sn = node.getNodeData().getTaxonomy().getScientificName(); - if ( !ForesterUtil.isEmpty( sn ) ) { - if ( !species_found.contains( sn ) ) { - species_found.add( sn ); - out.write( node.getNodeData().getTaxonomy().getScientificName() ); - out.newLine(); - } - } - } + for( final String species : species_set ) { + out.write( species ); + out.newLine(); } out.close(); } diff --git a/forester/java/src/org/forester/application/gsdi.java b/forester/java/src/org/forester/application/gsdi.java index 323d20d..3763e1b 100644 --- a/forester/java/src/org/forester/application/gsdi.java +++ b/forester/java/src/org/forester/application/gsdi.java @@ -198,7 +198,7 @@ public final class gsdi { System.out.println( "Running time (excluding I/O): " + ( new Date().getTime() - start_time ) + "ms" ); try { final PhylogenyWriter writer = new PhylogenyWriter(); - writer.toPhyloXML( out_file, gene_tree, 1 ); + writer.toPhyloXML( out_file, gene_tree, 0 ); } catch ( final IOException e ) { ForesterUtil.fatalError( PRG_NAME, "Failed to write to \"" + out_file + "\" [" + e.getMessage() + "]" ); @@ -219,7 +219,7 @@ public final class gsdi { System.out.println( "Number speciations : " + ( ( GSDI ) sdi ).getSpeciationsSum() ); } System.out.println(); - } // main( final String args[] ) + } private static void print_help() { System.out.println( "Usage: \"" + gsdi.PRG_NAME @@ -228,7 +228,7 @@ public final class gsdi { System.out.println( "Options:" ); System.out.println( " -" + gsdi.STRIP_OPTION + ": to strip the species tree prior to duplication inference" ); System.out.println( " -" + gsdi.GSDI_OPTION - + ": to use GSDI algorithm instead of SDIse algorithm (under development, not recommended)" ); + + ": to use GSDI algorithm instead of SDIse algorithm" ); System.out .println( " -" + gsdi.MOST_PARSIMONIOUS_OPTION + ": use most parimonious duplication model for GSDI: " ); System.out.println( " assign nodes as speciations which would otherwise be assiged" ); -- 1.7.10.2