X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fapplication%2Fgene_tree_preprocess.java;h=581381cf7dcf44914e7eec6a45c3314ef615cfc0;hb=efaea5057ced91f5d162cebb6459d206da1d3c82;hp=4dd6672785e3df48afcc0b934c1ba16f51778019;hpb=51adbe9a9727baa44b48c1d5d75ac664455ee63b;p=jalview.git diff --git a/forester/java/src/org/forester/application/gene_tree_preprocess.java b/forester/java/src/org/forester/application/gene_tree_preprocess.java index 4dd6672..581381c 100644 --- a/forester/java/src/org/forester/application/gene_tree_preprocess.java +++ b/forester/java/src/org/forester/application/gene_tree_preprocess.java @@ -21,7 +21,7 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/forester +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.application; @@ -30,27 +30,30 @@ import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.util.SortedSet; +import java.util.TreeSet; -import org.forester.archaeopteryx.tools.SequenceDataRetriver; import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; -import org.forester.phylogeny.PhylogenyMethods; +import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; +import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.util.CommandLineArguments; import org.forester.util.ForesterUtil; +import org.forester.ws.seqdb.SequenceDbWsTools; public class gene_tree_preprocess { - final static private String HELP_OPTION_1 = "help"; - final static private String HELP_OPTION_2 = "h"; - final static private String PRG_NAME = "gene_tree_preprocess"; - final static private String PRG_DESC = "gene tree preprocessing for SDI analysis"; - final static private String PRG_VERSION = "1.00"; - final static private String PRG_DATE = "2012.06.07"; - final static private String E_MAIL = "phylosoft@gmail.com"; - final static private String WWW = "www.phylosoft.org/forester/"; + final static private String HELP_OPTION_1 = "help"; + final static private String HELP_OPTION_2 = "h"; + final static private String PRG_NAME = "gene_tree_preprocess"; + final static private String PRG_DESC = "gene tree preprocessing for SDI analysis"; + final static private String PRG_VERSION = "1.01"; + final static private String PRG_DATE = "2012.06.07"; + final static private String E_MAIL = "phylosoft@gmail.com"; + final static private String WWW = "www.phylosoft.org/forester"; + private final static int DEFAULT_LINES_TO_RETURN = 50; public static void main( final String[] args ) { try { @@ -67,23 +70,28 @@ public class gene_tree_preprocess { } catch ( final IOException e ) { ForesterUtil.fatalError( PRG_NAME, - "failed to read phylogeny from [" + in + "]: " - + e.getLocalizedMessage() ); + "failed to read phylogeny from [" + in + "]: " + e.getLocalizedMessage() ); } final File outtree = new File( ForesterUtil.removeSuffix( in.toString() ) - + "_preprocessed_gene_tree.phylo.xml" ); + + "_preprocessed_gene_tree.phylo.xml" ); final File removed_nodes = new File( ForesterUtil.removeSuffix( in.toString() ) + "_removed_nodes.txt" ); + final File present_species = new File( ForesterUtil.removeSuffix( in.toString() ) + "_species_present.txt" ); checkForOutputFileWriteability( outtree ); checkForOutputFileWriteability( removed_nodes ); + checkForOutputFileWriteability( present_species ); if ( phy.getNumberOfExternalNodes() < 2 ) { ForesterUtil.fatalError( PRG_NAME, "phylogeny has " + phy.getNumberOfExternalNodes() - + " external node(s), aborting" ); + + " external node(s), aborting" ); } - final SortedSet not_found = SequenceDataRetriver.obtainSeqInformation( phy, true ); + final SortedSet not_found = SequenceDbWsTools.obtainSeqInformation( phy, + true, + false, + DEFAULT_LINES_TO_RETURN ); for( final String remove_me : not_found ) { - System.out.println( " not found: " + remove_me ); - PhylogenyMethods.removeNode( phy.getNode( remove_me ), phy ); + phy.deleteSubtree( phy.getNode( remove_me ), true ); } + phy.clearHashIdToNodeMap(); + phy.externalNodesHaveChanged(); if ( phy.getNumberOfExternalNodes() < 2 ) { ForesterUtil.fatalError( PRG_NAME, "after removal of unresolvable external nodes, phylogeny has " @@ -97,6 +105,29 @@ public class gene_tree_preprocess { ForesterUtil.fatalError( PRG_NAME, "failed to write to [" + outtree + "]: " + e.getLocalizedMessage() ); } ForesterUtil.programMessage( PRG_NAME, "wrote output phylogeny to: " + outtree ); + final SortedSet species_set = new TreeSet(); + for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + if ( node.getNodeData().isHasTaxonomy() ) { + final String sn = node.getNodeData().getTaxonomy().getScientificName(); + if ( !ForesterUtil.isEmpty( sn ) ) { + species_set.add( sn ); + } + } + } + try { + final BufferedWriter out = new BufferedWriter( new FileWriter( present_species ) ); + for( final String species : species_set ) { + out.write( species ); + out.newLine(); + } + out.close(); + } + catch ( final IOException e ) { + ForesterUtil.fatalError( PRG_NAME, + "failed to write to [" + present_species + "]: " + e.getLocalizedMessage() ); + } + ForesterUtil.programMessage( PRG_NAME, "wrote present species to: " + present_species ); try { final BufferedWriter out = new BufferedWriter( new FileWriter( removed_nodes ) ); for( final String remove_me : not_found ) { @@ -117,7 +148,7 @@ public class gene_tree_preprocess { } } - public static void checkForOutputFileWriteability( final File outfile ) { + private static void checkForOutputFileWriteability( final File outfile ) { final String error = ForesterUtil.isWritableFile( outfile ); if ( !ForesterUtil.isEmpty( error ) ) { ForesterUtil.fatalError( PRG_NAME, error );