X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fapplication%2Fgene_tree_preprocess.java;h=bcb93f4074d1c530759735dd134364b5d4ff0c0f;hb=5c494e04b1cb40db2f1ac11c9dd9f459c9fab7f5;hp=46e5bee08714fa9a3904e9ab3282eda0d8550841;hpb=bd30826b30945fec52ffd33f65f9456b6ef57e77;p=jalview.git diff --git a/forester/java/src/org/forester/application/gene_tree_preprocess.java b/forester/java/src/org/forester/application/gene_tree_preprocess.java index 46e5bee..bcb93f4 100644 --- a/forester/java/src/org/forester/application/gene_tree_preprocess.java +++ b/forester/java/src/org/forester/application/gene_tree_preprocess.java @@ -25,34 +25,35 @@ package org.forester.application; +import java.io.BufferedWriter; import java.io.File; +import java.io.FileWriter; import java.io.IOException; import java.util.SortedSet; +import java.util.TreeSet; -import org.forester.archaeopteryx.tools.SequenceDataRetriver; import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; -import org.forester.phylogeny.PhylogenyMethods; +import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; +import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.util.CommandLineArguments; import org.forester.util.ForesterUtil; +import org.forester.ws.seqdb.SequenceDbWsTools; public class gene_tree_preprocess { - final static private String HELP_OPTION_1 = "help"; - final static private String HELP_OPTION_2 = "h"; - final static private String FROM_OPTION = "f"; - final static private String TO_OPTION = "t"; - final static private String STEP_OPTION = "s"; - final static private String WINDOW_OPTION = "w"; - final static private String PRG_NAME = "gene_tree_preprocess"; - final static private String PRG_DESC = "gene tree preprocessing for SDI analysis"; - final static private String PRG_VERSION = "1.00"; - final static private String PRG_DATE = "2012.06.07"; - final static private String E_MAIL = "phylosoft@gmail.com"; - final static private String WWW = "www.phylosoft.org/forester/"; + final static private String HELP_OPTION_1 = "help"; + final static private String HELP_OPTION_2 = "h"; + final static private String PRG_NAME = "gene_tree_preprocess"; + final static private String PRG_DESC = "gene tree preprocessing for SDI analysis"; + final static private String PRG_VERSION = "1.01"; + final static private String PRG_DATE = "2012.06.07"; + final static private String E_MAIL = "phylosoft@gmail.com"; + final static private String WWW = "www.phylosoft.org/forester"; + private final static int DEFAULT_LINES_TO_RETURN = 50; public static void main( final String[] args ) { try { @@ -69,23 +70,28 @@ public class gene_tree_preprocess { } catch ( final IOException e ) { ForesterUtil.fatalError( PRG_NAME, - "failed to read target phylogenies from [" + in + "]: " - + e.getLocalizedMessage() ); + "failed to read phylogeny from [" + in + "]: " + e.getLocalizedMessage() ); } final File outtree = new File( ForesterUtil.removeSuffix( in.toString() ) + "_preprocessed_gene_tree.phylo.xml" ); final File removed_nodes = new File( ForesterUtil.removeSuffix( in.toString() ) + "_removed_nodes.txt" ); + final File present_species = new File( ForesterUtil.removeSuffix( in.toString() ) + "_species_present.txt" ); checkForOutputFileWriteability( outtree ); checkForOutputFileWriteability( removed_nodes ); + checkForOutputFileWriteability( present_species ); if ( phy.getNumberOfExternalNodes() < 2 ) { ForesterUtil.fatalError( PRG_NAME, "phylogeny has " + phy.getNumberOfExternalNodes() + " external node(s), aborting" ); } - final SortedSet not_found = SequenceDataRetriver.obtainSeqInformation( phy ); + final SortedSet not_found = SequenceDbWsTools.obtainSeqInformation( phy, + true, + false, + DEFAULT_LINES_TO_RETURN ); for( final String remove_me : not_found ) { - System.out.println( " not found: " + not_found ); - PhylogenyMethods.removeNode( phy.getNode( remove_me ), phy ); + phy.deleteSubtree( phy.getNode( remove_me ), true ); } + phy.clearHashIdToNodeMap(); + phy.externalNodesHaveChanged(); if ( phy.getNumberOfExternalNodes() < 2 ) { ForesterUtil.fatalError( PRG_NAME, "after removal of unresolvable external nodes, phylogeny has " @@ -98,16 +104,51 @@ public class gene_tree_preprocess { catch ( final IOException e ) { ForesterUtil.fatalError( PRG_NAME, "failed to write to [" + outtree + "]: " + e.getLocalizedMessage() ); } - // ForesterUtil.programMessage( PRG_NAME, "wrote output to: [" + outfile + "]" ); + ForesterUtil.programMessage( PRG_NAME, "wrote output phylogeny to: " + outtree ); + final SortedSet species_set = new TreeSet(); + for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + if ( node.getNodeData().isHasTaxonomy() ) { + final String sn = node.getNodeData().getTaxonomy().getScientificName(); + if ( !ForesterUtil.isEmpty( sn ) ) { + species_set.add( sn ); + } + } + } + try { + final BufferedWriter out = new BufferedWriter( new FileWriter( present_species ) ); + for( final String species : species_set ) { + out.write( species ); + out.newLine(); + } + out.close(); + } + catch ( final IOException e ) { + ForesterUtil.fatalError( PRG_NAME, + "failed to write to [" + present_species + "]: " + e.getLocalizedMessage() ); + } + ForesterUtil.programMessage( PRG_NAME, "wrote present species to: " + present_species ); + try { + final BufferedWriter out = new BufferedWriter( new FileWriter( removed_nodes ) ); + for( final String remove_me : not_found ) { + out.write( remove_me ); + out.newLine(); + } + out.close(); + } + catch ( final IOException e ) { + ForesterUtil.fatalError( PRG_NAME, + "failed to write to [" + removed_nodes + "]: " + e.getLocalizedMessage() ); + } + ForesterUtil.programMessage( PRG_NAME, "wrote removed external nodes labels to: " + removed_nodes ); ForesterUtil.programMessage( PRG_NAME, "OK" ); - System.out.println(); } catch ( final Exception e ) { ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); } } - public static void checkForOutputFileWriteability( final File outfile ) { + private static void checkForOutputFileWriteability( final File outfile ) { final String error = ForesterUtil.isWritableFile( outfile ); if ( !ForesterUtil.isEmpty( error ) ) { ForesterUtil.fatalError( PRG_NAME, error ); @@ -122,19 +163,8 @@ public class gene_tree_preprocess { E_MAIL, WWW, ForesterUtil.getForesterLibraryInformation() ); - System.out.println( "Usage:" ); - System.out.println(); - System.out.println( PRG_NAME + " " ); - System.out.println(); - System.out.println( " options: " ); - System.out.println(); - System.out.println( " -" + FROM_OPTION + "=: from (msa column)" ); - System.out.println( " -" + TO_OPTION + "=: to (msa column)" ); - System.out.println( " or" ); - System.out.println( " -" + WINDOW_OPTION + "=: window size (msa columns)" ); - System.out.println( " -" + STEP_OPTION + "=: step size (msa columns)" ); - System.out.println(); - System.out.println(); + System.out.print( "Usage: " ); + System.out.println( PRG_NAME + " " ); System.out.println(); } }