2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2012 Christian M. Zmasek
6 // Copyright (C) 2008-2012 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: www.phylosoft.org/forester
26 package org.forester.application;
28 import java.io.BufferedWriter;
30 import java.io.FileWriter;
31 import java.io.IOException;
32 import java.util.HashSet;
34 import java.util.SortedSet;
36 import org.forester.archaeopteryx.tools.SequenceDataRetriver;
37 import org.forester.io.parsers.util.ParserUtils;
38 import org.forester.io.writers.PhylogenyWriter;
39 import org.forester.phylogeny.Phylogeny;
40 import org.forester.phylogeny.PhylogenyMethods;
41 import org.forester.phylogeny.PhylogenyNode;
42 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
43 import org.forester.phylogeny.factories.PhylogenyFactory;
44 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
45 import org.forester.util.CommandLineArguments;
46 import org.forester.util.ForesterUtil;
// Command-line class for gene tree preprocessing (see PRG_DESC below).
// The constants hold the help option names tested in main() and the program
// metadata; PRG_NAME is the first argument of the ForesterUtil message and
// error calls throughout this class.
// NOTE(review): every line in this view carries a leading number and the
// numbering has gaps, so some lines of the original file appear to be missing.
48 public class gene_tree_preprocess {
// Option names tested with CommandLineArguments.isOptionSet() in main().
50 final static private String HELP_OPTION_1 = "help";
51 final static private String HELP_OPTION_2 = "h";
// Program name passed to ForesterUtil.fatalError / programMessage / printProgramInformation.
52 final static private String PRG_NAME = "gene_tree_preprocess";
// Remaining program metadata. No use of these five constants is visible in this
// view; presumably they are the printProgramInformation arguments in printHelp(),
// whose argument lines are not shown here -- TODO confirm.
53 final static private String PRG_DESC = "gene tree preprocessing for SDI analysis";
54 final static private String PRG_VERSION = "1.00";
55 final static private String PRG_DATE = "2012.06.07";
56 final static private String E_MAIL = "phylosoft@gmail.com";
57 final static private String WWW = "www.phylosoft.org/forester/";
// Program entry point.
// Reads a phylogeny from the file given as the first command-line argument,
// calls SequenceDataRetriver.obtainSeqInformation on it, passes every name in the
// returned set to PhylogenyMethods.removeNode, and then writes three files whose
// names are derived from the input file name:
//   <input>_preprocessed_gene_tree.phylo.xml  the resulting tree (phyloXML)
//   <input>_species_present.txt               scientific names found on external nodes
//   <input>_removed_nodes.txt                 the names that were passed to removeNode
// Failures are reported through ForesterUtil.fatalError.
// NOTE(review): several lines of this method are not visible in this view (the
// try openers, the declaration of phy, closing braces, the help-branch body).
59 public static void main( final String[] args ) {
61 final CommandLineArguments cla = new CommandLineArguments( args );
// Taken when a help option is set or when args does not hold exactly one element.
// NOTE(review): the body of this branch is not visible here; presumably it prints
// the help text and exits -- confirm against the full file.
62 if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length != 1 ) ) {
66 final File in = cla.getFile( 0 );
68 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
// Only the first element of the array returned by factory.create is kept.
70 phy = factory.create( in, ParserUtils.createParserDependingOnFileType( in, true ) )[ 0 ];
72 catch ( final IOException e ) {
73 ForesterUtil.fatalError( PRG_NAME,
74 "failed to read phylogeny from [" + in + "]: " + e.getLocalizedMessage() );
// Output file names: input path with ForesterUtil.removeSuffix applied, plus a fixed ending.
76 final File outtree = new File( ForesterUtil.removeSuffix( in.toString() )
77 + "_preprocessed_gene_tree.phylo.xml" );
78 final File removed_nodes = new File( ForesterUtil.removeSuffix( in.toString() ) + "_removed_nodes.txt" );
79 final File present_species = new File( ForesterUtil.removeSuffix( in.toString() ) + "_species_present.txt" );
// All three outputs are checked before obtainSeqInformation is called.
80 checkForOutputFileWriteability( outtree );
81 checkForOutputFileWriteability( removed_nodes );
82 checkForOutputFileWriteability( present_species );
// A tree with fewer than two external nodes is reported as a fatal error.
83 if ( phy.getNumberOfExternalNodes() < 2 ) {
84 ForesterUtil.fatalError( PRG_NAME, "phylogeny has " + phy.getNumberOfExternalNodes()
85 + " external node(s), aborting" );
// NOTE(review): judging by the variable name, the returned set holds names of nodes
// for which no sequence information was found; the meaning of the boolean argument
// is not visible here -- confirm against SequenceDataRetriver.
87 final SortedSet<String> not_found = SequenceDataRetriver.obtainSeqInformation( phy, true );
// Each returned name is looked up with phy.getNode and the node is removed from phy.
88 for( final String remove_me : not_found ) {
89 // System.out.println( " not found: " + remove_me );
90 PhylogenyMethods.removeNode( phy.getNode( remove_me ), phy );
// Same minimum-size check again, now on the tree after the removals.
92 if ( phy.getNumberOfExternalNodes() < 2 ) {
93 ForesterUtil.fatalError( PRG_NAME,
94 "after removal of unresolvable external nodes, phylogeny has "
95 + phy.getNumberOfExternalNodes() + " external node(s), aborting" );
// Output 1: the tree, written as phyloXML.
98 final PhylogenyWriter writer = new PhylogenyWriter();
99 writer.toPhyloXML( phy, 0, outtree );
101 catch ( final IOException e ) {
102 ForesterUtil.fatalError( PRG_NAME, "failed to write to [" + outtree + "]: " + e.getLocalizedMessage() );
104 ForesterUtil.programMessage( PRG_NAME, "wrote output phylogeny to: " + outtree );
// Output 2: scientific names of external nodes. species_found records the names
// already written so that each distinct name is written only once.
105 final Set<String> species_found = new HashSet<String>();
// NOTE(review): no close() of this writer is visible in this view; confirm that it
// is closed, including on the IOException path.
107 final BufferedWriter out = new BufferedWriter( new FileWriter( present_species ) );
108 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
109 final PhylogenyNode node = iter.next();
// Nodes without taxonomy data, or with an empty scientific name, are skipped.
110 if ( node.getNodeData().isHasTaxonomy() ) {
111 final String sn = node.getNodeData().getTaxonomy().getScientificName();
112 if ( !ForesterUtil.isEmpty( sn ) ) {
113 if ( !species_found.contains( sn ) ) {
114 species_found.add( sn );
// Calls the same accessor chain that produced sn above.
115 out.write( node.getNodeData().getTaxonomy().getScientificName() );
123 catch ( final IOException e ) {
124 ForesterUtil.fatalError( PRG_NAME,
125 "failed to write to [" + present_species + "]: " + e.getLocalizedMessage() );
127 ForesterUtil.programMessage( PRG_NAME, "wrote present species to: " + present_species );
// Output 3: every name in not_found, i.e. the names passed to removeNode above.
// NOTE(review): as with the previous writer, no close() is visible in this view.
129 final BufferedWriter out = new BufferedWriter( new FileWriter( removed_nodes ) );
130 for( final String remove_me : not_found ) {
131 out.write( remove_me );
136 catch ( final IOException e ) {
137 ForesterUtil.fatalError( PRG_NAME,
138 "failed to write to [" + removed_nodes + "]: " + e.getLocalizedMessage() );
140 ForesterUtil.programMessage( PRG_NAME, "wrote removed external nodes labels to: " + removed_nodes );
141 ForesterUtil.programMessage( PRG_NAME, "OK" );
// Catch-all for any other exception; the matching try opener is not visible in this view.
// NOTE(review): Throwable.getMessage() may return null, in which case the reported
// error text carries no detail -- consider including the exception type.
143 catch ( final Exception e ) {
144 ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
// Checks that outfile can be used as an output file.
// ForesterUtil.isWritableFile returns a string for outfile; a non-empty result is
// treated as an error message and passed to ForesterUtil.fatalError together with
// PRG_NAME. An empty result means nothing is reported.
// outfile: the intended output file to check.
// NOTE(review): whether fatalError terminates the program is not visible here.
148 public static void checkForOutputFileWriteability( final File outfile ) {
149 final String error = ForesterUtil.isWritableFile( outfile );
150 if ( !ForesterUtil.isEmpty( error ) ) {
151 ForesterUtil.fatalError( PRG_NAME, error );
// Prints the program information via ForesterUtil.printProgramInformation, then a
// one-line usage message and an empty line on standard output.
// NOTE(review): the arguments between PRG_NAME and getForesterLibraryInformation()
// are not visible in this view, and neither is the end of this method.
155 private static void printHelp() {
156 ForesterUtil.printProgramInformation( PRG_NAME,
162 ForesterUtil.getForesterLibraryInformation() );
// Usage line: "Usage: " followed by the program name and the single expected argument.
163 System.out.print( "Usage: " );
164 System.out.println( PRG_NAME + " <input phylogeny file>" );
165 System.out.println();