phylotastic hackathon at NESCENT 120607
[jalview.git] / forester / java / src / org / forester / application / gene_tree_preprocess.java
1 // $Id:
2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
4 //
5 // Copyright (C) 2008-2012 Christian M. Zmasek
6 // Copyright (C) 2008-2012 Burnham Institute for Medical Research
7 // All rights reserved
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
22 //
23 // Contact: phylosoft @ gmail . com
24 // WWW: www.phylosoft.org/forester
25
26 package org.forester.application;
27
28 import java.io.BufferedWriter;
29 import java.io.File;
30 import java.io.FileWriter;
31 import java.io.IOException;
32 import java.util.SortedSet;
33
34 import org.forester.archaeopteryx.tools.SequenceDataRetriver;
35 import org.forester.io.parsers.util.ParserUtils;
36 import org.forester.io.writers.PhylogenyWriter;
37 import org.forester.phylogeny.Phylogeny;
38 import org.forester.phylogeny.PhylogenyMethods;
39 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
40 import org.forester.phylogeny.factories.PhylogenyFactory;
41 import org.forester.util.CommandLineArguments;
42 import org.forester.util.ForesterUtil;
43
44 public class gene_tree_preprocess {
45
46     final static private String HELP_OPTION_1 = "help";
47     final static private String HELP_OPTION_2 = "h";
48     final static private String PRG_NAME      = "gene_tree_preprocess";
49     final static private String PRG_DESC      = "gene tree preprocessing for SDI analysis";
50     final static private String PRG_VERSION   = "1.00";
51     final static private String PRG_DATE      = "2012.06.07";
52     final static private String E_MAIL        = "phylosoft@gmail.com";
53     final static private String WWW           = "www.phylosoft.org/forester/";
54
55     public static void main( final String[] args ) {
56         try {
57             final CommandLineArguments cla = new CommandLineArguments( args );
58             if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length != 1 ) ) {
59                 printHelp();
60                 System.exit( 0 );
61             }
62             final File in = cla.getFile( 0 );
63             Phylogeny phy = null;
64             final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
65             try {
66                 phy = factory.create( in, ParserUtils.createParserDependingOnFileType( in, true ) )[ 0 ];
67             }
68             catch ( final IOException e ) {
69                 ForesterUtil.fatalError( PRG_NAME,
70                                          "failed to read target phylogenies from [" + in + "]: "
71                                                  + e.getLocalizedMessage() );
72             }
73             final File outtree = new File( ForesterUtil.removeSuffix( in.toString() )
74                     + "_preprocessed_gene_tree.phylo.xml" );
75             final File removed_nodes = new File( ForesterUtil.removeSuffix( in.toString() ) + "_removed_nodes.txt" );
76             checkForOutputFileWriteability( outtree );
77             checkForOutputFileWriteability( removed_nodes );
78             if ( phy.getNumberOfExternalNodes() < 2 ) {
79                 ForesterUtil.fatalError( PRG_NAME, "phylogeny has " + phy.getNumberOfExternalNodes()
80                         + " external node(s), aborting" );
81             }
82             final SortedSet<String> not_found = SequenceDataRetriver.obtainSeqInformation( phy, true );
83             for( final String remove_me : not_found ) {
84                 System.out.println( " not found: " + remove_me );
85                 PhylogenyMethods.removeNode( phy.getNode( remove_me ), phy );
86             }
87             if ( phy.getNumberOfExternalNodes() < 2 ) {
88                 ForesterUtil.fatalError( PRG_NAME,
89                                          "after removal of unresolvable external nodes, phylogeny has "
90                                                  + phy.getNumberOfExternalNodes() + " external node(s), aborting" );
91             }
92             try {
93                 final PhylogenyWriter writer = new PhylogenyWriter();
94                 writer.toPhyloXML( phy, 0, outtree );
95             }
96             catch ( final IOException e ) {
97                 ForesterUtil.fatalError( PRG_NAME, "failed to write to [" + outtree + "]: " + e.getLocalizedMessage() );
98             }
99             ForesterUtil.programMessage( PRG_NAME, "wrote output phylogeny to: " + outtree );
100             try {
101                 final BufferedWriter out = new BufferedWriter( new FileWriter( removed_nodes ) );
102                 for( final String remove_me : not_found ) {
103                     out.write( remove_me );
104                     out.newLine();
105                 }
106                 out.close();
107             }
108             catch ( final IOException e ) {
109                 ForesterUtil.fatalError( PRG_NAME,
110                                          "failed to write to [" + removed_nodes + "]: " + e.getLocalizedMessage() );
111             }
112             ForesterUtil.programMessage( PRG_NAME, "wrote removed external nodes labels to: " + removed_nodes );
113             ForesterUtil.programMessage( PRG_NAME, "OK" );
114         }
115         catch ( final Exception e ) {
116             ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
117         }
118     }
119
120     public static void checkForOutputFileWriteability( final File outfile ) {
121         final String error = ForesterUtil.isWritableFile( outfile );
122         if ( !ForesterUtil.isEmpty( error ) ) {
123             ForesterUtil.fatalError( PRG_NAME, error );
124         }
125     }
126
127     private static void printHelp() {
128         ForesterUtil.printProgramInformation( PRG_NAME,
129                                               PRG_DESC,
130                                               PRG_VERSION,
131                                               PRG_DATE,
132                                               E_MAIL,
133                                               WWW,
134                                               ForesterUtil.getForesterLibraryInformation() );
135         System.out.print( "Usage: " );
136         System.out.println( PRG_NAME + " <input phylogeny file>" );
137         System.out.println();
138     }
139 }