moved to: https://sites.google.com/site/cmzmasek/home/software/forester
[jalview.git] / forester / java / src / org / forester / application / gene_tree_preprocess.java
1 // $Id:
2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
4 //
5 // Copyright (C) 2008-2012 Christian M. Zmasek
6 // Copyright (C) 2008-2012 Burnham Institute for Medical Research
7 // All rights reserved
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
22 //
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
25
26 package org.forester.application;
27
28 import java.io.BufferedWriter;
29 import java.io.File;
30 import java.io.FileWriter;
31 import java.io.IOException;
32 import java.util.SortedSet;
33 import java.util.TreeSet;
34
35 import org.forester.io.parsers.util.ParserUtils;
36 import org.forester.io.writers.PhylogenyWriter;
37 import org.forester.phylogeny.Phylogeny;
38 import org.forester.phylogeny.PhylogenyNode;
39 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
40 import org.forester.phylogeny.factories.PhylogenyFactory;
41 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
42 import org.forester.util.CommandLineArguments;
43 import org.forester.util.ForesterUtil;
44 import org.forester.ws.seqdb.SequenceDbWsTools;
45
46 public class gene_tree_preprocess {
47
48     final static private String HELP_OPTION_1           = "help";
49     final static private String HELP_OPTION_2           = "h";
50     final static private String PRG_NAME                = "gene_tree_preprocess";
51     final static private String PRG_DESC                = "gene tree preprocessing for SDI analysis";
52     final static private String PRG_VERSION             = "1.01";
53     final static private String PRG_DATE                = "2012.06.07";
54     final static private String E_MAIL                  = "phylosoft@gmail.com";
55     final static private String WWW                     = "www.phylosoft.org/forester";
56     private final static int    DEFAULT_LINES_TO_RETURN = 50;
57
58     public static void main( final String[] args ) {
59         try {
60             final CommandLineArguments cla = new CommandLineArguments( args );
61             if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length != 1 ) ) {
62                 printHelp();
63                 System.exit( 0 );
64             }
65             final File in = cla.getFile( 0 );
66             Phylogeny phy = null;
67             final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
68             try {
69                 phy = factory.create( in, ParserUtils.createParserDependingOnFileType( in, true ) )[ 0 ];
70             }
71             catch ( final IOException e ) {
72                 ForesterUtil.fatalError( PRG_NAME,
73                                          "failed to read phylogeny from [" + in + "]: " + e.getLocalizedMessage() );
74             }
75             final File outtree = new File( ForesterUtil.removeSuffix( in.toString() )
76                     + "_preprocessed_gene_tree.phylo.xml" );
77             final File removed_nodes = new File( ForesterUtil.removeSuffix( in.toString() ) + "_removed_nodes.txt" );
78             final File present_species = new File( ForesterUtil.removeSuffix( in.toString() ) + "_species_present.txt" );
79             checkForOutputFileWriteability( outtree );
80             checkForOutputFileWriteability( removed_nodes );
81             checkForOutputFileWriteability( present_species );
82             if ( phy.getNumberOfExternalNodes() < 2 ) {
83                 ForesterUtil.fatalError( PRG_NAME, "phylogeny has " + phy.getNumberOfExternalNodes()
84                         + " external node(s), aborting" );
85             }
86             final SortedSet<String> not_found = SequenceDbWsTools.obtainSeqInformation( phy,
87                                                                                         true,
88                                                                                         false,
89                                                                                         DEFAULT_LINES_TO_RETURN );
90             for( final String remove_me : not_found ) {
91                 phy.deleteSubtree( phy.getNode( remove_me ), true );
92             }
93             phy.clearHashIdToNodeMap();
94             phy.externalNodesHaveChanged();
95             if ( phy.getNumberOfExternalNodes() < 2 ) {
96                 ForesterUtil.fatalError( PRG_NAME,
97                                          "after removal of unresolvable external nodes, phylogeny has "
98                                                  + phy.getNumberOfExternalNodes() + " external node(s), aborting" );
99             }
100             try {
101                 final PhylogenyWriter writer = new PhylogenyWriter();
102                 writer.toPhyloXML( phy, 0, outtree );
103             }
104             catch ( final IOException e ) {
105                 ForesterUtil.fatalError( PRG_NAME, "failed to write to [" + outtree + "]: " + e.getLocalizedMessage() );
106             }
107             ForesterUtil.programMessage( PRG_NAME, "wrote output phylogeny to: " + outtree );
108             final SortedSet<String> species_set = new TreeSet<String>();
109             for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
110                 final PhylogenyNode node = iter.next();
111                 if ( node.getNodeData().isHasTaxonomy() ) {
112                     final String sn = node.getNodeData().getTaxonomy().getScientificName();
113                     if ( !ForesterUtil.isEmpty( sn ) ) {
114                         species_set.add( sn );
115                     }
116                 }
117             }
118             try {
119                 final BufferedWriter out = new BufferedWriter( new FileWriter( present_species ) );
120                 for( final String species : species_set ) {
121                     out.write( species );
122                     out.newLine();
123                 }
124                 out.close();
125             }
126             catch ( final IOException e ) {
127                 ForesterUtil.fatalError( PRG_NAME,
128                                          "failed to write to [" + present_species + "]: " + e.getLocalizedMessage() );
129             }
130             ForesterUtil.programMessage( PRG_NAME, "wrote present species to: " + present_species );
131             try {
132                 final BufferedWriter out = new BufferedWriter( new FileWriter( removed_nodes ) );
133                 for( final String remove_me : not_found ) {
134                     out.write( remove_me );
135                     out.newLine();
136                 }
137                 out.close();
138             }
139             catch ( final IOException e ) {
140                 ForesterUtil.fatalError( PRG_NAME,
141                                          "failed to write to [" + removed_nodes + "]: " + e.getLocalizedMessage() );
142             }
143             ForesterUtil.programMessage( PRG_NAME, "wrote removed external nodes labels to: " + removed_nodes );
144             ForesterUtil.programMessage( PRG_NAME, "OK" );
145         }
146         catch ( final Exception e ) {
147             ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
148         }
149     }
150
151     private static void checkForOutputFileWriteability( final File outfile ) {
152         final String error = ForesterUtil.isWritableFile( outfile );
153         if ( !ForesterUtil.isEmpty( error ) ) {
154             ForesterUtil.fatalError( PRG_NAME, error );
155         }
156     }
157
158     private static void printHelp() {
159         ForesterUtil.printProgramInformation( PRG_NAME,
160                                               PRG_DESC,
161                                               PRG_VERSION,
162                                               PRG_DATE,
163                                               E_MAIL,
164                                               WWW,
165                                               ForesterUtil.getForesterLibraryInformation() );
166         System.out.print( "Usage: " );
167         System.out.println( PRG_NAME + " <input phylogeny file>" );
168         System.out.println();
169     }
170 }