in progress
[jalview.git] / forester / java / src / org / forester / application / support_statistics.java
1 // $Id:
2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
4 //
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
7 // All rights reserved
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
22 //
23 // Contact: phylosoft @ gmail . com
24 // WWW: www.phylosoft.org/forester
25
26 package org.forester.application;
27
28 import java.io.File;
29 import java.io.IOException;
30 import java.io.PrintWriter;
31 import java.util.ArrayList;
32 import java.util.List;
33
34 import org.forester.io.parsers.PhylogenyParser;
35 import org.forester.phylogeny.Phylogeny;
36 import org.forester.phylogeny.PhylogenyMethods;
37 import org.forester.phylogeny.PhylogenyNode;
38 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
39 import org.forester.phylogeny.factories.PhylogenyFactory;
40 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
41 import org.forester.util.BasicDescriptiveStatistics;
42 import org.forester.util.CommandLineArguments;
43 import org.forester.util.DescriptiveStatistics;
44 import org.forester.util.ForesterUtil;
45
46 public final class support_statistics {
47
48     final static private int    PLACES            = 2;
49     final static private String HELP_OPTION       = "help";
50     final static private String OUTPUTFILE_OPTION = "o";
51     final static private String PRG_NAME          = "support_statistics";
52     final static private String PRG_VERSION       = "1.0";
53     final static private String PRG_DATE          = "2008.08.29";
54
55     private static StringBuffer analyze( final File[] phylogenies_infiles, final Phylogeny[] phylogenies ) {
56         final DescriptiveStatistics[] dss = new DescriptiveStatistics[ phylogenies.length ];
57         for( int i = 0; i < phylogenies.length; i++ ) {
58             dss[ i ] = new BasicDescriptiveStatistics();
59             final Phylogeny p = phylogenies[ i ];
60             for( final PhylogenyNodeIterator iter = p.iteratorPostorder(); iter.hasNext(); ) {
61                 final PhylogenyNode node = iter.next();
62                 if ( !node.isRoot() && !node.isExternal() ) {
63                     double s = PhylogenyMethods.getConfidenceValue( node );
64                     if ( s < 0.0 ) {
65                         s = 0.0;
66                     }
67                     dss[ i ].addValue( s );
68                 }
69             }
70         }
71         DescriptiveStatistics dss_comp = null;
72         if ( dss.length > 2 ) {
73             dss_comp = new BasicDescriptiveStatistics();
74             for( final DescriptiveStatistics element : dss ) {
75                 dss_comp.addValue( element.arithmeticMean() );
76             }
77         }
78         int max_length = 30;
79         for( int i = 0; i < phylogenies.length; i++ ) {
80             final int l = phylogenies_infiles[ i ].getName().length();
81             if ( l > max_length ) {
82                 max_length = l;
83             }
84         }
85         final StringBuffer sb = new StringBuffer();
86         sb.append( "\t" + ForesterUtil.normalizeString( "name:", max_length, true, ' ' ) + "\t" );
87         sb.append( "median:" + "\t" );
88         sb.append( "mean:" + "\t" );
89         sb.append( "sd:" + "\t" );
90         sb.append( "min:" + "\t" );
91         sb.append( "max:" + "\t" );
92         sb.append( "n:" + "\t" );
93         if ( dss_comp != null ) {
94             sb.append( "\"z-score\":" );
95         }
96         sb.append( ForesterUtil.getLineSeparator() );
97         for( int i = 0; i < phylogenies.length; i++ ) {
98             sb.append( i + 1 + ":\t"
99                     + ForesterUtil.normalizeString( phylogenies_infiles[ i ].getName(), max_length, true, ' ' ) + "\t" );
100             sb.append( ForesterUtil.round( dss[ i ].median(), support_statistics.PLACES ) + "\t" );
101             sb.append( ForesterUtil.round( dss[ i ].arithmeticMean(), support_statistics.PLACES ) + "\t" );
102             try {
103                 sb.append( ForesterUtil.round( dss[ i ].sampleStandardDeviation(), support_statistics.PLACES ) + "\t" );
104             }
105             catch ( final ArithmeticException ex ) {
106                 sb.append( "n/a\t" );
107             }
108             sb.append( ForesterUtil.round( dss[ i ].getMin(), support_statistics.PLACES ) + "\t" );
109             sb.append( ForesterUtil.round( dss[ i ].getMax(), support_statistics.PLACES ) + "\t" );
110             sb.append( dss[ i ].getN() + "\t" );
111             if ( dss_comp != null ) {
112                 final double z_score = dss_comp.sampleStandardUnit( dss[ i ].arithmeticMean() );
113                 sb.append( ForesterUtil.round( z_score, support_statistics.PLACES ) + "\t" );
114             }
115             sb.append( ForesterUtil.getLineSeparator() );
116         }
117         if ( dss_comp != null ) {
118             sb.append( ForesterUtil.getLineSeparator() );
119             sb.append( "\t" + ForesterUtil.normalizeString( "values for support means:", max_length, true, ' ' )
120                     + "\t\t" );
121             sb.append( ForesterUtil.round( dss_comp.arithmeticMean(), support_statistics.PLACES ) + "\t" );
122             sb.append( ForesterUtil.round( dss_comp.sampleStandardDeviation(), support_statistics.PLACES ) + "\t" );
123             sb.append( ForesterUtil.round( dss_comp.getMin(), support_statistics.PLACES ) + "\t" );
124             sb.append( ForesterUtil.round( dss_comp.getMax(), support_statistics.PLACES ) + "\t" );
125         }
126         return sb;
127     }
128
129     public static void main( final String args[] ) {
130         ForesterUtil.printProgramInformation( support_statistics.PRG_NAME,
131                                               support_statistics.PRG_VERSION,
132                                               support_statistics.PRG_DATE );
133         if ( ( args.length < 1 ) ) {
134             System.out.println();
135             System.out.println( "wrong number of arguments" );
136             System.out.println();
137             support_statistics.printHelp();
138             System.exit( -1 );
139         }
140         CommandLineArguments cla = null;
141         try {
142             cla = new CommandLineArguments( args );
143         }
144         catch ( final Exception e ) {
145             ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
146         }
147         if ( cla.isOptionSet( support_statistics.HELP_OPTION ) ) {
148             System.out.println();
149             support_statistics.printHelp();
150             System.exit( 0 );
151         }
152         final List<String> allowed_options = new ArrayList<String>();
153         allowed_options.add( support_statistics.OUTPUTFILE_OPTION );
154         final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
155         if ( dissallowed_options.length() > 0 ) {
156             ForesterUtil.fatalError( support_statistics.PRG_NAME, "unknown option(s): " + dissallowed_options );
157         }
158         final File[] phylogenies_infiles = new File[ cla.getNumberOfNames() ];
159         for( int i = 0; i < phylogenies_infiles.length; ++i ) {
160             phylogenies_infiles[ i ] = cla.getFile( i );
161         }
162         File outfile = null;
163         if ( cla.isOptionSet( support_statistics.OUTPUTFILE_OPTION ) ) {
164             try {
165                 outfile = new File( cla.getOptionValue( support_statistics.OUTPUTFILE_OPTION ) );
166             }
167             catch ( final IllegalArgumentException e ) {
168                 ForesterUtil.fatalError( support_statistics.PRG_NAME, "error in command line: " + e.getMessage() );
169             }
170             final String error = ForesterUtil.isWritableFile( outfile );
171             if ( error != null ) {
172                 ForesterUtil.fatalError( support_statistics.PRG_NAME, error );
173             }
174         }
175         final Phylogeny[] phylogenies = new Phylogeny[ phylogenies_infiles.length ];
176         for( int i = 0; i < phylogenies_infiles.length; i++ ) {
177             try {
178                 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
179                 final PhylogenyParser pp = ForesterUtil
180                         .createParserDependingOnFileType( phylogenies_infiles[ i ], true );
181                 phylogenies[ i ] = factory.create( phylogenies_infiles[ i ], pp )[ 0 ];
182             }
183             catch ( final IOException e ) {
184                 ForesterUtil.fatalError( support_statistics.PRG_NAME, "could not read \"" + phylogenies_infiles[ i ]
185                         + "\": " + e.getMessage() );
186             }
187         }
188         final StringBuffer sb = support_statistics.analyze( phylogenies_infiles, phylogenies );
189         System.out.println();
190         System.out.println( sb );
191         System.out.println();
192         if ( outfile != null ) {
193             try {
194                 final PrintWriter out = new PrintWriter( outfile );
195                 out.println( sb );
196                 out.flush();
197                 out.close();
198                 System.out.println( "wrote file: " + outfile );
199             }
200             catch ( final IOException e ) {
201                 ForesterUtil.fatalError( support_statistics.PRG_NAME, "failed to write output: " + e.getMessage() );
202             }
203         }
204         System.out.println( support_statistics.PRG_NAME + ": successfully completed" );
205         System.out.println();
206     }
207
208     private static void printHelp() {
209         System.out.println( "usage:" );
210         System.out.println();
211         System.out.println( support_statistics.PRG_NAME + " [-o=<outfile>] <phylogeny infile 1> "
212                 + "<phylogeny infile 2> <phylogeny infile 3> ..." );
213         System.out.println();
214         System.out.println( " options: " );
215         System.out.println();
216         System.out.println( " -o=<outfile> : write output to file" );
217         System.out.println();
218     }
219 }