ae8083a00e2b6abc8f822278c56d1fb19c874a47
[jalview.git] / forester / java / src / org / forester / application / support_statistics.java
1 // $Id:
2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
4 //
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
7 // All rights reserved
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
22 //
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
25
26 package org.forester.application;
27
28 import java.io.File;
29 import java.io.IOException;
30 import java.io.PrintWriter;
31 import java.util.ArrayList;
32 import java.util.List;
33
34 import org.forester.io.parsers.PhylogenyParser;
35 import org.forester.io.parsers.util.ParserUtils;
36 import org.forester.phylogeny.Phylogeny;
37 import org.forester.phylogeny.PhylogenyMethods;
38 import org.forester.phylogeny.PhylogenyNode;
39 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
40 import org.forester.phylogeny.factories.PhylogenyFactory;
41 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
42 import org.forester.util.BasicDescriptiveStatistics;
43 import org.forester.util.CommandLineArguments;
44 import org.forester.util.DescriptiveStatistics;
45 import org.forester.util.ForesterUtil;
46
47 public final class support_statistics {
48
49     final static private int    PLACES            = 2;
50     final static private String HELP_OPTION       = "help";
51     final static private String OUTPUTFILE_OPTION = "o";
52     final static private String PRG_NAME          = "support_statistics";
53     final static private String PRG_VERSION       = "1.0";
54     final static private String PRG_DATE          = "2008.08.29";
55
56     private static StringBuffer analyze( final File[] phylogenies_infiles, final Phylogeny[] phylogenies ) {
57         final DescriptiveStatistics[] dss = new DescriptiveStatistics[ phylogenies.length ];
58         for( int i = 0; i < phylogenies.length; i++ ) {
59             dss[ i ] = new BasicDescriptiveStatistics();
60             final Phylogeny p = phylogenies[ i ];
61             for( final PhylogenyNodeIterator iter = p.iteratorPostorder(); iter.hasNext(); ) {
62                 final PhylogenyNode node = iter.next();
63                 if ( !node.isRoot() && !node.isExternal() ) {
64                     double s = PhylogenyMethods.getConfidenceValue( node );
65                     if ( s < 0.0 ) {
66                         s = 0.0;
67                     }
68                     dss[ i ].addValue( s );
69                 }
70             }
71         }
72         DescriptiveStatistics dss_comp = null;
73         if ( dss.length > 2 ) {
74             dss_comp = new BasicDescriptiveStatistics();
75             for( final DescriptiveStatistics element : dss ) {
76                 dss_comp.addValue( element.arithmeticMean() );
77             }
78         }
79         int max_length = 30;
80         for( int i = 0; i < phylogenies.length; i++ ) {
81             final int l = phylogenies_infiles[ i ].getName().length();
82             if ( l > max_length ) {
83                 max_length = l;
84             }
85         }
86         final StringBuffer sb = new StringBuffer();
87         sb.append( "\t" + ForesterUtil.normalizeString( "name:", max_length, true, ' ' ) + "\t" );
88         sb.append( "median:" + "\t" );
89         sb.append( "mean:" + "\t" );
90         sb.append( "sd:" + "\t" );
91         sb.append( "min:" + "\t" );
92         sb.append( "max:" + "\t" );
93         sb.append( "n:" + "\t" );
94         if ( dss_comp != null ) {
95             sb.append( "\"z-score\":" );
96         }
97         sb.append( ForesterUtil.getLineSeparator() );
98         for( int i = 0; i < phylogenies.length; i++ ) {
99             sb.append( i + 1 + ":\t"
100                     + ForesterUtil.normalizeString( phylogenies_infiles[ i ].getName(), max_length, true, ' ' ) + "\t" );
101             sb.append( ForesterUtil.round( dss[ i ].median(), support_statistics.PLACES ) + "\t" );
102             sb.append( ForesterUtil.round( dss[ i ].arithmeticMean(), support_statistics.PLACES ) + "\t" );
103             try {
104                 sb.append( ForesterUtil.round( dss[ i ].sampleStandardDeviation(), support_statistics.PLACES ) + "\t" );
105             }
106             catch ( final ArithmeticException ex ) {
107                 sb.append( "n/a\t" );
108             }
109             sb.append( ForesterUtil.round( dss[ i ].getMin(), support_statistics.PLACES ) + "\t" );
110             sb.append( ForesterUtil.round( dss[ i ].getMax(), support_statistics.PLACES ) + "\t" );
111             sb.append( dss[ i ].getN() + "\t" );
112             if ( dss_comp != null ) {
113                 final double z_score = dss_comp.sampleStandardUnit( dss[ i ].arithmeticMean() );
114                 sb.append( ForesterUtil.round( z_score, support_statistics.PLACES ) + "\t" );
115             }
116             sb.append( ForesterUtil.getLineSeparator() );
117         }
118         if ( dss_comp != null ) {
119             sb.append( ForesterUtil.getLineSeparator() );
120             sb.append( "\t" + ForesterUtil.normalizeString( "values for support means:", max_length, true, ' ' )
121                     + "\t\t" );
122             sb.append( ForesterUtil.round( dss_comp.arithmeticMean(), support_statistics.PLACES ) + "\t" );
123             sb.append( ForesterUtil.round( dss_comp.sampleStandardDeviation(), support_statistics.PLACES ) + "\t" );
124             sb.append( ForesterUtil.round( dss_comp.getMin(), support_statistics.PLACES ) + "\t" );
125             sb.append( ForesterUtil.round( dss_comp.getMax(), support_statistics.PLACES ) + "\t" );
126         }
127         return sb;
128     }
129
130     public static void main( final String args[] ) {
131         ForesterUtil.printProgramInformation( support_statistics.PRG_NAME,
132                                               support_statistics.PRG_VERSION,
133                                               support_statistics.PRG_DATE );
134         if ( ( args.length < 1 ) ) {
135             System.out.println();
136             System.out.println( "wrong number of arguments" );
137             System.out.println();
138             support_statistics.printHelp();
139             System.exit( -1 );
140         }
141         CommandLineArguments cla = null;
142         try {
143             cla = new CommandLineArguments( args );
144         }
145         catch ( final Exception e ) {
146             ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
147         }
148         if ( cla.isOptionSet( support_statistics.HELP_OPTION ) ) {
149             System.out.println();
150             support_statistics.printHelp();
151             System.exit( 0 );
152         }
153         final List<String> allowed_options = new ArrayList<String>();
154         allowed_options.add( support_statistics.OUTPUTFILE_OPTION );
155         final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
156         if ( dissallowed_options.length() > 0 ) {
157             ForesterUtil.fatalError( support_statistics.PRG_NAME, "unknown option(s): " + dissallowed_options );
158         }
159         final File[] phylogenies_infiles = new File[ cla.getNumberOfNames() ];
160         for( int i = 0; i < phylogenies_infiles.length; ++i ) {
161             phylogenies_infiles[ i ] = cla.getFile( i );
162         }
163         File outfile = null;
164         if ( cla.isOptionSet( support_statistics.OUTPUTFILE_OPTION ) ) {
165             try {
166                 outfile = new File( cla.getOptionValue( support_statistics.OUTPUTFILE_OPTION ) );
167             }
168             catch ( final IllegalArgumentException e ) {
169                 ForesterUtil.fatalError( support_statistics.PRG_NAME, "error in command line: " + e.getMessage() );
170             }
171             final String error = ForesterUtil.isWritableFile( outfile );
172             if ( error != null ) {
173                 ForesterUtil.fatalError( support_statistics.PRG_NAME, error );
174             }
175         }
176         final Phylogeny[] phylogenies = new Phylogeny[ phylogenies_infiles.length ];
177         for( int i = 0; i < phylogenies_infiles.length; i++ ) {
178             try {
179                 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
180                 final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( phylogenies_infiles[ i ], true );
181                 phylogenies[ i ] = factory.create( phylogenies_infiles[ i ], pp )[ 0 ];
182             }
183             catch ( final IOException e ) {
184                 ForesterUtil.fatalError( support_statistics.PRG_NAME, "could not read \"" + phylogenies_infiles[ i ]
185                         + "\": " + e.getMessage() );
186             }
187         }
188         final StringBuffer sb = support_statistics.analyze( phylogenies_infiles, phylogenies );
189         System.out.println();
190         System.out.println( sb );
191         System.out.println();
192         if ( outfile != null ) {
193             try {
194                 final PrintWriter out = new PrintWriter( outfile );
195                 out.println( sb );
196                 out.flush();
197                 out.close();
198                 System.out.println( "wrote file: " + outfile );
199             }
200             catch ( final IOException e ) {
201                 ForesterUtil.fatalError( support_statistics.PRG_NAME, "failed to write output: " + e.getMessage() );
202             }
203         }
204         System.out.println( support_statistics.PRG_NAME + ": successfully completed" );
205         System.out.println();
206     }
207
208     private static void printHelp() {
209         System.out.println( "usage:" );
210         System.out.println();
211         System.out.println( support_statistics.PRG_NAME + " [-o=<outfile>] <phylogeny infile 1> "
212                 + "<phylogeny infile 2> <phylogeny infile 3> ..." );
213         System.out.println();
214         System.out.println( " options: " );
215         System.out.println();
216         System.out.println( " -o=<outfile> : write output to file" );
217         System.out.println();
218     }
219 }