2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2009 Christian M. Zmasek
6 // Copyright (C) 2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.application;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;

import org.forester.go.GoId;
import org.forester.go.GoTerm;
import org.forester.go.GoUtils;
import org.forester.go.OBOparser;
import org.forester.util.BasicDescriptiveStatistics;
import org.forester.util.CommandLineArguments;
import org.forester.util.DescriptiveStatistics;
import org.forester.util.ForesterUtil;
import org.forester.util.GeneralTable;
47 private static final String ALL = "{ALL}";
48 final static private String HELP_OPTION_1 = "help";
49 final static private String HELP_OPTION_2 = "h";
50 final static private String PRG_NAME = "goac";
51 final static private String PRG_VERSION = "1.03";
52 final static private String PRG_DATE = "2010.04.21";
53 final static private String E_MAIL = "czmasek@burnham.org";
54 final static private String WWW = "www.phylosoft.org/forester/goac";
56 private static void addStats( final SortedMap<String, List<GoId>> data_to_be_analyzed,
57 final GeneralTable<String, Double> table ) {
58 for( final String go : table.getColumnIdentifiers() ) {
59 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
60 for( final String label : data_to_be_analyzed.keySet() ) {
61 if ( !label.equals( ALL ) ) {
62 final Double value = table.getValue( go, label );
63 stats.addValue( value == null ? 0.0 : value );
66 table.setValue( go, "{AVG}", stats.arithmeticMean() );
67 table.setValue( go, "{SUM}", stats.getSum() );
68 table.setValue( go, "{MED}", stats.median() );
69 if ( stats.getN() > 1 ) {
70 table.setValue( go, "{SD}", stats.sampleStandardDeviation() );
73 table.setValue( go, "{SD}", new Double( 0 ) );
75 table.setValue( go, "{MIN}", stats.getMin() );
76 table.setValue( go, "{MAX}", stats.getMax() );
80 public static void main( final String args[] ) {
81 CommandLineArguments cla = null;
83 cla = new CommandLineArguments( args );
85 catch ( final Exception e ) {
86 ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
88 if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length == 0 ) ) {
92 final List<String> allowed_options = new ArrayList<String>();
93 if ( cla.getNumberOfNames() != 3 ) {
97 final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
98 if ( dissallowed_options.length() > 0 ) {
99 ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options );
101 final File obofile = cla.getFile( 0 );
102 final File query_superterms_file = cla.getFile( 1 );
103 final File exp_file = cla.getFile( 2 );
104 final OBOparser parser = new OBOparser( obofile, OBOparser.ReturnType.BASIC_GO_TERM );
105 List<GoTerm> all_go_terms = null;
107 all_go_terms = parser.parse();
109 catch ( final IOException e ) {
110 ForesterUtil.fatalError( PRG_NAME, e.toString() );
112 final Map<GoId, GoTerm> goid_to_term_map = GoUtils.createGoIdToGoTermMap( all_go_terms );
113 final List<GoId> query_superterms_ids = new ArrayList<GoId>();
114 SortedMap<String, List<GoId>> query_superterms_id_raw = null;
116 query_superterms_id_raw = GoUtils.parseGoIds( query_superterms_file, "#", "" );
118 catch ( final IOException e ) {
119 ForesterUtil.printErrorMessage( PRG_NAME, e.getMessage() );
122 final List<GoId> queries = query_superterms_id_raw.get( "" );
123 for( final GoId id : queries ) {
124 if ( !goid_to_term_map.containsKey( id ) ) {
125 ForesterUtil.printErrorMessage( PRG_NAME, "\"" + id + "\" not present in \"" + obofile + "\"" );
128 query_superterms_ids.add( id );
130 SortedMap<String, List<GoId>> data_to_be_analyzed = null;
132 data_to_be_analyzed = GoUtils.parseGoIds( exp_file, "#", ">" );
134 catch ( final IOException e ) {
135 ForesterUtil.printErrorMessage( PRG_NAME, e.getMessage() );
138 final List<GoId> all_ids = new ArrayList<GoId>();
139 for( final String label : data_to_be_analyzed.keySet() ) {
140 final List<GoId> experiment_set_ids = data_to_be_analyzed.get( label );
141 for( final GoId go_id : experiment_set_ids ) {
142 if ( !goid_to_term_map.containsKey( go_id ) ) {
143 ForesterUtil.printErrorMessage( PRG_NAME, "GO id [" + go_id + "] not found in GO id to term map" );
146 all_ids.add( go_id );
149 if ( data_to_be_analyzed.size() > 1 ) {
150 data_to_be_analyzed.put( ALL, all_ids );
152 final GeneralTable<String, Double> table_counts = new GeneralTable<String, Double>();
153 final GeneralTable<String, Double> table_percentage = new GeneralTable<String, Double>();
154 for( final String label : data_to_be_analyzed.keySet() ) {
155 System.out.println();
156 System.out.println( label + "\t\t\t\t" );
157 final List<GoId> experiment_set_ids = data_to_be_analyzed.get( label );
158 Map<GoId, Integer> counts_id = null;
160 counts_id = GoUtils.countCategoriesId( query_superterms_ids, experiment_set_ids, goid_to_term_map );
162 catch ( final Exception e ) {
163 ForesterUtil.printErrorMessage( PRG_NAME, e.getMessage() );
167 for( final GoId id : counts_id.keySet() ) {
168 sum += counts_id.get( id );
171 table_counts.setValue( "{total}", label, ( double ) sum );
173 for( final GoId id : counts_id.keySet() ) {
174 final int counts = counts_id.get( id );
175 double percentage = 0.0;
177 percentage = ( 100.0 * counts ) / ( sum );
179 System.out.println( counts + "\t" + counts + "/" + sum + "\t" + percentage + "\t" + id + "\t"
180 + goid_to_term_map.get( id ).getName() );
181 table_counts.setValue( goid_to_term_map.get( id ).getName(), label, ( double ) counts );
182 table_percentage.setValue( goid_to_term_map.get( id ).getName(), label, percentage );
185 addStats( data_to_be_analyzed, table_counts );
186 addStats( data_to_be_analyzed, table_percentage );
187 System.out.println();
188 System.out.println();
189 System.out.println();
190 System.out.println( table_counts.toString( ForesterUtil.FORMATTER_3 ) );
191 System.out.println();
192 System.out.println();
193 System.out.println();
194 System.out.println( table_percentage.toString( ForesterUtil.FORMATTER_3 ) );
195 System.out.println();
198 private static void printHelp() {
199 ForesterUtil.printProgramInformation( PRG_NAME, PRG_VERSION, PRG_DATE, E_MAIL, WWW );
200 System.out.println( "Usage:" );
201 System.out.println();
204 + " <file with all GO terms, in 'obo' format> <file with ancestral term ids> <file with go ids to be analyzed>" );
205 System.out.println();
206 System.out.println();