in progress...
[jalview.git] / forester / java / src / org / forester / application / rio.java
1 // $Id:
2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
4 //
5 // Copyright (C) 2017 Christian M. Zmasek
6 // All rights reserved
7 //
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2.1 of the License, or (at your option) any later version.
12 //
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
17 //
18 // You should have received a copy of the GNU Lesser General Public
19 // License along with this library; if not, write to the Free Software
20 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
21 //
22 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
23
24 package org.forester.application;
25
26 import java.io.File;
27 import java.io.FilenameFilter;
28 import java.io.IOException;
29 import java.util.ArrayList;
30 import java.util.Arrays;
31 import java.util.List;
32
33 import org.forester.rio.RIO;
34 import org.forester.rio.RIO.REROOTING;
35 import org.forester.rio.RIOUtil;
36 import org.forester.sdi.SDIutil.ALGORITHM;
37 import org.forester.util.CommandLineArguments;
38 import org.forester.util.EasyWriter;
39 import org.forester.util.ForesterUtil;
40
41 public class rio {
42     //
43
44     public final static String  PRG_NAME                       = "rio";
45     public final static String  PRG_VERSION                    = "5.900";
46     public final static String  PRG_DATE                       = "170420";
47     final static private String E_MAIL                         = "phyloxml@gmail.com";
48     final static private String WWW                            = "https://sites.google.com/site/cmzmasek/home/software/forester";
49     final static private String HELP_OPTION_1                  = "help";
50     final static private String HELP_OPTION_2                  = "h";
51     final static private String GT_FIRST                       = "f";
52     final static private String GT_LAST                        = "l";
53     final static private String REROOTING_OPT                  = "r";
54     final static private String OUTGROUP                       = "o";
55     final static private String USE_SDIR                       = "s";
56     final static private String GENE_TREES_SUFFIX_OPTION       = "g";
57     final static private String MAPPINGS_DIR_OPTION            = "m";
58     final static private String MAPPINGS_SUFFIX_OPTION         = "ms";
59     final static private String CONSENSUS_TREES_DIR_OPTION     = "co";
60     final static private String CONSENSUS_TREES_SUFFIX_OPTION  = "cos";
61     final static private String MAPPINGS_SUFFIX_DEFAULT        = ".nim";
62     final static private String CONSENSUS_TREE_SUFFIX_DEFAULT  = ".xml";
63     final static private String ORTHOLOG_GROUPS_CUTOFF_OPTION  = "c";
64     final static private String GENE_TREES_SUFFIX_DEFAULT      = ".mlt";
65     final static private double ORTHOLOG_GROUPS_CUTOFF_DEFAULT = 0.5;
66
67     public static void main( final String[] args ) {
68         ForesterUtil.printProgramInformation( PRG_NAME,
69                                               "resampled inference of orthologs",
70                                               PRG_VERSION,
71                                               PRG_DATE,
72                                               E_MAIL,
73                                               WWW,
74                                               ForesterUtil.getForesterLibraryInformation() );
75         CommandLineArguments cla = null;
76         try {
77             cla = new CommandLineArguments( args );
78         }
79         catch ( final Exception e ) {
80             ForesterUtil.fatalError( e.getMessage() );
81         }
82         if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length == 0 ) ) {
83             printHelp();
84         }
85         if ( ( args.length < 3 ) || ( args.length > 11 ) || ( cla.getNumberOfNames() < 3 ) ) {
86             System.out.println();
87             System.out.println( "error: incorrect number of arguments" );
88             System.out.println();
89             printHelp();
90         }
91         final List<String> allowed_options = new ArrayList<String>();
92         allowed_options.add( GT_FIRST );
93         allowed_options.add( GT_LAST );
94         allowed_options.add( REROOTING_OPT );
95         allowed_options.add( OUTGROUP );
96         allowed_options.add( USE_SDIR );
97         allowed_options.add( GENE_TREES_SUFFIX_OPTION );
98         allowed_options.add( ORTHOLOG_GROUPS_CUTOFF_OPTION );
99         allowed_options.add( MAPPINGS_DIR_OPTION );
100         allowed_options.add( MAPPINGS_SUFFIX_OPTION );
101         allowed_options.add( CONSENSUS_TREES_DIR_OPTION );
102         allowed_options.add( CONSENSUS_TREES_SUFFIX_OPTION );
103         final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
104         if ( dissallowed_options.length() > 0 ) {
105             ForesterUtil.fatalError( "unknown option(s): " + dissallowed_options );
106         }
107         final File gene_trees_file = cla.getFile( 0 );
108         final boolean use_dir;
109         File indir = null;
110         File outdir = null;
111         if ( gene_trees_file.isDirectory() ) {
112             if ( !gene_trees_file.exists() ) {
113                 ForesterUtil.fatalError( "gene trees directory \"" + gene_trees_file + "\" does not exist" );
114             }
115             if ( gene_trees_file.listFiles().length < 1 ) {
116                 ForesterUtil.fatalError( "gene trees directory \"" + gene_trees_file + "\" is empty" );
117             }
118             use_dir = true;
119             indir = gene_trees_file;
120         }
121         else {
122             use_dir = false;
123         }
124         final File species_tree_file = cla.getFile( 1 );
125         File orthology_outtable = null;
126         if ( use_dir ) {
127             outdir = cla.getFile( 2 );
128         }
129         else {
130             orthology_outtable = cla.getFile( 2 );
131         }
132         File logfile;
133         if ( use_dir ) {
134             if ( ( cla.getNumberOfNames() < 4 ) ) {
135                 System.out.println();
136                 System.out.println( "error: incorrect number of arguments" );
137                 System.out.println();
138                 printHelp();
139             }
140             logfile = cla.getFile( 3 );
141             if ( logfile.exists() ) {
142                 ForesterUtil.fatalError( "\"" + logfile + "\" already exists" );
143             }
144         }
145         else {
146             if ( cla.getNumberOfNames() > 3 ) {
147                 logfile = cla.getFile( 3 );
148                 if ( logfile.exists() ) {
149                     ForesterUtil.fatalError( "\"" + logfile + "\" already exists" );
150                 }
151             }
152             else {
153                 logfile = null;
154             }
155         }
156         boolean sdir = false;
157         if ( cla.isOptionSet( USE_SDIR ) ) {
158             if ( cla.isOptionHasAValue( USE_SDIR ) ) {
159                 ForesterUtil.fatalError( "no value allowed for -" + USE_SDIR );
160             }
161             sdir = true;
162             if ( !use_dir && logfile != null ) {
163                 ForesterUtil.fatalError( "no logfile output for SDIR algorithm" );
164             }
165         }
166         String outgroup = null;
167         if ( cla.isOptionSet( OUTGROUP ) ) {
168             if ( sdir ) {
169                 ForesterUtil.fatalError( "no outgroup option for SDIR algorithm" );
170             }
171             if ( use_dir ) {
172                 ForesterUtil.fatalError( "no outgroup option for operating on gene trees directory" );
173             }
174             if ( !cla.isOptionHasAValue( OUTGROUP ) ) {
175                 ForesterUtil.fatalError( "no value for -" + OUTGROUP );
176             }
177             outgroup = cla.getOptionValueAsCleanString( OUTGROUP );
178         }
179         REROOTING rerooting = REROOTING.BY_ALGORITHM;
180         if ( cla.isOptionSet( REROOTING_OPT ) ) {
181             if ( !cla.isOptionHasAValue( REROOTING_OPT ) ) {
182                 ForesterUtil.fatalError( "no value for -" + REROOTING_OPT );
183             }
184             if ( sdir ) {
185                 ForesterUtil.fatalError( "no re-rooting option for SDIR algorithm" );
186             }
187             final String rerooting_str = cla.getOptionValueAsCleanString( REROOTING_OPT ).toLowerCase();
188             if ( rerooting_str.equals( "none" ) ) {
189                 rerooting = REROOTING.NONE;
190             }
191             else if ( rerooting_str.equals( "midpoint" ) ) {
192                 rerooting = REROOTING.MIDPOINT;
193             }
194             else if ( rerooting_str.equals( "outgroup" ) ) {
195                 if ( use_dir ) {
196                     ForesterUtil.fatalError( "no outgroup option for operating on gene trees directory" );
197                 }
198                 rerooting = REROOTING.OUTGROUP;
199             }
200             else {
201                 ForesterUtil
202                         .fatalError( "values for re-rooting are: 'none', 'midpoint', or 'outgroup' (minizming duplications is default)" );
203             }
204         }
205         if ( ForesterUtil.isEmpty( outgroup ) && ( rerooting == REROOTING.OUTGROUP ) ) {
206             ForesterUtil.fatalError( "selected re-rooting by outgroup, but outgroup not set" );
207         }
208         if ( !ForesterUtil.isEmpty( outgroup ) && ( rerooting != REROOTING.OUTGROUP ) ) {
209             ForesterUtil.fatalError( "outgroup set, but selected re-rooting by other approach" );
210         }
211         int gt_first = RIO.DEFAULT_RANGE;
212         int gt_last = RIO.DEFAULT_RANGE;
213         if ( cla.isOptionSet( GT_FIRST ) ) {
214             if ( sdir ) {
215                 ForesterUtil.fatalError( "no gene tree range option for SDIR algorithm" );
216             }
217             if ( !cla.isOptionHasAValue( GT_FIRST ) ) {
218                 ForesterUtil.fatalError( "no value for -" + GT_FIRST );
219             }
220             try {
221                 gt_first = cla.getOptionValueAsInt( GT_FIRST );
222             }
223             catch ( final IOException e ) {
224                 ForesterUtil.fatalError( "could not parse integer for -" + GT_FIRST + " option" );
225             }
226             if ( gt_first < 0 ) {
227                 ForesterUtil.fatalError( "attempt to set index of first tree to analyze to: " + gt_first );
228             }
229         }
230         if ( cla.isOptionSet( GT_LAST ) ) {
231             if ( sdir ) {
232                 ForesterUtil.fatalError( "no gene tree range option for SDIR algorithm" );
233             }
234             if ( !cla.isOptionHasAValue( GT_LAST ) ) {
235                 ForesterUtil.fatalError( "no value for -" + GT_LAST );
236             }
237             try {
238                 gt_last = cla.getOptionValueAsInt( GT_LAST );
239             }
240             catch ( final IOException e ) {
241                 ForesterUtil.fatalError( "could not parse integer for -" + GT_LAST + " option" );
242             }
243             if ( gt_last < 0 ) {
244                 ForesterUtil.fatalError( "attempt to set index of last tree to analyze to: " + gt_last );
245             }
246         }
247         if ( ( ( gt_last != RIO.DEFAULT_RANGE ) && ( gt_first != RIO.DEFAULT_RANGE ) ) && ( ( gt_last < gt_first ) ) ) {
248             ForesterUtil.fatalError( "attempt to set range (0-based) of gene to analyze to: from " + gt_first + " to "
249                     + gt_last );
250         }
251         double ortholog_group_cutoff = ORTHOLOG_GROUPS_CUTOFF_DEFAULT;
252         if ( cla.isOptionSet( ORTHOLOG_GROUPS_CUTOFF_OPTION ) ) {
253             if ( sdir ) {
254                 ForesterUtil.fatalError( "ortholog groups cutoff for SDIR algorithm" );
255             }
256             if ( !cla.isOptionHasAValue( ORTHOLOG_GROUPS_CUTOFF_OPTION ) ) {
257                 ForesterUtil.fatalError( "no value for -" + ORTHOLOG_GROUPS_CUTOFF_OPTION );
258             }
259             try {
260                 ortholog_group_cutoff = cla.getOptionValueAsDouble( ORTHOLOG_GROUPS_CUTOFF_OPTION );
261             }
262             catch ( final IOException e ) {
263                 ForesterUtil.fatalError( "could not parse double for -" + ORTHOLOG_GROUPS_CUTOFF_OPTION + " option" );
264             }
265             if ( ortholog_group_cutoff < 0 ) {
266                 ForesterUtil.fatalError( "attempt to set ortholog groups cutoff to: " + ortholog_group_cutoff );
267             }
268             if ( ortholog_group_cutoff > 1 ) {
269                 ForesterUtil.fatalError( "attempt to set ortholog groups cutoff to: " + ortholog_group_cutoff );
270             }
271         }
272         if ( !use_dir ) {
273             ForesterUtil.fatalErrorIfFileNotReadable( gene_trees_file );
274         }
275         final String gene_trees_suffix;
276         if ( cla.isOptionSet( GENE_TREES_SUFFIX_OPTION ) ) {
277             if ( !use_dir ) {
278                 ForesterUtil.fatalError( "no gene tree suffix option when operating on indivual gene trees" );
279             }
280             if ( !cla.isOptionHasAValue( GENE_TREES_SUFFIX_OPTION ) ) {
281                 ForesterUtil.fatalError( "no value for -" + GENE_TREES_SUFFIX_OPTION );
282             }
283             gene_trees_suffix = cla.getOptionValueAsCleanString( GENE_TREES_SUFFIX_OPTION );
284         }
285         else {
286             gene_trees_suffix = GENE_TREES_SUFFIX_DEFAULT;
287         }
288         final boolean perform_id_mapping;
289         final File id_mapping_dir;
290         if ( cla.isOptionSet( MAPPINGS_DIR_OPTION ) ) {
291             id_mapping_dir = new File( cla.getOptionValue( MAPPINGS_DIR_OPTION ) );
292             perform_id_mapping = true;
293             if ( !use_dir ) {
294                 ForesterUtil.fatalError( "no id mapping when operating on indivual gene trees" );
295             }
296             if ( !id_mapping_dir.exists() ) {
297                 ForesterUtil.fatalError( "id mappings directory \"" + id_mapping_dir + "\" does not exist" );
298             }
299             if ( !id_mapping_dir.isDirectory() ) {
300                 ForesterUtil.fatalError( "id mappings directory \"" + id_mapping_dir + "\" is not a directory" );
301             }
302             if ( id_mapping_dir.listFiles().length < 1 ) {
303                 ForesterUtil.fatalError( "id mappings directory \"" + id_mapping_dir + "\" is empty" );
304             }
305         }
306         else {
307             id_mapping_dir = null;
308             perform_id_mapping = false;
309         }
310         final String id_mapping_suffix;
311         if ( cla.isOptionSet( MAPPINGS_SUFFIX_OPTION ) ) {
312             if ( !use_dir ) {
313                 ForesterUtil.fatalError( "no id mapping file suffix option when operating on indivual gene trees" );
314             }
315             if ( !perform_id_mapping ) {
316                 ForesterUtil.fatalError( "no id mapping directory given" );
317             }
318             if ( !cla.isOptionHasAValue( MAPPINGS_SUFFIX_OPTION ) ) {
319                 ForesterUtil.fatalError( "no value for -" + MAPPINGS_SUFFIX_OPTION );
320             }
321             id_mapping_suffix = cla.getOptionValueAsCleanString( MAPPINGS_SUFFIX_OPTION );
322         }
323         else {
324             id_mapping_suffix = MAPPINGS_SUFFIX_DEFAULT;
325         }
326         boolean perform_gsdir_on_best_tree;
327         final File best_trees_indir;
328         if ( cla.isOptionSet( CONSENSUS_TREES_DIR_OPTION ) ) {
329             best_trees_indir = new File( cla.getOptionValue( CONSENSUS_TREES_DIR_OPTION ) );
330             perform_gsdir_on_best_tree = true;
331             if ( !use_dir ) {
332                 ForesterUtil
333                         .fatalError( "no consensus (\"best\") gene tree GSDIR analysis when operating on individual gene trees" );
334             }
335             if ( !best_trees_indir.exists() ) {
336                 ForesterUtil.fatalError( "consensus (\"best\") gene tree directory \"" + best_trees_indir
337                         + "\" does not exist" );
338             }
339             if ( !best_trees_indir.isDirectory() ) {
340                 ForesterUtil.fatalError( "consensus (\"best\") gene tree directory \"" + best_trees_indir
341                         + "\" is not a directory" );
342             }
343             if ( best_trees_indir.listFiles().length < 1 ) {
344                 ForesterUtil
345                         .fatalError( "consensus (\"best\") gene tree directory \"" + best_trees_indir + "\" is empty" );
346             }
347         }
348         else {
349             best_trees_indir = null;
350             perform_gsdir_on_best_tree = false;
351         }
352         final String best_trees_suffix;
353         if ( cla.isOptionSet( CONSENSUS_TREES_SUFFIX_OPTION ) ) {
354             if ( !use_dir ) {
355                 ForesterUtil
356                         .fatalError( "no consensus (\"best\") gene tree suffix option when operating on individual gene trees" );
357             }
358             if ( !perform_gsdir_on_best_tree ) {
359                 ForesterUtil.fatalError( "no consensus (\"best\") gene tree directory given" );
360             }
361             if ( !cla.isOptionHasAValue( CONSENSUS_TREES_SUFFIX_OPTION ) ) {
362                 ForesterUtil.fatalError( "no value for -" + CONSENSUS_TREES_SUFFIX_OPTION );
363             }
364             best_trees_suffix = cla.getOptionValueAsCleanString( CONSENSUS_TREES_SUFFIX_OPTION );
365         }
366         else {
367             best_trees_suffix = CONSENSUS_TREE_SUFFIX_DEFAULT;
368         }
369         ForesterUtil.fatalErrorIfFileNotReadable( species_tree_file );
370         if ( !use_dir && orthology_outtable.exists() ) {
371             ForesterUtil.fatalError( "\"" + orthology_outtable + "\" already exists" );
372         }
373         long time = 0;
374         try {
375             if ( use_dir ) {
376                 System.out.println( "Gene trees in-dir                   :\t" + indir.getCanonicalPath() );
377                 System.out.println( "Gene trees suffix                   :\t" + gene_trees_suffix );
378             }
379             else {
380                 System.out.println( "Gene trees                          :\t" + gene_trees_file.getCanonicalPath() );
381             }
382             System.out.println( "Species tree                        :\t" + species_tree_file.getCanonicalPath() );
383         }
384         catch ( final IOException e ) {
385             ForesterUtil.fatalError( e.getLocalizedMessage() );
386         }
387         if ( perform_id_mapping ) {
388             try {
389                 System.out.println( "Id mappings in-dir                  :\t" + id_mapping_dir.getCanonicalPath() );
390             }
391             catch ( IOException e ) {
392                 ForesterUtil.fatalError( e.getLocalizedMessage() );
393             }
394             System.out.println( "Id mappings suffix                  :\t" + id_mapping_suffix );
395         }
396         if ( perform_gsdir_on_best_tree ) {
397             try {
398                 System.out.println( "Consensus (\"best\") gene trees in-dir:\t" + best_trees_indir.getCanonicalPath() );
399             }
400             catch ( IOException e ) {
401                 ForesterUtil.fatalError( e.getLocalizedMessage() );
402             }
403             System.out.println( "Consensus (\"best\") gene trees suffix:\t" + best_trees_suffix );
404         }
405         if ( use_dir ) {
406             System.out.println( "Out-dir                             :\t" + outdir );
407         }
408         else {
409             System.out.println( "All vs all orthology results table  :\t" + orthology_outtable );
410         }
411         if ( logfile != null ) {
412             System.out.println( "Logfile                             :\t" + logfile );
413         }
414         System.out.println( "Ortholog groups cutoff              :\t" + ortholog_group_cutoff );
415         if ( gt_first != RIO.DEFAULT_RANGE ) {
416             System.out.println( "First gene tree to analyze          :\t" + gt_first );
417         }
418         if ( gt_last != RIO.DEFAULT_RANGE ) {
419             System.out.println( "Last gene tree to analyze           :\t" + gt_last );
420         }
421         String rerooting_str = "";
422         switch ( rerooting ) {
423             case BY_ALGORITHM: {
424                 rerooting_str = "by minimizing duplications";
425                 break;
426             }
427             case MIDPOINT: {
428                 rerooting_str = "by midpoint method";
429                 break;
430             }
431             case OUTGROUP: {
432                 rerooting_str = "by outgroup: " + outgroup;
433                 break;
434             }
435             case NONE: {
436                 rerooting_str = "none";
437                 break;
438             }
439         }
440         System.out.println( "Re-rooting                          : \t" + rerooting_str );
441         if ( !sdir ) {
442             System.out.println( "Non binary species tree             :\tallowed" );
443         }
444         else {
445             System.out.println( "Non binary species tree             :\tdisallowed" );
446         }
447         time = System.currentTimeMillis();
448         final ALGORITHM algorithm;
449         if ( sdir ) {
450             algorithm = ALGORITHM.SDIR;
451         }
452         else {
453             algorithm = ALGORITHM.GSDIR;
454         }
455         EasyWriter log = null;
456         if ( use_dir ) {
457             if ( outdir.exists() ) {
458                 if ( !outdir.isDirectory() ) {
459                     ForesterUtil.fatalError( PRG_NAME,
460                                              "out-directory [" + outdir + "] already exists but is not a directory" );
461                 }
462             }
463             else {
464                 final boolean success = outdir.mkdirs();
465                 if ( !success ) {
466                     ForesterUtil.fatalError( PRG_NAME, "could not create out-directory [" + outdir + "]" );
467                 }
468             }
469             final String species_tree_file_name = species_tree_file.getName();
470             final File gene_trees_files[] = indir.listFiles( new FilenameFilter() {
471
472                 @Override
473                 public boolean accept( final File dir, final String name ) {
474                     return ( ( name.endsWith( gene_trees_suffix ) ) && !( name.equals( species_tree_file_name ) ) );
475                 }
476             } );
477             if ( gene_trees_files.length < 1 ) {
478                 ForesterUtil.fatalError( PRG_NAME,
479                                          "in-directory [" + indir
480                                                  + "] does not contain any gene tree files with suffix "
481                                                  + gene_trees_suffix );
482             }
483             try {
484                 log = ForesterUtil.createEasyWriter( logfile );
485             }
486             catch ( final IOException e ) {
487                 ForesterUtil.fatalError( PRG_NAME, "could not create [" + logfile + "]" );
488             }
489             Arrays.sort( gene_trees_files );
490             try {
491                 log.print( "# program" );
492                 log.print( "\t" );
493                 log.print( PRG_NAME );
494                 log.println();
495                 log.print( "# version" );
496                 log.print( "\t" );
497                 log.print( PRG_VERSION );
498                 log.println();
499                 log.print( "# date" );
500                 log.print( "\t" );
501                 log.print( PRG_DATE );
502                 log.println();
503                 log.print( "# Algorithm " );
504                 log.print( "\t" );
505                 log.print( algorithm.toString() );
506                 log.println();
507                 log.print( "# Gene trees in-dir" );
508                 log.print( "\t" );
509                 log.print( indir.getCanonicalPath() );
510                 log.println();
511                 log.print( "# Gene trees suffix" );
512                 log.print( "\t" );
513                 log.print( gene_trees_suffix );
514                 log.println();
515                 log.print( "# Species tree" );
516                 log.print( "\t" );
517                 log.print( species_tree_file.getCanonicalPath() );
518                 log.println();
519                 if ( perform_id_mapping ) {
520                     log.print( "# Id mappings in-dir" );
521                     log.print( "\t" );
522                     log.print( id_mapping_dir.getCanonicalPath() );
523                     log.println();
524                     log.print( "# Id mappings suffix" );
525                     log.print( "\t" );
526                     log.print( id_mapping_suffix );
527                     log.println();
528                 }
529                 if ( perform_gsdir_on_best_tree ) {
530                     log.print( "# Consensus (\"best\") gene tree dir" );
531                     log.print( "\t" );
532                     log.print( best_trees_indir.getCanonicalPath() );
533                     log.println();
534                     log.print( "# Consensus (\"best\") gene tree suffix" );
535                     log.print( "\t" );
536                     log.print( best_trees_suffix );
537                     log.println();
538                 }
539                 log.print( "# Out-dir" );
540                 log.print( "\t" );
541                 log.print( outdir.getCanonicalPath() );
542                 log.println();
543                 log.print( "# Logfile" );
544                 log.print( "\t" );
545                 log.print( logfile.getCanonicalPath() );
546                 log.println();
547                 log.print( "# Ortholog groups cutoff" );
548                 log.print( "\t" );
549                 log.print( Double.toString( ortholog_group_cutoff ) );
550                 log.println();
551                 if ( gt_first != RIO.DEFAULT_RANGE ) {
552                     log.print( "# First gene tree to analyze" );
553                     log.print( "\t" );
554                     log.print( Integer.toString( gt_first ) );
555                     log.println();
556                 }
557                 if ( gt_last != RIO.DEFAULT_RANGE ) {
558                     log.print( "# Last gene tree to analyze" );
559                     log.print( "\t" );
560                     log.print( Integer.toString( gt_last ) );
561                     log.println();
562                 }
563                 log.print( "# Re-rooting" );
564                 log.print( "\t" );
565                 log.print( rerooting_str );
566                 log.println();
567                 log.print( "# Non binary species tree" );
568                 log.print( "\t" );
569                 if ( !sdir ) {
570                     log.print( "allowed" );
571                 }
572                 else {
573                     log.print( "disallowed" );
574                 }
575                 log.println();
576                 log.println();
577                 log.print( "NAME" );
578                 log.print( "\t" );
579                 log.print( "EXT NODES" );
580                 log.print( "\t" );
581                 log.print( ortholog_group_cutoff + " O GROUPS" );
582                 log.print( "\t" );
583                 log.print( "0.05 O GROUPS" );
584                 log.print( "\t" );
585                 log.print( "0.25 O GROUPS" );
586                 log.print( "\t" );
587                 log.print( "0.5 O GROUPS" );
588                 log.print( "\t" );
589                 log.print( "0.75 O GROUPS" );
590                 log.print( "\t" );
591                 log.print( "0.95 O GROUPS" );
592                 log.print( "\t" );
593                 if ( perform_gsdir_on_best_tree ) {
594                     log.print( "BEST TREE DUP" );
595                     log.print( "\t" );
596                     log.print( "MEDIAN DUP - BEST TREE DUP" );
597                     log.print( "\t" );
598                 }
599                 log.print( "MEDIAN DUP" );
600                 log.print( "\t" );
601                 log.print( "MEAN DUP" );
602                 log.print( "\t" );
603                 log.print( "MEAN DUP SD" );
604                 log.print( "\t" );
605                 log.print( "MIN DUP" );
606                 log.print( "\t" );
607                 log.print( "MAX DUP" );
608                 log.print( "\t" );
609                 log.print( "REMOVED EXT NODES" );
610                 log.print( "\t" );
611                 log.print( "N" );
612                 log.println();
613             }
614             catch ( IOException e ) {
615                 ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() );
616             }
617             int counter = 1;
618             for( final File gf : gene_trees_files ) {
619                 String outname = gf.getName();
620                 System.out
621                         .print( "\r                                                                                            " );
622                 System.out.print( "\r" + counter + "/" + gene_trees_files.length + ": " + outname );
623                 counter++;
624                 if ( outname.indexOf( "." ) > 0 ) {
625                     outname = outname.substring( 0, outname.lastIndexOf( "." ) );
626                 }
627                 try {
628                     RIOUtil.executeAnalysis( gf,
629                                              species_tree_file,
630                                              new File( outdir.getCanonicalFile() + "/" + outname
631                                                      + RIOUtil.ORTHO_OUTTABLE_SUFFIX ),
632                                              new File( outdir.getCanonicalFile() + "/" + outname
633                                                      + RIOUtil.ORTHO_OUTTABLE_WITH_MAP_SUFFIX ),
634                                              new File( outdir.getCanonicalFile() + "/" + outname
635                                                      + RIOUtil.ORTHOLOG_GROUPS_SUFFIX ),
636                                              new File( outdir.getCanonicalFile() + "/" + outname
637                                                      + RIOUtil.LOGFILE_SUFFIX ),
638                                              outgroup,
639                                              rerooting,
640                                              gt_first,
641                                              gt_last,
642                                              new File( outdir.getCanonicalFile() + "/" + outname
643                                                      + RIOUtil.STRIPPED_SPECIES_TREE_SUFFIX ),
644                                              new File( outdir.getCanonicalFile() + "/" + outname
645                                                      + RIOUtil.OUT_MIN_DUP_GENE_TREE_SUFFIX ),
646                                              new File( outdir.getCanonicalFile() + "/" + outname
647                                                      + RIOUtil.OUT_MED_DUP_GENE_TREE_SUFFIX ),
648                                              true,
649                                              algorithm,
650                                              true,
651                                              log,
652                                              ortholog_group_cutoff,
653                                              perform_id_mapping,
654                                              id_mapping_dir,
655                                              id_mapping_suffix,
656                                              perform_gsdir_on_best_tree,
657                                              outdir,
658                                              best_trees_indir,
659                                              best_trees_suffix );
660                 }
661                 catch ( IOException e ) {
662                     ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() );
663                 }
664             }
665             System.out
666                     .print( "\r                                                                                        " );
667             System.out.println();
668         }
669         else {
670             String outname = ForesterUtil.removeFileExtension( orthology_outtable.toString() );
671             RIOUtil.executeAnalysis( gene_trees_file,
672                                      species_tree_file,
673                                      orthology_outtable,
674                                      null,
675                                      new File( outname + RIOUtil.ORTHOLOG_GROUPS_SUFFIX ),
676                                      logfile,
677                                      outgroup,
678                                      rerooting,
679                                      gt_first,
680                                      gt_last,
681                                      new File( outname + RIOUtil.STRIPPED_SPECIES_TREE_SUFFIX ),
682                                      new File( outname + RIOUtil.OUT_MIN_DUP_GENE_TREE_SUFFIX ),
683                                      new File( outname + RIOUtil.OUT_MED_DUP_GENE_TREE_SUFFIX ),
684                                      algorithm == ALGORITHM.GSDIR,
685                                      algorithm,
686                                      false,
687                                      null,
688                                      ortholog_group_cutoff,
689                                      false,
690                                      null,
691                                      null,
692                                      false,
693                                      null,
694                                      null,
695                                      null );
696         }
697         if ( !use_dir ) {
698             time = System.currentTimeMillis() - time;
699             System.out.println( "Time                                :\t" + time + "ms" );
700         }
701         else {
702             try {
703                 log.close();
704             }
705             catch ( IOException e ) {
706                 ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() );
707             }
708             time = System.currentTimeMillis() - time;
709             System.out.println( "Time                                :\t" + time + "ms" );
710         }
711         System.exit( 0 );
712     }
713
714     private final static void printHelp() {
715         System.out.println( "Usage" );
716         System.out.println();
717         System.out.println( PRG_NAME
718                 + " [options] <gene trees infile> <species tree infile> <all vs all orthology table outfile> [logfile]" );
719         System.out.println();
720         System.out.println( PRG_NAME + " [options] <gene trees indir> <species tree infile> <outdir> <logfile>" );
721         System.out.println();
722         System.out.println();
723         System.out.println( " Options" );
724         System.out.println( "  -" + GT_FIRST + "=<first>     : first gene tree to analyze (0-based index)" );
725         System.out.println( "  -" + GT_LAST + "=<last>      : last gene tree to analyze (0-based index)" );
726         System.out.println( "  -" + ORTHOLOG_GROUPS_CUTOFF_OPTION
727                 + "=<cutoff>    : cutoff value for ortholog groups (default: " + ORTHOLOG_GROUPS_CUTOFF_DEFAULT + ")" );
728         System.out.println( "  -" + REROOTING_OPT
729                 + "=<re-rooting>: re-rooting method for gene trees, possible values or 'none', 'midpoint'," );
730         System.out.println( "                   or 'outgroup' (default: by minizming duplications)" );
731         System.out.println( "  -" + OUTGROUP
732                 + "=<outgroup>  : for rooting by outgroup, name of outgroup (external gene tree node)" );
733         System.out.println( "  -" + USE_SDIR
734                 + "             : to use SDIR instead of GSDIR (faster, but non-binary species trees are" );
735         System.out.println( "                   disallowed, as are most options)" );
736         System.out.println( "  -" + GENE_TREES_SUFFIX_OPTION
737                 + "=<suffix>    : suffix for gene trees when operating on gene tree directories (default: "
738                 + GENE_TREES_SUFFIX_DEFAULT + ")" );
739         System.out.println( "  -" + MAPPINGS_DIR_OPTION + "=<dir>       : directory for id mapping files" );
740         System.out.println( "  -" + MAPPINGS_SUFFIX_OPTION + "=<suffix>   : suffix for id mapping files (default: "
741                 + MAPPINGS_SUFFIX_DEFAULT + ")" );
742         System.out.println( "  -" + CONSENSUS_TREES_DIR_OPTION
743                 + "=<dir>      : directory with consenus (\"best\") gene trees to be analyzed with GSDIR" );
744         System.out.println( "  -" + CONSENSUS_TREES_SUFFIX_OPTION
745                 + "=<suffix>  : suffix for consenus (\"best\") gene trees (default: " + CONSENSUS_TREE_SUFFIX_DEFAULT
746                 + ")" );
747         System.out.println();
748         System.out.println( " Formats" );
749         System.out
750                 .println( "  The gene trees, as well as the species tree, ideally are in phyloXML (www.phyloxml.org) format," );
751         System.out
752                 .println( "  but can also be in New Hamphshire (Newick) or Nexus format as long as species information can be" );
753         System.out
754                 .println( "  extracted from the gene names (e.g. \"HUMAN\" from \"BCL2_HUMAN\") and matched to a single species" );
755         System.out.println( "  in the species tree." );
756         System.out.println();
757         System.out.println( " Examples" );
758         System.out.println( "  rio -s gene_trees.nh species.xml outtable.tsv" );
759         System.out.println( "  rio gene_trees.nh species.xml outtable.tsv log.txt" );
760         System.out.println( "  rio -c=0.9 -f=10 -l=100 -r=none gene_trees.xml species.xml outtable.tsv log.txt" );
761         System.out.println( "  rio -g=.xml gene_trees_dir species.xml out_dir log.tsv" );
762         System.out.println( "  rio -g=.mlt -m=id_maps_dir -ms=.nim -c=0.8 gene_trees_dir species.xml out_dir log.tsv" );
763         System.out.println( "  rio -m=id_maps_dir -c=0.8 gene_trees_dir species.xml out_dir log.tsv" );
764         System.out
765                 .println( "  rio -m=id_maps_dir -co=consensus_dir -cos=.xml -c=0.8 gene_trees_dir species.xml out_dir log.tsv" );
766         System.out.println();
767         System.exit( -1 );
768     }
769 }