in progress...
[jalview.git] / forester / java / src / org / forester / application / rio.java
1 // $Id:
2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
4 //
5 // Copyright (C) 2017 Christian M. Zmasek
6 // All rights reserved
7 //
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2.1 of the License, or (at your option) any later version.
12 //
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
17 //
18 // You should have received a copy of the GNU Lesser General Public
19 // License along with this library; if not, write to the Free Software
20 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
21 //
22 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
23
24 package org.forester.application;
25
26 import java.io.File;
27 import java.io.FilenameFilter;
28 import java.io.IOException;
29 import java.util.ArrayList;
30 import java.util.Arrays;
31 import java.util.List;
32
33 import org.forester.rio.RIO;
34 import org.forester.rio.RIO.REROOTING;
35 import org.forester.rio.RIOUtil;
36 import org.forester.sdi.SDIutil.ALGORITHM;
37 import org.forester.util.CommandLineArguments;
38 import org.forester.util.EasyWriter;
39 import org.forester.util.ForesterUtil;
40
41 public class rio {
42     // Program identity, command-line option names and option defaults used by main().
43
       // Program name, version and date: shown in the start-up banner and written
       // to the header of the log file when operating on a gene trees directory.
44     public final static String  PRG_NAME                       = "rio";
45     public final static String  PRG_VERSION                    = "5.900";
46     public final static String  PRG_DATE                       = "170420";
       // Contact e-mail and project URL, shown in the start-up banner only.
47     final static private String E_MAIL                         = "phyloxml@gmail.com";
48     final static private String WWW                            = "https://sites.google.com/site/cmzmasek/home/software/forester";
       // Either help option makes main() call printHelp().
49     final static private String HELP_OPTION_1                  = "help";
50     final static private String HELP_OPTION_2                  = "h";
       // Index (0-based, non-negative integer) of the first / last gene tree to
       // analyze; both are rejected when the SDIR algorithm is selected.
51     final static private String GT_FIRST                       = "f";
52     final static private String GT_LAST                        = "l";
       // Re-rooting method: 'none', 'midpoint' or 'outgroup' (case-insensitive).
       // When the option is absent, REROOTING.BY_ALGORITHM is used.
53     final static private String REROOTING_OPT                  = "r";
       // Outgroup name; only accepted together with re-rooting by 'outgroup' and
       // never when operating on a gene trees directory or with SDIR.
54     final static private String OUTGROUP                       = "o";
       // Value-less flag selecting ALGORITHM.SDIR instead of ALGORITHM.GSDIR.
55     final static private String USE_SDIR                       = "s";
       // Suffix of the gene tree files to pick up (gene trees directory mode only).
56     final static private String GENE_TREES_SUFFIX_OPTION       = "g";
       // Id mappings directory and the suffix of the files inside it (gene trees
       // directory mode only; the suffix option requires the directory option).
57     final static private String MAPPINGS_DIR_OPTION            = "m";
58     final static private String MAPPINGS_SUFFIX_OPTION         = "ms";
       // Consensus ("best") gene trees directory and the suffix of the files inside
       // it (gene trees directory mode only; the suffix option requires the
       // directory option).
59     final static private String CONSENSUS_TREES_DIR_OPTION     = "co";
60     final static private String CONSENSUS_TREES_SUFFIX_OPTION  = "cos";
       // Suffixes used when the corresponding suffix option is not given.
61     final static private String MAPPINGS_SUFFIX_DEFAULT        = ".nim";
62     final static private String CONSENSUS_TREE_SUFFIX_DEFAULT  = ".xml";
       // Ortholog groups cutoff; main() rejects values below 0 or above 1 and
       // rejects the option altogether when SDIR is selected.
63     final static private String ORTHOLOG_GROUPS_CUTOFF_OPTION  = "c";
       // Gene tree file suffix used when GENE_TREES_SUFFIX_OPTION is not given.
64     final static private String GENE_TREES_SUFFIX_DEFAULT      = ".mlt";
       // Cutoff used when ORTHOLOG_GROUPS_CUTOFF_OPTION is not given.
65     final static private double ORTHOLOG_GROUPS_CUTOFF_DEFAULT = 0.5;
66
67     public static void main( final String[] args ) {
68         ForesterUtil.printProgramInformation( PRG_NAME,
69                                               "resampled inference of orthologs",
70                                               PRG_VERSION,
71                                               PRG_DATE,
72                                               E_MAIL,
73                                               WWW,
74                                               ForesterUtil.getForesterLibraryInformation() );
75         CommandLineArguments cla = null;
76         try {
77             cla = new CommandLineArguments( args );
78         }
79         catch ( final Exception e ) {
80             ForesterUtil.fatalError( e.getMessage() );
81         }
82         if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length == 0 ) ) {
83             printHelp();
84         }
85         if ( ( args.length < 3 ) || ( args.length > 11 ) || ( cla.getNumberOfNames() < 3 ) ) {
86             System.out.println();
87             System.out.println( "error: incorrect number of arguments" );
88             System.out.println();
89             printHelp();
90         }
91         final List<String> allowed_options = new ArrayList<String>();
92         allowed_options.add( GT_FIRST );
93         allowed_options.add( GT_LAST );
94         allowed_options.add( REROOTING_OPT );
95         allowed_options.add( OUTGROUP );
96         allowed_options.add( USE_SDIR );
97         allowed_options.add( GENE_TREES_SUFFIX_OPTION );
98         allowed_options.add( ORTHOLOG_GROUPS_CUTOFF_OPTION );
99         allowed_options.add( MAPPINGS_DIR_OPTION );
100         allowed_options.add( MAPPINGS_SUFFIX_OPTION );
101         allowed_options.add( CONSENSUS_TREES_DIR_OPTION );
102         allowed_options.add( CONSENSUS_TREES_SUFFIX_OPTION );
103         final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
104         if ( dissallowed_options.length() > 0 ) {
105             ForesterUtil.fatalError( "unknown option(s): " + dissallowed_options );
106         }
107         final File gene_trees_file = cla.getFile( 0 );
108         final boolean use_dir;
109         File indir = null;
110         File outdir = null;
111         if ( gene_trees_file.isDirectory() ) {
112             if ( !gene_trees_file.exists() ) {
113                 ForesterUtil.fatalError( "gene trees directory \"" + gene_trees_file + "\" does not exist" );
114             }
115             if ( gene_trees_file.listFiles().length < 1 ) {
116                 ForesterUtil.fatalError( "gene trees directory \"" + gene_trees_file + "\" is empty" );
117             }
118             use_dir = true;
119             indir = gene_trees_file;
120         }
121         else {
122             use_dir = false;
123         }
124         final File species_tree_file = cla.getFile( 1 );
125         File orthology_outtable = null;
126         if ( use_dir ) {
127             outdir = cla.getFile( 2 );
128         }
129         else {
130             orthology_outtable = cla.getFile( 2 );
131         }
132         File logfile;
133         if ( use_dir ) {
134             if ( ( cla.getNumberOfNames() < 4 ) ) {
135                 System.out.println();
136                 System.out.println( "error: incorrect number of arguments" );
137                 System.out.println();
138                 printHelp();
139             }
140             logfile = cla.getFile( 3 );
141             if ( logfile.exists() ) {
142                 ForesterUtil.fatalError( "\"" + logfile + "\" already exists" );
143             }
144         }
145         else {
146             if ( cla.getNumberOfNames() > 3 ) {
147                 logfile = cla.getFile( 3 );
148                 if ( logfile.exists() ) {
149                     ForesterUtil.fatalError( "\"" + logfile + "\" already exists" );
150                 }
151             }
152             else {
153                 logfile = null;
154             }
155         }
156         boolean sdir = false;
157         if ( cla.isOptionSet( USE_SDIR ) ) {
158             if ( cla.isOptionHasAValue( USE_SDIR ) ) {
159                 ForesterUtil.fatalError( "no value allowed for -" + USE_SDIR );
160             }
161             sdir = true;
162             if ( !use_dir && logfile != null ) {
163                 ForesterUtil.fatalError( "no logfile output for SDIR algorithm" );
164             }
165         }
166         String outgroup = null;
167         if ( cla.isOptionSet( OUTGROUP ) ) {
168             if ( sdir ) {
169                 ForesterUtil.fatalError( "no outgroup option for SDIR algorithm" );
170             }
171             if ( use_dir ) {
172                 ForesterUtil.fatalError( "no outgroup option for operating on gene trees directory" );
173             }
174             if ( !cla.isOptionHasAValue( OUTGROUP ) ) {
175                 ForesterUtil.fatalError( "no value for -" + OUTGROUP );
176             }
177             outgroup = cla.getOptionValueAsCleanString( OUTGROUP );
178         }
179         REROOTING rerooting = REROOTING.BY_ALGORITHM;
180         if ( cla.isOptionSet( REROOTING_OPT ) ) {
181             if ( !cla.isOptionHasAValue( REROOTING_OPT ) ) {
182                 ForesterUtil.fatalError( "no value for -" + REROOTING_OPT );
183             }
184             if ( sdir ) {
185                 ForesterUtil.fatalError( "no re-rooting option for SDIR algorithm" );
186             }
187             final String rerooting_str = cla.getOptionValueAsCleanString( REROOTING_OPT ).toLowerCase();
188             if ( rerooting_str.equals( "none" ) ) {
189                 rerooting = REROOTING.NONE;
190             }
191             else if ( rerooting_str.equals( "midpoint" ) ) {
192                 rerooting = REROOTING.MIDPOINT;
193             }
194             else if ( rerooting_str.equals( "outgroup" ) ) {
195                 if ( use_dir ) {
196                     ForesterUtil.fatalError( "no outgroup option for operating on gene trees directory" );
197                 }
198                 rerooting = REROOTING.OUTGROUP;
199             }
200             else {
201                 ForesterUtil
202                         .fatalError( "values for re-rooting are: 'none', 'midpoint', or 'outgroup' (minizming duplications is default)" );
203             }
204         }
205         if ( ForesterUtil.isEmpty( outgroup ) && ( rerooting == REROOTING.OUTGROUP ) ) {
206             ForesterUtil.fatalError( "selected re-rooting by outgroup, but outgroup not set" );
207         }
208         if ( !ForesterUtil.isEmpty( outgroup ) && ( rerooting != REROOTING.OUTGROUP ) ) {
209             ForesterUtil.fatalError( "outgroup set, but selected re-rooting by other approach" );
210         }
211         int gt_first = RIO.DEFAULT_RANGE;
212         int gt_last = RIO.DEFAULT_RANGE;
213         if ( cla.isOptionSet( GT_FIRST ) ) {
214             if ( sdir ) {
215                 ForesterUtil.fatalError( "no gene tree range option for SDIR algorithm" );
216             }
217             if ( !cla.isOptionHasAValue( GT_FIRST ) ) {
218                 ForesterUtil.fatalError( "no value for -" + GT_FIRST );
219             }
220             try {
221                 gt_first = cla.getOptionValueAsInt( GT_FIRST );
222             }
223             catch ( final IOException e ) {
224                 ForesterUtil.fatalError( "could not parse integer for -" + GT_FIRST + " option" );
225             }
226             if ( gt_first < 0 ) {
227                 ForesterUtil.fatalError( "attempt to set index of first tree to analyze to: " + gt_first );
228             }
229         }
230         if ( cla.isOptionSet( GT_LAST ) ) {
231             if ( sdir ) {
232                 ForesterUtil.fatalError( "no gene tree range option for SDIR algorithm" );
233             }
234             if ( !cla.isOptionHasAValue( GT_LAST ) ) {
235                 ForesterUtil.fatalError( "no value for -" + GT_LAST );
236             }
237             try {
238                 gt_last = cla.getOptionValueAsInt( GT_LAST );
239             }
240             catch ( final IOException e ) {
241                 ForesterUtil.fatalError( "could not parse integer for -" + GT_LAST + " option" );
242             }
243             if ( gt_last < 0 ) {
244                 ForesterUtil.fatalError( "attempt to set index of last tree to analyze to: " + gt_last );
245             }
246         }
247         if ( ( ( gt_last != RIO.DEFAULT_RANGE ) && ( gt_first != RIO.DEFAULT_RANGE ) ) && ( ( gt_last < gt_first ) ) ) {
248             ForesterUtil.fatalError( "attempt to set range (0-based) of gene to analyze to: from " + gt_first + " to "
249                     + gt_last );
250         }
251         double ortholog_group_cutoff = ORTHOLOG_GROUPS_CUTOFF_DEFAULT;
252         if ( cla.isOptionSet( ORTHOLOG_GROUPS_CUTOFF_OPTION ) ) {
253             if ( sdir ) {
254                 ForesterUtil.fatalError( "ortholog groups cutoff for SDIR algorithm" );
255             }
256             if ( !cla.isOptionHasAValue( ORTHOLOG_GROUPS_CUTOFF_OPTION ) ) {
257                 ForesterUtil.fatalError( "no value for -" + ORTHOLOG_GROUPS_CUTOFF_OPTION );
258             }
259             try {
260                 ortholog_group_cutoff = cla.getOptionValueAsDouble( ORTHOLOG_GROUPS_CUTOFF_OPTION );
261             }
262             catch ( final IOException e ) {
263                 ForesterUtil.fatalError( "could not parse double for -" + ORTHOLOG_GROUPS_CUTOFF_OPTION + " option" );
264             }
265             if ( ortholog_group_cutoff < 0 ) {
266                 ForesterUtil.fatalError( "attempt to set ortholog groups cutoff to: " + ortholog_group_cutoff );
267             }
268             if ( ortholog_group_cutoff > 1 ) {
269                 ForesterUtil.fatalError( "attempt to set ortholog groups cutoff to: " + ortholog_group_cutoff );
270             }
271         }
272         if ( !use_dir ) {
273             ForesterUtil.fatalErrorIfFileNotReadable( gene_trees_file );
274         }
275         final String gene_trees_suffix;
276         if ( cla.isOptionSet( GENE_TREES_SUFFIX_OPTION ) ) {
277             if ( !use_dir ) {
278                 ForesterUtil.fatalError( "no gene tree suffix option when operating on indivual gene trees" );
279             }
280             if ( !cla.isOptionHasAValue( GENE_TREES_SUFFIX_OPTION ) ) {
281                 ForesterUtil.fatalError( "no value for -" + GENE_TREES_SUFFIX_OPTION );
282             }
283             gene_trees_suffix = cla.getOptionValueAsCleanString( GENE_TREES_SUFFIX_OPTION );
284         }
285         else {
286             gene_trees_suffix = GENE_TREES_SUFFIX_DEFAULT;
287         }
288         final boolean perform_id_mapping;
289         final File id_mapping_dir;
290         if ( cla.isOptionSet( MAPPINGS_DIR_OPTION ) ) {
291             id_mapping_dir = new File( cla.getOptionValue( MAPPINGS_DIR_OPTION ) );
292             perform_id_mapping = true;
293             if ( !use_dir ) {
294                 ForesterUtil.fatalError( "no id mapping when operating on indivual gene trees" );
295             }
296             if ( !id_mapping_dir.exists() ) {
297                 ForesterUtil.fatalError( "id mappings directory \"" + id_mapping_dir + "\" does not exist" );
298             }
299             if ( !id_mapping_dir.isDirectory() ) {
300                 ForesterUtil.fatalError( "id mappings directory \"" + id_mapping_dir + "\" is not a directory" );
301             }
302             if ( id_mapping_dir.listFiles().length < 1 ) {
303                 ForesterUtil.fatalError( "id mappings directory \"" + id_mapping_dir + "\" is empty" );
304             }
305         }
306         else {
307             id_mapping_dir = null;
308             perform_id_mapping = false;
309         }
310         final String id_mapping_suffix;
311         if ( cla.isOptionSet( MAPPINGS_SUFFIX_OPTION ) ) {
312             if ( !use_dir ) {
313                 ForesterUtil.fatalError( "no id mapping file suffix option when operating on indivual gene trees" );
314             }
315             if ( !perform_id_mapping ) {
316                 ForesterUtil.fatalError( "no id mapping directory given" );
317             }
318             if ( !cla.isOptionHasAValue( MAPPINGS_SUFFIX_OPTION ) ) {
319                 ForesterUtil.fatalError( "no value for -" + MAPPINGS_SUFFIX_OPTION );
320             }
321             id_mapping_suffix = cla.getOptionValueAsCleanString( MAPPINGS_SUFFIX_OPTION );
322         }
323         else {
324             id_mapping_suffix = MAPPINGS_SUFFIX_DEFAULT;
325         }
326         boolean perform_gsdir_on_best_tree;
327         final File best_trees_indir;
328         if ( cla.isOptionSet( CONSENSUS_TREES_DIR_OPTION ) ) {
329             best_trees_indir = new File( cla.getOptionValue( CONSENSUS_TREES_DIR_OPTION ) );
330             perform_gsdir_on_best_tree = true;
331             if ( !use_dir ) {
332                 ForesterUtil
333                         .fatalError( "no consensus (\"best\") gene tree GSDIR analysis when operating on individual gene trees" );
334             }
335             if ( !best_trees_indir.exists() ) {
336                 ForesterUtil.fatalError( "consensus (\"best\") gene tree directory \"" + best_trees_indir
337                         + "\" does not exist" );
338             }
339             if ( !best_trees_indir.isDirectory() ) {
340                 ForesterUtil.fatalError( "consensus (\"best\") gene tree directory \"" + best_trees_indir
341                         + "\" is not a directory" );
342             }
343             if ( best_trees_indir.listFiles().length < 1 ) {
344                 ForesterUtil
345                         .fatalError( "consensus (\"best\") gene tree directory \"" + best_trees_indir + "\" is empty" );
346             }
347         }
348         else {
349             best_trees_indir = null;
350             perform_gsdir_on_best_tree = false;
351         }
352         final String best_trees_suffix;
353         if ( cla.isOptionSet( CONSENSUS_TREES_SUFFIX_OPTION ) ) {
354             if ( !use_dir ) {
355                 ForesterUtil
356                         .fatalError( "no consensus (\"best\") gene tree suffix option when operating on individual gene trees" );
357             }
358             if ( !perform_gsdir_on_best_tree ) {
359                 ForesterUtil.fatalError( "no consensus (\"best\") gene tree directory given" );
360             }
361             if ( !cla.isOptionHasAValue( CONSENSUS_TREES_SUFFIX_OPTION ) ) {
362                 ForesterUtil.fatalError( "no value for -" + CONSENSUS_TREES_SUFFIX_OPTION );
363             }
364             best_trees_suffix = cla.getOptionValueAsCleanString( CONSENSUS_TREES_SUFFIX_OPTION );
365         }
366         else {
367             best_trees_suffix = CONSENSUS_TREE_SUFFIX_DEFAULT;
368         }
369         ////////////////////////////////
370         ForesterUtil.fatalErrorIfFileNotReadable( species_tree_file );
371         if ( !use_dir && orthology_outtable.exists() ) {
372             ForesterUtil.fatalError( "\"" + orthology_outtable + "\" already exists" );
373         }
374         long time = 0;
375         try {
376             if ( use_dir ) {
377                 System.out.println( "Gene trees in-dir                   :\t" + indir.getCanonicalPath() );
378                 System.out.println( "Gene trees suffix                   :\t" + gene_trees_suffix );
379             }
380             else {
381                 System.out.println( "Gene trees                          :\t" + gene_trees_file.getCanonicalPath() );
382             }
383             System.out.println( "Species tree                        :\t" + species_tree_file.getCanonicalPath() );
384         }
385         catch ( final IOException e ) {
386             ForesterUtil.fatalError( e.getLocalizedMessage() );
387         }
388         if ( perform_id_mapping ) {
389             try {
390                 System.out.println( "Id mappings in-dir                  :\t" + id_mapping_dir.getCanonicalPath() );
391             }
392             catch ( IOException e ) {
393                 ForesterUtil.fatalError( e.getLocalizedMessage() );
394             }
395             System.out.println( "Id mappings suffix                  :\t" + id_mapping_suffix );
396         }
397         if ( perform_gsdir_on_best_tree ) {
398             try {
399                 System.out.println( "Consensus (\"best\") gene tree dir    :\t" + best_trees_indir.getCanonicalPath() );
400             }
401             catch ( IOException e ) {
402                 ForesterUtil.fatalError( e.getLocalizedMessage() );
403             }
404             System.out.println( "Consensus (\"best\") gene tree suffix :\t" + best_trees_suffix );
405         }
406         if ( use_dir ) {
407             System.out.println( "Out-dir                             :\t" + outdir );
408         }
409         else {
410             System.out.println( "All vs all orthology results table  :\t" + orthology_outtable );
411         }
412         if ( logfile != null ) {
413             System.out.println( "Logfile                             :\t" + logfile );
414         }
415         System.out.println( "Ortholog groups cutoff              :\t" + ortholog_group_cutoff );
416         if ( gt_first != RIO.DEFAULT_RANGE ) {
417             System.out.println( "First gene tree to analyze          :\t" + gt_first );
418         }
419         if ( gt_last != RIO.DEFAULT_RANGE ) {
420             System.out.println( "Last gene tree to analyze           :\t" + gt_last );
421         }
422         String rerooting_str = "";
423         switch ( rerooting ) {
424             case BY_ALGORITHM: {
425                 rerooting_str = "by minimizing duplications";
426                 break;
427             }
428             case MIDPOINT: {
429                 rerooting_str = "by midpoint method";
430                 break;
431             }
432             case OUTGROUP: {
433                 rerooting_str = "by outgroup: " + outgroup;
434                 break;
435             }
436             case NONE: {
437                 rerooting_str = "none";
438                 break;
439             }
440         }
441         System.out.println( "Re-rooting                          : \t" + rerooting_str );
442         if ( !sdir ) {
443             System.out.println( "Non binary species tree             :\tallowed" );
444         }
445         else {
446             System.out.println( "Non binary species tree             :\tdisallowed" );
447         }
448         time = System.currentTimeMillis();
449         final ALGORITHM algorithm;
450         if ( sdir ) {
451             algorithm = ALGORITHM.SDIR;
452         }
453         else {
454             algorithm = ALGORITHM.GSDIR;
455         }
456         EasyWriter log = null;
457         if ( use_dir ) {
458             if ( outdir.exists() ) {
459                 if ( !outdir.isDirectory() ) {
460                     ForesterUtil.fatalError( PRG_NAME,
461                                              "out-directory [" + outdir + "] already exists but is not a directory" );
462                 }
463             }
464             else {
465                 final boolean success = outdir.mkdirs();
466                 if ( !success ) {
467                     ForesterUtil.fatalError( PRG_NAME, "could not create out-directory [" + outdir + "]" );
468                 }
469             }
470             final String species_tree_file_name = species_tree_file.getName();
471             final File gene_trees_files[] = indir.listFiles( new FilenameFilter() {
472
473                 @Override
474                 public boolean accept( final File dir, final String name ) {
475                     return ( ( name.endsWith( gene_trees_suffix ) ) && !( name.equals( species_tree_file_name ) ) );
476                 }
477             } );
478             if ( gene_trees_files.length < 1 ) {
479                 ForesterUtil.fatalError( PRG_NAME,
480                                          "in-directory [" + indir
481                                                  + "] does not contain any gene tree files with suffix "
482                                                  + gene_trees_suffix );
483             }
484             try {
485                 log = ForesterUtil.createEasyWriter( logfile );
486             }
487             catch ( final IOException e ) {
488                 ForesterUtil.fatalError( PRG_NAME, "could not create [" + logfile + "]" );
489             }
490             Arrays.sort( gene_trees_files );
491             try {
492                 log.print( "# program" );
493                 log.print( "\t" );
494                 log.print( PRG_NAME );
495                 log.println();
496                 log.print( "# version" );
497                 log.print( "\t" );
498                 log.print( PRG_VERSION );
499                 log.println();
500                 log.print( "# date" );
501                 log.print( "\t" );
502                 log.print( PRG_DATE );
503                 log.println();
504                 log.print( "# Algorithm " );
505                 log.print( "\t" );
506                 log.print( algorithm.toString() );
507                 log.println();
508                 log.print( "# Gene trees in-dir" );
509                 log.print( "\t" );
510                 log.print( indir.getCanonicalPath() );
511                 log.println();
512                 log.print( "# Gene trees suffix" );
513                 log.print( "\t" );
514                 log.print( gene_trees_suffix );
515                 log.println();
516                 log.print( "# Species tree" );
517                 log.print( "\t" );
518                 log.print( species_tree_file.getCanonicalPath() );
519                 log.println();
520                 log.print( "# Out-dir" );
521                 log.print( "\t" );
522                 log.print( outdir.getCanonicalPath() );
523                 log.println();
524                 log.print( "# Logfile" );
525                 log.print( "\t" );
526                 log.print( logfile.getCanonicalPath() );
527                 log.println();
528                 log.print( "# Ortholog groups cutoff" );
529                 log.print( "\t" );
530                 log.print( Double.toString( ortholog_group_cutoff ) );
531                 log.println();
532                 if ( gt_first != RIO.DEFAULT_RANGE ) {
533                     log.print( "# First gene tree to analyze" );
534                     log.print( "\t" );
535                     log.print( Integer.toString( gt_first ) );
536                     log.println();
537                 }
538                 if ( gt_last != RIO.DEFAULT_RANGE ) {
539                     log.print( "# Last gene tree to analyze" );
540                     log.print( "\t" );
541                     log.print( Integer.toString( gt_last ) );
542                     log.println();
543                 }
544                 log.print( "# Re-rooting" );
545                 log.print( "\t" );
546                 log.print( rerooting_str );
547                 log.println();
548                 log.print( "# Non binary species tree" );
549                 log.print( "\t" );
550                 if ( !sdir ) {
551                     log.print( "allowed" );
552                 }
553                 else {
554                     log.print( "disallowed" );
555                 }
556                 log.println();
557                 log.println();
558                 log.print( "NAME" );
559                 log.print( "\t" );
560                 log.print( "EXT NODES" );
561                 log.print( "\t" );
562                 log.print( ortholog_group_cutoff + " O GROUPS" );
563                 log.print( "\t" );
564                 log.print( "0.05 O GROUPS" );
565                 log.print( "\t" );
566                 log.print( "0.25 O GROUPS" );
567                 log.print( "\t" );
568                 log.print( "0.5 O GROUPS" );
569                 log.print( "\t" );
570                 log.print( "0.75 O GROUPS" );
571                 log.print( "\t" );
572                 log.print( "0.95 O GROUPS" );
573                 log.print( "\t" );
574                 if ( perform_gsdir_on_best_tree ) {
575                     log.print( "BEST TREE DUP" );
576                     log.print( "\t" );
577                     log.print( "MEDIAN DUP - BEST TREE DUP" );
578                     log.print( "\t" );
579                 }
580                 log.print( "MEDIAN DUP" );
581                 log.print( "\t" );
582                 log.print( "MEAN DUP" );
583                 log.print( "\t" );
584                 log.print( "MEAN DUP SD" );
585                 log.print( "\t" );
586                 log.print( "MIN DUP" );
587                 log.print( "\t" );
588                 log.print( "MAX DUP" );
589                 log.print( "\t" );
590                 log.print( "REMOVED EXT NODES" );
591                 log.print( "\t" );
592                 log.print( "N" );
593                 log.println();
594             }
595             catch ( IOException e ) {
596                 ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() );
597             }
598             int counter = 1;
599             for( final File gf : gene_trees_files ) {
600                 String outname = gf.getName();
601                 System.out
602                         .print( "\r                                                                                            " );
603                 System.out.print( "\r" + counter + "/" + gene_trees_files.length + ": " + outname );
604                 counter++;
605                 if ( outname.indexOf( "." ) > 0 ) {
606                     outname = outname.substring( 0, outname.lastIndexOf( "." ) );
607                 }
608                 try {
609                     RIOUtil.executeAnalysis( gf,
610                                              species_tree_file,
611                                              new File( outdir.getCanonicalFile() + "/" + outname
612                                                      + RIOUtil.ORTHO_OUTTABLE_SUFFIX ),
613                                              new File( outdir.getCanonicalFile() + "/" + outname
614                                                      + RIOUtil.ORTHO_OUTTABLE_WITH_MAP_SUFFIX ),
615                                              new File( outdir.getCanonicalFile() + "/" + outname
616                                                      + RIOUtil.ORTHOLOG_GROUPS_SUFFIX ),
617                                              new File( outdir.getCanonicalFile() + "/" + outname
618                                                      + RIOUtil.LOGFILE_SUFFIX ),
619                                              outgroup,
620                                              rerooting,
621                                              gt_first,
622                                              gt_last,
623                                              new File( outdir.getCanonicalFile() + "/" + outname
624                                                      + RIOUtil.STRIPPED_SPECIES_TREE_SUFFIX ),
625                                              new File( outdir.getCanonicalFile() + "/" + outname
626                                                      + RIOUtil.OUT_MIN_DUP_GENE_TREE_SUFFIX ),
627                                              new File( outdir.getCanonicalFile() + "/" + outname
628                                                      + RIOUtil.OUT_MED_DUP_GENE_TREE_SUFFIX ),
629                                              true,
630                                              algorithm,
631                                              true,
632                                              log,
633                                              ortholog_group_cutoff,
634                                              perform_id_mapping,
635                                              id_mapping_dir,
636                                              id_mapping_suffix,
637                                              perform_gsdir_on_best_tree,
638                                              outdir,
639                                              best_trees_indir,
640                                              best_trees_suffix );
641                 }
642                 catch ( IOException e ) {
643                     ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() );
644                 }
645             }
646             System.out
647                     .print( "\r                                                                                        " );
648             System.out.println();
649         }
650         else {
651             String outname = ForesterUtil.removeFileExtension( orthology_outtable.toString() );
652             RIOUtil.executeAnalysis( gene_trees_file,
653                                      species_tree_file,
654                                      orthology_outtable,
655                                      null,
656                                      new File( outname + RIOUtil.ORTHOLOG_GROUPS_SUFFIX ),
657                                      logfile,
658                                      outgroup,
659                                      rerooting,
660                                      gt_first,
661                                      gt_last,
662                                      new File( outname + RIOUtil.STRIPPED_SPECIES_TREE_SUFFIX ),
663                                      new File( outname + RIOUtil.OUT_MIN_DUP_GENE_TREE_SUFFIX ),
664                                      new File( outname + RIOUtil.OUT_MED_DUP_GENE_TREE_SUFFIX ),
665                                      algorithm == ALGORITHM.GSDIR,
666                                      algorithm,
667                                      false,
668                                      null,
669                                      ortholog_group_cutoff,
670                                      false,
671                                      null,
672                                      null,
673                                      false,
674                                      null,
675                                      null,
676                                      null );
677         }
678         if ( !use_dir ) {
679             time = System.currentTimeMillis() - time;
680             System.out.println( "Time                                :\t" + time + "ms" );
681         }
682         else {
683             try {
684                 log.close();
685             }
686             catch ( IOException e ) {
687                 ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() );
688             }
689             time = System.currentTimeMillis() - time;
690             System.out.println( "Time                                :\t" + time + "ms" );
691         }
692         System.exit( 0 );
693     }
694
695     private final static void printHelp() {
696         System.out.println( "Usage" );
697         System.out.println();
698         System.out.println( PRG_NAME
699                 + " [options] <gene trees infile> <species tree infile> <all vs all orthology table outfile> [logfile]" );
700         System.out.println();
701         System.out.println( PRG_NAME + " [options] <gene trees indir> <species tree infile> <outdir> <logfile>" );
702         System.out.println();
703         System.out.println();
704         System.out.println( " Options" );
705         System.out.println( "  -" + GT_FIRST + "=<first>     : first gene tree to analyze (0-based index)" );
706         System.out.println( "  -" + GT_LAST + "=<last>      : last gene tree to analyze (0-based index)" );
707         System.out.println( "  -" + ORTHOLOG_GROUPS_CUTOFF_OPTION
708                 + "=<cutoff>    : cutoff value for ortholog groups (default: " + ORTHOLOG_GROUPS_CUTOFF_DEFAULT + ")" );
709         System.out.println( "  -" + REROOTING_OPT
710                 + "=<re-rooting>: re-rooting method for gene trees, possible values or 'none', 'midpoint'," );
711         System.out.println( "                   or 'outgroup' (default: by minizming duplications)" );
712         System.out.println( "  -" + OUTGROUP
713                 + "=<outgroup>  : for rooting by outgroup, name of outgroup (external gene tree node)" );
714         System.out.println( "  -" + USE_SDIR
715                 + "             : to use SDIR instead of GSDIR (faster, but non-binary species trees are" );
716         System.out.println( "                   disallowed, as are most options)" );
717         System.out.println( "  -" + GENE_TREES_SUFFIX_OPTION
718                 + "=<suffix>    : suffix for gene trees when operating on gene tree directories (default: "
719                 + GENE_TREES_SUFFIX_DEFAULT + ")" );
720         System.out.println( "  -" + MAPPINGS_DIR_OPTION + "=<dir>       : directory for id mapping files" );
721         System.out.println( "  -" + MAPPINGS_SUFFIX_OPTION + "=<suffix>   : suffix for id mapping files (default: "
722                 + MAPPINGS_SUFFIX_DEFAULT + ")" );
723         System.out.println( "  -" + CONSENSUS_TREES_DIR_OPTION
724                 + "=<dir>      : directory with consenus (\"best\") gene trees to be analyzed with GSDIR" );
725         System.out.println( "  -" + CONSENSUS_TREES_SUFFIX_OPTION
726                 + "=<suffix>  : suffix for consenus (\"best\") gene trees (default: " + CONSENSUS_TREE_SUFFIX_DEFAULT
727                 + ")" );
728         ///
729         System.out.println();
730         System.out.println( " Formats" );
731         System.out
732                 .println( "  The gene trees, as well as the species tree, ideally are in phyloXML (www.phyloxml.org) format," );
733         System.out
734                 .println( "  but can also be in New Hamphshire (Newick) or Nexus format as long as species information can be" );
735         System.out
736                 .println( "  extracted from the gene names (e.g. \"HUMAN\" from \"BCL2_HUMAN\") and matched to a single species" );
737         System.out.println( "  in the species tree." );
738         System.out.println();
739         System.out.println( " Examples" );
740         System.out.println( "  rio -s gene_trees.nh species.xml outtable.tsv" );
741         System.out.println( "  rio gene_trees.nh species.xml outtable.tsv log.txt" );
742         System.out.println( "  rio -c=0.9 -f=10 -l=100 -r=none gene_trees.xml species.xml outtable.tsv log.txt" );
743         System.out.println( "  rio -g=.xml gene_trees_dir species.xml out_dir log.tsv" );
744         System.out.println( "  rio -g=.mlt -m=id_maps_dir -ms=.nim -c=0.8 gene_trees_dir species.xml out_dir log.tsv" );
745         System.out.println( "  rio -m=id_maps_dir -c=0.8 gene_trees_dir species.xml out_dir log.tsv" );
746         System.out
747                 .println( "  rio -m=id_maps_dir -co=consensus_dir -cos=.xml -c=0.8 gene_trees_dir species.xml out_dir log.tsv" );
748         System.out.println();
749         System.exit( -1 );
750     }
751 }