2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2017 Christian M. Zmasek
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2.1 of the License, or (at your option) any later version.
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
18 // You should have received a copy of the GNU Lesser General Public
19 // License along with this library; if not, write to the Free Software
20 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
22 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
24 package org.forester.application;
27 import java.io.FilenameFilter;
28 import java.io.IOException;
29 import java.util.ArrayList;
30 import java.util.Arrays;
31 import java.util.List;
33 import org.forester.rio.RIO;
34 import org.forester.rio.RIO.REROOTING;
35 import org.forester.rio.RIOUtil;
36 import org.forester.sdi.SDIutil.ALGORITHM;
37 import org.forester.util.CommandLineArguments;
38 import org.forester.util.EasyWriter;
39 import org.forester.util.ForesterUtil;
// Program name, version and date; main() writes all three to the log file header.
44 public final static String PRG_NAME = "rio";
45 public final static String PRG_VERSION = "5.900";
46 public final static String PRG_DATE = "170420";
// Contact e-mail address and project web page.
47 final static private String E_MAIL = "phyloxml@gmail.com";
48 final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester";
// Command-line option names, stored without the leading "-" (printHelp() prepends it).
// Either help option makes main() take its help branch.
49 final static private String HELP_OPTION_1 = "help";
50 final static private String HELP_OPTION_2 = "h";
// Index of the first / last gene tree to analyze.
51 final static private String GT_FIRST = "f";
52 final static private String GT_LAST = "l";
// Re-rooting method ('none', 'midpoint' or 'outgroup') and the outgroup name.
53 final static private String REROOTING_OPT = "r";
54 final static private String OUTGROUP = "o";
// Flag option (no value allowed) selecting the SDIR algorithm.
55 final static private String USE_SDIR = "s";
// Suffix of the gene tree files picked up from a gene trees directory.
56 final static private String GENE_TREES_SUFFIX_OPTION = "g";
// Directory and file suffix of the id mapping files.
57 final static private String MAPPINGS_DIR_OPTION = "m";
58 final static private String MAPPINGS_SUFFIX_OPTION = "ms";
// Directory and file suffix of the consensus ("best") gene trees.
59 final static private String CONSENSUS_TREES_DIR_OPTION = "co";
60 final static private String CONSENSUS_TREES_SUFFIX_OPTION = "cos";
// Defaults used by main() when the corresponding option is not set.
61 final static private String MAPPINGS_SUFFIX_DEFAULT = ".nim";
62 final static private String CONSENSUS_TREE_SUFFIX_DEFAULT = ".xml";
// Cutoff value for ortholog groups; main() rejects values below 0 or above 1.
63 final static private String ORTHOLOG_GROUPS_CUTOFF_OPTION = "c";
64 final static private String GENE_TREES_SUFFIX_DEFAULT = ".mlt";
65 final static private double ORTHOLOG_GROUPS_CUTOFF_DEFAULT = 0.5;
/**
 * Command-line entry point of the RIO ("resampled inference of orthologs") program.
 * <p>
 * Reads the positional names and the options from {@code args}, validates them (problems are
 * reported through {@code ForesterUtil.fatalError}), echoes the effective settings to standard
 * output and then calls {@code RIOUtil.executeAnalysis}: once per matching file when the first
 * name is a directory of gene trees, or once for a single gene trees file.
 * <p>
 * NOTE(review): a number of lines of this method are not visible in this listing (try/else and
 * closing-brace lines, the declarations of indir, outdir, logfile, time and counter, and parts
 * of the executeAnalysis argument lists). The comments below describe only what the visible
 * lines show.
 *
 * @param args command-line options and names; the usage text in printHelp() lists the accepted forms
 */
67 public static void main( final String[] args ) {
// Print the program information banner.
68 ForesterUtil.printProgramInformation( PRG_NAME,
69 "resampled inference of orthologs",
74 ForesterUtil.getForesterLibraryInformation() );
// Parse the raw arguments; a failure while parsing is reported as a fatal error.
75 CommandLineArguments cla = null;
77 cla = new CommandLineArguments( args );
79 catch ( final Exception e ) {
80 ForesterUtil.fatalError( e.getMessage() );
// Help branch: taken when a help option is set or no arguments were given at all.
82 if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length == 0 ) ) {
// Require 3 to 11 raw arguments and at least 3 names as counted by getNumberOfNames().
85 if ( ( args.length < 3 ) || ( args.length > 11 ) || ( cla.getNumberOfNames() < 3 ) ) {
87 System.out.println( "error: incorrect number of arguments" );
// Reject every option that is not in the list of known options.
91 final List<String> allowed_options = new ArrayList<String>();
92 allowed_options.add( GT_FIRST );
93 allowed_options.add( GT_LAST );
94 allowed_options.add( REROOTING_OPT );
95 allowed_options.add( OUTGROUP );
96 allowed_options.add( USE_SDIR );
97 allowed_options.add( GENE_TREES_SUFFIX_OPTION );
98 allowed_options.add( ORTHOLOG_GROUPS_CUTOFF_OPTION );
99 allowed_options.add( MAPPINGS_DIR_OPTION );
100 allowed_options.add( MAPPINGS_SUFFIX_OPTION );
101 allowed_options.add( CONSENSUS_TREES_DIR_OPTION );
102 allowed_options.add( CONSENSUS_TREES_SUFFIX_OPTION );
103 final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
104 if ( dissallowed_options.length() > 0 ) {
105 ForesterUtil.fatalError( "unknown option(s): " + dissallowed_options );
// First name: either a directory of gene trees (directory mode) or a single gene trees file.
107 final File gene_trees_file = cla.getFile( 0 );
108 final boolean use_dir;
111 if ( gene_trees_file.isDirectory() ) {
// NOTE(review): this check sits inside the isDirectory() branch; File.isDirectory() only
// returns true for an existing directory, so the branch looks unreachable -- confirm.
112 if ( !gene_trees_file.exists() ) {
113 ForesterUtil.fatalError( "gene trees directory \"" + gene_trees_file + "\" does not exist" );
// NOTE(review): File.listFiles() may return null on an I/O error, which would throw here.
115 if ( gene_trees_file.listFiles().length < 1 ) {
116 ForesterUtil.fatalError( "gene trees directory \"" + gene_trees_file + "\" is empty" );
119 indir = gene_trees_file;
// Second name: species tree. Third name: read both as out-directory and as orthology table;
// per the usage text the former belongs to directory mode, the latter to single-file mode
// (the branching lines are not visible here).
124 final File species_tree_file = cla.getFile( 1 );
125 File orthology_outtable = null;
127 outdir = cla.getFile( 2 );
130 orthology_outtable = cla.getFile( 2 );
// Fourth name: logfile. One visible path demands at least 4 names, the other accepts the
// logfile only when more than 3 names are present; an existing logfile is never overwritten.
134 if ( ( cla.getNumberOfNames() < 4 ) ) {
135 System.out.println();
136 System.out.println( "error: incorrect number of arguments" );
137 System.out.println();
140 logfile = cla.getFile( 3 );
141 if ( logfile.exists() ) {
142 ForesterUtil.fatalError( "\"" + logfile + "\" already exists" );
146 if ( cla.getNumberOfNames() > 3 ) {
147 logfile = cla.getFile( 3 );
148 if ( logfile.exists() ) {
149 ForesterUtil.fatalError( "\"" + logfile + "\" already exists" );
// -s: flag for the SDIR algorithm; supplying a value is an error.
156 boolean sdir = false;
157 if ( cla.isOptionSet( USE_SDIR ) ) {
158 if ( cla.isOptionHasAValue( USE_SDIR ) ) {
159 ForesterUtil.fatalError( "no value allowed for -" + USE_SDIR );
// A logfile together with single-file mode is rejected; per the message this concerns SDIR.
162 if ( !use_dir && logfile != null ) {
163 ForesterUtil.fatalError( "no logfile output for SDIR algorithm" );
// -o: outgroup name. Per the messages it is rejected for SDIR and for directory mode
// (those guarding conditions are not visible); a value is mandatory.
166 String outgroup = null;
167 if ( cla.isOptionSet( OUTGROUP ) ) {
169 ForesterUtil.fatalError( "no outgroup option for SDIR algorithm" );
172 ForesterUtil.fatalError( "no outgroup option for operating on gene trees directory" );
174 if ( !cla.isOptionHasAValue( OUTGROUP ) ) {
175 ForesterUtil.fatalError( "no value for -" + OUTGROUP );
177 outgroup = cla.getOptionValueAsCleanString( OUTGROUP );
// -r: re-rooting method; stays BY_ALGORITHM unless 'none', 'midpoint' or 'outgroup' is given
// (the value is compared in lower case).
179 REROOTING rerooting = REROOTING.BY_ALGORITHM;
180 if ( cla.isOptionSet( REROOTING_OPT ) ) {
181 if ( !cla.isOptionHasAValue( REROOTING_OPT ) ) {
182 ForesterUtil.fatalError( "no value for -" + REROOTING_OPT );
185 ForesterUtil.fatalError( "no re-rooting option for SDIR algorithm" );
187 final String rerooting_str = cla.getOptionValueAsCleanString( REROOTING_OPT ).toLowerCase();
188 if ( rerooting_str.equals( "none" ) ) {
189 rerooting = REROOTING.NONE;
191 else if ( rerooting_str.equals( "midpoint" ) ) {
192 rerooting = REROOTING.MIDPOINT;
194 else if ( rerooting_str.equals( "outgroup" ) ) {
196 ForesterUtil.fatalError( "no outgroup option for operating on gene trees directory" );
198 rerooting = REROOTING.OUTGROUP;
// NOTE(review): "minizming" in the message below is a typo for "minimizing".
202 .fatalError( "values for re-rooting are: 'none', 'midpoint', or 'outgroup' (minizming duplications is default)" );
// Outgroup name and outgroup re-rooting must be given together or not at all.
205 if ( ForesterUtil.isEmpty( outgroup ) && ( rerooting == REROOTING.OUTGROUP ) ) {
206 ForesterUtil.fatalError( "selected re-rooting by outgroup, but outgroup not set" );
208 if ( !ForesterUtil.isEmpty( outgroup ) && ( rerooting != REROOTING.OUTGROUP ) ) {
209 ForesterUtil.fatalError( "outgroup set, but selected re-rooting by other approach" );
// -f / -l: indices of the first and last gene tree to analyze; both stay at
// RIO.DEFAULT_RANGE when the option is not set.
211 int gt_first = RIO.DEFAULT_RANGE;
212 int gt_last = RIO.DEFAULT_RANGE;
213 if ( cla.isOptionSet( GT_FIRST ) ) {
215 ForesterUtil.fatalError( "no gene tree range option for SDIR algorithm" );
217 if ( !cla.isOptionHasAValue( GT_FIRST ) ) {
218 ForesterUtil.fatalError( "no value for -" + GT_FIRST );
221 gt_first = cla.getOptionValueAsInt( GT_FIRST );
223 catch ( final IOException e ) {
224 ForesterUtil.fatalError( "could not parse integer for -" + GT_FIRST + " option" );
// A negative first index is rejected.
226 if ( gt_first < 0 ) {
227 ForesterUtil.fatalError( "attempt to set index of first tree to analyze to: " + gt_first );
230 if ( cla.isOptionSet( GT_LAST ) ) {
232 ForesterUtil.fatalError( "no gene tree range option for SDIR algorithm" );
234 if ( !cla.isOptionHasAValue( GT_LAST ) ) {
235 ForesterUtil.fatalError( "no value for -" + GT_LAST );
238 gt_last = cla.getOptionValueAsInt( GT_LAST );
240 catch ( final IOException e ) {
241 ForesterUtil.fatalError( "could not parse integer for -" + GT_LAST + " option" );
// NOTE(review): the condition guarding the next error is not visible here.
244 ForesterUtil.fatalError( "attempt to set index of last tree to analyze to: " + gt_last );
// When both ends of the range were set explicitly, last must not be smaller than first.
247 if ( ( ( gt_last != RIO.DEFAULT_RANGE ) && ( gt_first != RIO.DEFAULT_RANGE ) ) && ( ( gt_last < gt_first ) ) ) {
248 ForesterUtil.fatalError( "attempt to set range (0-based) of gene to analyze to: from " + gt_first + " to "
// -c: ortholog groups cutoff; must parse as a double and lie within 0..1 inclusive.
251 double ortholog_group_cutoff = ORTHOLOG_GROUPS_CUTOFF_DEFAULT;
252 if ( cla.isOptionSet( ORTHOLOG_GROUPS_CUTOFF_OPTION ) ) {
254 ForesterUtil.fatalError( "ortholog groups cutoff for SDIR algorithm" );
256 if ( !cla.isOptionHasAValue( ORTHOLOG_GROUPS_CUTOFF_OPTION ) ) {
257 ForesterUtil.fatalError( "no value for -" + ORTHOLOG_GROUPS_CUTOFF_OPTION );
260 ortholog_group_cutoff = cla.getOptionValueAsDouble( ORTHOLOG_GROUPS_CUTOFF_OPTION );
262 catch ( final IOException e ) {
263 ForesterUtil.fatalError( "could not parse double for -" + ORTHOLOG_GROUPS_CUTOFF_OPTION + " option" );
265 if ( ortholog_group_cutoff < 0 ) {
266 ForesterUtil.fatalError( "attempt to set ortholog groups cutoff to: " + ortholog_group_cutoff );
268 if ( ortholog_group_cutoff > 1 ) {
269 ForesterUtil.fatalError( "attempt to set ortholog groups cutoff to: " + ortholog_group_cutoff );
// Readability check of the gene trees file (a guarding condition, if any, is not visible).
273 ForesterUtil.fatalErrorIfFileNotReadable( gene_trees_file );
// -g: suffix of gene tree files; falls back to GENE_TREES_SUFFIX_DEFAULT.
// NOTE(review): "indivual" in the messages below is a typo for "individual".
275 final String gene_trees_suffix;
276 if ( cla.isOptionSet( GENE_TREES_SUFFIX_OPTION ) ) {
278 ForesterUtil.fatalError( "no gene tree suffix option when operating on indivual gene trees" );
280 if ( !cla.isOptionHasAValue( GENE_TREES_SUFFIX_OPTION ) ) {
281 ForesterUtil.fatalError( "no value for -" + GENE_TREES_SUFFIX_OPTION );
283 gene_trees_suffix = cla.getOptionValueAsCleanString( GENE_TREES_SUFFIX_OPTION );
286 gene_trees_suffix = GENE_TREES_SUFFIX_DEFAULT;
// -m: directory with id mapping files; when set it must exist, be a directory and be
// non-empty, and id mapping is switched on.
288 final boolean perform_id_mapping;
289 final File id_mapping_dir;
290 if ( cla.isOptionSet( MAPPINGS_DIR_OPTION ) ) {
291 id_mapping_dir = new File( cla.getOptionValue( MAPPINGS_DIR_OPTION ) );
292 perform_id_mapping = true;
294 ForesterUtil.fatalError( "no id mapping when operating on indivual gene trees" );
296 if ( !id_mapping_dir.exists() ) {
297 ForesterUtil.fatalError( "id mappings directory \"" + id_mapping_dir + "\" does not exist" );
299 if ( !id_mapping_dir.isDirectory() ) {
300 ForesterUtil.fatalError( "id mappings directory \"" + id_mapping_dir + "\" is not a directory" );
302 if ( id_mapping_dir.listFiles().length < 1 ) {
303 ForesterUtil.fatalError( "id mappings directory \"" + id_mapping_dir + "\" is empty" );
307 id_mapping_dir = null;
308 perform_id_mapping = false;
// -ms: suffix of id mapping files; only accepted together with an id mapping directory and
// falls back to MAPPINGS_SUFFIX_DEFAULT.
310 final String id_mapping_suffix;
311 if ( cla.isOptionSet( MAPPINGS_SUFFIX_OPTION ) ) {
313 ForesterUtil.fatalError( "no id mapping file suffix option when operating on indivual gene trees" );
315 if ( !perform_id_mapping ) {
316 ForesterUtil.fatalError( "no id mapping directory given" );
318 if ( !cla.isOptionHasAValue( MAPPINGS_SUFFIX_OPTION ) ) {
319 ForesterUtil.fatalError( "no value for -" + MAPPINGS_SUFFIX_OPTION );
321 id_mapping_suffix = cla.getOptionValueAsCleanString( MAPPINGS_SUFFIX_OPTION );
324 id_mapping_suffix = MAPPINGS_SUFFIX_DEFAULT;
// -co: directory with consensus ("best") gene trees; validated like the id mapping directory.
326 boolean perform_gsdir_on_best_tree;
327 final File best_trees_indir;
328 if ( cla.isOptionSet( CONSENSUS_TREES_DIR_OPTION ) ) {
329 best_trees_indir = new File( cla.getOptionValue( CONSENSUS_TREES_DIR_OPTION ) );
330 perform_gsdir_on_best_tree = true;
333 .fatalError( "no consensus (\"best\") gene tree GSDIR analysis when operating on individual gene trees" );
335 if ( !best_trees_indir.exists() ) {
336 ForesterUtil.fatalError( "consensus (\"best\") gene tree directory \"" + best_trees_indir
337 + "\" does not exist" );
339 if ( !best_trees_indir.isDirectory() ) {
340 ForesterUtil.fatalError( "consensus (\"best\") gene tree directory \"" + best_trees_indir
341 + "\" is not a directory" );
343 if ( best_trees_indir.listFiles().length < 1 ) {
345 .fatalError( "consensus (\"best\") gene tree directory \"" + best_trees_indir + "\" is empty" );
349 best_trees_indir = null;
350 perform_gsdir_on_best_tree = false;
// -cos: suffix of consensus gene trees; only accepted together with -co and falls back to
// CONSENSUS_TREE_SUFFIX_DEFAULT.
352 final String best_trees_suffix;
353 if ( cla.isOptionSet( CONSENSUS_TREES_SUFFIX_OPTION ) ) {
356 .fatalError( "no consensus (\"best\") gene tree suffix option when operating on individual gene trees" );
358 if ( !perform_gsdir_on_best_tree ) {
359 ForesterUtil.fatalError( "no consensus (\"best\") gene tree directory given" );
361 if ( !cla.isOptionHasAValue( CONSENSUS_TREES_SUFFIX_OPTION ) ) {
362 ForesterUtil.fatalError( "no value for -" + CONSENSUS_TREES_SUFFIX_OPTION );
364 best_trees_suffix = cla.getOptionValueAsCleanString( CONSENSUS_TREES_SUFFIX_OPTION );
367 best_trees_suffix = CONSENSUS_TREE_SUFFIX_DEFAULT;
// The species tree must be readable; in single-file mode an existing orthology table is
// never overwritten.
369 ForesterUtil.fatalErrorIfFileNotReadable( species_tree_file );
370 if ( !use_dir && orthology_outtable.exists() ) {
371 ForesterUtil.fatalError( "\"" + orthology_outtable + "\" already exists" );
// Echo the effective settings to standard output.
376 System.out.println( "Gene trees in-dir :\t" + indir.getCanonicalPath() );
377 System.out.println( "Gene trees suffix :\t" + gene_trees_suffix );
380 System.out.println( "Gene trees :\t" + gene_trees_file.getCanonicalPath() );
382 System.out.println( "Species tree :\t" + species_tree_file.getCanonicalPath() );
384 catch ( final IOException e ) {
385 ForesterUtil.fatalError( e.getLocalizedMessage() );
387 if ( perform_id_mapping ) {
389 System.out.println( "Id mappings in-dir :\t" + id_mapping_dir.getCanonicalPath() );
391 catch ( IOException e ) {
392 ForesterUtil.fatalError( e.getLocalizedMessage() );
394 System.out.println( "Id mappings suffix :\t" + id_mapping_suffix );
396 if ( perform_gsdir_on_best_tree ) {
398 System.out.println( "Consensus (\"best\") gene trees in-dir:\t" + best_trees_indir.getCanonicalPath() );
400 catch ( IOException e ) {
401 ForesterUtil.fatalError( e.getLocalizedMessage() );
403 System.out.println( "Consensus (\"best\") gene trees suffix:\t" + best_trees_suffix );
406 System.out.println( "Out-dir :\t" + outdir );
409 System.out.println( "All vs all orthology results table :\t" + orthology_outtable );
411 if ( logfile != null ) {
412 System.out.println( "Logfile :\t" + logfile );
414 System.out.println( "Ortholog groups cutoff :\t" + ortholog_group_cutoff );
415 if ( gt_first != RIO.DEFAULT_RANGE ) {
416 System.out.println( "First gene tree to analyze :\t" + gt_first );
418 if ( gt_last != RIO.DEFAULT_RANGE ) {
419 System.out.println( "Last gene tree to analyze :\t" + gt_last );
// Human-readable description of the re-rooting method (the case labels are not visible here).
421 String rerooting_str = "";
422 switch ( rerooting ) {
424 rerooting_str = "by minimizing duplications";
428 rerooting_str = "by midpoint method";
432 rerooting_str = "by outgroup: " + outgroup;
436 rerooting_str = "none";
440 System.out.println( "Re-rooting : \t" + rerooting_str );
// Which of the two lines is printed depends on a condition that is not visible here; the
// help text states that SDIR disallows non-binary species trees.
442 System.out.println( "Non binary species tree :\tallowed" );
445 System.out.println( "Non binary species tree :\tdisallowed" );
// Start of the wall-clock measurement reported at the end.
447 time = System.currentTimeMillis();
// SDIR or GSDIR (the selecting condition is not visible here).
448 final ALGORITHM algorithm;
450 algorithm = ALGORITHM.SDIR;
453 algorithm = ALGORITHM.GSDIR;
455 EasyWriter log = null;
// Directory mode: an existing out-directory must really be a directory; mkdirs() is called
// on a path whose guarding else line is not visible here.
457 if ( outdir.exists() ) {
458 if ( !outdir.isDirectory() ) {
459 ForesterUtil.fatalError( PRG_NAME,
460 "out-directory [" + outdir + "] already exists but is not a directory" );
464 final boolean success = outdir.mkdirs();
466 ForesterUtil.fatalError( PRG_NAME, "could not create out-directory [" + outdir + "]" );
// Collect the gene tree files: names ending with the suffix, excluding a file named like
// the species tree file.
469 final String species_tree_file_name = species_tree_file.getName();
470 final File gene_trees_files[] = indir.listFiles( new FilenameFilter() {
473 public boolean accept( final File dir, final String name ) {
474 return ( ( name.endsWith( gene_trees_suffix ) ) && !( name.equals( species_tree_file_name ) ) );
477 if ( gene_trees_files.length < 1 ) {
478 ForesterUtil.fatalError( PRG_NAME,
479 "in-directory [" + indir
480 + "] does not contain any gene tree files with suffix "
481 + gene_trees_suffix );
484 log = ForesterUtil.createEasyWriter( logfile );
486 catch ( final IOException e ) {
487 ForesterUtil.fatalError( PRG_NAME, "could not create [" + logfile + "]" );
// Process the gene tree files in sorted order.
489 Arrays.sort( gene_trees_files );
// Log header, part 1: program identification and the effective settings.
491 log.print( "# program" );
493 log.print( PRG_NAME );
495 log.print( "# version" );
497 log.print( PRG_VERSION );
499 log.print( "# date" );
501 log.print( PRG_DATE );
503 log.print( "# Algorithm " );
505 log.print( algorithm.toString() );
507 log.print( "# Gene trees in-dir" );
509 log.print( indir.getCanonicalPath() );
511 log.print( "# Gene trees suffix" );
513 log.print( gene_trees_suffix );
515 log.print( "# Species tree" );
517 log.print( species_tree_file.getCanonicalPath() );
519 if ( perform_id_mapping ) {
520 log.print( "# Id mappings in-dir" );
522 log.print( id_mapping_dir.getCanonicalPath() );
524 log.print( "# Id mappings suffix" );
526 log.print( id_mapping_suffix );
529 if ( perform_gsdir_on_best_tree ) {
530 log.print( "# Consensus (\"best\") gene tree dir" );
532 log.print( best_trees_indir.getCanonicalPath() );
534 log.print( "# Consensus (\"best\") gene tree suffix" );
536 log.print( best_trees_suffix );
539 log.print( "# Out-dir" );
541 log.print( outdir.getCanonicalPath() );
543 log.print( "# Logfile" );
545 log.print( logfile.getCanonicalPath() );
547 log.print( "# Ortholog groups cutoff" );
549 log.print( Double.toString( ortholog_group_cutoff ) );
551 if ( gt_first != RIO.DEFAULT_RANGE ) {
552 log.print( "# First gene tree to analyze" );
554 log.print( Integer.toString( gt_first ) );
557 if ( gt_last != RIO.DEFAULT_RANGE ) {
558 log.print( "# Last gene tree to analyze" );
560 log.print( Integer.toString( gt_last ) );
563 log.print( "# Re-rooting" );
565 log.print( rerooting_str );
567 log.print( "# Non binary species tree" );
570 log.print( "allowed" );
573 log.print( "disallowed" );
// Log header, part 2: what look like the column headings of the per-gene-tree rows
// (the separators printed between them are not visible here).
579 log.print( "EXT NODES" );
581 log.print( ortholog_group_cutoff + " O GROUPS" );
583 log.print( "0.05 O GROUPS" );
585 log.print( "0.25 O GROUPS" );
587 log.print( "0.5 O GROUPS" );
589 log.print( "0.75 O GROUPS" );
591 log.print( "0.95 O GROUPS" );
593 if ( perform_gsdir_on_best_tree ) {
594 log.print( "BEST TREE DUP" );
596 log.print( "MEDIAN DUP - BEST TREE DUP" );
599 log.print( "MEDIAN DUP" );
601 log.print( "MEAN DUP" );
603 log.print( "MEAN DUP SD" );
605 log.print( "MIN DUP" );
607 log.print( "MAX DUP" );
609 log.print( "REMOVED EXT NODES" );
614 catch ( IOException e ) {
615 ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() );
// One analysis per gene tree file; "\r" rewrites the progress line on the console.
618 for( final File gf : gene_trees_files ) {
619 String outname = gf.getName();
622 System.out.print( "\r" + counter + "/" + gene_trees_files.length + ": " + outname );
// Drop the part after the last "." (only when a "." occurs after the first character).
624 if ( outname.indexOf( "." ) > 0 ) {
625 outname = outname.substring( 0, outname.lastIndexOf( "." ) );
// Output files are placed in the out-directory and named after the gene tree file.
628 RIOUtil.executeAnalysis( gf,
630 new File( outdir.getCanonicalFile() + "/" + outname
631 + RIOUtil.ORTHO_OUTTABLE_SUFFIX ),
632 new File( outdir.getCanonicalFile() + "/" + outname
633 + RIOUtil.ORTHO_OUTTABLE_WITH_MAP_SUFFIX ),
634 new File( outdir.getCanonicalFile() + "/" + outname
635 + RIOUtil.ORTHOLOG_GROUPS_SUFFIX ),
636 new File( outdir.getCanonicalFile() + "/" + outname
637 + RIOUtil.LOGFILE_SUFFIX ),
642 new File( outdir.getCanonicalFile() + "/" + outname
643 + RIOUtil.STRIPPED_SPECIES_TREE_SUFFIX ),
644 new File( outdir.getCanonicalFile() + "/" + outname
645 + RIOUtil.OUT_MIN_DUP_GENE_TREE_SUFFIX ),
646 new File( outdir.getCanonicalFile() + "/" + outname
647 + RIOUtil.OUT_MED_DUP_GENE_TREE_SUFFIX ),
652 ortholog_group_cutoff,
656 perform_gsdir_on_best_tree,
661 catch ( IOException e ) {
662 ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() );
667 System.out.println();
// Single-file mode: output file names are derived from the orthology table path without its
// extension.
670 String outname = ForesterUtil.removeFileExtension( orthology_outtable.toString() );
671 RIOUtil.executeAnalysis( gene_trees_file,
675 new File( outname + RIOUtil.ORTHOLOG_GROUPS_SUFFIX ),
681 new File( outname + RIOUtil.STRIPPED_SPECIES_TREE_SUFFIX ),
682 new File( outname + RIOUtil.OUT_MIN_DUP_GENE_TREE_SUFFIX ),
683 new File( outname + RIOUtil.OUT_MED_DUP_GENE_TREE_SUFFIX ),
684 algorithm == ALGORITHM.GSDIR,
688 ortholog_group_cutoff,
// Report the elapsed wall-clock time in milliseconds.
698 time = System.currentTimeMillis() - time;
699 System.out.println( "Time :\t" + time + "ms" );
705 catch ( IOException e ) {
706 ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() );
708 time = System.currentTimeMillis() - time;
709 System.out.println( "Time :\t" + time + "ms" );
714 private final static void printHelp() {
715 System.out.println( "Usage" );
716 System.out.println();
717 System.out.println( PRG_NAME
718 + " [options] <gene trees infile> <species tree infile> <all vs all orthology table outfile> [logfile]" );
719 System.out.println();
720 System.out.println( PRG_NAME + " [options] <gene trees indir> <species tree infile> <outdir> <logfile>" );
721 System.out.println();
722 System.out.println();
723 System.out.println( " Options" );
724 System.out.println( " -" + GT_FIRST + "=<first> : first gene tree to analyze (0-based index)" );
725 System.out.println( " -" + GT_LAST + "=<last> : last gene tree to analyze (0-based index)" );
726 System.out.println( " -" + ORTHOLOG_GROUPS_CUTOFF_OPTION
727 + "=<cutoff> : cutoff value for ortholog groups (default: " + ORTHOLOG_GROUPS_CUTOFF_DEFAULT + ")" );
728 System.out.println( " -" + REROOTING_OPT
729 + "=<re-rooting>: re-rooting method for gene trees, possible values or 'none', 'midpoint'," );
730 System.out.println( " or 'outgroup' (default: by minizming duplications)" );
731 System.out.println( " -" + OUTGROUP
732 + "=<outgroup> : for rooting by outgroup, name of outgroup (external gene tree node)" );
733 System.out.println( " -" + USE_SDIR
734 + " : to use SDIR instead of GSDIR (faster, but non-binary species trees are" );
735 System.out.println( " disallowed, as are most options)" );
736 System.out.println( " -" + GENE_TREES_SUFFIX_OPTION
737 + "=<suffix> : suffix for gene trees when operating on gene tree directories (default: "
738 + GENE_TREES_SUFFIX_DEFAULT + ")" );
739 System.out.println( " -" + MAPPINGS_DIR_OPTION + "=<dir> : directory for id mapping files" );
740 System.out.println( " -" + MAPPINGS_SUFFIX_OPTION + "=<suffix> : suffix for id mapping files (default: "
741 + MAPPINGS_SUFFIX_DEFAULT + ")" );
742 System.out.println( " -" + CONSENSUS_TREES_DIR_OPTION
743 + "=<dir> : directory with consenus (\"best\") gene trees to be analyzed with GSDIR" );
744 System.out.println( " -" + CONSENSUS_TREES_SUFFIX_OPTION
745 + "=<suffix> : suffix for consenus (\"best\") gene trees (default: " + CONSENSUS_TREE_SUFFIX_DEFAULT
747 System.out.println();
748 System.out.println( " Formats" );
750 .println( " The gene trees, as well as the species tree, ideally are in phyloXML (www.phyloxml.org) format," );
752 .println( " but can also be in New Hamphshire (Newick) or Nexus format as long as species information can be" );
754 .println( " extracted from the gene names (e.g. \"HUMAN\" from \"BCL2_HUMAN\") and matched to a single species" );
755 System.out.println( " in the species tree." );
756 System.out.println();
757 System.out.println( " Examples" );
758 System.out.println( " rio -s gene_trees.nh species.xml outtable.tsv" );
759 System.out.println( " rio gene_trees.nh species.xml outtable.tsv log.txt" );
760 System.out.println( " rio -c=0.9 -f=10 -l=100 -r=none gene_trees.xml species.xml outtable.tsv log.txt" );
761 System.out.println( " rio -g=.xml gene_trees_dir species.xml out_dir log.tsv" );
762 System.out.println( " rio -g=.mlt -m=id_maps_dir -ms=.nim -c=0.8 gene_trees_dir species.xml out_dir log.tsv" );
763 System.out.println( " rio -m=id_maps_dir -c=0.8 gene_trees_dir species.xml out_dir log.tsv" );
765 .println( " rio -m=id_maps_dir -co=consensus_dir -cos=.xml -c=0.8 gene_trees_dir species.xml out_dir log.tsv" );
766 System.out.println();