in progress
[jalview.git] / forester / java / src / org / forester / application / gsdi.java
1 // $Id:
2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
4 //
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
7 // All rights reserved
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
22 //
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
25
26 package org.forester.application;
27
28 import java.io.File;
29 import java.io.FilenameFilter;
30 import java.io.IOException;
31 import java.text.SimpleDateFormat;
32 import java.util.ArrayList;
33 import java.util.Date;
34 import java.util.List;
35 import java.util.SortedMap;
36 import java.util.SortedSet;
37 import java.util.TreeMap;
38 import java.util.TreeSet;
39
40 import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
41 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
42 import org.forester.io.parsers.phyloxml.PhyloXmlParser;
43 import org.forester.io.writers.PhylogenyWriter;
44 import org.forester.phylogeny.Phylogeny;
45 import org.forester.phylogeny.PhylogenyMethods;
46 import org.forester.phylogeny.PhylogenyNode;
47 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
48 import org.forester.phylogeny.factories.PhylogenyFactory;
49 import org.forester.sdi.GSDI;
50 import org.forester.sdi.GSDII;
51 import org.forester.sdi.GSDIR;
52 import org.forester.sdi.SDIException;
53 import org.forester.sdi.SDIutil;
54 import org.forester.sdi.SDIutil.ALGORITHM;
55 import org.forester.util.CommandLineArguments;
56 import org.forester.util.EasyWriter;
57 import org.forester.util.ForesterConstants;
58 import org.forester.util.ForesterUtil;
59
60 public final class gsdi {
61
62     final static public boolean REPLACE_UNDERSCORES_IN_NH_SPECIES_TREE = true;
63     final static private String ALLOW_STRIPPING_OF_GENE_TREE_OPTION    = "g";
64     final static private String GSDIR_OPTION                           = "r";
65     final static private String MOST_PARSIMONIOUS_OPTION               = "m";
66     final static private String SUFFIX_FOR_DIR_OPTION                  = "s";
67     final static private String GUESS_FORMAT_OF_SPECIES_TREE           = "q";
68     final static private String TRANSFER_TAXONOMY_OPTION               = "t";
69     final static private String HELP_OPTION_1                          = "help";
70     final static private String HELP_OPTION_2                          = "h";
71     final static private String SUFFIX_FOR_SPECIES_TREE_USED           = "_species_tree_used.xml";
72     final static private String LOGFILE_SUFFIX                         = "_gsdi_log.txt";
73     final static private String REMAPPED_SUFFIX                        = "_gsdi_remapped.txt";
74     final static private String PRG_NAME                               = "gsdi";
75     final static private String PRG_VERSION                            = "1.001";
76     final static private String PRG_DATE                               = "170327";
77     final static private String PRG_DESC                               = "general speciation duplication inference";
78     final static private String E_MAIL                                 = "phyloxml@gmail.com";
79     final static private String WWW                                    = "https://sites.google.com/site/cmzmasek/home/software/forester";
80
81     public static void main( final String args[] ) {
82         try {
83             ForesterUtil.printProgramInformation( PRG_NAME,
84                                                   PRG_DESC,
85                                                   PRG_VERSION,
86                                                   PRG_DATE,
87                                                   E_MAIL,
88                                                   WWW,
89                                                   ForesterUtil.getForesterLibraryInformation() );
90             CommandLineArguments cla = null;
91             try {
92                 cla = new CommandLineArguments( args );
93             }
94             catch ( final Exception e ) {
95                 ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
96             }
97             if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) ) {
98                 System.out.println();
99                 gsdi.print_help();
100                 System.exit( 0 );
101             }
102             else if ( ( args.length < 2 ) || ( cla.getNumberOfNames() != 2 && cla.getNumberOfNames() != 3 ) ) {
103                 System.out.println();
104                 System.out.println( "Wrong number of arguments." );
105                 System.out.println();
106                 gsdi.print_help();
107                 System.exit( -1 );
108             }
109             final List<String> allowed_options = new ArrayList<String>();
110             allowed_options.add( GSDIR_OPTION );
111             allowed_options.add( GUESS_FORMAT_OF_SPECIES_TREE );
112             allowed_options.add( MOST_PARSIMONIOUS_OPTION );
113             allowed_options.add( ALLOW_STRIPPING_OF_GENE_TREE_OPTION );
114             allowed_options.add( TRANSFER_TAXONOMY_OPTION );
115             allowed_options.add( SUFFIX_FOR_DIR_OPTION );
116             final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
117             if ( dissallowed_options.length() > 0 ) {
118                 ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options );
119             }
120             execute( cla );
121         }
122         catch ( final IOException e ) {
123             ForesterUtil.fatalError( gsdi.PRG_NAME, e.getMessage() );
124         }
125     }
126
127     private static void execute( final CommandLineArguments cla ) throws IOException {
128         ALGORITHM base_algorithm = ALGORITHM.GSDI;
129         boolean most_parsimonous_duplication_model = false;
130         boolean allow_stripping_of_gene_tree = false;
131         if ( cla.isOptionSet( GSDIR_OPTION ) ) {
132             base_algorithm = ALGORITHM.GSDIR;
133         }
134         if ( cla.isOptionSet( MOST_PARSIMONIOUS_OPTION ) ) {
135             if ( base_algorithm == ALGORITHM.SDI ) {
136                 ForesterUtil.fatalError( PRG_NAME, "Cannot use most parsimonious duplication mode with SDI" );
137             }
138             most_parsimonous_duplication_model = true;
139         }
140         if ( cla.isOptionSet( ALLOW_STRIPPING_OF_GENE_TREE_OPTION ) ) {
141             if ( base_algorithm == ALGORITHM.SDI ) {
142                 ForesterUtil.fatalError( PRG_NAME, "Cannot allow stripping of gene tree with SDI" );
143             }
144             allow_stripping_of_gene_tree = true;
145         }
146         boolean transfer_taxonomy = false;
147         if ( cla.isOptionSet( TRANSFER_TAXONOMY_OPTION ) ) {
148             transfer_taxonomy = true;
149         }
150         boolean use_gene_tree_dir = false;
151         final String gene_tree_suffix;
152         if ( cla.isOptionSet( SUFFIX_FOR_DIR_OPTION ) ) {
153             gene_tree_suffix = cla.getOptionValue( SUFFIX_FOR_DIR_OPTION );
154             use_gene_tree_dir = true;
155         }
156         else {
157             gene_tree_suffix = null;
158         }
159         File gene_tree_file = null;
160         File species_tree_file = null;
161         File out_file = null;
162         File log_file = null;
163         try {
164             gene_tree_file = cla.getFile( 0 );
165             species_tree_file = cla.getFile( 1 );
166             if ( cla.getNumberOfNames() == 3 ) {
167                 out_file = cla.getFile( 2 );
168                 log_file = new File( ForesterUtil.removeSuffix( out_file.toString() ) + LOGFILE_SUFFIX );
169             }
170         }
171         catch ( final IllegalArgumentException e ) {
172             ForesterUtil.fatalError( PRG_NAME, "error in command line: " + e.getMessage() );
173         }
174         if ( use_gene_tree_dir ) {
175             final File dir = new File( gene_tree_file.toString() );
176             final File gene_tree_files[] = dir.listFiles( new FilenameFilter() {
177
178                 @Override
179                 public boolean accept( final File dir, final String name ) {
180                     return name.endsWith( gene_tree_suffix );
181                 }
182             } );
183             for( final File file : gene_tree_files ) {
184                 System.out.println( file );
185             }
186             execute( base_algorithm,
187                      most_parsimonous_duplication_model,
188                      allow_stripping_of_gene_tree,
189                      transfer_taxonomy,
190                      gene_tree_files,
191                      species_tree_file );
192         }
193         else {
194             execute( base_algorithm,
195                      most_parsimonous_duplication_model,
196                      allow_stripping_of_gene_tree,
197                      transfer_taxonomy,
198                      gene_tree_file,
199                      species_tree_file,
200                      out_file,
201                      log_file );
202         }
203     }
204
205     private static void execute( final ALGORITHM base_algorithm,
206                                  final boolean most_parsimonous_duplication_model,
207                                  final boolean allow_stripping_of_gene_tree,
208                                  final boolean transfer_taxonomy,
209                                  final File gene_tree_files[],
210                                  final File species_tree_file )
211             throws IOException {
212         for( final File gene_tree_file : gene_tree_files ) {
213             if (gene_tree_file.getName().equals( species_tree_file.getName() )) {
214                 continue;
215             }
216             execute( base_algorithm,
217                      most_parsimonous_duplication_model,
218                      allow_stripping_of_gene_tree,
219                      transfer_taxonomy,
220                      gene_tree_file,
221                      species_tree_file,
222                      new File( gene_tree_file + "gsdi" ),
223                      new File( gene_tree_file + "gsdi_log" ) );
224         }
225     }
226
227     private static void execute( final ALGORITHM base_algorithm,
228                                  final boolean most_parsimonous_duplication_model,
229                                  final boolean allow_stripping_of_gene_tree,
230                                  final boolean transfer_taxonomy,
231                                  final File gene_tree_file,
232                                  final File species_tree_file,
233                                  final File out_file,
234                                  final File log_file )
235             throws IOException {
236         if ( ForesterUtil.isReadableFile( gene_tree_file ) != null ) {
237             ForesterUtil.fatalError( gsdi.PRG_NAME, ForesterUtil.isReadableFile( gene_tree_file ) );
238         }
239         if ( ForesterUtil.isReadableFile( species_tree_file ) != null ) {
240             ForesterUtil.fatalError( gsdi.PRG_NAME, ForesterUtil.isReadableFile( species_tree_file ) );
241         }
242         if ( ForesterUtil.isWritableFile( out_file ) != null ) {
243             ForesterUtil.fatalError( gsdi.PRG_NAME, ForesterUtil.isWritableFile( out_file ) );
244         }
245         if ( ForesterUtil.isWritableFile( log_file ) != null ) {
246             ForesterUtil.fatalError( gsdi.PRG_NAME, ForesterUtil.isWritableFile( log_file ) );
247         }
248         EasyWriter log_writer = null;
249         try {
250             log_writer = ForesterUtil.createEasyWriter( log_file );
251         }
252         catch ( final IOException e ) {
253             ForesterUtil.fatalError( gsdi.PRG_NAME, "Failed to create [" + log_file + "]: " + e.getMessage() );
254         }
255         Phylogeny species_tree = null;
256         Phylogeny gene_tree = null;
257         try {
258             final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
259             gene_tree = factory.create( gene_tree_file, PhyloXmlParser.createPhyloXmlParserXsdValidating() )[ 0 ];
260         }
261         catch ( final IOException e ) {
262             fatalError( "error",
263                         "failed to read gene tree from [" + gene_tree_file + "]: " + e.getMessage(),
264                         log_writer );
265         }
266         try {
267             species_tree = SDIutil.parseSpeciesTree( gene_tree,
268                                                      species_tree_file,
269                                                      REPLACE_UNDERSCORES_IN_NH_SPECIES_TREE,
270                                                      true,
271                                                      TAXONOMY_EXTRACTION.NO );
272         }
273         catch ( final PhyloXmlDataFormatException e ) {
274             fatalError( "user error",
275                         "failed to transfer general node name, in [" + species_tree_file + "]: " + e.getMessage(),
276                         log_writer );
277         }
278         catch ( final SDIException e ) {
279             fatalError( "user error", e.getMessage(), log_writer );
280         }
281         catch ( final IOException e ) {
282             fatalError( "error",
283                         "Failed to read species tree from [" + species_tree_file + "]: " + e.getMessage(),
284                         log_writer );
285         }
286         gene_tree.setRooted( true );
287         species_tree.setRooted( true );
288         if ( !gene_tree.isCompletelyBinary() ) {
289             fatalError( "user error", "gene tree [" + gene_tree_file + "] is not completely binary", log_writer );
290         }
291         if ( base_algorithm == ALGORITHM.SDI ) {
292             if ( !species_tree.isCompletelyBinary() ) {
293                 fatalError( "user error",
294                             "species tree is not completely binary, use GSDI or GSDIR instead",
295                             log_writer );
296             }
297         }
298         log_writer.println( PRG_NAME + " - " + PRG_DESC );
299         log_writer.println( "  version         : " + PRG_VERSION );
300         log_writer.println( "  date            : " + PRG_DATE );
301         log_writer.println( "  forester version: " + ForesterConstants.FORESTER_VERSION );
302         log_writer.println();
303         log_writer.println( "Start time                               : "
304                 + new SimpleDateFormat( "yyyyMMdd HH:mm:ss" ).format( new Date() ) );
305         System.out.println( "Start time                               : "
306                 + new SimpleDateFormat( "yyyyMMdd HH:mm:ss" ).format( new Date() ) );
307         log_writer.println( "Gene tree file                           : " + gene_tree_file.getCanonicalPath() );
308         System.out.println( "Gene tree file                           : " + gene_tree_file.getCanonicalPath() );
309         log_writer.println( "Gene tree name                           : "
310                 + ( ForesterUtil.isEmpty( gene_tree.getName() ) ? "" : gene_tree.getName() ) );
311         System.out.println( "Gene tree name                           : "
312                 + ( ForesterUtil.isEmpty( gene_tree.getName() ) ? "" : gene_tree.getName() ) );
313         log_writer.println( "Species tree file                        : " + species_tree_file.getCanonicalPath() );
314         System.out.println( "Species tree file                        : " + species_tree_file.getCanonicalPath() );
315         log_writer.println( "Species tree name                        : "
316                 + ( ForesterUtil.isEmpty( species_tree.getName() ) ? "" : gene_tree.getName() ) );
317         System.out.println( "Species tree name                        : "
318                 + ( ForesterUtil.isEmpty( species_tree.getName() ) ? "" : gene_tree.getName() ) );
319         System.out.println( "Transfer taxonomy                        : " + transfer_taxonomy );
320         GSDII gsdii = null;
321         final long start_time = new Date().getTime();
322         try {
323             if ( base_algorithm == ALGORITHM.GSDI ) {
324                 System.out.println( "Algorithm                                : GSDI" );
325                 log_writer.println( "Algorithm                                : GSDI" );
326             }
327             else if ( base_algorithm == ALGORITHM.GSDIR ) {
328                 System.out.println( "Algorithm                                : GSDIR" );
329                 log_writer.println( "Algorithm                                : GSDIR" );
330             }
331             System.out.println( "Use most parsimonous duplication model   : " + most_parsimonous_duplication_model );
332             System.out.println( "Allow stripping of gene tree nodes       : " + allow_stripping_of_gene_tree );
333             log_writer.println( "Use most parsimonous duplication model   : " + most_parsimonous_duplication_model );
334             log_writer.println( "Allow stripping of gene tree nodes       : " + allow_stripping_of_gene_tree );
335             log_writer.flush();
336             if ( base_algorithm == ALGORITHM.GSDI ) {
337                 gsdii = new GSDI( gene_tree,
338                                   species_tree,
339                                   most_parsimonous_duplication_model,
340                                   allow_stripping_of_gene_tree,
341                                   true,
342                                   transfer_taxonomy );
343             }
344             else if ( base_algorithm == ALGORITHM.GSDIR ) {
345                 gsdii = new GSDIR( gene_tree, species_tree, allow_stripping_of_gene_tree, true, transfer_taxonomy );
346             }
347         }
348         catch ( final SDIException e ) {
349             fatalError( "user error", e.getLocalizedMessage(), log_writer );
350         }
351         catch ( final IOException e ) {
352             fatalError( "error", e.toString(), log_writer );
353         }
354         catch ( final OutOfMemoryError e ) {
355             ForesterUtil.outOfMemoryError( e );
356         }
357         catch ( final Exception e ) {
358             e.printStackTrace();
359             fatalError( "unexpected error", e.toString(), log_writer );
360         }
361         System.out.println( "Running time (excluding I/O)             : " + ( new Date().getTime() - start_time )
362                 + "ms" );
363         log_writer.println( "Running time (excluding I/O)             : " + ( new Date().getTime() - start_time )
364                 + "ms" );
365         System.out.println( "Mapping based on                         : " + gsdii.getTaxCompBase() );
366         log_writer.println( "Mapping based on                         : " + gsdii.getTaxCompBase() );
367         try {
368             final PhylogenyWriter writer = new PhylogenyWriter();
369             if ( base_algorithm == ALGORITHM.GSDIR ) {
370                 writer.toPhyloXML( out_file, ( ( GSDIR ) gsdii ).getMinDuplicationsSumGeneTree(), 0 );
371             }
372             else {
373                 writer.toPhyloXML( out_file, gene_tree, 0 );
374             }
375         }
376         catch ( final IOException e ) {
377             ForesterUtil.fatalError( PRG_NAME,
378                                      "Failed to write to [" + out_file.getCanonicalPath() + "]: " + e.getMessage() );
379         }
380         System.out.println( "Wrote resulting gene tree to             : " + out_file.getCanonicalPath() );
381         log_writer.println( "Wrote resulting gene tree to             : " + out_file.getCanonicalPath() );
382         final File species_tree_used_file = new File( ForesterUtil.removeSuffix( out_file.toString() )
383                 + SUFFIX_FOR_SPECIES_TREE_USED );
384         try {
385             final PhylogenyWriter writer = new PhylogenyWriter();
386             writer.toPhyloXML( species_tree_used_file, species_tree, 0 );
387         }
388         catch ( final IOException e ) {
389             ForesterUtil.fatalError( PRG_NAME,
390                                      "Failed to write to [" + species_tree_used_file.getCanonicalPath() + "]: "
391                                              + e.getMessage() );
392         }
393         System.out.println( "Wrote (stripped) species tree to         : " + species_tree_used_file.getCanonicalPath() );
394         log_writer.println( "Wrote (stripped) species tree to         : " + species_tree_used_file.getCanonicalPath() );
395         if ( ( gsdii.getReMappedScientificNamesFromGeneTree() != null )
396                 && !gsdii.getReMappedScientificNamesFromGeneTree().isEmpty() ) {
397             System.out.println( "Number of gene tree species remapped     : "
398                     + gsdii.getReMappedScientificNamesFromGeneTree().size() );
399             log_writer.println( "Number of gene tree species remapped     : "
400                     + gsdii.getReMappedScientificNamesFromGeneTree().size() );
401             writeToRemappedFile( out_file, gsdii.getReMappedScientificNamesFromGeneTree(), log_writer );
402         }
403         System.out.println( "Number of external nodes in gene tree    : " + gene_tree.getNumberOfExternalNodes() );
404         log_writer.println( "Number of external nodes in gene tree    : " + gene_tree.getNumberOfExternalNodes() );
405         System.out.println( "Number of external nodes in species tree : " + species_tree.getNumberOfExternalNodes() );
406         log_writer.println( "Number of external nodes in species tree : " + species_tree.getNumberOfExternalNodes() );
407         final int poly = PhylogenyMethods.countNumberOfPolytomies( species_tree );
408         System.out.println( "Number of polytomies in species tree     : " + poly );
409         log_writer.println( "Number of polytomies in species tree     : " + poly );
410         System.out.println( "External nodes stripped from gene tree   : "
411                 + gsdii.getStrippedExternalGeneTreeNodes().size() );
412         log_writer.println( "External nodes stripped from gene tree   : "
413                 + gsdii.getStrippedExternalGeneTreeNodes().size() );
414         System.out
415                 .println( "External nodes stripped from species tree: " + gsdii.getStrippedSpeciesTreeNodes().size() );
416         log_writer
417                 .println( "External nodes stripped from species tree: " + gsdii.getStrippedSpeciesTreeNodes().size() );
418         System.out.println();
419         System.out.println( "Number of speciations                    : " + gsdii.getSpeciationsSum() );
420         log_writer.println( "Number of speciations                    : " + gsdii.getSpeciationsSum() );
421         if ( ( base_algorithm == ALGORITHM.GSDIR ) ) {
422             final GSDIR gsdir = ( GSDIR ) gsdii;
423             System.out.println( "Minimal number of duplications           : " + gsdir.getMinDuplicationsSum() );
424             log_writer.println( "Minimal number of duplications           : " + gsdir.getMinDuplicationsSum() );
425         }
426         else if ( ( base_algorithm == ALGORITHM.GSDI ) ) {
427             final GSDI gsdi = ( GSDI ) gsdii;
428             System.out.println( "Number of duplications                   : " + gsdi.getDuplicationsSum() );
429             log_writer.println( "Number of duplications                   : " + gsdi.getDuplicationsSum() );
430             if ( !most_parsimonous_duplication_model ) {
431                 final int u = gsdi.getSpeciationOrDuplicationEventsSum();
432                 System.out.println( "Number of potential duplications         : " + u );
433                 log_writer.println( "Number of potential duplications         : " + u );
434             }
435         }
436         log_writer.println();
437         printMappedNodesToLog( log_writer, gsdii );
438         log_writer.println();
439         printStrippedGeneTreeNodesToLog( log_writer, gsdii );
440         System.out.println();
441         System.out.println( "Wrote log to                             : " + log_file.getCanonicalPath() );
442         System.out.println();
443         log_writer.close();
444     }
445
446     private static void fatalError( final String type, final String msg, final EasyWriter log_writer ) {
447         try {
448             log_writer.flush();
449             log_writer.println();
450             log_writer.print( type.toUpperCase() + ": " );
451             log_writer.println( msg );
452             log_writer.close();
453         }
454         catch ( final IOException e ) {
455             e.printStackTrace();
456         }
457         ForesterUtil.fatalError( gsdi.PRG_NAME, msg );
458     }
459
460     private static void print_help() {
461         System.out.println( "Usage: " + PRG_NAME
462                 + " [-options] <gene tree in phyloXML format, or directory with gene trees> <species tree> [outfile]" );
463         System.out.println();
464         System.out.println( "Options:" );
465         System.out.println( " -" + ALLOW_STRIPPING_OF_GENE_TREE_OPTION
466                 + "         : to allow stripping of gene tree nodes without a matching species" );
467         System.out.println( " -" + MOST_PARSIMONIOUS_OPTION
468                 + "         : use most parimonious duplication model for GSDI: " );
469         System.out.println( "              assign nodes as speciations which would otherwise be assiged" );
470         System.out.println( "              as potential duplications due to polytomies in the species tree" );
471         System.out.println( " -" + GUESS_FORMAT_OF_SPECIES_TREE
472                 + "         : to allow species tree in other formats than phyloXML (i.e. Newick, NHX, Nexus)" );
473         System.out.println( " -" + GSDIR_OPTION
474                 + "         : to use GSDIR algorithm instead of GSDI algorithm (re-rooting)" );
475         System.out.println( " -" + TRANSFER_TAXONOMY_OPTION
476                 + "         : to transfer taxonomic data from species tree to gene tree" );
477         System.out.println( " -" + SUFFIX_FOR_DIR_OPTION
478                 + "=<suffix>: suffix for gene trees for analyzing entire directory of trees" );
479         System.out.println();
480         System.out.println();
481         System.out.println( "Gene tree(s):" );
482         System.out.println( " in phyloXM format, with taxonomy and sequence data in appropriate fields" );
483         System.out.println();
484         System.out.println( "Species tree:" );
485         System.out.println( " in phyloXML format (unless option -" + GUESS_FORMAT_OF_SPECIES_TREE + " is used)" );
486         System.out.println();
487         System.out.println( "Examples: gsdi -" + ALLOW_STRIPPING_OF_GENE_TREE_OPTION
488                 + " gene_tree.xml tree_of_life.xml out.xml" );
489         System.out.println( "          gsdi -" + SUFFIX_FOR_DIR_OPTION + " -" + SUFFIX_FOR_DIR_OPTION + "=.xml"
490                 + " gene_tree_dir tree_of_life.xml" );
491         System.out.println();
492     }
493
494     private static void printMappedNodesToLog( final EasyWriter log_writer, final GSDII gsdi ) throws IOException {
495         final SortedSet<String> ss = new TreeSet<String>();
496         for( final PhylogenyNode n : gsdi.getMappedExternalSpeciesTreeNodes() ) {
497             ss.add( n.toString() );
498         }
499         log_writer.println( "The following " + ss.size() + " species were used: " );
500         for( final String s : ss ) {
501             log_writer.println( "  " + s );
502         }
503     }
504
505     private static void printStrippedGeneTreeNodesToLog( final EasyWriter log_writer, final GSDII gsdi )
506             throws IOException {
507         final SortedMap<String, Integer> sm = new TreeMap<String, Integer>();
508         for( final PhylogenyNode n : gsdi.getStrippedExternalGeneTreeNodes() ) {
509             final String s = n.toString();
510             if ( sm.containsKey( s ) ) {
511                 sm.put( s, sm.get( s ) + 1 );
512             }
513             else {
514                 sm.put( s, 1 );
515             }
516         }
517         log_writer.println( "The following " + sm.size() + " nodes were stripped from the gene tree: " );
518         for( final String s : sm.keySet() ) {
519             final int count = sm.get( s );
520             if ( count == 1 ) {
521                 log_writer.println( "  " + s );
522             }
523             else {
524                 log_writer.println( "  " + s + " [" + count + "]" );
525             }
526         }
527     }
528
529     private static void writeToRemappedFile( final File out_file,
530                                              final SortedSet<String> remapped,
531                                              final EasyWriter log_writer )
532             throws IOException {
533         final File file = new File( ForesterUtil.removeSuffix( out_file.toString() ) + REMAPPED_SUFFIX );
534         final EasyWriter remapped_writer = ForesterUtil.createEasyWriter( file );
535         for( final String s : remapped ) {
536             remapped_writer.println( s );
537         }
538         remapped_writer.close();
539         System.out.println( "Wrote remapped gene tree species to      : " + file.getCanonicalPath() );
540         log_writer.println( "Wrote remapped gene tree species to      : " + file.getCanonicalPath() );
541     }
542 }