2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.application;
29 import java.io.FilenameFilter;
30 import java.io.IOException;
31 import java.text.SimpleDateFormat;
32 import java.util.ArrayList;
33 import java.util.Date;
34 import java.util.List;
35 import java.util.SortedMap;
36 import java.util.SortedSet;
37 import java.util.TreeMap;
38 import java.util.TreeSet;
40 import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
41 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
42 import org.forester.io.parsers.phyloxml.PhyloXmlParser;
43 import org.forester.io.writers.PhylogenyWriter;
44 import org.forester.phylogeny.Phylogeny;
45 import org.forester.phylogeny.PhylogenyMethods;
46 import org.forester.phylogeny.PhylogenyNode;
47 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
48 import org.forester.phylogeny.factories.PhylogenyFactory;
49 import org.forester.sdi.GSDI;
50 import org.forester.sdi.GSDII;
51 import org.forester.sdi.GSDIR;
52 import org.forester.sdi.SDIException;
53 import org.forester.sdi.SDIutil;
54 import org.forester.sdi.SDIutil.ALGORITHM;
55 import org.forester.util.CommandLineArguments;
56 import org.forester.util.EasyWriter;
57 import org.forester.util.ForesterConstants;
58 import org.forester.util.ForesterUtil;
60 public final class gsdi {
62 final static public boolean REPLACE_UNDERSCORES_IN_NH_SPECIES_TREE = true;
63 final static private String ALLOW_STRIPPING_OF_GENE_TREE_OPTION = "g";
64 final static private String GSDIR_OPTION = "r";
65 final static private String MOST_PARSIMONIOUS_OPTION = "m";
66 final static private String SUFFIX_FOR_DIR_OPTION = "s";
67 final static private String GUESS_FORMAT_OF_SPECIES_TREE = "q";
68 final static private String TRANSFER_TAXONOMY_OPTION = "t";
69 final static private String HELP_OPTION_1 = "help";
70 final static private String HELP_OPTION_2 = "h";
71 final static private String SUFFIX_FOR_SPECIES_TREE_USED = "_species_tree_used.xml";
72 final static private String OUTTREE_SUFFIX = "_gsdir.xml";
73 final static private String LOGFILE_NAME = "00_gsdi_log.txt";
74 final static private String REMAPPED_SUFFIX = "_gsdi_remapped.txt";
75 final static private String PRG_NAME = "gsdi";
76 final static private String PRG_VERSION = "1.100";
77 final static private String PRG_DATE = "170331";
78 final static private String PRG_DESC = "general speciation duplication inference";
79 final static private String E_MAIL = "phyloxml@gmail.com";
80 final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester";
82 public static void main( final String args[] ) {
84 ForesterUtil.printProgramInformation( PRG_NAME,
90 ForesterUtil.getForesterLibraryInformation() );
91 CommandLineArguments cla = null;
93 cla = new CommandLineArguments( args );
95 catch ( final Exception e ) {
96 ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
98 if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) ) {
103 else if ( ( args.length < 2 ) || ( cla.getNumberOfNames() != 3 ) ) {
104 System.out.println();
105 System.out.println( "Wrong number of arguments." );
106 System.out.println();
110 final List<String> allowed_options = new ArrayList<String>();
111 allowed_options.add( GSDIR_OPTION );
112 allowed_options.add( GUESS_FORMAT_OF_SPECIES_TREE );
113 allowed_options.add( MOST_PARSIMONIOUS_OPTION );
114 allowed_options.add( ALLOW_STRIPPING_OF_GENE_TREE_OPTION );
115 allowed_options.add( TRANSFER_TAXONOMY_OPTION );
116 allowed_options.add( SUFFIX_FOR_DIR_OPTION );
117 final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
118 if ( dissallowed_options.length() > 0 ) {
119 ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options );
123 catch ( final IOException e ) {
124 ForesterUtil.fatalError( gsdi.PRG_NAME, e.getMessage() );
128 private static void execute( final CommandLineArguments cla ) throws IOException {
129 ALGORITHM base_algorithm = ALGORITHM.GSDI;
130 boolean most_parsimonous_duplication_model = false;
131 boolean allow_stripping_of_gene_tree = false;
132 if ( cla.isOptionSet( GSDIR_OPTION ) ) {
133 base_algorithm = ALGORITHM.GSDIR;
135 if ( cla.isOptionSet( MOST_PARSIMONIOUS_OPTION ) ) {
136 if ( base_algorithm == ALGORITHM.SDI ) {
137 ForesterUtil.fatalError( PRG_NAME, "Cannot use most parsimonious duplication mode with SDI" );
139 most_parsimonous_duplication_model = true;
141 if ( cla.isOptionSet( ALLOW_STRIPPING_OF_GENE_TREE_OPTION ) ) {
142 if ( base_algorithm == ALGORITHM.SDI ) {
143 ForesterUtil.fatalError( PRG_NAME, "Cannot allow stripping of gene tree with SDI" );
145 allow_stripping_of_gene_tree = true;
147 boolean transfer_taxonomy = false;
148 if ( cla.isOptionSet( TRANSFER_TAXONOMY_OPTION ) ) {
149 transfer_taxonomy = true;
151 boolean use_gene_tree_dir = false;
152 final String gene_tree_suffix;
153 if ( cla.isOptionSet( SUFFIX_FOR_DIR_OPTION ) ) {
154 gene_tree_suffix = cla.getOptionValue( SUFFIX_FOR_DIR_OPTION );
155 use_gene_tree_dir = true;
158 gene_tree_suffix = null;
160 File gene_tree_file = null;
161 File species_tree_file = null;
162 File out_file = null;
163 File log_file = null;
166 gene_tree_file = cla.getFile( 0 );
167 species_tree_file = cla.getFile( 1 );
168 if ( use_gene_tree_dir ) {
169 out_dir = cla.getFile( 2 );
170 if ( out_dir.exists() ) {
171 if ( !out_dir.isDirectory() ) {
173 .fatalError( gsdi.PRG_NAME,
174 "out-directory [" + out_dir + "] already exists but is not a directory" );
178 final boolean success = out_dir.mkdirs();
180 ForesterUtil.fatalError( gsdi.PRG_NAME, "could not create out-directory [" + out_dir + "]" );
185 out_file = cla.getFile( 2 );
188 catch ( final IllegalArgumentException e ) {
189 ForesterUtil.fatalError( PRG_NAME, "error in command line: " + e.getMessage() );
191 if ( use_gene_tree_dir ) {
192 final File indir = new File( gene_tree_file.toString() );
193 if ( !indir.exists() ) {
194 ForesterUtil.fatalError( gsdi.PRG_NAME, "in-directory [" + indir + "] does not exist" );
196 if ( !indir.isDirectory() ) {
197 ForesterUtil.fatalError( gsdi.PRG_NAME, "in-directory [" + indir + "] is not a directory" );
199 final String species_tree_file_name = species_tree_file.getName();
200 final File gene_tree_files[] = indir.listFiles( new FilenameFilter() {
203 public boolean accept( final File dir, final String name ) {
204 return (( name.endsWith( gene_tree_suffix) ) && !(name.equals( species_tree_file_name)));
208 if ( gene_tree_files.length < 1 ) {
209 ForesterUtil.fatalError( gsdi.PRG_NAME,
210 "in-directory [" + indir + "] does not contain any gene tree files with suffix "
211 + gene_tree_suffix );
214 execute( base_algorithm,
215 most_parsimonous_duplication_model,
216 allow_stripping_of_gene_tree,
223 execute( base_algorithm,
224 most_parsimonous_duplication_model,
225 allow_stripping_of_gene_tree,
234 private static void execute( final ALGORITHM base_algorithm,
235 final boolean most_parsimonous_duplication_model,
236 final boolean allow_stripping_of_gene_tree,
237 final boolean transfer_taxonomy,
238 final File gene_tree_files[],
239 final File species_tree_file,
242 final File log_file = new File( outdir, LOGFILE_NAME );
243 if ( ForesterUtil.isWritableFile( log_file ) != null ) {
244 ForesterUtil.fatalError( gsdi.PRG_NAME, ForesterUtil.isWritableFile( log_file ) );
246 EasyWriter log_writer = null;
248 log_writer = ForesterUtil.createEasyWriter( log_file );
250 catch ( final IOException e ) {
251 ForesterUtil.fatalError( gsdi.PRG_NAME, "Failed to create [" + log_file + "]: " + e.getMessage() );
253 log_writer.println( "# " + PRG_NAME );
254 log_writer.println( "# Version\t" + PRG_VERSION );
255 log_writer.println( "# Date\t" + PRG_DATE );
256 log_writer.println( "# Forester version\t" + ForesterConstants.FORESTER_VERSION );
257 log_writer.println( "# Species tree\t" + species_tree_file.getCanonicalPath() );
258 if ( base_algorithm == ALGORITHM.GSDI ) {
259 log_writer.println( "# Algorithm\tGSDI" );
261 else if ( base_algorithm == ALGORITHM.GSDIR ) {
262 log_writer.println( "# Algorithm\tGSDIR" );
264 log_writer.println( "# Use most parsimonous duplication model\t" + most_parsimonous_duplication_model );
265 log_writer.println( "# Allow stripping of gene tree nodes\t" + allow_stripping_of_gene_tree );
266 log_writer.println( "# Start time\t" + new SimpleDateFormat( "yyyyMMdd HH:mm:ss" ).format( new Date() ) );
268 for( final File gene_tree_file : gene_tree_files ) {
270 String outname = gene_tree_file.getName();
271 if ( outname.indexOf( "." ) > 0 ) {
272 outname = outname.substring( 0, outname.lastIndexOf( "." ) );
274 outname = outname + OUTTREE_SUFFIX;
275 execute( base_algorithm,
276 most_parsimonous_duplication_model,
277 allow_stripping_of_gene_tree,
281 new File( outdir, outname ),
283 System.out.print( "\r" + counter++ );
286 System.out.print( "\r" );
288 System.out.println("Analyzed " + counter + " gene trees");
289 System.out.println();
290 System.out.println( "Wrote log to: " + log_file.getCanonicalPath() );
291 System.out.println();
294 private static void execute( final ALGORITHM base_algorithm,
295 final boolean most_parsimonous_duplication_model,
296 final boolean allow_stripping_of_gene_tree,
297 final boolean transfer_taxonomy,
298 final File gene_tree_file,
299 final File species_tree_file,
301 final EasyWriter log_writer )
303 if ( ForesterUtil.isReadableFile( gene_tree_file ) != null ) {
304 ForesterUtil.fatalError( gsdi.PRG_NAME, ForesterUtil.isReadableFile( gene_tree_file ) );
306 if ( ForesterUtil.isReadableFile( species_tree_file ) != null ) {
307 ForesterUtil.fatalError( gsdi.PRG_NAME, ForesterUtil.isReadableFile( species_tree_file ) );
309 if ( ForesterUtil.isWritableFile( out_file ) != null ) {
310 ForesterUtil.fatalError( gsdi.PRG_NAME, ForesterUtil.isWritableFile( out_file ) );
312 Phylogeny species_tree = null;
313 Phylogeny gene_tree = null;
315 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
316 gene_tree = factory.create( gene_tree_file, PhyloXmlParser.createPhyloXmlParserXsdValidating() )[ 0 ];
318 catch ( final IOException e ) {
320 "failed to read gene tree from [" + gene_tree_file + "]: " + e.getMessage(),
324 species_tree = SDIutil.parseSpeciesTree( gene_tree,
326 REPLACE_UNDERSCORES_IN_NH_SPECIES_TREE,
328 TAXONOMY_EXTRACTION.NO );
330 catch ( final PhyloXmlDataFormatException e ) {
331 fatalError( "user error",
332 "failed to transfer general node name, in [" + species_tree_file + "]: " + e.getMessage(),
335 catch ( final SDIException e ) {
336 fatalError( "user error", e.getMessage(), log_writer );
338 catch ( final IOException e ) {
340 "Failed to read species tree from [" + species_tree_file + "]: " + e.getMessage(),
343 gene_tree.setRooted( true );
344 species_tree.setRooted( true );
345 if ( !gene_tree.isCompletelyBinary() ) {
346 fatalError( "user error", "gene tree [" + gene_tree_file + "] is not completely binary", log_writer );
348 if ( base_algorithm == ALGORITHM.SDI ) {
349 if ( !species_tree.isCompletelyBinary() ) {
350 fatalError( "user error",
351 "species tree is not completely binary, use GSDI or GSDIR instead",
355 log_writer.print( gene_tree_file.getName() );
356 log_writer.print( "\t" );
357 log_writer.print( ( ForesterUtil.isEmpty( gene_tree.getName() ) ? "" : gene_tree.getName() ) );
358 log_writer.print( "\t" );
362 if ( base_algorithm == ALGORITHM.GSDI ) {
363 gsdii = new GSDI( gene_tree,
365 most_parsimonous_duplication_model,
366 allow_stripping_of_gene_tree,
370 else if ( base_algorithm == ALGORITHM.GSDIR ) {
371 gsdii = new GSDIR( gene_tree, species_tree, allow_stripping_of_gene_tree, true, transfer_taxonomy );
374 catch ( final SDIException e ) {
375 fatalError( "user error", e.getLocalizedMessage(), log_writer );
377 catch ( final IOException e ) {
378 fatalError( "error", e.toString(), log_writer );
380 catch ( final OutOfMemoryError e ) {
381 ForesterUtil.outOfMemoryError( e );
383 catch ( final Exception e ) {
385 fatalError( "unexpected error", e.toString(), log_writer );
388 final PhylogenyWriter writer = new PhylogenyWriter();
389 if ( base_algorithm == ALGORITHM.GSDIR ) {
390 writer.toPhyloXML( out_file, ( ( GSDIR ) gsdii ).getMinDuplicationsSumGeneTree(), 0 );
393 writer.toPhyloXML( out_file, gene_tree, 0 );
396 catch ( final IOException e ) {
397 ForesterUtil.fatalError( PRG_NAME,
398 "Failed to write to [" + out_file.getCanonicalPath() + "]: " + e.getMessage() );
400 log_writer.print( gene_tree.getNumberOfExternalNodes() + "\t" );
401 log_writer.print( gsdii.getSpeciationsSum() + "\t" );
402 if ( ( base_algorithm == ALGORITHM.GSDIR ) ) {
403 final GSDIR gsdir = ( GSDIR ) gsdii;
404 log_writer.print( gsdir.getMinDuplicationsSum() + "\t" );
406 else if ( ( base_algorithm == ALGORITHM.GSDI ) ) {
407 final GSDI gsdi = ( GSDI ) gsdii;
408 log_writer.print( gsdi.getDuplicationsSum() + "\t" );
409 if ( !most_parsimonous_duplication_model ) {
410 log_writer.print( gsdi.getSpeciationOrDuplicationEventsSum() + "\t" );
413 log_writer.print( gsdii.getTaxCompBase() + "\t" );
414 log_writer.println();
417 private static void execute( final ALGORITHM base_algorithm,
418 final boolean most_parsimonous_duplication_model,
419 final boolean allow_stripping_of_gene_tree,
420 final boolean transfer_taxonomy,
421 final File gene_tree_file,
422 final File species_tree_file,
424 final File log_file )
426 if ( ForesterUtil.isReadableFile( gene_tree_file ) != null ) {
427 ForesterUtil.fatalError( gsdi.PRG_NAME, ForesterUtil.isReadableFile( gene_tree_file ) );
429 if ( ForesterUtil.isReadableFile( species_tree_file ) != null ) {
430 ForesterUtil.fatalError( gsdi.PRG_NAME, ForesterUtil.isReadableFile( species_tree_file ) );
432 if ( ForesterUtil.isWritableFile( out_file ) != null ) {
433 ForesterUtil.fatalError( gsdi.PRG_NAME, ForesterUtil.isWritableFile( out_file ) );
435 if ( ForesterUtil.isWritableFile( log_file ) != null ) {
436 ForesterUtil.fatalError( gsdi.PRG_NAME, ForesterUtil.isWritableFile( log_file ) );
438 EasyWriter log_writer = null;
440 log_writer = ForesterUtil.createEasyWriter( log_file );
442 catch ( final IOException e ) {
443 ForesterUtil.fatalError( gsdi.PRG_NAME, "Failed to create [" + log_file + "]: " + e.getMessage() );
445 Phylogeny species_tree = null;
446 Phylogeny gene_tree = null;
448 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
449 gene_tree = factory.create( gene_tree_file, PhyloXmlParser.createPhyloXmlParserXsdValidating() )[ 0 ];
451 catch ( final IOException e ) {
453 "failed to read gene tree from [" + gene_tree_file + "]: " + e.getMessage(),
457 species_tree = SDIutil.parseSpeciesTree( gene_tree,
459 REPLACE_UNDERSCORES_IN_NH_SPECIES_TREE,
461 TAXONOMY_EXTRACTION.NO );
463 catch ( final PhyloXmlDataFormatException e ) {
464 fatalError( "user error",
465 "failed to transfer general node name, in [" + species_tree_file + "]: " + e.getMessage(),
468 catch ( final SDIException e ) {
469 fatalError( "user error", e.getMessage(), log_writer );
471 catch ( final IOException e ) {
473 "Failed to read species tree from [" + species_tree_file + "]: " + e.getMessage(),
476 gene_tree.setRooted( true );
477 species_tree.setRooted( true );
478 if ( !gene_tree.isCompletelyBinary() ) {
479 fatalError( "user error", "gene tree [" + gene_tree_file + "] is not completely binary", log_writer );
481 if ( base_algorithm == ALGORITHM.SDI ) {
482 if ( !species_tree.isCompletelyBinary() ) {
483 fatalError( "user error",
484 "species tree is not completely binary, use GSDI or GSDIR instead",
488 log_writer.println( PRG_NAME + " - " + PRG_DESC );
489 log_writer.println( " version : " + PRG_VERSION );
490 log_writer.println( " date : " + PRG_DATE );
491 log_writer.println( " forester version: " + ForesterConstants.FORESTER_VERSION );
492 log_writer.println();
493 log_writer.println( "Start time : "
494 + new SimpleDateFormat( "yyyyMMdd HH:mm:ss" ).format( new Date() ) );
495 System.out.println( "Start time : "
496 + new SimpleDateFormat( "yyyyMMdd HH:mm:ss" ).format( new Date() ) );
497 log_writer.println( "Gene tree file : " + gene_tree_file.getCanonicalPath() );
498 System.out.println( "Gene tree file : " + gene_tree_file.getCanonicalPath() );
499 log_writer.println( "Gene tree name : "
500 + ( ForesterUtil.isEmpty( gene_tree.getName() ) ? "" : gene_tree.getName() ) );
501 System.out.println( "Gene tree name : "
502 + ( ForesterUtil.isEmpty( gene_tree.getName() ) ? "" : gene_tree.getName() ) );
503 log_writer.println( "Species tree file : " + species_tree_file.getCanonicalPath() );
504 System.out.println( "Species tree file : " + species_tree_file.getCanonicalPath() );
505 log_writer.println( "Species tree name : "
506 + ( ForesterUtil.isEmpty( species_tree.getName() ) ? "" : gene_tree.getName() ) );
507 System.out.println( "Species tree name : "
508 + ( ForesterUtil.isEmpty( species_tree.getName() ) ? "" : gene_tree.getName() ) );
509 System.out.println( "Transfer taxonomy : " + transfer_taxonomy );
511 final long start_time = new Date().getTime();
513 if ( base_algorithm == ALGORITHM.GSDI ) {
514 System.out.println( "Algorithm : GSDI" );
515 log_writer.println( "Algorithm : GSDI" );
517 else if ( base_algorithm == ALGORITHM.GSDIR ) {
518 System.out.println( "Algorithm : GSDIR" );
519 log_writer.println( "Algorithm : GSDIR" );
521 System.out.println( "Use most parsimonous duplication model : " + most_parsimonous_duplication_model );
522 System.out.println( "Allow stripping of gene tree nodes : " + allow_stripping_of_gene_tree );
523 log_writer.println( "Use most parsimonous duplication model : " + most_parsimonous_duplication_model );
524 log_writer.println( "Allow stripping of gene tree nodes : " + allow_stripping_of_gene_tree );
526 if ( base_algorithm == ALGORITHM.GSDI ) {
527 gsdii = new GSDI( gene_tree,
529 most_parsimonous_duplication_model,
530 allow_stripping_of_gene_tree,
534 else if ( base_algorithm == ALGORITHM.GSDIR ) {
535 gsdii = new GSDIR( gene_tree, species_tree, allow_stripping_of_gene_tree, true, transfer_taxonomy );
538 catch ( final SDIException e ) {
539 fatalError( "user error", e.getLocalizedMessage(), log_writer );
541 catch ( final IOException e ) {
542 fatalError( "error", e.toString(), log_writer );
544 catch ( final OutOfMemoryError e ) {
545 ForesterUtil.outOfMemoryError( e );
547 catch ( final Exception e ) {
549 fatalError( "unexpected error", e.toString(), log_writer );
551 System.out.println( "Running time (excluding I/O) : " + ( new Date().getTime() - start_time )
553 log_writer.println( "Running time (excluding I/O) : " + ( new Date().getTime() - start_time )
555 System.out.println( "Mapping based on : " + gsdii.getTaxCompBase() );
556 log_writer.println( "Mapping based on : " + gsdii.getTaxCompBase() );
558 final PhylogenyWriter writer = new PhylogenyWriter();
559 if ( base_algorithm == ALGORITHM.GSDIR ) {
560 writer.toPhyloXML( out_file, ( ( GSDIR ) gsdii ).getMinDuplicationsSumGeneTree(), 0 );
563 writer.toPhyloXML( out_file, gene_tree, 0 );
566 catch ( final IOException e ) {
567 ForesterUtil.fatalError( PRG_NAME,
568 "Failed to write to [" + out_file.getCanonicalPath() + "]: " + e.getMessage() );
570 System.out.println( "Wrote resulting gene tree to : " + out_file.getCanonicalPath() );
571 log_writer.println( "Wrote resulting gene tree to : " + out_file.getCanonicalPath() );
572 final File species_tree_used_file = new File( ForesterUtil.removeSuffix( out_file.toString() )
573 + SUFFIX_FOR_SPECIES_TREE_USED );
575 final PhylogenyWriter writer = new PhylogenyWriter();
576 writer.toPhyloXML( species_tree_used_file, species_tree, 0 );
578 catch ( final IOException e ) {
579 ForesterUtil.fatalError( PRG_NAME,
580 "Failed to write to [" + species_tree_used_file.getCanonicalPath() + "]: "
583 System.out.println( "Wrote (stripped) species tree to : " + species_tree_used_file.getCanonicalPath() );
584 log_writer.println( "Wrote (stripped) species tree to : " + species_tree_used_file.getCanonicalPath() );
585 if ( ( gsdii.getReMappedScientificNamesFromGeneTree() != null )
586 && !gsdii.getReMappedScientificNamesFromGeneTree().isEmpty() ) {
587 System.out.println( "Number of gene tree species remapped : "
588 + gsdii.getReMappedScientificNamesFromGeneTree().size() );
589 log_writer.println( "Number of gene tree species remapped : "
590 + gsdii.getReMappedScientificNamesFromGeneTree().size() );
591 writeToRemappedFile( out_file, gsdii.getReMappedScientificNamesFromGeneTree(), log_writer );
593 System.out.println( "Number of external nodes in gene tree : " + gene_tree.getNumberOfExternalNodes() );
594 log_writer.println( "Number of external nodes in gene tree : " + gene_tree.getNumberOfExternalNodes() );
595 System.out.println( "Number of external nodes in species tree : " + species_tree.getNumberOfExternalNodes() );
596 log_writer.println( "Number of external nodes in species tree : " + species_tree.getNumberOfExternalNodes() );
597 final int poly = PhylogenyMethods.countNumberOfPolytomies( species_tree );
598 System.out.println( "Number of polytomies in species tree : " + poly );
599 log_writer.println( "Number of polytomies in species tree : " + poly );
600 System.out.println( "External nodes stripped from gene tree : "
601 + gsdii.getStrippedExternalGeneTreeNodes().size() );
602 log_writer.println( "External nodes stripped from gene tree : "
603 + gsdii.getStrippedExternalGeneTreeNodes().size() );
605 .println( "External nodes stripped from species tree: " + gsdii.getStrippedSpeciesTreeNodes().size() );
607 .println( "External nodes stripped from species tree: " + gsdii.getStrippedSpeciesTreeNodes().size() );
608 System.out.println();
609 System.out.println( "Number of speciations : " + gsdii.getSpeciationsSum() );
610 log_writer.println( "Number of speciations : " + gsdii.getSpeciationsSum() );
611 if ( ( base_algorithm == ALGORITHM.GSDIR ) ) {
612 final GSDIR gsdir = ( GSDIR ) gsdii;
613 System.out.println( "Minimal number of duplications : " + gsdir.getMinDuplicationsSum() );
614 log_writer.println( "Minimal number of duplications : " + gsdir.getMinDuplicationsSum() );
616 else if ( ( base_algorithm == ALGORITHM.GSDI ) ) {
617 final GSDI gsdi = ( GSDI ) gsdii;
618 System.out.println( "Number of duplications : " + gsdi.getDuplicationsSum() );
619 log_writer.println( "Number of duplications : " + gsdi.getDuplicationsSum() );
620 if ( !most_parsimonous_duplication_model ) {
621 final int u = gsdi.getSpeciationOrDuplicationEventsSum();
622 System.out.println( "Number of potential duplications : " + u );
623 log_writer.println( "Number of potential duplications : " + u );
626 log_writer.println();
627 printMappedNodesToLog( log_writer, gsdii );
628 log_writer.println();
629 printStrippedGeneTreeNodesToLog( log_writer, gsdii );
630 System.out.println();
631 System.out.println( "Wrote log to : " + log_file.getCanonicalPath() );
632 System.out.println();
636 private static void fatalError( final String type, final String msg, final EasyWriter log_writer ) {
639 log_writer.println();
640 log_writer.print( type.toUpperCase() + ": " );
641 log_writer.println( msg );
644 catch ( final IOException e ) {
647 ForesterUtil.fatalError( gsdi.PRG_NAME, msg );
650 private static void print_help() {
651 System.out.println( "Usage: " + PRG_NAME
652 + " [-options] <gene tree file, or gene trees in-directory> <species tree> <outfile, or out-directory>" );
653 System.out.println();
654 System.out.println( "Options:" );
655 System.out.println( " -" + ALLOW_STRIPPING_OF_GENE_TREE_OPTION
656 + " : to allow stripping of gene tree nodes without a matching species" );
657 System.out.println( " -" + MOST_PARSIMONIOUS_OPTION
658 + " : use most parimonious duplication model for GSDI: " );
659 System.out.println( " assign nodes as speciations which would otherwise be assiged" );
660 System.out.println( " as potential duplications due to polytomies in the species tree" );
661 System.out.println( " -" + GUESS_FORMAT_OF_SPECIES_TREE
662 + " : to allow species tree in other formats than phyloXML (i.e. Newick, NHX, Nexus)" );
663 System.out.println( " -" + GSDIR_OPTION
664 + " : to use GSDIR algorithm instead of GSDI algorithm (re-rooting)" );
665 System.out.println( " -" + TRANSFER_TAXONOMY_OPTION
666 + " : to transfer taxonomic data from species tree to gene tree" );
667 System.out.println( " -" + SUFFIX_FOR_DIR_OPTION
668 + "=<suffix>: suffix for gene trees for analyzing entire directory of trees" );
669 System.out.println();
670 System.out.println();
671 System.out.println( "Gene tree(s):" );
672 System.out.println( " in phyloXM format, with taxonomy and sequence data in appropriate fields" );
673 System.out.println();
674 System.out.println( "Species tree:" );
675 System.out.println( " in phyloXML format (unless option -" + GUESS_FORMAT_OF_SPECIES_TREE + " is used)" );
676 System.out.println();
677 System.out.println( "Examples: gsdi -" + ALLOW_STRIPPING_OF_GENE_TREE_OPTION
678 + " gene_tree.xml tree_of_life.xml out.xml" );
679 System.out.println( " gsdi -" + SUFFIX_FOR_DIR_OPTION + " -" + SUFFIX_FOR_DIR_OPTION + "=.xml"
680 + " gene_tree_dir tree_of_life.xml out_dir" );
681 System.out.println();
684 private static void printMappedNodesToLog( final EasyWriter log_writer, final GSDII gsdi ) throws IOException {
685 final SortedSet<String> ss = new TreeSet<String>();
686 for( final PhylogenyNode n : gsdi.getMappedExternalSpeciesTreeNodes() ) {
687 ss.add( n.toString() );
689 log_writer.println( "The following " + ss.size() + " species were used: " );
690 for( final String s : ss ) {
691 log_writer.println( " " + s );
695 private static void printStrippedGeneTreeNodesToLog( final EasyWriter log_writer, final GSDII gsdi )
697 final SortedMap<String, Integer> sm = new TreeMap<String, Integer>();
698 for( final PhylogenyNode n : gsdi.getStrippedExternalGeneTreeNodes() ) {
699 final String s = n.toString();
700 if ( sm.containsKey( s ) ) {
701 sm.put( s, sm.get( s ) + 1 );
707 log_writer.println( "The following " + sm.size() + " nodes were stripped from the gene tree: " );
708 for( final String s : sm.keySet() ) {
709 final int count = sm.get( s );
711 log_writer.println( " " + s );
714 log_writer.println( " " + s + " [" + count + "]" );
719 private static void writeToRemappedFile( final File out_file,
720 final SortedSet<String> remapped,
721 final EasyWriter log_writer )
723 final File file = new File( ForesterUtil.removeSuffix( out_file.toString() ) + REMAPPED_SUFFIX );
724 final EasyWriter remapped_writer = ForesterUtil.createEasyWriter( file );
725 for( final String s : remapped ) {
726 remapped_writer.println( s );
728 remapped_writer.close();
729 System.out.println( "Wrote remapped gene tree species to : " + file.getCanonicalPath() );
730 log_writer.println( "Wrote remapped gene tree species to : " + file.getCanonicalPath() );