Merge branch 'master' of https://github.com/cmzmasek/forester.git into JalviewIntegration
[jalview.git] / forester / java / src / org / forester / rio / RIOUtil.java
1
2 package org.forester.rio;
3
4 import java.io.File;
5 import java.io.FileNotFoundException;
6 import java.io.IOException;
7 import java.math.RoundingMode;
8 import java.util.ArrayList;
9 import java.util.Iterator;
10 import java.util.List;
11 import java.util.Map;
12 import java.util.Map.Entry;
13 import java.util.SortedMap;
14 import java.util.SortedSet;
15 import java.util.TreeSet;
16
17 import org.forester.datastructures.IntMatrix;
18 import org.forester.io.parsers.IteratingPhylogenyParser;
19 import org.forester.io.parsers.PhylogenyParser;
20 import org.forester.io.parsers.nexus.NexusPhylogeniesParser;
21 import org.forester.io.parsers.nhx.NHXParser;
22 import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
23 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
24 import org.forester.io.parsers.phyloxml.PhyloXmlParser;
25 import org.forester.io.parsers.util.ParserUtils;
26 import org.forester.io.writers.PhylogenyWriter;
27 import org.forester.phylogeny.Phylogeny;
28 import org.forester.phylogeny.PhylogenyMethods;
29 import org.forester.phylogeny.PhylogenyNode;
30 import org.forester.phylogeny.PhylogenyMethods.DESCENDANT_SORT_PRIORITY;
31 import org.forester.phylogeny.data.Sequence;
32 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
33 import org.forester.phylogeny.factories.PhylogenyFactory;
34 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
35 import org.forester.rio.RIO.REROOTING;
36 import org.forester.sdi.GSDIR;
37 import org.forester.sdi.SDIException;
38 import org.forester.sdi.SDIutil;
39 import org.forester.sdi.SDIutil.ALGORITHM;
40 import org.forester.util.BasicDescriptiveStatistics;
41 import org.forester.util.BasicTable;
42 import org.forester.util.BasicTableParser;
43 import org.forester.util.EasyWriter;
44 import org.forester.util.ForesterUtil;
45
46 public final class RIOUtil {
47
48     public final static String STRIPPED_SPECIES_TREE_SUFFIX   = "_RIO_stripped_species_tree.xml";
49     public final static String ORTHO_OUTTABLE_SUFFIX          = "_RIO_orthologies.tsv";
50     public final static String ORTHO_OUTTABLE_WITH_MAP_SUFFIX = "_RIO_orthologies_ext_map.tsv";
51     public final static String OUT_MIN_DUP_GENE_TREE_SUFFIX   = "_RIO_gene_tree_min_dup_";
52     public final static String OUT_MED_DUP_GENE_TREE_SUFFIX   = "_RIO_gene_tree_med_dup_";
53     public final static String BEST_TREE_SUFFIX               = "_RIO_consensus_gene_tree_dup_";
54     public final static String ORTHOLOG_GROUPS_SUFFIX         = "_RIO_ortholog_groups.tsv";
55     public final static String LOGFILE_SUFFIX                 = "_RIO_log.tsv";
56
57     public static final void executeAnalysis( final File gene_trees_file,
58                                               final File species_tree_file,
59                                               final File orthology_outtable,
60                                               final File orthology_outtable_with_mappings,
61                                               final File orthology_groups_outfile,
62                                               final File logfile,
63                                               final String outgroup,
64                                               final REROOTING rerooting,
65                                               final int gt_first,
66                                               final int gt_last,
67                                               final File return_species_tree,
68                                               final File return_min_dup_gene_tree,
69                                               final File return_median_dup_gene_tree,
70                                               final boolean transfer_taxonomy,
71                                               final ALGORITHM algorithm,
72                                               final boolean use_gene_trees_dir,
73                                               final EasyWriter log,
74                                               final double ortholog_group_cutoff,
75                                               final boolean perform_id_mapping,
76                                               final File id_mapping_dir,
77                                               final String id_mapping_suffix,
78                                               final boolean perform_gsdir_on_best_tree,
79                                               final File outdir,
80                                               final File best_trees_indir,
81                                               final String best_trees_suffix ) {
82         try {
83             final SortedMap<String, String> id_map;
84             if ( perform_id_mapping ) {
85                 id_map = obtainMapping( id_mapping_dir, gene_trees_file.getName(), id_mapping_suffix );
86             }
87             else {
88                 id_map = null;
89             }
90             final RIO rio;
91             boolean iterating = false;
92             final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( gene_trees_file, true );
93             if ( p instanceof PhyloXmlParser ) {
94                 rio = RIO.executeAnalysis( gene_trees_file,
95                                            species_tree_file,
96                                            algorithm,
97                                            rerooting,
98                                            outgroup,
99                                            gt_first,
100                                            gt_last,
101                                            logfile != null,
102                                            true,
103                                            transfer_taxonomy );
104             }
105             else {
106                 iterating = true;
107                 if ( p instanceof NHXParser ) {
108                     final NHXParser nhx = ( NHXParser ) p;
109                     nhx.setReplaceUnderscores( false );
110                     nhx.setIgnoreQuotes( true );
111                     nhx.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGGRESSIVE );
112                 }
113                 else if ( p instanceof NexusPhylogeniesParser ) {
114                     final NexusPhylogeniesParser nex = ( NexusPhylogeniesParser ) p;
115                     nex.setReplaceUnderscores( false );
116                     nex.setIgnoreQuotes( true );
117                     nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGGRESSIVE );
118                 }
119                 else {
120                     throw new RuntimeException( "unknown parser type: " + p );
121                 }
122                 final IteratingPhylogenyParser ip = ( IteratingPhylogenyParser ) p;
123                 ip.setSource( gene_trees_file );
124                 rio = RIO.executeAnalysis( ip,
125                                            species_tree_file,
126                                            algorithm,
127                                            rerooting,
128                                            outgroup,
129                                            gt_first,
130                                            gt_last,
131                                            logfile != null,
132                                            !use_gene_trees_dir,
133                                            transfer_taxonomy );
134             }
135             if ( !use_gene_trees_dir ) {
136                 if ( algorithm == ALGORITHM.GSDIR ) {
137                     System.out.println( "Taxonomy linking based on           :\t" + rio.getGSDIRtaxCompBase() );
138                 }
139             }
140             final IntMatrix m;
141             if ( iterating ) {
142                 m = rio.getOrthologTable();
143             }
144             else {
145                 m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees(), true );
146             }
147             final GSDIR gsdir_for_best_tree;
148             if ( perform_gsdir_on_best_tree ) {
149                 gsdir_for_best_tree = analyzeConsensusTree( gene_trees_file,
150                                                             species_tree_file,
151                                                             outdir,
152                                                             best_trees_indir,
153                                                             id_map,
154                                                             best_trees_suffix );
155             }
156             else {
157                 gsdir_for_best_tree = null;
158             }
159             final BasicDescriptiveStatistics stats = rio.getDuplicationsStatistics();
160             if ( perform_id_mapping ) {
161                 writeOrthologyTable( orthology_outtable, stats.getN(), m, !use_gene_trees_dir, id_map, true );
162                 writeOrthologyTable( orthology_outtable_with_mappings,
163                                      stats.getN(),
164                                      m,
165                                      !use_gene_trees_dir,
166                                      id_map,
167                                      false );
168             }
169             else {
170                 writeOrthologyTable( orthology_outtable, stats.getN(), m, !use_gene_trees_dir, null, false );
171             }
172             final int ortholog_groups = writeOrtologGroups( orthology_groups_outfile,
173                                                             ortholog_group_cutoff,
174                                                             stats.getN(),
175                                                             m,
176                                                             !use_gene_trees_dir,
177                                                             false,
178                                                             id_map );
179             final int ortholog_groups_005 = writeOrtologGroups( null, 0.05, stats.getN(), m, false, true, null );
180             final int ortholog_groups_025 = writeOrtologGroups( null, 0.25, stats.getN(), m, false, true, null );
181             final int ortholog_groups_05 = writeOrtologGroups( null, 0.5, stats.getN(), m, false, true, null );
182             final int ortholog_groups_075 = writeOrtologGroups( null, 0.75, stats.getN(), m, false, true, null );
183             final int ortholog_groups_095 = writeOrtologGroups( null, 0.95, stats.getN(), m, false, true, null );
184             if ( ( algorithm != ALGORITHM.SDIR ) && ( logfile != null ) ) {
185                 writeLogFile( logfile,
186                               rio,
187                               species_tree_file,
188                               gene_trees_file,
189                               orthology_outtable,
190                               org.forester.application.rio.PRG_NAME,
191                               org.forester.application.rio.PRG_VERSION,
192                               org.forester.application.rio.PRG_DATE,
193                               ForesterUtil.getForesterLibraryInformation(),
194                               !use_gene_trees_dir );
195             }
196             if ( return_species_tree != null ) {
197                 writeTree( rio.getSpeciesTree(),
198                            return_species_tree,
199                            use_gene_trees_dir ? null : "Wrote (stripped) species tree to    :\t",
200                            null );
201             }
202             if ( return_min_dup_gene_tree != null && rio.getMinDuplicationsGeneTree() != null ) {
203                 final int min = ( int ) rio.getDuplicationsStatistics().getMin();
204                 writeTree( rio.getMinDuplicationsGeneTree(),
205                            new File( return_min_dup_gene_tree.toString() + min + ".xml" ),
206                            use_gene_trees_dir ? null : "Wrote one min duplication gene tree :\t",
207                            id_map );
208             }
209             if ( return_median_dup_gene_tree != null && rio.getDuplicationsToTreeMap() != null ) {
210                 final int med = ( int ) rio.getDuplicationsStatistics().median();
211                 writeTree( rio.getDuplicationsToTreeMap().get( med ),
212                            new File( return_median_dup_gene_tree.toString() + med + ".xml" ),
213                            use_gene_trees_dir ? null : "Wrote one med duplication gene tree :\t",
214                            id_map );
215             }
216             final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.##" );
217             final int min = ( int ) stats.getMin();
218             final int max = ( int ) stats.getMax();
219             final int median = ( int ) stats.median();
220             int min_count = 0;
221             int max_count = 0;
222             int median_count = 0;
223             for( double d : stats.getData() ) {
224                 if ( ( ( int ) d ) == min ) {
225                     ++min_count;
226                 }
227                 if ( ( ( int ) d ) == max ) {
228                     ++max_count;
229                 }
230                 if ( ( ( int ) d ) == median ) {
231                     ++median_count;
232                 }
233             }
234             final double min_count_percentage = ( 100.0 * min_count ) / stats.getN();
235             final double max_count_percentage = ( 100.0 * max_count ) / stats.getN();
236             final double median_count_percentage = ( 100.0 * median_count ) / stats.getN();
237             if ( use_gene_trees_dir ) {
238                 String name = gene_trees_file.getName();
239                 if ( name.indexOf( "." ) > 0 ) {
240                     name = name.substring( 0, name.lastIndexOf( "." ) );
241                 }
242                 log.print( name );
243                 log.print( "\t" );
244                 log.print( Integer.toString( rio.getExtNodesOfAnalyzedGeneTrees() ) );
245                 log.print( "\t" );
246                 log.print( Integer.toString( ortholog_groups ) );
247                 //
248                 log.print( "\t" );
249                 log.print( Integer.toString( ortholog_groups_005 ) );
250                 log.print( "\t" );
251                 log.print( Integer.toString( ortholog_groups_025 ) );
252                 log.print( "\t" );
253                 log.print( Integer.toString( ortholog_groups_05 ) );
254                 log.print( "\t" );
255                 log.print( Integer.toString( ortholog_groups_075 ) );
256                 log.print( "\t" );
257                 log.print( Integer.toString( ortholog_groups_095 ) );
258                 //
259                 if ( true ) {
260                     log.print( "\t" );
261                     log.print( Integer.toString( gsdir_for_best_tree.getMinDuplicationsSum() ) );
262                     log.print( "\t" );
263                     log.print( df.format( median - gsdir_for_best_tree.getMinDuplicationsSum() ) );
264                 }
265                 //
266                 log.print( "\t" );
267                 if ( stats.getN() > 3 ) {
268                     log.print( df.format( median ) );
269                 }
270                 else {
271                     log.print( "" );
272                 }
273                 log.print( "\t" );
274                 log.print( df.format( stats.arithmeticMean() ) );
275                 log.print( "\t" );
276                 if ( stats.getN() > 3 ) {
277                     log.print( df.format( stats.sampleStandardDeviation() ) );
278                 }
279                 else {
280                     log.print( "" );
281                 }
282                 log.print( "\t" );
283                 log.print( Integer.toString( min ) );
284                 log.print( "\t" );
285                 log.print( Integer.toString( max ) );
286                 log.print( "\t" );
287                 log.print( Integer.toString( rio.getRemovedGeneTreeNodes().size() ) );
288                 log.print( "\t" );
289                 log.print( Integer.toString( stats.getN() ) );
290                 log.println();
291             }
292             else {
293                 System.out.println( "Gene tree internal nodes            :\t" + rio.getIntNodesOfAnalyzedGeneTrees() );
294                 System.out.println( "Gene tree external nodes            :\t" + rio.getExtNodesOfAnalyzedGeneTrees() );
295                 System.out.println( "Mean number of duplications         :\t" + df.format( stats.arithmeticMean() )
296                         + "\t" + df.format( ( 100.0 * stats.arithmeticMean() ) / rio.getIntNodesOfAnalyzedGeneTrees() )
297                         + "%\t(sd: " + df.format( stats.sampleStandardDeviation() ) + ")" );
298                 if ( stats.getN() > 3 ) {
299                     System.out.println( "Median number of duplications       :\t" + df.format( median ) + "\t"
300                             + df.format( ( 100.0 * median ) / rio.getIntNodesOfAnalyzedGeneTrees() ) + "%" );
301                 }
302                 System.out.println( "Minimum duplications                :\t" + min + "\t"
303                         + df.format( ( 100.0 * min ) / rio.getIntNodesOfAnalyzedGeneTrees() ) + "%" );
304                 System.out.println( "Maximum duplications                :\t" + ( int ) max + "\t"
305                         + df.format( ( 100.0 * max ) / rio.getIntNodesOfAnalyzedGeneTrees() ) + "%" );
306                 System.out.println( "Gene trees with median duplications :\t" + median_count + "\t"
307                         + df.format( median_count_percentage ) + "%" );
308                 System.out.println( "Gene trees with minimum duplications:\t" + min_count + "\t"
309                         + df.format( min_count_percentage ) + "%" );
310                 System.out.println( "Gene trees with maximum duplications:\t" + max_count + "\t"
311                         + df.format( max_count_percentage ) + "%" );
312                 if ( algorithm == ALGORITHM.GSDIR ) {
313                     System.out.println( "Removed ext gene tree nodes         :\t"
314                             + rio.getRemovedGeneTreeNodes().size() );
315                 }
316             }
317         }
318         catch ( final RIOException e ) {
319             ForesterUtil.fatalError( e.getLocalizedMessage() );
320         }
321         catch ( final SDIException e ) {
322             ForesterUtil.fatalError( e.getLocalizedMessage() );
323         }
324         catch ( final IOException e ) {
325             ForesterUtil.fatalError( e.getLocalizedMessage() );
326         }
327         catch ( final OutOfMemoryError e ) {
328             ForesterUtil.outOfMemoryError( e );
329         }
330         catch ( final Exception e ) {
331             ForesterUtil.unexpectedFatalError( e );
332         }
333         catch ( final Error e ) {
334             ForesterUtil.unexpectedFatalError( e );
335         }
336     }
337
338     private final static GSDIR analyzeConsensusTree( final File gene_trees_file,
339                                                      final File species_tree_file,
340                                                      final File outdir,
341                                                      final File best_trees_indir,
342                                                      final SortedMap<String, String> id_map,
343                                                      final String best_trees_suffix )
344             throws IOException, FileNotFoundException, PhyloXmlDataFormatException, SDIException {
345         final File the_one = ForesterUtil.getMatchingFile( best_trees_indir,
346                                                            gene_trees_file.getName(),
347                                                            best_trees_suffix );
348         final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
349         final Phylogeny best_tree = factory.create( the_one, PhyloXmlParser.createPhyloXmlParserXsdValidating() )[ 0 ];
350         final Phylogeny species_tree = SDIutil
351                 .parseSpeciesTree( best_tree, species_tree_file, false, true, TAXONOMY_EXTRACTION.NO );
352         PhylogenyMethods.deleteInternalNodesWithOnlyOneDescendent( species_tree );
353         best_tree.setRooted( true );
354         species_tree.setRooted( true );
355         if ( !best_tree.isCompletelyBinaryAllow3ChildrenAtRoot() ) {
356             throw new IOException( "gene tree matching to ["
357                     + ForesterUtil.removeFileExtension( gene_trees_file.getName() ) + "] is not completely binary" );
358         }
359         final PhylogenyNodeIterator it = best_tree.iteratorExternalForward();
360         while ( it.hasNext() ) {
361             final PhylogenyNode n = it.next();
362             final String name = n.getName().trim();
363             if ( !ForesterUtil.isEmpty( name ) ) {
364                 try {
365                     ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE );
366                 }
367                 catch ( final PhyloXmlDataFormatException e ) {
368                     // Ignore.
369                 }
370             }
371         }
372         final GSDIR gsdir_for_best_tree = new GSDIR( best_tree, species_tree, true, true, true );
373         final Phylogeny result_gene_tree = gsdir_for_best_tree.getMinDuplicationsSumGeneTree();
374         result_gene_tree.setRerootable( false );
375         PhylogenyMethods.orderAppearance( result_gene_tree.getRoot(), true, true, DESCENDANT_SORT_PRIORITY.NODE_NAME );
376         final String outname = ForesterUtil.removeFileExtension( the_one.getName() );
377         final File outfile = new File( outdir.getCanonicalFile() + "/" + outname + RIOUtil.BEST_TREE_SUFFIX
378                 + gsdir_for_best_tree.getMinDuplicationsSum() + ".xml" );
379         writeTree( result_gene_tree, outfile, null, id_map );
380         return gsdir_for_best_tree;
381     }
382
383     private static final void writeOrthologyTable( final File table_outfile,
384                                                    final int gene_trees_analyzed,
385                                                    final IntMatrix m,
386                                                    final boolean verbose,
387                                                    final SortedMap<String, String> id_map,
388                                                    final boolean replace_ids )
389             throws IOException {
390         final EasyWriter w = ForesterUtil.createEasyWriter( table_outfile );
391         final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.####" );
392         df.setDecimalSeparatorAlwaysShown( false );
393         df.setRoundingMode( RoundingMode.HALF_UP );
394         for( int i = 0; i < m.size(); ++i ) {
395             w.print( "\t" );
396             if ( replace_ids ) {
397                 if ( !id_map.containsKey( m.getLabel( i ) ) ) {
398                     throw new IOException( "no id mapping for \"" + m.getLabel( i ) + "\" (attempting to write ["
399                             + table_outfile + "])" );
400                 }
401                 w.print( id_map.get( m.getLabel( i ) ) );
402             }
403             else {
404                 w.print( m.getLabel( i ) );
405             }
406         }
407         w.println();
408         for( int x = 0; x < m.size(); ++x ) {
409             if ( replace_ids ) {
410                 w.print( id_map.get( m.getLabel( x ) ) );
411             }
412             else {
413                 w.print( m.getLabel( x ) );
414             }
415             for( int y = 0; y < m.size(); ++y ) {
416                 w.print( "\t" );
417                 if ( x == y ) {
418                     if ( m.get( x, y ) != gene_trees_analyzed ) {
419                         ForesterUtil.unexpectedFatalError( "diagonal value is off" );
420                     }
421                     w.print( "-" );
422                 }
423                 else {
424                     w.print( df.format( ( ( double ) m.get( x, y ) ) / gene_trees_analyzed ) );
425                 }
426             }
427             w.println();
428         }
429         if ( !replace_ids && id_map != null && id_map.size() > 0 ) {
430             w.println();
431             
432             final Iterator<?> it = id_map.entrySet().iterator();
433             while (it.hasNext()) {
434                 Map.Entry<String, String> pair = ( Entry<String, String> ) it.next();
435                 w.println( pair.getKey()  + "\t" + pair.getValue() );
436             } //TODO
437             
438             /*
439             id_map.forEach( ( k, v ) -> {
440                 try {
441                     w.println( k + "\t" + v );
442                 }
443                 catch ( final IOException e ) {
444                     //ignore
445                 }
446             } );*/
447         }
448         w.close();
449         if ( verbose ) {
450             System.out.println( "Wrote table to                      :\t" + table_outfile.getCanonicalPath() );
451         }
452     }
453
454     private static final int writeOrtologGroups( final File outfile,
455                                                  final double cutoff,
456                                                  final int gene_trees_analyzed,
457                                                  final IntMatrix m,
458                                                  final boolean verbose,
459                                                  final boolean calc_conly,
460                                                  final SortedMap<String, String> id_map )
461             throws IOException {
462         List<SortedSet<String>> groups = new ArrayList<SortedSet<String>>();
463         BasicDescriptiveStatistics stats = new BasicDescriptiveStatistics();
464         int below_075 = 0;
465         int below_05 = 0;
466         int below_025 = 0;
467         for( int x = 1; x < m.size(); ++x ) {
468             final String a = m.getLabel( x );
469             for( int y = 0; y < x; ++y ) {
470                 final String b = m.getLabel( y );
471                 final double s = ( ( double ) m.get( x, y ) ) / gene_trees_analyzed;
472                 stats.addValue( s );
473                 if ( s < 0.75 ) {
474                     below_075++;
475                     if ( s < 0.5 ) {
476                         below_05++;
477                         if ( s < 0.25 ) {
478                             below_025++;
479                         }
480                     }
481                 }
482                 if ( s >= cutoff ) {
483                     boolean found = false;
484                     for( final SortedSet<String> group : groups ) {
485                         if ( group.contains( a ) ) {
486                             group.add( b );
487                             found = true;
488                         }
489                         if ( group.contains( b ) ) {
490                             group.add( a );
491                             found = true;
492                         }
493                     }
494                     if ( !found ) {
495                         final SortedSet<String> new_group = new TreeSet<String>();
496                         new_group.add( a );
497                         new_group.add( b );
498                         groups.add( new_group );
499                     }
500                 }
501             }
502         }
503         //Deal with singlets:
504         for( int x = 0; x < m.size(); ++x ) {
505             final String a = m.getLabel( x );
506             boolean found = false;
507             for( final SortedSet<String> group : groups ) {
508                 if ( group.contains( a ) ) {
509                     found = true;
510                     break;
511                 }
512             }
513             if ( !found ) {
514                 final SortedSet<String> new_group = new TreeSet<String>();
515                 new_group.add( a );
516                 groups.add( new_group );
517             }
518         }
519         if ( calc_conly ) {
520             return groups.size();
521         }
522         final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.####" );
523         df.setDecimalSeparatorAlwaysShown( false );
524         df.setRoundingMode( RoundingMode.HALF_UP );
525         final EasyWriter w = ForesterUtil.createEasyWriter( outfile );
526         int counter = 1;
527         for( final SortedSet<String> group : groups ) {
528             w.print( Integer.toString( counter++ ) );
529             for( final String s : group ) {
530                 w.print( "\t" );
531                 if ( id_map != null && id_map.size() > 0 ) {
532                     if ( !id_map.containsKey( s ) ) {
533                         throw new IOException( "no id mapping for \"" + s + "\" (attempting to write [" + outfile
534                                 + "])" );
535                     }
536                     w.print( id_map.get( s ) );
537                 }
538                 else {
539                     w.print( s );
540                 }
541             }
542             w.println();
543         }
544         w.println();
545         w.println( "# Cutoff\t" + df.format( cutoff ) );
546         w.println();
547         w.println( "# Orthology support statistics:" );
548         if ( stats.getN() > 3 ) {
549             w.println( "# Median\t" + df.format( stats.median() ) );
550         }
551         w.println( "# Mean\t" + df.format( stats.arithmeticMean() ) );
552         if ( stats.getN() > 3 ) {
553             w.println( "# SD\t" + df.format( stats.sampleStandardDeviation() ) );
554         }
555         w.println( "# Min\t" + df.format( stats.getMin() ) );
556         w.println( "# Max\t" + df.format( stats.getMax() ) );
557         w.println( "# Total\t" + df.format( stats.getN() ) );
558         w.println( "# Below 0.75\t" + below_075 + "\t" + df.format( ( 100.0 * below_075 / stats.getN() ) ) + "%" );
559         w.println( "# Below 0.5\t" + below_05 + "\t" + df.format( ( 100.0 * below_05 / stats.getN() ) ) + "%" );
560         w.println( "# Below 0.25\t" + below_025 + "\t" + df.format( ( 100.0 * below_025 / stats.getN() ) ) + "%" );
561         w.close();
562         if ( verbose ) {
563             System.out.println( "Number of ortholog groups           :\t" + groups.size() );
564             System.out.println( "Wrote orthologs groups table to     :\t" + outfile.getCanonicalPath() );
565         }
566         return groups.size();
567     }
568
569     private static void writeTree( final Phylogeny p,
570                                    final File f,
571                                    final String comment,
572                                    final SortedMap<String, String> id_map )
573             throws IOException {
574         if ( id_map != null && id_map.size() > 0 ) {
575             final PhylogenyNodeIterator it = p.iteratorExternalForward();
576             while ( it.hasNext() ) {
577                 final PhylogenyNode n = it.next();
578                 if ( !id_map.containsKey( n.getName() ) ) {
579                     throw new IOException( "no id mapping for \"" + n.getName() + "\" (attempting to write [" + f
580                             + "])" );
581                 }
582                 final Sequence seq = new Sequence();
583                 seq.setName( id_map.get( n.getName() ) );
584                 n.getNodeData().addSequence( seq );
585             }
586         }
587         final PhylogenyWriter writer = new PhylogenyWriter();
588         writer.toPhyloXML( f, p, 0 );
589         if ( comment != null ) {
590             System.out.println( comment + f.getCanonicalPath() );
591         }
592     }
593
594     private static void writeLogFile( final File logfile,
595                                       final RIO rio,
596                                       final File species_tree_file,
597                                       final File gene_trees_file,
598                                       final File outtable,
599                                       final String prg_name,
600                                       final String prg_v,
601                                       final String prg_date,
602                                       final String f,
603                                       final boolean verbose )
604             throws IOException {
605         final EasyWriter out = ForesterUtil.createEasyWriter( logfile );
606         out.println( "# " + prg_name );
607         out.println( "# version : " + prg_v );
608         out.println( "# date    : " + prg_date );
609         out.println( "# based on: " + f );
610         out.println( "# ----------------------------------" );
611         out.println( "Gene trees                          :\t" + gene_trees_file.getCanonicalPath() );
612         out.println( "Species tree                        :\t" + species_tree_file.getCanonicalPath() );
613         out.println( "All vs all orthology table          :\t" + outtable.getCanonicalPath() );
614         out.flush();
615         out.println( rio.getLog().toString() );
616         out.close();
617         if ( verbose ) {
618             System.out.println( "Wrote log to                        :\t" + logfile.getCanonicalPath() );
619         }
620     }
621
622     private final static SortedMap<String, String> obtainMapping( final File dir,
623                                                                   final String prefix,
624                                                                   final String suffix )
625             throws IOException {
626         final File the_one = ForesterUtil.getMatchingFile( dir, prefix, suffix );
627         final BasicTable<String> t = BasicTableParser.parse( the_one, '\t' );
628         return t.getColumnsAsMap( 0, 1 );
629     }
630 }