capitalisation
[jalview.git] / forester / java / src / org / forester / rio / RIOUtil.java
1
2 package org.forester.rio;
3
4 import java.io.File;
5 import java.io.FileNotFoundException;
6 import java.io.IOException;
7 import java.math.RoundingMode;
8 import java.util.ArrayList;
9 import java.util.List;
10 import java.util.SortedMap;
11 import java.util.SortedSet;
12 import java.util.TreeSet;
13
14 import org.forester.datastructures.IntMatrix;
15 import org.forester.io.parsers.IteratingPhylogenyParser;
16 import org.forester.io.parsers.PhylogenyParser;
17 import org.forester.io.parsers.nexus.NexusPhylogeniesParser;
18 import org.forester.io.parsers.nhx.NHXParser;
19 import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
20 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
21 import org.forester.io.parsers.phyloxml.PhyloXmlParser;
22 import org.forester.io.parsers.util.ParserUtils;
23 import org.forester.io.writers.PhylogenyWriter;
24 import org.forester.phylogeny.Phylogeny;
25 import org.forester.phylogeny.PhylogenyMethods;
26 import org.forester.phylogeny.PhylogenyNode;
27 import org.forester.phylogeny.PhylogenyMethods.DESCENDANT_SORT_PRIORITY;
28 import org.forester.phylogeny.data.Sequence;
29 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
30 import org.forester.phylogeny.factories.PhylogenyFactory;
31 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
32 import org.forester.rio.RIO.REROOTING;
33 import org.forester.sdi.GSDIR;
34 import org.forester.sdi.SDIException;
35 import org.forester.sdi.SDIutil;
36 import org.forester.sdi.SDIutil.ALGORITHM;
37 import org.forester.util.BasicDescriptiveStatistics;
38 import org.forester.util.BasicTable;
39 import org.forester.util.BasicTableParser;
40 import org.forester.util.EasyWriter;
41 import org.forester.util.ForesterUtil;
42
43 public final class RIOUtil {
44
45     public final static String STRIPPED_SPECIES_TREE_SUFFIX   = "_RIO_stripped_species_tree.xml";
46     public final static String ORTHO_OUTTABLE_SUFFIX          = "_RIO_orthologies.tsv";
47     public final static String ORTHO_OUTTABLE_WITH_MAP_SUFFIX = "_RIO_orthologies_ext_map.tsv";
48     public final static String OUT_MIN_DUP_GENE_TREE_SUFFIX   = "_RIO_gene_tree_min_dup_";
49     public final static String OUT_MED_DUP_GENE_TREE_SUFFIX   = "_RIO_gene_tree_med_dup_";
50     public final static String BEST_TREE_SUFFIX               = "_RIO_consensus_gene_tree_dup_";
51     public final static String ORTHOLOG_GROUPS_SUFFIX         = "_RIO_ortholog_groups.tsv";
52     public final static String LOGFILE_SUFFIX                 = "_RIO_log.tsv";
53
54     public static final void executeAnalysis( final File gene_trees_file,
55                                               final File species_tree_file,
56                                               final File orthology_outtable,
57                                               final File orthology_outtable_with_mappings,
58                                               final File orthology_groups_outfile,
59                                               final File logfile,
60                                               final String outgroup,
61                                               final REROOTING rerooting,
62                                               final int gt_first,
63                                               final int gt_last,
64                                               final File return_species_tree,
65                                               final File return_min_dup_gene_tree,
66                                               final File return_median_dup_gene_tree,
67                                               final boolean transfer_taxonomy,
68                                               final ALGORITHM algorithm,
69                                               final boolean use_gene_trees_dir,
70                                               final EasyWriter log,
71                                               final double ortholog_group_cutoff,
72                                               final boolean perform_id_mapping,
73                                               final File id_mapping_dir,
74                                               final String id_mapping_suffix,
75                                               final boolean perform_gsdir_on_best_tree,
76                                               final File outdir,
77                                               final File best_trees_indir,
78                                               final String best_trees_suffix ) {
79         try {
80             final SortedMap<String, String> id_map;
81             if ( perform_id_mapping ) {
82                 id_map = obtainMapping( id_mapping_dir, gene_trees_file.getName(), id_mapping_suffix );
83             }
84             else {
85                 id_map = null;
86             }
87             final RIO rio;
88             boolean iterating = false;
89             final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( gene_trees_file, true );
90             if ( p instanceof PhyloXmlParser ) {
91                 rio = RIO.executeAnalysis( gene_trees_file,
92                                            species_tree_file,
93                                            algorithm,
94                                            rerooting,
95                                            outgroup,
96                                            gt_first,
97                                            gt_last,
98                                            logfile != null,
99                                            true,
100                                            transfer_taxonomy );
101             }
102             else {
103                 iterating = true;
104                 if ( p instanceof NHXParser ) {
105                     final NHXParser nhx = ( NHXParser ) p;
106                     nhx.setReplaceUnderscores( false );
107                     nhx.setIgnoreQuotes( true );
108                     nhx.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGGRESSIVE );
109                 }
110                 else if ( p instanceof NexusPhylogeniesParser ) {
111                     final NexusPhylogeniesParser nex = ( NexusPhylogeniesParser ) p;
112                     nex.setReplaceUnderscores( false );
113                     nex.setIgnoreQuotes( true );
114                     nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGGRESSIVE );
115                 }
116                 else {
117                     throw new RuntimeException( "unknown parser type: " + p );
118                 }
119                 final IteratingPhylogenyParser ip = ( IteratingPhylogenyParser ) p;
120                 ip.setSource( gene_trees_file );
121                 rio = RIO.executeAnalysis( ip,
122                                            species_tree_file,
123                                            algorithm,
124                                            rerooting,
125                                            outgroup,
126                                            gt_first,
127                                            gt_last,
128                                            logfile != null,
129                                            !use_gene_trees_dir,
130                                            transfer_taxonomy );
131             }
132             if ( !use_gene_trees_dir ) {
133                 if ( algorithm == ALGORITHM.GSDIR ) {
134                     System.out.println( "Taxonomy linking based on           :\t" + rio.getGSDIRtaxCompBase() );
135                 }
136             }
137             final IntMatrix m;
138             if ( iterating ) {
139                 m = rio.getOrthologTable();
140             }
141             else {
142                 m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees(), true );
143             }
144             final GSDIR gsdir_for_best_tree;
145             if ( perform_gsdir_on_best_tree ) {
146                 gsdir_for_best_tree = analyzeConsensusTree( gene_trees_file,
147                                                             species_tree_file,
148                                                             outdir,
149                                                             best_trees_indir,
150                                                             id_map,
151                                                             best_trees_suffix );
152             }
153             else {
154                 gsdir_for_best_tree = null;
155             }
156             final BasicDescriptiveStatistics stats = rio.getDuplicationsStatistics();
157             if ( perform_id_mapping ) {
158                 writeOrthologyTable( orthology_outtable, stats.getN(), m, !use_gene_trees_dir, id_map, true );
159                 writeOrthologyTable( orthology_outtable_with_mappings,
160                                      stats.getN(),
161                                      m,
162                                      !use_gene_trees_dir,
163                                      id_map,
164                                      false );
165             }
166             else {
167                 writeOrthologyTable( orthology_outtable, stats.getN(), m, !use_gene_trees_dir, null, false );
168             }
169             final int ortholog_groups = writeOrtologGroups( orthology_groups_outfile,
170                                                             ortholog_group_cutoff,
171                                                             stats.getN(),
172                                                             m,
173                                                             !use_gene_trees_dir,
174                                                             false,
175                                                             id_map );
176             final int ortholog_groups_005 = writeOrtologGroups( null, 0.05, stats.getN(), m, false, true, null );
177             final int ortholog_groups_025 = writeOrtologGroups( null, 0.25, stats.getN(), m, false, true, null );
178             final int ortholog_groups_05 = writeOrtologGroups( null, 0.5, stats.getN(), m, false, true, null );
179             final int ortholog_groups_075 = writeOrtologGroups( null, 0.75, stats.getN(), m, false, true, null );
180             final int ortholog_groups_095 = writeOrtologGroups( null, 0.95, stats.getN(), m, false, true, null );
181             if ( ( algorithm != ALGORITHM.SDIR ) && ( logfile != null ) ) {
182                 writeLogFile( logfile,
183                               rio,
184                               species_tree_file,
185                               gene_trees_file,
186                               orthology_outtable,
187                               org.forester.application.rio.PRG_NAME,
188                               org.forester.application.rio.PRG_VERSION,
189                               org.forester.application.rio.PRG_DATE,
190                               ForesterUtil.getForesterLibraryInformation(),
191                               !use_gene_trees_dir );
192             }
193             if ( return_species_tree != null ) {
194                 writeTree( rio.getSpeciesTree(),
195                            return_species_tree,
196                            use_gene_trees_dir ? null : "Wrote (stripped) species tree to    :\t",
197                            null );
198             }
199             if ( return_min_dup_gene_tree != null && rio.getMinDuplicationsGeneTree() != null ) {
200                 final int min = ( int ) rio.getDuplicationsStatistics().getMin();
201                 writeTree( rio.getMinDuplicationsGeneTree(),
202                            new File( return_min_dup_gene_tree.toString() + min + ".xml" ),
203                            use_gene_trees_dir ? null : "Wrote one min duplication gene tree :\t",
204                            id_map );
205             }
206             if ( return_median_dup_gene_tree != null && rio.getDuplicationsToTreeMap() != null ) {
207                 final int med = ( int ) rio.getDuplicationsStatistics().median();
208                 writeTree( rio.getDuplicationsToTreeMap().get( med ),
209                            new File( return_median_dup_gene_tree.toString() + med + ".xml" ),
210                            use_gene_trees_dir ? null : "Wrote one med duplication gene tree :\t",
211                            id_map );
212             }
213             final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.##" );
214             final int min = ( int ) stats.getMin();
215             final int max = ( int ) stats.getMax();
216             final int median = ( int ) stats.median();
217             int min_count = 0;
218             int max_count = 0;
219             int median_count = 0;
220             for( double d : stats.getData() ) {
221                 if ( ( ( int ) d ) == min ) {
222                     ++min_count;
223                 }
224                 if ( ( ( int ) d ) == max ) {
225                     ++max_count;
226                 }
227                 if ( ( ( int ) d ) == median ) {
228                     ++median_count;
229                 }
230             }
231             final double min_count_percentage = ( 100.0 * min_count ) / stats.getN();
232             final double max_count_percentage = ( 100.0 * max_count ) / stats.getN();
233             final double median_count_percentage = ( 100.0 * median_count ) / stats.getN();
234             if ( use_gene_trees_dir ) {
235                 String name = gene_trees_file.getName();
236                 if ( name.indexOf( "." ) > 0 ) {
237                     name = name.substring( 0, name.lastIndexOf( "." ) );
238                 }
239                 log.print( name );
240                 log.print( "\t" );
241                 log.print( Integer.toString( rio.getExtNodesOfAnalyzedGeneTrees() ) );
242                 log.print( "\t" );
243                 log.print( Integer.toString( ortholog_groups ) );
244                 //
245                 log.print( "\t" );
246                 log.print( Integer.toString( ortholog_groups_005 ) );
247                 log.print( "\t" );
248                 log.print( Integer.toString( ortholog_groups_025 ) );
249                 log.print( "\t" );
250                 log.print( Integer.toString( ortholog_groups_05 ) );
251                 log.print( "\t" );
252                 log.print( Integer.toString( ortholog_groups_075 ) );
253                 log.print( "\t" );
254                 log.print( Integer.toString( ortholog_groups_095 ) );
255                 //
256                 if ( true ) {
257                     log.print( "\t" );
258                     log.print( Integer.toString( gsdir_for_best_tree.getMinDuplicationsSum() ) );
259                     log.print( "\t" );
260                     log.print( df.format( median - gsdir_for_best_tree.getMinDuplicationsSum() ) );
261                 }
262                 //
263                 log.print( "\t" );
264                 if ( stats.getN() > 3 ) {
265                     log.print( df.format( median ) );
266                 }
267                 else {
268                     log.print( "" );
269                 }
270                 log.print( "\t" );
271                 log.print( df.format( stats.arithmeticMean() ) );
272                 log.print( "\t" );
273                 if ( stats.getN() > 3 ) {
274                     log.print( df.format( stats.sampleStandardDeviation() ) );
275                 }
276                 else {
277                     log.print( "" );
278                 }
279                 log.print( "\t" );
280                 log.print( Integer.toString( min ) );
281                 log.print( "\t" );
282                 log.print( Integer.toString( max ) );
283                 log.print( "\t" );
284                 log.print( Integer.toString( rio.getRemovedGeneTreeNodes().size() ) );
285                 log.print( "\t" );
286                 log.print( Integer.toString( stats.getN() ) );
287                 log.println();
288             }
289             else {
290                 System.out.println( "Gene tree internal nodes            :\t" + rio.getIntNodesOfAnalyzedGeneTrees() );
291                 System.out.println( "Gene tree external nodes            :\t" + rio.getExtNodesOfAnalyzedGeneTrees() );
292                 System.out.println( "Mean number of duplications         :\t" + df.format( stats.arithmeticMean() )
293                         + "\t" + df.format( ( 100.0 * stats.arithmeticMean() ) / rio.getIntNodesOfAnalyzedGeneTrees() )
294                         + "%\t(sd: " + df.format( stats.sampleStandardDeviation() ) + ")" );
295                 if ( stats.getN() > 3 ) {
296                     System.out.println( "Median number of duplications       :\t" + df.format( median ) + "\t"
297                             + df.format( ( 100.0 * median ) / rio.getIntNodesOfAnalyzedGeneTrees() ) + "%" );
298                 }
299                 System.out.println( "Minimum duplications                :\t" + min + "\t"
300                         + df.format( ( 100.0 * min ) / rio.getIntNodesOfAnalyzedGeneTrees() ) + "%" );
301                 System.out.println( "Maximum duplications                :\t" + ( int ) max + "\t"
302                         + df.format( ( 100.0 * max ) / rio.getIntNodesOfAnalyzedGeneTrees() ) + "%" );
303                 System.out.println( "Gene trees with median duplications :\t" + median_count + "\t"
304                         + df.format( median_count_percentage ) + "%" );
305                 System.out.println( "Gene trees with minimum duplications:\t" + min_count + "\t"
306                         + df.format( min_count_percentage ) + "%" );
307                 System.out.println( "Gene trees with maximum duplications:\t" + max_count + "\t"
308                         + df.format( max_count_percentage ) + "%" );
309                 if ( algorithm == ALGORITHM.GSDIR ) {
310                     System.out.println( "Removed ext gene tree nodes         :\t"
311                             + rio.getRemovedGeneTreeNodes().size() );
312                 }
313             }
314         }
315         catch ( final RIOException e ) {
316             ForesterUtil.fatalError( e.getLocalizedMessage() );
317         }
318         catch ( final SDIException e ) {
319             ForesterUtil.fatalError( e.getLocalizedMessage() );
320         }
321         catch ( final IOException e ) {
322             ForesterUtil.fatalError( e.getLocalizedMessage() );
323         }
324         catch ( final OutOfMemoryError e ) {
325             ForesterUtil.outOfMemoryError( e );
326         }
327         catch ( final Exception e ) {
328             ForesterUtil.unexpectedFatalError( e );
329         }
330         catch ( final Error e ) {
331             ForesterUtil.unexpectedFatalError( e );
332         }
333     }
334
335     private final static GSDIR analyzeConsensusTree( final File gene_trees_file,
336                                                      final File species_tree_file,
337                                                      final File outdir,
338                                                      final File best_trees_indir,
339                                                      final SortedMap<String, String> id_map,
340                                                      final String best_trees_suffix )
341             throws IOException, FileNotFoundException, PhyloXmlDataFormatException, SDIException {
342         final File the_one = ForesterUtil.getMatchingFile( best_trees_indir,
343                                                            gene_trees_file.getName(),
344                                                            best_trees_suffix );
345         final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
346         final Phylogeny best_tree = factory.create( the_one, PhyloXmlParser.createPhyloXmlParserXsdValidating() )[ 0 ];
347         final Phylogeny species_tree = SDIutil
348                 .parseSpeciesTree( best_tree, species_tree_file, false, true, TAXONOMY_EXTRACTION.NO );
349         PhylogenyMethods.deleteInternalNodesWithOnlyOneDescendent( species_tree );
350         best_tree.setRooted( true );
351         species_tree.setRooted( true );
352         if ( !best_tree.isCompletelyBinaryAllow3ChildrenAtRoot() ) {
353             throw new IOException( "gene tree matching to ["
354                     + ForesterUtil.removeFileExtension( gene_trees_file.getName() ) + "] is not completely binary" );
355         }
356         final PhylogenyNodeIterator it = best_tree.iteratorExternalForward();
357         while ( it.hasNext() ) {
358             final PhylogenyNode n = it.next();
359             final String name = n.getName().trim();
360             if ( !ForesterUtil.isEmpty( name ) ) {
361                 try {
362                     ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE );
363                 }
364                 catch ( final PhyloXmlDataFormatException e ) {
365                     // Ignore.
366                 }
367             }
368         }
369         final GSDIR gsdir_for_best_tree = new GSDIR( best_tree, species_tree, true, true, true );
370         final Phylogeny result_gene_tree = gsdir_for_best_tree.getMinDuplicationsSumGeneTree();
371         result_gene_tree.setRerootable( false );
372         PhylogenyMethods.orderAppearance( result_gene_tree.getRoot(), true, true, DESCENDANT_SORT_PRIORITY.NODE_NAME );
373         final String outname = ForesterUtil.removeFileExtension( the_one.getName() );
374         final File outfile = new File( outdir.getCanonicalFile() + "/" + outname + RIOUtil.BEST_TREE_SUFFIX
375                 + gsdir_for_best_tree.getMinDuplicationsSum() + ".xml" );
376         writeTree( result_gene_tree, outfile, null, id_map );
377         return gsdir_for_best_tree;
378     }
379
380     private static final void writeOrthologyTable( final File table_outfile,
381                                                    final int gene_trees_analyzed,
382                                                    final IntMatrix m,
383                                                    final boolean verbose,
384                                                    final SortedMap<String, String> id_map,
385                                                    final boolean replace_ids )
386             throws IOException {
387         final EasyWriter w = ForesterUtil.createEasyWriter( table_outfile );
388         final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.####" );
389         df.setDecimalSeparatorAlwaysShown( false );
390         df.setRoundingMode( RoundingMode.HALF_UP );
391         for( int i = 0; i < m.size(); ++i ) {
392             w.print( "\t" );
393             if ( replace_ids ) {
394                 if ( !id_map.containsKey( m.getLabel( i ) ) ) {
395                     throw new IOException( "no id mapping for \"" + m.getLabel( i ) + "\" (attempting to write ["
396                             + table_outfile + "])" );
397                 }
398                 w.print( id_map.get( m.getLabel( i ) ) );
399             }
400             else {
401                 w.print( m.getLabel( i ) );
402             }
403         }
404         w.println();
405         for( int x = 0; x < m.size(); ++x ) {
406             if ( replace_ids ) {
407                 w.print( id_map.get( m.getLabel( x ) ) );
408             }
409             else {
410                 w.print( m.getLabel( x ) );
411             }
412             for( int y = 0; y < m.size(); ++y ) {
413                 w.print( "\t" );
414                 if ( x == y ) {
415                     if ( m.get( x, y ) != gene_trees_analyzed ) {
416                         ForesterUtil.unexpectedFatalError( "diagonal value is off" );
417                     }
418                     w.print( "-" );
419                 }
420                 else {
421                     w.print( df.format( ( ( double ) m.get( x, y ) ) / gene_trees_analyzed ) );
422                 }
423             }
424             w.println();
425         }
426         if ( !replace_ids && id_map != null && id_map.size() > 0 ) {
427             w.println();
428             id_map.forEach( ( k, v ) -> {
429                 try {
430                     w.println( k + "\t" + v );
431                 }
432                 catch ( final IOException e ) {
433                     //ignore
434                 }
435             } );
436         }
437         w.close();
438         if ( verbose ) {
439             System.out.println( "Wrote table to                      :\t" + table_outfile.getCanonicalPath() );
440         }
441     }
442
443     private static final int writeOrtologGroups( final File outfile,
444                                                  final double cutoff,
445                                                  final int gene_trees_analyzed,
446                                                  final IntMatrix m,
447                                                  final boolean verbose,
448                                                  final boolean calc_conly,
449                                                  final SortedMap<String, String> id_map )
450             throws IOException {
451         List<SortedSet<String>> groups = new ArrayList<SortedSet<String>>();
452         BasicDescriptiveStatistics stats = new BasicDescriptiveStatistics();
453         int below_075 = 0;
454         int below_05 = 0;
455         int below_025 = 0;
456         for( int x = 1; x < m.size(); ++x ) {
457             final String a = m.getLabel( x );
458             for( int y = 0; y < x; ++y ) {
459                 final String b = m.getLabel( y );
460                 final double s = ( ( double ) m.get( x, y ) ) / gene_trees_analyzed;
461                 stats.addValue( s );
462                 if ( s < 0.75 ) {
463                     below_075++;
464                     if ( s < 0.5 ) {
465                         below_05++;
466                         if ( s < 0.25 ) {
467                             below_025++;
468                         }
469                     }
470                 }
471                 if ( s >= cutoff ) {
472                     boolean found = false;
473                     for( final SortedSet<String> group : groups ) {
474                         if ( group.contains( a ) ) {
475                             group.add( b );
476                             found = true;
477                         }
478                         if ( group.contains( b ) ) {
479                             group.add( a );
480                             found = true;
481                         }
482                     }
483                     if ( !found ) {
484                         final SortedSet<String> new_group = new TreeSet<String>();
485                         new_group.add( a );
486                         new_group.add( b );
487                         groups.add( new_group );
488                     }
489                 }
490             }
491         }
492         //Deal with singlets:
493         for( int x = 0; x < m.size(); ++x ) {
494             final String a = m.getLabel( x );
495             boolean found = false;
496             for( final SortedSet<String> group : groups ) {
497                 if ( group.contains( a ) ) {
498                     found = true;
499                     break;
500                 }
501             }
502             if ( !found ) {
503                 final SortedSet<String> new_group = new TreeSet<String>();
504                 new_group.add( a );
505                 groups.add( new_group );
506             }
507         }
508         if ( calc_conly ) {
509             return groups.size();
510         }
511         final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.####" );
512         df.setDecimalSeparatorAlwaysShown( false );
513         df.setRoundingMode( RoundingMode.HALF_UP );
514         final EasyWriter w = ForesterUtil.createEasyWriter( outfile );
515         int counter = 1;
516         for( final SortedSet<String> group : groups ) {
517             w.print( Integer.toString( counter++ ) );
518             for( final String s : group ) {
519                 w.print( "\t" );
520                 if ( id_map != null && id_map.size() > 0 ) {
521                     if ( !id_map.containsKey( s ) ) {
522                         throw new IOException( "no id mapping for \"" + s + "\" (attempting to write [" + outfile
523                                 + "])" );
524                     }
525                     w.print( id_map.get( s ) );
526                 }
527                 else {
528                     w.print( s );
529                 }
530             }
531             w.println();
532         }
533         w.println();
534         w.println( "# Cutoff\t" + df.format( cutoff ) );
535         w.println();
536         w.println( "# Orthology support statistics:" );
537         if ( stats.getN() > 3 ) {
538             w.println( "# Median\t" + df.format( stats.median() ) );
539         }
540         w.println( "# Mean\t" + df.format( stats.arithmeticMean() ) );
541         if ( stats.getN() > 3 ) {
542             w.println( "# SD\t" + df.format( stats.sampleStandardDeviation() ) );
543         }
544         w.println( "# Min\t" + df.format( stats.getMin() ) );
545         w.println( "# Max\t" + df.format( stats.getMax() ) );
546         w.println( "# Total\t" + df.format( stats.getN() ) );
547         w.println( "# Below 0.75\t" + below_075 + "\t" + df.format( ( 100.0 * below_075 / stats.getN() ) ) + "%" );
548         w.println( "# Below 0.5\t" + below_05 + "\t" + df.format( ( 100.0 * below_05 / stats.getN() ) ) + "%" );
549         w.println( "# Below 0.25\t" + below_025 + "\t" + df.format( ( 100.0 * below_025 / stats.getN() ) ) + "%" );
550         w.close();
551         if ( verbose ) {
552             System.out.println( "Number of ortholog groups           :\t" + groups.size() );
553             System.out.println( "Wrote orthologs groups table to     :\t" + outfile.getCanonicalPath() );
554         }
555         return groups.size();
556     }
557
558     private static void writeTree( final Phylogeny p,
559                                    final File f,
560                                    final String comment,
561                                    final SortedMap<String, String> id_map )
562             throws IOException {
563         if ( id_map != null && id_map.size() > 0 ) {
564             final PhylogenyNodeIterator it = p.iteratorExternalForward();
565             while ( it.hasNext() ) {
566                 final PhylogenyNode n = it.next();
567                 if ( !id_map.containsKey( n.getName() ) ) {
568                     throw new IOException( "no id mapping for \"" + n.getName() + "\" (attempting to write [" + f
569                             + "])" );
570                 }
571                 final Sequence seq = new Sequence();
572                 seq.setName( id_map.get( n.getName() ) );
573                 n.getNodeData().addSequence( seq );
574             }
575         }
576         final PhylogenyWriter writer = new PhylogenyWriter();
577         writer.toPhyloXML( f, p, 0 );
578         if ( comment != null ) {
579             System.out.println( comment + f.getCanonicalPath() );
580         }
581     }
582
583     private static void writeLogFile( final File logfile,
584                                       final RIO rio,
585                                       final File species_tree_file,
586                                       final File gene_trees_file,
587                                       final File outtable,
588                                       final String prg_name,
589                                       final String prg_v,
590                                       final String prg_date,
591                                       final String f,
592                                       final boolean verbose )
593             throws IOException {
594         final EasyWriter out = ForesterUtil.createEasyWriter( logfile );
595         out.println( "# " + prg_name );
596         out.println( "# version : " + prg_v );
597         out.println( "# date    : " + prg_date );
598         out.println( "# based on: " + f );
599         out.println( "# ----------------------------------" );
600         out.println( "Gene trees                          :\t" + gene_trees_file.getCanonicalPath() );
601         out.println( "Species tree                        :\t" + species_tree_file.getCanonicalPath() );
602         out.println( "All vs all orthology table          :\t" + outtable.getCanonicalPath() );
603         out.flush();
604         out.println( rio.getLog().toString() );
605         out.close();
606         if ( verbose ) {
607             System.out.println( "Wrote log to                        :\t" + logfile.getCanonicalPath() );
608         }
609     }
610
611     private final static SortedMap<String, String> obtainMapping( final File dir,
612                                                                   final String prefix,
613                                                                   final String suffix )
614             throws IOException {
615         final File the_one = ForesterUtil.getMatchingFile( dir, prefix, suffix );
616         final BasicTable<String> t = BasicTableParser.parse( the_one, '\t' );
617         return t.getColumnsAsMap( 0, 1 );
618     }
619 }