inprogress
[jalview.git] / forester / java / src / org / forester / surfacing / SurfacingUtil.java
1 // $Id:
2 //
3 // FORESTER -- software libraries and applications
4 // for evolutionary biology research and applications.
5 //
6 // Copyright (C) 2008-2009 Christian M. Zmasek
7 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
8 // All rights reserved
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 //
24 // Contact: phylosoft @ gmail . com
25 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26
27 package org.forester.surfacing;
28
29 import java.io.BufferedWriter;
30 import java.io.File;
31 import java.io.FileWriter;
32 import java.io.IOException;
33 import java.io.Writer;
34 import java.text.DecimalFormat;
35 import java.text.NumberFormat;
36 import java.util.ArrayList;
37 import java.util.Arrays;
38 import java.util.Collections;
39 import java.util.Comparator;
40 import java.util.HashMap;
41 import java.util.HashSet;
42 import java.util.Iterator;
43 import java.util.List;
44 import java.util.Map;
45 import java.util.Map.Entry;
46 import java.util.PriorityQueue;
47 import java.util.Set;
48 import java.util.SortedMap;
49 import java.util.SortedSet;
50 import java.util.TreeMap;
51 import java.util.TreeSet;
52 import java.util.regex.Matcher;
53 import java.util.regex.Pattern;
54
55 import org.forester.application.surfacing;
56 import org.forester.evoinference.distance.NeighborJoining;
57 import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix;
58 import org.forester.evoinference.matrix.character.CharacterStateMatrix;
59 import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates;
60 import org.forester.evoinference.matrix.character.CharacterStateMatrix.Format;
61 import org.forester.evoinference.matrix.character.CharacterStateMatrix.GainLossStates;
62 import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
63 import org.forester.evoinference.matrix.distance.DistanceMatrix;
64 import org.forester.go.GoId;
65 import org.forester.go.GoNameSpace;
66 import org.forester.go.GoTerm;
67 import org.forester.go.PfamToGoMapping;
68 import org.forester.io.parsers.nexus.NexusConstants;
69 import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
70 import org.forester.io.parsers.util.ParserUtils;
71 import org.forester.io.writers.PhylogenyWriter;
72 import org.forester.phylogeny.Phylogeny;
73 import org.forester.phylogeny.PhylogenyMethods;
74 import org.forester.phylogeny.PhylogenyNode;
75 import org.forester.phylogeny.PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE;
76 import org.forester.phylogeny.data.BinaryCharacters;
77 import org.forester.phylogeny.data.Confidence;
78 import org.forester.phylogeny.data.Taxonomy;
79 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
80 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
81 import org.forester.protein.BasicDomain;
82 import org.forester.protein.BasicProtein;
83 import org.forester.protein.BinaryDomainCombination;
84 import org.forester.protein.Domain;
85 import org.forester.protein.Protein;
86 import org.forester.species.Species;
87 import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
88 import org.forester.surfacing.GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder;
89 import org.forester.surfacing.PrintableDomainSimilarity.PRINT_OPTION;
90 import org.forester.util.AsciiHistogram;
91 import org.forester.util.BasicDescriptiveStatistics;
92 import org.forester.util.BasicTable;
93 import org.forester.util.BasicTableParser;
94 import org.forester.util.CommandLineArguments;
95 import org.forester.util.DescriptiveStatistics;
96 import org.forester.util.ForesterUtil;
97
98 public final class SurfacingUtil {
99
100     final static class DomainComparator implements Comparator<Domain> {
101
102         final private boolean _ascending;
103
104         public DomainComparator( final boolean ascending ) {
105             _ascending = ascending;
106         }
107
108         @Override
109         public final int compare( final Domain d0, final Domain d1 ) {
110             if ( d0.getFrom() < d1.getFrom() ) {
111                 return _ascending ? -1 : 1;
112             }
113             else if ( d0.getFrom() > d1.getFrom() ) {
114                 return _ascending ? 1 : -1;
115             }
116             return 0;
117         }
118     }
119     private final static NumberFormat       FORMATTER_3                      = new DecimalFormat( "0.000" );
120     private static final Comparator<Domain> ASCENDING_CONFIDENCE_VALUE_ORDER = new Comparator<Domain>() {
121
122                                                                                  @Override
123                                                                                  public int compare( final Domain d1,
124                                                                                                      final Domain d2 ) {
125                                                                                      if ( d1.getPerSequenceEvalue() < d2
126                                                                                              .getPerSequenceEvalue() ) {
127                                                                                          return -1;
128                                                                                      }
129                                                                                      else if ( d1
130                                                                                              .getPerSequenceEvalue() > d2
131                                                                                              .getPerSequenceEvalue() ) {
132                                                                                          return 1;
133                                                                                      }
134                                                                                      else {
135                                                                                          return d1.compareTo( d2 );
136                                                                                      }
137                                                                                  }
138                                                                              };
139     public final static Pattern             PATTERN_SP_STYLE_TAXONOMY        = Pattern.compile( "^[A-Z0-9]{3,5}$" );
140
141     public static void addAllBinaryDomainCombinationToSet( final GenomeWideCombinableDomains genome,
142                                                            final SortedSet<BinaryDomainCombination> binary_domain_combinations ) {
143         final SortedMap<String, CombinableDomains> all_cd = genome.getAllCombinableDomainsIds();
144         for( final String domain_id : all_cd.keySet() ) {
145             binary_domain_combinations.addAll( all_cd.get( domain_id ).toBinaryDomainCombinations() );
146         }
147     }
148
149     public static void addAllDomainIdsToSet( final GenomeWideCombinableDomains genome,
150                                              final SortedSet<String> domain_ids ) {
151         final SortedSet<String> domains = genome.getAllDomainIds();
152         for( final String domain : domains ) {
153             domain_ids.add( domain );
154         }
155     }
156
157     public static void addHtmlHead( final Writer w, final String title ) throws IOException {
158         w.write( SurfacingConstants.NL );
159         w.write( "<head>" );
160         w.write( "<title>" );
161         w.write( title );
162         w.write( "</title>" );
163         w.write( SurfacingConstants.NL );
164         w.write( "<style>" );
165         w.write( SurfacingConstants.NL );
166         w.write( "a:visited { color : #6633FF; text-decoration : none; }" );
167         w.write( SurfacingConstants.NL );
168         w.write( "a:link { color : #6633FF; text-decoration : none; }" );
169         w.write( SurfacingConstants.NL );
170         w.write( "a:active { color : #99FF00; text-decoration : none; }" );
171         w.write( SurfacingConstants.NL );
172         w.write( "a:hover { color : #FFFFFF; background-color : #99FF00; text-decoration : none; }" );
173         w.write( SurfacingConstants.NL );
174         //
175         w.write( "a.pl:visited { color : #505050; text-decoration : none; font-size: 7pt;}" );
176         w.write( SurfacingConstants.NL );
177         w.write( "a.pl:link { color : #505050; text-decoration : none; font-size: 7pt;}" );
178         w.write( SurfacingConstants.NL );
179         w.write( "a.pl:active { color : #505050; text-decoration : none; font-size: 7pt;}" );
180         w.write( SurfacingConstants.NL );
181         w.write( "a.pl:hover { color : #FFFFFF; background-color : #99FF00; text-decoration : none; font-size: 7pt;}" );
182         w.write( SurfacingConstants.NL );
183         //
184         w.write( "a.ps:visited { color : #707070; text-decoration : none; font-size: 7pt;}" );
185         w.write( SurfacingConstants.NL );
186         w.write( "a.ps:link { color : #707070; text-decoration : none; font-size: 7pt;}" );
187         w.write( SurfacingConstants.NL );
188         w.write( "a.ps:active { color : #707070; text-decoration : none; font-size: 7pt;}" );
189         w.write( SurfacingConstants.NL );
190         w.write( "a.ps:hover { color : #FFFFFF; background-color : #99FF00; text-decoration : none; font-size: 7pt;}" );
191         w.write( SurfacingConstants.NL );
192         //
193         w.write( "td { text-align: left; vertical-align: top; font-family: Verdana, Arial, Helvetica; font-size: 8pt}" );
194         w.write( SurfacingConstants.NL );
195         w.write( "h1 { color : #0000FF; font-family: Verdana, Arial, Helvetica; font-size: 18pt; font-weight: bold }" );
196         w.write( SurfacingConstants.NL );
197         w.write( "h2 { color : #0000FF; font-family: Verdana, Arial, Helvetica; font-size: 16pt; font-weight: bold }" );
198         w.write( SurfacingConstants.NL );
199         w.write( "</style>" );
200         w.write( SurfacingConstants.NL );
201         w.write( "</head>" );
202         w.write( SurfacingConstants.NL );
203     }
204
205     private final static void addToCountMap( final Map<String, Integer> map, final String s ) {
206         if ( map.containsKey( s ) ) {
207             map.put( s, map.get( s ) + 1 );
208         }
209         else {
210             map.put( s, 1 );
211         }
212     }
213
214     public static DescriptiveStatistics calculateDescriptiveStatisticsForMeanValues( final Set<DomainSimilarity> similarities ) {
215         final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
216         for( final DomainSimilarity similarity : similarities ) {
217             stats.addValue( similarity.getMeanSimilarityScore() );
218         }
219         return stats;
220     }
221
222     private static void calculateIndependentDomainCombinationGains( final Phylogeny local_phylogeny_l,
223                                                                     final String outfilename_for_counts,
224                                                                     final String outfilename_for_dc,
225                                                                     final String outfilename_for_dc_for_go_mapping,
226                                                                     final String outfilename_for_dc_for_go_mapping_unique,
227                                                                     final String outfilename_for_rank_counts,
228                                                                     final String outfilename_for_ancestor_species_counts,
229                                                                     final String outfilename_for_protein_stats,
230                                                                     final Map<String, DescriptiveStatistics> protein_length_stats_by_dc,
231                                                                     final Map<String, DescriptiveStatistics> domain_number_stats_by_dc,
232                                                                     final Map<String, DescriptiveStatistics> domain_length_stats_by_domain ) {
233         try {
234             //
235             //            if ( protein_length_stats_by_dc != null ) {
236             //                for( final Entry<?, DescriptiveStatistics> entry : protein_length_stats_by_dc.entrySet() ) {
237             //                    System.out.print( entry.getKey().toString() );
238             //                    System.out.print( ": " );
239             //                    double[] a = entry.getValue().getDataAsDoubleArray();
240             //                    for( int i = 0; i < a.length; i++ ) {
241             //                        System.out.print( a[ i ] + " " );
242             //                    }
243             //                    System.out.println();
244             //                }
245             //            }
246             //            if ( domain_number_stats_by_dc != null ) {
247             //                for( final Entry<?, DescriptiveStatistics> entry : domain_number_stats_by_dc.entrySet() ) {
248             //                    System.out.print( entry.getKey().toString() );
249             //                    System.out.print( ": " );
250             //                    double[] a = entry.getValue().getDataAsDoubleArray();
251             //                    for( int i = 0; i < a.length; i++ ) {
252             //                        System.out.print( a[ i ] + " " );
253             //                    }
254             //                    System.out.println();
255             //                }
256             //            }
257             //
258             final BufferedWriter out_counts = new BufferedWriter( new FileWriter( outfilename_for_counts ) );
259             final BufferedWriter out_dc = new BufferedWriter( new FileWriter( outfilename_for_dc ) );
260             final BufferedWriter out_dc_for_go_mapping = new BufferedWriter( new FileWriter( outfilename_for_dc_for_go_mapping ) );
261             final BufferedWriter out_dc_for_go_mapping_unique = new BufferedWriter( new FileWriter( outfilename_for_dc_for_go_mapping_unique ) );
262             final SortedMap<String, Integer> dc_gain_counts = new TreeMap<String, Integer>();
263             for( final PhylogenyNodeIterator it = local_phylogeny_l.iteratorPostorder(); it.hasNext(); ) {
264                 final PhylogenyNode n = it.next();
265                 final Set<String> gained_dc = n.getNodeData().getBinaryCharacters().getGainedCharacters();
266                 for( final String dc : gained_dc ) {
267                     if ( dc_gain_counts.containsKey( dc ) ) {
268                         dc_gain_counts.put( dc, dc_gain_counts.get( dc ) + 1 );
269                     }
270                     else {
271                         dc_gain_counts.put( dc, 1 );
272                     }
273                 }
274             }
275             final SortedMap<Integer, Integer> histogram = new TreeMap<Integer, Integer>();
276             final SortedMap<Integer, StringBuilder> domain_lists = new TreeMap<Integer, StringBuilder>();
277             final SortedMap<Integer, DescriptiveStatistics> dc_reapp_counts_to_protein_length_stats = new TreeMap<Integer, DescriptiveStatistics>();
278             final SortedMap<Integer, DescriptiveStatistics> dc_reapp_counts_to_domain_number_stats = new TreeMap<Integer, DescriptiveStatistics>();
279             final SortedMap<Integer, DescriptiveStatistics> dc_reapp_counts_to_domain_lengths_stats = new TreeMap<Integer, DescriptiveStatistics>();
280             final SortedMap<Integer, PriorityQueue<String>> domain_lists_go = new TreeMap<Integer, PriorityQueue<String>>();
281             final SortedMap<Integer, SortedSet<String>> domain_lists_go_unique = new TreeMap<Integer, SortedSet<String>>();
282             final Set<String> dcs = dc_gain_counts.keySet();
283             final SortedSet<String> more_than_once = new TreeSet<String>();
284             DescriptiveStatistics gained_once_lengths_stats = new BasicDescriptiveStatistics();
285             DescriptiveStatistics gained_once_domain_count_stats = new BasicDescriptiveStatistics();
286             DescriptiveStatistics gained_multiple_times_lengths_stats = new BasicDescriptiveStatistics();
287             final DescriptiveStatistics gained_multiple_times_domain_count_stats = new BasicDescriptiveStatistics();
288             long gained_multiple_times_domain_length_sum = 0;
289             long gained_once_domain_length_sum = 0;
290             long gained_multiple_times_domain_length_count = 0;
291             long gained_once_domain_length_count = 0;
292             for( final String dc : dcs ) {
293                 final int count = dc_gain_counts.get( dc );
294                 if ( histogram.containsKey( count ) ) {
295                     histogram.put( count, histogram.get( count ) + 1 );
296                     domain_lists.get( count ).append( ", " + dc );
297                     domain_lists_go.get( count ).addAll( splitDomainCombination( dc ) );
298                     domain_lists_go_unique.get( count ).addAll( splitDomainCombination( dc ) );
299                 }
300                 else {
301                     histogram.put( count, 1 );
302                     domain_lists.put( count, new StringBuilder( dc ) );
303                     final PriorityQueue<String> q = new PriorityQueue<String>();
304                     q.addAll( splitDomainCombination( dc ) );
305                     domain_lists_go.put( count, q );
306                     final SortedSet<String> set = new TreeSet<String>();
307                     set.addAll( splitDomainCombination( dc ) );
308                     domain_lists_go_unique.put( count, set );
309                 }
310                 if ( protein_length_stats_by_dc != null ) {
311                     if ( !dc_reapp_counts_to_protein_length_stats.containsKey( count ) ) {
312                         dc_reapp_counts_to_protein_length_stats.put( count, new BasicDescriptiveStatistics() );
313                     }
314                     dc_reapp_counts_to_protein_length_stats.get( count ).addValue( protein_length_stats_by_dc.get( dc )
315                             .arithmeticMean() );
316                 }
317                 if ( domain_number_stats_by_dc != null ) {
318                     if ( !dc_reapp_counts_to_domain_number_stats.containsKey( count ) ) {
319                         dc_reapp_counts_to_domain_number_stats.put( count, new BasicDescriptiveStatistics() );
320                     }
321                     dc_reapp_counts_to_domain_number_stats.get( count ).addValue( domain_number_stats_by_dc.get( dc )
322                             .arithmeticMean() );
323                 }
324                 if ( domain_length_stats_by_domain != null ) {
325                     if ( !dc_reapp_counts_to_domain_lengths_stats.containsKey( count ) ) {
326                         dc_reapp_counts_to_domain_lengths_stats.put( count, new BasicDescriptiveStatistics() );
327                     }
328                     final String[] ds = dc.split( "=" );
329                     dc_reapp_counts_to_domain_lengths_stats.get( count ).addValue( domain_length_stats_by_domain
330                             .get( ds[ 0 ] ).arithmeticMean() );
331                     dc_reapp_counts_to_domain_lengths_stats.get( count ).addValue( domain_length_stats_by_domain
332                             .get( ds[ 1 ] ).arithmeticMean() );
333                 }
334                 if ( count > 1 ) {
335                     more_than_once.add( dc );
336                     if ( protein_length_stats_by_dc != null ) {
337                         final DescriptiveStatistics s = protein_length_stats_by_dc.get( dc );
338                         for( final double element : s.getData() ) {
339                             gained_multiple_times_lengths_stats.addValue( element );
340                         }
341                     }
342                     if ( domain_number_stats_by_dc != null ) {
343                         final DescriptiveStatistics s = domain_number_stats_by_dc.get( dc );
344                         for( final double element : s.getData() ) {
345                             gained_multiple_times_domain_count_stats.addValue( element );
346                         }
347                     }
348                     if ( domain_length_stats_by_domain != null ) {
349                         final String[] ds = dc.split( "=" );
350                         final DescriptiveStatistics s0 = domain_length_stats_by_domain.get( ds[ 0 ] );
351                         final DescriptiveStatistics s1 = domain_length_stats_by_domain.get( ds[ 1 ] );
352                         for( final double element : s0.getData() ) {
353                             gained_multiple_times_domain_length_sum += element;
354                             ++gained_multiple_times_domain_length_count;
355                         }
356                         for( final double element : s1.getData() ) {
357                             gained_multiple_times_domain_length_sum += element;
358                             ++gained_multiple_times_domain_length_count;
359                         }
360                     }
361                 }
362                 else {
363                     if ( protein_length_stats_by_dc != null ) {
364                         final DescriptiveStatistics s = protein_length_stats_by_dc.get( dc );
365                         for( final double element : s.getData() ) {
366                             gained_once_lengths_stats.addValue( element );
367                         }
368                     }
369                     if ( domain_number_stats_by_dc != null ) {
370                         final DescriptiveStatistics s = domain_number_stats_by_dc.get( dc );
371                         for( final double element : s.getData() ) {
372                             gained_once_domain_count_stats.addValue( element );
373                         }
374                     }
375                     if ( domain_length_stats_by_domain != null ) {
376                         final String[] ds = dc.split( "=" );
377                         final DescriptiveStatistics s0 = domain_length_stats_by_domain.get( ds[ 0 ] );
378                         final DescriptiveStatistics s1 = domain_length_stats_by_domain.get( ds[ 1 ] );
379                         for( final double element : s0.getData() ) {
380                             gained_once_domain_length_sum += element;
381                             ++gained_once_domain_length_count;
382                         }
383                         for( final double element : s1.getData() ) {
384                             gained_once_domain_length_sum += element;
385                             ++gained_once_domain_length_count;
386                         }
387                     }
388                 }
389             }
390             final Set<Integer> histogram_keys = histogram.keySet();
391             for( final Integer histogram_key : histogram_keys ) {
392                 final int count = histogram.get( histogram_key );
393                 final StringBuilder dc = domain_lists.get( histogram_key );
394                 out_counts.write( histogram_key + "\t" + count + ForesterUtil.LINE_SEPARATOR );
395                 out_dc.write( histogram_key + "\t" + dc + ForesterUtil.LINE_SEPARATOR );
396                 out_dc_for_go_mapping.write( "#" + histogram_key + ForesterUtil.LINE_SEPARATOR );
397                 final Object[] sorted = domain_lists_go.get( histogram_key ).toArray();
398                 Arrays.sort( sorted );
399                 for( final Object domain : sorted ) {
400                     out_dc_for_go_mapping.write( domain + ForesterUtil.LINE_SEPARATOR );
401                 }
402                 out_dc_for_go_mapping_unique.write( "#" + histogram_key + ForesterUtil.LINE_SEPARATOR );
403                 for( final String domain : domain_lists_go_unique.get( histogram_key ) ) {
404                     out_dc_for_go_mapping_unique.write( domain + ForesterUtil.LINE_SEPARATOR );
405                 }
406             }
407             out_counts.close();
408             out_dc.close();
409             out_dc_for_go_mapping.close();
410             out_dc_for_go_mapping_unique.close();
411             final SortedMap<String, Integer> lca_rank_counts = new TreeMap<String, Integer>();
412             final SortedMap<String, Integer> lca_ancestor_species_counts = new TreeMap<String, Integer>();
413             for( final String dc : more_than_once ) {
414                 final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
415                 for( final PhylogenyNodeIterator it = local_phylogeny_l.iteratorExternalForward(); it.hasNext(); ) {
416                     final PhylogenyNode n = it.next();
417                     if ( n.getNodeData().getBinaryCharacters().getGainedCharacters().contains( dc ) ) {
418                         nodes.add( n );
419                     }
420                 }
421                 for( int i = 0; i < ( nodes.size() - 1 ); ++i ) {
422                     for( int j = i + 1; j < nodes.size(); ++j ) {
423                         final PhylogenyNode lca = PhylogenyMethods.calculateLCA( nodes.get( i ), nodes.get( j ) );
424                         String rank = "unknown";
425                         if ( lca.getNodeData().isHasTaxonomy()
426                                 && !ForesterUtil.isEmpty( lca.getNodeData().getTaxonomy().getRank() ) ) {
427                             rank = lca.getNodeData().getTaxonomy().getRank();
428                         }
429                         addToCountMap( lca_rank_counts, rank );
430                         String lca_species;
431                         if ( lca.getNodeData().isHasTaxonomy()
432                                 && !ForesterUtil.isEmpty( lca.getNodeData().getTaxonomy().getScientificName() ) ) {
433                             lca_species = lca.getNodeData().getTaxonomy().getScientificName();
434                         }
435                         else if ( lca.getNodeData().isHasTaxonomy()
436                                 && !ForesterUtil.isEmpty( lca.getNodeData().getTaxonomy().getCommonName() ) ) {
437                             lca_species = lca.getNodeData().getTaxonomy().getCommonName();
438                         }
439                         else {
440                             lca_species = lca.getName();
441                         }
442                         addToCountMap( lca_ancestor_species_counts, lca_species );
443                     }
444                 }
445             }
446             final BufferedWriter out_for_rank_counts = new BufferedWriter( new FileWriter( outfilename_for_rank_counts ) );
447             final BufferedWriter out_for_ancestor_species_counts = new BufferedWriter( new FileWriter( outfilename_for_ancestor_species_counts ) );
448             ForesterUtil.map2writer( out_for_rank_counts, lca_rank_counts, "\t", ForesterUtil.LINE_SEPARATOR );
449             ForesterUtil.map2writer( out_for_ancestor_species_counts,
450                                      lca_ancestor_species_counts,
451                                      "\t",
452                                      ForesterUtil.LINE_SEPARATOR );
453             out_for_rank_counts.close();
454             out_for_ancestor_species_counts.close();
455             if ( !ForesterUtil.isEmpty( outfilename_for_protein_stats )
456                     && ( ( domain_length_stats_by_domain != null ) || ( protein_length_stats_by_dc != null ) || ( domain_number_stats_by_dc != null ) ) ) {
457                 final BufferedWriter w = new BufferedWriter( new FileWriter( outfilename_for_protein_stats ) );
458                 w.write( "Domain Lengths: " );
459                 w.write( "\n" );
460                 if ( domain_length_stats_by_domain != null ) {
461                     for( final Entry<Integer, DescriptiveStatistics> entry : dc_reapp_counts_to_domain_lengths_stats
462                             .entrySet() ) {
463                         w.write( entry.getKey().toString() );
464                         w.write( "\t" + entry.getValue().arithmeticMean() );
465                         w.write( "\t" + entry.getValue().median() );
466                         w.write( "\n" );
467                     }
468                 }
469                 w.flush();
470                 w.write( "\n" );
471                 w.write( "\n" );
472                 w.write( "Protein Lengths: " );
473                 w.write( "\n" );
474                 if ( protein_length_stats_by_dc != null ) {
475                     for( final Entry<Integer, DescriptiveStatistics> entry : dc_reapp_counts_to_protein_length_stats
476                             .entrySet() ) {
477                         w.write( entry.getKey().toString() );
478                         w.write( "\t" + entry.getValue().arithmeticMean() );
479                         w.write( "\t" + entry.getValue().median() );
480                         w.write( "\n" );
481                     }
482                 }
483                 w.flush();
484                 w.write( "\n" );
485                 w.write( "\n" );
486                 w.write( "Number of domains: " );
487                 w.write( "\n" );
488                 if ( domain_number_stats_by_dc != null ) {
489                     for( final Entry<Integer, DescriptiveStatistics> entry : dc_reapp_counts_to_domain_number_stats
490                             .entrySet() ) {
491                         w.write( entry.getKey().toString() );
492                         w.write( "\t" + entry.getValue().arithmeticMean() );
493                         w.write( "\t" + entry.getValue().median() );
494                         w.write( "\n" );
495                     }
496                 }
497                 w.flush();
498                 w.write( "\n" );
499                 w.write( "\n" );
500                 w.write( "Gained once, domain lengths:" );
501                 w.write( "\n" );
502                 w.write( "N: " + gained_once_domain_length_count );
503                 w.write( "\n" );
504                 w.write( "Avg: " + ( ( double ) gained_once_domain_length_sum / gained_once_domain_length_count ) );
505                 w.write( "\n" );
506                 w.write( "\n" );
507                 w.write( "Gained multiple times, domain lengths:" );
508                 w.write( "\n" );
509                 w.write( "N: " + gained_multiple_times_domain_length_count );
510                 w.write( "\n" );
511                 w.write( "Avg: "
512                         + ( ( double ) gained_multiple_times_domain_length_sum / gained_multiple_times_domain_length_count ) );
513                 w.write( "\n" );
514                 w.write( "\n" );
515                 w.write( "\n" );
516                 w.write( "\n" );
517                 w.write( "Gained once, protein lengths:" );
518                 w.write( "\n" );
519                 w.write( gained_once_lengths_stats.toString() );
520                 gained_once_lengths_stats = null;
521                 w.write( "\n" );
522                 w.write( "\n" );
523                 w.write( "Gained once, domain counts:" );
524                 w.write( "\n" );
525                 w.write( gained_once_domain_count_stats.toString() );
526                 gained_once_domain_count_stats = null;
527                 w.write( "\n" );
528                 w.write( "\n" );
529                 w.write( "Gained multiple times, protein lengths:" );
530                 w.write( "\n" );
531                 w.write( gained_multiple_times_lengths_stats.toString() );
532                 gained_multiple_times_lengths_stats = null;
533                 w.write( "\n" );
534                 w.write( "\n" );
535                 w.write( "Gained multiple times, domain counts:" );
536                 w.write( "\n" );
537                 w.write( gained_multiple_times_domain_count_stats.toString() );
538                 w.flush();
539                 w.close();
540             }
541         }
542         catch ( final IOException e ) {
543             ForesterUtil.printWarningMessage( surfacing.PRG_NAME, "Failure to write: " + e );
544         }
545         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote independent domain combination gains fitch counts to ["
546                 + outfilename_for_counts + "]" );
547         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote independent domain combination gains fitch lists to ["
548                 + outfilename_for_dc + "]" );
549         ForesterUtil.programMessage( surfacing.PRG_NAME,
550                                      "Wrote independent domain combination gains fitch lists to (for GO mapping) ["
551                                              + outfilename_for_dc_for_go_mapping + "]" );
552         ForesterUtil.programMessage( surfacing.PRG_NAME,
553                                      "Wrote independent domain combination gains fitch lists to (for GO mapping, unique) ["
554                                              + outfilename_for_dc_for_go_mapping_unique + "]" );
555     }
556
557     public static void checkForOutputFileWriteability( final File outfile ) {
558         final String error = ForesterUtil.isWritableFile( outfile );
559         if ( !ForesterUtil.isEmpty( error ) ) {
560             ForesterUtil.fatalError( surfacing.PRG_NAME, error );
561         }
562     }
563
564     public static void checkWriteabilityForPairwiseComparisons( final PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option,
565                                                                 final String[][] input_file_properties,
566                                                                 final String automated_pairwise_comparison_suffix,
567                                                                 final File outdir ) {
568         for( int i = 0; i < input_file_properties.length; ++i ) {
569             for( int j = 0; j < i; ++j ) {
570                 final String species_i = input_file_properties[ i ][ 1 ];
571                 final String species_j = input_file_properties[ j ][ 1 ];
572                 String pairwise_similarities_output_file_str = surfacing.PAIRWISE_DOMAIN_COMPARISONS_PREFIX + species_i
573                         + "_" + species_j + automated_pairwise_comparison_suffix;
574                 switch ( domain_similarity_print_option ) {
575                     case HTML:
576                         if ( !pairwise_similarities_output_file_str.endsWith( ".html" ) ) {
577                             pairwise_similarities_output_file_str += ".html";
578                         }
579                         break;
580                 }
581                 final String error = ForesterUtil
582                         .isWritableFile( new File( outdir == null ? pairwise_similarities_output_file_str : outdir
583                                 + ForesterUtil.FILE_SEPARATOR + pairwise_similarities_output_file_str ) );
584                 if ( !ForesterUtil.isEmpty( error ) ) {
585                     ForesterUtil.fatalError( surfacing.PRG_NAME, error );
586                 }
587             }
588         }
589     }
590
591     private static SortedSet<String> collectAllDomainsChangedOnSubtree( final PhylogenyNode subtree_root,
592                                                                         final boolean get_gains ) {
593         final SortedSet<String> domains = new TreeSet<String>();
594         for( final PhylogenyNode descendant : PhylogenyMethods.getAllDescendants( subtree_root ) ) {
595             final BinaryCharacters chars = descendant.getNodeData().getBinaryCharacters();
596             if ( get_gains ) {
597                 domains.addAll( chars.getGainedCharacters() );
598             }
599             else {
600                 domains.addAll( chars.getLostCharacters() );
601             }
602         }
603         return domains;
604     }
605
606     public static void collectChangedDomainCombinationsFromBinaryStatesMatrixAsListToFile( final CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix,
607                                                                                            final BinaryDomainCombination.DomainCombinationType dc_type,
608                                                                                            final List<BinaryDomainCombination> all_binary_domains_combination_gained,
609                                                                                            final boolean get_gains ) {
610         final SortedSet<String> sorted_ids = new TreeSet<String>();
611         for( int i = 0; i < matrix.getNumberOfIdentifiers(); ++i ) {
612             sorted_ids.add( matrix.getIdentifier( i ) );
613         }
614         for( final String id : sorted_ids ) {
615             for( int c = 0; c < matrix.getNumberOfCharacters(); ++c ) {
616                 if ( ( get_gains && ( matrix.getState( id, c ) == CharacterStateMatrix.GainLossStates.GAIN ) )
617                         || ( !get_gains && ( matrix.getState( id, c ) == CharacterStateMatrix.GainLossStates.LOSS ) ) ) {
618                     if ( dc_type == BinaryDomainCombination.DomainCombinationType.DIRECTED_ADJACTANT ) {
619                         all_binary_domains_combination_gained.add( AdjactantDirectedBinaryDomainCombination
620                                 .createInstance( matrix.getCharacter( c ) ) );
621                     }
622                     else if ( dc_type == BinaryDomainCombination.DomainCombinationType.DIRECTED ) {
623                         all_binary_domains_combination_gained.add( DirectedBinaryDomainCombination
624                                 .createInstance( matrix.getCharacter( c ) ) );
625                     }
626                     else {
627                         all_binary_domains_combination_gained.add( BasicBinaryDomainCombination.createInstance( matrix
628                                 .getCharacter( c ) ) );
629                     }
630                 }
631             }
632         }
633     }
634
635     private static File createBaseDirForPerNodeDomainFiles( final String base_dir,
636                                                             final boolean domain_combinations,
637                                                             final CharacterStateMatrix.GainLossStates state,
638                                                             final String outfile ) {
639         File per_node_go_mapped_domain_gain_loss_files_base_dir = new File( new File( outfile ).getParent()
640                 + ForesterUtil.FILE_SEPARATOR + base_dir );
641         if ( !per_node_go_mapped_domain_gain_loss_files_base_dir.exists() ) {
642             per_node_go_mapped_domain_gain_loss_files_base_dir.mkdir();
643         }
644         if ( domain_combinations ) {
645             per_node_go_mapped_domain_gain_loss_files_base_dir = new File( per_node_go_mapped_domain_gain_loss_files_base_dir
646                     + ForesterUtil.FILE_SEPARATOR + "DC" );
647         }
648         else {
649             per_node_go_mapped_domain_gain_loss_files_base_dir = new File( per_node_go_mapped_domain_gain_loss_files_base_dir
650                     + ForesterUtil.FILE_SEPARATOR + "DOMAINS" );
651         }
652         if ( !per_node_go_mapped_domain_gain_loss_files_base_dir.exists() ) {
653             per_node_go_mapped_domain_gain_loss_files_base_dir.mkdir();
654         }
655         if ( state == GainLossStates.GAIN ) {
656             per_node_go_mapped_domain_gain_loss_files_base_dir = new File( per_node_go_mapped_domain_gain_loss_files_base_dir
657                     + ForesterUtil.FILE_SEPARATOR + "GAINS" );
658         }
659         else if ( state == GainLossStates.LOSS ) {
660             per_node_go_mapped_domain_gain_loss_files_base_dir = new File( per_node_go_mapped_domain_gain_loss_files_base_dir
661                     + ForesterUtil.FILE_SEPARATOR + "LOSSES" );
662         }
663         else {
664             per_node_go_mapped_domain_gain_loss_files_base_dir = new File( per_node_go_mapped_domain_gain_loss_files_base_dir
665                     + ForesterUtil.FILE_SEPARATOR + "PRESENT" );
666         }
667         if ( !per_node_go_mapped_domain_gain_loss_files_base_dir.exists() ) {
668             per_node_go_mapped_domain_gain_loss_files_base_dir.mkdir();
669         }
670         return per_node_go_mapped_domain_gain_loss_files_base_dir;
671     }
672
673     public static Map<String, List<GoId>> createDomainIdToGoIdMap( final List<PfamToGoMapping> pfam_to_go_mappings ) {
674         final Map<String, List<GoId>> domain_id_to_go_ids_map = new HashMap<String, List<GoId>>( pfam_to_go_mappings.size() );
675         for( final PfamToGoMapping pfam_to_go : pfam_to_go_mappings ) {
676             if ( !domain_id_to_go_ids_map.containsKey( pfam_to_go.getKey() ) ) {
677                 domain_id_to_go_ids_map.put( pfam_to_go.getKey(), new ArrayList<GoId>() );
678             }
679             domain_id_to_go_ids_map.get( pfam_to_go.getKey() ).add( pfam_to_go.getValue() );
680         }
681         return domain_id_to_go_ids_map;
682     }
683
684     public static Map<String, Set<String>> createDomainIdToSecondaryFeaturesMap( final File secondary_features_map_file )
685             throws IOException {
686         final BasicTable<String> primary_table = BasicTableParser.parse( secondary_features_map_file, '\t' );
687         final Map<String, Set<String>> map = new TreeMap<String, Set<String>>();
688         for( int r = 0; r < primary_table.getNumberOfRows(); ++r ) {
689             final String domain_id = primary_table.getValue( 0, r );
690             if ( !map.containsKey( domain_id ) ) {
691                 map.put( domain_id, new HashSet<String>() );
692             }
693             map.get( domain_id ).add( primary_table.getValue( 1, r ) );
694         }
695         return map;
696     }
697
698     public static Phylogeny createNjTreeBasedOnMatrixToFile( final File nj_tree_outfile, final DistanceMatrix distance ) {
699         checkForOutputFileWriteability( nj_tree_outfile );
700         final NeighborJoining nj = NeighborJoining.createInstance();
701         final Phylogeny phylogeny = nj.execute( ( BasicSymmetricalDistanceMatrix ) distance );
702         phylogeny.setName( nj_tree_outfile.getName() );
703         writePhylogenyToFile( phylogeny, nj_tree_outfile.toString() );
704         return phylogeny;
705     }
706
707     public static StringBuilder createParametersAsString( final boolean ignore_dufs,
708                                                           final double e_value_max,
709                                                           final int max_allowed_overlap,
710                                                           final boolean no_engulfing_overlaps,
711                                                           final File cutoff_scores_file,
712                                                           final BinaryDomainCombination.DomainCombinationType dc_type ) {
713         final StringBuilder parameters_sb = new StringBuilder();
714         parameters_sb.append( "E-value: " + e_value_max );
715         if ( cutoff_scores_file != null ) {
716             parameters_sb.append( ", Cutoff-scores-file: " + cutoff_scores_file );
717         }
718         else {
719             parameters_sb.append( ", Cutoff-scores-file: not-set" );
720         }
721         if ( max_allowed_overlap != surfacing.MAX_ALLOWED_OVERLAP_DEFAULT ) {
722             parameters_sb.append( ", Max-overlap: " + max_allowed_overlap );
723         }
724         else {
725             parameters_sb.append( ", Max-overlap: not-set" );
726         }
727         if ( no_engulfing_overlaps ) {
728             parameters_sb.append( ", Engulfing-overlaps: not-allowed" );
729         }
730         else {
731             parameters_sb.append( ", Engulfing-overlaps: allowed" );
732         }
733         if ( ignore_dufs ) {
734             parameters_sb.append( ", Ignore-dufs: true" );
735         }
736         else {
737             parameters_sb.append( ", Ignore-dufs: false" );
738         }
739         parameters_sb.append( ", DC type (if applicable): " + dc_type );
740         return parameters_sb;
741     }
742
743     private static SortedSet<BinaryDomainCombination> createSetOfAllBinaryDomainCombinationsPerGenome( final GenomeWideCombinableDomains gwcd ) {
744         final SortedMap<String, CombinableDomains> cds = gwcd.getAllCombinableDomainsIds();
745         final SortedSet<BinaryDomainCombination> binary_combinations = new TreeSet<BinaryDomainCombination>();
746         for( final String domain_id : cds.keySet() ) {
747             final CombinableDomains cd = cds.get( domain_id );
748             binary_combinations.addAll( cd.toBinaryDomainCombinations() );
749         }
750         return binary_combinations;
751     }
752
753     public static void createSplitWriters( final File out_dir,
754                                            final String my_outfile,
755                                            final Map<Character, Writer> split_writers ) throws IOException {
756         split_writers.put( 'a', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
757                 + "_domains_A.html" ) ) );
758         split_writers.put( 'b', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
759                 + "_domains_B.html" ) ) );
760         split_writers.put( 'c', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
761                 + "_domains_C.html" ) ) );
762         split_writers.put( 'd', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
763                 + "_domains_D.html" ) ) );
764         split_writers.put( 'e', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
765                 + "_domains_E.html" ) ) );
766         split_writers.put( 'f', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
767                 + "_domains_F.html" ) ) );
768         split_writers.put( 'g', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
769                 + "_domains_G.html" ) ) );
770         split_writers.put( 'h', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
771                 + "_domains_H.html" ) ) );
772         split_writers.put( 'i', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
773                 + "_domains_I.html" ) ) );
774         split_writers.put( 'j', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
775                 + "_domains_J.html" ) ) );
776         split_writers.put( 'k', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
777                 + "_domains_K.html" ) ) );
778         split_writers.put( 'l', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
779                 + "_domains_L.html" ) ) );
780         split_writers.put( 'm', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
781                 + "_domains_M.html" ) ) );
782         split_writers.put( 'n', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
783                 + "_domains_N.html" ) ) );
784         split_writers.put( 'o', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
785                 + "_domains_O.html" ) ) );
786         split_writers.put( 'p', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
787                 + "_domains_P.html" ) ) );
788         split_writers.put( 'q', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
789                 + "_domains_Q.html" ) ) );
790         split_writers.put( 'r', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
791                 + "_domains_R.html" ) ) );
792         split_writers.put( 's', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
793                 + "_domains_S.html" ) ) );
794         split_writers.put( 't', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
795                 + "_domains_T.html" ) ) );
796         split_writers.put( 'u', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
797                 + "_domains_U.html" ) ) );
798         split_writers.put( 'v', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
799                 + "_domains_V.html" ) ) );
800         split_writers.put( 'w', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
801                 + "_domains_W.html" ) ) );
802         split_writers.put( 'x', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
803                 + "_domains_X.html" ) ) );
804         split_writers.put( 'y', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
805                 + "_domains_Y.html" ) ) );
806         split_writers.put( 'z', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
807                 + "_domains_Z.html" ) ) );
808         split_writers.put( '0', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
809                 + "_domains_0.html" ) ) );
810     }
811
812     public static Map<String, Integer> createTaxCodeToIdMap( final Phylogeny phy ) {
813         final Map<String, Integer> m = new HashMap<String, Integer>();
814         for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
815             final PhylogenyNode n = iter.next();
816             if ( n.getNodeData().isHasTaxonomy() ) {
817                 final Taxonomy t = n.getNodeData().getTaxonomy();
818                 final String c = t.getTaxonomyCode();
819                 if ( !ForesterUtil.isEmpty( c ) ) {
820                     if ( n.getNodeData().getTaxonomy() == null ) {
821                         ForesterUtil.fatalError( surfacing.PRG_NAME, "no taxonomy id for node " + n );
822                     }
823                     final String id = n.getNodeData().getTaxonomy().getIdentifier().getValue();
824                     if ( ForesterUtil.isEmpty( id ) ) {
825                         ForesterUtil.fatalError( surfacing.PRG_NAME, "no taxonomy id for node " + n );
826                     }
827                     if ( m.containsKey( c ) ) {
828                         ForesterUtil.fatalError( surfacing.PRG_NAME, "taxonomy code " + c + " is not unique" );
829                     }
830                     final int iid = Integer.valueOf( id );
831                     if ( m.containsValue( iid ) ) {
832                         ForesterUtil.fatalError( surfacing.PRG_NAME, "taxonomy id " + iid + " is not unique" );
833                     }
834                     m.put( c, iid );
835                 }
836             }
837             else {
838                 ForesterUtil.fatalError( surfacing.PRG_NAME, "no taxonomy for node " + n );
839             }
840         }
841         return m;
842     }
843
844     public static void decoratePrintableDomainSimilarities( final SortedSet<DomainSimilarity> domain_similarities,
845                                                             final Detailedness detailedness ) {
846         for( final DomainSimilarity domain_similarity : domain_similarities ) {
847             if ( domain_similarity instanceof PrintableDomainSimilarity ) {
848                 final PrintableDomainSimilarity printable_domain_similarity = ( PrintableDomainSimilarity ) domain_similarity;
849                 printable_domain_similarity.setDetailedness( detailedness );
850             }
851         }
852     }
853
854     public static void doit( final List<Protein> proteins,
855                              final List<String> query_domain_ids_nc_order,
856                              final Writer out,
857                              final String separator,
858                              final String limit_to_species,
859                              final Map<String, List<Integer>> average_protein_lengths_by_dc ) throws IOException {
860         for( final Protein protein : proteins ) {
861             if ( ForesterUtil.isEmpty( limit_to_species )
862                     || protein.getSpecies().getSpeciesId().equalsIgnoreCase( limit_to_species ) ) {
863                 if ( protein.contains( query_domain_ids_nc_order, true ) ) {
864                     out.write( protein.getSpecies().getSpeciesId() );
865                     out.write( separator );
866                     out.write( protein.getProteinId().getId() );
867                     out.write( separator );
868                     out.write( "[" );
869                     final Set<String> visited_domain_ids = new HashSet<String>();
870                     boolean first = true;
871                     for( final Domain domain : protein.getProteinDomains() ) {
872                         if ( !visited_domain_ids.contains( domain.getDomainId() ) ) {
873                             visited_domain_ids.add( domain.getDomainId() );
874                             if ( first ) {
875                                 first = false;
876                             }
877                             else {
878                                 out.write( " " );
879                             }
880                             out.write( domain.getDomainId() );
881                             out.write( " {" );
882                             out.write( "" + domain.getTotalCount() );
883                             out.write( "}" );
884                         }
885                     }
886                     out.write( "]" );
887                     out.write( separator );
888                     if ( !( ForesterUtil.isEmpty( protein.getDescription() ) || protein.getDescription()
889                             .equals( SurfacingConstants.NONE ) ) ) {
890                         out.write( protein.getDescription() );
891                     }
892                     out.write( separator );
893                     if ( !( ForesterUtil.isEmpty( protein.getAccession() ) || protein.getAccession()
894                             .equals( SurfacingConstants.NONE ) ) ) {
895                         out.write( protein.getAccession() );
896                     }
897                     out.write( SurfacingConstants.NL );
898                 }
899             }
900         }
901         out.flush();
902     }
903
904     public static void domainsPerProteinsStatistics( final String genome,
905                                                      final List<Protein> protein_list,
906                                                      final DescriptiveStatistics all_genomes_domains_per_potein_stats,
907                                                      final SortedMap<Integer, Integer> all_genomes_domains_per_potein_histo,
908                                                      final SortedSet<String> domains_which_are_always_single,
909                                                      final SortedSet<String> domains_which_are_sometimes_single_sometimes_not,
910                                                      final SortedSet<String> domains_which_never_single,
911                                                      final Writer writer ) {
912         final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
913         for( final Protein protein : protein_list ) {
914             final int domains = protein.getNumberOfProteinDomains();
915             //System.out.println( domains );
916             stats.addValue( domains );
917             all_genomes_domains_per_potein_stats.addValue( domains );
918             if ( !all_genomes_domains_per_potein_histo.containsKey( domains ) ) {
919                 all_genomes_domains_per_potein_histo.put( domains, 1 );
920             }
921             else {
922                 all_genomes_domains_per_potein_histo.put( domains,
923                                                           1 + all_genomes_domains_per_potein_histo.get( domains ) );
924             }
925             if ( domains == 1 ) {
926                 final String domain = protein.getProteinDomain( 0 ).getDomainId();
927                 if ( !domains_which_are_sometimes_single_sometimes_not.contains( domain ) ) {
928                     if ( domains_which_never_single.contains( domain ) ) {
929                         domains_which_never_single.remove( domain );
930                         domains_which_are_sometimes_single_sometimes_not.add( domain );
931                     }
932                     else {
933                         domains_which_are_always_single.add( domain );
934                     }
935                 }
936             }
937             else if ( domains > 1 ) {
938                 for( final Domain d : protein.getProteinDomains() ) {
939                     final String domain = d.getDomainId();
940                     // System.out.println( domain );
941                     if ( !domains_which_are_sometimes_single_sometimes_not.contains( domain ) ) {
942                         if ( domains_which_are_always_single.contains( domain ) ) {
943                             domains_which_are_always_single.remove( domain );
944                             domains_which_are_sometimes_single_sometimes_not.add( domain );
945                         }
946                         else {
947                             domains_which_never_single.add( domain );
948                         }
949                     }
950                 }
951             }
952         }
953         try {
954             writer.write( genome );
955             writer.write( "\t" );
956             if ( stats.getN() >= 1 ) {
957                 writer.write( stats.arithmeticMean() + "" );
958                 writer.write( "\t" );
959                 if ( stats.getN() >= 2 ) {
960                     writer.write( stats.sampleStandardDeviation() + "" );
961                 }
962                 else {
963                     writer.write( "" );
964                 }
965                 writer.write( "\t" );
966                 writer.write( stats.median() + "" );
967                 writer.write( "\t" );
968                 writer.write( stats.getN() + "" );
969                 writer.write( "\t" );
970                 writer.write( stats.getMin() + "" );
971                 writer.write( "\t" );
972                 writer.write( stats.getMax() + "" );
973             }
974             else {
975                 writer.write( "\t" );
976                 writer.write( "\t" );
977                 writer.write( "\t" );
978                 writer.write( "0" );
979                 writer.write( "\t" );
980                 writer.write( "\t" );
981             }
982             writer.write( "\n" );
983         }
984         catch ( final IOException e ) {
985             e.printStackTrace();
986         }
987     }
988
989     public static void executeDomainLengthAnalysis( final String[][] input_file_properties,
990                                                     final int number_of_genomes,
991                                                     final DomainLengthsTable domain_lengths_table,
992                                                     final File outfile ) throws IOException {
993         final DecimalFormat df = new DecimalFormat( "#.00" );
994         checkForOutputFileWriteability( outfile );
995         final BufferedWriter out = new BufferedWriter( new FileWriter( outfile ) );
996         out.write( "MEAN BASED STATISTICS PER SPECIES" );
997         out.write( ForesterUtil.LINE_SEPARATOR );
998         out.write( domain_lengths_table.createMeanBasedStatisticsPerSpeciesTable().toString() );
999         out.write( ForesterUtil.LINE_SEPARATOR );
1000         out.write( ForesterUtil.LINE_SEPARATOR );
1001         final List<DomainLengths> domain_lengths_list = domain_lengths_table.getDomainLengthsList();
1002         out.write( "OUTLIER SPECIES PER DOMAIN (Z>=1.5)" );
1003         out.write( ForesterUtil.LINE_SEPARATOR );
1004         for( final DomainLengths domain_lengths : domain_lengths_list ) {
1005             final List<Species> species_list = domain_lengths.getMeanBasedOutlierSpecies( 1.5 );
1006             if ( species_list.size() > 0 ) {
1007                 out.write( domain_lengths.getDomainId() + "\t" );
1008                 for( final Species species : species_list ) {
1009                     out.write( species + "\t" );
1010                 }
1011                 out.write( ForesterUtil.LINE_SEPARATOR );
1012             }
1013         }
1014         out.write( ForesterUtil.LINE_SEPARATOR );
1015         out.write( ForesterUtil.LINE_SEPARATOR );
1016         out.write( "OUTLIER SPECIES (Z 1.0)" );
1017         out.write( ForesterUtil.LINE_SEPARATOR );
1018         final DescriptiveStatistics stats_for_all_species = domain_lengths_table
1019                 .calculateMeanBasedStatisticsForAllSpecies();
1020         out.write( stats_for_all_species.asSummary() );
1021         out.write( ForesterUtil.LINE_SEPARATOR );
1022         final AsciiHistogram histo = new AsciiHistogram( stats_for_all_species );
1023         out.write( histo.toStringBuffer( 40, '=', 60, 4 ).toString() );
1024         out.write( ForesterUtil.LINE_SEPARATOR );
1025         final double population_sd = stats_for_all_species.sampleStandardDeviation();
1026         final double population_mean = stats_for_all_species.arithmeticMean();
1027         for( final Species species : domain_lengths_table.getSpecies() ) {
1028             final double x = domain_lengths_table.calculateMeanBasedStatisticsForSpecies( species ).arithmeticMean();
1029             final double z = ( x - population_mean ) / population_sd;
1030             out.write( species + "\t" + z );
1031             out.write( ForesterUtil.LINE_SEPARATOR );
1032         }
1033         out.write( ForesterUtil.LINE_SEPARATOR );
1034         for( final Species species : domain_lengths_table.getSpecies() ) {
1035             final DescriptiveStatistics stats_for_species = domain_lengths_table
1036                     .calculateMeanBasedStatisticsForSpecies( species );
1037             final double x = stats_for_species.arithmeticMean();
1038             final double z = ( x - population_mean ) / population_sd;
1039             if ( ( z <= -1.0 ) || ( z >= 1.0 ) ) {
1040                 out.write( species + "\t" + df.format( z ) + "\t" + stats_for_species.asSummary() );
1041                 out.write( ForesterUtil.LINE_SEPARATOR );
1042             }
1043         }
1044         out.close();
1045         System.gc();
1046     }
1047
1048     /**
1049      * Warning: This side-effects 'all_bin_domain_combinations_encountered'!
1050      * 
1051      * 
1052      * @param output_file
1053      * @param all_bin_domain_combinations_changed
1054      * @param sum_of_all_domains_encountered
1055      * @param all_bin_domain_combinations_encountered
1056      * @param is_gains_analysis
1057      * @param protein_length_stats_by_dc 
1058      * @throws IOException
1059      */
1060     public static void executeFitchGainsAnalysis( final File output_file,
1061                                                   final List<BinaryDomainCombination> all_bin_domain_combinations_changed,
1062                                                   final int sum_of_all_domains_encountered,
1063                                                   final SortedSet<BinaryDomainCombination> all_bin_domain_combinations_encountered,
1064                                                   final boolean is_gains_analysis ) throws IOException {
1065         checkForOutputFileWriteability( output_file );
1066         final Writer out = ForesterUtil.createBufferedWriter( output_file );
1067         final SortedMap<Object, Integer> bdc_to_counts = ForesterUtil
1068                 .listToSortedCountsMap( all_bin_domain_combinations_changed );
1069         final SortedSet<String> all_domains_in_combination_changed_more_than_once = new TreeSet<String>();
1070         final SortedSet<String> all_domains_in_combination_changed_only_once = new TreeSet<String>();
1071         int above_one = 0;
1072         int one = 0;
1073         for( final Object bdc_object : bdc_to_counts.keySet() ) {
1074             final BinaryDomainCombination bdc = ( BinaryDomainCombination ) bdc_object;
1075             final int count = bdc_to_counts.get( bdc_object );
1076             if ( count < 1 ) {
1077                 ForesterUtil.unexpectedFatalError( surfacing.PRG_NAME, "count < 1 " );
1078             }
1079             out.write( bdc + "\t" + count + ForesterUtil.LINE_SEPARATOR );
1080             if ( count > 1 ) {
1081                 all_domains_in_combination_changed_more_than_once.add( bdc.getId0() );
1082                 all_domains_in_combination_changed_more_than_once.add( bdc.getId1() );
1083                 above_one++;
1084             }
1085             else if ( count == 1 ) {
1086                 all_domains_in_combination_changed_only_once.add( bdc.getId0() );
1087                 all_domains_in_combination_changed_only_once.add( bdc.getId1() );
1088                 one++;
1089             }
1090         }
1091         final int all = all_bin_domain_combinations_encountered.size();
1092         int never_lost = -1;
1093         if ( !is_gains_analysis ) {
1094             all_bin_domain_combinations_encountered.removeAll( all_bin_domain_combinations_changed );
1095             never_lost = all_bin_domain_combinations_encountered.size();
1096             for( final BinaryDomainCombination bdc : all_bin_domain_combinations_encountered ) {
1097                 out.write( bdc + "\t" + "0" + ForesterUtil.LINE_SEPARATOR );
1098             }
1099         }
1100         if ( is_gains_analysis ) {
1101             out.write( "Sum of all distinct domain combinations appearing once               : " + one
1102                     + ForesterUtil.LINE_SEPARATOR );
1103             out.write( "Sum of all distinct domain combinations appearing more than once     : " + above_one
1104                     + ForesterUtil.LINE_SEPARATOR );
1105             out.write( "Sum of all distinct domains in combinations apppearing only once     : "
1106                     + all_domains_in_combination_changed_only_once.size() + ForesterUtil.LINE_SEPARATOR );
1107             out.write( "Sum of all distinct domains in combinations apppearing more than once: "
1108                     + all_domains_in_combination_changed_more_than_once.size() + ForesterUtil.LINE_SEPARATOR );
1109         }
1110         else {
1111             out.write( "Sum of all distinct domain combinations never lost                   : " + never_lost
1112                     + ForesterUtil.LINE_SEPARATOR );
1113             out.write( "Sum of all distinct domain combinations lost once                    : " + one
1114                     + ForesterUtil.LINE_SEPARATOR );
1115             out.write( "Sum of all distinct domain combinations lost more than once          : " + above_one
1116                     + ForesterUtil.LINE_SEPARATOR );
1117             out.write( "Sum of all distinct domains in combinations lost only once           : "
1118                     + all_domains_in_combination_changed_only_once.size() + ForesterUtil.LINE_SEPARATOR );
1119             out.write( "Sum of all distinct domains in combinations lost more than once: "
1120                     + all_domains_in_combination_changed_more_than_once.size() + ForesterUtil.LINE_SEPARATOR );
1121         }
1122         out.write( "All binary combinations                                              : " + all
1123                 + ForesterUtil.LINE_SEPARATOR );
1124         out.write( "All domains                                                          : "
1125                 + sum_of_all_domains_encountered );
1126         out.close();
1127         ForesterUtil.programMessage( surfacing.PRG_NAME,
1128                                      "Wrote fitch domain combination dynamics counts analysis to \"" + output_file
1129                                              + "\"" );
1130     }
1131
1132     /**
1133      * 
1134      * @param all_binary_domains_combination_lost_fitch 
1135      * @param use_last_in_fitch_parsimony 
1136      * @param consider_directedness_and_adjacency_for_bin_combinations 
1137      * @param all_binary_domains_combination_gained if null ignored, otherwise this is to list all binary domain combinations
1138      * which were gained under unweighted (Fitch) parsimony.
1139      */
1140     public static void executeParsimonyAnalysis( final long random_number_seed_for_fitch_parsimony,
1141                                                  final boolean radomize_fitch_parsimony,
1142                                                  final String outfile_name,
1143                                                  final DomainParsimonyCalculator domain_parsimony,
1144                                                  final Phylogeny phylogeny,
1145                                                  final Map<String, List<GoId>> domain_id_to_go_ids_map,
1146                                                  final Map<GoId, GoTerm> go_id_to_term_map,
1147                                                  final GoNameSpace go_namespace_limit,
1148                                                  final String parameters_str,
1149                                                  final Map<String, Set<String>>[] domain_id_to_secondary_features_maps,
1150                                                  final SortedSet<String> positive_filter,
1151                                                  final boolean output_binary_domain_combinations_for_graphs,
1152                                                  final List<BinaryDomainCombination> all_binary_domains_combination_gained_fitch,
1153                                                  final List<BinaryDomainCombination> all_binary_domains_combination_lost_fitch,
1154                                                  final BinaryDomainCombination.DomainCombinationType dc_type,
1155                                                  final Map<String, DescriptiveStatistics> protein_length_stats_by_dc,
1156                                                  final Map<String, DescriptiveStatistics> domain_number_stats_by_dc,
1157                                                  final Map<String, DescriptiveStatistics> domain_length_stats_by_domain,
1158                                                  final Map<String, Integer> tax_code_to_id_map,
1159                                                  final boolean write_to_nexus,
1160                                                  final boolean use_last_in_fitch_parsimony ) {
1161         final String sep = ForesterUtil.LINE_SEPARATOR + "###################" + ForesterUtil.LINE_SEPARATOR;
1162         final String date_time = ForesterUtil.getCurrentDateTime();
1163         final SortedSet<String> all_pfams_encountered = new TreeSet<String>();
1164         final SortedSet<String> all_pfams_gained_as_domains = new TreeSet<String>();
1165         final SortedSet<String> all_pfams_lost_as_domains = new TreeSet<String>();
1166         final SortedSet<String> all_pfams_gained_as_dom_combinations = new TreeSet<String>();
1167         final SortedSet<String> all_pfams_lost_as_dom_combinations = new TreeSet<String>();
1168         if ( write_to_nexus ) {
1169             writeToNexus( outfile_name, domain_parsimony, phylogeny );
1170         }
1171         // DOLLO DOMAINS
1172         // -------------
1173         Phylogeny local_phylogeny_l = phylogeny.copy();
1174         if ( ( positive_filter != null ) && ( positive_filter.size() > 0 ) ) {
1175             domain_parsimony.executeDolloParsimonyOnDomainPresence( positive_filter );
1176         }
1177         else {
1178             domain_parsimony.executeDolloParsimonyOnDomainPresence();
1179         }
1180         SurfacingUtil.writeMatrixToFile( domain_parsimony.getGainLossMatrix(), outfile_name
1181                 + surfacing.PARSIMONY_OUTPUT_GL_SUFFIX_DOLLO_DOMAINS, Format.FORESTER );
1182         SurfacingUtil.writeMatrixToFile( domain_parsimony.getGainLossCountsMatrix(), outfile_name
1183                 + surfacing.PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_DOLLO_DOMAINS, Format.FORESTER );
1184         SurfacingUtil.writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(),
1185                                                            CharacterStateMatrix.GainLossStates.GAIN,
1186                                                            outfile_name + surfacing.PARSIMONY_OUTPUT_DOLLO_GAINS_D,
1187                                                            sep,
1188                                                            ForesterUtil.LINE_SEPARATOR,
1189                                                            null );
1190         SurfacingUtil.writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(),
1191                                                            CharacterStateMatrix.GainLossStates.LOSS,
1192                                                            outfile_name + surfacing.PARSIMONY_OUTPUT_DOLLO_LOSSES_D,
1193                                                            sep,
1194                                                            ForesterUtil.LINE_SEPARATOR,
1195                                                            null );
1196         SurfacingUtil.writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(), null, outfile_name
1197                 + surfacing.PARSIMONY_OUTPUT_DOLLO_PRESENT_D, sep, ForesterUtil.LINE_SEPARATOR, null );
1198         //HTML:
1199         writeBinaryStatesMatrixToList( domain_id_to_go_ids_map,
1200                                        go_id_to_term_map,
1201                                        go_namespace_limit,
1202                                        false,
1203                                        domain_parsimony.getGainLossMatrix(),
1204                                        CharacterStateMatrix.GainLossStates.GAIN,
1205                                        outfile_name + surfacing.PARSIMONY_OUTPUT_DOLLO_GAINS_HTML_D,
1206                                        sep,
1207                                        ForesterUtil.LINE_SEPARATOR,
1208                                        "Dollo Parsimony | Gains | Domains",
1209                                        "+",
1210                                        domain_id_to_secondary_features_maps,
1211                                        all_pfams_encountered,
1212                                        all_pfams_gained_as_domains,
1213                                        "_dollo_gains_d",
1214                                        tax_code_to_id_map );
1215         writeBinaryStatesMatrixToList( domain_id_to_go_ids_map,
1216                                        go_id_to_term_map,
1217                                        go_namespace_limit,
1218                                        false,
1219                                        domain_parsimony.getGainLossMatrix(),
1220                                        CharacterStateMatrix.GainLossStates.LOSS,
1221                                        outfile_name + surfacing.PARSIMONY_OUTPUT_DOLLO_LOSSES_HTML_D,
1222                                        sep,
1223                                        ForesterUtil.LINE_SEPARATOR,
1224                                        "Dollo Parsimony | Losses | Domains",
1225                                        "-",
1226                                        domain_id_to_secondary_features_maps,
1227                                        all_pfams_encountered,
1228                                        all_pfams_lost_as_domains,
1229                                        "_dollo_losses_d",
1230                                        tax_code_to_id_map );
1231         //        writeBinaryStatesMatrixToList( domain_id_to_go_ids_map,
1232         //                                       go_id_to_term_map,
1233         //                                       go_namespace_limit,
1234         //                                       false,
1235         //                                       domain_parsimony.getGainLossMatrix(),
1236         //                                       null,
1237         //                                       outfile_name + surfacing.PARSIMONY_OUTPUT_DOLLO_PRESENT_HTML_D,
1238         //                                       sep,
1239         //                                       ForesterUtil.LINE_SEPARATOR,
1240         //                                       "Dollo Parsimony | Present | Domains",
1241         //                                       "",
1242         //                                       domain_id_to_secondary_features_maps,
1243         //                                       all_pfams_encountered,
1244         //                                       null,
1245         //                                       "_dollo_present_d",
1246         //                                       tax_code_to_id_map );
1247         preparePhylogeny( local_phylogeny_l,
1248                           domain_parsimony,
1249                           date_time,
1250                           "Dollo parsimony on domain presence/absence",
1251                           "dollo_on_domains_" + outfile_name,
1252                           parameters_str );
1253         SurfacingUtil.writePhylogenyToFile( local_phylogeny_l, outfile_name
1254                 + surfacing.DOMAINS_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO );
1255         try {
1256             writeAllDomainsChangedOnAllSubtrees( local_phylogeny_l, true, outfile_name, "_dollo_all_gains_d" );
1257             writeAllDomainsChangedOnAllSubtrees( local_phylogeny_l, false, outfile_name, "_dollo_all_losses_d" );
1258         }
1259         catch ( final IOException e ) {
1260             e.printStackTrace();
1261             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() );
1262         }
1263         if ( domain_parsimony.calculateNumberOfBinaryDomainCombination() > 0 ) {
1264             // FITCH DOMAIN COMBINATIONS
1265             // -------------------------
1266             local_phylogeny_l = phylogeny.copy();
1267             String randomization = "no";
1268             if ( radomize_fitch_parsimony ) {
1269                 domain_parsimony.executeFitchParsimonyOnBinaryDomainCombintion( random_number_seed_for_fitch_parsimony );
1270                 randomization = "yes, seed = " + random_number_seed_for_fitch_parsimony;
1271             }
1272             else {
1273                 domain_parsimony.executeFitchParsimonyOnBinaryDomainCombintion( use_last_in_fitch_parsimony );
1274             }
1275             SurfacingUtil.writeMatrixToFile( domain_parsimony.getGainLossMatrix(), outfile_name
1276                     + surfacing.PARSIMONY_OUTPUT_GL_SUFFIX_FITCH_BINARY_COMBINATIONS, Format.FORESTER );
1277             SurfacingUtil.writeMatrixToFile( domain_parsimony.getGainLossCountsMatrix(), outfile_name
1278                     + surfacing.PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_FITCH_BINARY_COMBINATIONS, Format.FORESTER );
1279             SurfacingUtil
1280                     .writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(),
1281                                                           CharacterStateMatrix.GainLossStates.GAIN,
1282                                                           outfile_name + surfacing.PARSIMONY_OUTPUT_FITCH_GAINS_BC,
1283                                                           sep,
1284                                                           ForesterUtil.LINE_SEPARATOR,
1285                                                           null );
1286             SurfacingUtil.writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(),
1287                                                                CharacterStateMatrix.GainLossStates.LOSS,
1288                                                                outfile_name
1289                                                                        + surfacing.PARSIMONY_OUTPUT_FITCH_LOSSES_BC,
1290                                                                sep,
1291                                                                ForesterUtil.LINE_SEPARATOR,
1292                                                                null );
1293             SurfacingUtil.writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(), null, outfile_name
1294                     + surfacing.PARSIMONY_OUTPUT_FITCH_PRESENT_BC, sep, ForesterUtil.LINE_SEPARATOR, null );
1295             if ( all_binary_domains_combination_gained_fitch != null ) {
1296                 collectChangedDomainCombinationsFromBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(),
1297                                                                                     dc_type,
1298                                                                                     all_binary_domains_combination_gained_fitch,
1299                                                                                     true );
1300             }
1301             if ( all_binary_domains_combination_lost_fitch != null ) {
1302                 collectChangedDomainCombinationsFromBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(),
1303                                                                                     dc_type,
1304                                                                                     all_binary_domains_combination_lost_fitch,
1305                                                                                     false );
1306             }
1307             if ( output_binary_domain_combinations_for_graphs ) {
1308                 SurfacingUtil
1309                         .writeBinaryStatesMatrixAsListToFileForBinaryCombinationsForGraphAnalysis( domain_parsimony
1310                                                                                                            .getGainLossMatrix(),
1311                                                                                                    null,
1312                                                                                                    outfile_name
1313                                                                                                            + surfacing.PARSIMONY_OUTPUT_FITCH_PRESENT_BC_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS,
1314                                                                                                    sep,
1315                                                                                                    ForesterUtil.LINE_SEPARATOR,
1316                                                                                                    BinaryDomainCombination.OutputFormat.DOT );
1317             }
1318             // HTML:
1319             writeBinaryStatesMatrixToList( domain_id_to_go_ids_map,
1320                                            go_id_to_term_map,
1321                                            go_namespace_limit,
1322                                            true,
1323                                            domain_parsimony.getGainLossMatrix(),
1324                                            CharacterStateMatrix.GainLossStates.GAIN,
1325                                            outfile_name + surfacing.PARSIMONY_OUTPUT_FITCH_GAINS_HTML_BC,
1326                                            sep,
1327                                            ForesterUtil.LINE_SEPARATOR,
1328                                            "Fitch Parsimony | Gains | Domain Combinations",
1329                                            "+",
1330                                            null,
1331                                            all_pfams_encountered,
1332                                            all_pfams_gained_as_dom_combinations,
1333                                            "_fitch_gains_dc",
1334                                            tax_code_to_id_map );
1335             writeBinaryStatesMatrixToList( domain_id_to_go_ids_map,
1336                                            go_id_to_term_map,
1337                                            go_namespace_limit,
1338                                            true,
1339                                            domain_parsimony.getGainLossMatrix(),
1340                                            CharacterStateMatrix.GainLossStates.LOSS,
1341                                            outfile_name + surfacing.PARSIMONY_OUTPUT_FITCH_LOSSES_HTML_BC,
1342                                            sep,
1343                                            ForesterUtil.LINE_SEPARATOR,
1344                                            "Fitch Parsimony | Losses | Domain Combinations",
1345                                            "-",
1346                                            null,
1347                                            all_pfams_encountered,
1348                                            all_pfams_lost_as_dom_combinations,
1349                                            "_fitch_losses_dc",
1350                                            tax_code_to_id_map );
1351             //            writeBinaryStatesMatrixToList( domain_id_to_go_ids_map,
1352             //                                           go_id_to_term_map,
1353             //                                           go_namespace_limit,
1354             //                                           true,
1355             //                                           domain_parsimony.getGainLossMatrix(),
1356             //                                           null,
1357             //                                           outfile_name + surfacing.PARSIMONY_OUTPUT_FITCH_PRESENT_HTML_BC,
1358             //                                           sep,
1359             //                                           ForesterUtil.LINE_SEPARATOR,
1360             //                                           "Fitch Parsimony | Present | Domain Combinations",
1361             //                                           "",
1362             //                                           null,
1363             //                                           all_pfams_encountered,
1364             //                                           null,
1365             //                                           "_fitch_present_dc",
1366             //                                           tax_code_to_id_map );
1367             writeAllEncounteredPfamsToFile( domain_id_to_go_ids_map,
1368                                             go_id_to_term_map,
1369                                             outfile_name,
1370                                             all_pfams_encountered );
1371             writePfamsToFile( outfile_name + surfacing.ALL_PFAMS_GAINED_AS_DOMAINS_SUFFIX, all_pfams_gained_as_domains );
1372             writePfamsToFile( outfile_name + surfacing.ALL_PFAMS_LOST_AS_DOMAINS_SUFFIX, all_pfams_lost_as_domains );
1373             writePfamsToFile( outfile_name + surfacing.ALL_PFAMS_GAINED_AS_DC_SUFFIX,
1374                               all_pfams_gained_as_dom_combinations );
1375             writePfamsToFile( outfile_name + surfacing.ALL_PFAMS_LOST_AS_DC_SUFFIX, all_pfams_lost_as_dom_combinations );
1376             preparePhylogeny( local_phylogeny_l,
1377                               domain_parsimony,
1378                               date_time,
1379                               "Fitch parsimony on binary domain combination presence/absence randomization: "
1380                                       + randomization,
1381                               "fitch_on_binary_domain_combinations_" + outfile_name,
1382                               parameters_str );
1383             SurfacingUtil.writePhylogenyToFile( local_phylogeny_l, outfile_name
1384                     + surfacing.BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH );
1385             calculateIndependentDomainCombinationGains( local_phylogeny_l,
1386                                                         outfile_name
1387                                                                 + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_COUNTS_OUTPUT_SUFFIX,
1388                                                         outfile_name
1389                                                                 + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_OUTPUT_SUFFIX,
1390                                                         outfile_name
1391                                                                 + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_SUFFIX,
1392                                                         outfile_name
1393                                                                 + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_UNIQUE_SUFFIX,
1394                                                         outfile_name + "_indep_dc_gains_fitch_lca_ranks.txt",
1395                                                         outfile_name + "_indep_dc_gains_fitch_lca_taxonomies.txt",
1396                                                         outfile_name + "_indep_dc_gains_fitch_protein_statistics.txt",
1397                                                         protein_length_stats_by_dc,
1398                                                         domain_number_stats_by_dc,
1399                                                         domain_length_stats_by_domain );
1400         }
1401     }
1402
1403     public static void executeParsimonyAnalysisForSecondaryFeatures( final String outfile_name,
1404                                                                      final DomainParsimonyCalculator secondary_features_parsimony,
1405                                                                      final Phylogeny phylogeny,
1406                                                                      final String parameters_str,
1407                                                                      final Map<Species, MappingResults> mapping_results_map,
1408                                                                      final boolean use_last_in_fitch_parsimony ) {
1409         final String sep = ForesterUtil.LINE_SEPARATOR + "###################" + ForesterUtil.LINE_SEPARATOR;
1410         final String date_time = ForesterUtil.getCurrentDateTime();
1411         System.out.println();
1412         writeToNexus( outfile_name + surfacing.NEXUS_SECONDARY_FEATURES,
1413                       secondary_features_parsimony.createMatrixOfSecondaryFeaturePresenceOrAbsence( null ),
1414                       phylogeny );
1415         Phylogeny local_phylogeny_copy = phylogeny.copy();
1416         secondary_features_parsimony.executeDolloParsimonyOnSecondaryFeatures( mapping_results_map );
1417         SurfacingUtil.writeMatrixToFile( secondary_features_parsimony.getGainLossMatrix(), outfile_name
1418                 + surfacing.PARSIMONY_OUTPUT_GL_SUFFIX_DOLLO_SECONDARY_FEATURES, Format.FORESTER );
1419         SurfacingUtil.writeMatrixToFile( secondary_features_parsimony.getGainLossCountsMatrix(), outfile_name
1420                 + surfacing.PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_DOLLO_SECONDARY_FEATURES, Format.FORESTER );
1421         SurfacingUtil
1422                 .writeBinaryStatesMatrixAsListToFile( secondary_features_parsimony.getGainLossMatrix(),
1423                                                       CharacterStateMatrix.GainLossStates.GAIN,
1424                                                       outfile_name
1425                                                               + surfacing.PARSIMONY_OUTPUT_DOLLO_GAINS_SECONDARY_FEATURES,
1426                                                       sep,
1427                                                       ForesterUtil.LINE_SEPARATOR,
1428                                                       null );
1429         SurfacingUtil
1430                 .writeBinaryStatesMatrixAsListToFile( secondary_features_parsimony.getGainLossMatrix(),
1431                                                       CharacterStateMatrix.GainLossStates.LOSS,
1432                                                       outfile_name
1433                                                               + surfacing.PARSIMONY_OUTPUT_DOLLO_LOSSES_SECONDARY_FEATURES,
1434                                                       sep,
1435                                                       ForesterUtil.LINE_SEPARATOR,
1436                                                       null );
1437         SurfacingUtil
1438                 .writeBinaryStatesMatrixAsListToFile( secondary_features_parsimony.getGainLossMatrix(),
1439                                                       null,
1440                                                       outfile_name
1441                                                               + surfacing.PARSIMONY_OUTPUT_DOLLO_PRESENT_SECONDARY_FEATURES,
1442                                                       sep,
1443                                                       ForesterUtil.LINE_SEPARATOR,
1444                                                       null );
1445         preparePhylogeny( local_phylogeny_copy,
1446                           secondary_features_parsimony,
1447                           date_time,
1448                           "Dollo parsimony on secondary feature presence/absence",
1449                           "dollo_on_secondary_features_" + outfile_name,
1450                           parameters_str );
1451         SurfacingUtil.writePhylogenyToFile( local_phylogeny_copy, outfile_name
1452                 + surfacing.SECONDARY_FEATURES_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO );
1453         // FITCH DOMAIN COMBINATIONS
1454         // -------------------------
1455         local_phylogeny_copy = phylogeny.copy();
1456         final String randomization = "no";
1457         secondary_features_parsimony
1458                 .executeFitchParsimonyOnBinaryDomainCombintionOnSecondaryFeatures( use_last_in_fitch_parsimony );
1459         preparePhylogeny( local_phylogeny_copy,
1460                           secondary_features_parsimony,
1461                           date_time,
1462                           "Fitch parsimony on secondary binary domain combination presence/absence randomization: "
1463                                   + randomization,
1464                           "fitch_on_binary_domain_combinations_" + outfile_name,
1465                           parameters_str );
1466         SurfacingUtil.writePhylogenyToFile( local_phylogeny_copy, outfile_name
1467                 + surfacing.BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH_MAPPED );
1468         calculateIndependentDomainCombinationGains( local_phylogeny_copy, outfile_name
1469                 + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_COUNTS_MAPPED_OUTPUT_SUFFIX, outfile_name
1470                 + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_MAPPED_OUTPUT_SUFFIX, outfile_name
1471                 + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_SUFFIX, outfile_name
1472                 + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_UNIQUE_SUFFIX, outfile_name
1473                 + "_MAPPED_indep_dc_gains_fitch_lca_ranks.txt", outfile_name
1474                 + "_MAPPED_indep_dc_gains_fitch_lca_taxonomies.txt", null, null, null, null );
1475     }
1476
1477     public static void executePlusMinusAnalysis( final File output_file,
1478                                                  final List<String> plus_minus_analysis_high_copy_base,
1479                                                  final List<String> plus_minus_analysis_high_copy_target,
1480                                                  final List<String> plus_minus_analysis_low_copy,
1481                                                  final List<GenomeWideCombinableDomains> gwcd_list,
1482                                                  final SortedMap<Species, List<Protein>> protein_lists_per_species,
1483                                                  final Map<String, List<GoId>> domain_id_to_go_ids_map,
1484                                                  final Map<GoId, GoTerm> go_id_to_term_map,
1485                                                  final List<Object> plus_minus_analysis_numbers ) {
1486         final Set<String> all_spec = new HashSet<String>();
1487         for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
1488             all_spec.add( gwcd.getSpecies().getSpeciesId() );
1489         }
1490         final File html_out_dom = new File( output_file + surfacing.PLUS_MINUS_DOM_SUFFIX_HTML );
1491         final File plain_out_dom = new File( output_file + surfacing.PLUS_MINUS_DOM_SUFFIX );
1492         final File html_out_dc = new File( output_file + surfacing.PLUS_MINUS_DC_SUFFIX_HTML );
1493         final File all_domains_go_ids_out_dom = new File( output_file + surfacing.PLUS_MINUS_ALL_GO_IDS_DOM_SUFFIX );
1494         final File passing_domains_go_ids_out_dom = new File( output_file
1495                 + surfacing.PLUS_MINUS_PASSING_GO_IDS_DOM_SUFFIX );
1496         final File proteins_file_base = new File( output_file + "" );
1497         final int min_diff = ( ( Integer ) plus_minus_analysis_numbers.get( 0 ) ).intValue();
1498         final double factor = ( ( Double ) plus_minus_analysis_numbers.get( 1 ) ).doubleValue();
1499         try {
1500             DomainCountsDifferenceUtil.calculateCopyNumberDifferences( gwcd_list,
1501                                                                        protein_lists_per_species,
1502                                                                        plus_minus_analysis_high_copy_base,
1503                                                                        plus_minus_analysis_high_copy_target,
1504                                                                        plus_minus_analysis_low_copy,
1505                                                                        min_diff,
1506                                                                        factor,
1507                                                                        plain_out_dom,
1508                                                                        html_out_dom,
1509                                                                        html_out_dc,
1510                                                                        domain_id_to_go_ids_map,
1511                                                                        go_id_to_term_map,
1512                                                                        all_domains_go_ids_out_dom,
1513                                                                        passing_domains_go_ids_out_dom,
1514                                                                        proteins_file_base );
1515         }
1516         catch ( final IOException e ) {
1517             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() );
1518         }
1519         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote plus minus domain analysis results to \""
1520                 + html_out_dom + "\"" );
1521         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote plus minus domain analysis results to \""
1522                 + plain_out_dom + "\"" );
1523         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote plus minus domain analysis results to \"" + html_out_dc
1524                 + "\"" );
1525         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote plus minus domain analysis based passing GO ids to \""
1526                 + passing_domains_go_ids_out_dom + "\"" );
1527         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote plus minus domain analysis based all GO ids to \""
1528                 + all_domains_go_ids_out_dom + "\"" );
1529     }
1530
1531     public static void extractProteinNames( final List<Protein> proteins,
1532                                             final List<String> query_domain_ids_nc_order,
1533                                             final Writer out,
1534                                             final String separator,
1535                                             final String limit_to_species ) throws IOException {
1536         for( final Protein protein : proteins ) {
1537             if ( ForesterUtil.isEmpty( limit_to_species )
1538                     || protein.getSpecies().getSpeciesId().equalsIgnoreCase( limit_to_species ) ) {
1539                 if ( protein.contains( query_domain_ids_nc_order, true ) ) {
1540                     out.write( protein.getSpecies().getSpeciesId() );
1541                     out.write( separator );
1542                     out.write( protein.getProteinId().getId() );
1543                     out.write( separator );
1544                     out.write( "[" );
1545                     final Set<String> visited_domain_ids = new HashSet<String>();
1546                     boolean first = true;
1547                     for( final Domain domain : protein.getProteinDomains() ) {
1548                         if ( !visited_domain_ids.contains( domain.getDomainId() ) ) {
1549                             visited_domain_ids.add( domain.getDomainId() );
1550                             if ( first ) {
1551                                 first = false;
1552                             }
1553                             else {
1554                                 out.write( " " );
1555                             }
1556                             out.write( domain.getDomainId() );
1557                             out.write( " {" );
1558                             out.write( "" + domain.getTotalCount() );
1559                             out.write( "}" );
1560                         }
1561                     }
1562                     out.write( "]" );
1563                     out.write( separator );
1564                     if ( !( ForesterUtil.isEmpty( protein.getDescription() ) || protein.getDescription()
1565                             .equals( SurfacingConstants.NONE ) ) ) {
1566                         out.write( protein.getDescription() );
1567                     }
1568                     out.write( separator );
1569                     if ( !( ForesterUtil.isEmpty( protein.getAccession() ) || protein.getAccession()
1570                             .equals( SurfacingConstants.NONE ) ) ) {
1571                         out.write( protein.getAccession() );
1572                     }
1573                     out.write( SurfacingConstants.NL );
1574                 }
1575             }
1576         }
1577         out.flush();
1578     }
1579
1580     public static void extractProteinNames( final SortedMap<Species, List<Protein>> protein_lists_per_species,
1581                                             final String domain_id,
1582                                             final Writer out,
1583                                             final String separator,
1584                                             final String limit_to_species,
1585                                             final double domain_e_cutoff ) throws IOException {
1586         //System.out.println( "Per domain E-value: " + domain_e_cutoff );
1587         for( final Species species : protein_lists_per_species.keySet() ) {
1588             //System.out.println( species + ":" );
1589             for( final Protein protein : protein_lists_per_species.get( species ) ) {
1590                 if ( ForesterUtil.isEmpty( limit_to_species )
1591                         || protein.getSpecies().getSpeciesId().equalsIgnoreCase( limit_to_species ) ) {
1592                     final List<Domain> domains = protein.getProteinDomains( domain_id );
1593                     if ( domains.size() > 0 ) {
1594                         out.write( protein.getSpecies().getSpeciesId() );
1595                         out.write( separator );
1596                         out.write( protein.getProteinId().getId() );
1597                         out.write( separator );
1598                         out.write( domain_id.toString() );
1599                         out.write( separator );
1600                         int prev_to = -1;
1601                         for( final Domain domain : domains ) {
1602                             if ( ( domain_e_cutoff < 0 ) || ( domain.getPerDomainEvalue() <= domain_e_cutoff ) ) {
1603                                 out.write( "/" );
1604                                 out.write( domain.getFrom() + "-" + domain.getTo() );
1605                                 if ( prev_to >= 0 ) {
1606                                     final int l = domain.getFrom() - prev_to;
1607                                     // System.out.println( l );
1608                                 }
1609                                 prev_to = domain.getTo();
1610                             }
1611                         }
1612                         out.write( "/" );
1613                         out.write( separator );
1614                         final List<Domain> domain_list = new ArrayList<Domain>();
1615                         for( final Domain domain : protein.getProteinDomains() ) {
1616                             if ( ( domain_e_cutoff < 0 ) || ( domain.getPerDomainEvalue() <= domain_e_cutoff ) ) {
1617                                 domain_list.add( domain );
1618                             }
1619                         }
1620                         final Domain domain_ary[] = new Domain[ domain_list.size() ];
1621                         for( int i = 0; i < domain_list.size(); ++i ) {
1622                             domain_ary[ i ] = domain_list.get( i );
1623                         }
1624                         Arrays.sort( domain_ary, new DomainComparator( true ) );
1625                         out.write( "{" );
1626                         boolean first = true;
1627                         for( final Domain domain : domain_ary ) {
1628                             if ( first ) {
1629                                 first = false;
1630                             }
1631                             else {
1632                                 out.write( "," );
1633                             }
1634                             out.write( domain.getDomainId().toString() );
1635                             out.write( ":" + domain.getFrom() + "-" + domain.getTo() );
1636                             out.write( ":" + domain.getPerDomainEvalue() );
1637                         }
1638                         out.write( "}" );
1639                         if ( !( ForesterUtil.isEmpty( protein.getDescription() ) || protein.getDescription()
1640                                 .equals( SurfacingConstants.NONE ) ) ) {
1641                             out.write( protein.getDescription() );
1642                         }
1643                         out.write( separator );
1644                         if ( !( ForesterUtil.isEmpty( protein.getAccession() ) || protein.getAccession()
1645                                 .equals( SurfacingConstants.NONE ) ) ) {
1646                             out.write( protein.getAccession() );
1647                         }
1648                         out.write( SurfacingConstants.NL );
1649                     }
1650                 }
1651             }
1652         }
1653         out.flush();
1654     }
1655
1656     public static SortedSet<String> getAllDomainIds( final List<GenomeWideCombinableDomains> gwcd_list ) {
1657         final SortedSet<String> all_domains_ids = new TreeSet<String>();
1658         for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
1659             final Set<String> all_domains = gwcd.getAllDomainIds();
1660             //    for( final Domain domain : all_domains ) {
1661             all_domains_ids.addAll( all_domains );
1662             //    }
1663         }
1664         return all_domains_ids;
1665     }
1666
1667     public static SortedMap<String, Integer> getDomainCounts( final List<Protein> protein_domain_collections ) {
1668         final SortedMap<String, Integer> map = new TreeMap<String, Integer>();
1669         for( final Protein protein_domain_collection : protein_domain_collections ) {
1670             for( final Object name : protein_domain_collection.getProteinDomains() ) {
1671                 final BasicDomain protein_domain = ( BasicDomain ) name;
1672                 final String id = protein_domain.getDomainId();
1673                 if ( map.containsKey( id ) ) {
1674                     map.put( id, map.get( id ) + 1 );
1675                 }
1676                 else {
1677                     map.put( id, 1 );
1678                 }
1679             }
1680         }
1681         return map;
1682     }
1683
1684     public static int getNumberOfNodesLackingName( final Phylogeny p, final StringBuilder names ) {
1685         final PhylogenyNodeIterator it = p.iteratorPostorder();
1686         int c = 0;
1687         while ( it.hasNext() ) {
1688             final PhylogenyNode n = it.next();
1689             if ( ForesterUtil.isEmpty( n.getName() )
1690                     && ( !n.getNodeData().isHasTaxonomy() || ForesterUtil.isEmpty( n.getNodeData().getTaxonomy()
1691                             .getScientificName() ) )
1692                     && ( !n.getNodeData().isHasTaxonomy() || ForesterUtil.isEmpty( n.getNodeData().getTaxonomy()
1693                             .getCommonName() ) ) ) {
1694                 if ( n.getParent() != null ) {
1695                     names.append( " " );
1696                     names.append( n.getParent().getName() );
1697                 }
1698                 final List l = n.getAllExternalDescendants();
1699                 for( final Object object : l ) {
1700                     System.out.println( l.toString() );
1701                 }
1702                 ++c;
1703             }
1704         }
1705         return c;
1706     }
1707
1708     public static void log( final String msg, final Writer w ) {
1709         try {
1710             w.write( msg );
1711             w.write( ForesterUtil.LINE_SEPARATOR );
1712         }
1713         catch ( final IOException e ) {
1714             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() );
1715         }
1716     }
1717
1718     public static Phylogeny[] obtainAndPreProcessIntrees( final File[] intree_files,
1719                                                           final int number_of_genomes,
1720                                                           final String[][] input_file_properties ) {
1721         final Phylogeny[] intrees = new Phylogeny[ intree_files.length ];
1722         int i = 0;
1723         for( final File intree_file : intree_files ) {
1724             Phylogeny intree = null;
1725             final String error = ForesterUtil.isReadableFile( intree_file );
1726             if ( !ForesterUtil.isEmpty( error ) ) {
1727                 ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read input tree file [" + intree_file + "]: "
1728                         + error );
1729             }
1730             try {
1731                 final Phylogeny[] p_array = ParserBasedPhylogenyFactory.getInstance()
1732                         .create( intree_file, ParserUtils.createParserDependingOnFileType( intree_file, true ) );
1733                 if ( p_array.length < 1 ) {
1734                     ForesterUtil.fatalError( surfacing.PRG_NAME, "file [" + intree_file
1735                             + "] does not contain any phylogeny in phyloXML format" );
1736                 }
1737                 else if ( p_array.length > 1 ) {
1738                     ForesterUtil.fatalError( surfacing.PRG_NAME, "file [" + intree_file
1739                             + "] contains more than one phylogeny in phyloXML format" );
1740                 }
1741                 intree = p_array[ 0 ];
1742             }
1743             catch ( final Exception e ) {
1744                 ForesterUtil.fatalError( surfacing.PRG_NAME, "failed to read input tree from file [" + intree_file
1745                         + "]: " + error );
1746             }
1747             if ( ( intree == null ) || intree.isEmpty() ) {
1748                 ForesterUtil.fatalError( surfacing.PRG_NAME, "input tree [" + intree_file + "] is empty" );
1749             }
1750             if ( !intree.isRooted() ) {
1751                 ForesterUtil.fatalError( surfacing.PRG_NAME, "input tree [" + intree_file + "] is not rooted" );
1752             }
1753             if ( intree.getNumberOfExternalNodes() < number_of_genomes ) {
1754                 ForesterUtil.fatalError( surfacing.PRG_NAME,
1755                                          "number of external nodes [" + intree.getNumberOfExternalNodes()
1756                                                  + "] of input tree [" + intree_file
1757                                                  + "] is smaller than the number of genomes the be analyzed ["
1758                                                  + number_of_genomes + "]" );
1759             }
1760             final StringBuilder parent_names = new StringBuilder();
1761             final int nodes_lacking_name = getNumberOfNodesLackingName( intree, parent_names );
1762             if ( nodes_lacking_name > 0 ) {
1763                 ForesterUtil.fatalError( surfacing.PRG_NAME, "input tree [" + intree_file + "] has "
1764                         + nodes_lacking_name + " node(s) lacking a name [parent names:" + parent_names + "]" );
1765             }
1766             preparePhylogenyForParsimonyAnalyses( intree, input_file_properties );
1767             if ( !intree.isCompletelyBinary() ) {
1768                 ForesterUtil.printWarningMessage( surfacing.PRG_NAME, "input tree [" + intree_file
1769                         + "] is not completely binary" );
1770             }
1771             intrees[ i++ ] = intree;
1772         }
1773         return intrees;
1774     }
1775
1776     public static Phylogeny obtainFirstIntree( final File intree_file ) {
1777         Phylogeny intree = null;
1778         final String error = ForesterUtil.isReadableFile( intree_file );
1779         if ( !ForesterUtil.isEmpty( error ) ) {
1780             ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read input tree file [" + intree_file + "]: " + error );
1781         }
1782         try {
1783             final Phylogeny[] phys = ParserBasedPhylogenyFactory.getInstance()
1784                     .create( intree_file, ParserUtils.createParserDependingOnFileType( intree_file, true ) );
1785             if ( phys.length < 1 ) {
1786                 ForesterUtil.fatalError( surfacing.PRG_NAME, "file [" + intree_file
1787                         + "] does not contain any phylogeny in phyloXML format" );
1788             }
1789             else if ( phys.length > 1 ) {
1790                 ForesterUtil.fatalError( surfacing.PRG_NAME, "file [" + intree_file
1791                         + "] contains more than one phylogeny in phyloXML format" );
1792             }
1793             intree = phys[ 0 ];
1794         }
1795         catch ( final Exception e ) {
1796             ForesterUtil.fatalError( surfacing.PRG_NAME, "failed to read input tree from file [" + intree_file + "]: "
1797                     + error );
1798         }
1799         if ( ( intree == null ) || intree.isEmpty() ) {
1800             ForesterUtil.fatalError( surfacing.PRG_NAME, "input tree [" + intree_file + "] is empty" );
1801         }
1802         if ( !intree.isRooted() ) {
1803             ForesterUtil.fatalError( surfacing.PRG_NAME, "input tree [" + intree_file + "] is not rooted" );
1804         }
1805         return intree;
1806     }
1807
1808     public static void performDomainArchitectureAnalysis( final SortedMap<String, Set<String>> domain_architecutures,
1809                                                           final SortedMap<String, Integer> domain_architecuture_counts,
1810                                                           final int min_count,
1811                                                           final File da_counts_outfile,
1812                                                           final File unique_da_outfile ) {
1813         checkForOutputFileWriteability( da_counts_outfile );
1814         checkForOutputFileWriteability( unique_da_outfile );
1815         try {
1816             final BufferedWriter da_counts_out = new BufferedWriter( new FileWriter( da_counts_outfile ) );
1817             final BufferedWriter unique_da_out = new BufferedWriter( new FileWriter( unique_da_outfile ) );
1818             final Iterator<Entry<String, Integer>> it = domain_architecuture_counts.entrySet().iterator();
1819             while ( it.hasNext() ) {
1820                 final Map.Entry<String, Integer> e = it.next();
1821                 final String da = e.getKey();
1822                 final int count = e.getValue();
1823                 if ( count >= min_count ) {
1824                     da_counts_out.write( da );
1825                     da_counts_out.write( "\t" );
1826                     da_counts_out.write( String.valueOf( count ) );
1827                     da_counts_out.write( ForesterUtil.LINE_SEPARATOR );
1828                 }
1829                 if ( count == 1 ) {
1830                     final Iterator<Entry<String, Set<String>>> it2 = domain_architecutures.entrySet().iterator();
1831                     while ( it2.hasNext() ) {
1832                         final Map.Entry<String, Set<String>> e2 = it2.next();
1833                         final String genome = e2.getKey();
1834                         final Set<String> das = e2.getValue();
1835                         if ( das.contains( da ) ) {
1836                             unique_da_out.write( genome );
1837                             unique_da_out.write( "\t" );
1838                             unique_da_out.write( da );
1839                             unique_da_out.write( ForesterUtil.LINE_SEPARATOR );
1840                         }
1841                     }
1842                 }
1843             }
1844             unique_da_out.close();
1845             da_counts_out.close();
1846         }
1847         catch ( final IOException e ) {
1848             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() );
1849         }
1850         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote distance matrices to \"" + da_counts_outfile + "\"" );
1851         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote distance matrices to \"" + unique_da_outfile + "\"" );
1852         //
1853     }
1854
1855     public static void preparePhylogeny( final Phylogeny p,
1856                                          final DomainParsimonyCalculator domain_parsimony,
1857                                          final String date_time,
1858                                          final String method,
1859                                          final String name,
1860                                          final String parameters_str ) {
1861         domain_parsimony.decoratePhylogenyWithDomains( p );
1862         final StringBuilder desc = new StringBuilder();
1863         desc.append( "[Method: " + method + "] [Date: " + date_time + "] " );
1864         desc.append( "[Cost: " + domain_parsimony.getCost() + "] " );
1865         desc.append( "[Gains: " + domain_parsimony.getTotalGains() + "] " );
1866         desc.append( "[Losses: " + domain_parsimony.getTotalLosses() + "] " );
1867         desc.append( "[Unchanged: " + domain_parsimony.getTotalUnchanged() + "] " );
1868         desc.append( "[Parameters: " + parameters_str + "]" );
1869         p.setName( name );
1870         p.setDescription( desc.toString() );
1871         p.setConfidence( new Confidence( domain_parsimony.getCost(), "parsimony" ) );
1872         p.setRerootable( false );
1873         p.setRooted( true );
1874     }
1875
1876     public static void preparePhylogenyForParsimonyAnalyses( final Phylogeny intree,
1877                                                              final String[][] input_file_properties ) {
1878         final String[] genomes = new String[ input_file_properties.length ];
1879         for( int i = 0; i < input_file_properties.length; ++i ) {
1880             if ( intree.getNodes( input_file_properties[ i ][ 1 ] ).size() > 1 ) {
1881                 ForesterUtil.fatalError( surfacing.PRG_NAME, "node named [" + input_file_properties[ i ][ 1 ]
1882                         + "] is not unique in input tree " + intree.getName() );
1883             }
1884             genomes[ i ] = input_file_properties[ i ][ 1 ];
1885         }
1886         //
1887         final PhylogenyNodeIterator it = intree.iteratorPostorder();
1888         while ( it.hasNext() ) {
1889             final PhylogenyNode n = it.next();
1890             if ( ForesterUtil.isEmpty( n.getName() ) ) {
1891                 if ( n.getNodeData().isHasTaxonomy()
1892                         && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
1893                     n.setName( n.getNodeData().getTaxonomy().getTaxonomyCode() );
1894                 }
1895                 else if ( n.getNodeData().isHasTaxonomy()
1896                         && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
1897                     n.setName( n.getNodeData().getTaxonomy().getScientificName() );
1898                 }
1899                 else if ( n.getNodeData().isHasTaxonomy()
1900                         && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getCommonName() ) ) {
1901                     n.setName( n.getNodeData().getTaxonomy().getCommonName() );
1902                 }
1903                 else {
1904                     ForesterUtil
1905                             .fatalError( surfacing.PRG_NAME,
1906                                          "node with no name, scientific name, common name, or taxonomy code present" );
1907                 }
1908             }
1909         }
1910         //
1911         final List<String> igns = PhylogenyMethods.deleteExternalNodesPositiveSelection( genomes, intree );
1912         if ( igns.size() > 0 ) {
1913             System.out.println( "Not using the following " + igns.size() + " nodes:" );
1914             for( int i = 0; i < igns.size(); ++i ) {
1915                 System.out.println( " " + i + ": " + igns.get( i ) );
1916             }
1917             System.out.println( "--" );
1918         }
1919         for( final String[] input_file_propertie : input_file_properties ) {
1920             try {
1921                 intree.getNode( input_file_propertie[ 1 ] );
1922             }
1923             catch ( final IllegalArgumentException e ) {
1924                 ForesterUtil.fatalError( surfacing.PRG_NAME, "node named [" + input_file_propertie[ 1 ]
1925                         + "] not present/not unique in input tree" );
1926             }
1927         }
1928     }
1929
1930     public static void printOutPercentageOfMultidomainProteins( final SortedMap<Integer, Integer> all_genomes_domains_per_potein_histo,
1931                                                                 final Writer log_writer ) {
1932         int sum = 0;
1933         for( final Entry<Integer, Integer> entry : all_genomes_domains_per_potein_histo.entrySet() ) {
1934             sum += entry.getValue();
1935         }
1936         final double percentage = ( 100.0 * ( sum - all_genomes_domains_per_potein_histo.get( 1 ) ) ) / sum;
1937         ForesterUtil.programMessage( surfacing.PRG_NAME, "Percentage of multidomain proteins: " + percentage + "%" );
1938         log( "Percentage of multidomain proteins:            : " + percentage + "%", log_writer );
1939     }
1940
1941     private static void printSomeStats( final DescriptiveStatistics stats, final AsciiHistogram histo, final Writer w )
1942             throws IOException {
1943         w.write( "<hr>" );
1944         w.write( "<br>" );
1945         w.write( SurfacingConstants.NL );
1946         w.write( "<tt><pre>" );
1947         w.write( SurfacingConstants.NL );
1948         if ( histo != null ) {
1949             w.write( histo.toStringBuffer( 20, '|', 40, 5 ).toString() );
1950             w.write( SurfacingConstants.NL );
1951         }
1952         w.write( "</pre></tt>" );
1953         w.write( SurfacingConstants.NL );
1954         w.write( "<table>" );
1955         w.write( SurfacingConstants.NL );
1956         w.write( "<tr><td>N: </td><td>" + stats.getN() + "</td></tr>" );
1957         w.write( SurfacingConstants.NL );
1958         w.write( "<tr><td>Min: </td><td>" + stats.getMin() + "</td></tr>" );
1959         w.write( SurfacingConstants.NL );
1960         w.write( "<tr><td>Max: </td><td>" + stats.getMax() + "</td></tr>" );
1961         w.write( SurfacingConstants.NL );
1962         w.write( "<tr><td>Mean: </td><td>" + stats.arithmeticMean() + "</td></tr>" );
1963         w.write( SurfacingConstants.NL );
1964         if ( stats.getN() > 1 ) {
1965             w.write( "<tr><td>SD: </td><td>" + stats.sampleStandardDeviation() + "</td></tr>" );
1966         }
1967         else {
1968             w.write( "<tr><td>SD: </td><td>n/a</td></tr>" );
1969         }
1970         w.write( SurfacingConstants.NL );
1971         w.write( "</table>" );
1972         w.write( SurfacingConstants.NL );
1973         w.write( "<br>" );
1974         w.write( SurfacingConstants.NL );
1975     }
1976
1977     public static void processFilter( final File filter_file, final SortedSet<String> filter ) {
1978         SortedSet<String> filter_str = null;
1979         try {
1980             filter_str = ForesterUtil.file2set( filter_file );
1981         }
1982         catch ( final IOException e ) {
1983             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() );
1984         }
1985         if ( filter_str != null ) {
1986             for( final String string : filter_str ) {
1987                 filter.add( string );
1988             }
1989         }
1990         if ( surfacing.VERBOSE ) {
1991             System.out.println( "Filter:" );
1992             for( final String domainId : filter ) {
1993                 System.out.println( domainId );
1994             }
1995         }
1996     }
1997
1998     public static String[][] processInputGenomesFile( final File input_genomes ) {
1999         String[][] input_file_properties = null;
2000         try {
2001             input_file_properties = ForesterUtil.file22dArray( input_genomes );
2002         }
2003         catch ( final IOException e ) {
2004             ForesterUtil.fatalError( surfacing.PRG_NAME,
2005                                      "genomes files is to be in the following format \"<hmmpfam output file> <species>\": "
2006                                              + e.getLocalizedMessage() );
2007         }
2008         final Set<String> specs = new HashSet<String>();
2009         final Set<String> paths = new HashSet<String>();
2010         for( int i = 0; i < input_file_properties.length; ++i ) {
2011             if ( !PhyloXmlUtil.TAXOMONY_CODE_PATTERN.matcher( input_file_properties[ i ][ 1 ] ).matches() ) {
2012                 ForesterUtil.fatalError( surfacing.PRG_NAME, "illegal format for species code: "
2013                         + input_file_properties[ i ][ 1 ] );
2014             }
2015             if ( specs.contains( input_file_properties[ i ][ 1 ] ) ) {
2016                 ForesterUtil.fatalError( surfacing.PRG_NAME, "species code " + input_file_properties[ i ][ 1 ]
2017                         + " is not unique" );
2018             }
2019             specs.add( input_file_properties[ i ][ 1 ] );
2020             if ( paths.contains( input_file_properties[ i ][ 0 ] ) ) {
2021                 ForesterUtil.fatalError( surfacing.PRG_NAME, "path " + input_file_properties[ i ][ 0 ]
2022                         + " is not unique" );
2023             }
2024             paths.add( input_file_properties[ i ][ 0 ] );
2025             final String error = ForesterUtil.isReadableFile( new File( input_file_properties[ i ][ 0 ] ) );
2026             if ( !ForesterUtil.isEmpty( error ) ) {
2027                 ForesterUtil.fatalError( surfacing.PRG_NAME, error );
2028             }
2029         }
2030         return input_file_properties;
2031     }
2032
2033     public static void processPlusMinusAnalysisOption( final CommandLineArguments cla,
2034                                                        final List<String> high_copy_base,
2035                                                        final List<String> high_copy_target,
2036                                                        final List<String> low_copy,
2037                                                        final List<Object> numbers ) {
2038         if ( cla.isOptionSet( surfacing.PLUS_MINUS_ANALYSIS_OPTION ) ) {
2039             if ( !cla.isOptionValueSet( surfacing.PLUS_MINUS_ANALYSIS_OPTION ) ) {
2040                 ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for 'plus-minus' file: -"
2041                         + surfacing.PLUS_MINUS_ANALYSIS_OPTION + "=<file>" );
2042             }
2043             final File plus_minus_file = new File( cla.getOptionValue( surfacing.PLUS_MINUS_ANALYSIS_OPTION ) );
2044             final String msg = ForesterUtil.isReadableFile( plus_minus_file );
2045             if ( !ForesterUtil.isEmpty( msg ) ) {
2046                 ForesterUtil.fatalError( surfacing.PRG_NAME, "can not read from \"" + plus_minus_file + "\": " + msg );
2047             }
2048             processPlusMinusFile( plus_minus_file, high_copy_base, high_copy_target, low_copy, numbers );
2049         }
2050     }
2051
2052     // First numbers is minimal difference, second is factor.
2053     public static void processPlusMinusFile( final File plus_minus_file,
2054                                              final List<String> high_copy_base,
2055                                              final List<String> high_copy_target,
2056                                              final List<String> low_copy,
2057                                              final List<Object> numbers ) {
2058         Set<String> species_set = null;
2059         int min_diff = surfacing.PLUS_MINUS_ANALYSIS_MIN_DIFF_DEFAULT;
2060         double factor = surfacing.PLUS_MINUS_ANALYSIS_FACTOR_DEFAULT;
2061         try {
2062             species_set = ForesterUtil.file2set( plus_minus_file );
2063         }
2064         catch ( final IOException e ) {
2065             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() );
2066         }
2067         if ( species_set != null ) {
2068             for( final String species : species_set ) {
2069                 final String species_trimmed = species.substring( 1 );
2070                 if ( species.startsWith( "+" ) ) {
2071                     if ( low_copy.contains( species_trimmed ) ) {
2072                         ForesterUtil.fatalError( surfacing.PRG_NAME,
2073                                                  "species/genome names can not appear with both '+' and '-' suffix, as appears the case for: \""
2074                                                          + species_trimmed + "\"" );
2075                     }
2076                     high_copy_base.add( species_trimmed );
2077                 }
2078                 else if ( species.startsWith( "*" ) ) {
2079                     if ( low_copy.contains( species_trimmed ) ) {
2080                         ForesterUtil.fatalError( surfacing.PRG_NAME,
2081                                                  "species/genome names can not appear with both '*' and '-' suffix, as appears the case for: \""
2082                                                          + species_trimmed + "\"" );
2083                     }
2084                     high_copy_target.add( species_trimmed );
2085                 }
2086                 else if ( species.startsWith( "-" ) ) {
2087                     if ( high_copy_base.contains( species_trimmed ) || high_copy_target.contains( species_trimmed ) ) {
2088                         ForesterUtil.fatalError( surfacing.PRG_NAME,
2089                                                  "species/genome names can not appear with both '+' or '*' and '-' suffix, as appears the case for: \""
2090                                                          + species_trimmed + "\"" );
2091                     }
2092                     low_copy.add( species_trimmed );
2093                 }
2094                 else if ( species.startsWith( "$D" ) ) {
2095                     try {
2096                         min_diff = Integer.parseInt( species.substring( 3 ) );
2097                     }
2098                     catch ( final NumberFormatException e ) {
2099                         ForesterUtil.fatalError( surfacing.PRG_NAME,
2100                                                  "could not parse integer value for minimal difference from: \""
2101                                                          + species.substring( 3 ) + "\"" );
2102                     }
2103                 }
2104                 else if ( species.startsWith( "$F" ) ) {
2105                     try {
2106                         factor = Double.parseDouble( species.substring( 3 ) );
2107                     }
2108                     catch ( final NumberFormatException e ) {
2109                         ForesterUtil.fatalError( surfacing.PRG_NAME, "could not parse double value for factor from: \""
2110                                 + species.substring( 3 ) + "\"" );
2111                     }
2112                 }
2113                 else if ( species.startsWith( "#" ) ) {
2114                     // Comment, ignore.
2115                 }
2116                 else {
2117                     ForesterUtil
2118                             .fatalError( surfacing.PRG_NAME,
2119                                          "species/genome names in 'plus minus' file must begin with '*' (high copy target genome), '+' (high copy base genomes), '-' (low copy genomes), '$D=<integer>' minimal Difference (default is 1), '$F=<double>' factor (default is 1.0), double), or '#' (ignore) suffix, encountered: \""
2120                                                  + species + "\"" );
2121                 }
2122                 numbers.add( new Integer( min_diff + "" ) );
2123                 numbers.add( new Double( factor + "" ) );
2124             }
2125         }
2126         else {
2127             ForesterUtil.fatalError( surfacing.PRG_NAME, "'plus minus' file [" + plus_minus_file + "] appears empty" );
2128         }
2129     }
2130
2131     /*
2132      * species | protein id | n-terminal domain | c-terminal domain | n-terminal domain per domain E-value | c-terminal domain per domain E-value
2133      * 
2134      * 
2135      */
2136     static public StringBuffer proteinToDomainCombinations( final Protein protein,
2137                                                             final String protein_id,
2138                                                             final String separator ) {
2139         final StringBuffer sb = new StringBuffer();
2140         if ( protein.getSpecies() == null ) {
2141             throw new IllegalArgumentException( "species must not be null" );
2142         }
2143         if ( ForesterUtil.isEmpty( protein.getSpecies().getSpeciesId() ) ) {
2144             throw new IllegalArgumentException( "species id must not be empty" );
2145         }
2146         final List<Domain> domains = protein.getProteinDomains();
2147         if ( domains.size() > 1 ) {
2148             final Map<String, Integer> counts = new HashMap<String, Integer>();
2149             for( final Domain domain : domains ) {
2150                 final String id = domain.getDomainId();
2151                 if ( counts.containsKey( id ) ) {
2152                     counts.put( id, counts.get( id ) + 1 );
2153                 }
2154                 else {
2155                     counts.put( id, 1 );
2156                 }
2157             }
2158             final Set<String> dcs = new HashSet<String>();
2159             for( int i = 1; i < domains.size(); ++i ) {
2160                 for( int j = 0; j < i; ++j ) {
2161                     Domain domain_n = domains.get( i );
2162                     Domain domain_c = domains.get( j );
2163                     if ( domain_n.getFrom() > domain_c.getFrom() ) {
2164                         domain_n = domains.get( j );
2165                         domain_c = domains.get( i );
2166                     }
2167                     final String dc = domain_n.getDomainId() + domain_c.getDomainId();
2168                     if ( !dcs.contains( dc ) ) {
2169                         dcs.add( dc );
2170                         sb.append( protein.getSpecies() );
2171                         sb.append( separator );
2172                         sb.append( protein_id );
2173                         sb.append( separator );
2174                         sb.append( domain_n.getDomainId() );
2175                         sb.append( separator );
2176                         sb.append( domain_c.getDomainId() );
2177                         sb.append( separator );
2178                         sb.append( domain_n.getPerDomainEvalue() );
2179                         sb.append( separator );
2180                         sb.append( domain_c.getPerDomainEvalue() );
2181                         sb.append( separator );
2182                         sb.append( counts.get( domain_n.getDomainId() ) );
2183                         sb.append( separator );
2184                         sb.append( counts.get( domain_c.getDomainId() ) );
2185                         sb.append( ForesterUtil.LINE_SEPARATOR );
2186                     }
2187                 }
2188             }
2189         }
2190         else if ( domains.size() == 1 ) {
2191             sb.append( protein.getSpecies() );
2192             sb.append( separator );
2193             sb.append( protein_id );
2194             sb.append( separator );
2195             sb.append( domains.get( 0 ).getDomainId() );
2196             sb.append( separator );
2197             sb.append( separator );
2198             sb.append( domains.get( 0 ).getPerDomainEvalue() );
2199             sb.append( separator );
2200             sb.append( separator );
2201             sb.append( 1 );
2202             sb.append( separator );
2203             sb.append( ForesterUtil.LINE_SEPARATOR );
2204         }
2205         else {
2206             sb.append( protein.getSpecies() );
2207             sb.append( separator );
2208             sb.append( protein_id );
2209             sb.append( separator );
2210             sb.append( separator );
2211             sb.append( separator );
2212             sb.append( separator );
2213             sb.append( separator );
2214             sb.append( separator );
2215             sb.append( ForesterUtil.LINE_SEPARATOR );
2216         }
2217         return sb;
2218     }
2219
2220     public static List<Domain> sortDomainsWithAscendingConfidenceValues( final Protein protein ) {
2221         final List<Domain> domains = new ArrayList<Domain>();
2222         for( final Domain d : protein.getProteinDomains() ) {
2223             domains.add( d );
2224         }
2225         Collections.sort( domains, SurfacingUtil.ASCENDING_CONFIDENCE_VALUE_ORDER );
2226         return domains;
2227     }
2228
2229     private static List<String> splitDomainCombination( final String dc ) {
2230         final String[] s = dc.split( "=" );
2231         if ( s.length != 2 ) {
2232             ForesterUtil.printErrorMessage( surfacing.PRG_NAME, "Stringyfied domain combination has illegal format: "
2233                     + dc );
2234             System.exit( -1 );
2235         }
2236         final List<String> l = new ArrayList<String>( 2 );
2237         l.add( s[ 0 ] );
2238         l.add( s[ 1 ] );
2239         return l;
2240     }
2241
2242     public static int storeDomainArchitectures( final String genome,
2243                                                 final SortedMap<String, Set<String>> domain_architecutures,
2244                                                 final List<Protein> protein_list,
2245                                                 final Map<String, Integer> distinct_domain_architecuture_counts ) {
2246         final Set<String> da = new HashSet<String>();
2247         domain_architecutures.put( genome, da );
2248         for( final Protein protein : protein_list ) {
2249             final String da_str = ( ( BasicProtein ) protein ).toDomainArchitectureString( "~", 3, "=" );
2250             if ( !da.contains( da_str ) ) {
2251                 if ( !distinct_domain_architecuture_counts.containsKey( da_str ) ) {
2252                     distinct_domain_architecuture_counts.put( da_str, 1 );
2253                 }
2254                 else {
2255                     distinct_domain_architecuture_counts.put( da_str,
2256                                                               distinct_domain_architecuture_counts.get( da_str ) + 1 );
2257                 }
2258                 da.add( da_str );
2259             }
2260         }
2261         return da.size();
2262     }
2263
2264     public static void writeAllDomainsChangedOnAllSubtrees( final Phylogeny p,
2265                                                             final boolean get_gains,
2266                                                             final String outdir,
2267                                                             final String suffix_for_filename ) throws IOException {
2268         CharacterStateMatrix.GainLossStates state = CharacterStateMatrix.GainLossStates.GAIN;
2269         if ( !get_gains ) {
2270             state = CharacterStateMatrix.GainLossStates.LOSS;
2271         }
2272         final File base_dir = createBaseDirForPerNodeDomainFiles( surfacing.BASE_DIRECTORY_PER_SUBTREE_DOMAIN_GAIN_LOSS_FILES,
2273                                                                   false,
2274                                                                   state,
2275                                                                   outdir );
2276         for( final PhylogenyNodeIterator it = p.iteratorPostorder(); it.hasNext(); ) {
2277             final PhylogenyNode node = it.next();
2278             if ( !node.isExternal() ) {
2279                 final SortedSet<String> domains = collectAllDomainsChangedOnSubtree( node, get_gains );
2280                 if ( domains.size() > 0 ) {
2281                     final Writer writer = ForesterUtil.createBufferedWriter( base_dir + ForesterUtil.FILE_SEPARATOR
2282                             + node.getName() + suffix_for_filename );
2283                     for( final String domain : domains ) {
2284                         writer.write( domain );
2285                         writer.write( ForesterUtil.LINE_SEPARATOR );
2286                     }
2287                     writer.close();
2288                 }
2289             }
2290         }
2291     }
2292
2293     private static void writeAllEncounteredPfamsToFile( final Map<String, List<GoId>> domain_id_to_go_ids_map,
2294                                                         final Map<GoId, GoTerm> go_id_to_term_map,
2295                                                         final String outfile_name,
2296                                                         final SortedSet<String> all_pfams_encountered ) {
2297         final File all_pfams_encountered_file = new File( outfile_name + surfacing.ALL_PFAMS_ENCOUNTERED_SUFFIX );
2298         final File all_pfams_encountered_with_go_annotation_file = new File( outfile_name
2299                 + surfacing.ALL_PFAMS_ENCOUNTERED_WITH_GO_ANNOTATION_SUFFIX );
2300         final File encountered_pfams_summary_file = new File( outfile_name + surfacing.ENCOUNTERED_PFAMS_SUMMARY_SUFFIX );
2301         int biological_process_counter = 0;
2302         int cellular_component_counter = 0;
2303         int molecular_function_counter = 0;
2304         int pfams_with_mappings_counter = 0;
2305         int pfams_without_mappings_counter = 0;
2306         int pfams_without_mappings_to_bp_or_mf_counter = 0;
2307         int pfams_with_mappings_to_bp_or_mf_counter = 0;
2308         try {
2309             final Writer all_pfams_encountered_writer = new BufferedWriter( new FileWriter( all_pfams_encountered_file ) );
2310             final Writer all_pfams_encountered_with_go_annotation_writer = new BufferedWriter( new FileWriter( all_pfams_encountered_with_go_annotation_file ) );
2311             final Writer summary_writer = new BufferedWriter( new FileWriter( encountered_pfams_summary_file ) );
2312             summary_writer.write( "# Pfam to GO mapping summary" );
2313             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
2314             summary_writer.write( "# Actual summary is at the end of this file." );
2315             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
2316             summary_writer.write( "# Encountered Pfams without a GO mapping:" );
2317             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
2318             for( final String pfam : all_pfams_encountered ) {
2319                 all_pfams_encountered_writer.write( pfam );
2320                 all_pfams_encountered_writer.write( ForesterUtil.LINE_SEPARATOR );
2321                 final String domain_id = new String( pfam );
2322                 if ( domain_id_to_go_ids_map.containsKey( domain_id ) ) {
2323                     ++pfams_with_mappings_counter;
2324                     all_pfams_encountered_with_go_annotation_writer.write( pfam );
2325                     all_pfams_encountered_with_go_annotation_writer.write( ForesterUtil.LINE_SEPARATOR );
2326                     final List<GoId> go_ids = domain_id_to_go_ids_map.get( domain_id );
2327                     boolean maps_to_bp = false;
2328                     boolean maps_to_cc = false;
2329                     boolean maps_to_mf = false;
2330                     for( final GoId go_id : go_ids ) {
2331                         final GoTerm go_term = go_id_to_term_map.get( go_id );
2332                         if ( go_term.getGoNameSpace().isBiologicalProcess() ) {
2333                             maps_to_bp = true;
2334                         }
2335                         else if ( go_term.getGoNameSpace().isCellularComponent() ) {
2336                             maps_to_cc = true;
2337                         }
2338                         else if ( go_term.getGoNameSpace().isMolecularFunction() ) {
2339                             maps_to_mf = true;
2340                         }
2341                     }
2342                     if ( maps_to_bp ) {
2343                         ++biological_process_counter;
2344                     }
2345                     if ( maps_to_cc ) {
2346                         ++cellular_component_counter;
2347                     }
2348                     if ( maps_to_mf ) {
2349                         ++molecular_function_counter;
2350                     }
2351                     if ( maps_to_bp || maps_to_mf ) {
2352                         ++pfams_with_mappings_to_bp_or_mf_counter;
2353                     }
2354                     else {
2355                         ++pfams_without_mappings_to_bp_or_mf_counter;
2356                     }
2357                 }
2358                 else {
2359                     ++pfams_without_mappings_to_bp_or_mf_counter;
2360                     ++pfams_without_mappings_counter;
2361                     summary_writer.write( pfam );
2362                     summary_writer.write( ForesterUtil.LINE_SEPARATOR );
2363                 }
2364             }
2365             all_pfams_encountered_writer.close();
2366             all_pfams_encountered_with_go_annotation_writer.close();
2367             ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote all [" + all_pfams_encountered.size()
2368                     + "] encountered Pfams to: \"" + all_pfams_encountered_file + "\"" );
2369             ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote all [" + pfams_with_mappings_counter
2370                     + "] encountered Pfams with GO mappings to: \"" + all_pfams_encountered_with_go_annotation_file
2371                     + "\"" );
2372             ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote summary (including all ["
2373                     + pfams_without_mappings_counter + "] encountered Pfams without GO mappings) to: \""
2374                     + encountered_pfams_summary_file + "\"" );
2375             ForesterUtil.programMessage( surfacing.PRG_NAME, "Sum of Pfams encountered                : "
2376                     + all_pfams_encountered.size() );
2377             ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams without a mapping                 : "
2378                     + pfams_without_mappings_counter + " ["
2379                     + ( ( 100 * pfams_without_mappings_counter ) / all_pfams_encountered.size() ) + "%]" );
2380             ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams without mapping to proc. or func. : "
2381                     + pfams_without_mappings_to_bp_or_mf_counter + " ["
2382                     + ( ( 100 * pfams_without_mappings_to_bp_or_mf_counter ) / all_pfams_encountered.size() ) + "%]" );
2383             ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with a mapping                    : "
2384                     + pfams_with_mappings_counter + " ["
2385                     + ( ( 100 * pfams_with_mappings_counter ) / all_pfams_encountered.size() ) + "%]" );
2386             ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with a mapping to proc. or func.  : "
2387                     + pfams_with_mappings_to_bp_or_mf_counter + " ["
2388                     + ( ( 100 * pfams_with_mappings_to_bp_or_mf_counter ) / all_pfams_encountered.size() ) + "%]" );
2389             ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with mapping to biological process: "
2390                     + biological_process_counter + " ["
2391                     + ( ( 100 * biological_process_counter ) / all_pfams_encountered.size() ) + "%]" );
2392             ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with mapping to molecular function: "
2393                     + molecular_function_counter + " ["
2394                     + ( ( 100 * molecular_function_counter ) / all_pfams_encountered.size() ) + "%]" );
2395             ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with mapping to cellular component: "
2396                     + cellular_component_counter + " ["
2397                     + ( ( 100 * cellular_component_counter ) / all_pfams_encountered.size() ) + "%]" );
2398             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
2399             summary_writer.write( "# Sum of Pfams encountered                : " + all_pfams_encountered.size() );
2400             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
2401             summary_writer.write( "# Pfams without a mapping                 : " + pfams_without_mappings_counter
2402                     + " [" + ( ( 100 * pfams_without_mappings_counter ) / all_pfams_encountered.size() ) + "%]" );
2403             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
2404             summary_writer.write( "# Pfams without mapping to proc. or func. : "
2405                     + pfams_without_mappings_to_bp_or_mf_counter + " ["
2406                     + ( ( 100 * pfams_without_mappings_to_bp_or_mf_counter ) / all_pfams_encountered.size() ) + "%]" );
2407             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
2408             summary_writer.write( "# Pfams with a mapping                    : " + pfams_with_mappings_counter + " ["
2409                     + ( ( 100 * pfams_with_mappings_counter ) / all_pfams_encountered.size() ) + "%]" );
2410             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
2411             summary_writer.write( "# Pfams with a mapping to proc. or func.  : "
2412                     + pfams_with_mappings_to_bp_or_mf_counter + " ["
2413                     + ( ( 100 * pfams_with_mappings_to_bp_or_mf_counter ) / all_pfams_encountered.size() ) + "%]" );
2414             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
2415             summary_writer.write( "# Pfams with mapping to biological process: " + biological_process_counter + " ["
2416                     + ( ( 100 * biological_process_counter ) / all_pfams_encountered.size() ) + "%]" );
2417             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
2418             summary_writer.write( "# Pfams with mapping to molecular function: " + molecular_function_counter + " ["
2419                     + ( ( 100 * molecular_function_counter ) / all_pfams_encountered.size() ) + "%]" );
2420             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
2421             summary_writer.write( "# Pfams with mapping to cellular component: " + cellular_component_counter + " ["
2422                     + ( ( 100 * cellular_component_counter ) / all_pfams_encountered.size() ) + "%]" );
2423             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
2424             summary_writer.close();
2425         }
2426         catch ( final IOException e ) {
2427             ForesterUtil.printWarningMessage( surfacing.PRG_NAME, "Failure to write: " + e );
2428         }
2429     }
2430
2431     public static void writeBinaryDomainCombinationsFileForGraphAnalysis( final String[][] input_file_properties,
2432                                                                           final File output_dir,
2433                                                                           final GenomeWideCombinableDomains gwcd,
2434                                                                           final int i,
2435                                                                           final GenomeWideCombinableDomainsSortOrder dc_sort_order ) {
2436         File dc_outfile_dot = new File( input_file_properties[ i ][ 1 ]
2437                 + surfacing.DOMAIN_COMBINITONS_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS );
2438         if ( output_dir != null ) {
2439             dc_outfile_dot = new File( output_dir + ForesterUtil.FILE_SEPARATOR + dc_outfile_dot );
2440         }
2441         checkForOutputFileWriteability( dc_outfile_dot );
2442         final SortedSet<BinaryDomainCombination> binary_combinations = createSetOfAllBinaryDomainCombinationsPerGenome( gwcd );
2443         try {
2444             final BufferedWriter out_dot = new BufferedWriter( new FileWriter( dc_outfile_dot ) );
2445             for( final BinaryDomainCombination bdc : binary_combinations ) {
2446                 out_dot.write( bdc.toGraphDescribingLanguage( BinaryDomainCombination.OutputFormat.DOT, null, null )
2447                         .toString() );
2448                 out_dot.write( SurfacingConstants.NL );
2449             }
2450             out_dot.close();
2451         }
2452         catch ( final IOException e ) {
2453             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() );
2454         }
2455         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote binary domain combination for \""
2456                 + input_file_properties[ i ][ 0 ] + "\" (" + input_file_properties[ i ][ 1 ] + ", "
2457                 + input_file_properties[ i ][ 2 ] + ") to: \"" + dc_outfile_dot + "\"" );
2458     }
2459
2460     public static void writeBinaryStatesMatrixAsListToFile( final CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix,
2461                                                             final CharacterStateMatrix.GainLossStates state,
2462                                                             final String filename,
2463                                                             final String indentifier_characters_separator,
2464                                                             final String character_separator,
2465                                                             final Map<String, String> descriptions ) {
2466         final File outfile = new File( filename );
2467         checkForOutputFileWriteability( outfile );
2468         final SortedSet<String> sorted_ids = new TreeSet<String>();
2469         for( int i = 0; i < matrix.getNumberOfIdentifiers(); ++i ) {
2470             sorted_ids.add( matrix.getIdentifier( i ) );
2471         }
2472         try {
2473             final BufferedWriter out = new BufferedWriter( new FileWriter( outfile ) );
2474             for( final String id : sorted_ids ) {
2475                 out.write( indentifier_characters_separator );
2476                 out.write( "#" + id );
2477                 out.write( indentifier_characters_separator );
2478                 for( int c = 0; c < matrix.getNumberOfCharacters(); ++c ) {
2479                     // Not nice:
2480                     // using null to indicate either UNCHANGED_PRESENT or GAIN.
2481                     if ( ( matrix.getState( id, c ) == state )
2482                             || ( ( state == null ) && ( ( matrix.getState( id, c ) == CharacterStateMatrix.GainLossStates.GAIN ) || ( matrix
2483                                     .getState( id, c ) == CharacterStateMatrix.GainLossStates.UNCHANGED_PRESENT ) ) ) ) {
2484                         out.write( matrix.getCharacter( c ) );
2485                         if ( ( descriptions != null ) && !descriptions.isEmpty()
2486                                 && descriptions.containsKey( matrix.getCharacter( c ) ) ) {
2487                             out.write( "\t" );
2488                             out.write( descriptions.get( matrix.getCharacter( c ) ) );
2489                         }
2490                         out.write( character_separator );
2491                     }
2492                 }
2493             }
2494             out.flush();
2495             out.close();
2496         }
2497         catch ( final IOException e ) {
2498             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() );
2499         }
2500         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote characters list: \"" + filename + "\"" );
2501     }
2502
2503     public static void writeBinaryStatesMatrixAsListToFileForBinaryCombinationsForGraphAnalysis( final CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix,
2504                                                                                                  final CharacterStateMatrix.GainLossStates state,
2505                                                                                                  final String filename,
2506                                                                                                  final String indentifier_characters_separator,
2507                                                                                                  final String character_separator,
2508                                                                                                  final BinaryDomainCombination.OutputFormat bc_output_format ) {
2509         final File outfile = new File( filename );
2510         checkForOutputFileWriteability( outfile );
2511         final SortedSet<String> sorted_ids = new TreeSet<String>();
2512         for( int i = 0; i < matrix.getNumberOfIdentifiers(); ++i ) {
2513             sorted_ids.add( matrix.getIdentifier( i ) );
2514         }
2515         try {
2516             final BufferedWriter out = new BufferedWriter( new FileWriter( outfile ) );
2517             for( final String id : sorted_ids ) {
2518                 out.write( indentifier_characters_separator );
2519                 out.write( "#" + id );
2520                 out.write( indentifier_characters_separator );
2521                 for( int c = 0; c < matrix.getNumberOfCharacters(); ++c ) {
2522                     // Not nice:
2523                     // using null to indicate either UNCHANGED_PRESENT or GAIN.
2524                     if ( ( matrix.getState( id, c ) == state )
2525                             || ( ( state == null ) && ( ( matrix.getState( id, c ) == CharacterStateMatrix.GainLossStates.GAIN ) || ( matrix
2526                                     .getState( id, c ) == CharacterStateMatrix.GainLossStates.UNCHANGED_PRESENT ) ) ) ) {
2527                         BinaryDomainCombination bdc = null;
2528                         try {
2529                             bdc = BasicBinaryDomainCombination.createInstance( matrix.getCharacter( c ) );
2530                         }
2531                         catch ( final Exception e ) {
2532                             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() );
2533                         }
2534                         out.write( bdc.toGraphDescribingLanguage( bc_output_format, null, null ).toString() );
2535                         out.write( character_separator );
2536                     }
2537                 }
2538             }
2539             out.flush();
2540             out.close();
2541         }
2542         catch ( final IOException e ) {
2543             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() );
2544         }
2545         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote characters list: \"" + filename + "\"" );
2546     }
2547
2548     public static void writeBinaryStatesMatrixToList( final Map<String, List<GoId>> domain_id_to_go_ids_map,
2549                                                       final Map<GoId, GoTerm> go_id_to_term_map,
2550                                                       final GoNameSpace go_namespace_limit,
2551                                                       final boolean domain_combinations,
2552                                                       final CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix,
2553                                                       final CharacterStateMatrix.GainLossStates state,
2554                                                       final String filename,
2555                                                       final String indentifier_characters_separator,
2556                                                       final String character_separator,
2557                                                       final String title_for_html,
2558                                                       final String prefix_for_html,
2559                                                       final Map<String, Set<String>>[] domain_id_to_secondary_features_maps,
2560                                                       final SortedSet<String> all_pfams_encountered,
2561                                                       final SortedSet<String> pfams_gained_or_lost,
2562                                                       final String suffix_for_per_node_events_file,
2563                                                       final Map<String, Integer> tax_code_to_id_map ) {
2564         if ( ( go_namespace_limit != null ) && ( ( go_id_to_term_map == null ) || ( go_id_to_term_map.size() < 1 ) ) ) {
2565             throw new IllegalArgumentException( "attempt to use GO namespace limit without a GO-id to term map" );
2566         }
2567         else if ( ( ( domain_id_to_go_ids_map == null ) || ( domain_id_to_go_ids_map.size() < 1 ) ) ) {
2568             throw new IllegalArgumentException( "attempt to output detailed HTML without a Pfam to GO map" );
2569         }
2570         else if ( ( ( go_id_to_term_map == null ) || ( go_id_to_term_map.size() < 1 ) ) ) {
2571             throw new IllegalArgumentException( "attempt to output detailed HTML without a GO-id to term map" );
2572         }
2573         final File outfile = new File( filename );
2574         checkForOutputFileWriteability( outfile );
2575         final SortedSet<String> sorted_ids = new TreeSet<String>();
2576         for( int i = 0; i < matrix.getNumberOfIdentifiers(); ++i ) {
2577             sorted_ids.add( matrix.getIdentifier( i ) );
2578         }
2579         try {
2580             final Writer out = new BufferedWriter( new FileWriter( outfile ) );
2581             final File per_node_go_mapped_domain_gain_loss_files_base_dir = createBaseDirForPerNodeDomainFiles( surfacing.BASE_DIRECTORY_PER_NODE_DOMAIN_GAIN_LOSS_FILES,
2582                                                                                                                 domain_combinations,
2583                                                                                                                 state,
2584                                                                                                                 filename );
2585             Writer per_node_go_mapped_domain_gain_loss_outfile_writer = null;
2586             File per_node_go_mapped_domain_gain_loss_outfile = null;
2587             int per_node_counter = 0;
2588             out.write( "<html>" );
2589             out.write( SurfacingConstants.NL );
2590             addHtmlHead( out, title_for_html );
2591             out.write( SurfacingConstants.NL );
2592             out.write( "<body>" );
2593             out.write( SurfacingConstants.NL );
2594             out.write( "<h1>" );
2595             out.write( SurfacingConstants.NL );
2596             out.write( title_for_html );
2597             out.write( SurfacingConstants.NL );
2598             out.write( "</h1>" );
2599             out.write( SurfacingConstants.NL );
2600             out.write( "<table>" );
2601             out.write( SurfacingConstants.NL );
2602             for( final String id : sorted_ids ) {
2603                 final Matcher matcher = PATTERN_SP_STYLE_TAXONOMY.matcher( id );
2604                 if ( matcher.matches() ) {
2605                     continue;
2606                 }
2607                 out.write( "<tr>" );
2608                 out.write( "<td>" );
2609                 out.write( "<a href=\"#" + id + "\">" + id + "</a>" );
2610                 out.write( "</td>" );
2611                 out.write( "</tr>" );
2612                 out.write( SurfacingConstants.NL );
2613             }
2614             out.write( "</table>" );
2615             out.write( SurfacingConstants.NL );
2616             for( final String id : sorted_ids ) {
2617                 final Matcher matcher = PATTERN_SP_STYLE_TAXONOMY.matcher( id );
2618                 if ( matcher.matches() ) {
2619                     continue;
2620                 }
2621                 out.write( SurfacingConstants.NL );
2622                 out.write( "<h2>" );
2623                 out.write( "<a name=\"" + id + "\">" + id + "</a>" );
2624                 writeTaxonomyLinks( out, id, tax_code_to_id_map );
2625                 out.write( "</h2>" );
2626                 out.write( SurfacingConstants.NL );
2627                 out.write( "<table>" );
2628                 out.write( SurfacingConstants.NL );
2629                 out.write( "<tr>" );
2630                 out.write( "<td><b>" );
2631                 out.write( "Pfam domain(s)" );
2632                 out.write( "</b></td><td><b>" );
2633                 out.write( "GO term acc" );
2634                 out.write( "</b></td><td><b>" );
2635                 out.write( "GO term" );
2636                 out.write( "</b></td><td><b>" );
2637                 out.write( "GO namespace" );
2638                 out.write( "</b></td>" );
2639                 out.write( "</tr>" );
2640                 out.write( SurfacingConstants.NL );
2641                 out.write( "</tr>" );
2642                 out.write( SurfacingConstants.NL );
2643                 per_node_counter = 0;
2644                 if ( matrix.getNumberOfCharacters() > 0 ) {
2645                     per_node_go_mapped_domain_gain_loss_outfile = new File( per_node_go_mapped_domain_gain_loss_files_base_dir
2646                             + ForesterUtil.FILE_SEPARATOR + id + suffix_for_per_node_events_file );
2647                     SurfacingUtil.checkForOutputFileWriteability( per_node_go_mapped_domain_gain_loss_outfile );
2648                     per_node_go_mapped_domain_gain_loss_outfile_writer = ForesterUtil
2649                             .createBufferedWriter( per_node_go_mapped_domain_gain_loss_outfile );
2650                 }
2651                 else {
2652                     per_node_go_mapped_domain_gain_loss_outfile = null;
2653                     per_node_go_mapped_domain_gain_loss_outfile_writer = null;
2654                 }
2655                 for( int c = 0; c < matrix.getNumberOfCharacters(); ++c ) {
2656                     // Not nice:
2657                     // using null to indicate either UNCHANGED_PRESENT or GAIN.
2658                     if ( ( matrix.getState( id, c ) == state )
2659                             || ( ( state == null ) && ( ( matrix.getState( id, c ) == CharacterStateMatrix.GainLossStates.UNCHANGED_PRESENT ) || ( matrix
2660                                     .getState( id, c ) == CharacterStateMatrix.GainLossStates.GAIN ) ) ) ) {
2661                         final String character = matrix.getCharacter( c );
2662                         String domain_0 = "";
2663                         String domain_1 = "";
2664                         if ( character.indexOf( BinaryDomainCombination.SEPARATOR ) > 0 ) {
2665                             final String[] s = character.split( BinaryDomainCombination.SEPARATOR );
2666                             if ( s.length != 2 ) {
2667                                 throw new AssertionError( "this should not have happened: unexpected format for domain combination: ["
2668                                         + character + "]" );
2669                             }
2670                             domain_0 = s[ 0 ];
2671                             domain_1 = s[ 1 ];
2672                         }
2673                         else {
2674                             domain_0 = character;
2675                         }
2676                         writeDomainData( domain_id_to_go_ids_map,
2677                                          go_id_to_term_map,
2678                                          go_namespace_limit,
2679                                          out,
2680                                          domain_0,
2681                                          domain_1,
2682                                          prefix_for_html,
2683                                          character_separator,
2684                                          domain_id_to_secondary_features_maps,
2685                                          null );
2686                         all_pfams_encountered.add( domain_0 );
2687                         if ( pfams_gained_or_lost != null ) {
2688                             pfams_gained_or_lost.add( domain_0 );
2689                         }
2690                         if ( !ForesterUtil.isEmpty( domain_1 ) ) {
2691                             all_pfams_encountered.add( domain_1 );
2692                             if ( pfams_gained_or_lost != null ) {
2693                                 pfams_gained_or_lost.add( domain_1 );
2694                             }
2695                         }
2696                         if ( per_node_go_mapped_domain_gain_loss_outfile_writer != null ) {
2697                             writeDomainsToIndividualFilePerTreeNode( per_node_go_mapped_domain_gain_loss_outfile_writer,
2698                                                                      domain_0,
2699                                                                      domain_1 );
2700                             per_node_counter++;
2701                         }
2702                     }
2703                 }
2704                 if ( per_node_go_mapped_domain_gain_loss_outfile_writer != null ) {
2705                     per_node_go_mapped_domain_gain_loss_outfile_writer.close();
2706                     if ( per_node_counter < 1 ) {
2707                         per_node_go_mapped_domain_gain_loss_outfile.delete();
2708                     }
2709                     per_node_counter = 0;
2710                 }
2711                 out.write( "</table>" );
2712                 out.write( SurfacingConstants.NL );
2713                 out.write( "<hr>" );
2714                 out.write( SurfacingConstants.NL );
2715             } // for( final String id : sorted_ids ) {  
2716             out.write( "</body>" );
2717             out.write( SurfacingConstants.NL );
2718             out.write( "</html>" );
2719             out.write( SurfacingConstants.NL );
2720             out.flush();
2721             out.close();
2722         }
2723         catch ( final IOException e ) {
2724             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() );
2725         }
2726         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote characters detailed HTML list: \"" + filename + "\"" );
2727     }
2728
2729     public static void writeDomainCombinationsCountsFile( final String[][] input_file_properties,
2730                                                           final File output_dir,
2731                                                           final Writer per_genome_domain_promiscuity_statistics_writer,
2732                                                           final GenomeWideCombinableDomains gwcd,
2733                                                           final int i,
2734                                                           final GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder dc_sort_order ) {
2735         File dc_outfile = new File( input_file_properties[ i ][ 1 ]
2736                 + surfacing.DOMAIN_COMBINITON_COUNTS_OUTPUTFILE_SUFFIX );
2737         if ( output_dir != null ) {
2738             dc_outfile = new File( output_dir + ForesterUtil.FILE_SEPARATOR + dc_outfile );
2739         }
2740         checkForOutputFileWriteability( dc_outfile );
2741         try {
2742             final BufferedWriter out = new BufferedWriter( new FileWriter( dc_outfile ) );
2743             out.write( gwcd.toStringBuilder( dc_sort_order ).toString() );
2744             out.close();
2745         }
2746         catch ( final IOException e ) {
2747             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() );
2748         }
2749         final DescriptiveStatistics stats = gwcd.getPerGenomeDomainPromiscuityStatistics();
2750         try {
2751             per_genome_domain_promiscuity_statistics_writer.write( input_file_properties[ i ][ 1 ] + "\t" );
2752             per_genome_domain_promiscuity_statistics_writer.write( FORMATTER_3.format( stats.arithmeticMean() ) + "\t" );
2753             if ( stats.getN() < 2 ) {
2754                 per_genome_domain_promiscuity_statistics_writer.write( "n/a" + "\t" );
2755             }
2756             else {
2757                 per_genome_domain_promiscuity_statistics_writer.write( FORMATTER_3.format( stats
2758                         .sampleStandardDeviation() ) + "\t" );
2759             }
2760             per_genome_domain_promiscuity_statistics_writer.write( FORMATTER_3.format( stats.median() ) + "\t" );
2761             per_genome_domain_promiscuity_statistics_writer.write( ( int ) stats.getMin() + "\t" );
2762             per_genome_domain_promiscuity_statistics_writer.write( ( int ) stats.getMax() + "\t" );
2763             per_genome_domain_promiscuity_statistics_writer.write( stats.getN() + "\t" );
2764             final SortedSet<String> mpds = gwcd.getMostPromiscuosDomain();
2765             for( final String mpd : mpds ) {
2766                 per_genome_domain_promiscuity_statistics_writer.write( mpd + " " );
2767             }
2768             per_genome_domain_promiscuity_statistics_writer.write( ForesterUtil.LINE_SEPARATOR );
2769         }
2770         catch ( final IOException e ) {
2771             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() );
2772         }
2773         if ( input_file_properties[ i ].length == 3 ) {
2774             ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote domain combination counts for \""
2775                     + input_file_properties[ i ][ 0 ] + "\" (" + input_file_properties[ i ][ 1 ] + ", "
2776                     + input_file_properties[ i ][ 2 ] + ") to: \"" + dc_outfile + "\"" );
2777         }
2778         else {
2779             ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote domain combination counts for \""
2780                     + input_file_properties[ i ][ 0 ] + "\" (" + input_file_properties[ i ][ 1 ] + ") to: \""
2781                     + dc_outfile + "\"" );
2782         }
2783     }
2784
2785     private static void writeDomainData( final Map<String, List<GoId>> domain_id_to_go_ids_map,
2786                                          final Map<GoId, GoTerm> go_id_to_term_map,
2787                                          final GoNameSpace go_namespace_limit,
2788                                          final Writer out,
2789                                          final String domain_0,
2790                                          final String domain_1,
2791                                          final String prefix_for_html,
2792                                          final String character_separator_for_non_html_output,
2793                                          final Map<String, Set<String>>[] domain_id_to_secondary_features_maps,
2794                                          final Set<GoId> all_go_ids ) throws IOException {
2795         boolean any_go_annotation_present = false;
2796         boolean first_has_no_go = false;
2797         int domain_count = 2; // To distinguish between domains and binary domain combinations.
2798         if ( ForesterUtil.isEmpty( domain_1 ) ) {
2799             domain_count = 1;
2800         }
2801         // The following has a difficult to understand logic.  
2802         for( int d = 0; d < domain_count; ++d ) {
2803             List<GoId> go_ids = null;
2804             boolean go_annotation_present = false;
2805             if ( d == 0 ) {
2806                 if ( domain_id_to_go_ids_map.containsKey( domain_0 ) ) {
2807                     go_annotation_present = true;
2808                     any_go_annotation_present = true;
2809                     go_ids = domain_id_to_go_ids_map.get( domain_0 );
2810                 }
2811                 else {
2812                     first_has_no_go = true;
2813                 }
2814             }
2815             else {
2816                 if ( domain_id_to_go_ids_map.containsKey( domain_1 ) ) {
2817                     go_annotation_present = true;
2818                     any_go_annotation_present = true;
2819                     go_ids = domain_id_to_go_ids_map.get( domain_1 );
2820                 }
2821             }
2822             if ( go_annotation_present ) {
2823                 boolean first = ( ( d == 0 ) || ( ( d == 1 ) && first_has_no_go ) );
2824                 for( final GoId go_id : go_ids ) {
2825                     out.write( "<tr>" );
2826                     if ( first ) {
2827                         first = false;
2828                         writeDomainIdsToHtml( out,
2829                                               domain_0,
2830                                               domain_1,
2831                                               prefix_for_html,
2832                                               domain_id_to_secondary_features_maps );
2833                     }
2834                     else {
2835                         out.write( "<td></td>" );
2836                     }
2837                     if ( !go_id_to_term_map.containsKey( go_id ) ) {
2838                         throw new IllegalArgumentException( "GO-id [" + go_id + "] not found in GO-id to GO-term map" );
2839                     }
2840                     final GoTerm go_term = go_id_to_term_map.get( go_id );
2841                     if ( ( go_namespace_limit == null ) || go_namespace_limit.equals( go_term.getGoNameSpace() ) ) {
2842                         // final String top = GoUtils.getPenultimateGoTerm( go_term, go_id_to_term_map ).getName();
2843                         final String go_id_str = go_id.getId();
2844                         out.write( "<td>" );
2845                         out.write( "<a href=\"" + SurfacingConstants.AMIGO_LINK + go_id_str
2846                                 + "\" target=\"amigo_window\">" + go_id_str + "</a>" );
2847                         out.write( "</td><td>" );
2848                         out.write( go_term.getName() );
2849                         if ( domain_count == 2 ) {
2850                             out.write( " (" + d + ")" );
2851                         }
2852                         out.write( "</td><td>" );
2853                         // out.write( top );
2854                         // out.write( "</td><td>" );
2855                         out.write( "[" );
2856                         out.write( go_term.getGoNameSpace().toShortString() );
2857                         out.write( "]" );
2858                         out.write( "</td>" );
2859                         if ( all_go_ids != null ) {
2860                             all_go_ids.add( go_id );
2861                         }
2862                     }
2863                     else {
2864                         out.write( "<td>" );
2865                         out.write( "</td><td>" );
2866                         out.write( "</td><td>" );
2867                         out.write( "</td><td>" );
2868                         out.write( "</td>" );
2869                     }
2870                     out.write( "</tr>" );
2871                     out.write( SurfacingConstants.NL );
2872                 }
2873             }
2874         } //  for( int d = 0; d < domain_count; ++d ) 
2875         if ( !any_go_annotation_present ) {
2876             out.write( "<tr>" );
2877             writeDomainIdsToHtml( out, domain_0, domain_1, prefix_for_html, domain_id_to_secondary_features_maps );
2878             out.write( "<td>" );
2879             out.write( "</td><td>" );
2880             out.write( "</td><td>" );
2881             out.write( "</td><td>" );
2882             out.write( "</td>" );
2883             out.write( "</tr>" );
2884             out.write( SurfacingConstants.NL );
2885         }
2886     }
2887
2888     private static void writeDomainIdsToHtml( final Writer out,
2889                                               final String domain_0,
2890                                               final String domain_1,
2891                                               final String prefix_for_detailed_html,
2892                                               final Map<String, Set<String>>[] domain_id_to_secondary_features_maps )
2893             throws IOException {
2894         out.write( "<td>" );
2895         if ( !ForesterUtil.isEmpty( prefix_for_detailed_html ) ) {
2896             out.write( prefix_for_detailed_html );
2897             out.write( " " );
2898         }
2899         out.write( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain_0 + "\">" + domain_0 + "</a>" );
2900         out.write( "</td>" );
2901     }
2902
2903     public static void writeDomainSimilaritiesToFile( final StringBuilder html_desc,
2904                                                       final StringBuilder html_title,
2905                                                       final Writer simple_tab_writer,
2906                                                       final Writer single_writer,
2907                                                       Map<Character, Writer> split_writers,
2908                                                       final SortedSet<DomainSimilarity> similarities,
2909                                                       final boolean treat_as_binary,
2910                                                       final List<Species> species_order,
2911                                                       final PrintableDomainSimilarity.PRINT_OPTION print_option,
2912                                                       final DomainSimilarity.DomainSimilarityScoring scoring,
2913                                                       final boolean verbose,
2914                                                       final Map<String, Integer> tax_code_to_id_map,
2915                                                       final Phylogeny phy ) throws IOException {
2916         if ( ( single_writer != null ) && ( ( split_writers == null ) || split_writers.isEmpty() ) ) {
2917             split_writers = new HashMap<Character, Writer>();
2918             split_writers.put( '_', single_writer );
2919         }
2920         switch ( print_option ) {
2921             case SIMPLE_TAB_DELIMITED:
2922                 break;
2923             case HTML:
2924                 for( final Character key : split_writers.keySet() ) {
2925                     final Writer w = split_writers.get( key );
2926                     w.write( "<html>" );
2927                     w.write( SurfacingConstants.NL );
2928                     if ( key != '_' ) {
2929                         addHtmlHead( w, "DC analysis (" + html_title + ") " + key.toString().toUpperCase() );
2930                     }
2931                     else {
2932                         addHtmlHead( w, "DC analysis (" + html_title + ")" );
2933                     }
2934                     w.write( SurfacingConstants.NL );
2935                     w.write( "<body>" );
2936                     w.write( SurfacingConstants.NL );
2937                     w.write( html_desc.toString() );
2938                     w.write( SurfacingConstants.NL );
2939                     w.write( "<hr>" );
2940                     w.write( SurfacingConstants.NL );
2941                     w.write( "<br>" );
2942                     w.write( SurfacingConstants.NL );
2943                     w.write( "<table>" );
2944                     w.write( SurfacingConstants.NL );
2945                     w.write( "<tr><td><b>Domains:</b></td></tr>" );
2946                     w.write( SurfacingConstants.NL );
2947                 }
2948                 break;
2949         }
2950         //
2951         for( final DomainSimilarity similarity : similarities ) {
2952             if ( ( species_order != null ) && !species_order.isEmpty() ) {
2953                 ( ( PrintableDomainSimilarity ) similarity ).setSpeciesOrder( species_order );
2954             }
2955             if ( single_writer != null ) {
2956                 single_writer.write( "<tr><td><b><a href=\"#" + similarity.getDomainId() + "\">"
2957                         + similarity.getDomainId() + "</a></b></td></tr>" );
2958                 single_writer.write( SurfacingConstants.NL );
2959             }
2960             else {
2961                 Writer local_writer = split_writers.get( ( similarity.getDomainId().charAt( 0 ) + "" ).toLowerCase()
2962                         .charAt( 0 ) );
2963                 if ( local_writer == null ) {
2964                     local_writer = split_writers.get( '0' );
2965                 }
2966                 local_writer.write( "<tr><td><b><a href=\"#" + similarity.getDomainId() + "\">"
2967                         + similarity.getDomainId() + "</a></b></td></tr>" );
2968                 local_writer.write( SurfacingConstants.NL );
2969             }
2970         }
2971         for( final Writer w : split_writers.values() ) {
2972             w.write( "</table>" );
2973             w.write( SurfacingConstants.NL );
2974             w.write( "<hr>" );
2975             w.write( SurfacingConstants.NL );
2976             w.write( "<table>" );
2977             w.write( SurfacingConstants.NL );
2978         }
2979         //
2980         for( final DomainSimilarity similarity : similarities ) {
2981             if ( ( species_order != null ) && !species_order.isEmpty() ) {
2982                 ( ( PrintableDomainSimilarity ) similarity ).setSpeciesOrder( species_order );
2983             }
2984             if ( simple_tab_writer != null ) {
2985                 simple_tab_writer.write( similarity.toStringBuffer( PRINT_OPTION.SIMPLE_TAB_DELIMITED,
2986                                                                     tax_code_to_id_map,
2987                                                                     null ).toString() );
2988             }
2989             if ( single_writer != null ) {
2990                 single_writer.write( similarity.toStringBuffer( print_option, tax_code_to_id_map, phy ).toString() );
2991                 single_writer.write( SurfacingConstants.NL );
2992             }
2993             else {
2994                 Writer local_writer = split_writers.get( ( similarity.getDomainId().charAt( 0 ) + "" ).toLowerCase()
2995                         .charAt( 0 ) );
2996                 if ( local_writer == null ) {
2997                     local_writer = split_writers.get( '0' );
2998                 }
2999                 local_writer.write( similarity.toStringBuffer( print_option, tax_code_to_id_map, phy ).toString() );
3000                 local_writer.write( SurfacingConstants.NL );
3001             }
3002         }
3003         switch ( print_option ) {
3004             case HTML:
3005                 for( final Writer w : split_writers.values() ) {
3006                     w.write( SurfacingConstants.NL );
3007                     w.write( "</table>" );
3008                     w.write( SurfacingConstants.NL );
3009                     w.write( "</font>" );
3010                     w.write( SurfacingConstants.NL );
3011                     w.write( "</body>" );
3012                     w.write( SurfacingConstants.NL );
3013                     w.write( "</html>" );
3014                     w.write( SurfacingConstants.NL );
3015                 }
3016                 break;
3017         }
3018         for( final Writer w : split_writers.values() ) {
3019             w.close();
3020         }
3021     }
3022
3023     private static void writeDomainsToIndividualFilePerTreeNode( final Writer individual_files_writer,
3024                                                                  final String domain_0,
3025                                                                  final String domain_1 ) throws IOException {
3026         individual_files_writer.write( domain_0 );
3027         individual_files_writer.write( ForesterUtil.LINE_SEPARATOR );
3028         if ( !ForesterUtil.isEmpty( domain_1 ) ) {
3029             individual_files_writer.write( domain_1 );
3030             individual_files_writer.write( ForesterUtil.LINE_SEPARATOR );
3031         }
3032     }
3033
3034     public static void writeMatrixToFile( final CharacterStateMatrix<?> matrix,
3035                                           final String filename,
3036                                           final Format format ) {
3037         final File outfile = new File( filename );
3038         checkForOutputFileWriteability( outfile );
3039         try {
3040             final BufferedWriter out = new BufferedWriter( new FileWriter( outfile ) );
3041             matrix.toWriter( out, format );
3042             out.flush();
3043             out.close();
3044         }
3045         catch ( final IOException e ) {
3046             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() );
3047         }
3048         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote matrix: \"" + filename + "\"" );
3049     }
3050
3051     public static void writeMatrixToFile( final File matrix_outfile, final List<DistanceMatrix> matrices ) {
3052         checkForOutputFileWriteability( matrix_outfile );
3053         try {
3054             final BufferedWriter out = new BufferedWriter( new FileWriter( matrix_outfile ) );
3055             for( final DistanceMatrix distance_matrix : matrices ) {
3056                 out.write( distance_matrix.toStringBuffer( DistanceMatrix.Format.PHYLIP ).toString() );
3057                 out.write( ForesterUtil.LINE_SEPARATOR );
3058                 out.flush();
3059             }
3060             out.close();
3061         }
3062         catch ( final IOException e ) {
3063             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() );
3064         }
3065         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote distance matrices to \"" + matrix_outfile + "\"" );
3066     }
3067
3068     private static void writePfamsToFile( final String outfile_name, final SortedSet<String> pfams ) {
3069         try {
3070             final Writer writer = new BufferedWriter( new FileWriter( new File( outfile_name ) ) );
3071             for( final String pfam : pfams ) {
3072                 writer.write( pfam );
3073                 writer.write( ForesterUtil.LINE_SEPARATOR );
3074             }
3075             writer.close();
3076             ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote " + pfams.size() + " pfams to [" + outfile_name
3077                     + "]" );
3078         }
3079         catch ( final IOException e ) {
3080             ForesterUtil.printWarningMessage( surfacing.PRG_NAME, "Failure to write: " + e );
3081         }
3082     }
3083
3084     public static void writePhylogenyToFile( final Phylogeny phylogeny, final String filename ) {
3085         final PhylogenyWriter writer = new PhylogenyWriter();
3086         try {
3087             writer.toPhyloXML( new File( filename ), phylogeny, 1 );
3088         }
3089         catch ( final IOException e ) {
3090             ForesterUtil.printWarningMessage( surfacing.PRG_NAME, "failed to write phylogeny to \"" + filename + "\": "
3091                     + e );
3092         }
3093         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote phylogeny to \"" + filename + "\"" );
3094     }
3095
3096     public static void writePresentToNexus( final File output_file,
3097                                             final File positive_filter_file,
3098                                             final SortedSet<String> filter,
3099                                             final List<GenomeWideCombinableDomains> gwcd_list ) {
3100         try {
3101             writeMatrixToFile( DomainParsimonyCalculator.createMatrixOfDomainPresenceOrAbsence( gwcd_list,
3102                                                                                                 positive_filter_file == null ? null
3103                                                                                                         : filter ),
3104                                output_file + surfacing.DOMAINS_PRESENT_NEXUS,
3105                                Format.NEXUS_BINARY );
3106             writeMatrixToFile( DomainParsimonyCalculator.createMatrixOfBinaryDomainCombinationPresenceOrAbsence( gwcd_list ),
3107                                output_file + surfacing.BDC_PRESENT_NEXUS,
3108                                Format.NEXUS_BINARY );
3109         }
3110         catch ( final Exception e ) {
3111             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() );
3112         }
3113     }
3114
3115     public static void writeProteinListsForAllSpecies( final File output_dir,
3116                                                        final SortedMap<Species, List<Protein>> protein_lists_per_species,
3117                                                        final List<GenomeWideCombinableDomains> gwcd_list,
3118                                                        final double domain_e_cutoff ) {
3119         final SortedSet<String> all_domains = new TreeSet<String>();
3120         for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
3121             all_domains.addAll( gwcd.getAllDomainIds() );
3122         }
3123         for( final String domain : all_domains ) {
3124             final File out = new File( output_dir + ForesterUtil.FILE_SEPARATOR + domain + surfacing.SEQ_EXTRACT_SUFFIX );
3125             checkForOutputFileWriteability( out );
3126             try {
3127                 final Writer proteins_file_writer = new BufferedWriter( new FileWriter( out ) );
3128                 extractProteinNames( protein_lists_per_species,
3129                                      domain,
3130                                      proteins_file_writer,
3131                                      "\t",
3132                                      surfacing.LIMIT_SPEC_FOR_PROT_EX,
3133                                      domain_e_cutoff );
3134                 proteins_file_writer.close();
3135             }
3136             catch ( final IOException e ) {
3137                 ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() );
3138             }
3139             ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote proteins list to \"" + out + "\"" );
3140         }
3141     }
3142
3143     public static void writeTaxonomyLinks( final Writer writer,
3144                                            final String species,
3145                                            final Map<String, Integer> tax_code_to_id_map ) throws IOException {
3146         if ( ( species.length() > 1 ) && ( species.indexOf( '_' ) < 1 ) ) {
3147             writer.write( " [" );
3148             if ( ( tax_code_to_id_map != null ) && tax_code_to_id_map.containsKey( species ) ) {
3149                 writer.write( "<a href=\"" + SurfacingConstants.UNIPROT_TAXONOMY_ID_LINK
3150                         + tax_code_to_id_map.get( species ) + "\" target=\"taxonomy_window\">uniprot</a>" );
3151             }
3152             else {
3153                 writer.write( "<a href=\"" + SurfacingConstants.EOL_LINK + species
3154                         + "\" target=\"taxonomy_window\">eol</a>" );
3155                 writer.write( "|" );
3156                 writer.write( "<a href=\"" + SurfacingConstants.GOOGLE_SCHOLAR_SEARCH + species
3157                         + "\" target=\"taxonomy_window\">scholar</a>" );
3158                 writer.write( "|" );
3159                 writer.write( "<a href=\"" + SurfacingConstants.GOOGLE_WEB_SEARCH_LINK + species
3160                         + "\" target=\"taxonomy_window\">google</a>" );
3161             }
3162             writer.write( "]" );
3163         }
3164     }
3165
3166     private static void writeToNexus( final String outfile_name,
3167                                       final CharacterStateMatrix<BinaryStates> matrix,
3168                                       final Phylogeny phylogeny ) {
3169         if ( !( matrix instanceof BasicCharacterStateMatrix ) ) {
3170             throw new IllegalArgumentException( "can only write matrices of type [" + BasicCharacterStateMatrix.class
3171                     + "] to nexus" );
3172         }
3173         final BasicCharacterStateMatrix<BinaryStates> my_matrix = ( org.forester.evoinference.matrix.character.BasicCharacterStateMatrix<BinaryStates> ) matrix;
3174         final List<Phylogeny> phylogenies = new ArrayList<Phylogeny>( 1 );
3175         phylogenies.add( phylogeny );
3176         try {
3177             final BufferedWriter w = new BufferedWriter( new FileWriter( outfile_name ) );
3178             w.write( NexusConstants.NEXUS );
3179             w.write( ForesterUtil.LINE_SEPARATOR );
3180             my_matrix.writeNexusTaxaBlock( w );
3181             my_matrix.writeNexusBinaryChractersBlock( w );
3182             PhylogenyWriter.writeNexusTreesBlock( w, phylogenies, NH_CONVERSION_SUPPORT_VALUE_STYLE.NONE );
3183             w.flush();
3184             w.close();
3185             ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote Nexus file: \"" + outfile_name + "\"" );
3186         }
3187         catch ( final IOException e ) {
3188             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() );
3189         }
3190     }
3191
3192     private static void writeToNexus( final String outfile_name,
3193                                       final DomainParsimonyCalculator domain_parsimony,
3194                                       final Phylogeny phylogeny ) {
3195         writeToNexus( outfile_name + surfacing.NEXUS_EXTERNAL_DOMAINS,
3196                       domain_parsimony.createMatrixOfDomainPresenceOrAbsence(),
3197                       phylogeny );
3198         writeToNexus( outfile_name + surfacing.NEXUS_EXTERNAL_DOMAIN_COMBINATIONS,
3199                       domain_parsimony.createMatrixOfBinaryDomainCombinationPresenceOrAbsence(),
3200                       phylogeny );
3201     }
3202
3203     private SurfacingUtil() {
3204         // Hidden constructor.
3205     }
3206 }