avoid multiple same dcs
[jalview.git] / forester / java / src / org / forester / surfacing / SurfacingUtil.java
1 // $Id:
2 //
3 // FORESTER -- software libraries and applications
4 // for evolutionary biology research and applications.
5 //
6 // Copyright (C) 2008-2009 Christian M. Zmasek
7 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
8 // All rights reserved
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 //
24 // Contact: phylosoft @ gmail . com
25 // WWW: www.phylosoft.org/forester
26
27 package org.forester.surfacing;
28
29 import java.io.BufferedWriter;
30 import java.io.File;
31 import java.io.FileWriter;
32 import java.io.IOException;
33 import java.io.Writer;
34 import java.text.DecimalFormat;
35 import java.text.NumberFormat;
36 import java.util.ArrayList;
37 import java.util.Collections;
38 import java.util.Comparator;
39 import java.util.HashMap;
40 import java.util.HashSet;
41 import java.util.List;
42 import java.util.Map;
43 import java.util.Set;
44 import java.util.SortedMap;
45 import java.util.SortedSet;
46 import java.util.TreeMap;
47 import java.util.TreeSet;
48 import java.util.regex.Matcher;
49 import java.util.regex.Pattern;
50
51 import org.forester.application.surfacing_old;
52 import org.forester.evoinference.distance.NeighborJoining;
53 import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix;
54 import org.forester.evoinference.matrix.character.CharacterStateMatrix;
55 import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates;
56 import org.forester.evoinference.matrix.character.CharacterStateMatrix.Format;
57 import org.forester.evoinference.matrix.character.CharacterStateMatrix.GainLossStates;
58 import org.forester.evoinference.matrix.distance.DistanceMatrix;
59 import org.forester.go.GoId;
60 import org.forester.go.GoNameSpace;
61 import org.forester.go.GoTerm;
62 import org.forester.go.GoUtils;
63 import org.forester.go.PfamToGoMapping;
64 import org.forester.io.parsers.nexus.NexusConstants;
65 import org.forester.io.writers.PhylogenyWriter;
66 import org.forester.phylogeny.Phylogeny;
67 import org.forester.phylogeny.PhylogenyMethods;
68 import org.forester.phylogeny.PhylogenyNode;
69 import org.forester.phylogeny.data.BinaryCharacters;
70 import org.forester.phylogeny.data.Confidence;
71 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
72 import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
73 import org.forester.surfacing.DomainSimilarityCalculator.GoAnnotationOutput;
74 import org.forester.surfacing.GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder;
75 import org.forester.util.AsciiHistogram;
76 import org.forester.util.BasicDescriptiveStatistics;
77 import org.forester.util.BasicTable;
78 import org.forester.util.BasicTableParser;
79 import org.forester.util.DescriptiveStatistics;
80 import org.forester.util.ForesterUtil;
81
82 public final class SurfacingUtil {
83
84     private final static NumberFormat       FORMATTER                        = new DecimalFormat( "0.0E0" );
85     private final static NumberFormat       FORMATTER_3                      = new DecimalFormat( "0.000" );
86     private static final Comparator<Domain> ASCENDING_CONFIDENCE_VALUE_ORDER = new Comparator<Domain>() {
87
88                                                                                  @Override
89                                                                                  public int compare( final Domain d1,
90                                                                                                      final Domain d2 ) {
91                                                                                      if ( d1.getPerSequenceEvalue() < d2
92                                                                                              .getPerSequenceEvalue() ) {
93                                                                                          return -1;
94                                                                                      }
95                                                                                      else if ( d1
96                                                                                              .getPerSequenceEvalue() > d2
97                                                                                              .getPerSequenceEvalue() ) {
98                                                                                          return 1;
99                                                                                      }
100                                                                                      else {
101                                                                                          return d1.compareTo( d2 );
102                                                                                      }
103                                                                                  }
104                                                                              };
105     public final static Pattern             PATTERN_SP_STYLE_TAXONOMY        = Pattern.compile( "^[A-Z0-9]{3,5}$" );
106
107     private SurfacingUtil() {
108         // Hidden constructor.
109     }
110
111     public static void addAllBinaryDomainCombinationToSet( final GenomeWideCombinableDomains genome,
112                                                            final SortedSet<BinaryDomainCombination> binary_domain_combinations ) {
113         final SortedMap<DomainId, CombinableDomains> all_cd = genome.getAllCombinableDomainsIds();
114         for( final DomainId domain_id : all_cd.keySet() ) {
115             binary_domain_combinations.addAll( all_cd.get( domain_id ).toBinaryDomainCombinations() );
116         }
117     }
118
119     public static void addAllDomainIdsToSet( final GenomeWideCombinableDomains genome,
120                                              final SortedSet<DomainId> domain_ids ) {
121         final SortedSet<DomainId> domains = genome.getAllDomainIds();
122         for( final DomainId domain : domains ) {
123             domain_ids.add( domain );
124         }
125     }
126
127     public static void addHtmlHead( final Writer w, final String title ) throws IOException {
128         w.write( SurfacingConstants.NL );
129         w.write( "<head>" );
130         w.write( "<title>" );
131         w.write( title );
132         w.write( "</title>" );
133         w.write( SurfacingConstants.NL );
134         w.write( "<style>" );
135         w.write( SurfacingConstants.NL );
136         w.write( "a:visited { color : #6633FF; text-decoration : none; }" );
137         w.write( SurfacingConstants.NL );
138         w.write( "a:link { color : #6633FF; text-decoration : none; }" );
139         w.write( SurfacingConstants.NL );
140         w.write( "a:active { color : #99FF00; text-decoration : none; }" );
141         w.write( SurfacingConstants.NL );
142         w.write( "a:hover { color : #FFFFFF; background-color : #99FF00; text-decoration : none; }" );
143         w.write( SurfacingConstants.NL );
144         w.write( "td { text-align: left; vertical-align: top; font-family: Verdana, Arial, Helvetica; font-size: 8pt}" );
145         w.write( SurfacingConstants.NL );
146         w.write( "h1 { color : #0000FF; font-family: Verdana, Arial, Helvetica; font-size: 18pt; font-weight: bold }" );
147         w.write( SurfacingConstants.NL );
148         w.write( "h2 { color : #0000FF; font-family: Verdana, Arial, Helvetica; font-size: 16pt; font-weight: bold }" );
149         w.write( SurfacingConstants.NL );
150         w.write( "</style>" );
151         w.write( SurfacingConstants.NL );
152         w.write( "</head>" );
153         w.write( SurfacingConstants.NL );
154     }
155
156     public static DescriptiveStatistics calculateDescriptiveStatisticsForMeanValues( final Set<DomainSimilarity> similarities ) {
157         final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
158         for( final DomainSimilarity similarity : similarities ) {
159             stats.addValue( similarity.getMeanSimilarityScore() );
160         }
161         return stats;
162     }
163
164     public static int calculateOverlap( final Domain domain, final List<Boolean> covered_positions ) {
165         int overlap_count = 0;
166         for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) {
167             if ( ( i < covered_positions.size() ) && ( covered_positions.get( i ) == true ) ) {
168                 ++overlap_count;
169             }
170         }
171         return overlap_count;
172     }
173
174     public static void checkForOutputFileWriteability( final File outfile ) {
175         final String error = ForesterUtil.isWritableFile( outfile );
176         if ( !ForesterUtil.isEmpty( error ) ) {
177             ForesterUtil.fatalError( surfacing_old.PRG_NAME, error );
178         }
179     }
180
181     private static SortedSet<String> collectAllDomainsChangedOnSubtree( final PhylogenyNode subtree_root,
182                                                                         final boolean get_gains ) {
183         final SortedSet<String> domains = new TreeSet<String>();
184         for( final PhylogenyNode descendant : PhylogenyMethods.getAllDescendants( subtree_root ) ) {
185             final BinaryCharacters chars = descendant.getNodeData().getBinaryCharacters();
186             if ( get_gains ) {
187                 domains.addAll( chars.getGainedCharacters() );
188             }
189             else {
190                 domains.addAll( chars.getLostCharacters() );
191             }
192         }
193         return domains;
194     }
195
196     public static void collectChangedDomainCombinationsFromBinaryStatesMatrixAsListToFile( final CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix,
197                                                                                            final BinaryDomainCombination.DomainCombinationType dc_type,
198                                                                                            final List<BinaryDomainCombination> all_binary_domains_combination_gained,
199                                                                                            final boolean get_gains ) {
200         final SortedSet<String> sorted_ids = new TreeSet<String>();
201         for( int i = 0; i < matrix.getNumberOfIdentifiers(); ++i ) {
202             sorted_ids.add( matrix.getIdentifier( i ) );
203         }
204         for( final String id : sorted_ids ) {
205             for( int c = 0; c < matrix.getNumberOfCharacters(); ++c ) {
206                 if ( ( get_gains && ( matrix.getState( id, c ) == CharacterStateMatrix.GainLossStates.GAIN ) )
207                         || ( !get_gains && ( matrix.getState( id, c ) == CharacterStateMatrix.GainLossStates.LOSS ) ) ) {
208                     if ( dc_type == BinaryDomainCombination.DomainCombinationType.DIRECTED_ADJACTANT ) {
209                         all_binary_domains_combination_gained.add( AdjactantDirectedBinaryDomainCombination
210                                 .createInstance( matrix.getCharacter( c ) ) );
211                     }
212                     else if ( dc_type == BinaryDomainCombination.DomainCombinationType.DIRECTED ) {
213                         all_binary_domains_combination_gained.add( DirectedBinaryDomainCombination
214                                 .createInstance( matrix.getCharacter( c ) ) );
215                     }
216                     else {
217                         all_binary_domains_combination_gained.add( BasicBinaryDomainCombination.createInstance( matrix
218                                 .getCharacter( c ) ) );
219                     }
220                 }
221             }
222         }
223     }
224
225     private static File createBaseDirForPerNodeDomainFiles( final String base_dir,
226                                                             final boolean domain_combinations,
227                                                             final CharacterStateMatrix.GainLossStates state,
228                                                             final String outfile ) {
229         File per_node_go_mapped_domain_gain_loss_files_base_dir = new File( new File( outfile ).getParent()
230                 + ForesterUtil.FILE_SEPARATOR + base_dir );
231         if ( !per_node_go_mapped_domain_gain_loss_files_base_dir.exists() ) {
232             per_node_go_mapped_domain_gain_loss_files_base_dir.mkdir();
233         }
234         if ( domain_combinations ) {
235             per_node_go_mapped_domain_gain_loss_files_base_dir = new File( per_node_go_mapped_domain_gain_loss_files_base_dir
236                     + ForesterUtil.FILE_SEPARATOR + "DC" );
237         }
238         else {
239             per_node_go_mapped_domain_gain_loss_files_base_dir = new File( per_node_go_mapped_domain_gain_loss_files_base_dir
240                     + ForesterUtil.FILE_SEPARATOR + "DOMAINS" );
241         }
242         if ( !per_node_go_mapped_domain_gain_loss_files_base_dir.exists() ) {
243             per_node_go_mapped_domain_gain_loss_files_base_dir.mkdir();
244         }
245         if ( state == GainLossStates.GAIN ) {
246             per_node_go_mapped_domain_gain_loss_files_base_dir = new File( per_node_go_mapped_domain_gain_loss_files_base_dir
247                     + ForesterUtil.FILE_SEPARATOR + "GAINS" );
248         }
249         else if ( state == GainLossStates.LOSS ) {
250             per_node_go_mapped_domain_gain_loss_files_base_dir = new File( per_node_go_mapped_domain_gain_loss_files_base_dir
251                     + ForesterUtil.FILE_SEPARATOR + "LOSSES" );
252         }
253         else {
254             per_node_go_mapped_domain_gain_loss_files_base_dir = new File( per_node_go_mapped_domain_gain_loss_files_base_dir
255                     + ForesterUtil.FILE_SEPARATOR + "PRESENT" );
256         }
257         if ( !per_node_go_mapped_domain_gain_loss_files_base_dir.exists() ) {
258             per_node_go_mapped_domain_gain_loss_files_base_dir.mkdir();
259         }
260         return per_node_go_mapped_domain_gain_loss_files_base_dir;
261     }
262
263     public static Map<DomainId, List<GoId>> createDomainIdToGoIdMap( final List<PfamToGoMapping> pfam_to_go_mappings ) {
264         final Map<DomainId, List<GoId>> domain_id_to_go_ids_map = new HashMap<DomainId, List<GoId>>( pfam_to_go_mappings
265                 .size() );
266         for( final PfamToGoMapping pfam_to_go : pfam_to_go_mappings ) {
267             if ( !domain_id_to_go_ids_map.containsKey( pfam_to_go.getKey() ) ) {
268                 domain_id_to_go_ids_map.put( pfam_to_go.getKey(), new ArrayList<GoId>() );
269             }
270             domain_id_to_go_ids_map.get( pfam_to_go.getKey() ).add( pfam_to_go.getValue() );
271         }
272         return domain_id_to_go_ids_map;
273     }
274
275     public static Map<DomainId, Set<String>> createDomainIdToSecondaryFeaturesMap( final File secondary_features_map_file )
276             throws IOException {
277         final BasicTable<String> primary_table = BasicTableParser.parse( secondary_features_map_file, "\t" );
278         final Map<DomainId, Set<String>> map = new TreeMap<DomainId, Set<String>>();
279         for( int r = 0; r < primary_table.getNumberOfRows(); ++r ) {
280             final DomainId domain_id = new DomainId( primary_table.getValue( 0, r ) );
281             if ( !map.containsKey( domain_id ) ) {
282                 map.put( domain_id, new HashSet<String>() );
283             }
284             map.get( domain_id ).add( primary_table.getValue( 1, r ) );
285         }
286         return map;
287     }
288
289     public static Phylogeny createNjTreeBasedOnMatrixToFile( final File nj_tree_outfile, final DistanceMatrix distance ) {
290         checkForOutputFileWriteability( nj_tree_outfile );
291         final NeighborJoining nj = NeighborJoining.createInstance();
292         final Phylogeny phylogeny = nj.execute( distance );
293         phylogeny.setName( nj_tree_outfile.getName() );
294         writePhylogenyToFile( phylogeny, nj_tree_outfile.toString() );
295         return phylogeny;
296     }
297
298     private static SortedSet<BinaryDomainCombination> createSetOfAllBinaryDomainCombinationsPerGenome( final GenomeWideCombinableDomains gwcd ) {
299         final SortedMap<DomainId, CombinableDomains> cds = gwcd.getAllCombinableDomainsIds();
300         final SortedSet<BinaryDomainCombination> binary_combinations = new TreeSet<BinaryDomainCombination>();
301         for( final DomainId domain_id : cds.keySet() ) {
302             final CombinableDomains cd = cds.get( domain_id );
303             binary_combinations.addAll( cd.toBinaryDomainCombinations() );
304         }
305         return binary_combinations;
306     }
307
308     public static void decoratePrintableDomainSimilarities( final SortedSet<DomainSimilarity> domain_similarities,
309                                                             final Detailedness detailedness,
310                                                             final GoAnnotationOutput go_annotation_output,
311                                                             final Map<GoId, GoTerm> go_id_to_term_map,
312                                                             final GoNameSpace go_namespace_limit ) {
313         if ( ( go_namespace_limit != null ) && ( ( go_id_to_term_map == null ) || go_id_to_term_map.isEmpty() ) ) {
314             throw new IllegalArgumentException( "attempt to use a GO namespace limit without a GO id to term map" );
315         }
316         for( final DomainSimilarity domain_similarity : domain_similarities ) {
317             if ( domain_similarity instanceof PrintableDomainSimilarity ) {
318                 final PrintableDomainSimilarity printable_domain_similarity = ( PrintableDomainSimilarity ) domain_similarity;
319                 printable_domain_similarity.setDetailedness( detailedness );
320                 printable_domain_similarity.setGoAnnotationOutput( go_annotation_output );
321                 printable_domain_similarity.setGoIdToTermMap( go_id_to_term_map );
322                 printable_domain_similarity.setGoNamespaceLimit( go_namespace_limit );
323             }
324         }
325     }
326
327     public static void executeDomainLengthAnalysis( final String[][] input_file_properties,
328                                                     final int number_of_genomes,
329                                                     final DomainLengthsTable domain_lengths_table,
330                                                     final File outfile ) throws IOException {
331         final DecimalFormat df = new DecimalFormat( "#.00" );
332         checkForOutputFileWriteability( outfile );
333         final BufferedWriter out = new BufferedWriter( new FileWriter( outfile ) );
334         out.write( "MEAN BASED STATISTICS PER SPECIES" );
335         out.write( ForesterUtil.LINE_SEPARATOR );
336         out.write( domain_lengths_table.createMeanBasedStatisticsPerSpeciesTable().toString() );
337         out.write( ForesterUtil.LINE_SEPARATOR );
338         out.write( ForesterUtil.LINE_SEPARATOR );
339         final List<DomainLengths> domain_lengths_list = domain_lengths_table.getDomainLengthsList();
340         out.write( "OUTLIER SPECIES PER DOMAIN (Z>=1.5)" );
341         out.write( ForesterUtil.LINE_SEPARATOR );
342         for( final DomainLengths domain_lengths : domain_lengths_list ) {
343             final List<Species> species_list = domain_lengths.getMeanBasedOutlierSpecies( 1.5 );
344             if ( species_list.size() > 0 ) {
345                 out.write( domain_lengths.getDomainId() + "\t" );
346                 for( final Species species : species_list ) {
347                     out.write( species + "\t" );
348                 }
349                 out.write( ForesterUtil.LINE_SEPARATOR );
350                 // DescriptiveStatistics stats_for_domain = domain_lengths
351                 //         .calculateMeanBasedStatistics();
352                 //AsciiHistogram histo = new AsciiHistogram( stats_for_domain );
353                 //System.out.println( histo.toStringBuffer( 40, '=', 60, 4 ).toString() );
354             }
355         }
356         out.write( ForesterUtil.LINE_SEPARATOR );
357         out.write( ForesterUtil.LINE_SEPARATOR );
358         out.write( "OUTLIER SPECIES (Z 1.0)" );
359         out.write( ForesterUtil.LINE_SEPARATOR );
360         final DescriptiveStatistics stats_for_all_species = domain_lengths_table
361                 .calculateMeanBasedStatisticsForAllSpecies();
362         out.write( stats_for_all_species.asSummary() );
363         out.write( ForesterUtil.LINE_SEPARATOR );
364         final AsciiHistogram histo = new AsciiHistogram( stats_for_all_species );
365         out.write( histo.toStringBuffer( 40, '=', 60, 4 ).toString() );
366         out.write( ForesterUtil.LINE_SEPARATOR );
367         final double population_sd = stats_for_all_species.sampleStandardDeviation();
368         final double population_mean = stats_for_all_species.arithmeticMean();
369         for( final Species species : domain_lengths_table.getSpecies() ) {
370             final double x = domain_lengths_table.calculateMeanBasedStatisticsForSpecies( species ).arithmeticMean();
371             final double z = ( x - population_mean ) / population_sd;
372             out.write( species + "\t" + z );
373             out.write( ForesterUtil.LINE_SEPARATOR );
374         }
375         out.write( ForesterUtil.LINE_SEPARATOR );
376         for( final Species species : domain_lengths_table.getSpecies() ) {
377             final DescriptiveStatistics stats_for_species = domain_lengths_table
378                     .calculateMeanBasedStatisticsForSpecies( species );
379             final double x = stats_for_species.arithmeticMean();
380             final double z = ( x - population_mean ) / population_sd;
381             if ( ( z <= -1.0 ) || ( z >= 1.0 ) ) {
382                 out.write( species + "\t" + df.format( z ) + "\t" + stats_for_species.asSummary() );
383                 out.write( ForesterUtil.LINE_SEPARATOR );
384             }
385         }
386         out.close();
387         //        final List<HistogramData> histogram_datas = new ArrayList<HistogramData>();
388         //        for( int i = 0; i < number_of_genomes; ++i ) {
389         //            final Species species = new BasicSpecies( input_file_properties[ i ][ 0 ] );
390         //            histogram_datas
391         //                    .add( new HistogramData( species.toString(), domain_lengths_table
392         //                            .calculateMeanBasedStatisticsForSpecies( species )
393         //                            .getDataAsDoubleArray(), 5, 600, null, 60 ) );
394         //        }
395         //        final HistogramsFrame hf = new HistogramsFrame( histogram_datas );
396         //        hf.setVisible( true );
397         System.gc();
398     }
399
400     /**
401      * 
402      * @param all_binary_domains_combination_lost_fitch 
403      * @param consider_directedness_and_adjacency_for_bin_combinations 
404      * @param all_binary_domains_combination_gained if null ignored, otherwise this is to list all binary domain combinations
405      * which were gained under unweighted (Fitch) parsimony.
406      */
407     public static void executeParsimonyAnalysis( final long random_number_seed_for_fitch_parsimony,
408                                                  final boolean radomize_fitch_parsimony,
409                                                  final String outfile_name,
410                                                  final DomainParsimonyCalculator domain_parsimony,
411                                                  final Phylogeny phylogeny,
412                                                  final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
413                                                  final Map<GoId, GoTerm> go_id_to_term_map,
414                                                  final GoNameSpace go_namespace_limit,
415                                                  final String parameters_str,
416                                                  final Map<DomainId, Set<String>>[] domain_id_to_secondary_features_maps,
417                                                  final SortedSet<DomainId> positive_filter,
418                                                  final boolean output_binary_domain_combinations_for_graphs,
419                                                  final List<BinaryDomainCombination> all_binary_domains_combination_gained_fitch,
420                                                  final List<BinaryDomainCombination> all_binary_domains_combination_lost_fitch,
421                                                  final BinaryDomainCombination.DomainCombinationType dc_type ) {
422         final String sep = ForesterUtil.LINE_SEPARATOR + "###################" + ForesterUtil.LINE_SEPARATOR;
423         final String date_time = ForesterUtil.getCurrentDateTime();
424         final SortedSet<String> all_pfams_encountered = new TreeSet<String>();
425         final SortedSet<String> all_pfams_gained_as_domains = new TreeSet<String>();
426         final SortedSet<String> all_pfams_lost_as_domains = new TreeSet<String>();
427         final SortedSet<String> all_pfams_gained_as_dom_combinations = new TreeSet<String>();
428         final SortedSet<String> all_pfams_lost_as_dom_combinations = new TreeSet<String>();
429         writeToNexus( outfile_name, domain_parsimony, phylogeny );
430         // DOLLO DOMAINS
431         // -------------
432         Phylogeny local_phylogeny_l = phylogeny.copy();
433         if ( ( positive_filter != null ) && ( positive_filter.size() > 0 ) ) {
434             domain_parsimony.executeDolloParsimonyOnDomainPresence( positive_filter );
435         }
436         else {
437             domain_parsimony.executeDolloParsimonyOnDomainPresence();
438         }
439         SurfacingUtil.writeMatrixToFile( domain_parsimony.getGainLossMatrix(), outfile_name
440                 + surfacing_old.PARSIMONY_OUTPUT_GL_SUFFIX_DOLLO_DOMAINS, Format.FORESTER );
441         SurfacingUtil.writeMatrixToFile( domain_parsimony.getGainLossCountsMatrix(), outfile_name
442                 + surfacing_old.PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_DOLLO_DOMAINS, Format.FORESTER );
443         SurfacingUtil.writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(),
444                                                            CharacterStateMatrix.GainLossStates.GAIN,
445                                                            outfile_name + surfacing_old.PARSIMONY_OUTPUT_DOLLO_GAINS_D,
446                                                            sep,
447                                                            ForesterUtil.LINE_SEPARATOR,
448                                                            null );
449         SurfacingUtil
450                 .writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(),
451                                                       CharacterStateMatrix.GainLossStates.LOSS,
452                                                       outfile_name + surfacing_old.PARSIMONY_OUTPUT_DOLLO_LOSSES_D,
453                                                       sep,
454                                                       ForesterUtil.LINE_SEPARATOR,
455                                                       null );
456         SurfacingUtil.writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(), null, outfile_name
457                 + surfacing_old.PARSIMONY_OUTPUT_DOLLO_PRESENT_D, sep, ForesterUtil.LINE_SEPARATOR, null );
458         //HTML:
459         writeBinaryStatesMatrixToList( domain_id_to_go_ids_map,
460                                        go_id_to_term_map,
461                                        go_namespace_limit,
462                                        false,
463                                        domain_parsimony.getGainLossMatrix(),
464                                        CharacterStateMatrix.GainLossStates.GAIN,
465                                        outfile_name + surfacing_old.PARSIMONY_OUTPUT_DOLLO_GAINS_HTML_D,
466                                        sep,
467                                        ForesterUtil.LINE_SEPARATOR,
468                                        "Dollo Parsimony | Gains | Domains",
469                                        "+",
470                                        domain_id_to_secondary_features_maps,
471                                        all_pfams_encountered,
472                                        all_pfams_gained_as_domains,
473                                        "_dollo_gains_d" );
474         writeBinaryStatesMatrixToList( domain_id_to_go_ids_map,
475                                        go_id_to_term_map,
476                                        go_namespace_limit,
477                                        false,
478                                        domain_parsimony.getGainLossMatrix(),
479                                        CharacterStateMatrix.GainLossStates.LOSS,
480                                        outfile_name + surfacing_old.PARSIMONY_OUTPUT_DOLLO_LOSSES_HTML_D,
481                                        sep,
482                                        ForesterUtil.LINE_SEPARATOR,
483                                        "Dollo Parsimony | Losses | Domains",
484                                        "-",
485                                        domain_id_to_secondary_features_maps,
486                                        all_pfams_encountered,
487                                        all_pfams_lost_as_domains,
488                                        "_dollo_losses_d" );
489         writeBinaryStatesMatrixToList( domain_id_to_go_ids_map,
490                                        go_id_to_term_map,
491                                        go_namespace_limit,
492                                        false,
493                                        domain_parsimony.getGainLossMatrix(),
494                                        null,
495                                        outfile_name + surfacing_old.PARSIMONY_OUTPUT_DOLLO_PRESENT_HTML_D,
496                                        sep,
497                                        ForesterUtil.LINE_SEPARATOR,
498                                        "Dollo Parsimony | Present | Domains",
499                                        "",
500                                        domain_id_to_secondary_features_maps,
501                                        all_pfams_encountered,
502                                        null,
503                                        "_dollo_present_d" );
504         preparePhylogeny( local_phylogeny_l,
505                           domain_parsimony,
506                           date_time,
507                           "Dollo parsimony on domain presence/absence",
508                           "dollo_on_domains_" + outfile_name,
509                           parameters_str );
510         SurfacingUtil.writePhylogenyToFile( local_phylogeny_l, outfile_name
511                 + surfacing_old.DOMAINS_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO );
512         try {
513             writeAllDomainsChangedOnAllSubtrees( local_phylogeny_l, true, outfile_name, "_dollo_all_gains_d" );
514             writeAllDomainsChangedOnAllSubtrees( local_phylogeny_l, false, outfile_name, "_dollo_all_losses_d" );
515         }
516         catch ( final IOException e ) {
517             e.printStackTrace();
518             ForesterUtil.fatalError( surfacing_old.PRG_NAME, e.getLocalizedMessage() );
519         }
520         if ( domain_parsimony.calculateNumberOfBinaryDomainCombination() > 0 ) {
521             // FITCH DOMAIN COMBINATIONS
522             // -------------------------
523             local_phylogeny_l = phylogeny.copy();
524             String randomization = "no";
525             if ( radomize_fitch_parsimony ) {
526                 domain_parsimony.executeFitchParsimonyOnBinaryDomainCombintion( random_number_seed_for_fitch_parsimony );
527                 randomization = "yes, seed = " + random_number_seed_for_fitch_parsimony;
528             }
529             else {
530                 domain_parsimony.executeFitchParsimonyOnBinaryDomainCombintion( false );
531             }
532             SurfacingUtil.writeMatrixToFile( domain_parsimony.getGainLossMatrix(), outfile_name
533                     + surfacing_old.PARSIMONY_OUTPUT_GL_SUFFIX_FITCH_BINARY_COMBINATIONS, Format.FORESTER );
534             SurfacingUtil.writeMatrixToFile( domain_parsimony.getGainLossCountsMatrix(), outfile_name
535                     + surfacing_old.PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_FITCH_BINARY_COMBINATIONS, Format.FORESTER );
536             SurfacingUtil.writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(),
537                                                                CharacterStateMatrix.GainLossStates.GAIN,
538                                                                outfile_name
539                                                                        + surfacing_old.PARSIMONY_OUTPUT_FITCH_GAINS_BC,
540                                                                sep,
541                                                                ForesterUtil.LINE_SEPARATOR,
542                                                                null );
543             SurfacingUtil
544                     .writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(),
545                                                           CharacterStateMatrix.GainLossStates.LOSS,
546                                                           outfile_name + surfacing_old.PARSIMONY_OUTPUT_FITCH_LOSSES_BC,
547                                                           sep,
548                                                           ForesterUtil.LINE_SEPARATOR,
549                                                           null );
550             SurfacingUtil.writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(), null, outfile_name
551                     + surfacing_old.PARSIMONY_OUTPUT_FITCH_PRESENT_BC, sep, ForesterUtil.LINE_SEPARATOR, null );
552             if ( all_binary_domains_combination_gained_fitch != null ) {
553                 collectChangedDomainCombinationsFromBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(),
554                                                                                     dc_type,
555                                                                                     all_binary_domains_combination_gained_fitch,
556                                                                                     true );
557             }
558             if ( all_binary_domains_combination_lost_fitch != null ) {
559                 collectChangedDomainCombinationsFromBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(),
560                                                                                     dc_type,
561                                                                                     all_binary_domains_combination_lost_fitch,
562                                                                                     false );
563             }
564             if ( output_binary_domain_combinations_for_graphs ) {
565                 SurfacingUtil
566                         .writeBinaryStatesMatrixAsListToFileForBinaryCombinationsForGraphAnalysis( domain_parsimony
567                                                                                                            .getGainLossMatrix(),
568                                                                                                    null,
569                                                                                                    outfile_name
570                                                                                                            + surfacing_old.PARSIMONY_OUTPUT_FITCH_PRESENT_BC_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS,
571                                                                                                    sep,
572                                                                                                    ForesterUtil.LINE_SEPARATOR,
573                                                                                                    BinaryDomainCombination.OutputFormat.DOT );
574             }
575             // HTML:
576             writeBinaryStatesMatrixToList( domain_id_to_go_ids_map,
577                                            go_id_to_term_map,
578                                            go_namespace_limit,
579                                            true,
580                                            domain_parsimony.getGainLossMatrix(),
581                                            CharacterStateMatrix.GainLossStates.GAIN,
582                                            outfile_name + surfacing_old.PARSIMONY_OUTPUT_FITCH_GAINS_HTML_BC,
583                                            sep,
584                                            ForesterUtil.LINE_SEPARATOR,
585                                            "Fitch Parsimony | Gains | Domain Combinations",
586                                            "+",
587                                            null,
588                                            all_pfams_encountered,
589                                            all_pfams_gained_as_dom_combinations,
590                                            "_fitch_gains_dc" );
591             writeBinaryStatesMatrixToList( domain_id_to_go_ids_map,
592                                            go_id_to_term_map,
593                                            go_namespace_limit,
594                                            true,
595                                            domain_parsimony.getGainLossMatrix(),
596                                            CharacterStateMatrix.GainLossStates.LOSS,
597                                            outfile_name + surfacing_old.PARSIMONY_OUTPUT_FITCH_LOSSES_HTML_BC,
598                                            sep,
599                                            ForesterUtil.LINE_SEPARATOR,
600                                            "Fitch Parsimony | Losses | Domain Combinations",
601                                            "-",
602                                            null,
603                                            all_pfams_encountered,
604                                            all_pfams_lost_as_dom_combinations,
605                                            "_fitch_losses_dc" );
606             writeBinaryStatesMatrixToList( domain_id_to_go_ids_map,
607                                            go_id_to_term_map,
608                                            go_namespace_limit,
609                                            true,
610                                            domain_parsimony.getGainLossMatrix(),
611                                            null,
612                                            outfile_name + surfacing_old.PARSIMONY_OUTPUT_FITCH_PRESENT_HTML_BC,
613                                            sep,
614                                            ForesterUtil.LINE_SEPARATOR,
615                                            "Fitch Parsimony | Present | Domain Combinations",
616                                            "",
617                                            null,
618                                            all_pfams_encountered,
619                                            null,
620                                            "_fitch_present_dc" );
621             writeAllEncounteredPfamsToFile( domain_id_to_go_ids_map,
622                                             go_id_to_term_map,
623                                             outfile_name,
624                                             all_pfams_encountered );
625             writePfamsToFile( outfile_name + surfacing_old.ALL_PFAMS_GAINED_AS_DOMAINS_SUFFIX,
626                               all_pfams_gained_as_domains );
627             writePfamsToFile( outfile_name + surfacing_old.ALL_PFAMS_LOST_AS_DOMAINS_SUFFIX, all_pfams_lost_as_domains );
628             writePfamsToFile( outfile_name + surfacing_old.ALL_PFAMS_GAINED_AS_DC_SUFFIX,
629                               all_pfams_gained_as_dom_combinations );
630             writePfamsToFile( outfile_name + surfacing_old.ALL_PFAMS_LOST_AS_DC_SUFFIX,
631                               all_pfams_lost_as_dom_combinations );
632             preparePhylogeny( local_phylogeny_l,
633                               domain_parsimony,
634                               date_time,
635                               "Fitch parsimony on binary domain combination presence/absence randomization: "
636                                       + randomization,
637                               "fitch_on_binary_domain_combinations_" + outfile_name,
638                               parameters_str );
639             SurfacingUtil.writePhylogenyToFile( local_phylogeny_l, outfile_name
640                     + surfacing_old.BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH );
641         }
642     }
643
644     public static void executeParsimonyAnalysisForSecondaryFeatures( final String outfile_name,
645                                                                      final DomainParsimonyCalculator secondary_features_parsimony,
646                                                                      final Phylogeny phylogeny,
647                                                                      final String parameters_str,
648                                                                      final Map<Species, MappingResults> mapping_results_map ) {
649         final String sep = ForesterUtil.LINE_SEPARATOR + "###################" + ForesterUtil.LINE_SEPARATOR;
650         final String date_time = ForesterUtil.getCurrentDateTime();
651         System.out.println();
652         writeToNexus( outfile_name + surfacing_old.NEXUS_SECONDARY_FEATURES,
653                       secondary_features_parsimony.createMatrixOfSecondaryFeaturePresenceOrAbsence( null ),
654                       phylogeny );
655         final Phylogeny local_phylogeny_copy = phylogeny.copy();
656         secondary_features_parsimony.executeDolloParsimonyOnSecondaryFeatures( mapping_results_map );
657         SurfacingUtil.writeMatrixToFile( secondary_features_parsimony.getGainLossMatrix(), outfile_name
658                 + surfacing_old.PARSIMONY_OUTPUT_GL_SUFFIX_DOLLO_SECONDARY_FEATURES, Format.FORESTER );
659         SurfacingUtil.writeMatrixToFile( secondary_features_parsimony.getGainLossCountsMatrix(), outfile_name
660                 + surfacing_old.PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_DOLLO_SECONDARY_FEATURES, Format.FORESTER );
661         SurfacingUtil
662                 .writeBinaryStatesMatrixAsListToFile( secondary_features_parsimony.getGainLossMatrix(),
663                                                       CharacterStateMatrix.GainLossStates.GAIN,
664                                                       outfile_name
665                                                               + surfacing_old.PARSIMONY_OUTPUT_DOLLO_GAINS_SECONDARY_FEATURES,
666                                                       sep,
667                                                       ForesterUtil.LINE_SEPARATOR,
668                                                       null );
669         SurfacingUtil
670                 .writeBinaryStatesMatrixAsListToFile( secondary_features_parsimony.getGainLossMatrix(),
671                                                       CharacterStateMatrix.GainLossStates.LOSS,
672                                                       outfile_name
673                                                               + surfacing_old.PARSIMONY_OUTPUT_DOLLO_LOSSES_SECONDARY_FEATURES,
674                                                       sep,
675                                                       ForesterUtil.LINE_SEPARATOR,
676                                                       null );
677         SurfacingUtil
678                 .writeBinaryStatesMatrixAsListToFile( secondary_features_parsimony.getGainLossMatrix(),
679                                                       null,
680                                                       outfile_name
681                                                               + surfacing_old.PARSIMONY_OUTPUT_DOLLO_PRESENT_SECONDARY_FEATURES,
682                                                       sep,
683                                                       ForesterUtil.LINE_SEPARATOR,
684                                                       null );
685         preparePhylogeny( local_phylogeny_copy,
686                           secondary_features_parsimony,
687                           date_time,
688                           "Dollo parsimony on secondary feature presence/absence",
689                           "dollo_on_secondary_features_" + outfile_name,
690                           parameters_str );
691         SurfacingUtil.writePhylogenyToFile( local_phylogeny_copy, outfile_name
692                 + surfacing_old.SECONDARY_FEATURES_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO );
693     }
694
695     public static void extractProteinNames( final List<Protein> proteins,
696                                             final List<DomainId> query_domain_ids_nc_order,
697                                             final Writer out,
698                                             final String separator ) throws IOException {
699         for( final Protein protein : proteins ) {
700             if ( protein.contains( query_domain_ids_nc_order, true ) ) {
701                 out.write( protein.getSpecies().getSpeciesId() );
702                 out.write( separator );
703                 out.write( protein.getProteinId().getId() );
704                 out.write( separator );
705                 out.write( "[" );
706                 final Set<DomainId> visited_domain_ids = new HashSet<DomainId>();
707                 boolean first = true;
708                 for( final Domain domain : protein.getProteinDomains() ) {
709                     if ( !visited_domain_ids.contains( domain.getDomainId() ) ) {
710                         visited_domain_ids.add( domain.getDomainId() );
711                         if ( first ) {
712                             first = false;
713                         }
714                         else {
715                             out.write( " " );
716                         }
717                         out.write( domain.getDomainId().getId() );
718                         out.write( " {" );
719                         out.write( "" + domain.getTotalCount() );
720                         out.write( "}" );
721                     }
722                 }
723                 out.write( "]" );
724                 out.write( separator );
725                 if ( !( ForesterUtil.isEmpty( protein.getDescription() ) || protein.getDescription()
726                         .equals( SurfacingConstants.NONE ) ) ) {
727                     out.write( protein.getDescription() );
728                 }
729                 out.write( separator );
730                 if ( !( ForesterUtil.isEmpty( protein.getAccession() ) || protein.getAccession()
731                         .equals( SurfacingConstants.NONE ) ) ) {
732                     out.write( protein.getAccession() );
733                 }
734                 out.write( SurfacingConstants.NL );
735             }
736         }
737         out.flush();
738     }
739
740     public static void extractProteinNames( final SortedMap<Species, List<Protein>> protein_lists_per_species,
741                                             final DomainId domain_id,
742                                             final Writer out,
743                                             final String separator ) throws IOException {
744         for( final Species species : protein_lists_per_species.keySet() ) {
745             for( final Protein protein : protein_lists_per_species.get( species ) ) {
746                 final List<Domain> domains = protein.getProteinDomains( domain_id );
747                 if ( domains.size() > 0 ) {
748                     final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
749                     for( final Domain domain : domains ) {
750                         stats.addValue( domain.getPerSequenceEvalue() );
751                     }
752                     out.write( protein.getSpecies().getSpeciesId() );
753                     out.write( separator );
754                     out.write( protein.getProteinId().getId() );
755                     out.write( separator );
756                     out.write( "[" + FORMATTER.format( stats.median() ) + "]" );
757                     out.write( separator );
758                     if ( !( ForesterUtil.isEmpty( protein.getDescription() ) || protein.getDescription()
759                             .equals( SurfacingConstants.NONE ) ) ) {
760                         out.write( protein.getDescription() );
761                     }
762                     out.write( separator );
763                     if ( !( ForesterUtil.isEmpty( protein.getAccession() ) || protein.getAccession()
764                             .equals( SurfacingConstants.NONE ) ) ) {
765                         out.write( protein.getAccession() );
766                     }
767                     out.write( SurfacingConstants.NL );
768                 }
769             }
770         }
771         out.flush();
772     }
773
774     public static SortedSet<DomainId> getAllDomainIds( final List<GenomeWideCombinableDomains> gwcd_list ) {
775         final SortedSet<DomainId> all_domains_ids = new TreeSet<DomainId>();
776         for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
777             final Set<DomainId> all_domains = gwcd.getAllDomainIds();
778             //    for( final Domain domain : all_domains ) {
779             all_domains_ids.addAll( all_domains );
780             //    }
781         }
782         return all_domains_ids;
783     }
784
785     public static SortedMap<String, Integer> getDomainCounts( final List<Protein> protein_domain_collections ) {
786         final SortedMap<String, Integer> map = new TreeMap<String, Integer>();
787         for( final Protein protein_domain_collection : protein_domain_collections ) {
788             for( final Object name : protein_domain_collection.getProteinDomains() ) {
789                 final BasicDomain protein_domain = ( BasicDomain ) name;
790                 final String id = protein_domain.getDomainId().getId();
791                 if ( map.containsKey( id ) ) {
792                     map.put( id, map.get( id ) + 1 );
793                 }
794                 else {
795                     map.put( id, 1 );
796                 }
797             }
798         }
799         return map;
800     }
801
802     public static int getNumberOfNodesLackingName( final Phylogeny p, final StringBuilder names ) {
803         final PhylogenyNodeIterator it = p.iteratorPostorder();
804         int c = 0;
805         while ( it.hasNext() ) {
806             final PhylogenyNode n = it.next();
807             if ( ForesterUtil.isEmpty( n.getName() )
808                     && ( !n.getNodeData().isHasTaxonomy() || ForesterUtil.isEmpty( n.getNodeData().getTaxonomy()
809                             .getScientificName() ) ) ) {
810                 if ( n.getParent() != null ) {
811                     names.append( " " );
812                     names.append( n.getParent().getName() );
813                 }
814                 ++c;
815             }
816         }
817         return c;
818     }
819
820     /**
821      * Returns true is Domain domain falls in an uninterrupted stretch of
822      * covered positions.
823      * 
824      * @param domain
825      * @param covered_positions
826      * @return
827      */
828     public static boolean isEngulfed( final Domain domain, final List<Boolean> covered_positions ) {
829         for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) {
830             if ( ( i >= covered_positions.size() ) || ( covered_positions.get( i ) != true ) ) {
831                 return false;
832             }
833         }
834         return true;
835     }
836
837     public static void preparePhylogeny( final Phylogeny p,
838                                          final DomainParsimonyCalculator domain_parsimony,
839                                          final String date_time,
840                                          final String method,
841                                          final String name,
842                                          final String parameters_str ) {
843         domain_parsimony.decoratePhylogenyWithDomains( p );
844         final StringBuilder desc = new StringBuilder();
845         desc.append( "[Method: " + method + "] [Date: " + date_time + "] " );
846         desc.append( "[Cost: " + domain_parsimony.getCost() + "] " );
847         desc.append( "[Gains: " + domain_parsimony.getTotalGains() + "] " );
848         desc.append( "[Losses: " + domain_parsimony.getTotalLosses() + "] " );
849         desc.append( "[Unchanged: " + domain_parsimony.getTotalUnchanged() + "] " );
850         desc.append( "[Parameters: " + parameters_str + "]" );
851         p.setName( name );
852         p.setDescription( desc.toString() );
853         p.setConfidence( new Confidence( domain_parsimony.getCost(), "parsimony" ) );
854         p.setRerootable( false );
855         p.setRooted( true );
856     }
857
858     /**
859      * 
860      * Example regarding engulfment: ------------0.1 ----------0.2 --0.3 =>
861      * domain with 0.3 is ignored
862      * 
863      * -----------0.1 ----------0.2 --0.3 => domain with 0.3 is ignored
864      * 
865      * 
866      * ------------0.1 ----------0.3 --0.2 => domains with 0.3 and 0.2 are _not_
867      * ignored
868      * 
869      * @param max_allowed_overlap
870      *            maximal allowed overlap (inclusive) to be still considered not
871      *            overlapping (zero or negative value to allow any overlap)
872      * @param remove_engulfed_domains
873      *            to remove domains which are completely engulfed by coverage of
874      *            domains with better support
875      * @param protein
876      * @return
877      */
878     public static Protein removeOverlappingDomains( final int max_allowed_overlap,
879                                                     final boolean remove_engulfed_domains,
880                                                     final Protein protein ) {
881         final Protein pruned_protein = new BasicProtein( protein.getProteinId().getId(), protein.getSpecies()
882                 .getSpeciesId() );
883         final List<Domain> sorted = SurfacingUtil.sortDomainsWithAscendingConfidenceValues( protein );
884         final List<Boolean> covered_positions = new ArrayList<Boolean>();
885         for( final Domain domain : sorted ) {
886             if ( ( ( max_allowed_overlap < 0 ) || ( SurfacingUtil.calculateOverlap( domain, covered_positions ) <= max_allowed_overlap ) )
887                     && ( !remove_engulfed_domains || !isEngulfed( domain, covered_positions ) ) ) {
888                 final int covered_positions_size = covered_positions.size();
889                 for( int i = covered_positions_size; i < domain.getFrom(); ++i ) {
890                     covered_positions.add( false );
891                 }
892                 final int new_covered_positions_size = covered_positions.size();
893                 for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) {
894                     if ( i < new_covered_positions_size ) {
895                         covered_positions.set( i, true );
896                     }
897                     else {
898                         covered_positions.add( true );
899                     }
900                 }
901                 pruned_protein.addProteinDomain( domain );
902             }
903         }
904         return pruned_protein;
905     }
906
907     static List<Domain> sortDomainsWithAscendingConfidenceValues( final Protein protein ) {
908         final List<Domain> domains = new ArrayList<Domain>();
909         for( final Domain d : protein.getProteinDomains() ) {
910             domains.add( d );
911         }
912         Collections.sort( domains, SurfacingUtil.ASCENDING_CONFIDENCE_VALUE_ORDER );
913         return domains;
914     }
915
916     public static void writeAllDomainsChangedOnAllSubtrees( final Phylogeny p,
917                                                             final boolean get_gains,
918                                                             final String outdir,
919                                                             final String suffix_for_filename ) throws IOException {
920         CharacterStateMatrix.GainLossStates state = CharacterStateMatrix.GainLossStates.GAIN;
921         if ( !get_gains ) {
922             state = CharacterStateMatrix.GainLossStates.LOSS;
923         }
924         final File base_dir = createBaseDirForPerNodeDomainFiles( surfacing_old.BASE_DIRECTORY_PER_SUBTREE_DOMAIN_GAIN_LOSS_FILES,
925                                                                   false,
926                                                                   state,
927                                                                   outdir );
928         for( final PhylogenyNodeIterator it = p.iteratorPostorder(); it.hasNext(); ) {
929             final PhylogenyNode node = it.next();
930             if ( !node.isExternal() ) {
931                 final SortedSet<String> domains = collectAllDomainsChangedOnSubtree( node, get_gains );
932                 if ( domains.size() > 0 ) {
933                     final Writer writer = ForesterUtil.createBufferedWriter( base_dir + ForesterUtil.FILE_SEPARATOR
934                             + node.getName() + suffix_for_filename );
935                     for( final String domain : domains ) {
936                         writer.write( domain );
937                         writer.write( ForesterUtil.LINE_SEPARATOR );
938                     }
939                     writer.close();
940                 }
941             }
942         }
943     }
944
945     private static void writeAllEncounteredPfamsToFile( final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
946                                                         final Map<GoId, GoTerm> go_id_to_term_map,
947                                                         final String outfile_name,
948                                                         final SortedSet<String> all_pfams_encountered ) {
949         final File all_pfams_encountered_file = new File( outfile_name + surfacing_old.ALL_PFAMS_ENCOUNTERED_SUFFIX );
950         final File all_pfams_encountered_with_go_annotation_file = new File( outfile_name
951                 + surfacing_old.ALL_PFAMS_ENCOUNTERED_WITH_GO_ANNOTATION_SUFFIX );
952         final File encountered_pfams_summary_file = new File( outfile_name
953                 + surfacing_old.ENCOUNTERED_PFAMS_SUMMARY_SUFFIX );
954         int biological_process_counter = 0;
955         int cellular_component_counter = 0;
956         int molecular_function_counter = 0;
957         int pfams_with_mappings_counter = 0;
958         int pfams_without_mappings_counter = 0;
959         int pfams_without_mappings_to_bp_or_mf_counter = 0;
960         int pfams_with_mappings_to_bp_or_mf_counter = 0;
961         try {
962             final Writer all_pfams_encountered_writer = new BufferedWriter( new FileWriter( all_pfams_encountered_file ) );
963             final Writer all_pfams_encountered_with_go_annotation_writer = new BufferedWriter( new FileWriter( all_pfams_encountered_with_go_annotation_file ) );
964             final Writer summary_writer = new BufferedWriter( new FileWriter( encountered_pfams_summary_file ) );
965             summary_writer.write( "# Pfam to GO mapping summary" );
966             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
967             summary_writer.write( "# Actual summary is at the end of this file." );
968             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
969             summary_writer.write( "# Encountered Pfams without a GO mapping:" );
970             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
971             for( final String pfam : all_pfams_encountered ) {
972                 all_pfams_encountered_writer.write( pfam );
973                 all_pfams_encountered_writer.write( ForesterUtil.LINE_SEPARATOR );
974                 final DomainId domain_id = new DomainId( pfam );
975                 if ( domain_id_to_go_ids_map.containsKey( domain_id ) ) {
976                     ++pfams_with_mappings_counter;
977                     all_pfams_encountered_with_go_annotation_writer.write( pfam );
978                     all_pfams_encountered_with_go_annotation_writer.write( ForesterUtil.LINE_SEPARATOR );
979                     final List<GoId> go_ids = domain_id_to_go_ids_map.get( domain_id );
980                     boolean maps_to_bp = false;
981                     boolean maps_to_cc = false;
982                     boolean maps_to_mf = false;
983                     for( final GoId go_id : go_ids ) {
984                         final GoTerm go_term = go_id_to_term_map.get( go_id );
985                         if ( go_term.getGoNameSpace().isBiologicalProcess() ) {
986                             maps_to_bp = true;
987                         }
988                         else if ( go_term.getGoNameSpace().isCellularComponent() ) {
989                             maps_to_cc = true;
990                         }
991                         else if ( go_term.getGoNameSpace().isMolecularFunction() ) {
992                             maps_to_mf = true;
993                         }
994                     }
995                     if ( maps_to_bp ) {
996                         ++biological_process_counter;
997                     }
998                     if ( maps_to_cc ) {
999                         ++cellular_component_counter;
1000                     }
1001                     if ( maps_to_mf ) {
1002                         ++molecular_function_counter;
1003                     }
1004                     if ( maps_to_bp || maps_to_mf ) {
1005                         ++pfams_with_mappings_to_bp_or_mf_counter;
1006                     }
1007                     else {
1008                         ++pfams_without_mappings_to_bp_or_mf_counter;
1009                     }
1010                 }
1011                 else {
1012                     ++pfams_without_mappings_to_bp_or_mf_counter;
1013                     ++pfams_without_mappings_counter;
1014                     summary_writer.write( pfam );
1015                     summary_writer.write( ForesterUtil.LINE_SEPARATOR );
1016                 }
1017             }
1018             all_pfams_encountered_writer.close();
1019             all_pfams_encountered_with_go_annotation_writer.close();
1020             ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Wrote all [" + all_pfams_encountered.size()
1021                     + "] encountered Pfams to: \"" + all_pfams_encountered_file + "\"" );
1022             ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Wrote all [" + pfams_with_mappings_counter
1023                     + "] encountered Pfams with GO mappings to: \"" + all_pfams_encountered_with_go_annotation_file
1024                     + "\"" );
1025             ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Wrote summary (including all ["
1026                     + pfams_without_mappings_counter + "] encountered Pfams without GO mappings) to: \""
1027                     + encountered_pfams_summary_file + "\"" );
1028             ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Sum of Pfams encountered                : "
1029                     + all_pfams_encountered.size() );
1030             ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Pfams without a mapping                 : "
1031                     + pfams_without_mappings_counter + " ["
1032                     + ( 100 * pfams_without_mappings_counter / all_pfams_encountered.size() ) + "%]" );
1033             ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Pfams without mapping to proc. or func. : "
1034                     + pfams_without_mappings_to_bp_or_mf_counter + " ["
1035                     + ( 100 * pfams_without_mappings_to_bp_or_mf_counter / all_pfams_encountered.size() ) + "%]" );
1036             ForesterUtil.programMessage( surfacing_old.PRG_NAME,
1037                                          "Pfams with a mapping                    : " + pfams_with_mappings_counter
1038                                                  + " ["
1039                                                  + ( 100 * pfams_with_mappings_counter / all_pfams_encountered.size() )
1040                                                  + "%]" );
1041             ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Pfams with a mapping to proc. or func.  : "
1042                     + pfams_with_mappings_to_bp_or_mf_counter + " ["
1043                     + ( 100 * pfams_with_mappings_to_bp_or_mf_counter / all_pfams_encountered.size() ) + "%]" );
1044             ForesterUtil.programMessage( surfacing_old.PRG_NAME,
1045                                          "Pfams with mapping to biological process: " + biological_process_counter
1046                                                  + " ["
1047                                                  + ( 100 * biological_process_counter / all_pfams_encountered.size() )
1048                                                  + "%]" );
1049             ForesterUtil.programMessage( surfacing_old.PRG_NAME,
1050                                          "Pfams with mapping to molecular function: " + molecular_function_counter
1051                                                  + " ["
1052                                                  + ( 100 * molecular_function_counter / all_pfams_encountered.size() )
1053                                                  + "%]" );
1054             ForesterUtil.programMessage( surfacing_old.PRG_NAME,
1055                                          "Pfams with mapping to cellular component: " + cellular_component_counter
1056                                                  + " ["
1057                                                  + ( 100 * cellular_component_counter / all_pfams_encountered.size() )
1058                                                  + "%]" );
1059             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
1060             summary_writer.write( "# Sum of Pfams encountered                : " + all_pfams_encountered.size() );
1061             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
1062             summary_writer.write( "# Pfams without a mapping                 : " + pfams_without_mappings_counter
1063                     + " [" + ( 100 * pfams_without_mappings_counter / all_pfams_encountered.size() ) + "%]" );
1064             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
1065             summary_writer.write( "# Pfams without mapping to proc. or func. : "
1066                     + pfams_without_mappings_to_bp_or_mf_counter + " ["
1067                     + ( 100 * pfams_without_mappings_to_bp_or_mf_counter / all_pfams_encountered.size() ) + "%]" );
1068             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
1069             summary_writer.write( "# Pfams with a mapping                    : " + pfams_with_mappings_counter + " ["
1070                     + ( 100 * pfams_with_mappings_counter / all_pfams_encountered.size() ) + "%]" );
1071             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
1072             summary_writer.write( "# Pfams with a mapping to proc. or func.  : "
1073                     + pfams_with_mappings_to_bp_or_mf_counter + " ["
1074                     + ( 100 * pfams_with_mappings_to_bp_or_mf_counter / all_pfams_encountered.size() ) + "%]" );
1075             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
1076             summary_writer.write( "# Pfams with mapping to biological process: " + biological_process_counter + " ["
1077                     + ( 100 * biological_process_counter / all_pfams_encountered.size() ) + "%]" );
1078             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
1079             summary_writer.write( "# Pfams with mapping to molecular function: " + molecular_function_counter + " ["
1080                     + ( 100 * molecular_function_counter / all_pfams_encountered.size() ) + "%]" );
1081             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
1082             summary_writer.write( "# Pfams with mapping to cellular component: " + cellular_component_counter + " ["
1083                     + ( 100 * cellular_component_counter / all_pfams_encountered.size() ) + "%]" );
1084             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
1085             summary_writer.close();
1086         }
1087         catch ( final IOException e ) {
1088             ForesterUtil.printWarningMessage( surfacing_old.PRG_NAME, "Failure to write: " + e );
1089         }
1090     }
1091
1092     public static void writeBinaryDomainCombinationsFileForGraphAnalysis( final String[][] input_file_properties,
1093                                                                           final File output_dir,
1094                                                                           final GenomeWideCombinableDomains gwcd,
1095                                                                           final int i,
1096                                                                           final GenomeWideCombinableDomainsSortOrder dc_sort_order ) {
1097         File dc_outfile_dot = new File( input_file_properties[ i ][ 0 ]
1098                 + surfacing_old.DOMAIN_COMBINITONS_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS );
1099         if ( output_dir != null ) {
1100             dc_outfile_dot = new File( output_dir + ForesterUtil.FILE_SEPARATOR + dc_outfile_dot );
1101         }
1102         checkForOutputFileWriteability( dc_outfile_dot );
1103         final SortedSet<BinaryDomainCombination> binary_combinations = createSetOfAllBinaryDomainCombinationsPerGenome( gwcd );
1104         try {
1105             final BufferedWriter out_dot = new BufferedWriter( new FileWriter( dc_outfile_dot ) );
1106             for( final BinaryDomainCombination bdc : binary_combinations ) {
1107                 out_dot.write( bdc.toGraphDescribingLanguage( BinaryDomainCombination.OutputFormat.DOT, null, null )
1108                         .toString() );
1109                 out_dot.write( SurfacingConstants.NL );
1110             }
1111             out_dot.close();
1112         }
1113         catch ( final IOException e ) {
1114             ForesterUtil.fatalError( surfacing_old.PRG_NAME, e.getMessage() );
1115         }
1116         ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Wrote binary domain combination for \""
1117                 + input_file_properties[ i ][ 0 ] + "\" (" + input_file_properties[ i ][ 1 ] + ", "
1118                 + input_file_properties[ i ][ 2 ] + ") to: \"" + dc_outfile_dot + "\"" );
1119     }
1120
1121     /*
1122      * species | protein id | n-terminal domain | c-terminal domain | n-terminal domain per domain E-value | c-terminal domain per domain E-value
1123      * 
1124      * 
1125      */
1126     static public StringBuffer proteinToDomainCombinations( final Protein protein,
1127                                                             final String protein_id,
1128                                                             final String separator ) {
1129         final StringBuffer sb = new StringBuffer();
1130         if ( protein.getSpecies() == null ) {
1131             throw new IllegalArgumentException( "species must not be null" );
1132         }
1133         if ( ForesterUtil.isEmpty( protein.getSpecies().getSpeciesId() ) ) {
1134             throw new IllegalArgumentException( "species id must not be empty" );
1135         }
1136         final List<Domain> domains = protein.getProteinDomains();
1137         if ( domains.size() > 1 ) {
1138             final Map<String, Integer> counts = new HashMap<String, Integer>();
1139             for( final Domain domain : domains ) {
1140                 final String id = domain.getDomainId().getId();
1141                 if ( counts.containsKey( id ) ) {
1142                     counts.put( id, counts.get( id ) + 1 );
1143                 }
1144                 else {
1145                     counts.put( id, 1 );
1146                 }
1147             }
1148             final Set<String> dcs = new HashSet<String>();
1149             for( int i = 1; i < domains.size(); ++i ) {
1150                 for( int j = 0; j < i; ++j ) {
1151                     Domain domain_n = domains.get( i );
1152                     Domain domain_c = domains.get( j );
1153                     if ( domain_n.getFrom() > domain_c.getFrom() ) {
1154                         domain_n = domains.get( j );
1155                         domain_c = domains.get( i );
1156                     }
1157                     final String dc = domain_n.getDomainId().getId() + domain_c.getDomainId().getId();
1158                     if ( !dcs.contains( dc ) ) {
1159                         dcs.add( dc );
1160                         sb.append( protein.getSpecies() );
1161                         sb.append( separator );
1162                         sb.append( protein_id );
1163                         sb.append( separator );
1164                         sb.append( domain_n.getDomainId().getId() );
1165                         sb.append( separator );
1166                         sb.append( domain_c.getDomainId().getId() );
1167                         sb.append( separator );
1168                         sb.append( domain_n.getPerDomainEvalue() );
1169                         sb.append( separator );
1170                         sb.append( domain_c.getPerDomainEvalue() );
1171                         sb.append( separator );
1172                         sb.append( counts.get( domain_n.getDomainId().getId() ) );
1173                         sb.append( separator );
1174                         sb.append( counts.get( domain_c.getDomainId().getId() ) );
1175                         sb.append( ForesterUtil.LINE_SEPARATOR );
1176                     }
1177                 }
1178             }
1179         }
1180         else if ( domains.size() == 1 ) {
1181             sb.append( protein.getSpecies() );
1182             sb.append( separator );
1183             sb.append( protein_id );
1184             sb.append( separator );
1185             sb.append( domains.get( 0 ).getDomainId().getId() );
1186             sb.append( separator );
1187             sb.append( separator );
1188             sb.append( domains.get( 0 ).getPerDomainEvalue() );
1189             sb.append( separator );
1190             sb.append( separator );
1191             sb.append( 1 );
1192             sb.append( separator );
1193             sb.append( ForesterUtil.LINE_SEPARATOR );
1194         }
1195         else {
1196             sb.append( protein.getSpecies() );
1197             sb.append( separator );
1198             sb.append( protein_id );
1199             sb.append( separator );
1200             sb.append( separator );
1201             sb.append( separator );
1202             sb.append( separator );
1203             sb.append( separator );
1204             sb.append( separator );
1205             sb.append( ForesterUtil.LINE_SEPARATOR );
1206         }
1207         return sb;
1208     }
1209
1210     public static void writeBinaryStatesMatrixAsListToFile( final CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix,
1211                                                             final CharacterStateMatrix.GainLossStates state,
1212                                                             final String filename,
1213                                                             final String indentifier_characters_separator,
1214                                                             final String character_separator,
1215                                                             final Map<String, String> descriptions ) {
1216         final File outfile = new File( filename );
1217         checkForOutputFileWriteability( outfile );
1218         final SortedSet<String> sorted_ids = new TreeSet<String>();
1219         for( int i = 0; i < matrix.getNumberOfIdentifiers(); ++i ) {
1220             sorted_ids.add( matrix.getIdentifier( i ) );
1221         }
1222         try {
1223             final BufferedWriter out = new BufferedWriter( new FileWriter( outfile ) );
1224             for( final String id : sorted_ids ) {
1225                 out.write( indentifier_characters_separator );
1226                 out.write( "#" + id );
1227                 out.write( indentifier_characters_separator );
1228                 for( int c = 0; c < matrix.getNumberOfCharacters(); ++c ) {
1229                     // Not nice:
1230                     // using null to indicate either UNCHANGED_PRESENT or GAIN.
1231                     if ( ( matrix.getState( id, c ) == state )
1232                             || ( ( state == null ) && ( ( matrix.getState( id, c ) == CharacterStateMatrix.GainLossStates.GAIN ) || ( matrix
1233                                     .getState( id, c ) == CharacterStateMatrix.GainLossStates.UNCHANGED_PRESENT ) ) ) ) {
1234                         out.write( matrix.getCharacter( c ) );
1235                         if ( ( descriptions != null ) && !descriptions.isEmpty()
1236                                 && descriptions.containsKey( matrix.getCharacter( c ) ) ) {
1237                             out.write( "\t" );
1238                             out.write( descriptions.get( matrix.getCharacter( c ) ) );
1239                         }
1240                         out.write( character_separator );
1241                     }
1242                 }
1243             }
1244             out.flush();
1245             out.close();
1246         }
1247         catch ( final IOException e ) {
1248             ForesterUtil.fatalError( surfacing_old.PRG_NAME, e.getMessage() );
1249         }
1250         ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Wrote characters list: \"" + filename + "\"" );
1251     }
1252
1253     public static void writeBinaryStatesMatrixAsListToFileForBinaryCombinationsForGraphAnalysis( final CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix,
1254                                                                                                  final CharacterStateMatrix.GainLossStates state,
1255                                                                                                  final String filename,
1256                                                                                                  final String indentifier_characters_separator,
1257                                                                                                  final String character_separator,
1258                                                                                                  final BinaryDomainCombination.OutputFormat bc_output_format ) {
1259         final File outfile = new File( filename );
1260         checkForOutputFileWriteability( outfile );
1261         final SortedSet<String> sorted_ids = new TreeSet<String>();
1262         for( int i = 0; i < matrix.getNumberOfIdentifiers(); ++i ) {
1263             sorted_ids.add( matrix.getIdentifier( i ) );
1264         }
1265         try {
1266             final BufferedWriter out = new BufferedWriter( new FileWriter( outfile ) );
1267             for( final String id : sorted_ids ) {
1268                 out.write( indentifier_characters_separator );
1269                 out.write( "#" + id );
1270                 out.write( indentifier_characters_separator );
1271                 for( int c = 0; c < matrix.getNumberOfCharacters(); ++c ) {
1272                     // Not nice:
1273                     // using null to indicate either UNCHANGED_PRESENT or GAIN.
1274                     if ( ( matrix.getState( id, c ) == state )
1275                             || ( ( state == null ) && ( ( matrix.getState( id, c ) == CharacterStateMatrix.GainLossStates.GAIN ) || ( matrix
1276                                     .getState( id, c ) == CharacterStateMatrix.GainLossStates.UNCHANGED_PRESENT ) ) ) ) {
1277                         BinaryDomainCombination bdc = null;
1278                         try {
1279                             bdc = BasicBinaryDomainCombination.createInstance( matrix.getCharacter( c ) );
1280                         }
1281                         catch ( final Exception e ) {
1282                             ForesterUtil.fatalError( surfacing_old.PRG_NAME, e.getLocalizedMessage() );
1283                         }
1284                         out.write( bdc.toGraphDescribingLanguage( bc_output_format, null, null ).toString() );
1285                         out.write( character_separator );
1286                     }
1287                 }
1288             }
1289             out.flush();
1290             out.close();
1291         }
1292         catch ( final IOException e ) {
1293             ForesterUtil.fatalError( surfacing_old.PRG_NAME, e.getMessage() );
1294         }
1295         ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Wrote characters list: \"" + filename + "\"" );
1296     }
1297
1298     public static void writeBinaryStatesMatrixToList( final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
1299                                                       final Map<GoId, GoTerm> go_id_to_term_map,
1300                                                       final GoNameSpace go_namespace_limit,
1301                                                       final boolean domain_combinations,
1302                                                       final CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix,
1303                                                       final CharacterStateMatrix.GainLossStates state,
1304                                                       final String filename,
1305                                                       final String indentifier_characters_separator,
1306                                                       final String character_separator,
1307                                                       final String title_for_html,
1308                                                       final String prefix_for_html,
1309                                                       final Map<DomainId, Set<String>>[] domain_id_to_secondary_features_maps,
1310                                                       final SortedSet<String> all_pfams_encountered,
1311                                                       final SortedSet<String> pfams_gained_or_lost,
1312                                                       final String suffix_for_per_node_events_file ) {
1313         if ( ( go_namespace_limit != null ) && ( ( go_id_to_term_map == null ) || ( go_id_to_term_map.size() < 1 ) ) ) {
1314             throw new IllegalArgumentException( "attempt to use GO namespace limit without a GO-id to term map" );
1315         }
1316         else if ( ( ( domain_id_to_go_ids_map == null ) || ( domain_id_to_go_ids_map.size() < 1 ) ) ) {
1317             throw new IllegalArgumentException( "attempt to output detailed HTML without a Pfam to GO map" );
1318         }
1319         else if ( ( ( go_id_to_term_map == null ) || ( go_id_to_term_map.size() < 1 ) ) ) {
1320             throw new IllegalArgumentException( "attempt to output detailed HTML without a GO-id to term map" );
1321         }
1322         final File outfile = new File( filename );
1323         checkForOutputFileWriteability( outfile );
1324         final SortedSet<String> sorted_ids = new TreeSet<String>();
1325         for( int i = 0; i < matrix.getNumberOfIdentifiers(); ++i ) {
1326             sorted_ids.add( matrix.getIdentifier( i ) );
1327         }
1328         try {
1329             final Writer out = new BufferedWriter( new FileWriter( outfile ) );
1330             final File per_node_go_mapped_domain_gain_loss_files_base_dir = createBaseDirForPerNodeDomainFiles( surfacing_old.BASE_DIRECTORY_PER_NODE_DOMAIN_GAIN_LOSS_FILES,
1331                                                                                                                 domain_combinations,
1332                                                                                                                 state,
1333                                                                                                                 filename );
1334             Writer per_node_go_mapped_domain_gain_loss_outfile_writer = null;
1335             File per_node_go_mapped_domain_gain_loss_outfile = null;
1336             int per_node_counter = 0;
1337             out.write( "<html>" );
1338             out.write( SurfacingConstants.NL );
1339             addHtmlHead( out, title_for_html );
1340             out.write( SurfacingConstants.NL );
1341             out.write( "<body>" );
1342             out.write( SurfacingConstants.NL );
1343             out.write( "<h1>" );
1344             out.write( SurfacingConstants.NL );
1345             out.write( title_for_html );
1346             out.write( SurfacingConstants.NL );
1347             out.write( "</h1>" );
1348             out.write( SurfacingConstants.NL );
1349             out.write( "<table>" );
1350             out.write( SurfacingConstants.NL );
1351             for( final String id : sorted_ids ) {
1352                 final Matcher matcher = PATTERN_SP_STYLE_TAXONOMY.matcher( id );
1353                 if ( matcher.matches() ) {
1354                     continue;
1355                 }
1356                 out.write( "<tr>" );
1357                 out.write( "<td>" );
1358                 out.write( "<a href=\"#" + id + "\">" + id + "</a>" );
1359                 out.write( "</td>" );
1360                 out.write( "</tr>" );
1361                 out.write( SurfacingConstants.NL );
1362             }
1363             out.write( "</table>" );
1364             out.write( SurfacingConstants.NL );
1365             for( final String id : sorted_ids ) {
1366                 final Matcher matcher = PATTERN_SP_STYLE_TAXONOMY.matcher( id );
1367                 if ( matcher.matches() ) {
1368                     continue;
1369                 }
1370                 out.write( SurfacingConstants.NL );
1371                 out.write( "<h2>" );
1372                 out.write( "<a name=\"" + id + "\">" + id + "</a>" );
1373                 writeTaxonomyLinks( out, id );
1374                 out.write( "</h2>" );
1375                 out.write( SurfacingConstants.NL );
1376                 out.write( "<table>" );
1377                 out.write( SurfacingConstants.NL );
1378                 out.write( "<tr>" );
1379                 out.write( "<td><b>" );
1380                 out.write( "Pfam domain(s)" );
1381                 out.write( "</b></td><td><b>" );
1382                 out.write( "GO term acc" );
1383                 out.write( "</b></td><td><b>" );
1384                 out.write( "GO term" );
1385                 out.write( "</b></td><td><b>" );
1386                 out.write( "GO namespace" );
1387                 out.write( "</b></td>" );
1388                 out.write( "</tr>" );
1389                 out.write( SurfacingConstants.NL );
1390                 out.write( "</tr>" );
1391                 out.write( SurfacingConstants.NL );
1392                 per_node_counter = 0;
1393                 if ( matrix.getNumberOfCharacters() > 0 ) {
1394                     per_node_go_mapped_domain_gain_loss_outfile = new File( per_node_go_mapped_domain_gain_loss_files_base_dir
1395                             + ForesterUtil.FILE_SEPARATOR + id + suffix_for_per_node_events_file );
1396                     SurfacingUtil.checkForOutputFileWriteability( per_node_go_mapped_domain_gain_loss_outfile );
1397                     per_node_go_mapped_domain_gain_loss_outfile_writer = ForesterUtil
1398                             .createBufferedWriter( per_node_go_mapped_domain_gain_loss_outfile );
1399                 }
1400                 else {
1401                     per_node_go_mapped_domain_gain_loss_outfile = null;
1402                     per_node_go_mapped_domain_gain_loss_outfile_writer = null;
1403                 }
1404                 for( int c = 0; c < matrix.getNumberOfCharacters(); ++c ) {
1405                     // Not nice:
1406                     // using null to indicate either UNCHANGED_PRESENT or GAIN.
1407                     if ( ( matrix.getState( id, c ) == state )
1408                             || ( ( state == null ) && ( ( matrix.getState( id, c ) == CharacterStateMatrix.GainLossStates.UNCHANGED_PRESENT ) || ( matrix
1409                                     .getState( id, c ) == CharacterStateMatrix.GainLossStates.GAIN ) ) ) ) {
1410                         final String character = matrix.getCharacter( c );
1411                         String domain_0 = "";
1412                         String domain_1 = "";
1413                         if ( character.indexOf( BinaryDomainCombination.SEPARATOR ) > 0 ) {
1414                             final String[] s = character.split( BinaryDomainCombination.SEPARATOR );
1415                             if ( s.length != 2 ) {
1416                                 throw new AssertionError( "this should not have happened: unexpected format for domain combination: ["
1417                                         + character + "]" );
1418                             }
1419                             domain_0 = s[ 0 ];
1420                             domain_1 = s[ 1 ];
1421                         }
1422                         else {
1423                             domain_0 = character;
1424                         }
1425                         writeDomainData( domain_id_to_go_ids_map,
1426                                          go_id_to_term_map,
1427                                          go_namespace_limit,
1428                                          out,
1429                                          domain_0,
1430                                          domain_1,
1431                                          prefix_for_html,
1432                                          character_separator,
1433                                          domain_id_to_secondary_features_maps,
1434                                          null );
1435                         all_pfams_encountered.add( domain_0 );
1436                         if ( pfams_gained_or_lost != null ) {
1437                             pfams_gained_or_lost.add( domain_0 );
1438                         }
1439                         if ( !ForesterUtil.isEmpty( domain_1 ) ) {
1440                             all_pfams_encountered.add( domain_1 );
1441                             if ( pfams_gained_or_lost != null ) {
1442                                 pfams_gained_or_lost.add( domain_1 );
1443                             }
1444                         }
1445                         if ( per_node_go_mapped_domain_gain_loss_outfile_writer != null ) {
1446                             writeDomainsToIndividualFilePerTreeNode( per_node_go_mapped_domain_gain_loss_outfile_writer,
1447                                                                      domain_0,
1448                                                                      domain_1 );
1449                             per_node_counter++;
1450                         }
1451                     }
1452                 }
1453                 if ( per_node_go_mapped_domain_gain_loss_outfile_writer != null ) {
1454                     per_node_go_mapped_domain_gain_loss_outfile_writer.close();
1455                     if ( per_node_counter < 1 ) {
1456                         per_node_go_mapped_domain_gain_loss_outfile.delete();
1457                     }
1458                     per_node_counter = 0;
1459                 }
1460                 out.write( "</table>" );
1461                 out.write( SurfacingConstants.NL );
1462                 out.write( "<hr>" );
1463                 out.write( SurfacingConstants.NL );
1464             } // for( final String id : sorted_ids ) {  
1465             out.write( "</body>" );
1466             out.write( SurfacingConstants.NL );
1467             out.write( "</html>" );
1468             out.write( SurfacingConstants.NL );
1469             out.flush();
1470             out.close();
1471         }
1472         catch ( final IOException e ) {
1473             ForesterUtil.fatalError( surfacing_old.PRG_NAME, e.getMessage() );
1474         }
1475         ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Wrote characters detailed HTML list: \"" + filename
1476                 + "\"" );
1477     }
1478
1479     public static void writeBinaryStatesMatrixToListORIGIG( final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
1480                                                             final Map<GoId, GoTerm> go_id_to_term_map,
1481                                                             final GoNameSpace go_namespace_limit,
1482                                                             final boolean domain_combinations,
1483                                                             final CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix,
1484                                                             final CharacterStateMatrix.GainLossStates state,
1485                                                             final String filename,
1486                                                             final String indentifier_characters_separator,
1487                                                             final String character_separator,
1488                                                             final String title_for_html,
1489                                                             final String prefix_for_html,
1490                                                             final Map<DomainId, Set<String>>[] domain_id_to_secondary_features_maps,
1491                                                             final SortedSet<String> all_pfams_encountered,
1492                                                             final SortedSet<String> pfams_gained_or_lost,
1493                                                             final String suffix_for_per_node_events_file ) {
1494         if ( ( go_namespace_limit != null ) && ( ( go_id_to_term_map == null ) || ( go_id_to_term_map.size() < 1 ) ) ) {
1495             throw new IllegalArgumentException( "attempt to use GO namespace limit without a GO-id to term map" );
1496         }
1497         else if ( ( ( domain_id_to_go_ids_map == null ) || ( domain_id_to_go_ids_map.size() < 1 ) ) ) {
1498             throw new IllegalArgumentException( "attempt to output detailed HTML without a Pfam to GO map" );
1499         }
1500         else if ( ( ( go_id_to_term_map == null ) || ( go_id_to_term_map.size() < 1 ) ) ) {
1501             throw new IllegalArgumentException( "attempt to output detailed HTML without a GO-id to term map" );
1502         }
1503         final File outfile = new File( filename );
1504         checkForOutputFileWriteability( outfile );
1505         final SortedSet<String> sorted_ids = new TreeSet<String>();
1506         for( int i = 0; i < matrix.getNumberOfIdentifiers(); ++i ) {
1507             sorted_ids.add( matrix.getIdentifier( i ) );
1508         }
1509         try {
1510             final Writer out = new BufferedWriter( new FileWriter( outfile ) );
1511             final File per_node_go_mapped_domain_gain_loss_files_base_dir = createBaseDirForPerNodeDomainFiles( surfacing_old.BASE_DIRECTORY_PER_NODE_DOMAIN_GAIN_LOSS_FILES,
1512                                                                                                                 domain_combinations,
1513                                                                                                                 state,
1514                                                                                                                 filename );
1515             Writer per_node_go_mapped_domain_gain_loss_outfile_writer = null;
1516             File per_node_go_mapped_domain_gain_loss_outfile = null;
1517             int per_node_counter = 0;
1518             out.write( "<html>" );
1519             out.write( SurfacingConstants.NL );
1520             addHtmlHead( out, title_for_html );
1521             out.write( SurfacingConstants.NL );
1522             out.write( "<body>" );
1523             out.write( SurfacingConstants.NL );
1524             out.write( "<h1>" );
1525             out.write( SurfacingConstants.NL );
1526             out.write( title_for_html );
1527             out.write( SurfacingConstants.NL );
1528             out.write( "</h1>" );
1529             out.write( SurfacingConstants.NL );
1530             out.write( "<table>" );
1531             out.write( SurfacingConstants.NL );
1532             for( final String id : sorted_ids ) {
1533                 out.write( "<tr>" );
1534                 out.write( "<td>" );
1535                 out.write( "<a href=\"#" + id + "\">" + id + "</a>" );
1536                 writeTaxonomyLinks( out, id );
1537                 out.write( "</td>" );
1538                 out.write( "</tr>" );
1539                 out.write( SurfacingConstants.NL );
1540             }
1541             out.write( "</table>" );
1542             out.write( SurfacingConstants.NL );
1543             for( final String id : sorted_ids ) {
1544                 out.write( SurfacingConstants.NL );
1545                 out.write( "<h2>" );
1546                 out.write( "<a name=\"" + id + "\">" + id + "</a>" );
1547                 writeTaxonomyLinks( out, id );
1548                 out.write( "</h2>" );
1549                 out.write( SurfacingConstants.NL );
1550                 out.write( "<table>" );
1551                 out.write( SurfacingConstants.NL );
1552                 out.write( "<tr>" );
1553                 out.write( "<td><b>" );
1554                 out.write( "Pfam domain(s)" );
1555                 out.write( "</b></td><td><b>" );
1556                 out.write( "GO term acc" );
1557                 out.write( "</b></td><td><b>" );
1558                 out.write( "GO term" );
1559                 out.write( "</b></td><td><b>" );
1560                 out.write( "Penultimate GO term" );
1561                 out.write( "</b></td><td><b>" );
1562                 out.write( "GO namespace" );
1563                 out.write( "</b></td>" );
1564                 out.write( "</tr>" );
1565                 out.write( SurfacingConstants.NL );
1566                 out.write( "</tr>" );
1567                 out.write( SurfacingConstants.NL );
1568                 per_node_counter = 0;
1569                 if ( matrix.getNumberOfCharacters() > 0 ) {
1570                     per_node_go_mapped_domain_gain_loss_outfile = new File( per_node_go_mapped_domain_gain_loss_files_base_dir
1571                             + ForesterUtil.FILE_SEPARATOR + id + suffix_for_per_node_events_file );
1572                     SurfacingUtil.checkForOutputFileWriteability( per_node_go_mapped_domain_gain_loss_outfile );
1573                     per_node_go_mapped_domain_gain_loss_outfile_writer = ForesterUtil
1574                             .createBufferedWriter( per_node_go_mapped_domain_gain_loss_outfile );
1575                 }
1576                 else {
1577                     per_node_go_mapped_domain_gain_loss_outfile = null;
1578                     per_node_go_mapped_domain_gain_loss_outfile_writer = null;
1579                 }
1580                 for( int c = 0; c < matrix.getNumberOfCharacters(); ++c ) {
1581                     // Not nice:
1582                     // using null to indicate either UNCHANGED_PRESENT or GAIN.
1583                     if ( ( matrix.getState( id, c ) == state )
1584                             || ( ( state == null ) && ( ( matrix.getState( id, c ) == CharacterStateMatrix.GainLossStates.UNCHANGED_PRESENT ) || ( matrix
1585                                     .getState( id, c ) == CharacterStateMatrix.GainLossStates.GAIN ) ) ) ) {
1586                         final String character = matrix.getCharacter( c );
1587                         String domain_0 = "";
1588                         String domain_1 = "";
1589                         if ( character.indexOf( BinaryDomainCombination.SEPARATOR ) > 0 ) {
1590                             final String[] s = character.split( BinaryDomainCombination.SEPARATOR );
1591                             if ( s.length != 2 ) {
1592                                 throw new AssertionError( "this should not have happened: unexpected format for domain combination: ["
1593                                         + character + "]" );
1594                             }
1595                             domain_0 = s[ 0 ];
1596                             domain_1 = s[ 1 ];
1597                         }
1598                         else {
1599                             domain_0 = character;
1600                         }
1601                         writeDomainData( domain_id_to_go_ids_map,
1602                                          go_id_to_term_map,
1603                                          go_namespace_limit,
1604                                          out,
1605                                          domain_0,
1606                                          domain_1,
1607                                          prefix_for_html,
1608                                          character_separator,
1609                                          domain_id_to_secondary_features_maps,
1610                                          null );
1611                         all_pfams_encountered.add( domain_0 );
1612                         if ( pfams_gained_or_lost != null ) {
1613                             pfams_gained_or_lost.add( domain_0 );
1614                         }
1615                         if ( !ForesterUtil.isEmpty( domain_1 ) ) {
1616                             all_pfams_encountered.add( domain_1 );
1617                             if ( pfams_gained_or_lost != null ) {
1618                                 pfams_gained_or_lost.add( domain_1 );
1619                             }
1620                         }
1621                         if ( per_node_go_mapped_domain_gain_loss_outfile_writer != null ) {
1622                             writeDomainsToIndividualFilePerTreeNode( per_node_go_mapped_domain_gain_loss_outfile_writer,
1623                                                                      domain_0,
1624                                                                      domain_1 );
1625                             per_node_counter++;
1626                         }
1627                     }
1628                 }
1629                 if ( per_node_go_mapped_domain_gain_loss_outfile_writer != null ) {
1630                     per_node_go_mapped_domain_gain_loss_outfile_writer.close();
1631                     if ( per_node_counter < 1 ) {
1632                         per_node_go_mapped_domain_gain_loss_outfile.delete();
1633                     }
1634                     per_node_counter = 0;
1635                 }
1636                 out.write( "</table>" );
1637                 out.write( SurfacingConstants.NL );
1638                 out.write( "<hr>" );
1639                 out.write( SurfacingConstants.NL );
1640             } // for( final String id : sorted_ids ) {  
1641             out.write( "</body>" );
1642             out.write( SurfacingConstants.NL );
1643             out.write( "</html>" );
1644             out.write( SurfacingConstants.NL );
1645             out.flush();
1646             out.close();
1647         }
1648         catch ( final IOException e ) {
1649             ForesterUtil.fatalError( surfacing_old.PRG_NAME, e.getMessage() );
1650         }
1651         ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Wrote characters detailed HTML list: \"" + filename
1652                 + "\"" );
1653     }
1654
1655     public static void writeDomainCombinationsCountsFile( final String[][] input_file_properties,
1656                                                           final File output_dir,
1657                                                           final Writer per_genome_domain_promiscuity_statistics_writer,
1658                                                           final GenomeWideCombinableDomains gwcd,
1659                                                           final int i,
1660                                                           final GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder dc_sort_order ) {
1661         File dc_outfile = new File( input_file_properties[ i ][ 0 ]
1662                 + surfacing_old.DOMAIN_COMBINITON_COUNTS_OUTPUTFILE_SUFFIX );
1663         if ( output_dir != null ) {
1664             dc_outfile = new File( output_dir + ForesterUtil.FILE_SEPARATOR + dc_outfile );
1665         }
1666         checkForOutputFileWriteability( dc_outfile );
1667         try {
1668             final BufferedWriter out = new BufferedWriter( new FileWriter( dc_outfile ) );
1669             out.write( gwcd.toStringBuilder( dc_sort_order ).toString() );
1670             out.close();
1671         }
1672         catch ( final IOException e ) {
1673             ForesterUtil.fatalError( surfacing_old.PRG_NAME, e.getMessage() );
1674         }
1675         final DescriptiveStatistics stats = gwcd.getPerGenomeDomainPromiscuityStatistics();
1676         try {
1677             per_genome_domain_promiscuity_statistics_writer.write( input_file_properties[ i ][ 0 ] + "\t" );
1678             per_genome_domain_promiscuity_statistics_writer.write( FORMATTER_3.format( stats.arithmeticMean() ) + "\t" );
1679             if ( stats.getN() < 2 ) {
1680                 per_genome_domain_promiscuity_statistics_writer.write( "n/a" + "\t" );
1681             }
1682             else {
1683                 per_genome_domain_promiscuity_statistics_writer.write( FORMATTER_3.format( stats
1684                         .sampleStandardDeviation() ) + "\t" );
1685             }
1686             per_genome_domain_promiscuity_statistics_writer.write( FORMATTER_3.format( stats.median() ) + "\t" );
1687             per_genome_domain_promiscuity_statistics_writer.write( ( int ) stats.getMin() + "\t" );
1688             per_genome_domain_promiscuity_statistics_writer.write( ( int ) stats.getMax() + "\t" );
1689             per_genome_domain_promiscuity_statistics_writer.write( stats.getN() + "\t" );
1690             final SortedSet<DomainId> mpds = gwcd.getMostPromiscuosDomain();
1691             for( final DomainId mpd : mpds ) {
1692                 per_genome_domain_promiscuity_statistics_writer.write( mpd.getId() + " " );
1693             }
1694             per_genome_domain_promiscuity_statistics_writer.write( ForesterUtil.LINE_SEPARATOR );
1695         }
1696         catch ( final IOException e ) {
1697             ForesterUtil.fatalError( surfacing_old.PRG_NAME, e.getMessage() );
1698         }
1699         if ( input_file_properties[ i ].length == 3 ) {
1700             ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Wrote domain combination counts for \""
1701                     + input_file_properties[ i ][ 0 ] + "\" (" + input_file_properties[ i ][ 1 ] + ", "
1702                     + input_file_properties[ i ][ 2 ] + ") to: \"" + dc_outfile + "\"" );
1703         }
1704         else {
1705             ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Wrote domain combination counts for \""
1706                     + input_file_properties[ i ][ 0 ] + "\" (" + input_file_properties[ i ][ 1 ] + ") to: \""
1707                     + dc_outfile + "\"" );
1708         }
1709     }
1710
1711     private static void writeDomainData( final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
1712                                          final Map<GoId, GoTerm> go_id_to_term_map,
1713                                          final GoNameSpace go_namespace_limit,
1714                                          final Writer out,
1715                                          final String domain_0,
1716                                          final String domain_1,
1717                                          final String prefix_for_html,
1718                                          final String character_separator_for_non_html_output,
1719                                          final Map<DomainId, Set<String>>[] domain_id_to_secondary_features_maps,
1720                                          final Set<GoId> all_go_ids ) throws IOException {
1721         boolean any_go_annotation_present = false;
1722         boolean first_has_no_go = false;
1723         int domain_count = 2; // To distinguish between domains and binary domain combinations.
1724         if ( ForesterUtil.isEmpty( domain_1 ) ) {
1725             domain_count = 1;
1726         }
1727         // The following has a difficult to understand logic.  
1728         for( int d = 0; d < domain_count; ++d ) {
1729             List<GoId> go_ids = null;
1730             boolean go_annotation_present = false;
1731             if ( d == 0 ) {
1732                 final DomainId domain_id = new DomainId( domain_0 );
1733                 if ( domain_id_to_go_ids_map.containsKey( domain_id ) ) {
1734                     go_annotation_present = true;
1735                     any_go_annotation_present = true;
1736                     go_ids = domain_id_to_go_ids_map.get( domain_id );
1737                 }
1738                 else {
1739                     first_has_no_go = true;
1740                 }
1741             }
1742             else {
1743                 final DomainId domain_id = new DomainId( domain_1 );
1744                 if ( domain_id_to_go_ids_map.containsKey( domain_id ) ) {
1745                     go_annotation_present = true;
1746                     any_go_annotation_present = true;
1747                     go_ids = domain_id_to_go_ids_map.get( domain_id );
1748                 }
1749             }
1750             if ( go_annotation_present ) {
1751                 boolean first = ( ( d == 0 ) || ( ( d == 1 ) && first_has_no_go ) );
1752                 for( final GoId go_id : go_ids ) {
1753                     out.write( "<tr>" );
1754                     if ( first ) {
1755                         first = false;
1756                         writeDomainIdsToHtml( out,
1757                                               domain_0,
1758                                               domain_1,
1759                                               prefix_for_html,
1760                                               domain_id_to_secondary_features_maps );
1761                     }
1762                     else {
1763                         out.write( "<td></td>" );
1764                     }
1765                     if ( !go_id_to_term_map.containsKey( go_id ) ) {
1766                         throw new IllegalArgumentException( "GO-id [" + go_id + "] not found in GO-id to GO-term map" );
1767                     }
1768                     final GoTerm go_term = go_id_to_term_map.get( go_id );
1769                     if ( ( go_namespace_limit == null ) || go_namespace_limit.equals( go_term.getGoNameSpace() ) ) {
1770                         // final String top = GoUtils.getPenultimateGoTerm( go_term, go_id_to_term_map ).getName();
1771                         final String go_id_str = go_id.getId();
1772                         out.write( "<td>" );
1773                         out.write( "<a href=\"" + SurfacingConstants.AMIGO_LINK + go_id_str
1774                                 + "\" target=\"amigo_window\">" + go_id_str + "</a>" );
1775                         out.write( "</td><td>" );
1776                         out.write( go_term.getName() );
1777                         if ( domain_count == 2 ) {
1778                             out.write( " (" + d + ")" );
1779                         }
1780                         out.write( "</td><td>" );
1781                         // out.write( top );
1782                         // out.write( "</td><td>" );
1783                         out.write( "[" );
1784                         out.write( go_term.getGoNameSpace().toShortString() );
1785                         out.write( "]" );
1786                         out.write( "</td>" );
1787                         if ( all_go_ids != null ) {
1788                             all_go_ids.add( go_id );
1789                         }
1790                     }
1791                     else {
1792                         out.write( "<td>" );
1793                         out.write( "</td><td>" );
1794                         out.write( "</td><td>" );
1795                         out.write( "</td><td>" );
1796                         out.write( "</td>" );
1797                     }
1798                     out.write( "</tr>" );
1799                     out.write( SurfacingConstants.NL );
1800                 }
1801             }
1802         } //  for( int d = 0; d < domain_count; ++d ) 
1803         if ( !any_go_annotation_present ) {
1804             out.write( "<tr>" );
1805             writeDomainIdsToHtml( out, domain_0, domain_1, prefix_for_html, domain_id_to_secondary_features_maps );
1806             out.write( "<td>" );
1807             out.write( "</td><td>" );
1808             out.write( "</td><td>" );
1809             out.write( "</td><td>" );
1810             out.write( "</td>" );
1811             out.write( "</tr>" );
1812             out.write( SurfacingConstants.NL );
1813         }
1814     }
1815
1816     private static void writeDomainDataORIG( final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
1817                                              final Map<GoId, GoTerm> go_id_to_term_map,
1818                                              final GoNameSpace go_namespace_limit,
1819                                              final Writer out,
1820                                              final String domain_0,
1821                                              final String domain_1,
1822                                              final String prefix_for_html,
1823                                              final String character_separator_for_non_html_output,
1824                                              final Map<DomainId, Set<String>>[] domain_id_to_secondary_features_maps,
1825                                              final Set<GoId> all_go_ids ) throws IOException {
1826         boolean any_go_annotation_present = false;
1827         boolean first_has_no_go = false;
1828         int domain_count = 2; // To distinguish between domains and binary domain combinations.
1829         if ( ForesterUtil.isEmpty( domain_1 ) ) {
1830             domain_count = 1;
1831         }
1832         // The following has a difficult to understand logic.  
1833         for( int d = 0; d < domain_count; ++d ) {
1834             List<GoId> go_ids = null;
1835             boolean go_annotation_present = false;
1836             if ( d == 0 ) {
1837                 final DomainId domain_id = new DomainId( domain_0 );
1838                 if ( domain_id_to_go_ids_map.containsKey( domain_id ) ) {
1839                     go_annotation_present = true;
1840                     any_go_annotation_present = true;
1841                     go_ids = domain_id_to_go_ids_map.get( domain_id );
1842                 }
1843                 else {
1844                     first_has_no_go = true;
1845                 }
1846             }
1847             else {
1848                 final DomainId domain_id = new DomainId( domain_1 );
1849                 if ( domain_id_to_go_ids_map.containsKey( domain_id ) ) {
1850                     go_annotation_present = true;
1851                     any_go_annotation_present = true;
1852                     go_ids = domain_id_to_go_ids_map.get( domain_id );
1853                 }
1854             }
1855             if ( go_annotation_present ) {
1856                 boolean first = ( ( d == 0 ) || ( ( d == 1 ) && first_has_no_go ) );
1857                 for( final GoId go_id : go_ids ) {
1858                     out.write( "<tr>" );
1859                     if ( first ) {
1860                         first = false;
1861                         writeDomainIdsToHtml( out,
1862                                               domain_0,
1863                                               domain_1,
1864                                               prefix_for_html,
1865                                               domain_id_to_secondary_features_maps );
1866                     }
1867                     else {
1868                         out.write( "<td></td>" );
1869                     }
1870                     if ( !go_id_to_term_map.containsKey( go_id ) ) {
1871                         throw new IllegalArgumentException( "GO-id [" + go_id + "] not found in GO-id to GO-term map" );
1872                     }
1873                     final GoTerm go_term = go_id_to_term_map.get( go_id );
1874                     if ( ( go_namespace_limit == null ) || go_namespace_limit.equals( go_term.getGoNameSpace() ) ) {
1875                         final String top = GoUtils.getPenultimateGoTerm( go_term, go_id_to_term_map ).getName();
1876                         final String go_id_str = go_id.getId();
1877                         out.write( "<td>" );
1878                         out.write( "<a href=\"" + SurfacingConstants.AMIGO_LINK + go_id_str
1879                                 + "\" target=\"amigo_window\">" + go_id_str + "</a>" );
1880                         out.write( "</td><td>" );
1881                         out.write( go_term.getName() );
1882                         if ( domain_count == 2 ) {
1883                             out.write( " (" + d + ")" );
1884                         }
1885                         out.write( "</td><td>" );
1886                         out.write( top );
1887                         out.write( "</td><td>" );
1888                         out.write( "[" );
1889                         out.write( go_term.getGoNameSpace().toShortString() );
1890                         out.write( "]" );
1891                         out.write( "</td>" );
1892                         if ( all_go_ids != null ) {
1893                             all_go_ids.add( go_id );
1894                         }
1895                     }
1896                     else {
1897                         out.write( "<td>" );
1898                         out.write( "</td><td>" );
1899                         out.write( "</td><td>" );
1900                         out.write( "</td><td>" );
1901                         out.write( "</td>" );
1902                     }
1903                     out.write( "</tr>" );
1904                     out.write( SurfacingConstants.NL );
1905                 }
1906             }
1907         } //  for( int d = 0; d < domain_count; ++d ) 
1908         if ( !any_go_annotation_present ) {
1909             out.write( "<tr>" );
1910             writeDomainIdsToHtml( out, domain_0, domain_1, prefix_for_html, domain_id_to_secondary_features_maps );
1911             out.write( "<td>" );
1912             out.write( "</td><td>" );
1913             out.write( "</td><td>" );
1914             out.write( "</td><td>" );
1915             out.write( "</td>" );
1916             out.write( "</tr>" );
1917             out.write( SurfacingConstants.NL );
1918         }
1919     }
1920
1921     private static void writeDomainIdsToHtml( final Writer out,
1922                                               final String domain_0,
1923                                               final String domain_1,
1924                                               final String prefix_for_detailed_html,
1925                                               final Map<DomainId, Set<String>>[] domain_id_to_secondary_features_maps )
1926             throws IOException {
1927         out.write( "<td>" );
1928         if ( !ForesterUtil.isEmpty( prefix_for_detailed_html ) ) {
1929             out.write( prefix_for_detailed_html );
1930             out.write( " " );
1931         }
1932         out.write( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain_0 + "\">" + domain_0 + "</a>" );
1933         //if ( ForesterUtil.isEmpty( domain_1 ) ) {
1934         //    out.write( " <a href=\"" + SurfacingConstants.GOOGLE_SCHOLAR_LINK + domain_0
1935         //            + SurfacingConstants.GOOGLE_SCHOLAR_LIMITS + "\">[gs]</a>" );
1936         //}
1937         // if ( !ForesterUtil.isEmpty( domain_1 ) ) {
1938         //     out.write( "=" );
1939         //    out.write( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain_1 + "\">" + domain_1 + "</a>" );
1940         //}
1941         //        else if ( ( domain_id_to_secondary_features_maps != null )
1942         //                && ( domain_id_to_secondary_features_maps.length > 0 ) ) {
1943         //            out.write( " [" );
1944         //            boolean first = true;
1945         //            for( final Map<DomainId, Set<String>> domain_id_to_secondary_features_map : domain_id_to_secondary_features_maps ) {
1946         //                final Set<String> sec_features = domain_id_to_secondary_features_map.get( new DomainId( domain_0 ) );
1947         //                if ( ( sec_features != null ) && ( sec_features.size() > 0 ) ) {
1948         //                    for( final String sec_feature : sec_features ) {
1949         //                        if ( first ) {
1950         //                            first = false;
1951         //                        }
1952         //                        else {
1953         //                            out.write( ", " );
1954         //                        }
1955         //                        if ( SurfacingConstants.SECONDARY_FEATURES_ARE_SCOP
1956         //                                && ( SurfacingConstants.SECONDARY_FEATURES_SCOP_LINK != null ) ) {
1957         //                            out.write( "<a href=\"" + SurfacingConstants.SECONDARY_FEATURES_SCOP_LINK + sec_feature
1958         //                                    + "\" target=\"scop_window\">" + sec_feature + "</a>" );
1959         //                        }
1960         //                        else {
1961         //                            out.write( sec_feature );
1962         //                        }
1963         //                    }
1964         //                }
1965         //            }
1966         //            out.write( "]" );
1967         //        }
1968         out.write( "</td>" );
1969     }
1970
1971     private static void writeDomainIdsToHtmlORIG( final Writer out,
1972                                                   final String domain_0,
1973                                                   final String domain_1,
1974                                                   final String prefix_for_detailed_html,
1975                                                   final Map<DomainId, Set<String>>[] domain_id_to_secondary_features_maps )
1976             throws IOException {
1977         out.write( "<td>" );
1978         if ( !ForesterUtil.isEmpty( prefix_for_detailed_html ) ) {
1979             out.write( prefix_for_detailed_html );
1980             out.write( " " );
1981         }
1982         out.write( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain_0 + "\">" + domain_0 + "</a>" );
1983         if ( ForesterUtil.isEmpty( domain_1 ) ) {
1984             out.write( " <a href=\"" + SurfacingConstants.GOOGLE_SCHOLAR_LINK + domain_0
1985                     + SurfacingConstants.GOOGLE_SCHOLAR_LIMITS + "\">[gs]</a>" );
1986         }
1987         if ( !ForesterUtil.isEmpty( domain_1 ) ) {
1988             out.write( "=" );
1989             out.write( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain_1 + "\">" + domain_1 + "</a>" );
1990         }
1991         else if ( ( domain_id_to_secondary_features_maps != null )
1992                 && ( domain_id_to_secondary_features_maps.length > 0 ) ) {
1993             out.write( " [" );
1994             boolean first = true;
1995             for( final Map<DomainId, Set<String>> domain_id_to_secondary_features_map : domain_id_to_secondary_features_maps ) {
1996                 final Set<String> sec_features = domain_id_to_secondary_features_map.get( new DomainId( domain_0 ) );
1997                 if ( ( sec_features != null ) && ( sec_features.size() > 0 ) ) {
1998                     for( final String sec_feature : sec_features ) {
1999                         if ( first ) {
2000                             first = false;
2001                         }
2002                         else {
2003                             out.write( ", " );
2004                         }
2005                         if ( SurfacingConstants.SECONDARY_FEATURES_ARE_SCOP
2006                                 && ( SurfacingConstants.SECONDARY_FEATURES_SCOP_LINK != null ) ) {
2007                             out.write( "<a href=\"" + SurfacingConstants.SECONDARY_FEATURES_SCOP_LINK + sec_feature
2008                                     + "\" target=\"scop_window\">" + sec_feature + "</a>" );
2009                         }
2010                         else {
2011                             out.write( sec_feature );
2012                         }
2013                     }
2014                 }
2015             }
2016             out.write( "]" );
2017         }
2018         out.write( "</td>" );
2019     }
2020
2021     public static DescriptiveStatistics writeDomainSimilaritiesToFile( final StringBuilder html_desc,
2022                                                                        final StringBuilder html_title,
2023                                                                        final Writer w,
2024                                                                        final SortedSet<DomainSimilarity> similarities,
2025                                                                        final boolean treat_as_binary,
2026                                                                        final List<Species> species_order,
2027                                                                        final PrintableDomainSimilarity.PRINT_OPTION print_option,
2028                                                                        final DomainSimilarity.DomainSimilaritySortField sort_field,
2029                                                                        final DomainSimilarity.DomainSimilarityScoring scoring,
2030                                                                        final boolean verbose ) throws IOException {
2031         final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
2032         String histogram_title = null;
2033         switch ( sort_field ) {
2034             case ABS_MAX_COUNTS_DIFFERENCE:
2035                 if ( treat_as_binary ) {
2036                     histogram_title = "absolute counts difference:";
2037                 }
2038                 else {
2039                     histogram_title = "absolute (maximal) counts difference:";
2040                 }
2041                 break;
2042             case MAX_COUNTS_DIFFERENCE:
2043                 if ( treat_as_binary ) {
2044                     histogram_title = "counts difference:";
2045                 }
2046                 else {
2047                     histogram_title = "(maximal) counts difference:";
2048                 }
2049                 break;
2050             case DOMAIN_ID:
2051                 histogram_title = "score mean:";
2052                 break;
2053             case MIN:
2054                 histogram_title = "score minimum:";
2055                 break;
2056             case MAX:
2057                 histogram_title = "score maximum:";
2058                 break;
2059             case MAX_DIFFERENCE:
2060                 if ( treat_as_binary ) {
2061                     histogram_title = "difference:";
2062                 }
2063                 else {
2064                     histogram_title = "(maximal) difference:";
2065                 }
2066                 break;
2067             case MEAN:
2068                 histogram_title = "score mean:";
2069                 break;
2070             case SD:
2071                 histogram_title = "score standard deviation:";
2072                 break;
2073             case SPECIES_COUNT:
2074                 histogram_title = "species number:";
2075                 break;
2076             default:
2077                 throw new AssertionError( "Unknown sort field: " + sort_field );
2078         }
2079         for( final DomainSimilarity similarity : similarities ) {
2080             switch ( sort_field ) {
2081                 case ABS_MAX_COUNTS_DIFFERENCE:
2082                     stats.addValue( Math.abs( similarity.getMaximalDifferenceInCounts() ) );
2083                     break;
2084                 case MAX_COUNTS_DIFFERENCE:
2085                     stats.addValue( similarity.getMaximalDifferenceInCounts() );
2086                     break;
2087                 case DOMAIN_ID:
2088                     stats.addValue( similarity.getMeanSimilarityScore() );
2089                     break;
2090                 case MIN:
2091                     stats.addValue( similarity.getMinimalSimilarityScore() );
2092                     break;
2093                 case MAX:
2094                     stats.addValue( similarity.getMaximalSimilarityScore() );
2095                     break;
2096                 case MAX_DIFFERENCE:
2097                     stats.addValue( similarity.getMaximalDifference() );
2098                     break;
2099                 case MEAN:
2100                     stats.addValue( similarity.getMeanSimilarityScore() );
2101                     break;
2102                 case SD:
2103                     stats.addValue( similarity.getStandardDeviationOfSimilarityScore() );
2104                     break;
2105                 case SPECIES_COUNT:
2106                     stats.addValue( similarity.getSpecies().size() );
2107                     break;
2108                 default:
2109                     throw new AssertionError( "Unknown sort field: " + sort_field );
2110             }
2111         }
2112         //
2113         // final HistogramData[] hists = new HistogramData[ 1 ];
2114         //      
2115         //        
2116         // List<HistogramDataItem> data_items = new
2117         // ArrayList<HistogramDataItem>();
2118         // double[] values = stats.getDataAsDoubleArray();
2119         // for( int i = 0; i < values.length; i++ ) {
2120         // HistogramDataItem data_item = new BasicHistogramDataItem( "", values[
2121         // i ] );
2122         // data_items.add( data_item );
2123         // }
2124         //        
2125         //        
2126         // HistogramData hd0 = new HistogramData( "name",
2127         // data_items,
2128         // null, 20,
2129         // 40 );
2130         //        
2131         //        
2132         //        
2133         //        
2134         // hists[ 0 ] = hd0;
2135         //       
2136         // final HistogramsFrame hf = new HistogramsFrame( hists );
2137         // hf.setVisible( true );
2138         //
2139         AsciiHistogram histo = null;
2140         if ( stats.getMin() < stats.getMin() ) {
2141             histo = new AsciiHistogram( stats, histogram_title );
2142         }
2143         if ( verbose ) {
2144             if ( histo != null ) {
2145                 System.out.println( histo.toStringBuffer( 20, '|', 40, 5 ) );
2146             }
2147             System.out.println();
2148             System.out.println( "N                   : " + stats.getN() );
2149             System.out.println( "Min                 : " + stats.getMin() );
2150             System.out.println( "Max                 : " + stats.getMax() );
2151             System.out.println( "Mean                : " + stats.arithmeticMean() );
2152             if ( stats.getN() > 1 ) {
2153                 System.out.println( "SD                  : " + stats.sampleStandardDeviation() );
2154             }
2155             else {
2156                 System.out.println( "SD                  : n/a" );
2157             }
2158             System.out.println( "Median              : " + stats.median() );
2159             if ( stats.getN() > 1 ) {
2160                 System.out.println( "Pearsonian skewness : " + stats.pearsonianSkewness() );
2161             }
2162             else {
2163                 System.out.println( "Pearsonian skewness : n/a" );
2164             }
2165         }
2166         switch ( print_option ) {
2167             case SIMPLE_TAB_DELIMITED:
2168                 break;
2169             case HTML:
2170                 w.write( "<html>" );
2171                 w.write( SurfacingConstants.NL );
2172                 addHtmlHead( w, "SURFACING :: " + html_title );
2173                 w.write( SurfacingConstants.NL );
2174                 w.write( "<body>" );
2175                 w.write( SurfacingConstants.NL );
2176                 w.write( html_desc.toString() );
2177                 w.write( SurfacingConstants.NL );
2178                 w.write( "<hr>" );
2179                 w.write( "<br>" );
2180                 w.write( SurfacingConstants.NL );
2181                 w.write( "<tt><pre>" );
2182                 w.write( SurfacingConstants.NL );
2183                 if ( histo != null ) {
2184                     w.write( histo.toStringBuffer( 20, '|', 40, 5 ).toString() );
2185                     w.write( SurfacingConstants.NL );
2186                 }
2187                 w.write( "</pre></tt>" );
2188                 w.write( SurfacingConstants.NL );
2189                 w.write( "<table>" );
2190                 w.write( SurfacingConstants.NL );
2191                 w.write( "<tr><td>N: </td><td>" + stats.getN() + "</td></tr>" );
2192                 w.write( SurfacingConstants.NL );
2193                 w.write( "<tr><td>Min: </td><td>" + stats.getMin() + "</td></tr>" );
2194                 w.write( SurfacingConstants.NL );
2195                 w.write( "<tr><td>Max: </td><td>" + stats.getMax() + "</td></tr>" );
2196                 w.write( SurfacingConstants.NL );
2197                 w.write( "<tr><td>Mean: </td><td>" + stats.arithmeticMean() + "</td></tr>" );
2198                 w.write( SurfacingConstants.NL );
2199                 if ( stats.getN() > 1 ) {
2200                     w.write( "<tr><td>SD: </td><td>" + stats.sampleStandardDeviation() + "</td></tr>" );
2201                 }
2202                 else {
2203                     w.write( "<tr><td>SD: </td><td>n/a</td></tr>" );
2204                 }
2205                 w.write( SurfacingConstants.NL );
2206                 w.write( "<tr><td>Median: </td><td>" + stats.median() + "</td></tr>" );
2207                 w.write( SurfacingConstants.NL );
2208                 if ( stats.getN() > 1 ) {
2209                     w.write( "<tr><td>Pearsonian skewness: </td><td>" + stats.pearsonianSkewness() + "</td></tr>" );
2210                 }
2211                 else {
2212                     w.write( "<tr><td>Pearsonian skewness: </td><td>n/a</td></tr>" );
2213                 }
2214                 w.write( SurfacingConstants.NL );
2215                 w.write( "</table>" );
2216                 w.write( SurfacingConstants.NL );
2217                 w.write( "<br>" );
2218                 w.write( SurfacingConstants.NL );
2219                 w.write( "<hr>" );
2220                 w.write( SurfacingConstants.NL );
2221                 w.write( "<br>" );
2222                 w.write( SurfacingConstants.NL );
2223                 w.write( "<table>" );
2224                 w.write( SurfacingConstants.NL );
2225                 break;
2226         }
2227         w.write( SurfacingConstants.NL );
2228         for( final DomainSimilarity similarity : similarities ) {
2229             if ( ( species_order != null ) && !species_order.isEmpty() ) {
2230                 ( ( PrintableDomainSimilarity ) similarity ).setSpeciesOrder( species_order );
2231             }
2232             w.write( similarity.toStringBuffer( print_option ).toString() );
2233             w.write( SurfacingConstants.NL );
2234         }
2235         switch ( print_option ) {
2236             case HTML:
2237                 w.write( SurfacingConstants.NL );
2238                 w.write( "</table>" );
2239                 w.write( SurfacingConstants.NL );
2240                 w.write( "</font>" );
2241                 w.write( SurfacingConstants.NL );
2242                 w.write( "</body>" );
2243                 w.write( SurfacingConstants.NL );
2244                 w.write( "</html>" );
2245                 w.write( SurfacingConstants.NL );
2246                 break;
2247         }
2248         w.flush();
2249         w.close();
2250         return stats;
2251     }
2252
2253     private static void writeDomainsToIndividualFilePerTreeNode( final Writer individual_files_writer,
2254                                                                  final String domain_0,
2255                                                                  final String domain_1 ) throws IOException {
2256         individual_files_writer.write( domain_0 );
2257         individual_files_writer.write( ForesterUtil.LINE_SEPARATOR );
2258         if ( !ForesterUtil.isEmpty( domain_1 ) ) {
2259             individual_files_writer.write( domain_1 );
2260             individual_files_writer.write( ForesterUtil.LINE_SEPARATOR );
2261         }
2262     }
2263
2264     public static void writeMatrixToFile( final CharacterStateMatrix<?> matrix,
2265                                           final String filename,
2266                                           final Format format ) {
2267         final File outfile = new File( filename );
2268         checkForOutputFileWriteability( outfile );
2269         try {
2270             final BufferedWriter out = new BufferedWriter( new FileWriter( outfile ) );
2271             matrix.toWriter( out, format );
2272             out.flush();
2273             out.close();
2274         }
2275         catch ( final IOException e ) {
2276             ForesterUtil.fatalError( surfacing_old.PRG_NAME, e.getMessage() );
2277         }
2278         ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Wrote matrix: \"" + filename + "\"" );
2279     }
2280
2281     public static void writeMatrixToFile( final File matrix_outfile, final List<DistanceMatrix> matrices ) {
2282         checkForOutputFileWriteability( matrix_outfile );
2283         try {
2284             final BufferedWriter out = new BufferedWriter( new FileWriter( matrix_outfile ) );
2285             for( final DistanceMatrix distance_matrix : matrices ) {
2286                 out.write( distance_matrix.toStringBuffer( DistanceMatrix.Format.PHYLIP ).toString() );
2287                 out.write( ForesterUtil.LINE_SEPARATOR );
2288                 out.flush();
2289             }
2290             out.close();
2291         }
2292         catch ( final IOException e ) {
2293             ForesterUtil.fatalError( surfacing_old.PRG_NAME, e.getMessage() );
2294         }
2295         ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Wrote distance matrices to \"" + matrix_outfile + "\"" );
2296     }
2297
2298     private static void writePfamsToFile( final String outfile_name, final SortedSet<String> pfams ) {
2299         try {
2300             final Writer writer = new BufferedWriter( new FileWriter( new File( outfile_name ) ) );
2301             for( final String pfam : pfams ) {
2302                 writer.write( pfam );
2303                 writer.write( ForesterUtil.LINE_SEPARATOR );
2304             }
2305             writer.close();
2306             ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Wrote " + pfams.size() + " pfams to [" + outfile_name
2307                     + "]" );
2308         }
2309         catch ( final IOException e ) {
2310             ForesterUtil.printWarningMessage( surfacing_old.PRG_NAME, "Failure to write: " + e );
2311         }
2312     }
2313
2314     public static void writePhylogenyToFile( final Phylogeny phylogeny, final String filename ) {
2315         final PhylogenyWriter writer = new PhylogenyWriter();
2316         try {
2317             writer.toPhyloXML( new File( filename ), phylogeny, 1 );
2318         }
2319         catch ( final IOException e ) {
2320             ForesterUtil.printWarningMessage( surfacing_old.PRG_NAME, "failed to write phylogeny to \"" + filename
2321                     + "\": " + e );
2322         }
2323         ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Wrote phylogeny to \"" + filename + "\"" );
2324     }
2325
2326     public static void writeTaxonomyLinks( final Writer writer, final String species ) throws IOException {
2327         if ( ( species.length() > 1 ) && ( species.indexOf( '_' ) < 1 ) ) {
2328             final Matcher matcher = PATTERN_SP_STYLE_TAXONOMY.matcher( species );
2329             writer.write( " [" );
2330             if ( matcher.matches() ) {
2331                 writer.write( "<a href=\"" + SurfacingConstants.UNIPROT_LINK + species
2332                         + "\" target=\"taxonomy_window\">uniprot</a>" );
2333             }
2334             else {
2335                 writer.write( "<a href=\"" + SurfacingConstants.EOL_LINK + species
2336                         + "\" target=\"taxonomy_window\">eol</a>" );
2337                 writer.write( "|" );
2338                 writer.write( "<a href=\"" + SurfacingConstants.TOL_LINK + species
2339                         + "\" target=\"taxonomy_window\">tol</a>" );
2340             }
2341             writer.write( "]" );
2342         }
2343     }
2344
2345     public static void writeTaxonomyLinksORIG( final Writer writer, final String species ) throws IOException {
2346         if ( ( species.length() > 1 ) && ( species.indexOf( '_' ) < 1 ) ) {
2347             final Matcher matcher = PATTERN_SP_STYLE_TAXONOMY.matcher( species );
2348             writer.write( " [" );
2349             if ( matcher.matches() ) {
2350                 writer.write( "<a href=\"" + SurfacingConstants.UNIPROT_LINK + species
2351                         + "\" target=\"taxonomy_window\">uniprot</a>" );
2352             }
2353             else {
2354                 writer.write( "<a href=\"" + SurfacingConstants.EOL_LINK + species
2355                         + "\" target=\"taxonomy_window\">eol</a>" );
2356                 writer.write( "|" );
2357                 writer.write( "<a href=\"" + SurfacingConstants.TOL_LINK + species
2358                         + "\" target=\"taxonomy_window\">tol</a>" );
2359                 writer.write( "|" );
2360                 writer.write( "<a href=\"" + SurfacingConstants.WIKIPEDIA_LINK + species
2361                         + "\" target=\"taxonomy_window\">wikipedia</a>" );
2362                 writer.write( "|" );
2363                 writer.write( "<a href=\"" + SurfacingConstants.GOOGLE_SCHOLAR_LINK + species
2364                         + "\" target=\"taxonomy_window\">gs</a>" );
2365             }
2366             writer.write( "]" );
2367         }
2368     }
2369
2370     private static void writeToNexus( final String outfile_name, final CharacterStateMatrix<BinaryStates> matrix ) {
2371         if ( !( matrix instanceof BasicCharacterStateMatrix ) ) {
2372             throw new IllegalArgumentException( "can only write matrices of type [" + BasicCharacterStateMatrix.class
2373                     + "] to nexus" );
2374         }
2375         final BasicCharacterStateMatrix<BinaryStates> my_matrix = ( org.forester.evoinference.matrix.character.BasicCharacterStateMatrix<BinaryStates> ) matrix;
2376         try {
2377             final BufferedWriter w = new BufferedWriter( new FileWriter( outfile_name ) );
2378             w.write( NexusConstants.NEXUS );
2379             w.write( ForesterUtil.LINE_SEPARATOR );
2380             my_matrix.writeNexusTaxaBlock( w );
2381             my_matrix.writeNexusBinaryChractersBlock( w );
2382             w.flush();
2383             w.close();
2384             ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Wrote Nexus file: \"" + outfile_name + "\"" );
2385         }
2386         catch ( final IOException e ) {
2387             ForesterUtil.fatalError( surfacing_old.PRG_NAME, e.getMessage() );
2388         }
2389     }
2390
2391     private static void writeToNexus( final String outfile_name,
2392                                       final CharacterStateMatrix<BinaryStates> matrix,
2393                                       final Phylogeny phylogeny ) {
2394         if ( !( matrix instanceof BasicCharacterStateMatrix ) ) {
2395             throw new IllegalArgumentException( "can only write matrices of type [" + BasicCharacterStateMatrix.class
2396                     + "] to nexus" );
2397         }
2398         final BasicCharacterStateMatrix<BinaryStates> my_matrix = ( org.forester.evoinference.matrix.character.BasicCharacterStateMatrix<BinaryStates> ) matrix;
2399         final List<Phylogeny> phylogenies = new ArrayList<Phylogeny>( 1 );
2400         phylogenies.add( phylogeny );
2401         try {
2402             final BufferedWriter w = new BufferedWriter( new FileWriter( outfile_name ) );
2403             w.write( NexusConstants.NEXUS );
2404             w.write( ForesterUtil.LINE_SEPARATOR );
2405             my_matrix.writeNexusTaxaBlock( w );
2406             my_matrix.writeNexusBinaryChractersBlock( w );
2407             PhylogenyWriter.writeNexusTreesBlock( w, phylogenies );
2408             w.flush();
2409             w.close();
2410             ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Wrote Nexus file: \"" + outfile_name + "\"" );
2411         }
2412         catch ( final IOException e ) {
2413             ForesterUtil.fatalError( surfacing_old.PRG_NAME, e.getMessage() );
2414         }
2415     }
2416
2417     private static void writeToNexus( final String outfile_name, final DomainParsimonyCalculator domain_parsimony ) {
2418         writeToNexus( outfile_name + surfacing_old.NEXUS_EXTERNAL_DOMAINS,
2419                       domain_parsimony.createMatrixOfDomainPresenceOrAbsence() );
2420         writeToNexus( outfile_name + surfacing_old.NEXUS_EXTERNAL_DOMAIN_COMBINATIONS,
2421                       domain_parsimony.createMatrixOfBinaryDomainCombinationPresenceOrAbsence() );
2422     }
2423
2424     private static void writeToNexus( final String outfile_name,
2425                                       final DomainParsimonyCalculator domain_parsimony,
2426                                       final Phylogeny phylogeny ) {
2427         writeToNexus( outfile_name + surfacing_old.NEXUS_EXTERNAL_DOMAINS,
2428                       domain_parsimony.createMatrixOfDomainPresenceOrAbsence(),
2429                       phylogeny );
2430         writeToNexus( outfile_name + surfacing_old.NEXUS_EXTERNAL_DOMAIN_COMBINATIONS,
2431                       domain_parsimony.createMatrixOfBinaryDomainCombinationPresenceOrAbsence(),
2432                       phylogeny );
2433     }
2434 }