cleanup
[jalview.git] / forester / java / src / org / forester / surfacing / SurfacingUtil.java
1 // $Id:
2 //
3 // FORESTER -- software libraries and applications
4 // for evolutionary biology research and applications.
5 //
6 // Copyright (C) 2008-2009 Christian M. Zmasek
7 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
8 // All rights reserved
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 //
24 // Contact: phylosoft @ gmail . com
25 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26
27 package org.forester.surfacing;
28
29 import java.awt.Color;
30 import java.io.BufferedWriter;
31 import java.io.File;
32 import java.io.FileWriter;
33 import java.io.IOException;
34 import java.io.Writer;
35 import java.text.DecimalFormat;
36 import java.text.NumberFormat;
37 import java.util.ArrayList;
38 import java.util.Arrays;
39 import java.util.Collections;
40 import java.util.Comparator;
41 import java.util.HashMap;
42 import java.util.HashSet;
43 import java.util.Iterator;
44 import java.util.List;
45 import java.util.Map;
46 import java.util.Map.Entry;
47 import java.util.PriorityQueue;
48 import java.util.Set;
49 import java.util.SortedMap;
50 import java.util.SortedSet;
51 import java.util.TreeMap;
52 import java.util.TreeSet;
53 import java.util.regex.Matcher;
54 import java.util.regex.Pattern;
55
56 import org.forester.application.surfacing;
57 import org.forester.evoinference.distance.NeighborJoining;
58 import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix;
59 import org.forester.evoinference.matrix.character.CharacterStateMatrix;
60 import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates;
61 import org.forester.evoinference.matrix.character.CharacterStateMatrix.Format;
62 import org.forester.evoinference.matrix.character.CharacterStateMatrix.GainLossStates;
63 import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
64 import org.forester.evoinference.matrix.distance.DistanceMatrix;
65 import org.forester.go.GoId;
66 import org.forester.go.GoNameSpace;
67 import org.forester.go.GoTerm;
68 import org.forester.go.PfamToGoMapping;
69 import org.forester.io.parsers.nexus.NexusConstants;
70 import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
71 import org.forester.io.parsers.util.ParserUtils;
72 import org.forester.io.writers.PhylogenyWriter;
73 import org.forester.phylogeny.Phylogeny;
74 import org.forester.phylogeny.PhylogenyMethods;
75 import org.forester.phylogeny.PhylogenyNode;
76 import org.forester.phylogeny.PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE;
77 import org.forester.phylogeny.data.BinaryCharacters;
78 import org.forester.phylogeny.data.Confidence;
79 import org.forester.phylogeny.data.Taxonomy;
80 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
81 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
82 import org.forester.protein.BasicDomain;
83 import org.forester.protein.BasicProtein;
84 import org.forester.protein.BinaryDomainCombination;
85 import org.forester.protein.Domain;
86 import org.forester.protein.Protein;
87 import org.forester.species.Species;
88 import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
89 import org.forester.surfacing.GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder;
90 import org.forester.surfacing.PrintableDomainSimilarity.PRINT_OPTION;
91 import org.forester.util.AsciiHistogram;
92 import org.forester.util.BasicDescriptiveStatistics;
93 import org.forester.util.BasicTable;
94 import org.forester.util.BasicTableParser;
95 import org.forester.util.CommandLineArguments;
96 import org.forester.util.DescriptiveStatistics;
97 import org.forester.util.ForesterUtil;
98 import org.forester.util.TaxonomyColors;
99
100 public final class SurfacingUtil {
101
102     public final static Pattern              PATTERN_SP_STYLE_TAXONOMY        = Pattern.compile( "^[A-Z0-9]{3,5}$" );
103     private final static Map<String, String> _TAXCODE_HEXCOLORSTRING_MAP      = new HashMap<String, String>();
104     private static final Comparator<Domain>  ASCENDING_CONFIDENCE_VALUE_ORDER = new Comparator<Domain>() {
105
106                                                                                   @Override
107                                                                                   public int compare( final Domain d1,
108                                                                                                       final Domain d2 ) {
109                                                                                       if ( d1.getPerSequenceEvalue() < d2
110                                                                                               .getPerSequenceEvalue() ) {
111                                                                                           return -1;
112                                                                                       }
113                                                                                       else if ( d1
114                                                                                               .getPerSequenceEvalue() > d2
115                                                                                               .getPerSequenceEvalue() ) {
116                                                                                           return 1;
117                                                                                       }
118                                                                                       else {
119                                                                                           return d1.compareTo( d2 );
120                                                                                       }
121                                                                                   }
122                                                                               };
123     private final static NumberFormat        FORMATTER_3                      = new DecimalFormat( "0.000" );
124     private SurfacingUtil() {
125         // Hidden constructor.
126     }
127
128     public static void addAllBinaryDomainCombinationToSet( final GenomeWideCombinableDomains genome,
129                                                            final SortedSet<BinaryDomainCombination> binary_domain_combinations ) {
130         final SortedMap<String, CombinableDomains> all_cd = genome.getAllCombinableDomainsIds();
131         for( final String domain_id : all_cd.keySet() ) {
132             binary_domain_combinations.addAll( all_cd.get( domain_id ).toBinaryDomainCombinations() );
133         }
134     }
135
136     public static void addAllDomainIdsToSet( final GenomeWideCombinableDomains genome,
137                                              final SortedSet<String> domain_ids ) {
138         final SortedSet<String> domains = genome.getAllDomainIds();
139         for( final String domain : domains ) {
140             domain_ids.add( domain );
141         }
142     }
143
144     public static DescriptiveStatistics calculateDescriptiveStatisticsForMeanValues( final Set<DomainSimilarity> similarities ) {
145         final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
146         for( final DomainSimilarity similarity : similarities ) {
147             stats.addValue( similarity.getMeanSimilarityScore() );
148         }
149         return stats;
150     }
151
152     public static void checkForOutputFileWriteability( final File outfile ) {
153         final String error = ForesterUtil.isWritableFile( outfile );
154         if ( !ForesterUtil.isEmpty( error ) ) {
155             ForesterUtil.fatalError( surfacing.PRG_NAME, error );
156         }
157     }
158
159     public static void checkWriteabilityForPairwiseComparisons( final PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option,
160                                                                 final String[][] input_file_properties,
161                                                                 final String automated_pairwise_comparison_suffix,
162                                                                 final File outdir ) {
163         for( int i = 0; i < input_file_properties.length; ++i ) {
164             for( int j = 0; j < i; ++j ) {
165                 final String species_i = input_file_properties[ i ][ 1 ];
166                 final String species_j = input_file_properties[ j ][ 1 ];
167                 String pairwise_similarities_output_file_str = surfacing.PAIRWISE_DOMAIN_COMPARISONS_PREFIX + species_i
168                         + "_" + species_j + automated_pairwise_comparison_suffix;
169                 switch ( domain_similarity_print_option ) {
170                     case HTML:
171                         if ( !pairwise_similarities_output_file_str.endsWith( ".html" ) ) {
172                             pairwise_similarities_output_file_str += ".html";
173                         }
174                         break;
175                 }
176                 final String error = ForesterUtil
177                         .isWritableFile( new File( outdir == null ? pairwise_similarities_output_file_str : outdir
178                                 + ForesterUtil.FILE_SEPARATOR + pairwise_similarities_output_file_str ) );
179                 if ( !ForesterUtil.isEmpty( error ) ) {
180                     ForesterUtil.fatalError( surfacing.PRG_NAME, error );
181                 }
182             }
183         }
184     }
185
186     public static void collectChangedDomainCombinationsFromBinaryStatesMatrixAsListToFile( final CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix,
187                                                                                            final BinaryDomainCombination.DomainCombinationType dc_type,
188                                                                                            final List<BinaryDomainCombination> all_binary_domains_combination_gained,
189                                                                                            final boolean get_gains ) {
190         final SortedSet<String> sorted_ids = new TreeSet<String>();
191         for( int i = 0; i < matrix.getNumberOfIdentifiers(); ++i ) {
192             sorted_ids.add( matrix.getIdentifier( i ) );
193         }
194         for( final String id : sorted_ids ) {
195             for( int c = 0; c < matrix.getNumberOfCharacters(); ++c ) {
196                 if ( ( get_gains && ( matrix.getState( id, c ) == CharacterStateMatrix.GainLossStates.GAIN ) )
197                         || ( !get_gains && ( matrix.getState( id, c ) == CharacterStateMatrix.GainLossStates.LOSS ) ) ) {
198                     if ( dc_type == BinaryDomainCombination.DomainCombinationType.DIRECTED_ADJACTANT ) {
199                         all_binary_domains_combination_gained.add( AdjactantDirectedBinaryDomainCombination
200                                 .createInstance( matrix.getCharacter( c ) ) );
201                     }
202                     else if ( dc_type == BinaryDomainCombination.DomainCombinationType.DIRECTED ) {
203                         all_binary_domains_combination_gained.add( DirectedBinaryDomainCombination
204                                 .createInstance( matrix.getCharacter( c ) ) );
205                     }
206                     else {
207                         all_binary_domains_combination_gained.add( BasicBinaryDomainCombination.createInstance( matrix
208                                 .getCharacter( c ) ) );
209                     }
210                 }
211             }
212         }
213     }
214
215     public static Map<String, List<GoId>> createDomainIdToGoIdMap( final List<PfamToGoMapping> pfam_to_go_mappings ) {
216         final Map<String, List<GoId>> domain_id_to_go_ids_map = new HashMap<String, List<GoId>>( pfam_to_go_mappings.size() );
217         for( final PfamToGoMapping pfam_to_go : pfam_to_go_mappings ) {
218             if ( !domain_id_to_go_ids_map.containsKey( pfam_to_go.getKey() ) ) {
219                 domain_id_to_go_ids_map.put( pfam_to_go.getKey(), new ArrayList<GoId>() );
220             }
221             domain_id_to_go_ids_map.get( pfam_to_go.getKey() ).add( pfam_to_go.getValue() );
222         }
223         return domain_id_to_go_ids_map;
224     }
225
226     public static Map<String, Set<String>> createDomainIdToSecondaryFeaturesMap( final File secondary_features_map_file )
227             throws IOException {
228         final BasicTable<String> primary_table = BasicTableParser.parse( secondary_features_map_file, '\t' );
229         final Map<String, Set<String>> map = new TreeMap<String, Set<String>>();
230         for( int r = 0; r < primary_table.getNumberOfRows(); ++r ) {
231             final String domain_id = primary_table.getValue( 0, r );
232             if ( !map.containsKey( domain_id ) ) {
233                 map.put( domain_id, new HashSet<String>() );
234             }
235             map.get( domain_id ).add( primary_table.getValue( 1, r ) );
236         }
237         return map;
238     }
239
240     public static Phylogeny createNjTreeBasedOnMatrixToFile( final File nj_tree_outfile, final DistanceMatrix distance ) {
241         checkForOutputFileWriteability( nj_tree_outfile );
242         final NeighborJoining nj = NeighborJoining.createInstance();
243         final Phylogeny phylogeny = nj.execute( ( BasicSymmetricalDistanceMatrix ) distance );
244         phylogeny.setName( nj_tree_outfile.getName() );
245         writePhylogenyToFile( phylogeny, nj_tree_outfile.toString() );
246         return phylogeny;
247     }
248
249     public static StringBuilder createParametersAsString( final boolean ignore_dufs,
250                                                           final double e_value_max,
251                                                           final int max_allowed_overlap,
252                                                           final boolean no_engulfing_overlaps,
253                                                           final File cutoff_scores_file,
254                                                           final BinaryDomainCombination.DomainCombinationType dc_type ) {
255         final StringBuilder parameters_sb = new StringBuilder();
256         parameters_sb.append( "E-value: " + e_value_max );
257         if ( cutoff_scores_file != null ) {
258             parameters_sb.append( ", Cutoff-scores-file: " + cutoff_scores_file );
259         }
260         else {
261             parameters_sb.append( ", Cutoff-scores-file: not-set" );
262         }
263         if ( max_allowed_overlap != surfacing.MAX_ALLOWED_OVERLAP_DEFAULT ) {
264             parameters_sb.append( ", Max-overlap: " + max_allowed_overlap );
265         }
266         else {
267             parameters_sb.append( ", Max-overlap: not-set" );
268         }
269         if ( no_engulfing_overlaps ) {
270             parameters_sb.append( ", Engulfing-overlaps: not-allowed" );
271         }
272         else {
273             parameters_sb.append( ", Engulfing-overlaps: allowed" );
274         }
275         if ( ignore_dufs ) {
276             parameters_sb.append( ", Ignore-dufs: true" );
277         }
278         else {
279             parameters_sb.append( ", Ignore-dufs: false" );
280         }
281         parameters_sb.append( ", DC type (if applicable): " + dc_type );
282         return parameters_sb;
283     }
284
285     public static void createSplitWriters( final File out_dir,
286                                            final String my_outfile,
287                                            final Map<Character, Writer> split_writers ) throws IOException {
288         split_writers.put( 'a', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
289                 + "_domains_A.html" ) ) );
290         split_writers.put( 'b', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
291                 + "_domains_B.html" ) ) );
292         split_writers.put( 'c', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
293                 + "_domains_C.html" ) ) );
294         split_writers.put( 'd', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
295                 + "_domains_D.html" ) ) );
296         split_writers.put( 'e', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
297                 + "_domains_E.html" ) ) );
298         split_writers.put( 'f', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
299                 + "_domains_F.html" ) ) );
300         split_writers.put( 'g', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
301                 + "_domains_G.html" ) ) );
302         split_writers.put( 'h', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
303                 + "_domains_H.html" ) ) );
304         split_writers.put( 'i', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
305                 + "_domains_I.html" ) ) );
306         split_writers.put( 'j', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
307                 + "_domains_J.html" ) ) );
308         split_writers.put( 'k', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
309                 + "_domains_K.html" ) ) );
310         split_writers.put( 'l', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
311                 + "_domains_L.html" ) ) );
312         split_writers.put( 'm', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
313                 + "_domains_M.html" ) ) );
314         split_writers.put( 'n', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
315                 + "_domains_N.html" ) ) );
316         split_writers.put( 'o', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
317                 + "_domains_O.html" ) ) );
318         split_writers.put( 'p', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
319                 + "_domains_P.html" ) ) );
320         split_writers.put( 'q', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
321                 + "_domains_Q.html" ) ) );
322         split_writers.put( 'r', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
323                 + "_domains_R.html" ) ) );
324         split_writers.put( 's', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
325                 + "_domains_S.html" ) ) );
326         split_writers.put( 't', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
327                 + "_domains_T.html" ) ) );
328         split_writers.put( 'u', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
329                 + "_domains_U.html" ) ) );
330         split_writers.put( 'v', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
331                 + "_domains_V.html" ) ) );
332         split_writers.put( 'w', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
333                 + "_domains_W.html" ) ) );
334         split_writers.put( 'x', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
335                 + "_domains_X.html" ) ) );
336         split_writers.put( 'y', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
337                 + "_domains_Y.html" ) ) );
338         split_writers.put( 'z', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
339                 + "_domains_Z.html" ) ) );
340         split_writers.put( '0', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
341                 + "_domains_0.html" ) ) );
342     }
343
344     public static Map<String, Integer> createTaxCodeToIdMap( final Phylogeny phy ) {
345         final Map<String, Integer> m = new HashMap<String, Integer>();
346         for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
347             final PhylogenyNode n = iter.next();
348             if ( n.getNodeData().isHasTaxonomy() ) {
349                 final Taxonomy t = n.getNodeData().getTaxonomy();
350                 final String c = t.getTaxonomyCode();
351                 if ( !ForesterUtil.isEmpty( c ) ) {
352                     if ( n.getNodeData().getTaxonomy() == null ) {
353                         ForesterUtil.fatalError( surfacing.PRG_NAME, "no taxonomy id for node " + n );
354                     }
355                     final String id = n.getNodeData().getTaxonomy().getIdentifier().getValue();
356                     if ( ForesterUtil.isEmpty( id ) ) {
357                         ForesterUtil.fatalError( surfacing.PRG_NAME, "no taxonomy id for node " + n );
358                     }
359                     if ( m.containsKey( c ) ) {
360                         ForesterUtil.fatalError( surfacing.PRG_NAME, "taxonomy code " + c + " is not unique" );
361                     }
362                     final int iid = Integer.valueOf( id );
363                     if ( m.containsValue( iid ) ) {
364                         ForesterUtil.fatalError( surfacing.PRG_NAME, "taxonomy id " + iid + " is not unique" );
365                     }
366                     m.put( c, iid );
367                 }
368             }
369             else {
370                 ForesterUtil.fatalError( surfacing.PRG_NAME, "no taxonomy for node " + n );
371             }
372         }
373         return m;
374     }
375
376     public static void decoratePrintableDomainSimilarities( final SortedSet<DomainSimilarity> domain_similarities,
377                                                             final Detailedness detailedness ) {
378         for( final DomainSimilarity domain_similarity : domain_similarities ) {
379             if ( domain_similarity instanceof PrintableDomainSimilarity ) {
380                 final PrintableDomainSimilarity printable_domain_similarity = ( PrintableDomainSimilarity ) domain_similarity;
381                 printable_domain_similarity.setDetailedness( detailedness );
382             }
383         }
384     }
385
386     public static void doit( final List<Protein> proteins,
387                              final List<String> query_domain_ids_nc_order,
388                              final Writer out,
389                              final String separator,
390                              final String limit_to_species,
391                              final Map<String, List<Integer>> average_protein_lengths_by_dc ) throws IOException {
392         for( final Protein protein : proteins ) {
393             if ( ForesterUtil.isEmpty( limit_to_species )
394                     || protein.getSpecies().getSpeciesId().equalsIgnoreCase( limit_to_species ) ) {
395                 if ( protein.contains( query_domain_ids_nc_order, true ) ) {
396                     out.write( protein.getSpecies().getSpeciesId() );
397                     out.write( separator );
398                     out.write( protein.getProteinId().getId() );
399                     out.write( separator );
400                     out.write( "[" );
401                     final Set<String> visited_domain_ids = new HashSet<String>();
402                     boolean first = true;
403                     for( final Domain domain : protein.getProteinDomains() ) {
404                         if ( !visited_domain_ids.contains( domain.getDomainId() ) ) {
405                             visited_domain_ids.add( domain.getDomainId() );
406                             if ( first ) {
407                                 first = false;
408                             }
409                             else {
410                                 out.write( " " );
411                             }
412                             out.write( domain.getDomainId() );
413                             out.write( " {" );
414                             out.write( "" + domain.getTotalCount() );
415                             out.write( "}" );
416                         }
417                     }
418                     out.write( "]" );
419                     out.write( separator );
420                     if ( !( ForesterUtil.isEmpty( protein.getDescription() ) || protein.getDescription()
421                             .equals( SurfacingConstants.NONE ) ) ) {
422                         out.write( protein.getDescription() );
423                     }
424                     out.write( separator );
425                     if ( !( ForesterUtil.isEmpty( protein.getAccession() ) || protein.getAccession()
426                             .equals( SurfacingConstants.NONE ) ) ) {
427                         out.write( protein.getAccession() );
428                     }
429                     out.write( SurfacingConstants.NL );
430                 }
431             }
432         }
433         out.flush();
434     }
435
436     public static void domainsPerProteinsStatistics( final String genome,
437                                                      final List<Protein> protein_list,
438                                                      final DescriptiveStatistics all_genomes_domains_per_potein_stats,
439                                                      final SortedMap<Integer, Integer> all_genomes_domains_per_potein_histo,
440                                                      final SortedSet<String> domains_which_are_always_single,
441                                                      final SortedSet<String> domains_which_are_sometimes_single_sometimes_not,
442                                                      final SortedSet<String> domains_which_never_single,
443                                                      final Writer writer ) {
444         final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
445         for( final Protein protein : protein_list ) {
446             final int domains = protein.getNumberOfProteinDomains();
447             //System.out.println( domains );
448             stats.addValue( domains );
449             all_genomes_domains_per_potein_stats.addValue( domains );
450             if ( !all_genomes_domains_per_potein_histo.containsKey( domains ) ) {
451                 all_genomes_domains_per_potein_histo.put( domains, 1 );
452             }
453             else {
454                 all_genomes_domains_per_potein_histo.put( domains,
455                                                           1 + all_genomes_domains_per_potein_histo.get( domains ) );
456             }
457             if ( domains == 1 ) {
458                 final String domain = protein.getProteinDomain( 0 ).getDomainId();
459                 if ( !domains_which_are_sometimes_single_sometimes_not.contains( domain ) ) {
460                     if ( domains_which_never_single.contains( domain ) ) {
461                         domains_which_never_single.remove( domain );
462                         domains_which_are_sometimes_single_sometimes_not.add( domain );
463                     }
464                     else {
465                         domains_which_are_always_single.add( domain );
466                     }
467                 }
468             }
469             else if ( domains > 1 ) {
470                 for( final Domain d : protein.getProteinDomains() ) {
471                     final String domain = d.getDomainId();
472                     // System.out.println( domain );
473                     if ( !domains_which_are_sometimes_single_sometimes_not.contains( domain ) ) {
474                         if ( domains_which_are_always_single.contains( domain ) ) {
475                             domains_which_are_always_single.remove( domain );
476                             domains_which_are_sometimes_single_sometimes_not.add( domain );
477                         }
478                         else {
479                             domains_which_never_single.add( domain );
480                         }
481                     }
482                 }
483             }
484         }
485         try {
486             writer.write( genome );
487             writer.write( "\t" );
488             if ( stats.getN() >= 1 ) {
489                 writer.write( stats.arithmeticMean() + "" );
490                 writer.write( "\t" );
491                 if ( stats.getN() >= 2 ) {
492                     writer.write( stats.sampleStandardDeviation() + "" );
493                 }
494                 else {
495                     writer.write( "" );
496                 }
497                 writer.write( "\t" );
498                 writer.write( stats.median() + "" );
499                 writer.write( "\t" );
500                 writer.write( stats.getN() + "" );
501                 writer.write( "\t" );
502                 writer.write( stats.getMin() + "" );
503                 writer.write( "\t" );
504                 writer.write( stats.getMax() + "" );
505             }
506             else {
507                 writer.write( "\t" );
508                 writer.write( "\t" );
509                 writer.write( "\t" );
510                 writer.write( "0" );
511                 writer.write( "\t" );
512                 writer.write( "\t" );
513             }
514             writer.write( "\n" );
515         }
516         catch ( final IOException e ) {
517             e.printStackTrace();
518         }
519     }
520
521     public static void executeDomainLengthAnalysis( final String[][] input_file_properties,
522                                                     final int number_of_genomes,
523                                                     final DomainLengthsTable domain_lengths_table,
524                                                     final File outfile ) throws IOException {
525         final DecimalFormat df = new DecimalFormat( "#.00" );
526         checkForOutputFileWriteability( outfile );
527         final BufferedWriter out = new BufferedWriter( new FileWriter( outfile ) );
528         out.write( "MEAN BASED STATISTICS PER SPECIES" );
529         out.write( ForesterUtil.LINE_SEPARATOR );
530         out.write( domain_lengths_table.createMeanBasedStatisticsPerSpeciesTable().toString() );
531         out.write( ForesterUtil.LINE_SEPARATOR );
532         out.write( ForesterUtil.LINE_SEPARATOR );
533         final List<DomainLengths> domain_lengths_list = domain_lengths_table.getDomainLengthsList();
534         out.write( "OUTLIER SPECIES PER DOMAIN (Z>=1.5)" );
535         out.write( ForesterUtil.LINE_SEPARATOR );
536         for( final DomainLengths domain_lengths : domain_lengths_list ) {
537             final List<Species> species_list = domain_lengths.getMeanBasedOutlierSpecies( 1.5 );
538             if ( species_list.size() > 0 ) {
539                 out.write( domain_lengths.getDomainId() + "\t" );
540                 for( final Species species : species_list ) {
541                     out.write( species + "\t" );
542                 }
543                 out.write( ForesterUtil.LINE_SEPARATOR );
544             }
545         }
546         out.write( ForesterUtil.LINE_SEPARATOR );
547         out.write( ForesterUtil.LINE_SEPARATOR );
548         out.write( "OUTLIER SPECIES (Z 1.0)" );
549         out.write( ForesterUtil.LINE_SEPARATOR );
550         final DescriptiveStatistics stats_for_all_species = domain_lengths_table
551                 .calculateMeanBasedStatisticsForAllSpecies();
552         out.write( stats_for_all_species.asSummary() );
553         out.write( ForesterUtil.LINE_SEPARATOR );
554         final AsciiHistogram histo = new AsciiHistogram( stats_for_all_species );
555         out.write( histo.toStringBuffer( 40, '=', 60, 4 ).toString() );
556         out.write( ForesterUtil.LINE_SEPARATOR );
557         final double population_sd = stats_for_all_species.sampleStandardDeviation();
558         final double population_mean = stats_for_all_species.arithmeticMean();
559         for( final Species species : domain_lengths_table.getSpecies() ) {
560             final double x = domain_lengths_table.calculateMeanBasedStatisticsForSpecies( species ).arithmeticMean();
561             final double z = ( x - population_mean ) / population_sd;
562             out.write( species + "\t" + z );
563             out.write( ForesterUtil.LINE_SEPARATOR );
564         }
565         out.write( ForesterUtil.LINE_SEPARATOR );
566         for( final Species species : domain_lengths_table.getSpecies() ) {
567             final DescriptiveStatistics stats_for_species = domain_lengths_table
568                     .calculateMeanBasedStatisticsForSpecies( species );
569             final double x = stats_for_species.arithmeticMean();
570             final double z = ( x - population_mean ) / population_sd;
571             if ( ( z <= -1.0 ) || ( z >= 1.0 ) ) {
572                 out.write( species + "\t" + df.format( z ) + "\t" + stats_for_species.asSummary() );
573                 out.write( ForesterUtil.LINE_SEPARATOR );
574             }
575         }
576         out.close();
577         System.gc();
578     }
579
580     /**
581      * Warning: This side-effects 'all_bin_domain_combinations_encountered'!
582      * 
583      * 
584      * @param output_file
585      * @param all_bin_domain_combinations_changed
586      * @param sum_of_all_domains_encountered
587      * @param all_bin_domain_combinations_encountered
588      * @param is_gains_analysis
589      * @param protein_length_stats_by_dc 
590      * @throws IOException
591      */
592     public static void executeFitchGainsAnalysis( final File output_file,
593                                                   final List<BinaryDomainCombination> all_bin_domain_combinations_changed,
594                                                   final int sum_of_all_domains_encountered,
595                                                   final SortedSet<BinaryDomainCombination> all_bin_domain_combinations_encountered,
596                                                   final boolean is_gains_analysis ) throws IOException {
597         checkForOutputFileWriteability( output_file );
598         final Writer out = ForesterUtil.createBufferedWriter( output_file );
599         final SortedMap<Object, Integer> bdc_to_counts = ForesterUtil
600                 .listToSortedCountsMap( all_bin_domain_combinations_changed );
601         final SortedSet<String> all_domains_in_combination_changed_more_than_once = new TreeSet<String>();
602         final SortedSet<String> all_domains_in_combination_changed_only_once = new TreeSet<String>();
603         int above_one = 0;
604         int one = 0;
605         for( final Object bdc_object : bdc_to_counts.keySet() ) {
606             final BinaryDomainCombination bdc = ( BinaryDomainCombination ) bdc_object;
607             final int count = bdc_to_counts.get( bdc_object );
608             if ( count < 1 ) {
609                 ForesterUtil.unexpectedFatalError( surfacing.PRG_NAME, "count < 1 " );
610             }
611             out.write( bdc + "\t" + count + ForesterUtil.LINE_SEPARATOR );
612             if ( count > 1 ) {
613                 all_domains_in_combination_changed_more_than_once.add( bdc.getId0() );
614                 all_domains_in_combination_changed_more_than_once.add( bdc.getId1() );
615                 above_one++;
616             }
617             else if ( count == 1 ) {
618                 all_domains_in_combination_changed_only_once.add( bdc.getId0() );
619                 all_domains_in_combination_changed_only_once.add( bdc.getId1() );
620                 one++;
621             }
622         }
623         final int all = all_bin_domain_combinations_encountered.size();
624         int never_lost = -1;
625         if ( !is_gains_analysis ) {
626             all_bin_domain_combinations_encountered.removeAll( all_bin_domain_combinations_changed );
627             never_lost = all_bin_domain_combinations_encountered.size();
628             for( final BinaryDomainCombination bdc : all_bin_domain_combinations_encountered ) {
629                 out.write( bdc + "\t" + "0" + ForesterUtil.LINE_SEPARATOR );
630             }
631         }
632         if ( is_gains_analysis ) {
633             out.write( "Sum of all distinct domain combinations appearing once               : " + one
634                     + ForesterUtil.LINE_SEPARATOR );
635             out.write( "Sum of all distinct domain combinations appearing more than once     : " + above_one
636                     + ForesterUtil.LINE_SEPARATOR );
637             out.write( "Sum of all distinct domains in combinations apppearing only once     : "
638                     + all_domains_in_combination_changed_only_once.size() + ForesterUtil.LINE_SEPARATOR );
639             out.write( "Sum of all distinct domains in combinations apppearing more than once: "
640                     + all_domains_in_combination_changed_more_than_once.size() + ForesterUtil.LINE_SEPARATOR );
641         }
642         else {
643             out.write( "Sum of all distinct domain combinations never lost                   : " + never_lost
644                     + ForesterUtil.LINE_SEPARATOR );
645             out.write( "Sum of all distinct domain combinations lost once                    : " + one
646                     + ForesterUtil.LINE_SEPARATOR );
647             out.write( "Sum of all distinct domain combinations lost more than once          : " + above_one
648                     + ForesterUtil.LINE_SEPARATOR );
649             out.write( "Sum of all distinct domains in combinations lost only once           : "
650                     + all_domains_in_combination_changed_only_once.size() + ForesterUtil.LINE_SEPARATOR );
651             out.write( "Sum of all distinct domains in combinations lost more than once: "
652                     + all_domains_in_combination_changed_more_than_once.size() + ForesterUtil.LINE_SEPARATOR );
653         }
654         out.write( "All binary combinations                                              : " + all
655                 + ForesterUtil.LINE_SEPARATOR );
656         out.write( "All domains                                                          : "
657                 + sum_of_all_domains_encountered );
658         out.close();
659         ForesterUtil.programMessage( surfacing.PRG_NAME,
660                                      "Wrote fitch domain combination dynamics counts analysis to \"" + output_file
661                                              + "\"" );
662     }
663
664     /**
665      * 
666      * @param all_binary_domains_combination_lost_fitch 
667      * @param use_last_in_fitch_parsimony 
668      * @param consider_directedness_and_adjacency_for_bin_combinations 
669      * @param all_binary_domains_combination_gained if null ignored, otherwise this is to list all binary domain combinations
670      * which were gained under unweighted (Fitch) parsimony.
671      */
672     public static void executeParsimonyAnalysis( final long random_number_seed_for_fitch_parsimony,
673                                                  final boolean radomize_fitch_parsimony,
674                                                  final String outfile_name,
675                                                  final DomainParsimonyCalculator domain_parsimony,
676                                                  final Phylogeny phylogeny,
677                                                  final Map<String, List<GoId>> domain_id_to_go_ids_map,
678                                                  final Map<GoId, GoTerm> go_id_to_term_map,
679                                                  final GoNameSpace go_namespace_limit,
680                                                  final String parameters_str,
681                                                  final Map<String, Set<String>>[] domain_id_to_secondary_features_maps,
682                                                  final SortedSet<String> positive_filter,
683                                                  final boolean output_binary_domain_combinations_for_graphs,
684                                                  final List<BinaryDomainCombination> all_binary_domains_combination_gained_fitch,
685                                                  final List<BinaryDomainCombination> all_binary_domains_combination_lost_fitch,
686                                                  final BinaryDomainCombination.DomainCombinationType dc_type,
687                                                  final Map<String, DescriptiveStatistics> protein_length_stats_by_dc,
688                                                  final Map<String, DescriptiveStatistics> domain_number_stats_by_dc,
689                                                  final Map<String, DescriptiveStatistics> domain_length_stats_by_domain,
690                                                  final Map<String, Integer> tax_code_to_id_map,
691                                                  final boolean write_to_nexus,
692                                                  final boolean use_last_in_fitch_parsimony ) {
693         final String sep = ForesterUtil.LINE_SEPARATOR + "###################" + ForesterUtil.LINE_SEPARATOR;
694         final String date_time = ForesterUtil.getCurrentDateTime();
695         final SortedSet<String> all_pfams_encountered = new TreeSet<String>();
696         final SortedSet<String> all_pfams_gained_as_domains = new TreeSet<String>();
697         final SortedSet<String> all_pfams_lost_as_domains = new TreeSet<String>();
698         final SortedSet<String> all_pfams_gained_as_dom_combinations = new TreeSet<String>();
699         final SortedSet<String> all_pfams_lost_as_dom_combinations = new TreeSet<String>();
700         if ( write_to_nexus ) {
701             writeToNexus( outfile_name, domain_parsimony, phylogeny );
702         }
703         // DOLLO DOMAINS
704         // -------------
705         Phylogeny local_phylogeny_l = phylogeny.copy();
706         if ( ( positive_filter != null ) && ( positive_filter.size() > 0 ) ) {
707             domain_parsimony.executeDolloParsimonyOnDomainPresence( positive_filter );
708         }
709         else {
710             domain_parsimony.executeDolloParsimonyOnDomainPresence();
711         }
712         SurfacingUtil.writeMatrixToFile( domain_parsimony.getGainLossMatrix(), outfile_name
713                 + surfacing.PARSIMONY_OUTPUT_GL_SUFFIX_DOLLO_DOMAINS, Format.FORESTER );
714         SurfacingUtil.writeMatrixToFile( domain_parsimony.getGainLossCountsMatrix(), outfile_name
715                 + surfacing.PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_DOLLO_DOMAINS, Format.FORESTER );
716         SurfacingUtil.writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(),
717                                                            CharacterStateMatrix.GainLossStates.GAIN,
718                                                            outfile_name + surfacing.PARSIMONY_OUTPUT_DOLLO_GAINS_D,
719                                                            sep,
720                                                            ForesterUtil.LINE_SEPARATOR,
721                                                            null );
722         SurfacingUtil.writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(),
723                                                            CharacterStateMatrix.GainLossStates.LOSS,
724                                                            outfile_name + surfacing.PARSIMONY_OUTPUT_DOLLO_LOSSES_D,
725                                                            sep,
726                                                            ForesterUtil.LINE_SEPARATOR,
727                                                            null );
728         SurfacingUtil.writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(), null, outfile_name
729                 + surfacing.PARSIMONY_OUTPUT_DOLLO_PRESENT_D, sep, ForesterUtil.LINE_SEPARATOR, null );
730         //HTML:
731         writeBinaryStatesMatrixToList( domain_id_to_go_ids_map,
732                                        go_id_to_term_map,
733                                        go_namespace_limit,
734                                        false,
735                                        domain_parsimony.getGainLossMatrix(),
736                                        CharacterStateMatrix.GainLossStates.GAIN,
737                                        outfile_name + surfacing.PARSIMONY_OUTPUT_DOLLO_GAINS_HTML_D,
738                                        sep,
739                                        ForesterUtil.LINE_SEPARATOR,
740                                        "Dollo Parsimony | Gains | Domains",
741                                        "+",
742                                        domain_id_to_secondary_features_maps,
743                                        all_pfams_encountered,
744                                        all_pfams_gained_as_domains,
745                                        "_dollo_gains_d",
746                                        tax_code_to_id_map );
747         writeBinaryStatesMatrixToList( domain_id_to_go_ids_map,
748                                        go_id_to_term_map,
749                                        go_namespace_limit,
750                                        false,
751                                        domain_parsimony.getGainLossMatrix(),
752                                        CharacterStateMatrix.GainLossStates.LOSS,
753                                        outfile_name + surfacing.PARSIMONY_OUTPUT_DOLLO_LOSSES_HTML_D,
754                                        sep,
755                                        ForesterUtil.LINE_SEPARATOR,
756                                        "Dollo Parsimony | Losses | Domains",
757                                        "-",
758                                        domain_id_to_secondary_features_maps,
759                                        all_pfams_encountered,
760                                        all_pfams_lost_as_domains,
761                                        "_dollo_losses_d",
762                                        tax_code_to_id_map );
763         //        writeBinaryStatesMatrixToList( domain_id_to_go_ids_map,
764         //                                       go_id_to_term_map,
765         //                                       go_namespace_limit,
766         //                                       false,
767         //                                       domain_parsimony.getGainLossMatrix(),
768         //                                       null,
769         //                                       outfile_name + surfacing.PARSIMONY_OUTPUT_DOLLO_PRESENT_HTML_D,
770         //                                       sep,
771         //                                       ForesterUtil.LINE_SEPARATOR,
772         //                                       "Dollo Parsimony | Present | Domains",
773         //                                       "",
774         //                                       domain_id_to_secondary_features_maps,
775         //                                       all_pfams_encountered,
776         //                                       null,
777         //                                       "_dollo_present_d",
778         //                                       tax_code_to_id_map );
779         preparePhylogeny( local_phylogeny_l,
780                           domain_parsimony,
781                           date_time,
782                           "Dollo parsimony on domain presence/absence",
783                           "dollo_on_domains_" + outfile_name,
784                           parameters_str );
785         SurfacingUtil.writePhylogenyToFile( local_phylogeny_l, outfile_name
786                 + surfacing.DOMAINS_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO );
787         try {
788             writeAllDomainsChangedOnAllSubtrees( local_phylogeny_l, true, outfile_name, "_dollo_all_gains_d" );
789             writeAllDomainsChangedOnAllSubtrees( local_phylogeny_l, false, outfile_name, "_dollo_all_losses_d" );
790         }
791         catch ( final IOException e ) {
792             e.printStackTrace();
793             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() );
794         }
795         if ( domain_parsimony.calculateNumberOfBinaryDomainCombination() > 0 ) {
796             // FITCH DOMAIN COMBINATIONS
797             // -------------------------
798             local_phylogeny_l = phylogeny.copy();
799             String randomization = "no";
800             if ( radomize_fitch_parsimony ) {
801                 domain_parsimony.executeFitchParsimonyOnBinaryDomainCombintion( random_number_seed_for_fitch_parsimony );
802                 randomization = "yes, seed = " + random_number_seed_for_fitch_parsimony;
803             }
804             else {
805                 domain_parsimony.executeFitchParsimonyOnBinaryDomainCombintion( use_last_in_fitch_parsimony );
806             }
807             SurfacingUtil.writeMatrixToFile( domain_parsimony.getGainLossMatrix(), outfile_name
808                     + surfacing.PARSIMONY_OUTPUT_GL_SUFFIX_FITCH_BINARY_COMBINATIONS, Format.FORESTER );
809             SurfacingUtil.writeMatrixToFile( domain_parsimony.getGainLossCountsMatrix(), outfile_name
810                     + surfacing.PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_FITCH_BINARY_COMBINATIONS, Format.FORESTER );
811             SurfacingUtil
812                     .writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(),
813                                                           CharacterStateMatrix.GainLossStates.GAIN,
814                                                           outfile_name + surfacing.PARSIMONY_OUTPUT_FITCH_GAINS_BC,
815                                                           sep,
816                                                           ForesterUtil.LINE_SEPARATOR,
817                                                           null );
818             SurfacingUtil.writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(),
819                                                                CharacterStateMatrix.GainLossStates.LOSS,
820                                                                outfile_name
821                                                                        + surfacing.PARSIMONY_OUTPUT_FITCH_LOSSES_BC,
822                                                                sep,
823                                                                ForesterUtil.LINE_SEPARATOR,
824                                                                null );
825             SurfacingUtil.writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(), null, outfile_name
826                     + surfacing.PARSIMONY_OUTPUT_FITCH_PRESENT_BC, sep, ForesterUtil.LINE_SEPARATOR, null );
827             if ( all_binary_domains_combination_gained_fitch != null ) {
828                 collectChangedDomainCombinationsFromBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(),
829                                                                                     dc_type,
830                                                                                     all_binary_domains_combination_gained_fitch,
831                                                                                     true );
832             }
833             if ( all_binary_domains_combination_lost_fitch != null ) {
834                 collectChangedDomainCombinationsFromBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(),
835                                                                                     dc_type,
836                                                                                     all_binary_domains_combination_lost_fitch,
837                                                                                     false );
838             }
839             if ( output_binary_domain_combinations_for_graphs ) {
840                 SurfacingUtil
841                         .writeBinaryStatesMatrixAsListToFileForBinaryCombinationsForGraphAnalysis( domain_parsimony
842                                                                                                            .getGainLossMatrix(),
843                                                                                                    null,
844                                                                                                    outfile_name
845                                                                                                            + surfacing.PARSIMONY_OUTPUT_FITCH_PRESENT_BC_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS,
846                                                                                                    sep,
847                                                                                                    ForesterUtil.LINE_SEPARATOR,
848                                                                                                    BinaryDomainCombination.OutputFormat.DOT );
849             }
850             // HTML:
851             writeBinaryStatesMatrixToList( domain_id_to_go_ids_map,
852                                            go_id_to_term_map,
853                                            go_namespace_limit,
854                                            true,
855                                            domain_parsimony.getGainLossMatrix(),
856                                            CharacterStateMatrix.GainLossStates.GAIN,
857                                            outfile_name + surfacing.PARSIMONY_OUTPUT_FITCH_GAINS_HTML_BC,
858                                            sep,
859                                            ForesterUtil.LINE_SEPARATOR,
860                                            "Fitch Parsimony | Gains | Domain Combinations",
861                                            "+",
862                                            null,
863                                            all_pfams_encountered,
864                                            all_pfams_gained_as_dom_combinations,
865                                            "_fitch_gains_dc",
866                                            tax_code_to_id_map );
867             writeBinaryStatesMatrixToList( domain_id_to_go_ids_map,
868                                            go_id_to_term_map,
869                                            go_namespace_limit,
870                                            true,
871                                            domain_parsimony.getGainLossMatrix(),
872                                            CharacterStateMatrix.GainLossStates.LOSS,
873                                            outfile_name + surfacing.PARSIMONY_OUTPUT_FITCH_LOSSES_HTML_BC,
874                                            sep,
875                                            ForesterUtil.LINE_SEPARATOR,
876                                            "Fitch Parsimony | Losses | Domain Combinations",
877                                            "-",
878                                            null,
879                                            all_pfams_encountered,
880                                            all_pfams_lost_as_dom_combinations,
881                                            "_fitch_losses_dc",
882                                            tax_code_to_id_map );
883             //            writeBinaryStatesMatrixToList( domain_id_to_go_ids_map,
884             //                                           go_id_to_term_map,
885             //                                           go_namespace_limit,
886             //                                           true,
887             //                                           domain_parsimony.getGainLossMatrix(),
888             //                                           null,
889             //                                           outfile_name + surfacing.PARSIMONY_OUTPUT_FITCH_PRESENT_HTML_BC,
890             //                                           sep,
891             //                                           ForesterUtil.LINE_SEPARATOR,
892             //                                           "Fitch Parsimony | Present | Domain Combinations",
893             //                                           "",
894             //                                           null,
895             //                                           all_pfams_encountered,
896             //                                           null,
897             //                                           "_fitch_present_dc",
898             //                                           tax_code_to_id_map );
899             writeAllEncounteredPfamsToFile( domain_id_to_go_ids_map,
900                                             go_id_to_term_map,
901                                             outfile_name,
902                                             all_pfams_encountered );
903             writePfamsToFile( outfile_name + surfacing.ALL_PFAMS_GAINED_AS_DOMAINS_SUFFIX, all_pfams_gained_as_domains );
904             writePfamsToFile( outfile_name + surfacing.ALL_PFAMS_LOST_AS_DOMAINS_SUFFIX, all_pfams_lost_as_domains );
905             writePfamsToFile( outfile_name + surfacing.ALL_PFAMS_GAINED_AS_DC_SUFFIX,
906                               all_pfams_gained_as_dom_combinations );
907             writePfamsToFile( outfile_name + surfacing.ALL_PFAMS_LOST_AS_DC_SUFFIX, all_pfams_lost_as_dom_combinations );
908             preparePhylogeny( local_phylogeny_l,
909                               domain_parsimony,
910                               date_time,
911                               "Fitch parsimony on binary domain combination presence/absence randomization: "
912                                       + randomization,
913                               "fitch_on_binary_domain_combinations_" + outfile_name,
914                               parameters_str );
915             SurfacingUtil.writePhylogenyToFile( local_phylogeny_l, outfile_name
916                     + surfacing.BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH );
917             calculateIndependentDomainCombinationGains( local_phylogeny_l,
918                                                         outfile_name
919                                                                 + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_COUNTS_OUTPUT_SUFFIX,
920                                                         outfile_name
921                                                                 + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_OUTPUT_SUFFIX,
922                                                         outfile_name
923                                                                 + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_SUFFIX,
924                                                         outfile_name
925                                                                 + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_UNIQUE_SUFFIX,
926                                                         outfile_name + "_indep_dc_gains_fitch_lca_ranks.txt",
927                                                         outfile_name + "_indep_dc_gains_fitch_lca_taxonomies.txt",
928                                                         outfile_name + "_indep_dc_gains_fitch_protein_statistics.txt",
929                                                         protein_length_stats_by_dc,
930                                                         domain_number_stats_by_dc,
931                                                         domain_length_stats_by_domain );
932         }
933     }
934
935     public static void executeParsimonyAnalysisForSecondaryFeatures( final String outfile_name,
936                                                                      final DomainParsimonyCalculator secondary_features_parsimony,
937                                                                      final Phylogeny phylogeny,
938                                                                      final String parameters_str,
939                                                                      final Map<Species, MappingResults> mapping_results_map,
940                                                                      final boolean use_last_in_fitch_parsimony ) {
941         final String sep = ForesterUtil.LINE_SEPARATOR + "###################" + ForesterUtil.LINE_SEPARATOR;
942         final String date_time = ForesterUtil.getCurrentDateTime();
943         System.out.println();
944         writeToNexus( outfile_name + surfacing.NEXUS_SECONDARY_FEATURES,
945                       secondary_features_parsimony.createMatrixOfSecondaryFeaturePresenceOrAbsence( null ),
946                       phylogeny );
947         Phylogeny local_phylogeny_copy = phylogeny.copy();
948         secondary_features_parsimony.executeDolloParsimonyOnSecondaryFeatures( mapping_results_map );
949         SurfacingUtil.writeMatrixToFile( secondary_features_parsimony.getGainLossMatrix(), outfile_name
950                 + surfacing.PARSIMONY_OUTPUT_GL_SUFFIX_DOLLO_SECONDARY_FEATURES, Format.FORESTER );
951         SurfacingUtil.writeMatrixToFile( secondary_features_parsimony.getGainLossCountsMatrix(), outfile_name
952                 + surfacing.PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_DOLLO_SECONDARY_FEATURES, Format.FORESTER );
953         SurfacingUtil
954                 .writeBinaryStatesMatrixAsListToFile( secondary_features_parsimony.getGainLossMatrix(),
955                                                       CharacterStateMatrix.GainLossStates.GAIN,
956                                                       outfile_name
957                                                               + surfacing.PARSIMONY_OUTPUT_DOLLO_GAINS_SECONDARY_FEATURES,
958                                                       sep,
959                                                       ForesterUtil.LINE_SEPARATOR,
960                                                       null );
961         SurfacingUtil
962                 .writeBinaryStatesMatrixAsListToFile( secondary_features_parsimony.getGainLossMatrix(),
963                                                       CharacterStateMatrix.GainLossStates.LOSS,
964                                                       outfile_name
965                                                               + surfacing.PARSIMONY_OUTPUT_DOLLO_LOSSES_SECONDARY_FEATURES,
966                                                       sep,
967                                                       ForesterUtil.LINE_SEPARATOR,
968                                                       null );
969         SurfacingUtil
970                 .writeBinaryStatesMatrixAsListToFile( secondary_features_parsimony.getGainLossMatrix(),
971                                                       null,
972                                                       outfile_name
973                                                               + surfacing.PARSIMONY_OUTPUT_DOLLO_PRESENT_SECONDARY_FEATURES,
974                                                       sep,
975                                                       ForesterUtil.LINE_SEPARATOR,
976                                                       null );
977         preparePhylogeny( local_phylogeny_copy,
978                           secondary_features_parsimony,
979                           date_time,
980                           "Dollo parsimony on secondary feature presence/absence",
981                           "dollo_on_secondary_features_" + outfile_name,
982                           parameters_str );
983         SurfacingUtil.writePhylogenyToFile( local_phylogeny_copy, outfile_name
984                 + surfacing.SECONDARY_FEATURES_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO );
985         // FITCH DOMAIN COMBINATIONS
986         // -------------------------
987         local_phylogeny_copy = phylogeny.copy();
988         final String randomization = "no";
989         secondary_features_parsimony
990                 .executeFitchParsimonyOnBinaryDomainCombintionOnSecondaryFeatures( use_last_in_fitch_parsimony );
991         preparePhylogeny( local_phylogeny_copy,
992                           secondary_features_parsimony,
993                           date_time,
994                           "Fitch parsimony on secondary binary domain combination presence/absence randomization: "
995                                   + randomization,
996                           "fitch_on_binary_domain_combinations_" + outfile_name,
997                           parameters_str );
998         SurfacingUtil.writePhylogenyToFile( local_phylogeny_copy, outfile_name
999                 + surfacing.BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH_MAPPED );
1000         calculateIndependentDomainCombinationGains( local_phylogeny_copy, outfile_name
1001                 + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_COUNTS_MAPPED_OUTPUT_SUFFIX, outfile_name
1002                 + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_MAPPED_OUTPUT_SUFFIX, outfile_name
1003                 + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_SUFFIX, outfile_name
1004                 + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_UNIQUE_SUFFIX, outfile_name
1005                 + "_MAPPED_indep_dc_gains_fitch_lca_ranks.txt", outfile_name
1006                 + "_MAPPED_indep_dc_gains_fitch_lca_taxonomies.txt", null, null, null, null );
1007     }
1008
1009     public static void executePlusMinusAnalysis( final File output_file,
1010                                                  final List<String> plus_minus_analysis_high_copy_base,
1011                                                  final List<String> plus_minus_analysis_high_copy_target,
1012                                                  final List<String> plus_minus_analysis_low_copy,
1013                                                  final List<GenomeWideCombinableDomains> gwcd_list,
1014                                                  final SortedMap<Species, List<Protein>> protein_lists_per_species,
1015                                                  final Map<String, List<GoId>> domain_id_to_go_ids_map,
1016                                                  final Map<GoId, GoTerm> go_id_to_term_map,
1017                                                  final List<Object> plus_minus_analysis_numbers ) {
1018         final Set<String> all_spec = new HashSet<String>();
1019         for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
1020             all_spec.add( gwcd.getSpecies().getSpeciesId() );
1021         }
1022         final File html_out_dom = new File( output_file + surfacing.PLUS_MINUS_DOM_SUFFIX_HTML );
1023         final File plain_out_dom = new File( output_file + surfacing.PLUS_MINUS_DOM_SUFFIX );
1024         final File html_out_dc = new File( output_file + surfacing.PLUS_MINUS_DC_SUFFIX_HTML );
1025         final File all_domains_go_ids_out_dom = new File( output_file + surfacing.PLUS_MINUS_ALL_GO_IDS_DOM_SUFFIX );
1026         final File passing_domains_go_ids_out_dom = new File( output_file
1027                 + surfacing.PLUS_MINUS_PASSING_GO_IDS_DOM_SUFFIX );
1028         final File proteins_file_base = new File( output_file + "" );
1029         final int min_diff = ( ( Integer ) plus_minus_analysis_numbers.get( 0 ) ).intValue();
1030         final double factor = ( ( Double ) plus_minus_analysis_numbers.get( 1 ) ).doubleValue();
1031         try {
1032             DomainCountsDifferenceUtil.calculateCopyNumberDifferences( gwcd_list,
1033                                                                        protein_lists_per_species,
1034                                                                        plus_minus_analysis_high_copy_base,
1035                                                                        plus_minus_analysis_high_copy_target,
1036                                                                        plus_minus_analysis_low_copy,
1037                                                                        min_diff,
1038                                                                        factor,
1039                                                                        plain_out_dom,
1040                                                                        html_out_dom,
1041                                                                        html_out_dc,
1042                                                                        domain_id_to_go_ids_map,
1043                                                                        go_id_to_term_map,
1044                                                                        all_domains_go_ids_out_dom,
1045                                                                        passing_domains_go_ids_out_dom,
1046                                                                        proteins_file_base );
1047         }
1048         catch ( final IOException e ) {
1049             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() );
1050         }
1051         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote plus minus domain analysis results to \""
1052                 + html_out_dom + "\"" );
1053         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote plus minus domain analysis results to \""
1054                 + plain_out_dom + "\"" );
1055         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote plus minus domain analysis results to \"" + html_out_dc
1056                 + "\"" );
1057         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote plus minus domain analysis based passing GO ids to \""
1058                 + passing_domains_go_ids_out_dom + "\"" );
1059         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote plus minus domain analysis based all GO ids to \""
1060                 + all_domains_go_ids_out_dom + "\"" );
1061     }
1062
1063     public static void extractProteinNames( final List<Protein> proteins,
1064                                             final List<String> query_domain_ids_nc_order,
1065                                             final Writer out,
1066                                             final String separator,
1067                                             final String limit_to_species ) throws IOException {
1068         for( final Protein protein : proteins ) {
1069             if ( ForesterUtil.isEmpty( limit_to_species )
1070                     || protein.getSpecies().getSpeciesId().equalsIgnoreCase( limit_to_species ) ) {
1071                 if ( protein.contains( query_domain_ids_nc_order, true ) ) {
1072                     out.write( protein.getSpecies().getSpeciesId() );
1073                     out.write( separator );
1074                     out.write( protein.getProteinId().getId() );
1075                     out.write( separator );
1076                     out.write( "[" );
1077                     final Set<String> visited_domain_ids = new HashSet<String>();
1078                     boolean first = true;
1079                     for( final Domain domain : protein.getProteinDomains() ) {
1080                         if ( !visited_domain_ids.contains( domain.getDomainId() ) ) {
1081                             visited_domain_ids.add( domain.getDomainId() );
1082                             if ( first ) {
1083                                 first = false;
1084                             }
1085                             else {
1086                                 out.write( " " );
1087                             }
1088                             out.write( domain.getDomainId() );
1089                             out.write( " {" );
1090                             out.write( "" + domain.getTotalCount() );
1091                             out.write( "}" );
1092                         }
1093                     }
1094                     out.write( "]" );
1095                     out.write( separator );
1096                     if ( !( ForesterUtil.isEmpty( protein.getDescription() ) || protein.getDescription()
1097                             .equals( SurfacingConstants.NONE ) ) ) {
1098                         out.write( protein.getDescription() );
1099                     }
1100                     out.write( separator );
1101                     if ( !( ForesterUtil.isEmpty( protein.getAccession() ) || protein.getAccession()
1102                             .equals( SurfacingConstants.NONE ) ) ) {
1103                         out.write( protein.getAccession() );
1104                     }
1105                     out.write( SurfacingConstants.NL );
1106                 }
1107             }
1108         }
1109         out.flush();
1110     }
1111
1112     public static void extractProteinNames( final SortedMap<Species, List<Protein>> protein_lists_per_species,
1113                                             final String domain_id,
1114                                             final Writer out,
1115                                             final String separator,
1116                                             final String limit_to_species,
1117                                             final double domain_e_cutoff ) throws IOException {
1118         //System.out.println( "Per domain E-value: " + domain_e_cutoff );
1119         for( final Species species : protein_lists_per_species.keySet() ) {
1120             //System.out.println( species + ":" );
1121             for( final Protein protein : protein_lists_per_species.get( species ) ) {
1122                 if ( ForesterUtil.isEmpty( limit_to_species )
1123                         || protein.getSpecies().getSpeciesId().equalsIgnoreCase( limit_to_species ) ) {
1124                     final List<Domain> domains = protein.getProteinDomains( domain_id );
1125                     if ( domains.size() > 0 ) {
1126                         out.write( protein.getSpecies().getSpeciesId() );
1127                         out.write( separator );
1128                         out.write( protein.getProteinId().getId() );
1129                         out.write( separator );
1130                         out.write( domain_id.toString() );
1131                         out.write( separator );
1132                         int prev_to = -1;
1133                         for( final Domain domain : domains ) {
1134                             if ( ( domain_e_cutoff < 0 ) || ( domain.getPerDomainEvalue() <= domain_e_cutoff ) ) {
1135                                 out.write( "/" );
1136                                 out.write( domain.getFrom() + "-" + domain.getTo() );
1137                                 if ( prev_to >= 0 ) {
1138                                     final int l = domain.getFrom() - prev_to;
1139                                     // System.out.println( l );
1140                                 }
1141                                 prev_to = domain.getTo();
1142                             }
1143                         }
1144                         out.write( "/" );
1145                         out.write( separator );
1146                         final List<Domain> domain_list = new ArrayList<Domain>();
1147                         for( final Domain domain : protein.getProteinDomains() ) {
1148                             if ( ( domain_e_cutoff < 0 ) || ( domain.getPerDomainEvalue() <= domain_e_cutoff ) ) {
1149                                 domain_list.add( domain );
1150                             }
1151                         }
1152                         final Domain domain_ary[] = new Domain[ domain_list.size() ];
1153                         for( int i = 0; i < domain_list.size(); ++i ) {
1154                             domain_ary[ i ] = domain_list.get( i );
1155                         }
1156                         Arrays.sort( domain_ary, new DomainComparator( true ) );
1157                         out.write( "{" );
1158                         boolean first = true;
1159                         for( final Domain domain : domain_ary ) {
1160                             if ( first ) {
1161                                 first = false;
1162                             }
1163                             else {
1164                                 out.write( "," );
1165                             }
1166                             out.write( domain.getDomainId().toString() );
1167                             out.write( ":" + domain.getFrom() + "-" + domain.getTo() );
1168                             out.write( ":" + domain.getPerDomainEvalue() );
1169                         }
1170                         out.write( "}" );
1171                         if ( !( ForesterUtil.isEmpty( protein.getDescription() ) || protein.getDescription()
1172                                 .equals( SurfacingConstants.NONE ) ) ) {
1173                             out.write( protein.getDescription() );
1174                         }
1175                         out.write( separator );
1176                         if ( !( ForesterUtil.isEmpty( protein.getAccession() ) || protein.getAccession()
1177                                 .equals( SurfacingConstants.NONE ) ) ) {
1178                             out.write( protein.getAccession() );
1179                         }
1180                         out.write( SurfacingConstants.NL );
1181                     }
1182                 }
1183             }
1184         }
1185         out.flush();
1186     }
1187
1188     public static SortedSet<String> getAllDomainIds( final List<GenomeWideCombinableDomains> gwcd_list ) {
1189         final SortedSet<String> all_domains_ids = new TreeSet<String>();
1190         for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
1191             final Set<String> all_domains = gwcd.getAllDomainIds();
1192             //    for( final Domain domain : all_domains ) {
1193             all_domains_ids.addAll( all_domains );
1194             //    }
1195         }
1196         return all_domains_ids;
1197     }
1198
1199     public static SortedMap<String, Integer> getDomainCounts( final List<Protein> protein_domain_collections ) {
1200         final SortedMap<String, Integer> map = new TreeMap<String, Integer>();
1201         for( final Protein protein_domain_collection : protein_domain_collections ) {
1202             for( final Object name : protein_domain_collection.getProteinDomains() ) {
1203                 final BasicDomain protein_domain = ( BasicDomain ) name;
1204                 final String id = protein_domain.getDomainId();
1205                 if ( map.containsKey( id ) ) {
1206                     map.put( id, map.get( id ) + 1 );
1207                 }
1208                 else {
1209                     map.put( id, 1 );
1210                 }
1211             }
1212         }
1213         return map;
1214     }
1215
1216     public static int getNumberOfNodesLackingName( final Phylogeny p, final StringBuilder names ) {
1217         final PhylogenyNodeIterator it = p.iteratorPostorder();
1218         int c = 0;
1219         while ( it.hasNext() ) {
1220             final PhylogenyNode n = it.next();
1221             if ( ForesterUtil.isEmpty( n.getName() )
1222                     && ( !n.getNodeData().isHasTaxonomy() || ForesterUtil.isEmpty( n.getNodeData().getTaxonomy()
1223                             .getScientificName() ) )
1224                     && ( !n.getNodeData().isHasTaxonomy() || ForesterUtil.isEmpty( n.getNodeData().getTaxonomy()
1225                             .getCommonName() ) ) ) {
1226                 if ( n.getParent() != null ) {
1227                     names.append( " " );
1228                     names.append( n.getParent().getName() );
1229                 }
1230                 final List l = n.getAllExternalDescendants();
1231                 for( final Object object : l ) {
1232                     System.out.println( l.toString() );
1233                 }
1234                 ++c;
1235             }
1236         }
1237         return c;
1238     }
1239
1240     public static void log( final String msg, final Writer w ) {
1241         try {
1242             w.write( msg );
1243             w.write( ForesterUtil.LINE_SEPARATOR );
1244         }
1245         catch ( final IOException e ) {
1246             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() );
1247         }
1248     }
1249
1250     public static Phylogeny[] obtainAndPreProcessIntrees( final File[] intree_files,
1251                                                           final int number_of_genomes,
1252                                                           final String[][] input_file_properties ) {
1253         final Phylogeny[] intrees = new Phylogeny[ intree_files.length ];
1254         int i = 0;
1255         for( final File intree_file : intree_files ) {
1256             Phylogeny intree = null;
1257             final String error = ForesterUtil.isReadableFile( intree_file );
1258             if ( !ForesterUtil.isEmpty( error ) ) {
1259                 ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read input tree file [" + intree_file + "]: "
1260                         + error );
1261             }
1262             try {
1263                 final Phylogeny[] p_array = ParserBasedPhylogenyFactory.getInstance()
1264                         .create( intree_file, ParserUtils.createParserDependingOnFileType( intree_file, true ) );
1265                 if ( p_array.length < 1 ) {
1266                     ForesterUtil.fatalError( surfacing.PRG_NAME, "file [" + intree_file
1267                             + "] does not contain any phylogeny in phyloXML format" );
1268                 }
1269                 else if ( p_array.length > 1 ) {
1270                     ForesterUtil.fatalError( surfacing.PRG_NAME, "file [" + intree_file
1271                             + "] contains more than one phylogeny in phyloXML format" );
1272                 }
1273                 intree = p_array[ 0 ];
1274             }
1275             catch ( final Exception e ) {
1276                 ForesterUtil.fatalError( surfacing.PRG_NAME, "failed to read input tree from file [" + intree_file
1277                         + "]: " + error );
1278             }
1279             if ( ( intree == null ) || intree.isEmpty() ) {
1280                 ForesterUtil.fatalError( surfacing.PRG_NAME, "input tree [" + intree_file + "] is empty" );
1281             }
1282             if ( !intree.isRooted() ) {
1283                 ForesterUtil.fatalError( surfacing.PRG_NAME, "input tree [" + intree_file + "] is not rooted" );
1284             }
1285             if ( intree.getNumberOfExternalNodes() < number_of_genomes ) {
1286                 ForesterUtil.fatalError( surfacing.PRG_NAME,
1287                                          "number of external nodes [" + intree.getNumberOfExternalNodes()
1288                                                  + "] of input tree [" + intree_file
1289                                                  + "] is smaller than the number of genomes the be analyzed ["
1290                                                  + number_of_genomes + "]" );
1291             }
1292             final StringBuilder parent_names = new StringBuilder();
1293             final int nodes_lacking_name = getNumberOfNodesLackingName( intree, parent_names );
1294             if ( nodes_lacking_name > 0 ) {
1295                 ForesterUtil.fatalError( surfacing.PRG_NAME, "input tree [" + intree_file + "] has "
1296                         + nodes_lacking_name + " node(s) lacking a name [parent names:" + parent_names + "]" );
1297             }
1298             preparePhylogenyForParsimonyAnalyses( intree, input_file_properties );
1299             if ( !intree.isCompletelyBinary() ) {
1300                 ForesterUtil.printWarningMessage( surfacing.PRG_NAME, "input tree [" + intree_file
1301                         + "] is not completely binary" );
1302             }
1303             intrees[ i++ ] = intree;
1304         }
1305         return intrees;
1306     }
1307
1308     public static Phylogeny obtainFirstIntree( final File intree_file ) {
1309         Phylogeny intree = null;
1310         final String error = ForesterUtil.isReadableFile( intree_file );
1311         if ( !ForesterUtil.isEmpty( error ) ) {
1312             ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read input tree file [" + intree_file + "]: " + error );
1313         }
1314         try {
1315             final Phylogeny[] phys = ParserBasedPhylogenyFactory.getInstance()
1316                     .create( intree_file, ParserUtils.createParserDependingOnFileType( intree_file, true ) );
1317             if ( phys.length < 1 ) {
1318                 ForesterUtil.fatalError( surfacing.PRG_NAME, "file [" + intree_file
1319                         + "] does not contain any phylogeny in phyloXML format" );
1320             }
1321             else if ( phys.length > 1 ) {
1322                 ForesterUtil.fatalError( surfacing.PRG_NAME, "file [" + intree_file
1323                         + "] contains more than one phylogeny in phyloXML format" );
1324             }
1325             intree = phys[ 0 ];
1326         }
1327         catch ( final Exception e ) {
1328             ForesterUtil.fatalError( surfacing.PRG_NAME, "failed to read input tree from file [" + intree_file + "]: "
1329                     + error );
1330         }
1331         if ( ( intree == null ) || intree.isEmpty() ) {
1332             ForesterUtil.fatalError( surfacing.PRG_NAME, "input tree [" + intree_file + "] is empty" );
1333         }
1334         if ( !intree.isRooted() ) {
1335             ForesterUtil.fatalError( surfacing.PRG_NAME, "input tree [" + intree_file + "] is not rooted" );
1336         }
1337         return intree;
1338     }
1339
1340     public static String obtainHexColorStringDependingOnTaxonomyGroup( final String tax_code, final Phylogeny phy )
1341             throws IllegalArgumentException {
1342         if ( !_TAXCODE_HEXCOLORSTRING_MAP.containsKey( tax_code ) ) {
1343             if ( ( phy != null ) && !phy.isEmpty() ) {
1344                 final List<PhylogenyNode> nodes = phy.getNodesViaTaxonomyCode( tax_code );
1345                 Color c = null;
1346                 if ( ( nodes == null ) || nodes.isEmpty() ) {
1347                     throw new IllegalArgumentException( "code " + tax_code + " is not found" );
1348                 }
1349                 if ( nodes.size() != 1 ) {
1350                     throw new IllegalArgumentException( "code " + tax_code + " is not unique" );
1351                 }
1352                 PhylogenyNode n = nodes.get( 0 );
1353                 while ( n != null ) {
1354                     if ( n.getNodeData().isHasTaxonomy()
1355                             && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
1356                         c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( n.getNodeData().getTaxonomy()
1357                                 .getScientificName(), tax_code );
1358                     }
1359                     if ( ( c == null ) && !ForesterUtil.isEmpty( n.getName() ) ) {
1360                         c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( n.getName(), tax_code );
1361                     }
1362                     if ( c != null ) {
1363                         break;
1364                     }
1365                     n = n.getParent();
1366                 }
1367                 if ( c == null ) {
1368                     throw new IllegalArgumentException( "no color found for taxonomy code \"" + tax_code + "\"" );
1369                 }
1370                 final String hex = String.format( "#%02x%02x%02x", c.getRed(), c.getGreen(), c.getBlue() );
1371                 _TAXCODE_HEXCOLORSTRING_MAP.put( tax_code, hex );
1372             }
1373             else {
1374                 throw new IllegalArgumentException( "unable to obtain color for code " + tax_code
1375                         + " (tree is null or empty and code is not in map)" );
1376             }
1377         }
1378         return _TAXCODE_HEXCOLORSTRING_MAP.get( tax_code );
1379     }
1380
1381     public static void performDomainArchitectureAnalysis( final SortedMap<String, Set<String>> domain_architecutures,
1382                                                           final SortedMap<String, Integer> domain_architecuture_counts,
1383                                                           final int min_count,
1384                                                           final File da_counts_outfile,
1385                                                           final File unique_da_outfile ) {
1386         checkForOutputFileWriteability( da_counts_outfile );
1387         checkForOutputFileWriteability( unique_da_outfile );
1388         try {
1389             final BufferedWriter da_counts_out = new BufferedWriter( new FileWriter( da_counts_outfile ) );
1390             final BufferedWriter unique_da_out = new BufferedWriter( new FileWriter( unique_da_outfile ) );
1391             final Iterator<Entry<String, Integer>> it = domain_architecuture_counts.entrySet().iterator();
1392             while ( it.hasNext() ) {
1393                 final Map.Entry<String, Integer> e = it.next();
1394                 final String da = e.getKey();
1395                 final int count = e.getValue();
1396                 if ( count >= min_count ) {
1397                     da_counts_out.write( da );
1398                     da_counts_out.write( "\t" );
1399                     da_counts_out.write( String.valueOf( count ) );
1400                     da_counts_out.write( ForesterUtil.LINE_SEPARATOR );
1401                 }
1402                 if ( count == 1 ) {
1403                     final Iterator<Entry<String, Set<String>>> it2 = domain_architecutures.entrySet().iterator();
1404                     while ( it2.hasNext() ) {
1405                         final Map.Entry<String, Set<String>> e2 = it2.next();
1406                         final String genome = e2.getKey();
1407                         final Set<String> das = e2.getValue();
1408                         if ( das.contains( da ) ) {
1409                             unique_da_out.write( genome );
1410                             unique_da_out.write( "\t" );
1411                             unique_da_out.write( da );
1412                             unique_da_out.write( ForesterUtil.LINE_SEPARATOR );
1413                         }
1414                     }
1415                 }
1416             }
1417             unique_da_out.close();
1418             da_counts_out.close();
1419         }
1420         catch ( final IOException e ) {
1421             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() );
1422         }
1423         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote distance matrices to \"" + da_counts_outfile + "\"" );
1424         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote distance matrices to \"" + unique_da_outfile + "\"" );
1425         //
1426     }
1427
1428     public static void preparePhylogeny( final Phylogeny p,
1429                                          final DomainParsimonyCalculator domain_parsimony,
1430                                          final String date_time,
1431                                          final String method,
1432                                          final String name,
1433                                          final String parameters_str ) {
1434         domain_parsimony.decoratePhylogenyWithDomains( p );
1435         final StringBuilder desc = new StringBuilder();
1436         desc.append( "[Method: " + method + "] [Date: " + date_time + "] " );
1437         desc.append( "[Cost: " + domain_parsimony.getCost() + "] " );
1438         desc.append( "[Gains: " + domain_parsimony.getTotalGains() + "] " );
1439         desc.append( "[Losses: " + domain_parsimony.getTotalLosses() + "] " );
1440         desc.append( "[Unchanged: " + domain_parsimony.getTotalUnchanged() + "] " );
1441         desc.append( "[Parameters: " + parameters_str + "]" );
1442         p.setName( name );
1443         p.setDescription( desc.toString() );
1444         p.setConfidence( new Confidence( domain_parsimony.getCost(), "parsimony" ) );
1445         p.setRerootable( false );
1446         p.setRooted( true );
1447     }
1448
1449     public static void preparePhylogenyForParsimonyAnalyses( final Phylogeny intree,
1450                                                              final String[][] input_file_properties ) {
1451         final String[] genomes = new String[ input_file_properties.length ];
1452         for( int i = 0; i < input_file_properties.length; ++i ) {
1453             if ( intree.getNodes( input_file_properties[ i ][ 1 ] ).size() > 1 ) {
1454                 ForesterUtil.fatalError( surfacing.PRG_NAME, "node named [" + input_file_properties[ i ][ 1 ]
1455                         + "] is not unique in input tree " + intree.getName() );
1456             }
1457             genomes[ i ] = input_file_properties[ i ][ 1 ];
1458         }
1459         //
1460         final PhylogenyNodeIterator it = intree.iteratorPostorder();
1461         while ( it.hasNext() ) {
1462             final PhylogenyNode n = it.next();
1463             if ( ForesterUtil.isEmpty( n.getName() ) ) {
1464                 if ( n.getNodeData().isHasTaxonomy()
1465                         && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
1466                     n.setName( n.getNodeData().getTaxonomy().getTaxonomyCode() );
1467                 }
1468                 else if ( n.getNodeData().isHasTaxonomy()
1469                         && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
1470                     n.setName( n.getNodeData().getTaxonomy().getScientificName() );
1471                 }
1472                 else if ( n.getNodeData().isHasTaxonomy()
1473                         && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getCommonName() ) ) {
1474                     n.setName( n.getNodeData().getTaxonomy().getCommonName() );
1475                 }
1476                 else {
1477                     ForesterUtil
1478                             .fatalError( surfacing.PRG_NAME,
1479                                          "node with no name, scientific name, common name, or taxonomy code present" );
1480                 }
1481             }
1482         }
1483         //
1484         final List<String> igns = PhylogenyMethods.deleteExternalNodesPositiveSelection( genomes, intree );
1485         if ( igns.size() > 0 ) {
1486             System.out.println( "Not using the following " + igns.size() + " nodes:" );
1487             for( int i = 0; i < igns.size(); ++i ) {
1488                 System.out.println( " " + i + ": " + igns.get( i ) );
1489             }
1490             System.out.println( "--" );
1491         }
1492         for( final String[] input_file_propertie : input_file_properties ) {
1493             try {
1494                 intree.getNode( input_file_propertie[ 1 ] );
1495             }
1496             catch ( final IllegalArgumentException e ) {
1497                 ForesterUtil.fatalError( surfacing.PRG_NAME, "node named [" + input_file_propertie[ 1 ]
1498                         + "] not present/not unique in input tree" );
1499             }
1500         }
1501     }
1502
1503     public static void printOutPercentageOfMultidomainProteins( final SortedMap<Integer, Integer> all_genomes_domains_per_potein_histo,
1504                                                                 final Writer log_writer ) {
1505         int sum = 0;
1506         for( final Entry<Integer, Integer> entry : all_genomes_domains_per_potein_histo.entrySet() ) {
1507             sum += entry.getValue();
1508         }
1509         final double percentage = ( 100.0 * ( sum - all_genomes_domains_per_potein_histo.get( 1 ) ) ) / sum;
1510         ForesterUtil.programMessage( surfacing.PRG_NAME, "Percentage of multidomain proteins: " + percentage + "%" );
1511         log( "Percentage of multidomain proteins:            : " + percentage + "%", log_writer );
1512     }
1513
1514     public static void processFilter( final File filter_file, final SortedSet<String> filter ) {
1515         SortedSet<String> filter_str = null;
1516         try {
1517             filter_str = ForesterUtil.file2set( filter_file );
1518         }
1519         catch ( final IOException e ) {
1520             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() );
1521         }
1522         if ( filter_str != null ) {
1523             for( final String string : filter_str ) {
1524                 filter.add( string );
1525             }
1526         }
1527         if ( surfacing.VERBOSE ) {
1528             System.out.println( "Filter:" );
1529             for( final String domainId : filter ) {
1530                 System.out.println( domainId );
1531             }
1532         }
1533     }
1534
1535     public static String[][] processInputGenomesFile( final File input_genomes ) {
1536         String[][] input_file_properties = null;
1537         try {
1538             input_file_properties = ForesterUtil.file22dArray( input_genomes );
1539         }
1540         catch ( final IOException e ) {
1541             ForesterUtil.fatalError( surfacing.PRG_NAME,
1542                                      "genomes files is to be in the following format \"<hmmpfam output file> <species>\": "
1543                                              + e.getLocalizedMessage() );
1544         }
1545         final Set<String> specs = new HashSet<String>();
1546         final Set<String> paths = new HashSet<String>();
1547         for( int i = 0; i < input_file_properties.length; ++i ) {
1548             if ( !PhyloXmlUtil.TAXOMONY_CODE_PATTERN.matcher( input_file_properties[ i ][ 1 ] ).matches() ) {
1549                 ForesterUtil.fatalError( surfacing.PRG_NAME, "illegal format for species code: "
1550                         + input_file_properties[ i ][ 1 ] );
1551             }
1552             if ( specs.contains( input_file_properties[ i ][ 1 ] ) ) {
1553                 ForesterUtil.fatalError( surfacing.PRG_NAME, "species code " + input_file_properties[ i ][ 1 ]
1554                         + " is not unique" );
1555             }
1556             specs.add( input_file_properties[ i ][ 1 ] );
1557             if ( paths.contains( input_file_properties[ i ][ 0 ] ) ) {
1558                 ForesterUtil.fatalError( surfacing.PRG_NAME, "path " + input_file_properties[ i ][ 0 ]
1559                         + " is not unique" );
1560             }
1561             paths.add( input_file_properties[ i ][ 0 ] );
1562             final String error = ForesterUtil.isReadableFile( new File( input_file_properties[ i ][ 0 ] ) );
1563             if ( !ForesterUtil.isEmpty( error ) ) {
1564                 ForesterUtil.fatalError( surfacing.PRG_NAME, error );
1565             }
1566         }
1567         return input_file_properties;
1568     }
1569
1570     public static void processPlusMinusAnalysisOption( final CommandLineArguments cla,
1571                                                        final List<String> high_copy_base,
1572                                                        final List<String> high_copy_target,
1573                                                        final List<String> low_copy,
1574                                                        final List<Object> numbers ) {
1575         if ( cla.isOptionSet( surfacing.PLUS_MINUS_ANALYSIS_OPTION ) ) {
1576             if ( !cla.isOptionValueSet( surfacing.PLUS_MINUS_ANALYSIS_OPTION ) ) {
1577                 ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for 'plus-minus' file: -"
1578                         + surfacing.PLUS_MINUS_ANALYSIS_OPTION + "=<file>" );
1579             }
1580             final File plus_minus_file = new File( cla.getOptionValue( surfacing.PLUS_MINUS_ANALYSIS_OPTION ) );
1581             final String msg = ForesterUtil.isReadableFile( plus_minus_file );
1582             if ( !ForesterUtil.isEmpty( msg ) ) {
1583                 ForesterUtil.fatalError( surfacing.PRG_NAME, "can not read from \"" + plus_minus_file + "\": " + msg );
1584             }
1585             processPlusMinusFile( plus_minus_file, high_copy_base, high_copy_target, low_copy, numbers );
1586         }
1587     }
1588
1589     // First numbers is minimal difference, second is factor.
1590     public static void processPlusMinusFile( final File plus_minus_file,
1591                                              final List<String> high_copy_base,
1592                                              final List<String> high_copy_target,
1593                                              final List<String> low_copy,
1594                                              final List<Object> numbers ) {
1595         Set<String> species_set = null;
1596         int min_diff = surfacing.PLUS_MINUS_ANALYSIS_MIN_DIFF_DEFAULT;
1597         double factor = surfacing.PLUS_MINUS_ANALYSIS_FACTOR_DEFAULT;
1598         try {
1599             species_set = ForesterUtil.file2set( plus_minus_file );
1600         }
1601         catch ( final IOException e ) {
1602             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() );
1603         }
1604         if ( species_set != null ) {
1605             for( final String species : species_set ) {
1606                 final String species_trimmed = species.substring( 1 );
1607                 if ( species.startsWith( "+" ) ) {
1608                     if ( low_copy.contains( species_trimmed ) ) {
1609                         ForesterUtil.fatalError( surfacing.PRG_NAME,
1610                                                  "species/genome names can not appear with both '+' and '-' suffix, as appears the case for: \""
1611                                                          + species_trimmed + "\"" );
1612                     }
1613                     high_copy_base.add( species_trimmed );
1614                 }
1615                 else if ( species.startsWith( "*" ) ) {
1616                     if ( low_copy.contains( species_trimmed ) ) {
1617                         ForesterUtil.fatalError( surfacing.PRG_NAME,
1618                                                  "species/genome names can not appear with both '*' and '-' suffix, as appears the case for: \""
1619                                                          + species_trimmed + "\"" );
1620                     }
1621                     high_copy_target.add( species_trimmed );
1622                 }
1623                 else if ( species.startsWith( "-" ) ) {
1624                     if ( high_copy_base.contains( species_trimmed ) || high_copy_target.contains( species_trimmed ) ) {
1625                         ForesterUtil.fatalError( surfacing.PRG_NAME,
1626                                                  "species/genome names can not appear with both '+' or '*' and '-' suffix, as appears the case for: \""
1627                                                          + species_trimmed + "\"" );
1628                     }
1629                     low_copy.add( species_trimmed );
1630                 }
1631                 else if ( species.startsWith( "$D" ) ) {
1632                     try {
1633                         min_diff = Integer.parseInt( species.substring( 3 ) );
1634                     }
1635                     catch ( final NumberFormatException e ) {
1636                         ForesterUtil.fatalError( surfacing.PRG_NAME,
1637                                                  "could not parse integer value for minimal difference from: \""
1638                                                          + species.substring( 3 ) + "\"" );
1639                     }
1640                 }
1641                 else if ( species.startsWith( "$F" ) ) {
1642                     try {
1643                         factor = Double.parseDouble( species.substring( 3 ) );
1644                     }
1645                     catch ( final NumberFormatException e ) {
1646                         ForesterUtil.fatalError( surfacing.PRG_NAME, "could not parse double value for factor from: \""
1647                                 + species.substring( 3 ) + "\"" );
1648                     }
1649                 }
1650                 else if ( species.startsWith( "#" ) ) {
1651                     // Comment, ignore.
1652                 }
1653                 else {
1654                     ForesterUtil
1655                             .fatalError( surfacing.PRG_NAME,
1656                                          "species/genome names in 'plus minus' file must begin with '*' (high copy target genome), '+' (high copy base genomes), '-' (low copy genomes), '$D=<integer>' minimal Difference (default is 1), '$F=<double>' factor (default is 1.0), double), or '#' (ignore) suffix, encountered: \""
1657                                                  + species + "\"" );
1658                 }
1659                 numbers.add( new Integer( min_diff + "" ) );
1660                 numbers.add( new Double( factor + "" ) );
1661             }
1662         }
1663         else {
1664             ForesterUtil.fatalError( surfacing.PRG_NAME, "'plus minus' file [" + plus_minus_file + "] appears empty" );
1665         }
1666     }
1667
1668     /*
1669      * species | protein id | n-terminal domain | c-terminal domain | n-terminal domain per domain E-value | c-terminal domain per domain E-value
1670      * 
1671      * 
1672      */
1673     static public StringBuffer proteinToDomainCombinations( final Protein protein,
1674                                                             final String protein_id,
1675                                                             final String separator ) {
1676         final StringBuffer sb = new StringBuffer();
1677         if ( protein.getSpecies() == null ) {
1678             throw new IllegalArgumentException( "species must not be null" );
1679         }
1680         if ( ForesterUtil.isEmpty( protein.getSpecies().getSpeciesId() ) ) {
1681             throw new IllegalArgumentException( "species id must not be empty" );
1682         }
1683         final List<Domain> domains = protein.getProteinDomains();
1684         if ( domains.size() > 1 ) {
1685             final Map<String, Integer> counts = new HashMap<String, Integer>();
1686             for( final Domain domain : domains ) {
1687                 final String id = domain.getDomainId();
1688                 if ( counts.containsKey( id ) ) {
1689                     counts.put( id, counts.get( id ) + 1 );
1690                 }
1691                 else {
1692                     counts.put( id, 1 );
1693                 }
1694             }
1695             final Set<String> dcs = new HashSet<String>();
1696             for( int i = 1; i < domains.size(); ++i ) {
1697                 for( int j = 0; j < i; ++j ) {
1698                     Domain domain_n = domains.get( i );
1699                     Domain domain_c = domains.get( j );
1700                     if ( domain_n.getFrom() > domain_c.getFrom() ) {
1701                         domain_n = domains.get( j );
1702                         domain_c = domains.get( i );
1703                     }
1704                     final String dc = domain_n.getDomainId() + domain_c.getDomainId();
1705                     if ( !dcs.contains( dc ) ) {
1706                         dcs.add( dc );
1707                         sb.append( protein.getSpecies() );
1708                         sb.append( separator );
1709                         sb.append( protein_id );
1710                         sb.append( separator );
1711                         sb.append( domain_n.getDomainId() );
1712                         sb.append( separator );
1713                         sb.append( domain_c.getDomainId() );
1714                         sb.append( separator );
1715                         sb.append( domain_n.getPerDomainEvalue() );
1716                         sb.append( separator );
1717                         sb.append( domain_c.getPerDomainEvalue() );
1718                         sb.append( separator );
1719                         sb.append( counts.get( domain_n.getDomainId() ) );
1720                         sb.append( separator );
1721                         sb.append( counts.get( domain_c.getDomainId() ) );
1722                         sb.append( ForesterUtil.LINE_SEPARATOR );
1723                     }
1724                 }
1725             }
1726         }
1727         else if ( domains.size() == 1 ) {
1728             sb.append( protein.getSpecies() );
1729             sb.append( separator );
1730             sb.append( protein_id );
1731             sb.append( separator );
1732             sb.append( domains.get( 0 ).getDomainId() );
1733             sb.append( separator );
1734             sb.append( separator );
1735             sb.append( domains.get( 0 ).getPerDomainEvalue() );
1736             sb.append( separator );
1737             sb.append( separator );
1738             sb.append( 1 );
1739             sb.append( separator );
1740             sb.append( ForesterUtil.LINE_SEPARATOR );
1741         }
1742         else {
1743             sb.append( protein.getSpecies() );
1744             sb.append( separator );
1745             sb.append( protein_id );
1746             sb.append( separator );
1747             sb.append( separator );
1748             sb.append( separator );
1749             sb.append( separator );
1750             sb.append( separator );
1751             sb.append( separator );
1752             sb.append( ForesterUtil.LINE_SEPARATOR );
1753         }
1754         return sb;
1755     }
1756
1757     public static List<Domain> sortDomainsWithAscendingConfidenceValues( final Protein protein ) {
1758         final List<Domain> domains = new ArrayList<Domain>();
1759         for( final Domain d : protein.getProteinDomains() ) {
1760             domains.add( d );
1761         }
1762         Collections.sort( domains, SurfacingUtil.ASCENDING_CONFIDENCE_VALUE_ORDER );
1763         return domains;
1764     }
1765
1766     public static int storeDomainArchitectures( final String genome,
1767                                                 final SortedMap<String, Set<String>> domain_architecutures,
1768                                                 final List<Protein> protein_list,
1769                                                 final Map<String, Integer> distinct_domain_architecuture_counts ) {
1770         final Set<String> da = new HashSet<String>();
1771         domain_architecutures.put( genome, da );
1772         for( final Protein protein : protein_list ) {
1773             final String da_str = ( ( BasicProtein ) protein ).toDomainArchitectureString( "~", 3, "=" );
1774             if ( !da.contains( da_str ) ) {
1775                 if ( !distinct_domain_architecuture_counts.containsKey( da_str ) ) {
1776                     distinct_domain_architecuture_counts.put( da_str, 1 );
1777                 }
1778                 else {
1779                     distinct_domain_architecuture_counts.put( da_str,
1780                                                               distinct_domain_architecuture_counts.get( da_str ) + 1 );
1781                 }
1782                 da.add( da_str );
1783             }
1784         }
1785         return da.size();
1786     }
1787
1788     public static void writeAllDomainsChangedOnAllSubtrees( final Phylogeny p,
1789                                                             final boolean get_gains,
1790                                                             final String outdir,
1791                                                             final String suffix_for_filename ) throws IOException {
1792         CharacterStateMatrix.GainLossStates state = CharacterStateMatrix.GainLossStates.GAIN;
1793         if ( !get_gains ) {
1794             state = CharacterStateMatrix.GainLossStates.LOSS;
1795         }
1796         final File base_dir = createBaseDirForPerNodeDomainFiles( surfacing.BASE_DIRECTORY_PER_SUBTREE_DOMAIN_GAIN_LOSS_FILES,
1797                                                                   false,
1798                                                                   state,
1799                                                                   outdir );
1800         for( final PhylogenyNodeIterator it = p.iteratorPostorder(); it.hasNext(); ) {
1801             final PhylogenyNode node = it.next();
1802             if ( !node.isExternal() ) {
1803                 final SortedSet<String> domains = collectAllDomainsChangedOnSubtree( node, get_gains );
1804                 if ( domains.size() > 0 ) {
1805                     final Writer writer = ForesterUtil.createBufferedWriter( base_dir + ForesterUtil.FILE_SEPARATOR
1806                             + node.getName() + suffix_for_filename );
1807                     for( final String domain : domains ) {
1808                         writer.write( domain );
1809                         writer.write( ForesterUtil.LINE_SEPARATOR );
1810                     }
1811                     writer.close();
1812                 }
1813             }
1814         }
1815     }
1816
1817     public static void writeBinaryDomainCombinationsFileForGraphAnalysis( final String[][] input_file_properties,
1818                                                                           final File output_dir,
1819                                                                           final GenomeWideCombinableDomains gwcd,
1820                                                                           final int i,
1821                                                                           final GenomeWideCombinableDomainsSortOrder dc_sort_order ) {
1822         File dc_outfile_dot = new File( input_file_properties[ i ][ 1 ]
1823                 + surfacing.DOMAIN_COMBINITONS_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS );
1824         if ( output_dir != null ) {
1825             dc_outfile_dot = new File( output_dir + ForesterUtil.FILE_SEPARATOR + dc_outfile_dot );
1826         }
1827         checkForOutputFileWriteability( dc_outfile_dot );
1828         final SortedSet<BinaryDomainCombination> binary_combinations = createSetOfAllBinaryDomainCombinationsPerGenome( gwcd );
1829         try {
1830             final BufferedWriter out_dot = new BufferedWriter( new FileWriter( dc_outfile_dot ) );
1831             for( final BinaryDomainCombination bdc : binary_combinations ) {
1832                 out_dot.write( bdc.toGraphDescribingLanguage( BinaryDomainCombination.OutputFormat.DOT, null, null )
1833                         .toString() );
1834                 out_dot.write( SurfacingConstants.NL );
1835             }
1836             out_dot.close();
1837         }
1838         catch ( final IOException e ) {
1839             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() );
1840         }
1841         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote binary domain combination for \""
1842                 + input_file_properties[ i ][ 0 ] + "\" (" + input_file_properties[ i ][ 1 ] + ", "
1843                 + input_file_properties[ i ][ 2 ] + ") to: \"" + dc_outfile_dot + "\"" );
1844     }
1845
1846     public static void writeBinaryStatesMatrixAsListToFile( final CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix,
1847                                                             final CharacterStateMatrix.GainLossStates state,
1848                                                             final String filename,
1849                                                             final String indentifier_characters_separator,
1850                                                             final String character_separator,
1851                                                             final Map<String, String> descriptions ) {
1852         final File outfile = new File( filename );
1853         checkForOutputFileWriteability( outfile );
1854         final SortedSet<String> sorted_ids = new TreeSet<String>();
1855         for( int i = 0; i < matrix.getNumberOfIdentifiers(); ++i ) {
1856             sorted_ids.add( matrix.getIdentifier( i ) );
1857         }
1858         try {
1859             final BufferedWriter out = new BufferedWriter( new FileWriter( outfile ) );
1860             for( final String id : sorted_ids ) {
1861                 out.write( indentifier_characters_separator );
1862                 out.write( "#" + id );
1863                 out.write( indentifier_characters_separator );
1864                 for( int c = 0; c < matrix.getNumberOfCharacters(); ++c ) {
1865                     // Not nice:
1866                     // using null to indicate either UNCHANGED_PRESENT or GAIN.
1867                     if ( ( matrix.getState( id, c ) == state )
1868                             || ( ( state == null ) && ( ( matrix.getState( id, c ) == CharacterStateMatrix.GainLossStates.GAIN ) || ( matrix
1869                                     .getState( id, c ) == CharacterStateMatrix.GainLossStates.UNCHANGED_PRESENT ) ) ) ) {
1870                         out.write( matrix.getCharacter( c ) );
1871                         if ( ( descriptions != null ) && !descriptions.isEmpty()
1872                                 && descriptions.containsKey( matrix.getCharacter( c ) ) ) {
1873                             out.write( "\t" );
1874                             out.write( descriptions.get( matrix.getCharacter( c ) ) );
1875                         }
1876                         out.write( character_separator );
1877                     }
1878                 }
1879             }
1880             out.flush();
1881             out.close();
1882         }
1883         catch ( final IOException e ) {
1884             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() );
1885         }
1886         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote characters list: \"" + filename + "\"" );
1887     }
1888
1889     public static void writeBinaryStatesMatrixAsListToFileForBinaryCombinationsForGraphAnalysis( final CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix,
1890                                                                                                  final CharacterStateMatrix.GainLossStates state,
1891                                                                                                  final String filename,
1892                                                                                                  final String indentifier_characters_separator,
1893                                                                                                  final String character_separator,
1894                                                                                                  final BinaryDomainCombination.OutputFormat bc_output_format ) {
1895         final File outfile = new File( filename );
1896         checkForOutputFileWriteability( outfile );
1897         final SortedSet<String> sorted_ids = new TreeSet<String>();
1898         for( int i = 0; i < matrix.getNumberOfIdentifiers(); ++i ) {
1899             sorted_ids.add( matrix.getIdentifier( i ) );
1900         }
1901         try {
1902             final BufferedWriter out = new BufferedWriter( new FileWriter( outfile ) );
1903             for( final String id : sorted_ids ) {
1904                 out.write( indentifier_characters_separator );
1905                 out.write( "#" + id );
1906                 out.write( indentifier_characters_separator );
1907                 for( int c = 0; c < matrix.getNumberOfCharacters(); ++c ) {
1908                     // Not nice:
1909                     // using null to indicate either UNCHANGED_PRESENT or GAIN.
1910                     if ( ( matrix.getState( id, c ) == state )
1911                             || ( ( state == null ) && ( ( matrix.getState( id, c ) == CharacterStateMatrix.GainLossStates.GAIN ) || ( matrix
1912                                     .getState( id, c ) == CharacterStateMatrix.GainLossStates.UNCHANGED_PRESENT ) ) ) ) {
1913                         BinaryDomainCombination bdc = null;
1914                         try {
1915                             bdc = BasicBinaryDomainCombination.createInstance( matrix.getCharacter( c ) );
1916                         }
1917                         catch ( final Exception e ) {
1918                             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() );
1919                         }
1920                         out.write( bdc.toGraphDescribingLanguage( bc_output_format, null, null ).toString() );
1921                         out.write( character_separator );
1922                     }
1923                 }
1924             }
1925             out.flush();
1926             out.close();
1927         }
1928         catch ( final IOException e ) {
1929             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() );
1930         }
1931         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote characters list: \"" + filename + "\"" );
1932     }
1933
1934     public static void writeBinaryStatesMatrixToList( final Map<String, List<GoId>> domain_id_to_go_ids_map,
1935                                                       final Map<GoId, GoTerm> go_id_to_term_map,
1936                                                       final GoNameSpace go_namespace_limit,
1937                                                       final boolean domain_combinations,
1938                                                       final CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix,
1939                                                       final CharacterStateMatrix.GainLossStates state,
1940                                                       final String filename,
1941                                                       final String indentifier_characters_separator,
1942                                                       final String character_separator,
1943                                                       final String title_for_html,
1944                                                       final String prefix_for_html,
1945                                                       final Map<String, Set<String>>[] domain_id_to_secondary_features_maps,
1946                                                       final SortedSet<String> all_pfams_encountered,
1947                                                       final SortedSet<String> pfams_gained_or_lost,
1948                                                       final String suffix_for_per_node_events_file,
1949                                                       final Map<String, Integer> tax_code_to_id_map ) {
1950         if ( ( go_namespace_limit != null ) && ( ( go_id_to_term_map == null ) || ( go_id_to_term_map.size() < 1 ) ) ) {
1951             throw new IllegalArgumentException( "attempt to use GO namespace limit without a GO-id to term map" );
1952         }
1953         else if ( ( ( domain_id_to_go_ids_map == null ) || ( domain_id_to_go_ids_map.size() < 1 ) ) ) {
1954             throw new IllegalArgumentException( "attempt to output detailed HTML without a Pfam to GO map" );
1955         }
1956         else if ( ( ( go_id_to_term_map == null ) || ( go_id_to_term_map.size() < 1 ) ) ) {
1957             throw new IllegalArgumentException( "attempt to output detailed HTML without a GO-id to term map" );
1958         }
1959         final File outfile = new File( filename );
1960         checkForOutputFileWriteability( outfile );
1961         final SortedSet<String> sorted_ids = new TreeSet<String>();
1962         for( int i = 0; i < matrix.getNumberOfIdentifiers(); ++i ) {
1963             sorted_ids.add( matrix.getIdentifier( i ) );
1964         }
1965         try {
1966             final Writer out = new BufferedWriter( new FileWriter( outfile ) );
1967             final File per_node_go_mapped_domain_gain_loss_files_base_dir = createBaseDirForPerNodeDomainFiles( surfacing.BASE_DIRECTORY_PER_NODE_DOMAIN_GAIN_LOSS_FILES,
1968                                                                                                                 domain_combinations,
1969                                                                                                                 state,
1970                                                                                                                 filename );
1971             Writer per_node_go_mapped_domain_gain_loss_outfile_writer = null;
1972             File per_node_go_mapped_domain_gain_loss_outfile = null;
1973             int per_node_counter = 0;
1974             out.write( "<html>" );
1975             out.write( SurfacingConstants.NL );
1976             writeHtmlHead( out, title_for_html );
1977             out.write( SurfacingConstants.NL );
1978             out.write( "<body>" );
1979             out.write( SurfacingConstants.NL );
1980             out.write( "<h1>" );
1981             out.write( SurfacingConstants.NL );
1982             out.write( title_for_html );
1983             out.write( SurfacingConstants.NL );
1984             out.write( "</h1>" );
1985             out.write( SurfacingConstants.NL );
1986             out.write( "<table>" );
1987             out.write( SurfacingConstants.NL );
1988             for( final String id : sorted_ids ) {
1989                 final Matcher matcher = PATTERN_SP_STYLE_TAXONOMY.matcher( id );
1990                 if ( matcher.matches() ) {
1991                     continue;
1992                 }
1993                 out.write( "<tr>" );
1994                 out.write( "<td>" );
1995                 out.write( "<a href=\"#" + id + "\">" + id + "</a>" );
1996                 out.write( "</td>" );
1997                 out.write( "</tr>" );
1998                 out.write( SurfacingConstants.NL );
1999             }
2000             out.write( "</table>" );
2001             out.write( SurfacingConstants.NL );
2002             for( final String id : sorted_ids ) {
2003                 final Matcher matcher = PATTERN_SP_STYLE_TAXONOMY.matcher( id );
2004                 if ( matcher.matches() ) {
2005                     continue;
2006                 }
2007                 out.write( SurfacingConstants.NL );
2008                 out.write( "<h2>" );
2009                 out.write( "<a name=\"" + id + "\">" + id + "</a>" );
2010                 writeTaxonomyLinks( out, id, tax_code_to_id_map );
2011                 out.write( "</h2>" );
2012                 out.write( SurfacingConstants.NL );
2013                 out.write( "<table>" );
2014                 out.write( SurfacingConstants.NL );
2015                 out.write( "<tr>" );
2016                 out.write( "<td><b>" );
2017                 out.write( "Pfam domain(s)" );
2018                 out.write( "</b></td><td><b>" );
2019                 out.write( "GO term acc" );
2020                 out.write( "</b></td><td><b>" );
2021                 out.write( "GO term" );
2022                 out.write( "</b></td><td><b>" );
2023                 out.write( "GO namespace" );
2024                 out.write( "</b></td>" );
2025                 out.write( "</tr>" );
2026                 out.write( SurfacingConstants.NL );
2027                 out.write( "</tr>" );
2028                 out.write( SurfacingConstants.NL );
2029                 per_node_counter = 0;
2030                 if ( matrix.getNumberOfCharacters() > 0 ) {
2031                     per_node_go_mapped_domain_gain_loss_outfile = new File( per_node_go_mapped_domain_gain_loss_files_base_dir
2032                             + ForesterUtil.FILE_SEPARATOR + id + suffix_for_per_node_events_file );
2033                     SurfacingUtil.checkForOutputFileWriteability( per_node_go_mapped_domain_gain_loss_outfile );
2034                     per_node_go_mapped_domain_gain_loss_outfile_writer = ForesterUtil
2035                             .createBufferedWriter( per_node_go_mapped_domain_gain_loss_outfile );
2036                 }
2037                 else {
2038                     per_node_go_mapped_domain_gain_loss_outfile = null;
2039                     per_node_go_mapped_domain_gain_loss_outfile_writer = null;
2040                 }
2041                 for( int c = 0; c < matrix.getNumberOfCharacters(); ++c ) {
2042                     // Not nice:
2043                     // using null to indicate either UNCHANGED_PRESENT or GAIN.
2044                     if ( ( matrix.getState( id, c ) == state )
2045                             || ( ( state == null ) && ( ( matrix.getState( id, c ) == CharacterStateMatrix.GainLossStates.UNCHANGED_PRESENT ) || ( matrix
2046                                     .getState( id, c ) == CharacterStateMatrix.GainLossStates.GAIN ) ) ) ) {
2047                         final String character = matrix.getCharacter( c );
2048                         String domain_0 = "";
2049                         String domain_1 = "";
2050                         if ( character.indexOf( BinaryDomainCombination.SEPARATOR ) > 0 ) {
2051                             final String[] s = character.split( BinaryDomainCombination.SEPARATOR );
2052                             if ( s.length != 2 ) {
2053                                 throw new AssertionError( "this should not have happened: unexpected format for domain combination: ["
2054                                         + character + "]" );
2055                             }
2056                             domain_0 = s[ 0 ];
2057                             domain_1 = s[ 1 ];
2058                         }
2059                         else {
2060                             domain_0 = character;
2061                         }
2062                         writeDomainData( domain_id_to_go_ids_map,
2063                                          go_id_to_term_map,
2064                                          go_namespace_limit,
2065                                          out,
2066                                          domain_0,
2067                                          domain_1,
2068                                          prefix_for_html,
2069                                          character_separator,
2070                                          domain_id_to_secondary_features_maps,
2071                                          null );
2072                         all_pfams_encountered.add( domain_0 );
2073                         if ( pfams_gained_or_lost != null ) {
2074                             pfams_gained_or_lost.add( domain_0 );
2075                         }
2076                         if ( !ForesterUtil.isEmpty( domain_1 ) ) {
2077                             all_pfams_encountered.add( domain_1 );
2078                             if ( pfams_gained_or_lost != null ) {
2079                                 pfams_gained_or_lost.add( domain_1 );
2080                             }
2081                         }
2082                         if ( per_node_go_mapped_domain_gain_loss_outfile_writer != null ) {
2083                             writeDomainsToIndividualFilePerTreeNode( per_node_go_mapped_domain_gain_loss_outfile_writer,
2084                                                                      domain_0,
2085                                                                      domain_1 );
2086                             per_node_counter++;
2087                         }
2088                     }
2089                 }
2090                 if ( per_node_go_mapped_domain_gain_loss_outfile_writer != null ) {
2091                     per_node_go_mapped_domain_gain_loss_outfile_writer.close();
2092                     if ( per_node_counter < 1 ) {
2093                         per_node_go_mapped_domain_gain_loss_outfile.delete();
2094                     }
2095                     per_node_counter = 0;
2096                 }
2097                 out.write( "</table>" );
2098                 out.write( SurfacingConstants.NL );
2099                 out.write( "<hr>" );
2100                 out.write( SurfacingConstants.NL );
2101             } // for( final String id : sorted_ids ) {  
2102             out.write( "</body>" );
2103             out.write( SurfacingConstants.NL );
2104             out.write( "</html>" );
2105             out.write( SurfacingConstants.NL );
2106             out.flush();
2107             out.close();
2108         }
2109         catch ( final IOException e ) {
2110             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() );
2111         }
2112         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote characters detailed HTML list: \"" + filename + "\"" );
2113     }
2114
2115     public static void writeDomainCombinationsCountsFile( final String[][] input_file_properties,
2116                                                           final File output_dir,
2117                                                           final Writer per_genome_domain_promiscuity_statistics_writer,
2118                                                           final GenomeWideCombinableDomains gwcd,
2119                                                           final int i,
2120                                                           final GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder dc_sort_order ) {
2121         File dc_outfile = new File( input_file_properties[ i ][ 1 ]
2122                 + surfacing.DOMAIN_COMBINITON_COUNTS_OUTPUTFILE_SUFFIX );
2123         if ( output_dir != null ) {
2124             dc_outfile = new File( output_dir + ForesterUtil.FILE_SEPARATOR + dc_outfile );
2125         }
2126         checkForOutputFileWriteability( dc_outfile );
2127         try {
2128             final BufferedWriter out = new BufferedWriter( new FileWriter( dc_outfile ) );
2129             out.write( gwcd.toStringBuilder( dc_sort_order ).toString() );
2130             out.close();
2131         }
2132         catch ( final IOException e ) {
2133             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() );
2134         }
2135         final DescriptiveStatistics stats = gwcd.getPerGenomeDomainPromiscuityStatistics();
2136         try {
2137             per_genome_domain_promiscuity_statistics_writer.write( input_file_properties[ i ][ 1 ] + "\t" );
2138             per_genome_domain_promiscuity_statistics_writer.write( FORMATTER_3.format( stats.arithmeticMean() ) + "\t" );
2139             if ( stats.getN() < 2 ) {
2140                 per_genome_domain_promiscuity_statistics_writer.write( "n/a" + "\t" );
2141             }
2142             else {
2143                 per_genome_domain_promiscuity_statistics_writer.write( FORMATTER_3.format( stats
2144                         .sampleStandardDeviation() ) + "\t" );
2145             }
2146             per_genome_domain_promiscuity_statistics_writer.write( FORMATTER_3.format( stats.median() ) + "\t" );
2147             per_genome_domain_promiscuity_statistics_writer.write( ( int ) stats.getMin() + "\t" );
2148             per_genome_domain_promiscuity_statistics_writer.write( ( int ) stats.getMax() + "\t" );
2149             per_genome_domain_promiscuity_statistics_writer.write( stats.getN() + "\t" );
2150             final SortedSet<String> mpds = gwcd.getMostPromiscuosDomain();
2151             for( final String mpd : mpds ) {
2152                 per_genome_domain_promiscuity_statistics_writer.write( mpd + " " );
2153             }
2154             per_genome_domain_promiscuity_statistics_writer.write( ForesterUtil.LINE_SEPARATOR );
2155         }
2156         catch ( final IOException e ) {
2157             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() );
2158         }
2159         if ( input_file_properties[ i ].length == 3 ) {
2160             ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote domain combination counts for \""
2161                     + input_file_properties[ i ][ 0 ] + "\" (" + input_file_properties[ i ][ 1 ] + ", "
2162                     + input_file_properties[ i ][ 2 ] + ") to: \"" + dc_outfile + "\"" );
2163         }
2164         else {
2165             ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote domain combination counts for \""
2166                     + input_file_properties[ i ][ 0 ] + "\" (" + input_file_properties[ i ][ 1 ] + ") to: \""
2167                     + dc_outfile + "\"" );
2168         }
2169     }
2170
2171     public static void writeDomainSimilaritiesToFile( final StringBuilder html_desc,
2172                                                       final StringBuilder html_title,
2173                                                       final Writer simple_tab_writer,
2174                                                       final Writer single_writer,
2175                                                       Map<Character, Writer> split_writers,
2176                                                       final SortedSet<DomainSimilarity> similarities,
2177                                                       final boolean treat_as_binary,
2178                                                       final List<Species> species_order,
2179                                                       final PrintableDomainSimilarity.PRINT_OPTION print_option,
2180                                                       final DomainSimilarity.DomainSimilarityScoring scoring,
2181                                                       final boolean verbose,
2182                                                       final Map<String, Integer> tax_code_to_id_map,
2183                                                       final Phylogeny phy ) throws IOException {
2184         if ( ( single_writer != null ) && ( ( split_writers == null ) || split_writers.isEmpty() ) ) {
2185             split_writers = new HashMap<Character, Writer>();
2186             split_writers.put( '_', single_writer );
2187         }
2188         switch ( print_option ) {
2189             case SIMPLE_TAB_DELIMITED:
2190                 break;
2191             case HTML:
2192                 for( final Character key : split_writers.keySet() ) {
2193                     final Writer w = split_writers.get( key );
2194                     w.write( "<html>" );
2195                     w.write( SurfacingConstants.NL );
2196                     if ( key != '_' ) {
2197                         writeHtmlHead( w, "DC analysis (" + html_title + ") " + key.toString().toUpperCase() );
2198                     }
2199                     else {
2200                         writeHtmlHead( w, "DC analysis (" + html_title + ")" );
2201                     }
2202                     w.write( SurfacingConstants.NL );
2203                     w.write( "<body>" );
2204                     w.write( SurfacingConstants.NL );
2205                     w.write( html_desc.toString() );
2206                     w.write( SurfacingConstants.NL );
2207                     w.write( "<hr>" );
2208                     w.write( SurfacingConstants.NL );
2209                     w.write( "<br>" );
2210                     w.write( SurfacingConstants.NL );
2211                     w.write( "<table>" );
2212                     w.write( SurfacingConstants.NL );
2213                     w.write( "<tr><td><b>Domains:</b></td></tr>" );
2214                     w.write( SurfacingConstants.NL );
2215                 }
2216                 break;
2217         }
2218         //
2219         for( final DomainSimilarity similarity : similarities ) {
2220             if ( ( species_order != null ) && !species_order.isEmpty() ) {
2221                 ( ( PrintableDomainSimilarity ) similarity ).setSpeciesOrder( species_order );
2222             }
2223             if ( single_writer != null ) {
2224                 single_writer.write( "<tr><td><b><a href=\"#" + similarity.getDomainId() + "\">"
2225                         + similarity.getDomainId() + "</a></b></td></tr>" );
2226                 single_writer.write( SurfacingConstants.NL );
2227             }
2228             else {
2229                 Writer local_writer = split_writers.get( ( similarity.getDomainId().charAt( 0 ) + "" ).toLowerCase()
2230                         .charAt( 0 ) );
2231                 if ( local_writer == null ) {
2232                     local_writer = split_writers.get( '0' );
2233                 }
2234                 local_writer.write( "<tr><td><b><a href=\"#" + similarity.getDomainId() + "\">"
2235                         + similarity.getDomainId() + "</a></b></td></tr>" );
2236                 local_writer.write( SurfacingConstants.NL );
2237             }
2238         }
2239         for( final Writer w : split_writers.values() ) {
2240             w.write( "</table>" );
2241             w.write( SurfacingConstants.NL );
2242             w.write( "<hr>" );
2243             w.write( SurfacingConstants.NL );
2244             //
2245             w.write( "<table>" );
2246             w.write( SurfacingConstants.NL );
2247             w.write( "<tr><td><b>" );
2248             w.write( "Species group colors:" );
2249             w.write( "</b></td></tr>" );
2250             w.write( SurfacingConstants.NL );
2251             writeColorLabels( "Deuterostomia", TaxonomyColors.DEUTEROSTOMIA_COLOR, w );
2252             writeColorLabels( "Protostomia", TaxonomyColors.PROTOSTOMIA_COLOR, w );
2253             writeColorLabels( "Cnidaria", TaxonomyColors.CNIDARIA_COLOR, w );
2254             writeColorLabels( "Placozoa", TaxonomyColors.PLACOZOA_COLOR, w );
2255             writeColorLabels( "Ctenophora (comb jellies)", TaxonomyColors.CTENOPHORA_COLOR, w );
2256             writeColorLabels( "Porifera (sponges)", TaxonomyColors.PORIFERA_COLOR, w );
2257             writeColorLabels( "Choanoflagellida", TaxonomyColors.CHOANOFLAGELLIDA, w );
2258             writeColorLabels( "Ichthyosporea & Filasterea", TaxonomyColors.ICHTHYOSPOREA_AND_FILASTEREA, w );
2259             writeColorLabels( "Fungi", TaxonomyColors.FUNGI_COLOR, w );
2260             writeColorLabels( "Nucleariidae and Fonticula group",
2261                               TaxonomyColors.NUCLEARIIDAE_AND_FONTICULA_GROUP_COLOR,
2262                               w );
2263             writeColorLabels( "Amoebozoa", TaxonomyColors.AMOEBOZOA_COLOR, w );
2264             writeColorLabels( "Embryophyta (plants)", TaxonomyColors.EMBRYOPHYTA_COLOR, w );
2265             writeColorLabels( "Chlorophyta (green algae)", TaxonomyColors.CHLOROPHYTA_COLOR, w );
2266             writeColorLabels( "Rhodophyta (red algae)", TaxonomyColors.RHODOPHYTA_COLOR, w );
2267             writeColorLabels( "Glaucocystophyce (Glaucophyta)", TaxonomyColors.GLAUCOPHYTA_COLOR, w );
2268             writeColorLabels( "Hacrobia (Cryptophyta & Haptophyceae & Centroheliozoa)",
2269                               TaxonomyColors.HACROBIA_COLOR,
2270                               w );
2271             writeColorLabels( "Stramenopiles (Chromophyta, heterokonts)", TaxonomyColors.STRAMENOPILES_COLOR, w );
2272             writeColorLabels( "Alveolata", TaxonomyColors.ALVEOLATA_COLOR, w );
2273             writeColorLabels( "Rhizaria", TaxonomyColors.RHIZARIA_COLOR, w );
2274             writeColorLabels( "Excavata", TaxonomyColors.EXCAVATA_COLOR, w );
2275             writeColorLabels( "Apusozoa", TaxonomyColors.APUSOZOA_COLOR, w );
2276             writeColorLabels( "Archaea", TaxonomyColors.ARCHAEA_COLOR, w );
2277             writeColorLabels( "Bacteria", TaxonomyColors.BACTERIA_COLOR, w );
2278             w.write( "</table>" );
2279             w.write( SurfacingConstants.NL );
2280             //
2281             w.write( "<hr>" );
2282             w.write( SurfacingConstants.NL );
2283             w.write( "<table>" );
2284             w.write( SurfacingConstants.NL );
2285         }
2286         //
2287         for( final DomainSimilarity similarity : similarities ) {
2288             if ( ( species_order != null ) && !species_order.isEmpty() ) {
2289                 ( ( PrintableDomainSimilarity ) similarity ).setSpeciesOrder( species_order );
2290             }
2291             if ( simple_tab_writer != null ) {
2292                 simple_tab_writer.write( similarity.toStringBuffer( PRINT_OPTION.SIMPLE_TAB_DELIMITED,
2293                                                                     tax_code_to_id_map,
2294                                                                     null ).toString() );
2295             }
2296             if ( single_writer != null ) {
2297                 single_writer.write( similarity.toStringBuffer( print_option, tax_code_to_id_map, phy ).toString() );
2298                 single_writer.write( SurfacingConstants.NL );
2299             }
2300             else {
2301                 Writer local_writer = split_writers.get( ( similarity.getDomainId().charAt( 0 ) + "" ).toLowerCase()
2302                         .charAt( 0 ) );
2303                 if ( local_writer == null ) {
2304                     local_writer = split_writers.get( '0' );
2305                 }
2306                 local_writer.write( similarity.toStringBuffer( print_option, tax_code_to_id_map, phy ).toString() );
2307                 local_writer.write( SurfacingConstants.NL );
2308             }
2309         }
2310         switch ( print_option ) {
2311             case HTML:
2312                 for( final Writer w : split_writers.values() ) {
2313                     w.write( SurfacingConstants.NL );
2314                     w.write( "</table>" );
2315                     w.write( SurfacingConstants.NL );
2316                     w.write( "</font>" );
2317                     w.write( SurfacingConstants.NL );
2318                     w.write( "</body>" );
2319                     w.write( SurfacingConstants.NL );
2320                     w.write( "</html>" );
2321                     w.write( SurfacingConstants.NL );
2322                 }
2323                 break;
2324             default:
2325                 break;
2326         }
2327         for( final Writer w : split_writers.values() ) {
2328             w.close();
2329         }
2330     }
2331
2332     public static void writeHtmlHead( final Writer w, final String title ) throws IOException {
2333         w.write( SurfacingConstants.NL );
2334         w.write( "<head>" );
2335         w.write( "<title>" );
2336         w.write( title );
2337         w.write( "</title>" );
2338         w.write( SurfacingConstants.NL );
2339         w.write( "<style>" );
2340         w.write( SurfacingConstants.NL );
2341         w.write( "a:visited { color : #000066; text-decoration : none; }" );
2342         w.write( SurfacingConstants.NL );
2343         w.write( "a:link { color : #000066; text-decoration : none; }" );
2344         w.write( SurfacingConstants.NL );
2345         w.write( "a:active { color : ##000066; text-decoration : none; }" );
2346         w.write( SurfacingConstants.NL );
2347         w.write( "a:hover { color : #FFFFFF; background-color : #000000; text-decoration : none; }" );
2348         w.write( SurfacingConstants.NL );
2349         //
2350         w.write( "a.pl:visited { color : #505050; text-decoration : none; font-size: 7px;}" );
2351         w.write( SurfacingConstants.NL );
2352         w.write( "a.pl:link { color : #505050; text-decoration : none; font-size: 7px;}" );
2353         w.write( SurfacingConstants.NL );
2354         w.write( "a.pl:active { color : #505050; text-decoration : none; font-size: 7px;}" );
2355         w.write( SurfacingConstants.NL );
2356         w.write( "a.pl:hover { color : #FFFFFF; background-color : #000000; text-decoration : none; font-size: 7px;}" );
2357         w.write( SurfacingConstants.NL );
2358         //
2359         w.write( "a.ps:visited { color : #707070; text-decoration : none; font-size: 7px;}" );
2360         w.write( SurfacingConstants.NL );
2361         w.write( "a.ps:link { color : #707070; text-decoration : none; font-size: 7px;}" );
2362         w.write( SurfacingConstants.NL );
2363         w.write( "a.ps:active { color : #707070; text-decoration : none; font-size: 7px;}" );
2364         w.write( SurfacingConstants.NL );
2365         w.write( "a.ps:hover { color : #FFFFFF; background-color : #000000; text-decoration : none; font-size: 7px;}" );
2366         w.write( SurfacingConstants.NL );
2367         //
2368         w.write( "td { text-align: left; vertical-align: top; font-family: Verdana, Arial, Helvetica; font-size: 8pt}" );
2369         w.write( SurfacingConstants.NL );
2370         w.write( "h1 { color : #0000FF; font-family: Verdana, Arial, Helvetica; font-size: 18pt; font-weight: bold }" );
2371         w.write( SurfacingConstants.NL );
2372         w.write( "h2 { color : #0000FF; font-family: Verdana, Arial, Helvetica; font-size: 16pt; font-weight: bold }" );
2373         w.write( SurfacingConstants.NL );
2374         w.write( "</style>" );
2375         w.write( SurfacingConstants.NL );
2376         w.write( "</head>" );
2377         w.write( SurfacingConstants.NL );
2378     }
2379
2380     public static void writeMatrixToFile( final CharacterStateMatrix<?> matrix,
2381                                           final String filename,
2382                                           final Format format ) {
2383         final File outfile = new File( filename );
2384         checkForOutputFileWriteability( outfile );
2385         try {
2386             final BufferedWriter out = new BufferedWriter( new FileWriter( outfile ) );
2387             matrix.toWriter( out, format );
2388             out.flush();
2389             out.close();
2390         }
2391         catch ( final IOException e ) {
2392             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() );
2393         }
2394         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote matrix: \"" + filename + "\"" );
2395     }
2396
2397     public static void writeMatrixToFile( final File matrix_outfile, final List<DistanceMatrix> matrices ) {
2398         checkForOutputFileWriteability( matrix_outfile );
2399         try {
2400             final BufferedWriter out = new BufferedWriter( new FileWriter( matrix_outfile ) );
2401             for( final DistanceMatrix distance_matrix : matrices ) {
2402                 out.write( distance_matrix.toStringBuffer( DistanceMatrix.Format.PHYLIP ).toString() );
2403                 out.write( ForesterUtil.LINE_SEPARATOR );
2404                 out.flush();
2405             }
2406             out.close();
2407         }
2408         catch ( final IOException e ) {
2409             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() );
2410         }
2411         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote distance matrices to \"" + matrix_outfile + "\"" );
2412     }
2413
2414     public static void writePhylogenyToFile( final Phylogeny phylogeny, final String filename ) {
2415         final PhylogenyWriter writer = new PhylogenyWriter();
2416         try {
2417             writer.toPhyloXML( new File( filename ), phylogeny, 1 );
2418         }
2419         catch ( final IOException e ) {
2420             ForesterUtil.printWarningMessage( surfacing.PRG_NAME, "failed to write phylogeny to \"" + filename + "\": "
2421                     + e );
2422         }
2423         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote phylogeny to \"" + filename + "\"" );
2424     }
2425
2426     public static void writePresentToNexus( final File output_file,
2427                                             final File positive_filter_file,
2428                                             final SortedSet<String> filter,
2429                                             final List<GenomeWideCombinableDomains> gwcd_list ) {
2430         try {
2431             writeMatrixToFile( DomainParsimonyCalculator.createMatrixOfDomainPresenceOrAbsence( gwcd_list,
2432                                                                                                 positive_filter_file == null ? null
2433                                                                                                         : filter ),
2434                                output_file + surfacing.DOMAINS_PRESENT_NEXUS,
2435                                Format.NEXUS_BINARY );
2436             writeMatrixToFile( DomainParsimonyCalculator.createMatrixOfBinaryDomainCombinationPresenceOrAbsence( gwcd_list ),
2437                                output_file + surfacing.BDC_PRESENT_NEXUS,
2438                                Format.NEXUS_BINARY );
2439         }
2440         catch ( final Exception e ) {
2441             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() );
2442         }
2443     }
2444
2445     public static void writeProteinListsForAllSpecies( final File output_dir,
2446                                                        final SortedMap<Species, List<Protein>> protein_lists_per_species,
2447                                                        final List<GenomeWideCombinableDomains> gwcd_list,
2448                                                        final double domain_e_cutoff ) {
2449         final SortedSet<String> all_domains = new TreeSet<String>();
2450         for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
2451             all_domains.addAll( gwcd.getAllDomainIds() );
2452         }
2453         for( final String domain : all_domains ) {
2454             final File out = new File( output_dir + ForesterUtil.FILE_SEPARATOR + domain + surfacing.SEQ_EXTRACT_SUFFIX );
2455             checkForOutputFileWriteability( out );
2456             try {
2457                 final Writer proteins_file_writer = new BufferedWriter( new FileWriter( out ) );
2458                 extractProteinNames( protein_lists_per_species,
2459                                      domain,
2460                                      proteins_file_writer,
2461                                      "\t",
2462                                      surfacing.LIMIT_SPEC_FOR_PROT_EX,
2463                                      domain_e_cutoff );
2464                 proteins_file_writer.close();
2465             }
2466             catch ( final IOException e ) {
2467                 ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() );
2468             }
2469             ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote proteins list to \"" + out + "\"" );
2470         }
2471     }
2472
2473     public static void writeTaxonomyLinks( final Writer writer,
2474                                            final String species,
2475                                            final Map<String, Integer> tax_code_to_id_map ) throws IOException {
2476         if ( ( species.length() > 1 ) && ( species.indexOf( '_' ) < 1 ) ) {
2477             writer.write( " [" );
2478             if ( ( tax_code_to_id_map != null ) && tax_code_to_id_map.containsKey( species ) ) {
2479                 writer.write( "<a href=\"" + SurfacingConstants.UNIPROT_TAXONOMY_ID_LINK
2480                         + tax_code_to_id_map.get( species ) + "\" target=\"taxonomy_window\">uniprot</a>" );
2481             }
2482             else {
2483                 writer.write( "<a href=\"" + SurfacingConstants.EOL_LINK + species
2484                         + "\" target=\"taxonomy_window\">eol</a>" );
2485                 writer.write( "|" );
2486                 writer.write( "<a href=\"" + SurfacingConstants.GOOGLE_SCHOLAR_SEARCH + species
2487                         + "\" target=\"taxonomy_window\">scholar</a>" );
2488                 writer.write( "|" );
2489                 writer.write( "<a href=\"" + SurfacingConstants.GOOGLE_WEB_SEARCH_LINK + species
2490                         + "\" target=\"taxonomy_window\">google</a>" );
2491             }
2492             writer.write( "]" );
2493         }
2494     }
2495
2496     private final static void addToCountMap( final Map<String, Integer> map, final String s ) {
2497         if ( map.containsKey( s ) ) {
2498             map.put( s, map.get( s ) + 1 );
2499         }
2500         else {
2501             map.put( s, 1 );
2502         }
2503     }
2504
2505     private static void calculateIndependentDomainCombinationGains( final Phylogeny local_phylogeny_l,
2506                                                                     final String outfilename_for_counts,
2507                                                                     final String outfilename_for_dc,
2508                                                                     final String outfilename_for_dc_for_go_mapping,
2509                                                                     final String outfilename_for_dc_for_go_mapping_unique,
2510                                                                     final String outfilename_for_rank_counts,
2511                                                                     final String outfilename_for_ancestor_species_counts,
2512                                                                     final String outfilename_for_protein_stats,
2513                                                                     final Map<String, DescriptiveStatistics> protein_length_stats_by_dc,
2514                                                                     final Map<String, DescriptiveStatistics> domain_number_stats_by_dc,
2515                                                                     final Map<String, DescriptiveStatistics> domain_length_stats_by_domain ) {
2516         try {
2517             //
2518             //            if ( protein_length_stats_by_dc != null ) {
2519             //                for( final Entry<?, DescriptiveStatistics> entry : protein_length_stats_by_dc.entrySet() ) {
2520             //                    System.out.print( entry.getKey().toString() );
2521             //                    System.out.print( ": " );
2522             //                    double[] a = entry.getValue().getDataAsDoubleArray();
2523             //                    for( int i = 0; i < a.length; i++ ) {
2524             //                        System.out.print( a[ i ] + " " );
2525             //                    }
2526             //                    System.out.println();
2527             //                }
2528             //            }
2529             //            if ( domain_number_stats_by_dc != null ) {
2530             //                for( final Entry<?, DescriptiveStatistics> entry : domain_number_stats_by_dc.entrySet() ) {
2531             //                    System.out.print( entry.getKey().toString() );
2532             //                    System.out.print( ": " );
2533             //                    double[] a = entry.getValue().getDataAsDoubleArray();
2534             //                    for( int i = 0; i < a.length; i++ ) {
2535             //                        System.out.print( a[ i ] + " " );
2536             //                    }
2537             //                    System.out.println();
2538             //                }
2539             //            }
2540             //
2541             final BufferedWriter out_counts = new BufferedWriter( new FileWriter( outfilename_for_counts ) );
2542             final BufferedWriter out_dc = new BufferedWriter( new FileWriter( outfilename_for_dc ) );
2543             final BufferedWriter out_dc_for_go_mapping = new BufferedWriter( new FileWriter( outfilename_for_dc_for_go_mapping ) );
2544             final BufferedWriter out_dc_for_go_mapping_unique = new BufferedWriter( new FileWriter( outfilename_for_dc_for_go_mapping_unique ) );
2545             final SortedMap<String, Integer> dc_gain_counts = new TreeMap<String, Integer>();
2546             for( final PhylogenyNodeIterator it = local_phylogeny_l.iteratorPostorder(); it.hasNext(); ) {
2547                 final PhylogenyNode n = it.next();
2548                 final Set<String> gained_dc = n.getNodeData().getBinaryCharacters().getGainedCharacters();
2549                 for( final String dc : gained_dc ) {
2550                     if ( dc_gain_counts.containsKey( dc ) ) {
2551                         dc_gain_counts.put( dc, dc_gain_counts.get( dc ) + 1 );
2552                     }
2553                     else {
2554                         dc_gain_counts.put( dc, 1 );
2555                     }
2556                 }
2557             }
2558             final SortedMap<Integer, Integer> histogram = new TreeMap<Integer, Integer>();
2559             final SortedMap<Integer, StringBuilder> domain_lists = new TreeMap<Integer, StringBuilder>();
2560             final SortedMap<Integer, DescriptiveStatistics> dc_reapp_counts_to_protein_length_stats = new TreeMap<Integer, DescriptiveStatistics>();
2561             final SortedMap<Integer, DescriptiveStatistics> dc_reapp_counts_to_domain_number_stats = new TreeMap<Integer, DescriptiveStatistics>();
2562             final SortedMap<Integer, DescriptiveStatistics> dc_reapp_counts_to_domain_lengths_stats = new TreeMap<Integer, DescriptiveStatistics>();
2563             final SortedMap<Integer, PriorityQueue<String>> domain_lists_go = new TreeMap<Integer, PriorityQueue<String>>();
2564             final SortedMap<Integer, SortedSet<String>> domain_lists_go_unique = new TreeMap<Integer, SortedSet<String>>();
2565             final Set<String> dcs = dc_gain_counts.keySet();
2566             final SortedSet<String> more_than_once = new TreeSet<String>();
2567             DescriptiveStatistics gained_once_lengths_stats = new BasicDescriptiveStatistics();
2568             DescriptiveStatistics gained_once_domain_count_stats = new BasicDescriptiveStatistics();
2569             DescriptiveStatistics gained_multiple_times_lengths_stats = new BasicDescriptiveStatistics();
2570             final DescriptiveStatistics gained_multiple_times_domain_count_stats = new BasicDescriptiveStatistics();
2571             long gained_multiple_times_domain_length_sum = 0;
2572             long gained_once_domain_length_sum = 0;
2573             long gained_multiple_times_domain_length_count = 0;
2574             long gained_once_domain_length_count = 0;
2575             for( final String dc : dcs ) {
2576                 final int count = dc_gain_counts.get( dc );
2577                 if ( histogram.containsKey( count ) ) {
2578                     histogram.put( count, histogram.get( count ) + 1 );
2579                     domain_lists.get( count ).append( ", " + dc );
2580                     domain_lists_go.get( count ).addAll( splitDomainCombination( dc ) );
2581                     domain_lists_go_unique.get( count ).addAll( splitDomainCombination( dc ) );
2582                 }
2583                 else {
2584                     histogram.put( count, 1 );
2585                     domain_lists.put( count, new StringBuilder( dc ) );
2586                     final PriorityQueue<String> q = new PriorityQueue<String>();
2587                     q.addAll( splitDomainCombination( dc ) );
2588                     domain_lists_go.put( count, q );
2589                     final SortedSet<String> set = new TreeSet<String>();
2590                     set.addAll( splitDomainCombination( dc ) );
2591                     domain_lists_go_unique.put( count, set );
2592                 }
2593                 if ( protein_length_stats_by_dc != null ) {
2594                     if ( !dc_reapp_counts_to_protein_length_stats.containsKey( count ) ) {
2595                         dc_reapp_counts_to_protein_length_stats.put( count, new BasicDescriptiveStatistics() );
2596                     }
2597                     dc_reapp_counts_to_protein_length_stats.get( count ).addValue( protein_length_stats_by_dc.get( dc )
2598                             .arithmeticMean() );
2599                 }
2600                 if ( domain_number_stats_by_dc != null ) {
2601                     if ( !dc_reapp_counts_to_domain_number_stats.containsKey( count ) ) {
2602                         dc_reapp_counts_to_domain_number_stats.put( count, new BasicDescriptiveStatistics() );
2603                     }
2604                     dc_reapp_counts_to_domain_number_stats.get( count ).addValue( domain_number_stats_by_dc.get( dc )
2605                             .arithmeticMean() );
2606                 }
2607                 if ( domain_length_stats_by_domain != null ) {
2608                     if ( !dc_reapp_counts_to_domain_lengths_stats.containsKey( count ) ) {
2609                         dc_reapp_counts_to_domain_lengths_stats.put( count, new BasicDescriptiveStatistics() );
2610                     }
2611                     final String[] ds = dc.split( "=" );
2612                     dc_reapp_counts_to_domain_lengths_stats.get( count ).addValue( domain_length_stats_by_domain
2613                             .get( ds[ 0 ] ).arithmeticMean() );
2614                     dc_reapp_counts_to_domain_lengths_stats.get( count ).addValue( domain_length_stats_by_domain
2615                             .get( ds[ 1 ] ).arithmeticMean() );
2616                 }
2617                 if ( count > 1 ) {
2618                     more_than_once.add( dc );
2619                     if ( protein_length_stats_by_dc != null ) {
2620                         final DescriptiveStatistics s = protein_length_stats_by_dc.get( dc );
2621                         for( final double element : s.getData() ) {
2622                             gained_multiple_times_lengths_stats.addValue( element );
2623                         }
2624                     }
2625                     if ( domain_number_stats_by_dc != null ) {
2626                         final DescriptiveStatistics s = domain_number_stats_by_dc.get( dc );
2627                         for( final double element : s.getData() ) {
2628                             gained_multiple_times_domain_count_stats.addValue( element );
2629                         }
2630                     }
2631                     if ( domain_length_stats_by_domain != null ) {
2632                         final String[] ds = dc.split( "=" );
2633                         final DescriptiveStatistics s0 = domain_length_stats_by_domain.get( ds[ 0 ] );
2634                         final DescriptiveStatistics s1 = domain_length_stats_by_domain.get( ds[ 1 ] );
2635                         for( final double element : s0.getData() ) {
2636                             gained_multiple_times_domain_length_sum += element;
2637                             ++gained_multiple_times_domain_length_count;
2638                         }
2639                         for( final double element : s1.getData() ) {
2640                             gained_multiple_times_domain_length_sum += element;
2641                             ++gained_multiple_times_domain_length_count;
2642                         }
2643                     }
2644                 }
2645                 else {
2646                     if ( protein_length_stats_by_dc != null ) {
2647                         final DescriptiveStatistics s = protein_length_stats_by_dc.get( dc );
2648                         for( final double element : s.getData() ) {
2649                             gained_once_lengths_stats.addValue( element );
2650                         }
2651                     }
2652                     if ( domain_number_stats_by_dc != null ) {
2653                         final DescriptiveStatistics s = domain_number_stats_by_dc.get( dc );
2654                         for( final double element : s.getData() ) {
2655                             gained_once_domain_count_stats.addValue( element );
2656                         }
2657                     }
2658                     if ( domain_length_stats_by_domain != null ) {
2659                         final String[] ds = dc.split( "=" );
2660                         final DescriptiveStatistics s0 = domain_length_stats_by_domain.get( ds[ 0 ] );
2661                         final DescriptiveStatistics s1 = domain_length_stats_by_domain.get( ds[ 1 ] );
2662                         for( final double element : s0.getData() ) {
2663                             gained_once_domain_length_sum += element;
2664                             ++gained_once_domain_length_count;
2665                         }
2666                         for( final double element : s1.getData() ) {
2667                             gained_once_domain_length_sum += element;
2668                             ++gained_once_domain_length_count;
2669                         }
2670                     }
2671                 }
2672             }
2673             final Set<Integer> histogram_keys = histogram.keySet();
2674             for( final Integer histogram_key : histogram_keys ) {
2675                 final int count = histogram.get( histogram_key );
2676                 final StringBuilder dc = domain_lists.get( histogram_key );
2677                 out_counts.write( histogram_key + "\t" + count + ForesterUtil.LINE_SEPARATOR );
2678                 out_dc.write( histogram_key + "\t" + dc + ForesterUtil.LINE_SEPARATOR );
2679                 out_dc_for_go_mapping.write( "#" + histogram_key + ForesterUtil.LINE_SEPARATOR );
2680                 final Object[] sorted = domain_lists_go.get( histogram_key ).toArray();
2681                 Arrays.sort( sorted );
2682                 for( final Object domain : sorted ) {
2683                     out_dc_for_go_mapping.write( domain + ForesterUtil.LINE_SEPARATOR );
2684                 }
2685                 out_dc_for_go_mapping_unique.write( "#" + histogram_key + ForesterUtil.LINE_SEPARATOR );
2686                 for( final String domain : domain_lists_go_unique.get( histogram_key ) ) {
2687                     out_dc_for_go_mapping_unique.write( domain + ForesterUtil.LINE_SEPARATOR );
2688                 }
2689             }
2690             out_counts.close();
2691             out_dc.close();
2692             out_dc_for_go_mapping.close();
2693             out_dc_for_go_mapping_unique.close();
2694             final SortedMap<String, Integer> lca_rank_counts = new TreeMap<String, Integer>();
2695             final SortedMap<String, Integer> lca_ancestor_species_counts = new TreeMap<String, Integer>();
2696             for( final String dc : more_than_once ) {
2697                 final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
2698                 for( final PhylogenyNodeIterator it = local_phylogeny_l.iteratorExternalForward(); it.hasNext(); ) {
2699                     final PhylogenyNode n = it.next();
2700                     if ( n.getNodeData().getBinaryCharacters().getGainedCharacters().contains( dc ) ) {
2701                         nodes.add( n );
2702                     }
2703                 }
2704                 for( int i = 0; i < ( nodes.size() - 1 ); ++i ) {
2705                     for( int j = i + 1; j < nodes.size(); ++j ) {
2706                         final PhylogenyNode lca = PhylogenyMethods.calculateLCA( nodes.get( i ), nodes.get( j ) );
2707                         String rank = "unknown";
2708                         if ( lca.getNodeData().isHasTaxonomy()
2709                                 && !ForesterUtil.isEmpty( lca.getNodeData().getTaxonomy().getRank() ) ) {
2710                             rank = lca.getNodeData().getTaxonomy().getRank();
2711                         }
2712                         addToCountMap( lca_rank_counts, rank );
2713                         String lca_species;
2714                         if ( lca.getNodeData().isHasTaxonomy()
2715                                 && !ForesterUtil.isEmpty( lca.getNodeData().getTaxonomy().getScientificName() ) ) {
2716                             lca_species = lca.getNodeData().getTaxonomy().getScientificName();
2717                         }
2718                         else if ( lca.getNodeData().isHasTaxonomy()
2719                                 && !ForesterUtil.isEmpty( lca.getNodeData().getTaxonomy().getCommonName() ) ) {
2720                             lca_species = lca.getNodeData().getTaxonomy().getCommonName();
2721                         }
2722                         else {
2723                             lca_species = lca.getName();
2724                         }
2725                         addToCountMap( lca_ancestor_species_counts, lca_species );
2726                     }
2727                 }
2728             }
2729             final BufferedWriter out_for_rank_counts = new BufferedWriter( new FileWriter( outfilename_for_rank_counts ) );
2730             final BufferedWriter out_for_ancestor_species_counts = new BufferedWriter( new FileWriter( outfilename_for_ancestor_species_counts ) );
2731             ForesterUtil.map2writer( out_for_rank_counts, lca_rank_counts, "\t", ForesterUtil.LINE_SEPARATOR );
2732             ForesterUtil.map2writer( out_for_ancestor_species_counts,
2733                                      lca_ancestor_species_counts,
2734                                      "\t",
2735                                      ForesterUtil.LINE_SEPARATOR );
2736             out_for_rank_counts.close();
2737             out_for_ancestor_species_counts.close();
2738             if ( !ForesterUtil.isEmpty( outfilename_for_protein_stats )
2739                     && ( ( domain_length_stats_by_domain != null ) || ( protein_length_stats_by_dc != null ) || ( domain_number_stats_by_dc != null ) ) ) {
2740                 final BufferedWriter w = new BufferedWriter( new FileWriter( outfilename_for_protein_stats ) );
2741                 w.write( "Domain Lengths: " );
2742                 w.write( "\n" );
2743                 if ( domain_length_stats_by_domain != null ) {
2744                     for( final Entry<Integer, DescriptiveStatistics> entry : dc_reapp_counts_to_domain_lengths_stats
2745                             .entrySet() ) {
2746                         w.write( entry.getKey().toString() );
2747                         w.write( "\t" + entry.getValue().arithmeticMean() );
2748                         w.write( "\t" + entry.getValue().median() );
2749                         w.write( "\n" );
2750                     }
2751                 }
2752                 w.flush();
2753                 w.write( "\n" );
2754                 w.write( "\n" );
2755                 w.write( "Protein Lengths: " );
2756                 w.write( "\n" );
2757                 if ( protein_length_stats_by_dc != null ) {
2758                     for( final Entry<Integer, DescriptiveStatistics> entry : dc_reapp_counts_to_protein_length_stats
2759                             .entrySet() ) {
2760                         w.write( entry.getKey().toString() );
2761                         w.write( "\t" + entry.getValue().arithmeticMean() );
2762                         w.write( "\t" + entry.getValue().median() );
2763                         w.write( "\n" );
2764                     }
2765                 }
2766                 w.flush();
2767                 w.write( "\n" );
2768                 w.write( "\n" );
2769                 w.write( "Number of domains: " );
2770                 w.write( "\n" );
2771                 if ( domain_number_stats_by_dc != null ) {
2772                     for( final Entry<Integer, DescriptiveStatistics> entry : dc_reapp_counts_to_domain_number_stats
2773                             .entrySet() ) {
2774                         w.write( entry.getKey().toString() );
2775                         w.write( "\t" + entry.getValue().arithmeticMean() );
2776                         w.write( "\t" + entry.getValue().median() );
2777                         w.write( "\n" );
2778                     }
2779                 }
2780                 w.flush();
2781                 w.write( "\n" );
2782                 w.write( "\n" );
2783                 w.write( "Gained once, domain lengths:" );
2784                 w.write( "\n" );
2785                 w.write( "N: " + gained_once_domain_length_count );
2786                 w.write( "\n" );
2787                 w.write( "Avg: " + ( ( double ) gained_once_domain_length_sum / gained_once_domain_length_count ) );
2788                 w.write( "\n" );
2789                 w.write( "\n" );
2790                 w.write( "Gained multiple times, domain lengths:" );
2791                 w.write( "\n" );
2792                 w.write( "N: " + gained_multiple_times_domain_length_count );
2793                 w.write( "\n" );
2794                 w.write( "Avg: "
2795                         + ( ( double ) gained_multiple_times_domain_length_sum / gained_multiple_times_domain_length_count ) );
2796                 w.write( "\n" );
2797                 w.write( "\n" );
2798                 w.write( "\n" );
2799                 w.write( "\n" );
2800                 w.write( "Gained once, protein lengths:" );
2801                 w.write( "\n" );
2802                 w.write( gained_once_lengths_stats.toString() );
2803                 gained_once_lengths_stats = null;
2804                 w.write( "\n" );
2805                 w.write( "\n" );
2806                 w.write( "Gained once, domain counts:" );
2807                 w.write( "\n" );
2808                 w.write( gained_once_domain_count_stats.toString() );
2809                 gained_once_domain_count_stats = null;
2810                 w.write( "\n" );
2811                 w.write( "\n" );
2812                 w.write( "Gained multiple times, protein lengths:" );
2813                 w.write( "\n" );
2814                 w.write( gained_multiple_times_lengths_stats.toString() );
2815                 gained_multiple_times_lengths_stats = null;
2816                 w.write( "\n" );
2817                 w.write( "\n" );
2818                 w.write( "Gained multiple times, domain counts:" );
2819                 w.write( "\n" );
2820                 w.write( gained_multiple_times_domain_count_stats.toString() );
2821                 w.flush();
2822                 w.close();
2823             }
2824         }
2825         catch ( final IOException e ) {
2826             ForesterUtil.printWarningMessage( surfacing.PRG_NAME, "Failure to write: " + e );
2827         }
2828         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote independent domain combination gains fitch counts to ["
2829                 + outfilename_for_counts + "]" );
2830         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote independent domain combination gains fitch lists to ["
2831                 + outfilename_for_dc + "]" );
2832         ForesterUtil.programMessage( surfacing.PRG_NAME,
2833                                      "Wrote independent domain combination gains fitch lists to (for GO mapping) ["
2834                                              + outfilename_for_dc_for_go_mapping + "]" );
2835         ForesterUtil.programMessage( surfacing.PRG_NAME,
2836                                      "Wrote independent domain combination gains fitch lists to (for GO mapping, unique) ["
2837                                              + outfilename_for_dc_for_go_mapping_unique + "]" );
2838     }
2839
2840     private static SortedSet<String> collectAllDomainsChangedOnSubtree( final PhylogenyNode subtree_root,
2841                                                                         final boolean get_gains ) {
2842         final SortedSet<String> domains = new TreeSet<String>();
2843         for( final PhylogenyNode descendant : PhylogenyMethods.getAllDescendants( subtree_root ) ) {
2844             final BinaryCharacters chars = descendant.getNodeData().getBinaryCharacters();
2845             if ( get_gains ) {
2846                 domains.addAll( chars.getGainedCharacters() );
2847             }
2848             else {
2849                 domains.addAll( chars.getLostCharacters() );
2850             }
2851         }
2852         return domains;
2853     }
2854
2855     private static File createBaseDirForPerNodeDomainFiles( final String base_dir,
2856                                                             final boolean domain_combinations,
2857                                                             final CharacterStateMatrix.GainLossStates state,
2858                                                             final String outfile ) {
2859         File per_node_go_mapped_domain_gain_loss_files_base_dir = new File( new File( outfile ).getParent()
2860                 + ForesterUtil.FILE_SEPARATOR + base_dir );
2861         if ( !per_node_go_mapped_domain_gain_loss_files_base_dir.exists() ) {
2862             per_node_go_mapped_domain_gain_loss_files_base_dir.mkdir();
2863         }
2864         if ( domain_combinations ) {
2865             per_node_go_mapped_domain_gain_loss_files_base_dir = new File( per_node_go_mapped_domain_gain_loss_files_base_dir
2866                     + ForesterUtil.FILE_SEPARATOR + "DC" );
2867         }
2868         else {
2869             per_node_go_mapped_domain_gain_loss_files_base_dir = new File( per_node_go_mapped_domain_gain_loss_files_base_dir
2870                     + ForesterUtil.FILE_SEPARATOR + "DOMAINS" );
2871         }
2872         if ( !per_node_go_mapped_domain_gain_loss_files_base_dir.exists() ) {
2873             per_node_go_mapped_domain_gain_loss_files_base_dir.mkdir();
2874         }
2875         if ( state == GainLossStates.GAIN ) {
2876             per_node_go_mapped_domain_gain_loss_files_base_dir = new File( per_node_go_mapped_domain_gain_loss_files_base_dir
2877                     + ForesterUtil.FILE_SEPARATOR + "GAINS" );
2878         }
2879         else if ( state == GainLossStates.LOSS ) {
2880             per_node_go_mapped_domain_gain_loss_files_base_dir = new File( per_node_go_mapped_domain_gain_loss_files_base_dir
2881                     + ForesterUtil.FILE_SEPARATOR + "LOSSES" );
2882         }
2883         else {
2884             per_node_go_mapped_domain_gain_loss_files_base_dir = new File( per_node_go_mapped_domain_gain_loss_files_base_dir
2885                     + ForesterUtil.FILE_SEPARATOR + "PRESENT" );
2886         }
2887         if ( !per_node_go_mapped_domain_gain_loss_files_base_dir.exists() ) {
2888             per_node_go_mapped_domain_gain_loss_files_base_dir.mkdir();
2889         }
2890         return per_node_go_mapped_domain_gain_loss_files_base_dir;
2891     }
2892
2893     private static SortedSet<BinaryDomainCombination> createSetOfAllBinaryDomainCombinationsPerGenome( final GenomeWideCombinableDomains gwcd ) {
2894         final SortedMap<String, CombinableDomains> cds = gwcd.getAllCombinableDomainsIds();
2895         final SortedSet<BinaryDomainCombination> binary_combinations = new TreeSet<BinaryDomainCombination>();
2896         for( final String domain_id : cds.keySet() ) {
2897             final CombinableDomains cd = cds.get( domain_id );
2898             binary_combinations.addAll( cd.toBinaryDomainCombinations() );
2899         }
2900         return binary_combinations;
2901     }
2902
2903     private static void printSomeStats( final DescriptiveStatistics stats, final AsciiHistogram histo, final Writer w )
2904             throws IOException {
2905         w.write( "<hr>" );
2906         w.write( "<br>" );
2907         w.write( SurfacingConstants.NL );
2908         w.write( "<tt><pre>" );
2909         w.write( SurfacingConstants.NL );
2910         if ( histo != null ) {
2911             w.write( histo.toStringBuffer( 20, '|', 40, 5 ).toString() );
2912             w.write( SurfacingConstants.NL );
2913         }
2914         w.write( "</pre></tt>" );
2915         w.write( SurfacingConstants.NL );
2916         w.write( "<table>" );
2917         w.write( SurfacingConstants.NL );
2918         w.write( "<tr><td>N: </td><td>" + stats.getN() + "</td></tr>" );
2919         w.write( SurfacingConstants.NL );
2920         w.write( "<tr><td>Min: </td><td>" + stats.getMin() + "</td></tr>" );
2921         w.write( SurfacingConstants.NL );
2922         w.write( "<tr><td>Max: </td><td>" + stats.getMax() + "</td></tr>" );
2923         w.write( SurfacingConstants.NL );
2924         w.write( "<tr><td>Mean: </td><td>" + stats.arithmeticMean() + "</td></tr>" );
2925         w.write( SurfacingConstants.NL );
2926         if ( stats.getN() > 1 ) {
2927             w.write( "<tr><td>SD: </td><td>" + stats.sampleStandardDeviation() + "</td></tr>" );
2928         }
2929         else {
2930             w.write( "<tr><td>SD: </td><td>n/a</td></tr>" );
2931         }
2932         w.write( SurfacingConstants.NL );
2933         w.write( "</table>" );
2934         w.write( SurfacingConstants.NL );
2935         w.write( "<br>" );
2936         w.write( SurfacingConstants.NL );
2937     }
2938
2939     private static List<String> splitDomainCombination( final String dc ) {
2940         final String[] s = dc.split( "=" );
2941         if ( s.length != 2 ) {
2942             ForesterUtil.printErrorMessage( surfacing.PRG_NAME, "Stringyfied domain combination has illegal format: "
2943                     + dc );
2944             System.exit( -1 );
2945         }
2946         final List<String> l = new ArrayList<String>( 2 );
2947         l.add( s[ 0 ] );
2948         l.add( s[ 1 ] );
2949         return l;
2950     }
2951
2952     private static void writeAllEncounteredPfamsToFile( final Map<String, List<GoId>> domain_id_to_go_ids_map,
2953                                                         final Map<GoId, GoTerm> go_id_to_term_map,
2954                                                         final String outfile_name,
2955                                                         final SortedSet<String> all_pfams_encountered ) {
2956         final File all_pfams_encountered_file = new File( outfile_name + surfacing.ALL_PFAMS_ENCOUNTERED_SUFFIX );
2957         final File all_pfams_encountered_with_go_annotation_file = new File( outfile_name
2958                 + surfacing.ALL_PFAMS_ENCOUNTERED_WITH_GO_ANNOTATION_SUFFIX );
2959         final File encountered_pfams_summary_file = new File( outfile_name + surfacing.ENCOUNTERED_PFAMS_SUMMARY_SUFFIX );
2960         int biological_process_counter = 0;
2961         int cellular_component_counter = 0;
2962         int molecular_function_counter = 0;
2963         int pfams_with_mappings_counter = 0;
2964         int pfams_without_mappings_counter = 0;
2965         int pfams_without_mappings_to_bp_or_mf_counter = 0;
2966         int pfams_with_mappings_to_bp_or_mf_counter = 0;
2967         try {
2968             final Writer all_pfams_encountered_writer = new BufferedWriter( new FileWriter( all_pfams_encountered_file ) );
2969             final Writer all_pfams_encountered_with_go_annotation_writer = new BufferedWriter( new FileWriter( all_pfams_encountered_with_go_annotation_file ) );
2970             final Writer summary_writer = new BufferedWriter( new FileWriter( encountered_pfams_summary_file ) );
2971             summary_writer.write( "# Pfam to GO mapping summary" );
2972             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
2973             summary_writer.write( "# Actual summary is at the end of this file." );
2974             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
2975             summary_writer.write( "# Encountered Pfams without a GO mapping:" );
2976             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
2977             for( final String pfam : all_pfams_encountered ) {
2978                 all_pfams_encountered_writer.write( pfam );
2979                 all_pfams_encountered_writer.write( ForesterUtil.LINE_SEPARATOR );
2980                 final String domain_id = new String( pfam );
2981                 if ( domain_id_to_go_ids_map.containsKey( domain_id ) ) {
2982                     ++pfams_with_mappings_counter;
2983                     all_pfams_encountered_with_go_annotation_writer.write( pfam );
2984                     all_pfams_encountered_with_go_annotation_writer.write( ForesterUtil.LINE_SEPARATOR );
2985                     final List<GoId> go_ids = domain_id_to_go_ids_map.get( domain_id );
2986                     boolean maps_to_bp = false;
2987                     boolean maps_to_cc = false;
2988                     boolean maps_to_mf = false;
2989                     for( final GoId go_id : go_ids ) {
2990                         final GoTerm go_term = go_id_to_term_map.get( go_id );
2991                         if ( go_term.getGoNameSpace().isBiologicalProcess() ) {
2992                             maps_to_bp = true;
2993                         }
2994                         else if ( go_term.getGoNameSpace().isCellularComponent() ) {
2995                             maps_to_cc = true;
2996                         }
2997                         else if ( go_term.getGoNameSpace().isMolecularFunction() ) {
2998                             maps_to_mf = true;
2999                         }
3000                     }
3001                     if ( maps_to_bp ) {
3002                         ++biological_process_counter;
3003                     }
3004                     if ( maps_to_cc ) {
3005                         ++cellular_component_counter;
3006                     }
3007                     if ( maps_to_mf ) {
3008                         ++molecular_function_counter;
3009                     }
3010                     if ( maps_to_bp || maps_to_mf ) {
3011                         ++pfams_with_mappings_to_bp_or_mf_counter;
3012                     }
3013                     else {
3014                         ++pfams_without_mappings_to_bp_or_mf_counter;
3015                     }
3016                 }
3017                 else {
3018                     ++pfams_without_mappings_to_bp_or_mf_counter;
3019                     ++pfams_without_mappings_counter;
3020                     summary_writer.write( pfam );
3021                     summary_writer.write( ForesterUtil.LINE_SEPARATOR );
3022                 }
3023             }
3024             all_pfams_encountered_writer.close();
3025             all_pfams_encountered_with_go_annotation_writer.close();
3026             ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote all [" + all_pfams_encountered.size()
3027                     + "] encountered Pfams to: \"" + all_pfams_encountered_file + "\"" );
3028             ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote all [" + pfams_with_mappings_counter
3029                     + "] encountered Pfams with GO mappings to: \"" + all_pfams_encountered_with_go_annotation_file
3030                     + "\"" );
3031             ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote summary (including all ["
3032                     + pfams_without_mappings_counter + "] encountered Pfams without GO mappings) to: \""
3033                     + encountered_pfams_summary_file + "\"" );
3034             ForesterUtil.programMessage( surfacing.PRG_NAME, "Sum of Pfams encountered                : "
3035                     + all_pfams_encountered.size() );
3036             ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams without a mapping                 : "
3037                     + pfams_without_mappings_counter + " ["
3038                     + ( ( 100 * pfams_without_mappings_counter ) / all_pfams_encountered.size() ) + "%]" );
3039             ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams without mapping to proc. or func. : "
3040                     + pfams_without_mappings_to_bp_or_mf_counter + " ["
3041                     + ( ( 100 * pfams_without_mappings_to_bp_or_mf_counter ) / all_pfams_encountered.size() ) + "%]" );
3042             ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with a mapping                    : "
3043                     + pfams_with_mappings_counter + " ["
3044                     + ( ( 100 * pfams_with_mappings_counter ) / all_pfams_encountered.size() ) + "%]" );
3045             ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with a mapping to proc. or func.  : "
3046                     + pfams_with_mappings_to_bp_or_mf_counter + " ["
3047                     + ( ( 100 * pfams_with_mappings_to_bp_or_mf_counter ) / all_pfams_encountered.size() ) + "%]" );
3048             ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with mapping to biological process: "
3049                     + biological_process_counter + " ["
3050                     + ( ( 100 * biological_process_counter ) / all_pfams_encountered.size() ) + "%]" );
3051             ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with mapping to molecular function: "
3052                     + molecular_function_counter + " ["
3053                     + ( ( 100 * molecular_function_counter ) / all_pfams_encountered.size() ) + "%]" );
3054             ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with mapping to cellular component: "
3055                     + cellular_component_counter + " ["
3056                     + ( ( 100 * cellular_component_counter ) / all_pfams_encountered.size() ) + "%]" );
3057             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
3058             summary_writer.write( "# Sum of Pfams encountered                : " + all_pfams_encountered.size() );
3059             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
3060             summary_writer.write( "# Pfams without a mapping                 : " + pfams_without_mappings_counter
3061                     + " [" + ( ( 100 * pfams_without_mappings_counter ) / all_pfams_encountered.size() ) + "%]" );
3062             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
3063             summary_writer.write( "# Pfams without mapping to proc. or func. : "
3064                     + pfams_without_mappings_to_bp_or_mf_counter + " ["
3065                     + ( ( 100 * pfams_without_mappings_to_bp_or_mf_counter ) / all_pfams_encountered.size() ) + "%]" );
3066             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
3067             summary_writer.write( "# Pfams with a mapping                    : " + pfams_with_mappings_counter + " ["
3068                     + ( ( 100 * pfams_with_mappings_counter ) / all_pfams_encountered.size() ) + "%]" );
3069             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
3070             summary_writer.write( "# Pfams with a mapping to proc. or func.  : "
3071                     + pfams_with_mappings_to_bp_or_mf_counter + " ["
3072                     + ( ( 100 * pfams_with_mappings_to_bp_or_mf_counter ) / all_pfams_encountered.size() ) + "%]" );
3073             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
3074             summary_writer.write( "# Pfams with mapping to biological process: " + biological_process_counter + " ["
3075                     + ( ( 100 * biological_process_counter ) / all_pfams_encountered.size() ) + "%]" );
3076             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
3077             summary_writer.write( "# Pfams with mapping to molecular function: " + molecular_function_counter + " ["
3078                     + ( ( 100 * molecular_function_counter ) / all_pfams_encountered.size() ) + "%]" );
3079             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
3080             summary_writer.write( "# Pfams with mapping to cellular component: " + cellular_component_counter + " ["
3081                     + ( ( 100 * cellular_component_counter ) / all_pfams_encountered.size() ) + "%]" );
3082             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
3083             summary_writer.close();
3084         }
3085         catch ( final IOException e ) {
3086             ForesterUtil.printWarningMessage( surfacing.PRG_NAME, "Failure to write: " + e );
3087         }
3088     }
3089
3090     private final static void writeColorLabels( final String l, final Color c, final Writer w ) throws IOException {
3091         w.write( "<tr><td><b><span style=\"color:" );
3092         w.write( String.format( "#%02x%02x%02x", c.getRed(), c.getGreen(), c.getBlue() ) );
3093         w.write( "\">" );
3094         w.write( l );
3095         w.write( "</span></b></td></tr>" );
3096         w.write( SurfacingConstants.NL );
3097     }
3098
3099     private static void writeDomainData( final Map<String, List<GoId>> domain_id_to_go_ids_map,
3100                                          final Map<GoId, GoTerm> go_id_to_term_map,
3101                                          final GoNameSpace go_namespace_limit,
3102                                          final Writer out,
3103                                          final String domain_0,
3104                                          final String domain_1,
3105                                          final String prefix_for_html,
3106                                          final String character_separator_for_non_html_output,
3107                                          final Map<String, Set<String>>[] domain_id_to_secondary_features_maps,
3108                                          final Set<GoId> all_go_ids ) throws IOException {
3109         boolean any_go_annotation_present = false;
3110         boolean first_has_no_go = false;
3111         int domain_count = 2; // To distinguish between domains and binary domain combinations.
3112         if ( ForesterUtil.isEmpty( domain_1 ) ) {
3113             domain_count = 1;
3114         }
3115         // The following has a difficult to understand logic.  
3116         for( int d = 0; d < domain_count; ++d ) {
3117             List<GoId> go_ids = null;
3118             boolean go_annotation_present = false;
3119             if ( d == 0 ) {
3120                 if ( domain_id_to_go_ids_map.containsKey( domain_0 ) ) {
3121                     go_annotation_present = true;
3122                     any_go_annotation_present = true;
3123                     go_ids = domain_id_to_go_ids_map.get( domain_0 );
3124                 }
3125                 else {
3126                     first_has_no_go = true;
3127                 }
3128             }
3129             else {
3130                 if ( domain_id_to_go_ids_map.containsKey( domain_1 ) ) {
3131                     go_annotation_present = true;
3132                     any_go_annotation_present = true;
3133                     go_ids = domain_id_to_go_ids_map.get( domain_1 );
3134                 }
3135             }
3136             if ( go_annotation_present ) {
3137                 boolean first = ( ( d == 0 ) || ( ( d == 1 ) && first_has_no_go ) );
3138                 for( final GoId go_id : go_ids ) {
3139                     out.write( "<tr>" );
3140                     if ( first ) {
3141                         first = false;
3142                         writeDomainIdsToHtml( out,
3143                                               domain_0,
3144                                               domain_1,
3145                                               prefix_for_html,
3146                                               domain_id_to_secondary_features_maps );
3147                     }
3148                     else {
3149                         out.write( "<td></td>" );
3150                     }
3151                     if ( !go_id_to_term_map.containsKey( go_id ) ) {
3152                         throw new IllegalArgumentException( "GO-id [" + go_id + "] not found in GO-id to GO-term map" );
3153                     }
3154                     final GoTerm go_term = go_id_to_term_map.get( go_id );
3155                     if ( ( go_namespace_limit == null ) || go_namespace_limit.equals( go_term.getGoNameSpace() ) ) {
3156                         // final String top = GoUtils.getPenultimateGoTerm( go_term, go_id_to_term_map ).getName();
3157                         final String go_id_str = go_id.getId();
3158                         out.write( "<td>" );
3159                         out.write( "<a href=\"" + SurfacingConstants.AMIGO_LINK + go_id_str
3160                                 + "\" target=\"amigo_window\">" + go_id_str + "</a>" );
3161                         out.write( "</td><td>" );
3162                         out.write( go_term.getName() );
3163                         if ( domain_count == 2 ) {
3164                             out.write( " (" + d + ")" );
3165                         }
3166                         out.write( "</td><td>" );
3167                         // out.write( top );
3168                         // out.write( "</td><td>" );
3169                         out.write( "[" );
3170                         out.write( go_term.getGoNameSpace().toShortString() );
3171                         out.write( "]" );
3172                         out.write( "</td>" );
3173                         if ( all_go_ids != null ) {
3174                             all_go_ids.add( go_id );
3175                         }
3176                     }
3177                     else {
3178                         out.write( "<td>" );
3179                         out.write( "</td><td>" );
3180                         out.write( "</td><td>" );
3181                         out.write( "</td><td>" );
3182                         out.write( "</td>" );
3183                     }
3184                     out.write( "</tr>" );
3185                     out.write( SurfacingConstants.NL );
3186                 }
3187             }
3188         } //  for( int d = 0; d < domain_count; ++d ) 
3189         if ( !any_go_annotation_present ) {
3190             out.write( "<tr>" );
3191             writeDomainIdsToHtml( out, domain_0, domain_1, prefix_for_html, domain_id_to_secondary_features_maps );
3192             out.write( "<td>" );
3193             out.write( "</td><td>" );
3194             out.write( "</td><td>" );
3195             out.write( "</td><td>" );
3196             out.write( "</td>" );
3197             out.write( "</tr>" );
3198             out.write( SurfacingConstants.NL );
3199         }
3200     }
3201
3202     private static void writeDomainIdsToHtml( final Writer out,
3203                                               final String domain_0,
3204                                               final String domain_1,
3205                                               final String prefix_for_detailed_html,
3206                                               final Map<String, Set<String>>[] domain_id_to_secondary_features_maps )
3207             throws IOException {
3208         out.write( "<td>" );
3209         if ( !ForesterUtil.isEmpty( prefix_for_detailed_html ) ) {
3210             out.write( prefix_for_detailed_html );
3211             out.write( " " );
3212         }
3213         out.write( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain_0 + "\">" + domain_0 + "</a>" );
3214         out.write( "</td>" );
3215     }
3216
3217     private static void writeDomainsToIndividualFilePerTreeNode( final Writer individual_files_writer,
3218                                                                  final String domain_0,
3219                                                                  final String domain_1 ) throws IOException {
3220         individual_files_writer.write( domain_0 );
3221         individual_files_writer.write( ForesterUtil.LINE_SEPARATOR );
3222         if ( !ForesterUtil.isEmpty( domain_1 ) ) {
3223             individual_files_writer.write( domain_1 );
3224             individual_files_writer.write( ForesterUtil.LINE_SEPARATOR );
3225         }
3226     }
3227
3228     private static void writePfamsToFile( final String outfile_name, final SortedSet<String> pfams ) {
3229         try {
3230             final Writer writer = new BufferedWriter( new FileWriter( new File( outfile_name ) ) );
3231             for( final String pfam : pfams ) {
3232                 writer.write( pfam );
3233                 writer.write( ForesterUtil.LINE_SEPARATOR );
3234             }
3235             writer.close();
3236             ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote " + pfams.size() + " pfams to [" + outfile_name
3237                     + "]" );
3238         }
3239         catch ( final IOException e ) {
3240             ForesterUtil.printWarningMessage( surfacing.PRG_NAME, "Failure to write: " + e );
3241         }
3242     }
3243
3244     private static void writeToNexus( final String outfile_name,
3245                                       final CharacterStateMatrix<BinaryStates> matrix,
3246                                       final Phylogeny phylogeny ) {
3247         if ( !( matrix instanceof BasicCharacterStateMatrix ) ) {
3248             throw new IllegalArgumentException( "can only write matrices of type [" + BasicCharacterStateMatrix.class
3249                     + "] to nexus" );
3250         }
3251         final BasicCharacterStateMatrix<BinaryStates> my_matrix = ( org.forester.evoinference.matrix.character.BasicCharacterStateMatrix<BinaryStates> ) matrix;
3252         final List<Phylogeny> phylogenies = new ArrayList<Phylogeny>( 1 );
3253         phylogenies.add( phylogeny );
3254         try {
3255             final BufferedWriter w = new BufferedWriter( new FileWriter( outfile_name ) );
3256             w.write( NexusConstants.NEXUS );
3257             w.write( ForesterUtil.LINE_SEPARATOR );
3258             my_matrix.writeNexusTaxaBlock( w );
3259             my_matrix.writeNexusBinaryChractersBlock( w );
3260             PhylogenyWriter.writeNexusTreesBlock( w, phylogenies, NH_CONVERSION_SUPPORT_VALUE_STYLE.NONE );
3261             w.flush();
3262             w.close();
3263             ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote Nexus file: \"" + outfile_name + "\"" );
3264         }
3265         catch ( final IOException e ) {
3266             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() );
3267         }
3268     }
3269
3270     private static void writeToNexus( final String outfile_name,
3271                                       final DomainParsimonyCalculator domain_parsimony,
3272                                       final Phylogeny phylogeny ) {
3273         writeToNexus( outfile_name + surfacing.NEXUS_EXTERNAL_DOMAINS,
3274                       domain_parsimony.createMatrixOfDomainPresenceOrAbsence(),
3275                       phylogeny );
3276         writeToNexus( outfile_name + surfacing.NEXUS_EXTERNAL_DOMAIN_COMBINATIONS,
3277                       domain_parsimony.createMatrixOfBinaryDomainCombinationPresenceOrAbsence(),
3278                       phylogeny );
3279     }
3280
3281     final static class DomainComparator implements Comparator<Domain> {
3282
3283         final private boolean _ascending;
3284
3285         public DomainComparator( final boolean ascending ) {
3286             _ascending = ascending;
3287         }
3288
3289         @Override
3290         public final int compare( final Domain d0, final Domain d1 ) {
3291             if ( d0.getFrom() < d1.getFrom() ) {
3292                 return _ascending ? -1 : 1;
3293             }
3294             else if ( d0.getFrom() > d1.getFrom() ) {
3295                 return _ascending ? 1 : -1;
3296             }
3297             return 0;
3298         }
3299     }
3300 }