cleanup
[jalview.git] / forester / java / src / org / forester / surfacing / SurfacingUtil.java
index cb9df26..73544b7 100644 (file)
@@ -26,6 +26,7 @@
 
 package org.forester.surfacing;
 
+import java.awt.Color;
 import java.io.BufferedWriter;
 import java.io.File;
 import java.io.FileWriter;
@@ -66,6 +67,8 @@ import org.forester.go.GoNameSpace;
 import org.forester.go.GoTerm;
 import org.forester.go.PfamToGoMapping;
 import org.forester.io.parsers.nexus.NexusConstants;
+import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
+import org.forester.io.parsers.util.ParserUtils;
 import org.forester.io.writers.PhylogenyWriter;
 import org.forester.phylogeny.Phylogeny;
 import org.forester.phylogeny.PhylogenyMethods;
@@ -74,6 +77,7 @@ import org.forester.phylogeny.PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE;
 import org.forester.phylogeny.data.BinaryCharacters;
 import org.forester.phylogeny.data.Confidence;
 import org.forester.phylogeny.data.Taxonomy;
+import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
 import org.forester.protein.BasicDomain;
 import org.forester.protein.BasicProtein;
@@ -82,38 +86,41 @@ import org.forester.protein.Domain;
 import org.forester.protein.Protein;
 import org.forester.species.Species;
 import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
-import org.forester.surfacing.DomainSimilarityCalculator.GoAnnotationOutput;
 import org.forester.surfacing.GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder;
+import org.forester.surfacing.PrintableDomainSimilarity.PRINT_OPTION;
 import org.forester.util.AsciiHistogram;
 import org.forester.util.BasicDescriptiveStatistics;
 import org.forester.util.BasicTable;
 import org.forester.util.BasicTableParser;
+import org.forester.util.CommandLineArguments;
 import org.forester.util.DescriptiveStatistics;
 import org.forester.util.ForesterUtil;
+import org.forester.util.TaxonomyColors;
 
 public final class SurfacingUtil {
 
-    private final static NumberFormat       FORMATTER_3                      = new DecimalFormat( "0.000" );
-    private static final Comparator<Domain> ASCENDING_CONFIDENCE_VALUE_ORDER = new Comparator<Domain>() {
-
-                                                                                 @Override
-                                                                                 public int compare( final Domain d1,
-                                                                                                     final Domain d2 ) {
-                                                                                     if ( d1.getPerSequenceEvalue() < d2
-                                                                                             .getPerSequenceEvalue() ) {
-                                                                                         return -1;
-                                                                                     }
-                                                                                     else if ( d1
-                                                                                             .getPerSequenceEvalue() > d2
-                                                                                             .getPerSequenceEvalue() ) {
-                                                                                         return 1;
-                                                                                     }
-                                                                                     else {
-                                                                                         return d1.compareTo( d2 );
-                                                                                     }
-                                                                                 }
-                                                                             };
-    public final static Pattern             PATTERN_SP_STYLE_TAXONOMY        = Pattern.compile( "^[A-Z0-9]{3,5}$" );
+    public final static Pattern              PATTERN_SP_STYLE_TAXONOMY        = Pattern.compile( "^[A-Z0-9]{3,5}$" );
+    private final static Map<String, String> _TAXCODE_HEXCOLORSTRING_MAP      = new HashMap<String, String>();
+    private static final Comparator<Domain>  ASCENDING_CONFIDENCE_VALUE_ORDER = new Comparator<Domain>() {
+
+                                                                                  @Override
+                                                                                  public int compare( final Domain d1,
+                                                                                                      final Domain d2 ) {
+                                                                                      if ( d1.getPerSequenceEvalue() < d2
+                                                                                              .getPerSequenceEvalue() ) {
+                                                                                          return -1;
+                                                                                      }
+                                                                                      else if ( d1
+                                                                                              .getPerSequenceEvalue() > d2
+                                                                                              .getPerSequenceEvalue() ) {
+                                                                                          return 1;
+                                                                                      }
+                                                                                      else {
+                                                                                          return d1.compareTo( d2 );
+                                                                                      }
+                                                                                  }
+                                                                              };
+    private final static NumberFormat        FORMATTER_3                      = new DecimalFormat( "0.000" );
 
     private SurfacingUtil() {
         // Hidden constructor.
@@ -135,35 +142,6 @@ public final class SurfacingUtil {
         }
     }
 
-    public static void addHtmlHead( final Writer w, final String title ) throws IOException {
-        w.write( SurfacingConstants.NL );
-        w.write( "<head>" );
-        w.write( "<title>" );
-        w.write( title );
-        w.write( "</title>" );
-        w.write( SurfacingConstants.NL );
-        w.write( "<style>" );
-        w.write( SurfacingConstants.NL );
-        w.write( "a:visited { color : #6633FF; text-decoration : none; }" );
-        w.write( SurfacingConstants.NL );
-        w.write( "a:link { color : #6633FF; text-decoration : none; }" );
-        w.write( SurfacingConstants.NL );
-        w.write( "a:active { color : #99FF00; text-decoration : none; }" );
-        w.write( SurfacingConstants.NL );
-        w.write( "a:hover { color : #FFFFFF; background-color : #99FF00; text-decoration : none; }" );
-        w.write( SurfacingConstants.NL );
-        w.write( "td { text-align: left; vertical-align: top; font-family: Verdana, Arial, Helvetica; font-size: 8pt}" );
-        w.write( SurfacingConstants.NL );
-        w.write( "h1 { color : #0000FF; font-family: Verdana, Arial, Helvetica; font-size: 18pt; font-weight: bold }" );
-        w.write( SurfacingConstants.NL );
-        w.write( "h2 { color : #0000FF; font-family: Verdana, Arial, Helvetica; font-size: 16pt; font-weight: bold }" );
-        w.write( SurfacingConstants.NL );
-        w.write( "</style>" );
-        w.write( SurfacingConstants.NL );
-        w.write( "</head>" );
-        w.write( SurfacingConstants.NL );
-    }
-
     public static DescriptiveStatistics calculateDescriptiveStatisticsForMeanValues( final Set<DomainSimilarity> similarities ) {
         final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
         for( final DomainSimilarity similarity : similarities ) {
@@ -172,16 +150,6 @@ public final class SurfacingUtil {
         return stats;
     }
 
-    public static int calculateOverlap( final Domain domain, final List<Boolean> covered_positions ) {
-        int overlap_count = 0;
-        for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) {
-            if ( ( i < covered_positions.size() ) && ( covered_positions.get( i ) == true ) ) {
-                ++overlap_count;
-            }
-        }
-        return overlap_count;
-    }
-
     public static void checkForOutputFileWriteability( final File outfile ) {
         final String error = ForesterUtil.isWritableFile( outfile );
         if ( !ForesterUtil.isEmpty( error ) ) {
@@ -189,6 +157,33 @@ public final class SurfacingUtil {
         }
     }
 
+    public static void checkWriteabilityForPairwiseComparisons( final PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option,
+                                                                final String[][] input_file_properties,
+                                                                final String automated_pairwise_comparison_suffix,
+                                                                final File outdir ) {
+        for( int i = 0; i < input_file_properties.length; ++i ) {
+            for( int j = 0; j < i; ++j ) {
+                final String species_i = input_file_properties[ i ][ 1 ];
+                final String species_j = input_file_properties[ j ][ 1 ];
+                String pairwise_similarities_output_file_str = surfacing.PAIRWISE_DOMAIN_COMPARISONS_PREFIX + species_i
+                        + "_" + species_j + automated_pairwise_comparison_suffix;
+                switch ( domain_similarity_print_option ) {
+                    case HTML:
+                        if ( !pairwise_similarities_output_file_str.endsWith( ".html" ) ) {
+                            pairwise_similarities_output_file_str += ".html";
+                        }
+                        break;
+                }
+                final String error = ForesterUtil
+                        .isWritableFile( new File( outdir == null ? pairwise_similarities_output_file_str : outdir
+                                + ForesterUtil.FILE_SEPARATOR + pairwise_similarities_output_file_str ) );
+                if ( !ForesterUtil.isEmpty( error ) ) {
+                    ForesterUtil.fatalError( surfacing.PRG_NAME, error );
+                }
+            }
+        }
+    }
+
     public static void collectChangedDomainCombinationsFromBinaryStatesMatrixAsListToFile( final CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix,
                                                                                            final BinaryDomainCombination.DomainCombinationType dc_type,
                                                                                            final List<BinaryDomainCombination> all_binary_domains_combination_gained,
@@ -252,6 +247,101 @@ public final class SurfacingUtil {
         return phylogeny;
     }
 
+    public static StringBuilder createParametersAsString( final boolean ignore_dufs,
+                                                          final double e_value_max,
+                                                          final int max_allowed_overlap,
+                                                          final boolean no_engulfing_overlaps,
+                                                          final File cutoff_scores_file,
+                                                          final BinaryDomainCombination.DomainCombinationType dc_type ) {
+        final StringBuilder parameters_sb = new StringBuilder();
+        parameters_sb.append( "E-value: " + e_value_max );
+        if ( cutoff_scores_file != null ) {
+            parameters_sb.append( ", Cutoff-scores-file: " + cutoff_scores_file );
+        }
+        else {
+            parameters_sb.append( ", Cutoff-scores-file: not-set" );
+        }
+        if ( max_allowed_overlap != surfacing.MAX_ALLOWED_OVERLAP_DEFAULT ) {
+            parameters_sb.append( ", Max-overlap: " + max_allowed_overlap );
+        }
+        else {
+            parameters_sb.append( ", Max-overlap: not-set" );
+        }
+        if ( no_engulfing_overlaps ) {
+            parameters_sb.append( ", Engulfing-overlaps: not-allowed" );
+        }
+        else {
+            parameters_sb.append( ", Engulfing-overlaps: allowed" );
+        }
+        if ( ignore_dufs ) {
+            parameters_sb.append( ", Ignore-dufs: true" );
+        }
+        else {
+            parameters_sb.append( ", Ignore-dufs: false" );
+        }
+        parameters_sb.append( ", DC type (if applicable): " + dc_type );
+        return parameters_sb;
+    }
+
+    public static void createSplitWriters( final File out_dir,
+                                           final String my_outfile,
+                                           final Map<Character, Writer> split_writers ) throws IOException {
+        split_writers.put( 'a', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
+                + "_domains_A.html" ) ) );
+        split_writers.put( 'b', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
+                + "_domains_B.html" ) ) );
+        split_writers.put( 'c', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
+                + "_domains_C.html" ) ) );
+        split_writers.put( 'd', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
+                + "_domains_D.html" ) ) );
+        split_writers.put( 'e', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
+                + "_domains_E.html" ) ) );
+        split_writers.put( 'f', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
+                + "_domains_F.html" ) ) );
+        split_writers.put( 'g', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
+                + "_domains_G.html" ) ) );
+        split_writers.put( 'h', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
+                + "_domains_H.html" ) ) );
+        split_writers.put( 'i', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
+                + "_domains_I.html" ) ) );
+        split_writers.put( 'j', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
+                + "_domains_J.html" ) ) );
+        split_writers.put( 'k', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
+                + "_domains_K.html" ) ) );
+        split_writers.put( 'l', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
+                + "_domains_L.html" ) ) );
+        split_writers.put( 'm', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
+                + "_domains_M.html" ) ) );
+        split_writers.put( 'n', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
+                + "_domains_N.html" ) ) );
+        split_writers.put( 'o', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
+                + "_domains_O.html" ) ) );
+        split_writers.put( 'p', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
+                + "_domains_P.html" ) ) );
+        split_writers.put( 'q', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
+                + "_domains_Q.html" ) ) );
+        split_writers.put( 'r', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
+                + "_domains_R.html" ) ) );
+        split_writers.put( 's', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
+                + "_domains_S.html" ) ) );
+        split_writers.put( 't', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
+                + "_domains_T.html" ) ) );
+        split_writers.put( 'u', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
+                + "_domains_U.html" ) ) );
+        split_writers.put( 'v', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
+                + "_domains_V.html" ) ) );
+        split_writers.put( 'w', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
+                + "_domains_W.html" ) ) );
+        split_writers.put( 'x', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
+                + "_domains_X.html" ) ) );
+        split_writers.put( 'y', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
+                + "_domains_Y.html" ) ) );
+        split_writers.put( 'z', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
+                + "_domains_Z.html" ) ) );
+        split_writers.put( '0', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
+                + "_domains_0.html" ) ) );
+    }
+
     public static Map<String, Integer> createTaxCodeToIdMap( final Phylogeny phy ) {
         final Map<String, Integer> m = new HashMap<String, Integer>();
         for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
@@ -285,20 +375,11 @@ public final class SurfacingUtil {
     }
 
     public static void decoratePrintableDomainSimilarities( final SortedSet<DomainSimilarity> domain_similarities,
-                                                            final Detailedness detailedness,
-                                                            final GoAnnotationOutput go_annotation_output,
-                                                            final Map<GoId, GoTerm> go_id_to_term_map,
-                                                            final GoNameSpace go_namespace_limit ) {
-        if ( ( go_namespace_limit != null ) && ( ( go_id_to_term_map == null ) || go_id_to_term_map.isEmpty() ) ) {
-            throw new IllegalArgumentException( "attempt to use a GO namespace limit without a GO id to term map" );
-        }
+                                                            final Detailedness detailedness ) {
         for( final DomainSimilarity domain_similarity : domain_similarities ) {
             if ( domain_similarity instanceof PrintableDomainSimilarity ) {
                 final PrintableDomainSimilarity printable_domain_similarity = ( PrintableDomainSimilarity ) domain_similarity;
                 printable_domain_similarity.setDetailedness( detailedness );
-                printable_domain_similarity.setGoAnnotationOutput( go_annotation_output );
-                printable_domain_similarity.setGoIdToTermMap( go_id_to_term_map );
-                printable_domain_similarity.setGoNamespaceLimit( go_namespace_limit );
             }
         }
     }
@@ -461,10 +542,6 @@ public final class SurfacingUtil {
                     out.write( species + "\t" );
                 }
                 out.write( ForesterUtil.LINE_SEPARATOR );
-                // DescriptiveStatistics stats_for_domain = domain_lengths
-                //         .calculateMeanBasedStatistics();
-                //AsciiHistogram histo = new AsciiHistogram( stats_for_domain );
-                //System.out.println( histo.toStringBuffer( 40, '=', 60, 4 ).toString() );
             }
         }
         out.write( ForesterUtil.LINE_SEPARATOR );
@@ -498,20 +575,94 @@ public final class SurfacingUtil {
             }
         }
         out.close();
-        //        final List<HistogramData> histogram_datas = new ArrayList<HistogramData>();
-        //        for( int i = 0; i < number_of_genomes; ++i ) {
-        //            final Species species = new BasicSpecies( input_file_properties[ i ][ 0 ] );
-        //            histogram_datas
-        //                    .add( new HistogramData( species.toString(), domain_lengths_table
-        //                            .calculateMeanBasedStatisticsForSpecies( species )
-        //                            .getDataAsDoubleArray(), 5, 600, null, 60 ) );
-        //        }
-        //        final HistogramsFrame hf = new HistogramsFrame( histogram_datas );
-        //        hf.setVisible( true );
         System.gc();
     }
 
     /**
+     * Warning: This side-effects 'all_bin_domain_combinations_encountered'!
+     * 
+     * 
+     * @param output_file
+     * @param all_bin_domain_combinations_changed
+     * @param sum_of_all_domains_encountered
+     * @param all_bin_domain_combinations_encountered
+     * @param is_gains_analysis
+     * @param protein_length_stats_by_dc 
+     * @throws IOException
+     */
+    public static void executeFitchGainsAnalysis( final File output_file,
+                                                  final List<BinaryDomainCombination> all_bin_domain_combinations_changed,
+                                                  final int sum_of_all_domains_encountered,
+                                                  final SortedSet<BinaryDomainCombination> all_bin_domain_combinations_encountered,
+                                                  final boolean is_gains_analysis ) throws IOException {
+        checkForOutputFileWriteability( output_file );
+        final Writer out = ForesterUtil.createBufferedWriter( output_file );
+        final SortedMap<Object, Integer> bdc_to_counts = ForesterUtil
+                .listToSortedCountsMap( all_bin_domain_combinations_changed );
+        final SortedSet<String> all_domains_in_combination_changed_more_than_once = new TreeSet<String>();
+        final SortedSet<String> all_domains_in_combination_changed_only_once = new TreeSet<String>();
+        int above_one = 0;
+        int one = 0;
+        for( final Object bdc_object : bdc_to_counts.keySet() ) {
+            final BinaryDomainCombination bdc = ( BinaryDomainCombination ) bdc_object;
+            final int count = bdc_to_counts.get( bdc_object );
+            if ( count < 1 ) {
+                ForesterUtil.unexpectedFatalError( surfacing.PRG_NAME, "count < 1 " );
+            }
+            out.write( bdc + "\t" + count + ForesterUtil.LINE_SEPARATOR );
+            if ( count > 1 ) {
+                all_domains_in_combination_changed_more_than_once.add( bdc.getId0() );
+                all_domains_in_combination_changed_more_than_once.add( bdc.getId1() );
+                above_one++;
+            }
+            else if ( count == 1 ) {
+                all_domains_in_combination_changed_only_once.add( bdc.getId0() );
+                all_domains_in_combination_changed_only_once.add( bdc.getId1() );
+                one++;
+            }
+        }
+        final int all = all_bin_domain_combinations_encountered.size();
+        int never_lost = -1;
+        if ( !is_gains_analysis ) {
+            all_bin_domain_combinations_encountered.removeAll( all_bin_domain_combinations_changed );
+            never_lost = all_bin_domain_combinations_encountered.size();
+            for( final BinaryDomainCombination bdc : all_bin_domain_combinations_encountered ) {
+                out.write( bdc + "\t" + "0" + ForesterUtil.LINE_SEPARATOR );
+            }
+        }
+        if ( is_gains_analysis ) {
+            out.write( "Sum of all distinct domain combinations appearing once               : " + one
+                    + ForesterUtil.LINE_SEPARATOR );
+            out.write( "Sum of all distinct domain combinations appearing more than once     : " + above_one
+                    + ForesterUtil.LINE_SEPARATOR );
+            out.write( "Sum of all distinct domains in combinations apppearing only once     : "
+                    + all_domains_in_combination_changed_only_once.size() + ForesterUtil.LINE_SEPARATOR );
+            out.write( "Sum of all distinct domains in combinations apppearing more than once: "
+                    + all_domains_in_combination_changed_more_than_once.size() + ForesterUtil.LINE_SEPARATOR );
+        }
+        else {
+            out.write( "Sum of all distinct domain combinations never lost                   : " + never_lost
+                    + ForesterUtil.LINE_SEPARATOR );
+            out.write( "Sum of all distinct domain combinations lost once                    : " + one
+                    + ForesterUtil.LINE_SEPARATOR );
+            out.write( "Sum of all distinct domain combinations lost more than once          : " + above_one
+                    + ForesterUtil.LINE_SEPARATOR );
+            out.write( "Sum of all distinct domains in combinations lost only once           : "
+                    + all_domains_in_combination_changed_only_once.size() + ForesterUtil.LINE_SEPARATOR );
+            out.write( "Sum of all distinct domains in combinations lost more than once: "
+                    + all_domains_in_combination_changed_more_than_once.size() + ForesterUtil.LINE_SEPARATOR );
+        }
+        out.write( "All binary combinations                                              : " + all
+                + ForesterUtil.LINE_SEPARATOR );
+        out.write( "All domains                                                          : "
+                + sum_of_all_domains_encountered );
+        out.close();
+        ForesterUtil.programMessage( surfacing.PRG_NAME,
+                                     "Wrote fitch domain combination dynamics counts analysis to \"" + output_file
+                                             + "\"" );
+    }
+
+    /**
      * 
      * @param all_binary_domains_combination_lost_fitch 
      * @param use_last_in_fitch_parsimony 
@@ -856,6 +1007,60 @@ public final class SurfacingUtil {
                 + "_MAPPED_indep_dc_gains_fitch_lca_taxonomies.txt", null, null, null, null );
     }
 
+    public static void executePlusMinusAnalysis( final File output_file,
+                                                 final List<String> plus_minus_analysis_high_copy_base,
+                                                 final List<String> plus_minus_analysis_high_copy_target,
+                                                 final List<String> plus_minus_analysis_low_copy,
+                                                 final List<GenomeWideCombinableDomains> gwcd_list,
+                                                 final SortedMap<Species, List<Protein>> protein_lists_per_species,
+                                                 final Map<String, List<GoId>> domain_id_to_go_ids_map,
+                                                 final Map<GoId, GoTerm> go_id_to_term_map,
+                                                 final List<Object> plus_minus_analysis_numbers ) {
+        final Set<String> all_spec = new HashSet<String>();
+        for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
+            all_spec.add( gwcd.getSpecies().getSpeciesId() );
+        }
+        final File html_out_dom = new File( output_file + surfacing.PLUS_MINUS_DOM_SUFFIX_HTML );
+        final File plain_out_dom = new File( output_file + surfacing.PLUS_MINUS_DOM_SUFFIX );
+        final File html_out_dc = new File( output_file + surfacing.PLUS_MINUS_DC_SUFFIX_HTML );
+        final File all_domains_go_ids_out_dom = new File( output_file + surfacing.PLUS_MINUS_ALL_GO_IDS_DOM_SUFFIX );
+        final File passing_domains_go_ids_out_dom = new File( output_file
+                + surfacing.PLUS_MINUS_PASSING_GO_IDS_DOM_SUFFIX );
+        final File proteins_file_base = new File( output_file + "" );
+        final int min_diff = ( ( Integer ) plus_minus_analysis_numbers.get( 0 ) ).intValue();
+        final double factor = ( ( Double ) plus_minus_analysis_numbers.get( 1 ) ).doubleValue();
+        try {
+            DomainCountsDifferenceUtil.calculateCopyNumberDifferences( gwcd_list,
+                                                                       protein_lists_per_species,
+                                                                       plus_minus_analysis_high_copy_base,
+                                                                       plus_minus_analysis_high_copy_target,
+                                                                       plus_minus_analysis_low_copy,
+                                                                       min_diff,
+                                                                       factor,
+                                                                       plain_out_dom,
+                                                                       html_out_dom,
+                                                                       html_out_dc,
+                                                                       domain_id_to_go_ids_map,
+                                                                       go_id_to_term_map,
+                                                                       all_domains_go_ids_out_dom,
+                                                                       passing_domains_go_ids_out_dom,
+                                                                       proteins_file_base );
+        }
+        catch ( final IOException e ) {
+            ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() );
+        }
+        ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote plus minus domain analysis results to \""
+                + html_out_dom + "\"" );
+        ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote plus minus domain analysis results to \""
+                + plain_out_dom + "\"" );
+        ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote plus minus domain analysis results to \"" + html_out_dc
+                + "\"" );
+        ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote plus minus domain analysis based passing GO ids to \""
+                + passing_domains_go_ids_out_dom + "\"" );
+        ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote plus minus domain analysis based all GO ids to \""
+                + all_domains_go_ids_out_dom + "\"" );
+    }
+
     public static void extractProteinNames( final List<Protein> proteins,
                                             final List<String> query_domain_ids_nc_order,
                                             final Writer out,
@@ -911,9 +1116,9 @@ public final class SurfacingUtil {
                                             final String separator,
                                             final String limit_to_species,
                                             final double domain_e_cutoff ) throws IOException {
-        System.out.println( "Per domain E-value: " + domain_e_cutoff );
+        //System.out.println( "Per domain E-value: " + domain_e_cutoff );
         for( final Species species : protein_lists_per_species.keySet() ) {
-            System.out.println( species + ":" );
+            //System.out.println( species + ":" );
             for( final Protein protein : protein_lists_per_species.get( species ) ) {
                 if ( ForesterUtil.isEmpty( limit_to_species )
                         || protein.getSpecies().getSpeciesId().equalsIgnoreCase( limit_to_species ) ) {
@@ -932,7 +1137,7 @@ public final class SurfacingUtil {
                                 out.write( domain.getFrom() + "-" + domain.getTo() );
                                 if ( prev_to >= 0 ) {
                                     final int l = domain.getFrom() - prev_to;
-                                    System.out.println( l );
+                                    // System.out.println( l );
                                 }
                                 prev_to = domain.getTo();
                             }
@@ -1033,21 +1238,145 @@ public final class SurfacingUtil {
         return c;
     }
 
-    /**
-     * Returns true is Domain domain falls in an uninterrupted stretch of
-     * covered positions.
-     * 
-     * @param domain
-     * @param covered_positions
-     * @return
-     */
-    public static boolean isEngulfed( final Domain domain, final List<Boolean> covered_positions ) {
-        for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) {
-            if ( ( i >= covered_positions.size() ) || ( covered_positions.get( i ) != true ) ) {
-                return false;
+    /**
+     * Writes a message, followed by ForesterUtil.LINE_SEPARATOR, to a writer.
+     * An IOException raised while writing is not propagated; its localized
+     * message is handed to ForesterUtil.fatalError instead.
+     *
+     * @param msg
+     *            the text to write
+     * @param w
+     *            the writer receiving the text
+     */
+    public static void log( final String msg, final Writer w ) {
+        try {
+            w.write( msg );
+            w.write( ForesterUtil.LINE_SEPARATOR );
+        }
+        catch ( final IOException io_error ) {
+            final String reason = io_error.getLocalizedMessage();
+            ForesterUtil.fatalError( surfacing.PRG_NAME, reason );
+        }
+    }
+
+    /**
+     * Reads one phylogeny from each input tree file, validates it, and
+     * prepares it for parsimony analyses.
+     * <p>
+     * Each file must be readable and must yield exactly one non-empty, rooted
+     * phylogeny with at least number_of_genomes external nodes and no node
+     * lacking a name (as counted by getNumberOfNodesLackingName). Violations
+     * are reported through ForesterUtil.fatalError. A tree which is not
+     * completely binary after preparation only triggers a warning.
+     *
+     * @param intree_files
+     *            the tree files to read, one phylogeny per file
+     * @param number_of_genomes
+     *            minimal number of external nodes each tree must have
+     * @param input_file_properties
+     *            passed on to preparePhylogenyForParsimonyAnalyses, which reads
+     *            index 1 of each row as a node name
+     * @return the prepared phylogenies, in the order of intree_files
+     */
+    public static Phylogeny[] obtainAndPreProcessIntrees( final File[] intree_files,
+                                                          final int number_of_genomes,
+                                                          final String[][] input_file_properties ) {
+        final Phylogeny[] intrees = new Phylogeny[ intree_files.length ];
+        int i = 0;
+        for( final File intree_file : intree_files ) {
+            Phylogeny intree = null;
+            final String error = ForesterUtil.isReadableFile( intree_file );
+            if ( !ForesterUtil.isEmpty( error ) ) {
+                ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read input tree file [" + intree_file + "]: "
+                        + error );
+            }
+            try {
+                final Phylogeny[] p_array = ParserBasedPhylogenyFactory.getInstance()
+                        .create( intree_file, ParserUtils.createParserDependingOnFileType( intree_file, true ) );
+                if ( p_array.length < 1 ) {
+                    ForesterUtil.fatalError( surfacing.PRG_NAME, "file [" + intree_file
+                            + "] does not contain any phylogeny in phyloXML format" );
+                }
+                else if ( p_array.length > 1 ) {
+                    ForesterUtil.fatalError( surfacing.PRG_NAME, "file [" + intree_file
+                            + "] contains more than one phylogeny in phyloXML format" );
+                }
+                intree = p_array[ 0 ];
+            }
+            catch ( final Exception e ) {
+                // Report the reason carried by the exception. The variable "error" only
+                // holds the result of the readability check above and says nothing
+                // about why reading/parsing failed.
+                ForesterUtil.fatalError( surfacing.PRG_NAME, "failed to read input tree from file [" + intree_file
+                        + "]: " + e.getLocalizedMessage() );
+            }
+            if ( ( intree == null ) || intree.isEmpty() ) {
+                ForesterUtil.fatalError( surfacing.PRG_NAME, "input tree [" + intree_file + "] is empty" );
+            }
+            if ( !intree.isRooted() ) {
+                ForesterUtil.fatalError( surfacing.PRG_NAME, "input tree [" + intree_file + "] is not rooted" );
+            }
+            if ( intree.getNumberOfExternalNodes() < number_of_genomes ) {
+                ForesterUtil.fatalError( surfacing.PRG_NAME,
+                                         "number of external nodes [" + intree.getNumberOfExternalNodes()
+                                                 + "] of input tree [" + intree_file
+                                                 + "] is smaller than the number of genomes the be analyzed ["
+                                                 + number_of_genomes + "]" );
+            }
+            final StringBuilder parent_names = new StringBuilder();
+            final int nodes_lacking_name = getNumberOfNodesLackingName( intree, parent_names );
+            if ( nodes_lacking_name > 0 ) {
+                ForesterUtil.fatalError( surfacing.PRG_NAME, "input tree [" + intree_file + "] has "
+                        + nodes_lacking_name + " node(s) lacking a name [parent names:" + parent_names + "]" );
+            }
+            preparePhylogenyForParsimonyAnalyses( intree, input_file_properties );
+            if ( !intree.isCompletelyBinary() ) {
+                ForesterUtil.printWarningMessage( surfacing.PRG_NAME, "input tree [" + intree_file
+                        + "] is not completely binary" );
             }
+            intrees[ i++ ] = intree;
         }
-        return true;
+        return intrees;
+    }
+
+    /**
+     * Reads the single phylogeny contained in a tree file.
+     * <p>
+     * The file must be readable and must yield exactly one non-empty, rooted
+     * phylogeny; any violation is reported through ForesterUtil.fatalError.
+     *
+     * @param intree_file
+     *            the tree file to read
+     * @return the phylogeny read from intree_file
+     */
+    public static Phylogeny obtainFirstIntree( final File intree_file ) {
+        Phylogeny intree = null;
+        final String error = ForesterUtil.isReadableFile( intree_file );
+        if ( !ForesterUtil.isEmpty( error ) ) {
+            ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read input tree file [" + intree_file + "]: " + error );
+        }
+        try {
+            final Phylogeny[] phys = ParserBasedPhylogenyFactory.getInstance()
+                    .create( intree_file, ParserUtils.createParserDependingOnFileType( intree_file, true ) );
+            if ( phys.length < 1 ) {
+                ForesterUtil.fatalError( surfacing.PRG_NAME, "file [" + intree_file
+                        + "] does not contain any phylogeny in phyloXML format" );
+            }
+            else if ( phys.length > 1 ) {
+                ForesterUtil.fatalError( surfacing.PRG_NAME, "file [" + intree_file
+                        + "] contains more than one phylogeny in phyloXML format" );
+            }
+            intree = phys[ 0 ];
+        }
+        catch ( final Exception e ) {
+            // Report the reason carried by the exception. The variable "error" only
+            // holds the result of the readability check above and says nothing about
+            // why reading/parsing failed.
+            ForesterUtil.fatalError( surfacing.PRG_NAME, "failed to read input tree from file [" + intree_file + "]: "
+                    + e.getLocalizedMessage() );
+        }
+        if ( ( intree == null ) || intree.isEmpty() ) {
+            ForesterUtil.fatalError( surfacing.PRG_NAME, "input tree [" + intree_file + "] is empty" );
+        }
+        if ( !intree.isRooted() ) {
+            ForesterUtil.fatalError( surfacing.PRG_NAME, "input tree [" + intree_file + "] is not rooted" );
+        }
+        return intree;
+    }
+
+    /**
+     * Returns the "#rrggbb" color string associated with a taxonomy code.
+     * <p>
+     * Results are kept in _TAXCODE_HEXCOLORSTRING_MAP. For a code not yet in
+     * that map, the single node carrying the code is looked up in phy and the
+     * path from that node towards the root is walked until
+     * ForesterUtil.obtainColorDependingOnTaxonomyGroup yields a color, trying
+     * the node's scientific name first and its node name second.
+     *
+     * @param tax_code
+     *            the taxonomy code to obtain a color for
+     * @param phy
+     *            the tree used for codes which are not in the map yet
+     * @return the hex color string stored in the map for tax_code
+     * @throws IllegalArgumentException
+     *             if the code is not in the map and phy is null or empty, if
+     *             the code is not found or not unique in phy, or if no color
+     *             could be determined
+     */
+    public static String obtainHexColorStringDependingOnTaxonomyGroup( final String tax_code, final Phylogeny phy )
+            throws IllegalArgumentException {
+        if ( _TAXCODE_HEXCOLORSTRING_MAP.containsKey( tax_code ) ) {
+            return _TAXCODE_HEXCOLORSTRING_MAP.get( tax_code );
+        }
+        if ( ( phy == null ) || phy.isEmpty() ) {
+            throw new IllegalArgumentException( "unable to obtain color for code " + tax_code
+                    + " (tree is null or empty and code is not in map)" );
+        }
+        final List<PhylogenyNode> matches = phy.getNodesViaTaxonomyCode( tax_code );
+        if ( ( matches == null ) || matches.isEmpty() ) {
+            throw new IllegalArgumentException( "code " + tax_code + " is not found" );
+        }
+        if ( matches.size() != 1 ) {
+            throw new IllegalArgumentException( "code " + tax_code + " is not unique" );
+        }
+        Color color = null;
+        // Walk from the matching node towards the root; stop at the first node yielding a color.
+        for( PhylogenyNode node = matches.get( 0 ); node != null; node = node.getParent() ) {
+            if ( node.getNodeData().isHasTaxonomy()
+                    && !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getScientificName() ) ) {
+                color = ForesterUtil.obtainColorDependingOnTaxonomyGroup( node.getNodeData().getTaxonomy()
+                        .getScientificName(), tax_code );
+            }
+            if ( ( color == null ) && !ForesterUtil.isEmpty( node.getName() ) ) {
+                color = ForesterUtil.obtainColorDependingOnTaxonomyGroup( node.getName(), tax_code );
+            }
+            if ( color != null ) {
+                break;
+            }
+        }
+        if ( color == null ) {
+            throw new IllegalArgumentException( "no color found for taxonomy code \"" + tax_code + "\"" );
+        }
+        final String hex_color = String.format( "#%02x%02x%02x",
+                                                color.getRed(),
+                                                color.getGreen(),
+                                                color.getBlue() );
+        _TAXCODE_HEXCOLORSTRING_MAP.put( tax_code, hex_color );
+        return _TAXCODE_HEXCOLORSTRING_MAP.get( tax_code );
     }
 
     public static void performDomainArchitectureAnalysis( final SortedMap<String, Set<String>> domain_architecutures,
@@ -1118,6 +1447,225 @@ public final class SurfacingUtil {
         p.setRooted( true );
     }
 
+    /**
+     * Prepares an input tree for parsimony analyses, in three steps:
+     * <ol>
+     * <li>every genome name (index 1 of each row of input_file_properties)
+     * must not match more than one node of the tree,</li>
+     * <li>every node without a name is named after its taxonomy code,
+     * scientific name, or common name (first non-empty one, in that order),</li>
+     * <li>PhylogenyMethods.deleteExternalNodesPositiveSelection is called with
+     * the genome names, and the names it returns are printed as not used.</li>
+     * </ol>
+     * Finally each genome name is looked up again with intree.getNode. All
+     * violations are reported through ForesterUtil.fatalError.
+     *
+     * @param intree
+     *            the tree to prepare (modified in place)
+     * @param input_file_properties
+     *            rows whose element at index 1 is a genome/node name
+     */
+    public static void preparePhylogenyForParsimonyAnalyses( final Phylogeny intree,
+                                                             final String[][] input_file_properties ) {
+        final String[] genomes = new String[ input_file_properties.length ];
+        int g = 0;
+        for( final String[] properties : input_file_properties ) {
+            if ( intree.getNodes( properties[ 1 ] ).size() > 1 ) {
+                ForesterUtil.fatalError( surfacing.PRG_NAME, "node named [" + properties[ 1 ]
+                        + "] is not unique in input tree " + intree.getName() );
+            }
+            genomes[ g++ ] = properties[ 1 ];
+        }
+        // Give every unnamed node a name derived from its taxonomy data.
+        for( final PhylogenyNodeIterator iter = intree.iteratorPostorder(); iter.hasNext(); ) {
+            final PhylogenyNode node = iter.next();
+            if ( !ForesterUtil.isEmpty( node.getName() ) ) {
+                continue;
+            }
+            final boolean has_taxonomy = node.getNodeData().isHasTaxonomy();
+            if ( has_taxonomy && !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
+                node.setName( node.getNodeData().getTaxonomy().getTaxonomyCode() );
+            }
+            else if ( has_taxonomy && !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getScientificName() ) ) {
+                node.setName( node.getNodeData().getTaxonomy().getScientificName() );
+            }
+            else if ( has_taxonomy && !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getCommonName() ) ) {
+                node.setName( node.getNodeData().getTaxonomy().getCommonName() );
+            }
+            else {
+                ForesterUtil
+                        .fatalError( surfacing.PRG_NAME,
+                                     "node with no name, scientific name, common name, or taxonomy code present" );
+            }
+        }
+        // Report the names handed back by the external-node selection as not used.
+        final List<String> ignored = PhylogenyMethods.deleteExternalNodesPositiveSelection( genomes, intree );
+        if ( !ignored.isEmpty() ) {
+            System.out.println( "Not using the following " + ignored.size() + " nodes:" );
+            int index = 0;
+            for( final String ignored_name : ignored ) {
+                System.out.println( " " + index + ": " + ignored_name );
+                ++index;
+            }
+            System.out.println( "--" );
+        }
+        for( final String[] properties : input_file_properties ) {
+            final String genome = properties[ 1 ];
+            try {
+                intree.getNode( genome );
+            }
+            catch ( final IllegalArgumentException not_found_or_ambiguous ) {
+                ForesterUtil.fatalError( surfacing.PRG_NAME, "node named [" + genome
+                        + "] not present/not unique in input tree" );
+            }
+        }
+    }
+
+    /**
+     * Reports the percentage of all counted proteins which are not counted
+     * under key 1 of the histogram, both as a program message and to the log
+     * writer.
+     * <p>
+     * NOTE(review): the histogram presumably maps "number of domains per
+     * protein" to "number of proteins" - confirm against the caller.
+     *
+     * @param all_genomes_domains_per_potein_histo
+     *            the histogram; a missing entry for key 1 is treated as a
+     *            count of zero
+     * @param log_writer
+     *            the writer passed to log
+     */
+    public static void printOutPercentageOfMultidomainProteins( final SortedMap<Integer, Integer> all_genomes_domains_per_potein_histo,
+                                                                final Writer log_writer ) {
+        int sum = 0;
+        for( final Integer count : all_genomes_domains_per_potein_histo.values() ) {
+            sum += count;
+        }
+        // Map.get returns null for an absent key; unboxing that directly would
+        // throw a NullPointerException when nothing is counted under key 1.
+        final Integer single_domain = all_genomes_domains_per_potein_histo.get( 1 );
+        final int single_domain_count = ( single_domain == null ) ? 0 : single_domain.intValue();
+        final double percentage = ( 100.0 * ( sum - single_domain_count ) ) / sum;
+        ForesterUtil.programMessage( surfacing.PRG_NAME, "Percentage of multidomain proteins: " + percentage + "%" );
+        log( "Percentage of multidomain proteins:            : " + percentage + "%", log_writer );
+    }
+
+    /**
+     * Adds the entries read from a filter file (via ForesterUtil.file2set) to
+     * the given filter set. An IOException while reading is reported through
+     * ForesterUtil.fatalError. If surfacing.VERBOSE is set, the complete
+     * filter is printed to standard output afterwards.
+     *
+     * @param filter_file
+     *            the file to read the entries from
+     * @param filter
+     *            the set to add the entries to
+     */
+    public static void processFilter( final File filter_file, final SortedSet<String> filter ) {
+        SortedSet<String> entries_from_file = null;
+        try {
+            entries_from_file = ForesterUtil.file2set( filter_file );
+        }
+        catch ( final IOException io_error ) {
+            ForesterUtil.fatalError( surfacing.PRG_NAME, io_error.getMessage() );
+        }
+        if ( entries_from_file != null ) {
+            filter.addAll( entries_from_file );
+        }
+        if ( !surfacing.VERBOSE ) {
+            return;
+        }
+        System.out.println( "Filter:" );
+        for( final String entry : filter ) {
+            System.out.println( entry );
+        }
+    }
+
+    /**
+     * Reads the input genomes file into a two-dimensional array (via
+     * ForesterUtil.file22dArray) and validates every row: index 1 (species
+     * code) must match PhyloXmlUtil.TAXOMONY_CODE_PATTERN and be unique,
+     * index 0 (path) must be unique and name a readable file. Violations are
+     * reported through ForesterUtil.fatalError.
+     *
+     * @param input_genomes
+     *            the file listing one "path, species code" pair per row
+     * @return the rows read from input_genomes
+     */
+    public static String[][] processInputGenomesFile( final File input_genomes ) {
+        String[][] input_file_properties = null;
+        try {
+            input_file_properties = ForesterUtil.file22dArray( input_genomes );
+        }
+        catch ( final IOException io_error ) {
+            ForesterUtil.fatalError( surfacing.PRG_NAME,
+                                     "genomes files is to be in the following format \"<hmmpfam output file> <species>\": "
+                                             + io_error.getLocalizedMessage() );
+        }
+        final Set<String> seen_codes = new HashSet<String>();
+        final Set<String> seen_paths = new HashSet<String>();
+        for( final String[] row : input_file_properties ) {
+            final String species_code = row[ 1 ];
+            final String path = row[ 0 ];
+            if ( !PhyloXmlUtil.TAXOMONY_CODE_PATTERN.matcher( species_code ).matches() ) {
+                ForesterUtil.fatalError( surfacing.PRG_NAME, "illegal format for species code: " + species_code );
+            }
+            // Set.add returns false when the element was already present.
+            if ( !seen_codes.add( species_code ) ) {
+                ForesterUtil.fatalError( surfacing.PRG_NAME, "species code " + species_code + " is not unique" );
+            }
+            if ( !seen_paths.add( path ) ) {
+                ForesterUtil.fatalError( surfacing.PRG_NAME, "path " + path + " is not unique" );
+            }
+            final String read_error = ForesterUtil.isReadableFile( new File( path ) );
+            if ( !ForesterUtil.isEmpty( read_error ) ) {
+                ForesterUtil.fatalError( surfacing.PRG_NAME, read_error );
+            }
+        }
+        return input_file_properties;
+    }
+
+    /**
+     * Handles the 'plus-minus' analysis command line option. Does nothing if
+     * the option is not set. Otherwise the option must have a value naming a
+     * readable file (violations are reported through ForesterUtil.fatalError),
+     * which is then handed to processPlusMinusFile together with the lists.
+     *
+     * @param cla
+     *            the command line arguments
+     * @param high_copy_base
+     *            passed on to processPlusMinusFile
+     * @param high_copy_target
+     *            passed on to processPlusMinusFile
+     * @param low_copy
+     *            passed on to processPlusMinusFile
+     * @param numbers
+     *            passed on to processPlusMinusFile
+     */
+    public static void processPlusMinusAnalysisOption( final CommandLineArguments cla,
+                                                       final List<String> high_copy_base,
+                                                       final List<String> high_copy_target,
+                                                       final List<String> low_copy,
+                                                       final List<Object> numbers ) {
+        if ( !cla.isOptionSet( surfacing.PLUS_MINUS_ANALYSIS_OPTION ) ) {
+            return;
+        }
+        if ( !cla.isOptionValueSet( surfacing.PLUS_MINUS_ANALYSIS_OPTION ) ) {
+            ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for 'plus-minus' file: -"
+                    + surfacing.PLUS_MINUS_ANALYSIS_OPTION + "=<file>" );
+        }
+        final String file_name = cla.getOptionValue( surfacing.PLUS_MINUS_ANALYSIS_OPTION );
+        final File plus_minus_file = new File( file_name );
+        final String read_error = ForesterUtil.isReadableFile( plus_minus_file );
+        if ( !ForesterUtil.isEmpty( read_error ) ) {
+            ForesterUtil.fatalError( surfacing.PRG_NAME, "can not read from \"" + plus_minus_file + "\": "
+                    + read_error );
+        }
+        processPlusMinusFile( plus_minus_file, high_copy_base, high_copy_target, low_copy, numbers );
+    }
+
+    /**
+     * Parses a 'plus minus' file and sorts its entries into the given lists.
+     * <p>
+     * Each entry read via ForesterUtil.file2set is classified by its first
+     * character(s):
+     * <ul>
+     * <li>'+': rest of the entry is added to high_copy_base</li>
+     * <li>'*': rest of the entry is added to high_copy_target</li>
+     * <li>'-': rest of the entry is added to low_copy</li>
+     * <li>'$D': the text from index 3 on is parsed as the integer minimal
+     * difference</li>
+     * <li>'$F': the text from index 3 on is parsed as the double factor</li>
+     * <li>'#': comment, ignored</li>
+     * </ul>
+     * Anything else, a name appearing both as low copy and as high copy, or an
+     * unparsable number is reported through ForesterUtil.fatalError.
+     * <p>
+     * After all entries have been processed, exactly two elements are
+     * appended to numbers: first the minimal difference (Integer), second the
+     * factor (Double). Appending only once, after the loop, ensures that
+     * these two elements reflect the '$D'/'$F' entries of the whole file and
+     * not just the state after the first entry.
+     *
+     * @param plus_minus_file
+     *            the file to parse
+     * @param high_copy_base
+     *            receives the names prefixed with '+'
+     * @param high_copy_target
+     *            receives the names prefixed with '*'
+     * @param low_copy
+     *            receives the names prefixed with '-'
+     * @param numbers
+     *            receives the minimal difference and the factor, in that order
+     */
+    public static void processPlusMinusFile( final File plus_minus_file,
+                                             final List<String> high_copy_base,
+                                             final List<String> high_copy_target,
+                                             final List<String> low_copy,
+                                             final List<Object> numbers ) {
+        Set<String> species_set = null;
+        int min_diff = surfacing.PLUS_MINUS_ANALYSIS_MIN_DIFF_DEFAULT;
+        double factor = surfacing.PLUS_MINUS_ANALYSIS_FACTOR_DEFAULT;
+        try {
+            species_set = ForesterUtil.file2set( plus_minus_file );
+        }
+        catch ( final IOException e ) {
+            ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() );
+        }
+        if ( species_set != null ) {
+            for( final String species : species_set ) {
+                // An empty entry has no prefix to strip; it is left as is and ends up
+                // in the final else branch instead of failing in substring.
+                final String species_trimmed = ( species.length() > 0 ) ? species.substring( 1 ) : species;
+                // Text following "$D=" / "$F="; empty if the entry is too short, which
+                // makes the number parsers fail with a NumberFormatException.
+                final String value = ( species.length() > 3 ) ? species.substring( 3 ) : "";
+                if ( species.startsWith( "+" ) ) {
+                    if ( low_copy.contains( species_trimmed ) ) {
+                        ForesterUtil.fatalError( surfacing.PRG_NAME,
+                                                 "species/genome names can not appear with both '+' and '-' suffix, as appears the case for: \""
+                                                         + species_trimmed + "\"" );
+                    }
+                    high_copy_base.add( species_trimmed );
+                }
+                else if ( species.startsWith( "*" ) ) {
+                    if ( low_copy.contains( species_trimmed ) ) {
+                        ForesterUtil.fatalError( surfacing.PRG_NAME,
+                                                 "species/genome names can not appear with both '*' and '-' suffix, as appears the case for: \""
+                                                         + species_trimmed + "\"" );
+                    }
+                    high_copy_target.add( species_trimmed );
+                }
+                else if ( species.startsWith( "-" ) ) {
+                    if ( high_copy_base.contains( species_trimmed ) || high_copy_target.contains( species_trimmed ) ) {
+                        ForesterUtil.fatalError( surfacing.PRG_NAME,
+                                                 "species/genome names can not appear with both '+' or '*' and '-' suffix, as appears the case for: \""
+                                                         + species_trimmed + "\"" );
+                    }
+                    low_copy.add( species_trimmed );
+                }
+                else if ( species.startsWith( "$D" ) ) {
+                    try {
+                        min_diff = Integer.parseInt( value );
+                    }
+                    catch ( final NumberFormatException e ) {
+                        ForesterUtil.fatalError( surfacing.PRG_NAME,
+                                                 "could not parse integer value for minimal difference from: \""
+                                                         + value + "\"" );
+                    }
+                }
+                else if ( species.startsWith( "$F" ) ) {
+                    try {
+                        factor = Double.parseDouble( value );
+                    }
+                    catch ( final NumberFormatException e ) {
+                        ForesterUtil.fatalError( surfacing.PRG_NAME, "could not parse double value for factor from: \""
+                                + value + "\"" );
+                    }
+                }
+                else if ( species.startsWith( "#" ) ) {
+                    // Comment, ignore.
+                }
+                else {
+                    ForesterUtil
+                            .fatalError( surfacing.PRG_NAME,
+                                         "species/genome names in 'plus minus' file must begin with '*' (high copy target genome), '+' (high copy base genomes), '-' (low copy genomes), '$D=<integer>' minimal Difference (default is 1), '$F=<double>' factor (default is 1.0), double), or '#' (ignore) suffix, encountered: \""
+                                                 + species + "\"" );
+                }
+            }
+            // Publish the final values exactly once: index 0 is the minimal
+            // difference, index 1 is the factor.
+            numbers.add( Integer.valueOf( min_diff ) );
+            numbers.add( Double.valueOf( factor ) );
+        }
+        else {
+            ForesterUtil.fatalError( surfacing.PRG_NAME, "'plus minus' file [" + plus_minus_file + "] appears empty" );
+        }
+    }
+
     /*
      * species | protein id | n-terminal domain | c-terminal domain | n-terminal domain per domain E-value | c-terminal domain per domain E-value
      * 
@@ -1207,55 +1755,6 @@ public final class SurfacingUtil {
         return sb;
     }
 
-    /**
-     * 
-     * Example regarding engulfment: ------------0.1 ----------0.2 --0.3 =>
-     * domain with 0.3 is ignored
-     * 
-     * -----------0.1 ----------0.2 --0.3 => domain with 0.3 is ignored
-     * 
-     * 
-     * ------------0.1 ----------0.3 --0.2 => domains with 0.3 and 0.2 are _not_
-     * ignored
-     * 
-     * @param max_allowed_overlap
-     *            maximal allowed overlap (inclusive) to be still considered not
-     *            overlapping (zero or negative value to allow any overlap)
-     * @param remove_engulfed_domains
-     *            to remove domains which are completely engulfed by coverage of
-     *            domains with better support
-     * @param protein
-     * @return
-     */
-    public static Protein removeOverlappingDomains( final int max_allowed_overlap,
-                                                    final boolean remove_engulfed_domains,
-                                                    final Protein protein ) {
-        final Protein pruned_protein = new BasicProtein( protein.getProteinId().getId(), protein.getSpecies()
-                .getSpeciesId(), protein.getLength() );
-        final List<Domain> sorted = SurfacingUtil.sortDomainsWithAscendingConfidenceValues( protein );
-        final List<Boolean> covered_positions = new ArrayList<Boolean>();
-        for( final Domain domain : sorted ) {
-            if ( ( ( max_allowed_overlap < 0 ) || ( SurfacingUtil.calculateOverlap( domain, covered_positions ) <= max_allowed_overlap ) )
-                    && ( !remove_engulfed_domains || !isEngulfed( domain, covered_positions ) ) ) {
-                final int covered_positions_size = covered_positions.size();
-                for( int i = covered_positions_size; i < domain.getFrom(); ++i ) {
-                    covered_positions.add( false );
-                }
-                final int new_covered_positions_size = covered_positions.size();
-                for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) {
-                    if ( i < new_covered_positions_size ) {
-                        covered_positions.set( i, true );
-                    }
-                    else {
-                        covered_positions.add( true );
-                    }
-                }
-                pruned_protein.addProteinDomain( domain );
-            }
-        }
-        return pruned_protein;
-    }
-
     public static List<Domain> sortDomainsWithAscendingConfidenceValues( final Protein protein ) {
         final List<Domain> domains = new ArrayList<Domain>();
         for( final Domain d : protein.getProteinDomains() ) {
@@ -1475,7 +1974,7 @@ public final class SurfacingUtil {
             int per_node_counter = 0;
             out.write( "<html>" );
             out.write( SurfacingConstants.NL );
-            addHtmlHead( out, title_for_html );
+            writeHtmlHead( out, title_for_html );
             out.write( SurfacingConstants.NL );
             out.write( "<body>" );
             out.write( SurfacingConstants.NL );
@@ -1670,127 +2169,19 @@ public final class SurfacingUtil {
         }
     }
 
-    public static DescriptiveStatistics writeDomainSimilaritiesToFile( final StringBuilder html_desc,
-                                                                       final StringBuilder html_title,
-                                                                       final Writer single_writer,
-                                                                       Map<Character, Writer> split_writers,
-                                                                       final SortedSet<DomainSimilarity> similarities,
-                                                                       final boolean treat_as_binary,
-                                                                       final List<Species> species_order,
-                                                                       final PrintableDomainSimilarity.PRINT_OPTION print_option,
-                                                                       final DomainSimilarity.DomainSimilaritySortField sort_field,
-                                                                       final DomainSimilarity.DomainSimilarityScoring scoring,
-                                                                       final boolean verbose,
-                                                                       final Map<String, Integer> tax_code_to_id_map )
-            throws IOException {
-        final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
-        String histogram_title = null;
-        switch ( sort_field ) {
-            case ABS_MAX_COUNTS_DIFFERENCE:
-                if ( treat_as_binary ) {
-                    histogram_title = "absolute counts difference:";
-                }
-                else {
-                    histogram_title = "absolute (maximal) counts difference:";
-                }
-                break;
-            case MAX_COUNTS_DIFFERENCE:
-                if ( treat_as_binary ) {
-                    histogram_title = "counts difference:";
-                }
-                else {
-                    histogram_title = "(maximal) counts difference:";
-                }
-                break;
-            case DOMAIN_ID:
-                histogram_title = "score mean:";
-                break;
-            case MIN:
-                histogram_title = "score minimum:";
-                break;
-            case MAX:
-                histogram_title = "score maximum:";
-                break;
-            case MAX_DIFFERENCE:
-                if ( treat_as_binary ) {
-                    histogram_title = "difference:";
-                }
-                else {
-                    histogram_title = "(maximal) difference:";
-                }
-                break;
-            case MEAN:
-                histogram_title = "score mean:";
-                break;
-            case SD:
-                histogram_title = "score standard deviation:";
-                break;
-            case SPECIES_COUNT:
-                histogram_title = "species number:";
-                break;
-            default:
-                throw new AssertionError( "Unknown sort field: " + sort_field );
-        }
-        for( final DomainSimilarity similarity : similarities ) {
-            switch ( sort_field ) {
-                case ABS_MAX_COUNTS_DIFFERENCE:
-                    stats.addValue( Math.abs( similarity.getMaximalDifferenceInCounts() ) );
-                    break;
-                case MAX_COUNTS_DIFFERENCE:
-                    stats.addValue( similarity.getMaximalDifferenceInCounts() );
-                    break;
-                case DOMAIN_ID:
-                    stats.addValue( similarity.getMeanSimilarityScore() );
-                    break;
-                case MIN:
-                    stats.addValue( similarity.getMinimalSimilarityScore() );
-                    break;
-                case MAX:
-                    stats.addValue( similarity.getMaximalSimilarityScore() );
-                    break;
-                case MAX_DIFFERENCE:
-                    stats.addValue( similarity.getMaximalDifference() );
-                    break;
-                case MEAN:
-                    stats.addValue( similarity.getMeanSimilarityScore() );
-                    break;
-                case SD:
-                    stats.addValue( similarity.getStandardDeviationOfSimilarityScore() );
-                    break;
-                case SPECIES_COUNT:
-                    stats.addValue( similarity.getSpecies().size() );
-                    break;
-                default:
-                    throw new AssertionError( "Unknown sort field: " + sort_field );
-            }
-        }
-        AsciiHistogram histo = null;
-        if ( stats.getMin() < stats.getMin() ) {
-            histo = new AsciiHistogram( stats, histogram_title );
-        }
-        if ( verbose ) {
-            if ( histo != null ) {
-                System.out.println( histo.toStringBuffer( 20, '|', 40, 5 ) );
-            }
-            System.out.println();
-            System.out.println( "N                   : " + stats.getN() );
-            System.out.println( "Min                 : " + stats.getMin() );
-            System.out.println( "Max                 : " + stats.getMax() );
-            System.out.println( "Mean                : " + stats.arithmeticMean() );
-            if ( stats.getN() > 1 ) {
-                System.out.println( "SD                  : " + stats.sampleStandardDeviation() );
-            }
-            else {
-                System.out.println( "SD                  : n/a" );
-            }
-            System.out.println( "Median              : " + stats.median() );
-            if ( stats.getN() > 1 ) {
-                System.out.println( "Pearsonian skewness : " + stats.pearsonianSkewness() );
-            }
-            else {
-                System.out.println( "Pearsonian skewness : n/a" );
-            }
-        }
+    public static void writeDomainSimilaritiesToFile( final StringBuilder html_desc,
+                                                      final StringBuilder html_title,
+                                                      final Writer simple_tab_writer,
+                                                      final Writer single_writer,
+                                                      Map<Character, Writer> split_writers,
+                                                      final SortedSet<DomainSimilarity> similarities,
+                                                      final boolean treat_as_binary,
+                                                      final List<Species> species_order,
+                                                      final PrintableDomainSimilarity.PRINT_OPTION print_option,
+                                                      final DomainSimilarity.DomainSimilarityScoring scoring,
+                                                      final boolean verbose,
+                                                      final Map<String, Integer> tax_code_to_id_map,
+                                                      final Phylogeny phy ) throws IOException {
         if ( ( single_writer != null ) && ( ( split_writers == null ) || split_writers.isEmpty() ) ) {
             split_writers = new HashMap<Character, Writer>();
             split_writers.put( '_', single_writer );
@@ -1804,10 +2195,10 @@ public final class SurfacingUtil {
                     w.write( "<html>" );
                     w.write( SurfacingConstants.NL );
                     if ( key != '_' ) {
-                        addHtmlHead( w, "DCs (" + html_title + ") " + key.toString().toUpperCase() );
+                        writeHtmlHead( w, "DC analysis (" + html_title + ") " + key.toString().toUpperCase() );
                     }
                     else {
-                        addHtmlHead( w, "DCs (" + html_title + ")" );
+                        writeHtmlHead( w, "DC analysis (" + html_title + ")" );
                     }
                     w.write( SurfacingConstants.NL );
                     w.write( "<body>" );
@@ -1815,64 +2206,96 @@ public final class SurfacingUtil {
                     w.write( html_desc.toString() );
                     w.write( SurfacingConstants.NL );
                     w.write( "<hr>" );
-                    w.write( "<br>" );
-                    w.write( SurfacingConstants.NL );
-                    w.write( "<tt><pre>" );
-                    w.write( SurfacingConstants.NL );
-                    if ( histo != null ) {
-                        w.write( histo.toStringBuffer( 20, '|', 40, 5 ).toString() );
-                        w.write( SurfacingConstants.NL );
-                    }
-                    w.write( "</pre></tt>" );
-                    w.write( SurfacingConstants.NL );
-                    w.write( "<table>" );
-                    w.write( SurfacingConstants.NL );
-                    w.write( "<tr><td>N: </td><td>" + stats.getN() + "</td></tr>" );
-                    w.write( SurfacingConstants.NL );
-                    w.write( "<tr><td>Min: </td><td>" + stats.getMin() + "</td></tr>" );
-                    w.write( SurfacingConstants.NL );
-                    w.write( "<tr><td>Max: </td><td>" + stats.getMax() + "</td></tr>" );
-                    w.write( SurfacingConstants.NL );
-                    w.write( "<tr><td>Mean: </td><td>" + stats.arithmeticMean() + "</td></tr>" );
-                    w.write( SurfacingConstants.NL );
-                    if ( stats.getN() > 1 ) {
-                        w.write( "<tr><td>SD: </td><td>" + stats.sampleStandardDeviation() + "</td></tr>" );
-                    }
-                    else {
-                        w.write( "<tr><td>SD: </td><td>n/a</td></tr>" );
-                    }
-                    w.write( SurfacingConstants.NL );
-                    w.write( "<tr><td>Median: </td><td>" + stats.median() + "</td></tr>" );
-                    w.write( SurfacingConstants.NL );
-                    if ( stats.getN() > 1 ) {
-                        w.write( "<tr><td>Pearsonian skewness: </td><td>" + stats.pearsonianSkewness() + "</td></tr>" );
-                    }
-                    else {
-                        w.write( "<tr><td>Pearsonian skewness: </td><td>n/a</td></tr>" );
-                    }
-                    w.write( SurfacingConstants.NL );
-                    w.write( "</table>" );
-                    w.write( SurfacingConstants.NL );
-                    w.write( "<br>" );
-                    w.write( SurfacingConstants.NL );
-                    w.write( "<hr>" );
                     w.write( SurfacingConstants.NL );
                     w.write( "<br>" );
                     w.write( SurfacingConstants.NL );
                     w.write( "<table>" );
                     w.write( SurfacingConstants.NL );
+                    w.write( "<tr><td><b>Domains:</b></td></tr>" );
+                    w.write( SurfacingConstants.NL );
                 }
                 break;
         }
+        //
+        for( final DomainSimilarity similarity : similarities ) {
+            if ( ( species_order != null ) && !species_order.isEmpty() ) {
+                ( ( PrintableDomainSimilarity ) similarity ).setSpeciesOrder( species_order );
+            }
+            if ( single_writer != null ) {
+                single_writer.write( "<tr><td><b><a href=\"#" + similarity.getDomainId() + "\">"
+                        + similarity.getDomainId() + "</a></b></td></tr>" );
+                single_writer.write( SurfacingConstants.NL );
+            }
+            else {
+                Writer local_writer = split_writers.get( ( similarity.getDomainId().charAt( 0 ) + "" ).toLowerCase()
+                        .charAt( 0 ) );
+                if ( local_writer == null ) {
+                    local_writer = split_writers.get( '0' );
+                }
+                local_writer.write( "<tr><td><b><a href=\"#" + similarity.getDomainId() + "\">"
+                        + similarity.getDomainId() + "</a></b></td></tr>" );
+                local_writer.write( SurfacingConstants.NL );
+            }
+        }
         for( final Writer w : split_writers.values() ) {
+            w.write( "</table>" );
+            w.write( SurfacingConstants.NL );
+            w.write( "<hr>" );
+            w.write( SurfacingConstants.NL );
+            //
+            w.write( "<table>" );
+            w.write( SurfacingConstants.NL );
+            w.write( "<tr><td><b>" );
+            w.write( "Species group colors:" );
+            w.write( "</b></td></tr>" );
+            w.write( SurfacingConstants.NL );
+            writeColorLabels( "Deuterostomia", TaxonomyColors.DEUTEROSTOMIA_COLOR, w );
+            writeColorLabels( "Protostomia", TaxonomyColors.PROTOSTOMIA_COLOR, w );
+            writeColorLabels( "Cnidaria", TaxonomyColors.CNIDARIA_COLOR, w );
+            writeColorLabels( "Placozoa", TaxonomyColors.PLACOZOA_COLOR, w );
+            writeColorLabels( "Ctenophora (comb jellies)", TaxonomyColors.CTENOPHORA_COLOR, w );
+            writeColorLabels( "Porifera (sponges)", TaxonomyColors.PORIFERA_COLOR, w );
+            writeColorLabels( "Choanoflagellida", TaxonomyColors.CHOANOFLAGELLIDA, w );
+            writeColorLabels( "Ichthyosporea & Filasterea", TaxonomyColors.ICHTHYOSPOREA_AND_FILASTEREA, w );
+            writeColorLabels( "Fungi", TaxonomyColors.FUNGI_COLOR, w );
+            writeColorLabels( "Nucleariidae and Fonticula group",
+                              TaxonomyColors.NUCLEARIIDAE_AND_FONTICULA_GROUP_COLOR,
+                              w );
+            writeColorLabels( "Amoebozoa", TaxonomyColors.AMOEBOZOA_COLOR, w );
+            writeColorLabels( "Embryophyta (plants)", TaxonomyColors.EMBRYOPHYTA_COLOR, w );
+            writeColorLabels( "Chlorophyta (green algae)", TaxonomyColors.CHLOROPHYTA_COLOR, w );
+            writeColorLabels( "Rhodophyta (red algae)", TaxonomyColors.RHODOPHYTA_COLOR, w );
+            writeColorLabels( "Glaucocystophyce (Glaucophyta)", TaxonomyColors.GLAUCOPHYTA_COLOR, w );
+            writeColorLabels( "Hacrobia (Cryptophyta & Haptophyceae & Centroheliozoa)",
+                              TaxonomyColors.HACROBIA_COLOR,
+                              w );
+            writeColorLabels( "Stramenopiles (Chromophyta, heterokonts)", TaxonomyColors.STRAMENOPILES_COLOR, w );
+            writeColorLabels( "Alveolata", TaxonomyColors.ALVEOLATA_COLOR, w );
+            writeColorLabels( "Rhizaria", TaxonomyColors.RHIZARIA_COLOR, w );
+            writeColorLabels( "Excavata", TaxonomyColors.EXCAVATA_COLOR, w );
+            writeColorLabels( "Apusozoa", TaxonomyColors.APUSOZOA_COLOR, w );
+            writeColorLabels( "Archaea", TaxonomyColors.ARCHAEA_COLOR, w );
+            writeColorLabels( "Bacteria", TaxonomyColors.BACTERIA_COLOR, w );
+            w.write( "</table>" );
+            w.write( SurfacingConstants.NL );
+            //
+            w.write( "<hr>" );
+            w.write( SurfacingConstants.NL );
+            w.write( "<table>" );
             w.write( SurfacingConstants.NL );
         }
+        //
         for( final DomainSimilarity similarity : similarities ) {
             if ( ( species_order != null ) && !species_order.isEmpty() ) {
                 ( ( PrintableDomainSimilarity ) similarity ).setSpeciesOrder( species_order );
             }
+            if ( simple_tab_writer != null ) {
+                simple_tab_writer.write( similarity.toStringBuffer( PRINT_OPTION.SIMPLE_TAB_DELIMITED,
+                                                                    tax_code_to_id_map,
+                                                                    null ).toString() );
+            }
             if ( single_writer != null ) {
-                single_writer.write( similarity.toStringBuffer( print_option, tax_code_to_id_map ).toString() );
+                single_writer.write( similarity.toStringBuffer( print_option, tax_code_to_id_map, phy ).toString() );
                 single_writer.write( SurfacingConstants.NL );
             }
             else {
@@ -1881,12 +2304,9 @@ public final class SurfacingUtil {
                 if ( local_writer == null ) {
                     local_writer = split_writers.get( '0' );
                 }
-                local_writer.write( similarity.toStringBuffer( print_option, tax_code_to_id_map ).toString() );
+                local_writer.write( similarity.toStringBuffer( print_option, tax_code_to_id_map, phy ).toString() );
                 local_writer.write( SurfacingConstants.NL );
             }
-            // for( final Writer w : split_writers.values() ) {
-            //w.write( SurfacingConstants.NL );
-            // }
         }
         switch ( print_option ) {
             case HTML:
@@ -1902,11 +2322,60 @@ public final class SurfacingUtil {
                     w.write( SurfacingConstants.NL );
                 }
                 break;
+            default:
+                break;
         }
         for( final Writer w : split_writers.values() ) {
             w.close();
         }
-        return stats;
+    }
+
+    /**
+     * Writes a complete HTML {@code <head>} element, consisting of a
+     * {@code <title>} and an inline {@code <style>} sheet, to the given writer.
+     * <p>
+     * The style sheet defines link colors for plain anchors and for anchors
+     * of class {@code pl} and {@code ps}, plus fonts for {@code td},
+     * {@code h1} and {@code h2}. The title is written verbatim (it is not
+     * HTML-escaped here).
+     *
+     * @param w     the writer to write to
+     * @param title the text placed inside the {@code <title>} element
+     * @throws IOException if writing to {@code w} fails
+     */
+    public static void writeHtmlHead( final Writer w, final String title ) throws IOException {
+        w.write( SurfacingConstants.NL );
+        w.write( "<head>" );
+        w.write( "<title>" );
+        w.write( title );
+        w.write( "</title>" );
+        w.write( SurfacingConstants.NL );
+        w.write( "<style>" );
+        w.write( SurfacingConstants.NL );
+        // Plain anchors.
+        w.write( "a:visited { color : #000066; text-decoration : none; }" );
+        w.write( SurfacingConstants.NL );
+        w.write( "a:link { color : #000066; text-decoration : none; }" );
+        w.write( SurfacingConstants.NL );
+        // A single '#' is required: "##000066" is not a valid CSS color and the declaration would be dropped.
+        w.write( "a:active { color : #000066; text-decoration : none; }" );
+        w.write( SurfacingConstants.NL );
+        w.write( "a:hover { color : #FFFFFF; background-color : #000000; text-decoration : none; }" );
+        w.write( SurfacingConstants.NL );
+        // Anchors of class "pl".
+        w.write( "a.pl:visited { color : #505050; text-decoration : none; font-size: 7px;}" );
+        w.write( SurfacingConstants.NL );
+        w.write( "a.pl:link { color : #505050; text-decoration : none; font-size: 7px;}" );
+        w.write( SurfacingConstants.NL );
+        w.write( "a.pl:active { color : #505050; text-decoration : none; font-size: 7px;}" );
+        w.write( SurfacingConstants.NL );
+        w.write( "a.pl:hover { color : #FFFFFF; background-color : #000000; text-decoration : none; font-size: 7px;}" );
+        w.write( SurfacingConstants.NL );
+        // Anchors of class "ps".
+        w.write( "a.ps:visited { color : #707070; text-decoration : none; font-size: 7px;}" );
+        w.write( SurfacingConstants.NL );
+        w.write( "a.ps:link { color : #707070; text-decoration : none; font-size: 7px;}" );
+        w.write( SurfacingConstants.NL );
+        w.write( "a.ps:active { color : #707070; text-decoration : none; font-size: 7px;}" );
+        w.write( SurfacingConstants.NL );
+        w.write( "a.ps:hover { color : #FFFFFF; background-color : #000000; text-decoration : none; font-size: 7px;}" );
+        w.write( SurfacingConstants.NL );
+        // Table cells and headings.
+        w.write( "td { text-align: left; vertical-align: top; font-family: Verdana, Arial, Helvetica; font-size: 8pt}" );
+        w.write( SurfacingConstants.NL );
+        w.write( "h1 { color : #0000FF; font-family: Verdana, Arial, Helvetica; font-size: 18pt; font-weight: bold }" );
+        w.write( SurfacingConstants.NL );
+        w.write( "h2 { color : #0000FF; font-family: Verdana, Arial, Helvetica; font-size: 16pt; font-weight: bold }" );
+        w.write( SurfacingConstants.NL );
+        w.write( "</style>" );
+        w.write( SurfacingConstants.NL );
+        w.write( "</head>" );
+        w.write( SurfacingConstants.NL );
     }
 
     public static void writeMatrixToFile( final CharacterStateMatrix<?> matrix,
@@ -1955,6 +2424,53 @@ public final class SurfacingUtil {
         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote phylogeny to \"" + filename + "\"" );
     }
 
+    /**
+     * Writes two presence/absence matrices in {@code Format.NEXUS_BINARY}
+     * format: one for domains and one for binary domain combinations.
+     * <p>
+     * The output names are formed by appending
+     * {@code surfacing.DOMAINS_PRESENT_NEXUS} and
+     * {@code surfacing.BDC_PRESENT_NEXUS} to {@code output_file}. Any
+     * exception is reported through {@code ForesterUtil.fatalError}.
+     *
+     * @param output_file          base for the two output names
+     * @param positive_filter_file if {@code null}, no filter is passed to the domain matrix creation
+     * @param filter               the filter passed on when {@code positive_filter_file} is not {@code null}
+     * @param gwcd_list            the genome-wide combinable domains the matrices are built from
+     */
+    public static void writePresentToNexus( final File output_file,
+                                            final File positive_filter_file,
+                                            final SortedSet<String> filter,
+                                            final List<GenomeWideCombinableDomains> gwcd_list ) {
+        // The filter only takes effect when a positive filter file was supplied.
+        final SortedSet<String> effective_filter = ( positive_filter_file != null ) ? filter : null;
+        try {
+            final String domains_out = output_file + surfacing.DOMAINS_PRESENT_NEXUS;
+            writeMatrixToFile( DomainParsimonyCalculator.createMatrixOfDomainPresenceOrAbsence( gwcd_list,
+                                                                                                effective_filter ),
+                               domains_out,
+                               Format.NEXUS_BINARY );
+            final String combinations_out = output_file + surfacing.BDC_PRESENT_NEXUS;
+            writeMatrixToFile( DomainParsimonyCalculator.createMatrixOfBinaryDomainCombinationPresenceOrAbsence( gwcd_list ),
+                               combinations_out,
+                               Format.NEXUS_BINARY );
+        }
+        catch ( final Exception e ) {
+            ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() );
+        }
+    }
+
+    /**
+     * For every domain id found in {@code gwcd_list}, writes one file via
+     * {@code extractProteinNames}.
+     * <p>
+     * Each file is named {@code <domain id> + surfacing.SEQ_EXTRACT_SUFFIX}
+     * inside {@code output_dir}. Domain ids are processed in sorted order. An
+     * {@link IOException} is reported through {@code ForesterUtil.fatalError}.
+     *
+     * @param output_dir                directory the per-domain files are written to
+     * @param protein_lists_per_species proteins per species, handed to {@code extractProteinNames}
+     * @param gwcd_list                 source of the domain ids
+     * @param domain_e_cutoff           cutoff value handed to {@code extractProteinNames}
+     */
+    public static void writeProteinListsForAllSpecies( final File output_dir,
+                                                       final SortedMap<Species, List<Protein>> protein_lists_per_species,
+                                                       final List<GenomeWideCombinableDomains> gwcd_list,
+                                                       final double domain_e_cutoff ) {
+        // Sorted union of the domain ids of all genomes.
+        final SortedSet<String> domain_ids = new TreeSet<String>();
+        for( final GenomeWideCombinableDomains genome : gwcd_list ) {
+            domain_ids.addAll( genome.getAllDomainIds() );
+        }
+        for( final String domain_id : domain_ids ) {
+            final String path = output_dir + ForesterUtil.FILE_SEPARATOR + domain_id + surfacing.SEQ_EXTRACT_SUFFIX;
+            final File target = new File( path );
+            checkForOutputFileWriteability( target );
+            try {
+                final Writer target_writer = new BufferedWriter( new FileWriter( target ) );
+                extractProteinNames( protein_lists_per_species,
+                                     domain_id,
+                                     target_writer,
+                                     "\t",
+                                     surfacing.LIMIT_SPEC_FOR_PROT_EX,
+                                     domain_e_cutoff );
+                target_writer.close();
+            }
+            catch ( final IOException e ) {
+                ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() );
+            }
+            ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote proteins list to \"" + target + "\"" );
+        }
+    }
+
     public static void writeTaxonomyLinks( final Writer writer,
                                            final String species,
                                            final Map<String, Integer> tax_code_to_id_map ) throws IOException {
@@ -2385,6 +2901,42 @@ public final class SurfacingUtil {
         return binary_combinations;
     }
 
+    /**
+     * Writes an HTML fragment summarizing {@code stats}: a preformatted
+     * histogram (only when {@code histo} is not {@code null}) followed by a
+     * table with N, min, max, mean and sample standard deviation.
+     * <p>
+     * The standard deviation is shown as "n/a" unless N is greater than 1.
+     *
+     * @param stats the statistics to report
+     * @param histo the histogram to render, or {@code null} for none
+     * @param w     the writer to write to
+     * @throws IOException if writing to {@code w} fails
+     */
+    private static void printSomeStats( final DescriptiveStatistics stats, final AsciiHistogram histo, final Writer w )
+            throws IOException {
+        w.write( "<hr>" );
+        w.write( "<br>" );
+        w.write( SurfacingConstants.NL );
+        // Histogram block; the surrounding tags are written even when there is no histogram.
+        w.write( "<tt><pre>" );
+        w.write( SurfacingConstants.NL );
+        if ( histo != null ) {
+            final String histo_text = histo.toStringBuffer( 20, '|', 40, 5 ).toString();
+            w.write( histo_text );
+            w.write( SurfacingConstants.NL );
+        }
+        w.write( "</pre></tt>" );
+        w.write( SurfacingConstants.NL );
+        // Summary table.
+        w.write( "<table>" );
+        w.write( SurfacingConstants.NL );
+        w.write( "<tr><td>N: </td><td>" + stats.getN() + "</td></tr>" );
+        w.write( SurfacingConstants.NL );
+        w.write( "<tr><td>Min: </td><td>" + stats.getMin() + "</td></tr>" );
+        w.write( SurfacingConstants.NL );
+        w.write( "<tr><td>Max: </td><td>" + stats.getMax() + "</td></tr>" );
+        w.write( SurfacingConstants.NL );
+        w.write( "<tr><td>Mean: </td><td>" + stats.arithmeticMean() + "</td></tr>" );
+        w.write( SurfacingConstants.NL );
+        if ( stats.getN() <= 1 ) {
+            w.write( "<tr><td>SD: </td><td>n/a</td></tr>" );
+        }
+        else {
+            w.write( "<tr><td>SD: </td><td>" + stats.sampleStandardDeviation() + "</td></tr>" );
+        }
+        w.write( SurfacingConstants.NL );
+        w.write( "</table>" );
+        w.write( SurfacingConstants.NL );
+        w.write( "<br>" );
+        w.write( SurfacingConstants.NL );
+    }
+
     private static List<String> splitDomainCombination( final String dc ) {
         final String[] s = dc.split( "=" );
         if ( s.length != 2 ) {
@@ -2536,6 +3088,15 @@ public final class SurfacingUtil {
         }
     }
 
+    /**
+     * Writes one HTML table row containing the label {@code l} in bold,
+     * colored with {@code c} (emitted as a lower-case {@code #rrggbb} value),
+     * followed by a line separator.
+     *
+     * @param l the label text, written verbatim
+     * @param c the color applied to the label
+     * @param w the writer to write to
+     * @throws IOException if writing to {@code w} fails
+     */
+    private final static void writeColorLabels( final String l, final Color c, final Writer w ) throws IOException {
+        w.write( "<tr><td><b><span style=\"color:" );
+        // Two lower-case hex digits per channel.
+        final String hex_rgb = String.format( "#%02x%02x%02x", c.getRed(), c.getGreen(), c.getBlue() );
+        w.write( hex_rgb );
+        w.write( "\">" );
+        w.write( l );
+        w.write( "</span></b></td></tr>" );
+        w.write( SurfacingConstants.NL );
+    }
+
     private static void writeDomainData( final Map<String, List<GoId>> domain_id_to_go_ids_map,
                                          final Map<GoId, GoTerm> go_id_to_term_map,
                                          final GoNameSpace go_namespace_limit,