in progress

[jalview.git] / forester / java / src / org / forester / surfacing / SurfacingUtil.java
diff --git a/forester/java/src/org/forester/surfacing/SurfacingUtil.java b/forester/java/src/org/forester/surfacing/SurfacingUtil.java

index 0e200dc..f979897 100644 (file)
--- a/forester/java/src/org/forester/surfacing/SurfacingUtil.java
+++ b/forester/java/src/org/forester/surfacing/SurfacingUtil.java
@@ -34,6 +34,7 @@ import java.io.Writer;
  import java.text.DecimalFormat;
  import java.text.NumberFormat;
  import java.util.ArrayList;
+import java.util.Arrays;
  import java.util.Collections;
  import java.util.Comparator;
  import java.util.HashMap;
@@ -66,6 +67,7 @@ import org.forester.io.writers.PhylogenyWriter;
  import org.forester.phylogeny.Phylogeny;
  import org.forester.phylogeny.PhylogenyMethods;
  import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.PhylogenyNodeI.NH_CONVERSION_SUPPORT_VALUE_STYLE;
  import org.forester.phylogeny.data.BinaryCharacters;
  import org.forester.phylogeny.data.Confidence;
  import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
@@ -164,11 +166,15 @@ public final class SurfacingUtil {
      private static void calculateIndependentDomainCombinationGains( final Phylogeny local_phylogeny_l,
                                                                      final String outfilename_for_counts,
                                                                      final String outfilename_for_dc,
-                                                                    final String outfilename_for_dc_for_go_mapping ) {
+                                                                    final String outfilename_for_dc_for_go_mapping,
+                                                                    final String outfilename_for_dc_for_go_mapping_unique,
+                                                                    final String outfilename_for_rank_counts,
+                                                                    final String outfilename_for_ancestor_species_counts ) {
          try {
              final BufferedWriter out_counts = new BufferedWriter( new FileWriter( outfilename_for_counts ) );
              final BufferedWriter out_dc = new BufferedWriter( new FileWriter( outfilename_for_dc ) );
              final BufferedWriter out_dc_for_go_mapping = new BufferedWriter( new FileWriter( outfilename_for_dc_for_go_mapping ) );
+            final BufferedWriter out_dc_for_go_mapping_unique = new BufferedWriter( new FileWriter( outfilename_for_dc_for_go_mapping_unique ) );
              final SortedMap<String, Integer> dc_gain_counts = new TreeMap<String, Integer>();
              for( final PhylogenyNodeIterator it = local_phylogeny_l.iteratorPostorder(); it.hasNext(); ) {
                  final PhylogenyNode n = it.next();
@@ -185,20 +191,29 @@ public final class SurfacingUtil {
              final SortedMap<Integer, Integer> histogram = new TreeMap<Integer, Integer>();
              final SortedMap<Integer, StringBuilder> domain_lists = new TreeMap<Integer, StringBuilder>();
              final SortedMap<Integer, PriorityQueue<String>> domain_lists_go = new TreeMap<Integer, PriorityQueue<String>>();
+            final SortedMap<Integer, SortedSet<String>> domain_lists_go_unique = new TreeMap<Integer, SortedSet<String>>();
              final Set<String> dcs = dc_gain_counts.keySet();
+            final SortedSet<String> more_than_once = new TreeSet<String>();
              for( final String dc : dcs ) {
                  final int count = dc_gain_counts.get( dc );
                  if ( histogram.containsKey( count ) ) {
                      histogram.put( count, histogram.get( count ) + 1 );
-                    domain_lists.put( count, domain_lists.get( count ).append( ", " + dc ) );
-                    domain_lists_go.get( count ).add( dc );
+                    domain_lists.get( count ).append( ", " + dc );
+                    domain_lists_go.get( count ).addAll( splitDomainCombination( dc ) );
+                    domain_lists_go_unique.get( count ).addAll( splitDomainCombination( dc ) );
                  }
                  else {
                      histogram.put( count, 1 );
                      domain_lists.put( count, new StringBuilder( dc ) );
                      final PriorityQueue<String> q = new PriorityQueue<String>();
-                    q.add( dc );
+                    q.addAll( splitDomainCombination( dc ) );
                      domain_lists_go.put( count, q );
+                    final SortedSet<String> set = new TreeSet<String>();
+                    set.addAll( splitDomainCombination( dc ) );
+                    domain_lists_go_unique.put( count, set );
+                }
+                if ( count > 1 ) {
+                    more_than_once.add( dc );
                  }
              }
              final Set<Integer> histogram_keys = histogram.keySet();
@@ -207,10 +222,67 @@ public final class SurfacingUtil {
                  final StringBuilder dc = domain_lists.get( histogram_key );
                  out_counts.write( histogram_key + "\t" + count + ForesterUtil.LINE_SEPARATOR );
                  out_dc.write( histogram_key + "\t" + dc + ForesterUtil.LINE_SEPARATOR );
+                out_dc_for_go_mapping.write( "#" + histogram_key + ForesterUtil.LINE_SEPARATOR );
+                final Object[] sorted = domain_lists_go.get( histogram_key ).toArray();
+                Arrays.sort( sorted );
+                for( final Object domain : sorted ) {
+                    out_dc_for_go_mapping.write( domain + ForesterUtil.LINE_SEPARATOR );
+                }
+                out_dc_for_go_mapping_unique.write( "#" + histogram_key + ForesterUtil.LINE_SEPARATOR );
+                for( final String domain : domain_lists_go_unique.get( histogram_key ) ) {
+                    out_dc_for_go_mapping_unique.write( domain + ForesterUtil.LINE_SEPARATOR );
+                }
              }
              out_counts.close();
              out_dc.close();
              out_dc_for_go_mapping.close();
+            out_dc_for_go_mapping_unique.close();
+            //
+            final SortedMap<String, Integer> lca_rank_counts = new TreeMap<String, Integer>();
+            final SortedMap<String, Integer> lca_ancestor_species_counts = new TreeMap<String, Integer>();
+            for( final String dc : more_than_once ) {
+                final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
+                for( final PhylogenyNodeIterator it = local_phylogeny_l.iteratorExternalForward(); it.hasNext(); ) {
+                    final PhylogenyNode n = it.next();
+                    if ( n.getNodeData().getBinaryCharacters().getGainedCharacters().contains( dc ) ) {
+                        nodes.add( n );
+                    }
+                }
+                for( int i = 0; i < nodes.size() - 1; ++i ) {
+                    for( int j = i + 1; j < nodes.size(); ++j ) {
+                        final PhylogenyNode lca = PhylogenyMethods.getInstance().obtainLCA( nodes.get( i ),
+                                                                                            nodes.get( j ) );
+                        String rank = "unknown";
+                        if ( lca.getNodeData().isHasTaxonomy()
+                                && !ForesterUtil.isEmpty( lca.getNodeData().getTaxonomy().getRank() ) ) {
+                            rank = lca.getNodeData().getTaxonomy().getRank();
+                        }
+                        addToCountMap( lca_rank_counts, rank );
+                        String lca_species;
+                        if ( lca.getNodeData().isHasTaxonomy()
+                                && !ForesterUtil.isEmpty( lca.getNodeData().getTaxonomy().getScientificName() ) ) {
+                            lca_species = lca.getNodeData().getTaxonomy().getScientificName();
+                        }
+                        else if ( lca.getNodeData().isHasTaxonomy()
+                                && !ForesterUtil.isEmpty( lca.getNodeData().getTaxonomy().getCommonName() ) ) {
+                            lca_species = lca.getNodeData().getTaxonomy().getCommonName();
+                        }
+                        else {
+                            lca_species = lca.getName();
+                        }
+                        addToCountMap( lca_ancestor_species_counts, lca_species );
+                    }
+                }
+            }
+            final BufferedWriter out_for_rank_counts = new BufferedWriter( new FileWriter( outfilename_for_rank_counts ) );
+            final BufferedWriter out_for_ancestor_species_counts = new BufferedWriter( new FileWriter( outfilename_for_ancestor_species_counts ) );
+            ForesterUtil.map2writer( out_for_rank_counts, lca_rank_counts, "\t", ForesterUtil.LINE_SEPARATOR );
+            ForesterUtil.map2writer( out_for_ancestor_species_counts,
+                                     lca_ancestor_species_counts,
+                                     "\t",
+                                     ForesterUtil.LINE_SEPARATOR );
+            out_for_rank_counts.close();
+            out_for_ancestor_species_counts.close();
          }
          catch ( final IOException e ) {
              ForesterUtil.printWarningMessage( surfacing.PRG_NAME, "Failure to write: " + e );
@@ -222,6 +294,18 @@ public final class SurfacingUtil {
          ForesterUtil.programMessage( surfacing.PRG_NAME,
                                       "Wrote independent domain combination gains fitch lists to (for GO mapping) ["
                                               + outfilename_for_dc_for_go_mapping + "]" );
+        ForesterUtil.programMessage( surfacing.PRG_NAME,
+                                     "Wrote independent domain combination gains fitch lists to (for GO mapping, unique) ["
+                                             + outfilename_for_dc_for_go_mapping_unique + "]" );
+    }
+
+    private final static void addToCountMap( final Map<String, Integer> map, final String s ) { // Increments the occurrence counter stored under key s in map (map is modified in place).
+        if ( map.containsKey( s ) ) {
+            map.put( s, map.get( s ) + 1 ); // key already seen: bump the existing count by one
+        }
+        else {
+            map.put( s, 1 ); // first occurrence of s: counting starts at 1
+        }
      }
  
      public static int calculateOverlap( final Domain domain, final List<Boolean> covered_positions ) {
@@ -589,7 +673,7 @@ public final class SurfacingUtil {
                  randomization = "yes, seed = " + random_number_seed_for_fitch_parsimony;
              }
              else {
-                domain_parsimony.executeFitchParsimonyOnBinaryDomainCombintion( false );
+                domain_parsimony.executeFitchParsimonyOnBinaryDomainCombintion( true );
              }
              SurfacingUtil.writeMatrixToFile( domain_parsimony.getGainLossMatrix(), outfile_name
                      + surfacing.PARSIMONY_OUTPUT_GL_SUFFIX_FITCH_BINARY_COMBINATIONS, Format.FORESTER );
@@ -701,7 +785,9 @@ public final class SurfacingUtil {
              calculateIndependentDomainCombinationGains( local_phylogeny_l, outfile_name
                      + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_COUNTS_OUTPUT_SUFFIX, outfile_name
                      + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_OUTPUT_SUFFIX, outfile_name
-                    + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_SUFFIX );
+                    + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_SUFFIX, outfile_name
+                    + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_UNIQUE_SUFFIX, outfile_name
+                    + "_indep_dc_gains_fitch_lca_ranks.txt", outfile_name + "_indep_dc_gains_fitch_lca_taxonomies.txt" );
          }
      }
  
@@ -759,65 +845,35 @@ public final class SurfacingUtil {
      public static void extractProteinNames( final List<Protein> proteins,
                                              final List<DomainId> query_domain_ids_nc_order,
                                              final Writer out,
-                                            final String separator ) throws IOException {
+                                            final String separator,
+                                            final String limit_to_species ) throws IOException {
          for( final Protein protein : proteins ) {
-            if ( protein.contains( query_domain_ids_nc_order, true ) ) {
-                out.write( protein.getSpecies().getSpeciesId() );
-                out.write( separator );
-                out.write( protein.getProteinId().getId() );
-                out.write( separator );
-                out.write( "[" );
-                final Set<DomainId> visited_domain_ids = new HashSet<DomainId>();
-                boolean first = true;
-                for( final Domain domain : protein.getProteinDomains() ) {
-                    if ( !visited_domain_ids.contains( domain.getDomainId() ) ) {
-                        visited_domain_ids.add( domain.getDomainId() );
-                        if ( first ) {
-                            first = false;
-                        }
-                        else {
-                            out.write( " " );
-                        }
-                        out.write( domain.getDomainId().getId() );
-                        out.write( " {" );
-                        out.write( "" + domain.getTotalCount() );
-                        out.write( "}" );
-                    }
-                }
-                out.write( "]" );
-                out.write( separator );
-                if ( !( ForesterUtil.isEmpty( protein.getDescription() ) || protein.getDescription()
-                        .equals( SurfacingConstants.NONE ) ) ) {
-                    out.write( protein.getDescription() );
-                }
-                out.write( separator );
-                if ( !( ForesterUtil.isEmpty( protein.getAccession() ) || protein.getAccession()
-                        .equals( SurfacingConstants.NONE ) ) ) {
-                    out.write( protein.getAccession() );
-                }
-                out.write( SurfacingConstants.NL );
-            }
-        }
-        out.flush();
-    }
-
-    public static void extractProteinNames( final SortedMap<Species, List<Protein>> protein_lists_per_species,
-                                            final DomainId domain_id,
-                                            final Writer out,
-                                            final String separator ) throws IOException {
-        for( final Species species : protein_lists_per_species.keySet() ) {
-            for( final Protein protein : protein_lists_per_species.get( species ) ) {
-                final List<Domain> domains = protein.getProteinDomains( domain_id );
-                if ( domains.size() > 0 ) {
-                    final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
-                    for( final Domain domain : domains ) {
-                        stats.addValue( domain.getPerSequenceEvalue() );
-                    }
+            if ( ForesterUtil.isEmpty( limit_to_species )
+                    || protein.getSpecies().getSpeciesId().equalsIgnoreCase( limit_to_species ) ) {
+                if ( protein.contains( query_domain_ids_nc_order, true ) ) {
                      out.write( protein.getSpecies().getSpeciesId() );
                      out.write( separator );
                      out.write( protein.getProteinId().getId() );
                      out.write( separator );
-                    out.write( "[" + FORMATTER.format( stats.median() ) + "]" );
+                    out.write( "[" );
+                    final Set<DomainId> visited_domain_ids = new HashSet<DomainId>();
+                    boolean first = true;
+                    for( final Domain domain : protein.getProteinDomains() ) {
+                        if ( !visited_domain_ids.contains( domain.getDomainId() ) ) {
+                            visited_domain_ids.add( domain.getDomainId() );
+                            if ( first ) {
+                                first = false;
+                            }
+                            else {
+                                out.write( " " );
+                            }
+                            out.write( domain.getDomainId().getId() );
+                            out.write( " {" );
+                            out.write( "" + domain.getTotalCount() );
+                            out.write( "}" );
+                        }
+                    }
+                    out.write( "]" );
                      out.write( separator );
                      if ( !( ForesterUtil.isEmpty( protein.getDescription() ) || protein.getDescription()
                              .equals( SurfacingConstants.NONE ) ) ) {
@@ -835,6 +891,44 @@ public final class SurfacingUtil {
          out.flush();
      }
  
+    public static void extractProteinNames( final SortedMap<Species, List<Protein>> protein_lists_per_species, // Writes one line per protein that has at least one domain returned by getProteinDomains( domain_id ).
+                                            final DomainId domain_id,
+                                            final Writer out,
+                                            final String separator, // written between the five output fields: species id, protein id, [median], description, accession
+                                            final String limit_to_species ) throws IOException { // when ForesterUtil.isEmpty( limit_to_species ) holds, no species filter is applied
+        for( final Species species : protein_lists_per_species.keySet() ) {
+            for( final Protein protein : protein_lists_per_species.get( species ) ) {
+                if ( ForesterUtil.isEmpty( limit_to_species )
+                        || protein.getSpecies().getSpeciesId().equalsIgnoreCase( limit_to_species ) ) { // case-insensitive match on the protein's own species id, not on the map key
+                    final List<Domain> domains = protein.getProteinDomains( domain_id );
+                    if ( domains.size() > 0 ) { // proteins without any matching domain produce no output
+                        final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
+                        for( final Domain domain : domains ) {
+                            stats.addValue( domain.getPerSequenceEvalue() ); // collect the per-sequence E-value of every matching domain
+                        }
+                        out.write( protein.getSpecies().getSpeciesId() );
+                        out.write( separator );
+                        out.write( protein.getProteinId().getId() );
+                        out.write( separator );
+                        out.write( "[" + FORMATTER.format( stats.median() ) + "]" ); // median of the collected E-values, formatted with FORMATTER and wrapped in brackets
+                        out.write( separator );
+                        if ( !( ForesterUtil.isEmpty( protein.getDescription() ) || protein.getDescription()
+                                .equals( SurfacingConstants.NONE ) ) ) { // description field stays empty when missing or equal to SurfacingConstants.NONE
+                            out.write( protein.getDescription() );
+                        }
+                        out.write( separator ); // separator is written even when the description was skipped
+                        if ( !( ForesterUtil.isEmpty( protein.getAccession() ) || protein.getAccession()
+                                .equals( SurfacingConstants.NONE ) ) ) { // accession field stays empty when missing or equal to SurfacingConstants.NONE
+                            out.write( protein.getAccession() );
+                        }
+                        out.write( SurfacingConstants.NL ); // terminates the record for this protein
+                    }
+                }
+            }
+        }
+        out.flush(); // the writer is flushed but not closed here
+    }
+
      public static SortedSet<DomainId> getAllDomainIds( final List<GenomeWideCombinableDomains> gwcd_list ) {
          final SortedSet<DomainId> all_domains_ids = new TreeSet<DomainId>();
          for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
@@ -870,11 +964,17 @@ public final class SurfacingUtil {
              final PhylogenyNode n = it.next();
              if ( ForesterUtil.isEmpty( n.getName() )
                      && ( !n.getNodeData().isHasTaxonomy() || ForesterUtil.isEmpty( n.getNodeData().getTaxonomy()
-                            .getScientificName() ) ) ) {
+                            .getScientificName() ) )
+                    && ( !n.getNodeData().isHasTaxonomy() || ForesterUtil.isEmpty( n.getNodeData().getTaxonomy()
+                            .getCommonName() ) ) ) {
                  if ( n.getParent() != null ) {
                      names.append( " " );
                      names.append( n.getParent().getName() );
                  }
+                final List l = n.getAllExternalDescendants();
+                for( final Object object : l ) {
+                    System.out.println( l.toString() );
+                }
                  ++c;
              }
          }
@@ -1066,6 +1166,19 @@ public final class SurfacingUtil {
          return domains;
      }
  
+    private static List<String> splitDomainCombination( final String dc ) { // Splits a stringified domain combination of the form "a=b" into its two parts.
+        final String[] s = dc.split( "=" ); // "=" is the separator between the two parts
+        if ( s.length != 2 ) { // anything other than exactly two parts is treated as a fatal format error
+            ForesterUtil.printErrorMessage( surfacing.PRG_NAME, "Stringyfied domain combination has illegal format: "
+                    + dc );
+            System.exit( -1 ); // NOTE(review): terminates the JVM, so callers cannot recover from a malformed dc
+        }
+        final List<String> l = new ArrayList<String>( 2 );
+        l.add( s[ 0 ] );
+        l.add( s[ 1 ] );
+        return l; // always exactly two elements, in the order they appear in dc
+    }
+
      public static void writeAllDomainsChangedOnAllSubtrees( final Phylogeny p,
                                                              final boolean get_gains,
                                                              final String outdir,
@@ -1891,7 +2004,8 @@ public final class SurfacingUtil {
  
      public static DescriptiveStatistics writeDomainSimilaritiesToFile( final StringBuilder html_desc,
                                                                         final StringBuilder html_title,
-                                                                       final Writer w,
+                                                                       final Writer single_writer,
+                                                                       Map<Character, Writer> split_writers,
                                                                         final SortedSet<DomainSimilarity> similarities,
                                                                         final boolean treat_as_binary,
                                                                         final List<Species> species_order,
@@ -2034,90 +2148,119 @@ public final class SurfacingUtil {
                  System.out.println( "Pearsonian skewness : n/a" );
              }
          }
+        if ( ( single_writer != null ) && ( ( split_writers == null ) || split_writers.isEmpty() ) ) {
+            split_writers = new HashMap<Character, Writer>();
+            split_writers.put( '_', single_writer );
+        }
          switch ( print_option ) {
              case SIMPLE_TAB_DELIMITED:
                  break;
              case HTML:
-                w.write( "<html>" );
-                w.write( SurfacingConstants.NL );
-                addHtmlHead( w, "SURFACING :: " + html_title );
-                w.write( SurfacingConstants.NL );
-                w.write( "<body>" );
-                w.write( SurfacingConstants.NL );
-                w.write( html_desc.toString() );
-                w.write( SurfacingConstants.NL );
-                w.write( "<hr>" );
-                w.write( "<br>" );
-                w.write( SurfacingConstants.NL );
-                w.write( "<tt><pre>" );
-                w.write( SurfacingConstants.NL );
-                if ( histo != null ) {
-                    w.write( histo.toStringBuffer( 20, '|', 40, 5 ).toString() );
+                for( final Character key : split_writers.keySet() ) {
+                    final Writer w = split_writers.get( key );
+                    w.write( "<html>" );
+                    w.write( SurfacingConstants.NL );
+                    if ( key != '_' ) {
+                        addHtmlHead( w, "DCs (" + html_title + ") " + key.toString().toUpperCase() );
+                    }
+                    else {
+                        addHtmlHead( w, "DCs (" + html_title + ")" );
+                    }
+                    w.write( SurfacingConstants.NL );
+                    w.write( "<body>" );
+                    w.write( SurfacingConstants.NL );
+                    w.write( html_desc.toString() );
+                    w.write( SurfacingConstants.NL );
+                    w.write( "<hr>" );
+                    w.write( "<br>" );
+                    w.write( SurfacingConstants.NL );
+                    w.write( "<tt><pre>" );
+                    w.write( SurfacingConstants.NL );
+                    if ( histo != null ) {
+                        w.write( histo.toStringBuffer( 20, '|', 40, 5 ).toString() );
+                        w.write( SurfacingConstants.NL );
+                    }
+                    w.write( "</pre></tt>" );
+                    w.write( SurfacingConstants.NL );
+                    w.write( "<table>" );
+                    w.write( SurfacingConstants.NL );
+                    w.write( "<tr><td>N: </td><td>" + stats.getN() + "</td></tr>" );
+                    w.write( SurfacingConstants.NL );
+                    w.write( "<tr><td>Min: </td><td>" + stats.getMin() + "</td></tr>" );
+                    w.write( SurfacingConstants.NL );
+                    w.write( "<tr><td>Max: </td><td>" + stats.getMax() + "</td></tr>" );
+                    w.write( SurfacingConstants.NL );
+                    w.write( "<tr><td>Mean: </td><td>" + stats.arithmeticMean() + "</td></tr>" );
+                    w.write( SurfacingConstants.NL );
+                    if ( stats.getN() > 1 ) {
+                        w.write( "<tr><td>SD: </td><td>" + stats.sampleStandardDeviation() + "</td></tr>" );
+                    }
+                    else {
+                        w.write( "<tr><td>SD: </td><td>n/a</td></tr>" );
+                    }
+                    w.write( SurfacingConstants.NL );
+                    w.write( "<tr><td>Median: </td><td>" + stats.median() + "</td></tr>" );
+                    w.write( SurfacingConstants.NL );
+                    if ( stats.getN() > 1 ) {
+                        w.write( "<tr><td>Pearsonian skewness: </td><td>" + stats.pearsonianSkewness() + "</td></tr>" );
+                    }
+                    else {
+                        w.write( "<tr><td>Pearsonian skewness: </td><td>n/a</td></tr>" );
+                    }
+                    w.write( SurfacingConstants.NL );
+                    w.write( "</table>" );
+                    w.write( SurfacingConstants.NL );
+                    w.write( "<br>" );
+                    w.write( SurfacingConstants.NL );
+                    w.write( "<hr>" );
+                    w.write( SurfacingConstants.NL );
+                    w.write( "<br>" );
+                    w.write( SurfacingConstants.NL );
+                    w.write( "<table>" );
                      w.write( SurfacingConstants.NL );
                  }
-                w.write( "</pre></tt>" );
-                w.write( SurfacingConstants.NL );
-                w.write( "<table>" );
-                w.write( SurfacingConstants.NL );
-                w.write( "<tr><td>N: </td><td>" + stats.getN() + "</td></tr>" );
-                w.write( SurfacingConstants.NL );
-                w.write( "<tr><td>Min: </td><td>" + stats.getMin() + "</td></tr>" );
-                w.write( SurfacingConstants.NL );
-                w.write( "<tr><td>Max: </td><td>" + stats.getMax() + "</td></tr>" );
-                w.write( SurfacingConstants.NL );
-                w.write( "<tr><td>Mean: </td><td>" + stats.arithmeticMean() + "</td></tr>" );
-                w.write( SurfacingConstants.NL );
-                if ( stats.getN() > 1 ) {
-                    w.write( "<tr><td>SD: </td><td>" + stats.sampleStandardDeviation() + "</td></tr>" );
-                }
-                else {
-                    w.write( "<tr><td>SD: </td><td>n/a</td></tr>" );
-                }
-                w.write( SurfacingConstants.NL );
-                w.write( "<tr><td>Median: </td><td>" + stats.median() + "</td></tr>" );
-                w.write( SurfacingConstants.NL );
-                if ( stats.getN() > 1 ) {
-                    w.write( "<tr><td>Pearsonian skewness: </td><td>" + stats.pearsonianSkewness() + "</td></tr>" );
-                }
-                else {
-                    w.write( "<tr><td>Pearsonian skewness: </td><td>n/a</td></tr>" );
-                }
-                w.write( SurfacingConstants.NL );
-                w.write( "</table>" );
-                w.write( SurfacingConstants.NL );
-                w.write( "<br>" );
-                w.write( SurfacingConstants.NL );
-                w.write( "<hr>" );
-                w.write( SurfacingConstants.NL );
-                w.write( "<br>" );
-                w.write( SurfacingConstants.NL );
-                w.write( "<table>" );
-                w.write( SurfacingConstants.NL );
                  break;
          }
-        w.write( SurfacingConstants.NL );
+        for( final Writer w : split_writers.values() ) {
+            w.write( SurfacingConstants.NL );
+        }
          for( final DomainSimilarity similarity : similarities ) {
              if ( ( species_order != null ) && !species_order.isEmpty() ) {
                  ( ( PrintableDomainSimilarity ) similarity ).setSpeciesOrder( species_order );
              }
-            w.write( similarity.toStringBuffer( print_option ).toString() );
-            w.write( SurfacingConstants.NL );
+            if ( single_writer != null ) {
+                single_writer.write( similarity.toStringBuffer( print_option ).toString() );
+            }
+            else {
+                Writer local_writer = split_writers.get( ( similarity.getDomainId().getId().charAt( 0 ) + "" )
+                        .toLowerCase().charAt( 0 ) );
+                if ( local_writer == null ) {
+                    local_writer = split_writers.get( '0' );
+                }
+                local_writer.write( similarity.toStringBuffer( print_option ).toString() );
+            }
+            for( final Writer w : split_writers.values() ) {
+                w.write( SurfacingConstants.NL );
+            }
          }
          switch ( print_option ) {
              case HTML:
-                w.write( SurfacingConstants.NL );
-                w.write( "</table>" );
-                w.write( SurfacingConstants.NL );
-                w.write( "</font>" );
-                w.write( SurfacingConstants.NL );
-                w.write( "</body>" );
-                w.write( SurfacingConstants.NL );
-                w.write( "</html>" );
-                w.write( SurfacingConstants.NL );
+                for( final Writer w : split_writers.values() ) {
+                    w.write( SurfacingConstants.NL );
+                    w.write( "</table>" );
+                    w.write( SurfacingConstants.NL );
+                    w.write( "</font>" );
+                    w.write( SurfacingConstants.NL );
+                    w.write( "</body>" );
+                    w.write( SurfacingConstants.NL );
+                    w.write( "</html>" );
+                    w.write( SurfacingConstants.NL );
+                }
                  break;
          }
-        w.flush();
-        w.close();
+        for( final Writer w : split_writers.values() ) {
+            w.close();
+        }
          return stats;
      }
  
@@ -2229,7 +2372,7 @@ public final class SurfacingUtil {
              w.write( ForesterUtil.LINE_SEPARATOR );
              my_matrix.writeNexusTaxaBlock( w );
              my_matrix.writeNexusBinaryChractersBlock( w );
-            PhylogenyWriter.writeNexusTreesBlock( w, phylogenies );
+            PhylogenyWriter.writeNexusTreesBlock( w, phylogenies, NH_CONVERSION_SUPPORT_VALUE_STYLE.NONE );
              w.flush();
              w.close();
              ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote Nexus file: \"" + outfile_name + "\"" );
@@ -2249,4 +2392,89 @@ public final class SurfacingUtil {
                        domain_parsimony.createMatrixOfBinaryDomainCombinationPresenceOrAbsence(),
                        phylogeny );
      }
+
+    /**
+     * Collects domains-per-protein statistics for one genome and writes them as one tab-separated row.
+     * Every protein's domain count goes into a per-genome statistic, the all-genomes statistic and the
+     * all-genomes histogram. Domain ids are sorted into the three sets depending on whether they occur
+     * in single-domain proteins, in proteins with more than one domain, or in both.
+     * <p>
+     * Row: genome, mean, sample SD (blank if N &lt; 2), median, N, min, max; for an empty protein list
+     * only N ("0") is filled in. An IOException from the writer is caught and its stack trace printed.
+     */
+    public static void domainsPerProteinsStatistics( final String genome,
+                                                     final List<Protein> protein_list,
+                                                     final DescriptiveStatistics all_genomes_domains_per_potein_stats,
+                                                     final SortedMap<Integer, Integer> all_genomes_domains_per_potein_histo,
+                                                     final SortedSet<String> domains_which_are_always_single,
+                                                     final SortedSet<String> domains_which_are_sometimes_single_sometimes_not,
+                                                     final SortedSet<String> domains_which_never_single,
+                                                     final Writer writer ) {
+        final DescriptiveStatistics genome_stats = new BasicDescriptiveStatistics();
+        for( final Protein p : protein_list ) {
+            final int domain_count = p.getNumberOfProteinDomains();
+            genome_stats.addValue( domain_count );
+            all_genomes_domains_per_potein_stats.addValue( domain_count );
+            final int seen_before = all_genomes_domains_per_potein_histo.containsKey( domain_count )
+                    ? all_genomes_domains_per_potein_histo.get( domain_count ) : 0;
+            all_genomes_domains_per_potein_histo.put( domain_count, seen_before + 1 );
+            // An id already seen in the opposite situation moves to the mixed set; otherwise it joins this one's set.
+            if ( domain_count == 1 ) {
+                final String id = p.getProteinDomain( 0 ).getDomainId().getId();
+                if ( !domains_which_are_sometimes_single_sometimes_not.contains( id ) ) {
+                    if ( domains_which_never_single.remove( id ) ) {
+                        domains_which_are_sometimes_single_sometimes_not.add( id );
+                    }
+                    else {
+                        domains_which_are_always_single.add( id );
+                    }
+                }
+            }
+            else if ( domain_count > 1 ) {
+                for( final Domain d : p.getProteinDomains() ) {
+                    final String id = d.getDomainId().getId();
+                    if ( domains_which_are_sometimes_single_sometimes_not.contains( id ) ) {
+                        continue;
+                    }
+                    if ( domains_which_are_always_single.remove( id ) ) {
+                        domains_which_are_sometimes_single_sometimes_not.add( id );
+                    }
+                    else {
+                        domains_which_never_single.add( id );
+                    }
+                }
+            }
+        }
+        try {
+            writer.write( genome );
+            writer.write( "\t" );
+            if ( genome_stats.getN() < 1 ) {
+                // No proteins: mean, SD and median columns stay blank, N is 0, min and max stay blank.
+                for( int i = 0; i < 3; ++i ) {
+                    writer.write( "\t" );
+                }
+                writer.write( "0" );
+                writer.write( "\t" );
+                writer.write( "\t" );
+            }
+            else {
+                writer.write( genome_stats.arithmeticMean() + "" );
+                writer.write( "\t" );
+                // The sample SD is only written for two or more values; otherwise its column is empty.
+                writer.write( genome_stats.getN() >= 2 ? genome_stats.sampleStandardDeviation() + "" : "" );
+                writer.write( "\t" );
+                writer.write( genome_stats.median() + "" );
+                writer.write( "\t" );
+                writer.write( genome_stats.getN() + "" );
+                writer.write( "\t" );
+                writer.write( genome_stats.getMin() + "" );
+                writer.write( "\t" );
+                writer.write( genome_stats.getMax() + "" );
+            }
+            writer.write( "\n" );
+        }
+        catch ( final IOException e ) {
+            e.printStackTrace();
+        }
+    }
  }