in progress

[jalview.git] / forester / java / src / org / forester / application / surfacing.java
diff --git a/forester/java/src/org/forester/application/surfacing.java b/forester/java/src/org/forester/application/surfacing.java

index 4f1073c..62c6981 100644 (file)
--- a/forester/java/src/org/forester/application/surfacing.java
+++ b/forester/java/src/org/forester/application/surfacing.java
@@ -6,7 +6,7 @@
  // Copyright (C) 2008-2009 Christian M. Zmasek
  // Copyright (C) 2008-2009 Burnham Institute for Medical Research
  // All rights reserved
-// 
+//
  // This library is free software; you can redistribute it and/or
  // modify it under the terms of the GNU Lesser General Public
  // License as published by the Free Software Foundation; either
@@ -16,7 +16,7 @@
  // but WITHOUT ANY WARRANTY; without even the implied warranty of
  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  // Lesser General Public License for more details.
-// 
+//
  // You should have received a copy of the GNU Lesser General Public
  // License along with this library; if not, write to the Free Software
  // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
@@ -33,9 +33,11 @@ import java.io.IOException;
  import java.io.Writer;
  import java.util.ArrayList;
  import java.util.Date;
+import java.util.HashMap;
  import java.util.HashSet;
  import java.util.List;
  import java.util.Map;
+import java.util.Map.Entry;
  import java.util.Set;
  import java.util.SortedMap;
  import java.util.SortedSet;
@@ -54,6 +56,7 @@ import org.forester.go.PfamToGoMapping;
  import org.forester.go.PfamToGoParser;
  import org.forester.io.parsers.HmmscanPerDomainTableParser;
  import org.forester.io.parsers.HmmscanPerDomainTableParser.INDIVIDUAL_SCORE_CUTOFF;
+import org.forester.io.parsers.util.ParserUtils;
  import org.forester.io.writers.PhylogenyWriter;
  import org.forester.phylogeny.Phylogeny;
  import org.forester.phylogeny.PhylogenyMethods;
@@ -71,21 +74,22 @@ import org.forester.surfacing.DomainId;
  import org.forester.surfacing.DomainLengthsTable;
  import org.forester.surfacing.DomainParsimonyCalculator;
  import org.forester.surfacing.DomainSimilarity;
+import org.forester.surfacing.DomainSimilarity.DomainSimilarityScoring;
+import org.forester.surfacing.DomainSimilarity.DomainSimilaritySortField;
  import org.forester.surfacing.DomainSimilarityCalculator;
+import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
  import org.forester.surfacing.GenomeWideCombinableDomains;
+import org.forester.surfacing.GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder;
  import org.forester.surfacing.MappingResults;
  import org.forester.surfacing.PairwiseDomainSimilarityCalculator;
  import org.forester.surfacing.PairwiseGenomeComparator;
  import org.forester.surfacing.PrintableDomainSimilarity;
+import org.forester.surfacing.PrintableDomainSimilarity.PRINT_OPTION;
  import org.forester.surfacing.Protein;
  import org.forester.surfacing.ProteinCountsBasedPairwiseDomainSimilarityCalculator;
  import org.forester.surfacing.Species;
  import org.forester.surfacing.SurfacingUtil;
-import org.forester.surfacing.DomainSimilarity.DomainSimilarityScoring;
-import org.forester.surfacing.DomainSimilarity.DomainSimilaritySortField;
-import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
-import org.forester.surfacing.GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder;
-import org.forester.surfacing.PrintableDomainSimilarity.PRINT_OPTION;
+import org.forester.util.BasicDescriptiveStatistics;
  import org.forester.util.BasicTable;
  import org.forester.util.BasicTableParser;
  import org.forester.util.CommandLineArguments;
@@ -95,6 +99,7 @@ import org.forester.util.ForesterUtil;
  
  public class surfacing {
  
+    private static final int                                  MINIMAL_NUMBER_OF_SIMILARITIES_FOR_SPLITTING                           = 1000;
      public final static String                                DOMAIN_COMBINITONS_OUTPUT_OPTION_FOR_GRAPH_ANALYSIS                    = "graph_analysis_out";
      public final static String                                DOMAIN_COMBINITONS_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS                = "_dc.dot";
      public final static String                                PARSIMONY_OUTPUT_FITCH_PRESENT_BC_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS = "_fitch_present_dc.dot";
@@ -110,20 +115,18 @@ public class surfacing {
      public final static String                                PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_FITCH_DOMAINS                        = "_fitch_glc_d";
      public final static String                                PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_FITCH_BINARY_COMBINATIONS            = "_fitch_glc_dc";
      // tables:
-    //  public final static String                                PARSIMONY_OUTPUT_FITCH_GAINS_BC                                        = "_fitch_gains_dc";
-    //  public final static String                                PARSIMONY_OUTPUT_FITCH_GAINS_HTML_BC                                   = "_fitch_gains_dc.html";
-    //  public final static String                                PARSIMONY_OUTPUT_FITCH_LOSSES_BC                                       = "_fitch_losses_dc";
-    // public final static String                                PARSIMONY_OUTPUT_FITCH_LOSSES_HTML_BC                                  = "_fitch_losses_dc.html";
-    // public final static String                                PARSIMONY_OUTPUT_FITCH_PRESENT_BC                                      = "_fitch_present_dc";
-    // public final static String                                PARSIMONY_OUTPUT_FITCH_PRESENT_HTML_BC                                 = "_fitch_present_dc.html";
-    // public final static String                                PARSIMONY_OUTPUT_DOLLO_GAINS_D                                         = "_dollo_gains_d";
-    // public final static String                                PARSIMONY_OUTPUT_DOLLO_GAINS_GOID_D                                    = "_dollo_gains_goid_d";
-    //  public final static String                                PARSIMONY_OUTPUT_DOLLO_GAINS_HTML_D                                    = "_dollo_gains_d.html";
-    // public final static String                                PARSIMONY_OUTPUT_DOLLO_LOSSES_D                                        = "_dollo_losses_d";
-    //public final static String                                PARSIMONY_OUTPUT_DOLLO_LOSSES_HTML_D                                   = "_dollo_losses_d.html";
-    // public final static String                                PARSIMONY_OUTPUT_DOLLO_PRESENT_D                                       = "_dollo_present_d";
-    public final static String                                PARSIMONY_OUTPUT_DOLLO_PRESENT_GOID_D                                  = "_dollo_present_goid_d";
-    //public final static String                                PARSIMONY_OUTPUT_DOLLO_PRESENT_HTML_D                                  = "_dollo_present_d.html";
+    public final static String                                PARSIMONY_OUTPUT_FITCH_GAINS_BC                                        = "_fitch_gains_dc";
+    public final static String                                PARSIMONY_OUTPUT_FITCH_GAINS_HTML_BC                                   = "_fitch_gains_dc.html";
+    public final static String                                PARSIMONY_OUTPUT_FITCH_LOSSES_BC                                       = "_fitch_losses_dc";
+    public final static String                                PARSIMONY_OUTPUT_FITCH_LOSSES_HTML_BC                                  = "_fitch_losses_dc.html";
+    public final static String                                PARSIMONY_OUTPUT_FITCH_PRESENT_BC                                      = "_fitch_present_dc";
+    public final static String                                PARSIMONY_OUTPUT_FITCH_PRESENT_HTML_BC                                 = "_fitch_present_dc.html";
+    public final static String                                PARSIMONY_OUTPUT_DOLLO_GAINS_D                                         = "_dollo_gains_d";
+    public final static String                                PARSIMONY_OUTPUT_DOLLO_GAINS_HTML_D                                    = "_dollo_gains_d.html";
+    public final static String                                PARSIMONY_OUTPUT_DOLLO_LOSSES_D                                        = "_dollo_losses_d";
+    public final static String                                PARSIMONY_OUTPUT_DOLLO_LOSSES_HTML_D                                   = "_dollo_losses_d.html";
+    public final static String                                PARSIMONY_OUTPUT_DOLLO_PRESENT_D                                       = "_dollo_present_d";
+    public final static String                                PARSIMONY_OUTPUT_DOLLO_PRESENT_HTML_D                                  = "_dollo_present_d.html";
      public final static String                                DOMAINS_PRESENT_NEXUS                                                  = "_dom.nex";
      public final static String                                BDC_PRESENT_NEXUS                                                      = "_dc.nex";
      // ---
@@ -146,13 +149,7 @@ public class surfacing {
      public static final String                                PARSIMONY_OUTPUT_DOLLO_PRESENT_SECONDARY_FEATURES                      = "_dollo_present_secondary_features";
      public static final String                                SECONDARY_FEATURES_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO                  = "_secondary_features_dollo"
                                                                                                                                               + ForesterConstants.PHYLO_XML_SUFFIX;
-    public static final String                                PARSIMONY_OUTPUT_DOLLO_ALL_GOID_D_BIOLOGICAL_PROCESS                   = "_dollo_biol_proc_goid_d";
-    public static final String                                PARSIMONY_OUTPUT_DOLLO_ALL_GOID_D_CELLULAR_COMPONENT                   = "_dollo_cell_comp_goid_d";
-    public static final String                                PARSIMONY_OUTPUT_DOLLO_ALL_GOID_D_MOLECULAR_FUNCTION                   = "_dollo_mol_funct_goid_d";
      public static final String                                PARSIMONY_OUTPUT_DOLLO_ALL_GOID_D_ALL_NAMESPACES                       = "_dollo_goid_d";
-    public static final String                                PARSIMONY_OUTPUT_FITCH_ALL_GOID_BC_BIOLOGICAL_PROCESS                  = "_fitch_biol_proc_goid_dc";
-    public static final String                                PARSIMONY_OUTPUT_FITCH_ALL_GOID_BC_CELLULAR_COMPONENT                  = "_fitch_cell_comp_goid_dc";
-    public static final String                                PARSIMONY_OUTPUT_FITCH_ALL_GOID_BC_MOLECULAR_FUNCTION                  = "_fitch_mol_funct_goid_dc";
      public static final String                                PARSIMONY_OUTPUT_FITCH_ALL_GOID_BC_ALL_NAMESPACES                      = "_fitch_goid_dc";
      final static private String                               HELP_OPTION_1                                                          = "help";
      final static private String                               HELP_OPTION_2                                                          = "h";
@@ -221,8 +218,6 @@ public class surfacing {
                                                                                                                                               + ForesterConstants.PHYLO_XML_SUFFIX;
      final static private String                               NJ_TREE_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX           = "_bin_combinations_NJ"
                                                                                                                                               + ForesterConstants.PHYLO_XML_SUFFIX;
-    final static private String                               DISPLAY_M_HISTOGRAMS_OPTION                                            = "mhisto";
-    //  final static private boolean DISPLAY_M_HISTOGRAMS_OPTION_DEFAULT                                    = false;
      final static private String                               JACKNIFE_OPTION                                                        = "jack";
      final static private String                               JACKNIFE_RANDOM_SEED_OPTION                                            = "seed";
      final static private String                               JACKNIFE_RATIO_OPTION                                                  = "jack_ratio";
@@ -239,16 +234,14 @@ public class surfacing {
      final static private String                               INPUT_SPECIES_TREE_OPTION                                              = "species_tree";
      final static private String                               SEQ_EXTRACT_OPTION                                                     = "prot_extract";
      final static private char                                 SEPARATOR_FOR_INPUT_VALUES                                             = '#';
-    final static private String                               PRG_VERSION                                                            = "2.003";
-    final static private String                               PRG_DATE                                                               = "2010.12.03";
+    final static private String                               PRG_VERSION                                                            = "2.210";
+    final static private String                               PRG_DATE                                                               = "2012.02.21";
      final static private String                               E_MAIL                                                                 = "czmasek@burnham.org";
      final static private String                               WWW                                                                    = "www.phylosoft.org/forester/applications/surfacing";
      final static private boolean                              IGNORE_DUFS_DEFAULT                                                    = true;
      final static private boolean                              IGNORE_COMBINATION_WITH_SAME_DEFAULLT                                  = false;
      final static private double                               MAX_E_VALUE_DEFAULT                                                    = -1;
      final static private int                                  MAX_ALLOWED_OVERLAP_DEFAULT                                            = -1;
-    final static private String                               DEFAULT_SEARCH_PARAMETER                                               = "ls";
-    final private static boolean                              VERBOSE_DEFAULT                                                        = true;
      private static final String                               RANDOM_SEED_FOR_FITCH_PARSIMONY_OPTION                                 = "random_seed";
      private static final String                               CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS                               = "consider_bdc_direction";
      private static final String                               CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS_AND_ADJACENCY                 = "consider_bdc_adj";
@@ -281,12 +274,18 @@ public class surfacing {
      private static final String                               DATA_FILE_SUFFIX                                                       = "_domain_combination_data.txt";
      private static final String                               DATA_FILE_DESC                                                         = "#SPECIES\tPRTEIN_ID\tN_TERM_DOMAIN\tC_TERM_DOMAIN\tN_TERM_DOMAIN_PER_DOMAIN_E_VALUE\tC_TERM_DOMAIN_PER_DOMAIN_E_VALUE\tN_TERM_DOMAIN_COUNTS_PER_PROTEIN\tC_TERM_DOMAIN_COUNTS_PER_PROTEIN";
      private static final INDIVIDUAL_SCORE_CUTOFF              INDIVIDUAL_SCORE_CUTOFF_DEFAULT                                        = INDIVIDUAL_SCORE_CUTOFF.FULL_SEQUENCE;
+    public static final String                                INDEPENDENT_DC_GAINS_FITCH_PARS_COUNTS_OUTPUT_SUFFIX                   = "_indep_dc_gains_fitch_counts.txt";
+    public static final String                                INDEPENDENT_DC_GAINS_FITCH_PARS_DC_OUTPUT_SUFFIX                       = "_indep_dc_gains_fitch_lists.txt";
+    public static final String                                INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_SUFFIX        = "_indep_dc_gains_fitch_lists_for_go_mapping.txt";
+    public static final String                                INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique.txt";
+    public static final String                                LIMIT_SPEC_FOR_PROT_EX                                                 = null;                                                                                                                                                                                       // e.g. "HUMAN"; set to null for not using this feature (default).
+    public static final String BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH_MAPPED  = "_dc_MAPPED_secondary_features_fitch"
+        + ForesterConstants.PHYLO_XML_SUFFIX;
+    public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_COUNTS_MAPPED_OUTPUT_SUFFIX =  "_indep_dc_gains_fitch_counts_MAPPED.txt";
+    public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_MAPPED_OUTPUT_SUFFIX =  "_indep_dc_gains_fitch_lists_MAPPED.txt";
+    public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_MAPPED.txt";
+    public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique_MAPPED.txt";
  
-    // final String error = ForesterUtil.isReadableFile( new File(
-    // input_file_properties[ i ][ 0 ] ) );
-    // if ( !ForesterUtil.isEmpty( error ) ) {
-    // ForesterUtil.fatalError( surfacing.PRG_NAME, error );
-    // }
      private static void checkWriteabilityForPairwiseComparisons( final PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option,
                                                                   final String[][] input_file_properties,
                                                                   final String automated_pairwise_comparison_suffix,
@@ -351,7 +350,7 @@ public class surfacing {
      }
  
      /**
-     * Warning: This sideeffects 'all_bin_domain_combinations_encountered'!
+     * Warning: This method modifies 'all_bin_domain_combinations_encountered' as a side effect!
       * 
       * 
       * @param output_file
@@ -500,7 +499,7 @@ public class surfacing {
              }
              try {
                  final Phylogeny[] p_array = ParserBasedPhylogenyFactory.getInstance()
-                        .create( intree_file, ForesterUtil.createParserDependingOnFileType( intree_file, true ) );
+                        .create( intree_file, ParserUtils.createParserDependingOnFileType( intree_file, true ) );
                  if ( p_array.length < 1 ) {
                      ForesterUtil.fatalError( surfacing.PRG_NAME, "file [" + intree_file
                              + "] does not contain any phylogeny in phyloXML format" );
@@ -522,9 +521,11 @@ public class surfacing {
                  ForesterUtil.fatalError( surfacing.PRG_NAME, "input tree [" + intree_file + "] is not rooted" );
              }
              if ( intree.getNumberOfExternalNodes() < number_of_genomes ) {
-                ForesterUtil.fatalError( surfacing.PRG_NAME, "number of external nodes ["
-                        + intree.getNumberOfExternalNodes() + "] of input tree [" + intree_file
-                        + "] is smaller than the number of genomes the be analyzed [" + number_of_genomes + "]" );
+                ForesterUtil.fatalError( surfacing.PRG_NAME,
+                                         "number of external nodes [" + intree.getNumberOfExternalNodes()
+                                                 + "] of input tree [" + intree_file
+                                                 + "] is smaller than the number of genomes the be analyzed ["
+                                                 + number_of_genomes + "]" );
              }
              final StringBuilder parent_names = new StringBuilder();
              final int nodes_lacking_name = SurfacingUtil.getNumberOfNodesLackingName( intree, parent_names );
@@ -617,7 +618,6 @@ public class surfacing {
          allowed_options.add( surfacing.GO_NAMESPACE_LIMIT_OPTION );
          allowed_options.add( surfacing.PAIRWISE_DOMAIN_COMPARISONS_OPTION );
          allowed_options.add( surfacing.IGNORE_DOMAINS_WITHOUT_COMBINATIONS_IN_ALL_SPECIES_OPTION );
-        allowed_options.add( surfacing.DISPLAY_M_HISTOGRAMS_OPTION );
          allowed_options.add( surfacing.CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS );
          allowed_options.add( JACKNIFE_OPTION );
          allowed_options.add( JACKNIFE_RANDOM_SEED_OPTION );
@@ -1304,8 +1304,8 @@ public class surfacing {
                              .createDomainIdToSecondaryFeaturesMap( secondary_features_map_files[ i ] );
                  }
                  catch ( final IOException e ) {
-                    ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read secondary features map file: "
-                            + e.getMessage() );
+                    ForesterUtil.fatalError( surfacing.PRG_NAME,
+                                             "cannot read secondary features map file: " + e.getMessage() );
                  }
                  catch ( final Exception e ) {
                      ForesterUtil.fatalError( surfacing.PRG_NAME, "problem with contents of features map file ["
@@ -1332,10 +1332,6 @@ public class surfacing {
                                       "no (acceptable) go id to term mapping file provided ('GO OBO file') (-"
                                               + surfacing.GO_OBO_FILE_USE_OPTION + "=<file>)" );
          }
-        boolean display_histograms = false;
-        if ( cla.isOptionSet( surfacing.DISPLAY_M_HISTOGRAMS_OPTION ) ) {
-            display_histograms = true;
-        }
          System.out.println( "Output directory            : " + out_dir );
          if ( input_file_names_from_file != null ) {
              System.out.println( "Input files names from      : " + input_files_file + " ["
@@ -1652,7 +1648,7 @@ public class surfacing {
              }
          } // if ( perform_pwc ) {
          System.out.println();
-        html_desc.append( "<tr><td>Command line:</td><td>" + cla.getCommandLineArgsAsString() + "</td></tr>" + nl );
+        html_desc.append( "<tr><td>Command line:</td><td>\n" + cla.getCommandLineArgsAsString() + "\n</td></tr>" + nl );
          System.out.println( "Command line                : " + cla.getCommandLineArgsAsString() );
          BufferedWriter[] query_domains_writer_ary = null;
          List<DomainId>[] query_domain_ids_array = null;
@@ -1735,6 +1731,35 @@ public class surfacing {
          catch ( final IOException e2 ) {
              ForesterUtil.fatalError( surfacing.PRG_NAME, e2.getMessage() );
          }
+        final DescriptiveStatistics protein_coverage_stats = new BasicDescriptiveStatistics();
+        final DescriptiveStatistics all_genomes_domains_per_potein_stats = new BasicDescriptiveStatistics();
+        final SortedMap<Integer, Integer> all_genomes_domains_per_potein_histo = new TreeMap<Integer, Integer>();
+        final SortedSet<String> domains_which_are_always_single = new TreeSet<String>();
+        final SortedSet<String> domains_which_are_sometimes_single_sometimes_not = new TreeSet<String>();
+        final SortedSet<String> domains_which_never_single = new TreeSet<String>();
+        BufferedWriter domains_per_potein_stats_writer = null;
+        try {
+            domains_per_potein_stats_writer = new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR
+                    + output_file + "__domains_per_potein_stats.txt" ) );
+            domains_per_potein_stats_writer.write( "Genome" );
+            domains_per_potein_stats_writer.write( "\t" );
+            domains_per_potein_stats_writer.write( "Mean" );
+            domains_per_potein_stats_writer.write( "\t" );
+            domains_per_potein_stats_writer.write( "SD" );
+            domains_per_potein_stats_writer.write( "\t" );
+            domains_per_potein_stats_writer.write( "Median" );
+            domains_per_potein_stats_writer.write( "\t" );
+            domains_per_potein_stats_writer.write( "N" );
+            domains_per_potein_stats_writer.write( "\t" );
+            domains_per_potein_stats_writer.write( "Min" );
+            domains_per_potein_stats_writer.write( "\t" );
+            domains_per_potein_stats_writer.write( "Max" );
+            domains_per_potein_stats_writer.write( "\n" );
+        }
+        catch ( final IOException e3 ) {
+            e3.printStackTrace();
+        }
+        // Main loop:
          for( int i = 0; i < number_of_genomes; ++i ) {
              System.out.println();
              System.out.println( ( i + 1 ) + "/" + number_of_genomes );
@@ -1762,12 +1787,14 @@ public class surfacing {
                                                            input_file_properties[ i ][ 1 ],
                                                            filter,
                                                            filter_type,
-                                                          ind_score_cutoff );
+                                                          ind_score_cutoff,
+                                                          true );
              }
              else {
                  parser = new HmmscanPerDomainTableParser( new File( input_file_properties[ i ][ 0 ] ),
                                                            input_file_properties[ i ][ 1 ],
-                                                          ind_score_cutoff );
+                                                          ind_score_cutoff,
+                                                          true );
              }
              if ( e_value_max >= 0.0 ) {
                  parser.setEValueMaximum( e_value_max );
@@ -1778,8 +1805,7 @@ public class surfacing {
              if ( max_allowed_overlap != surfacing.MAX_ALLOWED_OVERLAP_DEFAULT ) {
                  parser.setMaxAllowedOverlap( max_allowed_overlap );
              }
-            parser
-                    .setReturnType( HmmscanPerDomainTableParser.ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN );
+            parser.setReturnType( HmmscanPerDomainTableParser.ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN );
              if ( individual_score_cutoffs != null ) {
                  parser.setIndividualScoreCutoffs( individual_score_cutoffs );
              }
@@ -1799,10 +1825,16 @@ public class surfacing {
                  System.out.println( "Domains ignored due to virus like id: " );
                  ForesterUtil.printCountingMap( parser.getDomainsIgnoredDueToVirusLikeIdCountsMap() );
              }
+            final double coverage = ( double ) protein_list.size() / parser.getProteinsEncountered();
+            protein_coverage_stats.addValue( coverage );
              System.out.println( "Number of proteins encountered                 : " + parser.getProteinsEncountered() );
              log( "Number of proteins encountered                 : " + parser.getProteinsEncountered(), log_writer );
              System.out.println( "Number of proteins stored                      : " + protein_list.size() );
              log( "Number of proteins stored                      : " + protein_list.size(), log_writer );
+            System.out.println( "Coverage                                       : "
+                    + ForesterUtil.roundToInt( 100.0 * coverage ) + "%" );
+            log( "Coverage                                       : " + ForesterUtil.roundToInt( 100.0 * coverage )
+                    + "%", log_writer );
              System.out.println( "Domains encountered                            : " + parser.getDomainsEncountered() );
              log( "Domains encountered                            : " + parser.getDomainsEncountered(), log_writer );
              System.out.println( "Domains stored                                 : " + parser.getDomainsStored() );
@@ -1813,7 +1845,8 @@ public class surfacing {
              System.out.println( "Domains ignored due to individual score cutoffs: "
                      + parser.getDomainsIgnoredDueToIndividualScoreCutoff() );
              log( "Domains ignored due to individual score cutoffs: "
-                    + parser.getDomainsIgnoredDueToIndividualScoreCutoff(), log_writer );
+                         + parser.getDomainsIgnoredDueToIndividualScoreCutoff(),
+                 log_writer );
              System.out.println( "Domains ignored due to E-value                 : "
                      + parser.getDomainsIgnoredDueToEval() );
              log( "Domains ignored due to E-value                 : " + parser.getDomainsIgnoredDueToEval(), log_writer );
@@ -1829,7 +1862,8 @@ public class surfacing {
              System.out.println( "Domains ignored due negative domain filter     : "
                      + parser.getDomainsIgnoredDueToNegativeDomainFilter() );
              log( "Domains ignored due negative domain filter     : "
-                    + parser.getDomainsIgnoredDueToNegativeDomainFilter(), log_writer );
+                         + parser.getDomainsIgnoredDueToNegativeDomainFilter(),
+                 log_writer );
              System.out.println( "Domains ignored due to overlap                 : "
                      + parser.getDomainsIgnoredDueToOverlap() );
              log( "Domains ignored due to overlap                 : " + parser.getDomainsIgnoredDueToOverlap(),
@@ -1880,6 +1914,14 @@ public class surfacing {
              catch ( final IOException e ) {
                  ForesterUtil.fatalError( surfacing.PRG_NAME, e.toString() );
              }
+            SurfacingUtil.domainsPerProteinsStatistics( input_file_properties[ i ][ 1 ],
+                                                        protein_list,
+                                                        all_genomes_domains_per_potein_stats,
+                                                        all_genomes_domains_per_potein_histo,
+                                                        domains_which_are_always_single,
+                                                        domains_which_are_sometimes_single_sometimes_not,
+                                                        domains_which_never_single,
+                                                        domains_per_potein_stats_writer );
              gwcd_list.add( BasicGenomeWideCombinableDomains
                      .createInstance( protein_list,
                                       ignore_combination_with_same,
@@ -1911,7 +1953,8 @@ public class surfacing {
                          SurfacingUtil.extractProteinNames( protein_list,
                                                             query_domain_ids_array[ j ],
                                                             query_domains_writer_ary[ j ],
-                                                           "\t" );
+                                                           "\t",
+                                                           LIMIT_SPEC_FOR_PROT_EX );
                          query_domains_writer_ary[ j ].flush();
                      }
                      catch ( final IOException e ) {
@@ -1930,19 +1973,48 @@ public class surfacing {
              }
              System.gc();
          } // for( int i = 0; i < number_of_genomes; ++i ) {
+        ForesterUtil.programMessage( PRG_NAME, "Wrote domain promiscuities to: "
+                + per_genome_domain_promiscuity_statistics_file );
+        //
          try {
-            per_genome_domain_promiscuity_statistics_writer.flush();
-            per_genome_domain_promiscuity_statistics_writer.close();
-            dc_data_writer.flush();
-            dc_data_writer.close();
-            log_writer.flush();
-            log_writer.close();
+            domains_per_potein_stats_writer.write( "ALL" );
+            domains_per_potein_stats_writer.write( "\t" );
+            domains_per_potein_stats_writer.write( all_genomes_domains_per_potein_stats.arithmeticMean() + "" );
+            domains_per_potein_stats_writer.write( "\t" );
+            domains_per_potein_stats_writer.write( all_genomes_domains_per_potein_stats.sampleStandardDeviation() + "" );
+            domains_per_potein_stats_writer.write( "\t" );
+            domains_per_potein_stats_writer.write( all_genomes_domains_per_potein_stats.median() + "" );
+            domains_per_potein_stats_writer.write( "\t" );
+            domains_per_potein_stats_writer.write( all_genomes_domains_per_potein_stats.getN() + "" );
+            domains_per_potein_stats_writer.write( "\t" );
+            domains_per_potein_stats_writer.write( all_genomes_domains_per_potein_stats.getMin() + "" );
+            domains_per_potein_stats_writer.write( "\t" );
+            domains_per_potein_stats_writer.write( all_genomes_domains_per_potein_stats.getMax() + "" );
+            domains_per_potein_stats_writer.write( "\n" );
+            domains_per_potein_stats_writer.close();
+            printOutPercentageOfMultidomainProteins( all_genomes_domains_per_potein_histo, log_writer );
+            ForesterUtil.map2file( new File( out_dir + ForesterUtil.FILE_SEPARATOR + output_file
+                    + "__all_genomes_domains_per_potein_histo.txt" ), all_genomes_domains_per_potein_histo, "\t", "\n" );
+            ForesterUtil.collection2file( new File( out_dir + ForesterUtil.FILE_SEPARATOR + output_file
+                    + "__domains_always_single_.txt" ), domains_which_are_always_single, "\n" );
+            ForesterUtil.collection2file( new File( out_dir + ForesterUtil.FILE_SEPARATOR + output_file
+                    + "__domains_single_or_combined.txt" ), domains_which_are_sometimes_single_sometimes_not, "\n" );
+            ForesterUtil.collection2file( new File( out_dir + ForesterUtil.FILE_SEPARATOR + output_file
+                    + "__domains_always_combined.txt" ), domains_which_never_single, "\n" );
+            ForesterUtil.programMessage( PRG_NAME,
+                                         "Average of proteins with a least one domain assigned: "
+                                                 + ( 100 * protein_coverage_stats.arithmeticMean() ) + "% (+/-"
+                                                 + ( 100 * protein_coverage_stats.sampleStandardDeviation() ) + "%)" );
+            ForesterUtil.programMessage( PRG_NAME, "Range of proteins with a least one domain assigned: " + 100
+                    * protein_coverage_stats.getMin() + "%-" + 100 * protein_coverage_stats.getMax() + "%" );
+            log( "Average of prot with a least one dom assigned  : " + ( 100 * protein_coverage_stats.arithmeticMean() )
+                    + "% (+/-" + ( 100 * protein_coverage_stats.sampleStandardDeviation() ) + "%)", log_writer );
+            log( "Range of prot with a least one dom assigned    : " + 100 * protein_coverage_stats.getMin() + "%-"
+                    + 100 * protein_coverage_stats.getMax() + "%", log_writer );
          }
          catch ( final IOException e2 ) {
              ForesterUtil.fatalError( surfacing.PRG_NAME, e2.getLocalizedMessage() );
          }
-        ForesterUtil.programMessage( PRG_NAME, "Wrote domain promiscuities to: "
-                + per_genome_domain_promiscuity_statistics_file );
          if ( query_domains_writer_ary != null ) {
              for( int j = 0; j < query_domain_ids_array.length; j++ ) {
                  try {
@@ -1953,6 +2025,14 @@ public class surfacing {
                  }
              }
          }
+        try {
+            per_genome_domain_promiscuity_statistics_writer.close();
+            dc_data_writer.close();
+            log_writer.close();
+        }
+        catch ( final IOException e2 ) {
+            ForesterUtil.fatalError( surfacing.PRG_NAME, e2.getLocalizedMessage() );
+        }
          if ( PERFORM_DOMAIN_LENGTH_ANALYSIS ) {
              try {
                  SurfacingUtil.executeDomainLengthAnalysis( input_file_properties,
@@ -2003,11 +2083,19 @@ public class surfacing {
          DescriptiveStatistics pw_stats = null;
          try {
              String my_outfile = output_file.toString();
-            if ( !my_outfile.endsWith( ".html" ) ) {
+            Map<Character, Writer> split_writers = null;
+            Writer writer = null;
+            if ( similarities.size() > MINIMAL_NUMBER_OF_SIMILARITIES_FOR_SPLITTING ) {
+                if ( my_outfile.endsWith( ".html" ) ) {
+                    my_outfile = my_outfile.substring( 0, my_outfile.length() - 5 );
+                }
+                split_writers = new HashMap<Character, Writer>();
+                createSplitWriters( out_dir, my_outfile, split_writers );
+            }
+            else if ( !my_outfile.endsWith( ".html" ) ) {
                  my_outfile += ".html";
+                writer = new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile ) );
              }
-            final Writer writer = new BufferedWriter( new FileWriter( out_dir == null ? my_outfile : out_dir
-                    + ForesterUtil.FILE_SEPARATOR + my_outfile ) );
              List<Species> species_order = null;
              if ( species_matrix ) {
                  species_order = new ArrayList<Species>();
@@ -2027,6 +2115,7 @@ public class surfacing {
                      .writeDomainSimilaritiesToFile( html_desc,
                                                      new StringBuilder( number_of_genomes + " genomes" ),
                                                      writer,
+                                                    split_writers,
                                                      similarities,
                                                      number_of_genomes == 2,
                                                      species_order,
@@ -2069,7 +2158,6 @@ public class surfacing {
                                               true,
                                               surfacing.PAIRWISE_DOMAIN_COMPARISONS_PREFIX,
                                               surfacing.PRG_NAME,
-                                             display_histograms,
                                               out_dir,
                                               write_pwc_files );
              String matrix_output_file = new String( output_file.toString() );
@@ -2082,12 +2170,13 @@ public class surfacing {
              }
              SurfacingUtil.writeMatrixToFile( new File( matrix_output_file
                      + surfacing.MATRIX_MEAN_SCORE_BASED_GENOME_DISTANCE_SUFFIX ), pwgc.getDomainDistanceScoresMeans() );
+            SurfacingUtil
+                    .writeMatrixToFile( new File( matrix_output_file
+                                                + surfacing.MATRIX_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX ),
+                                        pwgc.getSharedBinaryCombinationsBasedDistances() );
              SurfacingUtil.writeMatrixToFile( new File( matrix_output_file
-                    + surfacing.MATRIX_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX ), pwgc
-                    .getSharedBinaryCombinationsBasedDistances() );
-            SurfacingUtil.writeMatrixToFile( new File( matrix_output_file
-                    + surfacing.MATRIX_SHARED_DOMAINS_BASED_GENOME_DISTANCE_SUFFIX ), pwgc
-                    .getSharedDomainsBasedDistances() );
+                                                     + surfacing.MATRIX_SHARED_DOMAINS_BASED_GENOME_DISTANCE_SUFFIX ),
+                                             pwgc.getSharedDomainsBasedDistances() );
              final Phylogeny nj_gd = SurfacingUtil.createNjTreeBasedOnMatrixToFile( new File( matrix_output_file
                      + surfacing.NJ_TREE_MEAN_SCORE_BASED_GENOME_DISTANCE_SUFFIX ), pwgc.getDomainDistanceScoresMeans()
                      .get( 0 ) );
@@ -2101,16 +2190,6 @@ public class surfacing {
              inferred_trees.add( nj_gd );
              inferred_trees.add( nj_bc );
              inferred_trees.add( nj_d );
-            // final List<HistogramData> histogram_datas = pwgc.getHistogramDatas();
-            //            if ( infer_species_trees ) {
-            //                inferred_trees = new ArrayList<Phylogeny>();
-            //                final List<Phylogeny> inferred_trees_bc =  inferSpeciesTrees( new File( output_file + INFERRED_SBC_BASED_NJ_SPECIES_TREE_SUFFIX ), pwgc
-            //                        .getSharedBinaryCombinationsBasedDistances() );
-            //                final List<Phylogeny> inferred_trees_d =  inferSpeciesTrees( new File( output_file + INFERRED_SD_BASED_NJ_SPECIES_TREE_SUFFIX ), pwgc
-            //                        .getSharedDomainsBasedDistances() );
-            //                inferred_trees.addAll( inferred_trees_bc );
-            //                inferred_trees.addAll( inferred_trees_d );
-            //            }
              if ( jacknifed_distances ) {
                  pwgc.performPairwiseComparisonsJacknifed( species,
                                                            number_of_genomes,
@@ -2119,14 +2198,19 @@ public class surfacing {
                                                            jacknife_resamplings,
                                                            jacknife_ratio,
                                                            random_seed );
-                SurfacingUtil.writeMatrixToFile( new File( matrix_output_file + "_"
-                        + ForesterUtil.round( jacknife_ratio, 2 ) + "_" + jacknife_resamplings
-                        + surfacing.MATRIX_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX ), pwgc
-                        .getSharedBinaryCombinationsBasedDistances() );
-                SurfacingUtil.writeMatrixToFile( new File( matrix_output_file + "_"
-                        + ForesterUtil.round( jacknife_ratio, 2 ) + "_" + jacknife_resamplings
-                        + surfacing.MATRIX_SHARED_DOMAINS_BASED_GENOME_DISTANCE_SUFFIX ), pwgc
-                        .getSharedDomainsBasedDistances() );
+                SurfacingUtil
+                        .writeMatrixToFile( new File( matrix_output_file
+                                                    + "_"
+                                                    + ForesterUtil.round( jacknife_ratio, 2 )
+                                                    + "_"
+                                                    + jacknife_resamplings
+                                                    + surfacing.MATRIX_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX ),
+                                            pwgc.getSharedBinaryCombinationsBasedDistances() );
+                SurfacingUtil
+                        .writeMatrixToFile( new File( matrix_output_file + "_" + ForesterUtil.round( jacknife_ratio, 2 )
+                                                    + "_" + jacknife_resamplings
+                                                    + surfacing.MATRIX_SHARED_DOMAINS_BASED_GENOME_DISTANCE_SUFFIX ),
+                                            pwgc.getSharedDomainsBasedDistances() );
                  //                if ( infer_species_trees ) {
                  //                    inferSpeciesTrees( new File( output_file + "_" + jacknife_resamplings
                  //                            + INFERRED_SBC_BASED_NJ_SPECIES_TREE_SUFFIX ), pwgc
@@ -2135,17 +2219,6 @@ public class surfacing {
                  //                            + INFERRED_SD_BASED_NJ_SPECIES_TREE_SUFFIX ), pwgc.getSharedDomainsBasedDistances() );
                  //                }
              }
-            if ( display_histograms ) {
-                //                final List<HistogramData> histogram_datas_all = new ArrayList<HistogramData>();
-                //                histogram_datas_all.add( new HistogramData( "all",
-                //                                                            values_for_all_scores_histogram,
-                //                                                            null,
-                //                                                            20 ) );
-                //                final HistogramsFrame hf_all = new HistogramsFrame( histogram_datas_all );
-                //                final HistogramsFrame hf = new HistogramsFrame( histogram_datas );
-                //                hf_all.setVisible( true );
-                //                hf.setVisible( true );
-            }
          } // if ( ( output_file != null ) && ( number_of_genomes > 2 ) && !isEmpty( automated_pairwise_comparison_suffix ) )
          if ( ( out_dir != null ) && ( !perform_pwc ) ) {
              output_file = new File( out_dir + ForesterUtil.FILE_SEPARATOR + output_file );
@@ -2309,6 +2382,76 @@ public class surfacing {
          System.out.println();
      }
  
+    /**
+     * Opens one buffered HTML output file per split key and registers the resulting
+     * writer in <code>split_writers</code>.
+     * <p>
+     * The keys are the lower-case letters 'a' to 'z' plus '0'. The file for a key is
+     * named <code>my_outfile + "_domains_" + KEY + ".html"</code>, where KEY is the
+     * upper-cased key character ('0' stays '0').
+     * NOTE(review): '0' is presumably the bucket for names not starting with a
+     * letter -- confirm against the consumer of <code>split_writers</code>.
+     *
+     * @param out_dir
+     *            directory in which the files are created; if null, the file names
+     *            are used as given (no directory is prepended)
+     * @param my_outfile
+     *            common base name of all split files
+     * @param split_writers
+     *            map receiving the opened writers, keyed by split character
+     * @throws IOException
+     *             if one of the files cannot be opened; in that case every writer
+     *             opened by this call is closed and removed from
+     *             <code>split_writers</code> again before the exception is rethrown
+     */
+    private static void createSplitWriters( final File out_dir,
+                                            final String my_outfile,
+                                            final Map<Character, Writer> split_writers ) throws IOException {
+        // A null out_dir must not end up as the literal text "null" in the path.
+        final String base = ( out_dir == null ? my_outfile : out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile )
+                + "_domains_";
+        final String keys = "abcdefghijklmnopqrstuvwxyz0";
+        final List<Writer> opened = new ArrayList<Writer>();
+        try {
+            for( int i = 0; i < keys.length(); ++i ) {
+                final char key = keys.charAt( i );
+                final Writer w = new BufferedWriter( new FileWriter( base + Character.toUpperCase( key ) + ".html" ) );
+                opened.add( w );
+                split_writers.put( key, w );
+            }
+        }
+        catch ( final IOException e ) {
+            // Do not leak the file handles that were opened before the failure.
+            for( final Writer w : opened ) {
+                try {
+                    w.close();
+                }
+                catch ( final IOException ignored ) {
+                    // Best effort only: the original failure is the one reported to the caller.
+                }
+            }
+            split_writers.values().removeAll( opened );
+            throw e;
+        }
+    }
+
+    /**
+     * Reports the percentage of multidomain proteins, both as a program message and
+     * in the log.
+     * <p>
+     * The percentage is computed as <code>100 * (sum - histo[1]) / sum</code>, where
+     * <code>sum</code> is the sum of all values of the histogram and
+     * <code>histo[1]</code> is the value stored under key 1. A missing entry for
+     * key 1 is treated as 0, and an empty (or all-zero) histogram is reported as 0%.
+     * NOTE(review): the histogram presumably maps "number of domains per protein"
+     * to "number of proteins" -- confirm against
+     * SurfacingUtil.domainsPerProteinsStatistics.
+     *
+     * @param all_genomes_domains_per_potein_histo
+     *            histogram over all genomes, must not be null
+     * @param log_writer
+     *            writer handed on to <code>log</code>
+     */
+    private static void printOutPercentageOfMultidomainProteins( final SortedMap<Integer, Integer> all_genomes_domains_per_potein_histo,
+                                                                 final Writer log_writer ) {
+        int sum = 0;
+        for( final Entry<Integer, Integer> entry : all_genomes_domains_per_potein_histo.entrySet() ) {
+            sum += entry.getValue();
+        }
+        // get() returns null when key 1 is absent; unboxing it directly would throw a NullPointerException.
+        final Integer single_domain = all_genomes_domains_per_potein_histo.get( 1 );
+        final int single_domain_count = single_domain == null ? 0 : single_domain;
+        // Guard against 0/0, which would otherwise be reported as "NaN%".
+        final double percentage = sum == 0 ? 0.0 : ( 100.0 * ( sum - single_domain_count ) ) / sum;
+        ForesterUtil.programMessage( PRG_NAME, "Percentage of multidomain proteins: " + percentage + "%" );
+        log( "Percentage of multidomain proteins:            : " + percentage + "%", log_writer );
+    }
+
      private static void preparePhylogenyForParsimonyAnalyses( final Phylogeny intree,
                                                                final String[][] input_file_properties ) {
          final String[] genomes = new String[ input_file_properties.length ];
@@ -2325,12 +2468,21 @@ public class surfacing {
              final PhylogenyNode n = it.next();
              if ( ForesterUtil.isEmpty( n.getName() ) ) {
                  if ( n.getNodeData().isHasTaxonomy()
+                        && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
+                    n.setName( n.getNodeData().getTaxonomy().getTaxonomyCode() );
+                }
+                else if ( n.getNodeData().isHasTaxonomy()
                          && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
                      n.setName( n.getNodeData().getTaxonomy().getScientificName() );
                  }
+                else if ( n.getNodeData().isHasTaxonomy()
+                        && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getCommonName() ) ) {
+                    n.setName( n.getNodeData().getTaxonomy().getCommonName() );
+                }
                  else {
-                    ForesterUtil.fatalError( surfacing.PRG_NAME,
-                                             "node without both name and scientific taxonomy name found" );
+                    ForesterUtil
+                            .fatalError( surfacing.PRG_NAME,
+                                         "node with no name, scientific name, common name, or taxonomy code present" );
                  }
              }
          }
@@ -2422,8 +2574,6 @@ public class surfacing {
          System.out.println( surfacing.INPUT_SPECIES_TREE_OPTION
                  + ": species tree, to perform (Dollo, Fitch) parismony analyses" );
          System.out
-                .println( surfacing.DISPLAY_M_HISTOGRAMS_OPTION + ": to display multiple histograms (using fluorite)" );
-        System.out
                  .println( JACKNIFE_OPTION
                          + ": perform jacknife resampling for domain and binary domain combination based distance matrices [default resamplings: "
                          + JACKNIFE_NUMBER_OF_RESAMPLINGS_DEFAULT + "]" );
@@ -2463,7 +2613,7 @@ public class surfacing {
          System.out.println();
          System.out.println();
          System.out.println( "Example: java -Xms128m -Xmx512m -cp path/to/forester.jar"
-                + "org.forester.application.surfacing -detail=punctilious -o=TEST.html -pwc=TEST"
+                + " org.forester.application.surfacing -detail=punctilious -o=TEST.html -pwc=TEST"
                  + " -cos=Pfam_ls_22_TC2 -p2g=pfam2go -obo=gene_ontology_edit.obo "
                  + "-dc_sort=dom -ignore_with_self -no_singles -e=0.001 -mo=1 -no_eo "
                  + "-ds_output=detailed_html -scoring=domains -sort=alpha -" + JACKNIFE_OPTION
@@ -2646,7 +2796,11 @@ public class surfacing {
              SurfacingUtil.checkForOutputFileWriteability( out );
              try {
                  final Writer proteins_file_writer = new BufferedWriter( new FileWriter( out ) );
-                SurfacingUtil.extractProteinNames( protein_lists_per_species, domain, proteins_file_writer, "\t" );
+                SurfacingUtil.extractProteinNames( protein_lists_per_species,
+                                                   domain,
+                                                   proteins_file_writer,
+                                                   "\t",
+                                                   LIMIT_SPEC_FOR_PROT_EX );
                  proteins_file_writer.close();
              }
              catch ( final IOException e ) {