in progress

[jalview.git] / forester / java / src / org / forester / application / surfacing.java
diff --git a/forester/java/src/org/forester/application/surfacing.java b/forester/java/src/org/forester/application/surfacing.java

index 394c4f6..eb994cb 100644 (file)
--- a/forester/java/src/org/forester/application/surfacing.java
+++ b/forester/java/src/org/forester/application/surfacing.java
@@ -119,13 +119,13 @@ public class surfacing {
      // ---
      public final static String                                      PRG_NAME                                                                      = "surfacing";
      public static final String                                      DOMAINS_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO                                    = "_d_dollo"
-                                                                                                                                                          + ForesterConstants.PHYLO_XML_SUFFIX;
+            + ForesterConstants.PHYLO_XML_SUFFIX;
      public static final String                                      DOMAINS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH                                    = "_d_fitch"
-                                                                                                                                                          + ForesterConstants.PHYLO_XML_SUFFIX;
+            + ForesterConstants.PHYLO_XML_SUFFIX;
      public static final String                                      BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO                 = "_dc_dollo"
-                                                                                                                                                          + ForesterConstants.PHYLO_XML_SUFFIX;
+            + ForesterConstants.PHYLO_XML_SUFFIX;
      public static final String                                      BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH                 = "_dc_fitch"
-                                                                                                                                                          + ForesterConstants.PHYLO_XML_SUFFIX;
+            + ForesterConstants.PHYLO_XML_SUFFIX;
      public static final String                                      NEXUS_EXTERNAL_DOMAINS                                                        = "_dom.nex";
      public static final String                                      NEXUS_EXTERNAL_DOMAIN_COMBINATIONS                                            = "_dc.nex";
      public static final String                                      NEXUS_SECONDARY_FEATURES                                                      = "_secondary_features.nex";
@@ -135,7 +135,7 @@ public class surfacing {
      public static final String                                      PARSIMONY_OUTPUT_DOLLO_LOSSES_SECONDARY_FEATURES                              = "_dollo_losses_secondary_features";
      public static final String                                      PARSIMONY_OUTPUT_DOLLO_PRESENT_SECONDARY_FEATURES                             = "_dollo_present_secondary_features";
      public static final String                                      SECONDARY_FEATURES_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO                         = "_secondary_features_dollo"
-                                                                                                                                                          + ForesterConstants.PHYLO_XML_SUFFIX;
+            + ForesterConstants.PHYLO_XML_SUFFIX;
      public static final String                                      PARSIMONY_OUTPUT_DOLLO_ALL_GOID_D_ALL_NAMESPACES                              = "_dollo_goid_d";
      public static final String                                      PARSIMONY_OUTPUT_FITCH_ALL_GOID_BC_ALL_NAMESPACES                             = "_fitch_goid_dc";
      final static private String                                     HELP_OPTION_1                                                                 = "help";
@@ -204,19 +204,19 @@ public class surfacing {
      final static private String                                     MATRIX_SHARED_DOMAINS_BASED_GENOME_DISTANCE_SUFFIX                            = "_domains.pwd";
      final static private String                                     MATRIX_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX                   = "_bin_combinations.pwd";
      final static private String                                     NJ_TREE_MEAN_SCORE_BASED_GENOME_DISTANCE_SUFFIX                               = "_mean_score_NJ"
-                                                                                                                                                          + ForesterConstants.PHYLO_XML_SUFFIX;
+            + ForesterConstants.PHYLO_XML_SUFFIX;
      final static private String                                     NJ_TREE_SHARED_DOMAINS_BASED_GENOME_DISTANCE_SUFFIX                           = "_domains_NJ"
-                                                                                                                                                          + ForesterConstants.PHYLO_XML_SUFFIX;
+            + ForesterConstants.PHYLO_XML_SUFFIX;
      final static private String                                     NJ_TREE_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX                  = "_bin_combinations_NJ"
-                                                                                                                                                          + ForesterConstants.PHYLO_XML_SUFFIX;
+            + ForesterConstants.PHYLO_XML_SUFFIX;
      final static private String                                     FILTER_POSITIVE_OPTION                                                        = "pos_filter";
      final static private String                                     FILTER_NEGATIVE_OPTION                                                        = "neg_filter";
      final static private String                                     FILTER_NEGATIVE_DOMAINS_OPTION                                                = "neg_dom_filter";
      final static private String                                     INPUT_GENOMES_FILE_OPTION                                                     = "genomes";
      final static private String                                     INPUT_SPECIES_TREE_OPTION                                                     = "species_tree";
      final static private String                                     SEQ_EXTRACT_OPTION                                                            = "prot_extract";
-    final static private String                                     PRG_VERSION                                                                   = "2.403";
-    final static private String                                     PRG_DATE                                                                      = "131127";
+    final static private String                                     PRG_VERSION                                                                   = "2.404";
+    final static private String                                     PRG_DATE                                                                      = "140709";
      final static private String                                     E_MAIL                                                                        = "czmasek@burnham.org";
      final static private String                                     WWW                                                                           = "https://sites.google.com/site/cmzmasek/home/software/forester/surfacing";
      final static private boolean                                    IGNORE_DUFS_DEFAULT                                                           = true;
@@ -264,13 +264,13 @@ public class surfacing {
      public static final String                                      INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_UNIQUE_SUFFIX        = "_indep_dc_gains_fitch_lists_for_go_mapping_unique.txt";
      public static final String                                      LIMIT_SPEC_FOR_PROT_EX                                                        = null;                                                                                                                                                                                       // e.g. "HUMAN"; set to null for not using this feature (default).
      public static final String                                      BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH_MAPPED          = "_dc_MAPPED_secondary_features_fitch"
-                                                                                                                                                          + ForesterConstants.PHYLO_XML_SUFFIX;
+            + ForesterConstants.PHYLO_XML_SUFFIX;
      public static final String                                      INDEPENDENT_DC_GAINS_FITCH_PARS_COUNTS_MAPPED_OUTPUT_SUFFIX                   = "_indep_dc_gains_fitch_counts_MAPPED.txt";
      public static final String                                      INDEPENDENT_DC_GAINS_FITCH_PARS_DC_MAPPED_OUTPUT_SUFFIX                       = "_indep_dc_gains_fitch_lists_MAPPED.txt";
      public static final String                                      INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_SUFFIX        = "_indep_dc_gains_fitch_lists_for_go_mapping_MAPPED.txt";
      public static final String                                      INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique_MAPPED.txt";
      private static final boolean                                    CALC_SIMILARITY_SCORES                                                        = false;
-
+    @SuppressWarnings("unchecked")
      public static void main( final String args[] ) {
          final long start_time = new Date().getTime();
          // final StringBuffer log = new StringBuffer();
@@ -379,9 +379,9 @@ public class surfacing {
          if ( cla.isOptionSet( DOMAIN_COMBINITONS_OUTPUT_OPTION_FOR_GRAPH_ANALYSIS ) ) {
              output_binary_domain_combinationsfor_graph_analysis = true;
          }
-        final boolean output_binary_domain_combinationsfor_counts = false;
+        boolean output_binary_domain_combinationsfor_counts = false;
          if ( cla.isOptionSet( DOMAIN_COMBINITONS_COUNTS_OUTPUT_OPTION ) ) {
-            output_binary_domain_combinationsfor_graph_analysis = true;
+            output_binary_domain_combinationsfor_counts = true;
          }
          if ( cla.isOptionSet( surfacing.MAX_FS_E_VALUE_OPTION ) ) {
              try {
@@ -505,8 +505,8 @@ public class surfacing {
                  && ( cla.isOptionSet( surfacing.FILTER_NEGATIVE_OPTION ) || cla
                          .isOptionSet( surfacing.FILTER_POSITIVE_OPTION ) ) ) {
              ForesterUtil
-                    .fatalError( surfacing.PRG_NAME,
-                                 "attempt to use both negative or positive protein filter together wirh a negative domains filter" );
+            .fatalError( surfacing.PRG_NAME,
+                         "attempt to use both negative or positive protein filter together wirh a negative domains filter" );
          }
          if ( cla.isOptionSet( surfacing.FILTER_NEGATIVE_OPTION ) ) {
              if ( !cla.isOptionValueSet( surfacing.FILTER_NEGATIVE_OPTION ) ) {
@@ -541,7 +541,7 @@ public class surfacing {
              final String msg = ForesterUtil.isReadableFile( negative_domains_filter_file );
              if ( !ForesterUtil.isEmpty( msg ) ) {
                  ForesterUtil.fatalError( surfacing.PRG_NAME, "can not read from \"" + negative_domains_filter_file
-                        + "\": " + msg );
+                                         + "\": " + msg );
              }
          }
          final List<String> plus_minus_analysis_high_copy_base_species = new ArrayList<String>();
@@ -563,7 +563,7 @@ public class surfacing {
              final String msg = ForesterUtil.isReadableFile( input_genomes_file );
              if ( !ForesterUtil.isEmpty( msg ) ) {
                  ForesterUtil
-                        .fatalError( surfacing.PRG_NAME, "can not read from \"" + input_genomes_file + "\": " + msg );
+                .fatalError( surfacing.PRG_NAME, "can not read from \"" + input_genomes_file + "\": " + msg );
              }
          }
          else {
@@ -592,9 +592,9 @@ public class surfacing {
              }
              else {
                  ForesterUtil.fatalError( surfacing.PRG_NAME, "unknown value \"" + scoring_str
-                        + "\" for scoring method for domain combinations similarity calculation: \"-"
-                        + surfacing.SCORING_OPTION + "=<" + surfacing.SCORING_DOMAIN_COUNT_BASED + "|"
-                        + surfacing.SCORING_PROTEIN_COUNT_BASED + "|" + surfacing.SCORING_COMBINATION_BASED + ">\"" );
+                                         + "\" for scoring method for domain combinations similarity calculation: \"-"
+                                         + surfacing.SCORING_OPTION + "=<" + surfacing.SCORING_DOMAIN_COUNT_BASED + "|"
+                                         + surfacing.SCORING_PROTEIN_COUNT_BASED + "|" + surfacing.SCORING_COMBINATION_BASED + ">\"" );
              }
          }
          boolean sort_by_species_count_first = false;
@@ -660,10 +660,10 @@ public class surfacing {
          if ( cla.isOptionSet( surfacing.SEQ_EXTRACT_OPTION ) ) {
              if ( !cla.isOptionValueSet( surfacing.SEQ_EXTRACT_OPTION ) ) {
                  ForesterUtil
-                        .fatalError( surfacing.PRG_NAME,
-                                     "no domain ids given for sequences with given domains to be extracted : -"
-                                             + surfacing.SEQ_EXTRACT_OPTION
-                                             + "=<ordered domain sequences, domain ids separated by '~', sequences separated by '#'>" );
+                .fatalError( surfacing.PRG_NAME,
+                             "no domain ids given for sequences with given domains to be extracted : -"
+                                     + surfacing.SEQ_EXTRACT_OPTION
+                                     + "=<ordered domain sequences, domain ids separated by '~', sequences separated by '#'>" );
              }
              query_domain_ids = cla.getOptionValue( surfacing.SEQ_EXTRACT_OPTION );
          }
@@ -719,14 +719,14 @@ public class surfacing {
              }
              else {
                  ForesterUtil.fatalError( surfacing.PRG_NAME, "unknown value \"" + sort_str
-                        + "\" for domain combinations similarities sorting: \"-"
-                        + surfacing.DOMAIN_SIMILARITY_SORT_OPTION + "=<" + surfacing.DOMAIN_SIMILARITY_SORT_ALPHA + "|"
-                        + surfacing.DOMAIN_SIMILARITY_SORT_MAX + "|" + surfacing.DOMAIN_SIMILARITY_SORT_MIN + "|"
-                        + surfacing.DOMAIN_SIMILARITY_SORT_MEAN + "|" + surfacing.DOMAIN_SIMILARITY_SORT_DIFF + "|"
-                        + surfacing.DOMAIN_SIMILARITY_SORT_ABS_COUNTS_DIFF + "|"
-                        + surfacing.DOMAIN_SIMILARITY_SORT_COUNTS_DIFF + "|" + "|"
-                        + surfacing.DOMAIN_SIMILARITY_SORT_SPECIES_COUNT + "|" + surfacing.DOMAIN_SIMILARITY_SORT_SD
-                        + ">\"" );
+                                         + "\" for domain combinations similarities sorting: \"-"
+                                         + surfacing.DOMAIN_SIMILARITY_SORT_OPTION + "=<" + surfacing.DOMAIN_SIMILARITY_SORT_ALPHA + "|"
+                                         + surfacing.DOMAIN_SIMILARITY_SORT_MAX + "|" + surfacing.DOMAIN_SIMILARITY_SORT_MIN + "|"
+                                         + surfacing.DOMAIN_SIMILARITY_SORT_MEAN + "|" + surfacing.DOMAIN_SIMILARITY_SORT_DIFF + "|"
+                                         + surfacing.DOMAIN_SIMILARITY_SORT_ABS_COUNTS_DIFF + "|"
+                                         + surfacing.DOMAIN_SIMILARITY_SORT_COUNTS_DIFF + "|" + "|"
+                                         + surfacing.DOMAIN_SIMILARITY_SORT_SPECIES_COUNT + "|" + surfacing.DOMAIN_SIMILARITY_SORT_SD
+                                         + ">\"" );
              }
          }
          DomainSimilarity.PRINT_OPTION domain_similarity_print_option = DOMAIN_SIMILARITY_PRINT_OPTION_DEFAULT;
@@ -778,10 +778,10 @@ public class surfacing {
              }
              else {
                  ForesterUtil.fatalError( surfacing.PRG_NAME, "unknown value \"" + sort
-                        + "\" for sorting of domain counts: \"-" + surfacing.DOMAIN_COUNT_SORT_OPTION + "=<"
-                        + surfacing.DOMAIN_COUNT_SORT_ALPHA + "|" + surfacing.DOMAIN_COUNT_SORT_KEY_DOMAIN_COUNT + "|"
-                        + surfacing.DOMAIN_COUNT_SORT_KEY_DOMAIN_PROTEINS_COUNT + "|"
-                        + surfacing.DOMAIN_COUNT_SORT_COMBINATIONS_COUNT + ">\"" );
+                                         + "\" for sorting of domain counts: \"-" + surfacing.DOMAIN_COUNT_SORT_OPTION + "=<"
+                                         + surfacing.DOMAIN_COUNT_SORT_ALPHA + "|" + surfacing.DOMAIN_COUNT_SORT_KEY_DOMAIN_COUNT + "|"
+                                         + surfacing.DOMAIN_COUNT_SORT_KEY_DOMAIN_PROTEINS_COUNT + "|"
+                                         + surfacing.DOMAIN_COUNT_SORT_COMBINATIONS_COUNT + ">\"" );
              }
          }
          final String[][] input_file_properties = SurfacingUtil.processInputGenomesFile( input_genomes_file );
@@ -806,62 +806,56 @@ public class surfacing {
              }
              SurfacingUtil.checkForOutputFileWriteability( dcc_outfile );
          }
-        File pfam_to_go_file = null;
-        Map<String, List<GoId>> domain_id_to_go_ids_map = null;
-        int domain_id_to_go_ids_count = 0;
+        File pfam_to_go_file = new File( "pfam2go.txt" );
          if ( cla.isOptionSet( surfacing.PFAM_TO_GO_FILE_USE_OPTION ) ) {
              if ( !cla.isOptionValueSet( surfacing.PFAM_TO_GO_FILE_USE_OPTION ) ) {
                  ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for Pfam to GO mapping file: -"
                          + surfacing.PFAM_TO_GO_FILE_USE_OPTION + "=<file>" );
              }
              pfam_to_go_file = new File( cla.getOptionValue( surfacing.PFAM_TO_GO_FILE_USE_OPTION ) );
-            final String error = ForesterUtil.isReadableFile( pfam_to_go_file );
-            if ( !ForesterUtil.isEmpty( error ) ) {
-                ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read Pfam to GO mapping file: " + error );
-            }
-            try {
-                final PfamToGoParser parser = new PfamToGoParser( pfam_to_go_file );
-                final List<PfamToGoMapping> pfam_to_go_mappings = parser.parse();
-                domain_id_to_go_ids_map = SurfacingUtil.createDomainIdToGoIdMap( pfam_to_go_mappings );
-                if ( parser.getMappingCount() < domain_id_to_go_ids_map.size() ) {
-                    ForesterUtil.unexpectedFatalError( surfacing.PRG_NAME,
-                                                       "parser.getMappingCount() < domain_id_to_go_ids_map.size()" );
-                }
-                domain_id_to_go_ids_count = parser.getMappingCount();
-            }
-            catch ( final IOException e ) {
-                ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read from Pfam to GO mapping file: " + e );
+        }
+        final String error1 = ForesterUtil.isReadableFile( pfam_to_go_file );
+        if ( !ForesterUtil.isEmpty( error1 ) ) {
+            ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read Pfam to GO mapping file: " + error1 );
+        }
+        Map<String, List<GoId>> domain_id_to_go_ids_map = null;
+        int domain_id_to_go_ids_count = 0;
+        try {
+            final PfamToGoParser parser = new PfamToGoParser( pfam_to_go_file );
+            final List<PfamToGoMapping> pfam_to_go_mappings = parser.parse();
+            domain_id_to_go_ids_map = SurfacingUtil.createDomainIdToGoIdMap( pfam_to_go_mappings );
+            if ( parser.getMappingCount() < domain_id_to_go_ids_map.size() ) {
+                ForesterUtil.unexpectedFatalError( surfacing.PRG_NAME,
+                        "parser.getMappingCount() < domain_id_to_go_ids_map.size()" );
              }
+            domain_id_to_go_ids_count = parser.getMappingCount();
          }
-        File go_obo_file = null;
-        List<GoTerm> go_terms = null;
+        catch ( final IOException e ) {
+            ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read from Pfam to GO mapping file: " + e );
+        }
+        File go_obo_file = new File( "go.obo" );
          if ( cla.isOptionSet( surfacing.GO_OBO_FILE_USE_OPTION ) ) {
              if ( !cla.isOptionValueSet( surfacing.GO_OBO_FILE_USE_OPTION ) ) {
                  ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for GO OBO file: -"
                          + surfacing.GO_OBO_FILE_USE_OPTION + "=<file>" );
              }
-            if ( ( domain_id_to_go_ids_map == null ) || ( domain_id_to_go_ids_map.size() < 1 ) ) {
-                ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot use GO OBO file (-"
-                        + surfacing.GO_OBO_FILE_USE_OPTION + "=<file>) without Pfam to GO mapping file ("
-                        + surfacing.PFAM_TO_GO_FILE_USE_OPTION + "=<file>)" );
-            }
              go_obo_file = new File( cla.getOptionValue( surfacing.GO_OBO_FILE_USE_OPTION ) );
-            final String error = ForesterUtil.isReadableFile( go_obo_file );
-            if ( !ForesterUtil.isEmpty( error ) ) {
-                ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read GO OBO file: " + error );
-            }
-            try {
-                final OBOparser parser = new OBOparser( go_obo_file, OBOparser.ReturnType.BASIC_GO_TERM );
-                go_terms = parser.parse();
-                if ( parser.getGoTermCount() != go_terms.size() ) {
-                    ForesterUtil
-                            .unexpectedFatalError( surfacing.PRG_NAME, "parser.getGoTermCount() != go_terms.size()" );
-                }
-            }
-            catch ( final IOException e ) {
-                ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read from GO OBO file: " + e );
+        }
+        final String error2 = ForesterUtil.isReadableFile( go_obo_file );
+        if ( !ForesterUtil.isEmpty( error2 ) ) {
+            ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read GO OBO file: " + error2 );
+        }
+        List<GoTerm> go_terms = null;
+        try {
+            final OBOparser parser = new OBOparser( go_obo_file, OBOparser.ReturnType.BASIC_GO_TERM );
+            go_terms = parser.parse();
+            if ( parser.getGoTermCount() != go_terms.size() ) {
+                ForesterUtil.unexpectedFatalError( surfacing.PRG_NAME, "parser.getGoTermCount() != go_terms.size()" );
              }
          }
+        catch ( final IOException e ) {
+            ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read from GO OBO file: " + e );
+        }
          Map<GoId, GoTerm> go_id_to_term_map = null;
          if ( ( ( domain_id_to_go_ids_map != null ) && ( domain_id_to_go_ids_map.size() > 0 ) )
                  && ( ( go_terms != null ) && ( go_terms.size() > 0 ) ) ) {
@@ -895,10 +889,10 @@ public class surfacing {
              }
              else {
                  ForesterUtil.fatalError( surfacing.PRG_NAME, "unknown value \"" + go_namespace_limit_str
-                        + "\" for GO namespace limit: \"-" + surfacing.GO_NAMESPACE_LIMIT_OPTION + "=<"
-                        + surfacing.GO_NAMESPACE_LIMIT_OPTION_MOLECULAR_FUNCTION + "|"
-                        + surfacing.GO_NAMESPACE_LIMIT_OPTION_BIOLOGICAL_PROCESS + "|"
-                        + surfacing.GO_NAMESPACE_LIMIT_OPTION_CELLULAR_COMPONENT + ">\"" );
+                                         + "\" for GO namespace limit: \"-" + surfacing.GO_NAMESPACE_LIMIT_OPTION + "=<"
+                                         + surfacing.GO_NAMESPACE_LIMIT_OPTION_MOLECULAR_FUNCTION + "|"
+                                         + surfacing.GO_NAMESPACE_LIMIT_OPTION_BIOLOGICAL_PROCESS + "|"
+                                         + surfacing.GO_NAMESPACE_LIMIT_OPTION_CELLULAR_COMPONENT + ">\"" );
              }
          }
          if ( ( domain_similarity_sort_field == DomainSimilarity.DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE )
@@ -967,7 +961,7 @@ public class surfacing {
          Map<String, Set<String>>[] domain_id_to_secondary_features_maps = null;
          File[] secondary_features_map_files = null;
          final File domain_lengths_analysis_outfile = new File( out_dir + ForesterUtil.FILE_SEPARATOR + output_file
-                + DOMAIN_LENGTHS_ANALYSIS_SUFFIX );
+                                                               + DOMAIN_LENGTHS_ANALYSIS_SUFFIX );
          if ( domain_length_analysis ) {
              SurfacingUtil.checkForOutputFileWriteability( domain_lengths_analysis_outfile );
          }
@@ -1026,21 +1020,21 @@ public class surfacing {
          if ( positive_filter_file != null ) {
              final int filter_size = filter.size();
              System.out.println( "Positive protein filter     : " + positive_filter_file + " [" + filter_size
-                    + " domain ids]" );
+                                + " domain ids]" );
              html_desc.append( "<tr><td>Positive protein filter:</td><td>" + positive_filter_file + " [" + filter_size
-                    + " domain ids]</td></tr>" + nl );
+                              + " domain ids]</td></tr>" + nl );
          }
          if ( negative_filter_file != null ) {
              final int filter_size = filter.size();
              System.out.println( "Negative protein filter     : " + negative_filter_file + " [" + filter_size
-                    + " domain ids]" );
+                                + " domain ids]" );
              html_desc.append( "<tr><td>Negative protein filter:</td><td>" + negative_filter_file + " [" + filter_size
-                    + " domain ids]</td></tr>" + nl );
+                              + " domain ids]</td></tr>" + nl );
          }
          if ( negative_domains_filter_file != null ) {
              final int filter_size = filter.size();
              System.out.println( "Negative domain filter      : " + negative_domains_filter_file + " [" + filter_size
-                    + " domain ids]" );
+                                + " domain ids]" );
              html_desc.append( "<tr><td>Negative domain filter:</td><td>" + negative_domains_filter_file + " ["
                      + filter_size + " domain ids]</td></tr>" + nl );
          }
@@ -1059,7 +1053,7 @@ public class surfacing {
              }
              System.out.println( "Plus-minus analysis         : " + plus1 + "&& " + plus0 + "&& " + minus );
              html_desc.append( "<tr><td>Plus-minus analysis:</td><td>" + plus1 + "&& " + plus0 + "&& " + minus
-                    + "</td></tr>" + nl );
+                              + "</td></tr>" + nl );
          }
          if ( cutoff_scores_file != null ) {
              System.out.println( "Cutoff scores file          : " + cutoff_scores_file );
@@ -1092,16 +1086,16 @@ public class surfacing {
          if ( no_engulfing_overlaps ) {
              System.out.println( "Ignore engulfed domains     : " + no_engulfing_overlaps );
              html_desc.append( "<tr><td>Ignore (lower confidence) engulfed domains:</td><td>" + no_engulfing_overlaps
-                    + "</td></tr>" + nl );
+                              + "</td></tr>" + nl );
          }
          System.out.println( "Ignore singlet domains      : " + ignore_domains_without_combs_in_all_spec );
          html_desc
-                .append( "<tr><td>Ignore singlet domains for domain combination similarity analyses (not for parsimony analyses):</td><td>"
-                        + ignore_domains_without_combs_in_all_spec + "</td></tr>" + nl );
+        .append( "<tr><td>Ignore singlet domains for domain combination similarity analyses (not for parsimony analyses):</td><td>"
+                + ignore_domains_without_combs_in_all_spec + "</td></tr>" + nl );
          System.out.println( "Ignore species specific doms: " + ignore_species_specific_domains );
          html_desc
-                .append( "<tr><td>Ignore species specific domains for domain combination similarity analyses (not for parsimony analyses):</td><td>"
-                        + ignore_species_specific_domains + "</td></tr>" + nl );
+        .append( "<tr><td>Ignore species specific domains for domain combination similarity analyses (not for parsimony analyses):</td><td>"
+                + ignore_species_specific_domains + "</td></tr>" + nl );
          System.out.println( "Ignore combination with self: " + ignore_combination_with_same );
          html_desc.append( "<tr><td>Ignore combination with self for domain combination similarity analyses:</td><td>"
                  + ignore_combination_with_same + "</td></tr>" + nl );
@@ -1121,14 +1115,14 @@ public class surfacing {
          if ( perform_dc_fich ) {
              System.out.println( "Use last in Fitch parsimony : " + use_last_in_fitch_parsimony );
              html_desc.append( "<tr><td>Use last in Fitch parsimony:</td><td>" + use_last_in_fitch_parsimony
-                    + "</td></tr>" + nl );
+                              + "</td></tr>" + nl );
          }
          System.out.println( "Write to Nexus files        : " + write_to_nexus );
          html_desc.append( "<tr><td>Write to Nexus files:</td><td>" + write_to_nexus + "</td></tr>" + nl );
          if ( perform_dc_fich ) {
              System.out.println( "DC regain prot stats        : " + perform_dc_regain_proteins_stats );
              html_desc.append( "<tr><td>DC regain prot stats:</td><td>" + perform_dc_regain_proteins_stats
-                    + "</td></tr>" + nl );
+                              + "</td></tr>" + nl );
          }
          System.out.println( "DA analysis                 : " + da_analysis );
          html_desc.append( "<tr><td>DA analysis :</td><td>" + da_analysis + "</td></tr>" + nl );
@@ -1156,7 +1150,7 @@ public class surfacing {
          }
          if ( domain_id_to_go_ids_map != null ) {
              System.out.println( "Pfam to GO mappings from    : " + pfam_to_go_file + " [" + domain_id_to_go_ids_count
-                    + " mappings]" );
+                                + " mappings]" );
              html_desc.append( "<tr><td>Pfam to GO mappings from:</td><td>" + pfam_to_go_file + " ["
                      + domain_id_to_go_ids_count + " mappings]" + "</td></tr>" + nl );
          }
@@ -1172,7 +1166,7 @@ public class surfacing {
          if ( perform_pwc ) {
              System.out.println( "Suffix for PWC files        : " + automated_pairwise_comparison_suffix );
              html_desc.append( "<tr><td>Suffix for PWC files</td><td>" + automated_pairwise_comparison_suffix
-                    + "</td></tr>" + nl );
+                              + "</td></tr>" + nl );
          }
          if ( out_dir != null ) {
              System.out.println( "Output directory            : " + out_dir );
@@ -1309,14 +1303,14 @@ public class surfacing {
                      break;
                  default:
                      ForesterUtil
-                            .unexpectedFatalError( surfacing.PRG_NAME, "unknown value for sorting for similarities" );
+                    .unexpectedFatalError( surfacing.PRG_NAME, "unknown value for sorting for similarities" );
              }
              System.out.println();
              html_desc.append( "</td></tr>" + nl );
              if ( ( intrees != null ) && ( intrees.length > 0 ) ) {
                  for( final File intree_file : intree_files ) {
                      html_desc.append( "<tr><td>Intree for gain/loss parsimony analysis:</td><td>" + intree_file
-                            + "</td></tr>" + nl );
+                                      + "</td></tr>" + nl );
                      System.out.println( "  Intree for gain/loss pars.: " + intree_file );
                  }
              }
@@ -1349,7 +1343,7 @@ public class surfacing {
          } // if ( perform_pwc ) {
          System.out.println();
          html_desc.append( "<tr><td>Command line:</td><td>" + nl + nl + cla.getCommandLineArgsAsString() + nl + nl
-                + "</td></tr>" + nl );
+                          + "</td></tr>" + nl );
          System.out.println( "Command line                : " + cla.getCommandLineArgsAsString() );
          BufferedWriter[] query_domains_writer_ary = null;
          List<String>[] query_domain_ids_array = null;
@@ -1397,7 +1391,7 @@ public class surfacing {
              all_bin_domain_combinations_lost_fitch = new ArrayList<BinaryDomainCombination>();
          }
          final File per_genome_domain_promiscuity_statistics_file = new File( out_dir + ForesterUtil.FILE_SEPARATOR
-                + output_file + D_PROMISCUITY_FILE_SUFFIX );
+                                                                             + output_file + D_PROMISCUITY_FILE_SUFFIX );
          BufferedWriter per_genome_domain_promiscuity_statistics_writer = null;
          try {
              per_genome_domain_promiscuity_statistics_writer = new BufferedWriter( new FileWriter( per_genome_domain_promiscuity_statistics_file ) );
@@ -1440,7 +1434,7 @@ public class surfacing {
          BufferedWriter domains_per_potein_stats_writer = null;
          try {
              domains_per_potein_stats_writer = new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR
-                    + output_file + "_domains_per_potein_stats.txt" ) );
+                                                                                  + output_file + "_domains_per_potein_stats.txt" ) );
              domains_per_potein_stats_writer.write( "Genome" );
              domains_per_potein_stats_writer.write( "\t" );
              domains_per_potein_stats_writer.write( "Mean" );
@@ -1561,8 +1555,8 @@ public class surfacing {
              System.out.println( "Coverage                                       : "
                      + ForesterUtil.roundToInt( 100.0 * coverage ) + "%" );
              SurfacingUtil.log( "Coverage                                       : "
-                                       + ForesterUtil.roundToInt( 100.0 * coverage ) + "%",
-                               log_writer );
+                    + ForesterUtil.roundToInt( 100.0 * coverage ) + "%",
+                    log_writer );
              System.out.println( "Domains encountered                            : " + parser.getDomainsEncountered() );
              SurfacingUtil.log( "Domains encountered                            : " + parser.getDomainsEncountered(),
                                 log_writer );
@@ -1576,53 +1570,53 @@ public class surfacing {
              System.out.println( "Domains ignored due to individual score cutoffs: "
                      + parser.getDomainsIgnoredDueToIndividualScoreCutoff() );
              SurfacingUtil.log( "Domains ignored due to individual score cutoffs: "
-                                       + parser.getDomainsIgnoredDueToIndividualScoreCutoff(),
-                               log_writer );
+                    + parser.getDomainsIgnoredDueToIndividualScoreCutoff(),
+                    log_writer );
              System.out.println( "Domains ignored due to FS E-value              : "
                      + parser.getDomainsIgnoredDueToFsEval() );
              SurfacingUtil.log( "Domains ignored due to FS E-value              : "
-                                       + parser.getDomainsIgnoredDueToFsEval(),
-                               log_writer );
+                    + parser.getDomainsIgnoredDueToFsEval(),
+                    log_writer );
              System.out.println( "Domains ignored due to iE-value                : "
                      + parser.getDomainsIgnoredDueToIEval() );
              SurfacingUtil.log( "Domains ignored due to iE-value                : "
-                                       + parser.getDomainsIgnoredDueToIEval(),
-                               log_writer );
+                    + parser.getDomainsIgnoredDueToIEval(),
+                    log_writer );
              System.out.println( "Domains ignored due to DUF designation         : "
                      + parser.getDomainsIgnoredDueToDuf() );
              SurfacingUtil
-                    .log( "Domains ignored due to DUF designation         : " + parser.getDomainsIgnoredDueToDuf(),
-                          log_writer );
+            .log( "Domains ignored due to DUF designation         : " + parser.getDomainsIgnoredDueToDuf(),
+                  log_writer );
              if ( ignore_virus_like_ids ) {
                  System.out.println( "Domains ignored due virus like ids             : "
                          + parser.getDomainsIgnoredDueToVirusLikeIds() );
                  SurfacingUtil.log( "Domains ignored due virus like ids             : "
-                                           + parser.getDomainsIgnoredDueToVirusLikeIds(),
-                                   log_writer );
+                        + parser.getDomainsIgnoredDueToVirusLikeIds(),
+                        log_writer );
              }
              System.out.println( "Domains ignored due negative domain filter     : "
                      + parser.getDomainsIgnoredDueToNegativeDomainFilter() );
              SurfacingUtil.log( "Domains ignored due negative domain filter     : "
-                                       + parser.getDomainsIgnoredDueToNegativeDomainFilter(),
-                               log_writer );
+                    + parser.getDomainsIgnoredDueToNegativeDomainFilter(),
+                    log_writer );
              System.out.println( "Domains ignored due to overlap                 : "
                      + parser.getDomainsIgnoredDueToOverlap() );
              SurfacingUtil.log( "Domains ignored due to overlap                 : "
-                                       + parser.getDomainsIgnoredDueToOverlap(),
-                               log_writer );
+                    + parser.getDomainsIgnoredDueToOverlap(),
+                    log_writer );
              if ( negative_filter_file != null ) {
                  System.out.println( "Proteins ignored due to negative filter        : "
                          + parser.getProteinsIgnoredDueToFilter() );
                  SurfacingUtil.log( "Proteins ignored due to negative filter        : "
-                                           + parser.getProteinsIgnoredDueToFilter(),
-                                   log_writer );
+                        + parser.getProteinsIgnoredDueToFilter(),
+                        log_writer );
              }
              if ( positive_filter_file != null ) {
                  System.out.println( "Proteins ignored due to positive filter        : "
                          + parser.getProteinsIgnoredDueToFilter() );
                  SurfacingUtil.log( "Proteins ignored due to positive filter        : "
-                                           + parser.getProteinsIgnoredDueToFilter(),
-                                   log_writer );
+                        + parser.getProteinsIgnoredDueToFilter(),
+                        log_writer );
              }
              if ( da_analysis ) {
                  System.out.println( "Distinct domain architectures stored           : " + distinct_das );
@@ -1634,7 +1628,7 @@ public class surfacing {
                  int count = 0;
                  for( final Protein protein : protein_list ) {
                      dc_data_writer.write( SurfacingUtil.proteinToDomainCombinations( protein, count + "", "\t" )
-                            .toString() );
+                                          .toString() );
                      ++count;
                      for( final Domain d : protein.getProteinDomains() ) {
                          final String d_str = d.getDomainId().toString();
@@ -1661,22 +1655,22 @@ public class surfacing {
              }
              if ( !da_analysis ) {
                  gwcd_list.add( BasicGenomeWideCombinableDomains
-                        .createInstance( protein_list,
-                                         ignore_combination_with_same,
-                                         new BasicSpecies( input_file_properties[ i ][ 1 ] ),
-                                         domain_id_to_go_ids_map,
-                                         dc_type,
-                                         protein_length_stats_by_dc,
-                                         domain_number_stats_by_dc ) );
+                               .createInstance( protein_list,
+                                                ignore_combination_with_same,
+                                                new BasicSpecies( input_file_properties[ i ][ 1 ] ),
+                                                domain_id_to_go_ids_map,
+                                                dc_type,
+                                                protein_length_stats_by_dc,
+                                                domain_number_stats_by_dc ) );
                  if ( gwcd_list.get( i ).getSize() > 0 ) {
                      if ( output_binary_domain_combinationsfor_counts ) {
                          SurfacingUtil
-                                .writeDomainCombinationsCountsFile( input_file_properties,
-                                                                    out_dir,
-                                                                    per_genome_domain_promiscuity_statistics_writer,
-                                                                    gwcd_list.get( i ),
-                                                                    i,
-                                                                    dc_sort_order );
+                        .writeDomainCombinationsCountsFile( input_file_properties,
+                                                            out_dir,
+                                                            per_genome_domain_promiscuity_statistics_writer,
+                                                            gwcd_list.get( i ),
+                                                            i,
+                                                            dc_sort_order );
                      }
                      if ( output_binary_domain_combinationsfor_graph_analysis ) {
                          SurfacingUtil.writeBinaryDomainCombinationsFileForGraphAnalysis( input_file_properties,
@@ -1724,9 +1718,9 @@ public class surfacing {
                                                               distinct_domain_architecuture_counts,
                                                               10,
                                                               new File( out_dir.toString() + "/" + output_file
-                                                                     + "_DA_counts.txt" ),
-                                                             new File( out_dir.toString() + "/" + output_file
-                                                                     + "_unique_DAs.txt" ) );
+                                                                       + "_DA_counts.txt" ),
+                                                                       new File( out_dir.toString() + "/" + output_file
+                                                                                 + "_unique_DAs.txt" ) );
              distinct_domain_architecutures_per_genome.clear();
              distinct_domain_architecuture_counts.clear();
              System.gc();
@@ -1752,13 +1746,13 @@ public class surfacing {
              all_genomes_domains_per_potein_stats = null;
              SurfacingUtil.printOutPercentageOfMultidomainProteins( all_genomes_domains_per_potein_histo, log_writer );
              ForesterUtil.map2file( new File( out_dir + ForesterUtil.FILE_SEPARATOR + output_file
-                    + "_all_genomes_domains_per_potein_histo.txt" ), all_genomes_domains_per_potein_histo, "\t", "\n" );
+                                             + "_all_genomes_domains_per_potein_histo.txt" ), all_genomes_domains_per_potein_histo, "\t", "\n" );
              ForesterUtil.collection2file( new File( out_dir + ForesterUtil.FILE_SEPARATOR + output_file
-                    + "_domains_always_single_.txt" ), domains_which_are_always_single, "\n" );
+                                                    + "_domains_always_single_.txt" ), domains_which_are_always_single, "\n" );
              ForesterUtil.collection2file( new File( out_dir + ForesterUtil.FILE_SEPARATOR + output_file
-                    + "_domains_single_or_combined.txt" ), domains_which_are_sometimes_single_sometimes_not, "\n" );
+                                                    + "_domains_single_or_combined.txt" ), domains_which_are_sometimes_single_sometimes_not, "\n" );
              ForesterUtil.collection2file( new File( out_dir + ForesterUtil.FILE_SEPARATOR + output_file
-                    + "_domains_always_combined.txt" ), domains_which_never_single, "\n" );
+                                                    + "_domains_always_combined.txt" ), domains_which_never_single, "\n" );
              ForesterUtil.programMessage( PRG_NAME,
                                           "Average of proteins with a least one domain assigned: "
                                                   + ( 100 * protein_coverage_stats.arithmeticMean() ) + "% (+/-"
@@ -1767,13 +1761,13 @@ public class surfacing {
                      + ( 100 * protein_coverage_stats.getMin() ) + "%-" + ( 100 * protein_coverage_stats.getMax() )
                      + "%" );
              SurfacingUtil.log( "Average of prot with a least one dom assigned  : "
-                                       + ( 100 * protein_coverage_stats.arithmeticMean() ) + "% (+/-"
-                                       + ( 100 * protein_coverage_stats.sampleStandardDeviation() ) + "%)",
-                               log_writer );
+                    + ( 100 * protein_coverage_stats.arithmeticMean() ) + "% (+/-"
+                    + ( 100 * protein_coverage_stats.sampleStandardDeviation() ) + "%)",
+                    log_writer );
              SurfacingUtil.log( "Range of prot with a least one dom assigned    : "
-                                       + ( 100 * protein_coverage_stats.getMin() ) + "%-"
-                                       + ( 100 * protein_coverage_stats.getMax() ) + "%",
-                               log_writer );
+                    + ( 100 * protein_coverage_stats.getMin() ) + "%-"
+                    + ( 100 * protein_coverage_stats.getMax() ) + "%",
+                    log_writer );
              protein_coverage_stats = null;
          }
          catch ( final IOException e2 ) {
@@ -1817,7 +1811,8 @@ public class surfacing {
          final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( domain_similarity_sort_field,
                                                                                       sort_by_species_count_first,
                                                                                       number_of_genomes == 2,
-                                                                                     CALC_SIMILARITY_SCORES );
+                                                                                     CALC_SIMILARITY_SCORES,
+                                                                                     true );
          switch ( scoring ) {
              case COMBINATIONS:
                  pw_calc = new CombinationsBasedPairwiseDomainSimilarityCalculator();
@@ -1867,13 +1862,13 @@ public class surfacing {
              html_desc.append( "<tr><td>Sum of all distinct binary combinations:</td><td>"
                      + all_bin_domain_combinations_encountered.size() + "</td></tr>" + nl );
              html_desc.append( "<tr><td>Sum of all distinct domains:</td><td>" + all_domains_encountered.size()
-                    + "</td></tr>" + nl );
+                              + "</td></tr>" + nl );
              html_desc.append( "<tr><td>Analysis date/time:</td><td>"
                      + new java.text.SimpleDateFormat( "yyyy.MM.dd HH:mm:ss" ).format( new java.util.Date() )
                      + "</td></tr>" + nl );
              html_desc.append( "</table>" + nl );
              final Writer simple_tab_writer = new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR
-                    + my_outfile + ".tsv" ) );
+                                                                                 + my_outfile + ".tsv" ) );
              SurfacingUtil.writeDomainSimilaritiesToFile( html_desc,
                                                           new StringBuilder( number_of_genomes + " genomes" ),
                                                           simple_tab_writer,
@@ -1937,23 +1932,23 @@ public class surfacing {
                  output_file = new File( out_dir + ForesterUtil.FILE_SEPARATOR + output_file );
              }
              SurfacingUtil.writeMatrixToFile( new File( matrix_output_file
-                    + surfacing.MATRIX_MEAN_SCORE_BASED_GENOME_DISTANCE_SUFFIX ), pwgc.getDomainDistanceScoresMeans() );
+                                                       + surfacing.MATRIX_MEAN_SCORE_BASED_GENOME_DISTANCE_SUFFIX ), pwgc.getDomainDistanceScoresMeans() );
              SurfacingUtil
-                    .writeMatrixToFile( new File( matrix_output_file
-                                                + surfacing.MATRIX_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX ),
-                                        pwgc.getSharedBinaryCombinationsBasedDistances() );
+            .writeMatrixToFile( new File( matrix_output_file
+                                          + surfacing.MATRIX_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX ),
+                                          pwgc.getSharedBinaryCombinationsBasedDistances() );
              SurfacingUtil.writeMatrixToFile( new File( matrix_output_file
-                                                     + surfacing.MATRIX_SHARED_DOMAINS_BASED_GENOME_DISTANCE_SUFFIX ),
-                                             pwgc.getSharedDomainsBasedDistances() );
+                                                       + surfacing.MATRIX_SHARED_DOMAINS_BASED_GENOME_DISTANCE_SUFFIX ),
+                                                       pwgc.getSharedDomainsBasedDistances() );
              final Phylogeny nj_gd = SurfacingUtil.createNjTreeBasedOnMatrixToFile( new File( matrix_output_file
-                    + surfacing.NJ_TREE_MEAN_SCORE_BASED_GENOME_DISTANCE_SUFFIX ), pwgc.getDomainDistanceScoresMeans()
-                    .get( 0 ) );
+                                                                                             + surfacing.NJ_TREE_MEAN_SCORE_BASED_GENOME_DISTANCE_SUFFIX ), pwgc.getDomainDistanceScoresMeans()
+                                                                                             .get( 0 ) );
              final Phylogeny nj_bc = SurfacingUtil.createNjTreeBasedOnMatrixToFile( new File( matrix_output_file
-                    + surfacing.NJ_TREE_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX ), pwgc
-                    .getSharedBinaryCombinationsBasedDistances().get( 0 ) );
+                                                                                             + surfacing.NJ_TREE_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX ), pwgc
+                                                                                             .getSharedBinaryCombinationsBasedDistances().get( 0 ) );
              final Phylogeny nj_d = SurfacingUtil.createNjTreeBasedOnMatrixToFile( new File( matrix_output_file
-                    + surfacing.NJ_TREE_SHARED_DOMAINS_BASED_GENOME_DISTANCE_SUFFIX ), pwgc
-                    .getSharedDomainsBasedDistances().get( 0 ) );
+                                                                                            + surfacing.NJ_TREE_SHARED_DOMAINS_BASED_GENOME_DISTANCE_SUFFIX ), pwgc
+                                                                                            .getSharedDomainsBasedDistances().get( 0 ) );
              inferred_trees = new ArrayList<Phylogeny>();
              inferred_trees.add( nj_gd );
              inferred_trees.add( nj_bc );
@@ -1994,18 +1989,18 @@ public class surfacing {
                                                          parameters_sb.toString(),
                                                          domain_id_to_secondary_features_maps,
                                                          positive_filter_file == null ? null : filter,
-                                                        output_binary_domain_combinationsfor_graph_analysis,
-                                                        all_bin_domain_combinations_gained_fitch,
-                                                        all_bin_domain_combinations_lost_fitch,
-                                                        dc_type,
-                                                        protein_length_stats_by_dc,
-                                                        domain_number_stats_by_dc,
-                                                        domain_length_stats_by_domain,
-                                                        tax_code_to_id_map,
-                                                        write_to_nexus,
-                                                        use_last_in_fitch_parsimony,
-                                                        perform_dc_fich );
-                // Listing of all domain combinations gained is only done if only one input tree is used. 
+                                                                output_binary_domain_combinationsfor_graph_analysis,
+                                                                all_bin_domain_combinations_gained_fitch,
+                                                                all_bin_domain_combinations_lost_fitch,
+                                                                dc_type,
+                                                                protein_length_stats_by_dc,
+                                                                domain_number_stats_by_dc,
+                                                                domain_length_stats_by_domain,
+                                                                tax_code_to_id_map,
+                                                                write_to_nexus,
+                                                                use_last_in_fitch_parsimony,
+                                                                perform_dc_fich );
+                // Listing of all domain combinations gained is only done if only one input tree is used.
                  if ( ( domain_id_to_secondary_features_maps != null )
                          && ( domain_id_to_secondary_features_maps.length > 0 ) ) {
                      int j = 0;
@@ -2014,14 +2009,14 @@ public class surfacing {
                          final DomainParsimonyCalculator secondary_features_parsimony = DomainParsimonyCalculator
                                  .createInstance( intree, gwcd_list, domain_id_to_secondary_features_map );
                          SurfacingUtil
-                                .executeParsimonyAnalysisForSecondaryFeatures( outfile_name
-                                                                                       + "_"
-                                                                                       + secondary_features_map_files[ j++ ],
-                                                                               secondary_features_parsimony,
-                                                                               intree,
-                                                                               parameters_sb.toString(),
-                                                                               mapping_results_map,
-                                                                               use_last_in_fitch_parsimony );
+                        .executeParsimonyAnalysisForSecondaryFeatures( outfile_name
+                                                                       + "_"
+                                                                       + secondary_features_map_files[ j++ ],
+                                                                       secondary_features_parsimony,
+                                                                       intree,
+                                                                       parameters_sb.toString(),
+                                                                       mapping_results_map,
+                                                                       use_last_in_fitch_parsimony );
                          if ( i == 0 ) {
                              System.out.println();
                              System.out.println( "Mapping to secondary features:" );
@@ -2061,18 +2056,19 @@ public class surfacing {
              SurfacingUtil.writeProteinListsForAllSpecies( out_dir,
                                                            protein_lists_per_species,
                                                            gwcd_list,
-                                                          output_list_of_all_proteins_per_domain_e_value_max );
+                                                          output_list_of_all_proteins_per_domain_e_value_max,
+                                                          positive_filter_file != null ? filter : null );
          }
          gwcd_list = null;
          if ( all_bin_domain_combinations_gained_fitch != null ) {
              try {
                  SurfacingUtil
-                        .executeFitchGainsAnalysis( new File( output_file
-                                                            + surfacing.OUTPUT_DOMAIN_COMBINATIONS_GAINED_MORE_THAN_ONCE_ANALYSIS_SUFFIX ),
-                                                    all_bin_domain_combinations_gained_fitch,
-                                                    all_domains_encountered.size(),
-                                                    all_bin_domain_combinations_encountered,
-                                                    true );
+                .executeFitchGainsAnalysis( new File( output_file
+                                                      + surfacing.OUTPUT_DOMAIN_COMBINATIONS_GAINED_MORE_THAN_ONCE_ANALYSIS_SUFFIX ),
+                                                      all_bin_domain_combinations_gained_fitch,
+                                                      all_domains_encountered.size(),
+                                                      all_bin_domain_combinations_encountered,
+                                                      true );
              }
              catch ( final IOException e ) {
                  ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() );
@@ -2081,12 +2077,12 @@ public class surfacing {
          if ( all_bin_domain_combinations_lost_fitch != null ) {
              try {
                  SurfacingUtil
-                        .executeFitchGainsAnalysis( new File( output_file
-                                                            + surfacing.OUTPUT_DOMAIN_COMBINATIONS_LOST_MORE_THAN_ONCE_ANALYSIS_SUFFIX ),
-                                                    all_bin_domain_combinations_lost_fitch,
-                                                    all_domains_encountered.size(),
-                                                    all_bin_domain_combinations_encountered,
-                                                    false );
+                .executeFitchGainsAnalysis( new File( output_file
+                                                      + surfacing.OUTPUT_DOMAIN_COMBINATIONS_LOST_MORE_THAN_ONCE_ANALYSIS_SUFFIX ),
+                                                      all_bin_domain_combinations_lost_fitch,
+                                                      all_domains_encountered.size(),
+                                                      all_bin_domain_combinations_encountered,
+                                                      false );
              }
              catch ( final IOException e ) {
                  ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() );
@@ -2096,7 +2092,7 @@ public class surfacing {
          final long free_memory = rt.freeMemory() / 1000000;
          final long total_memory = rt.totalMemory() / 1000000;
          ForesterUtil.programMessage( PRG_NAME, "Time for analysis : " + ( new Date().getTime() - analysis_start_time )
-                + "ms" );
+                                     + "ms" );
          ForesterUtil.programMessage( PRG_NAME, "Total running time: " + ( new Date().getTime() - start_time ) + "ms " );
          ForesterUtil.programMessage( PRG_NAME, "Free memory       : " + free_memory + "MB, total memory: "
                  + total_memory + "MB" );
@@ -2111,28 +2107,28 @@ public class surfacing {
          System.out.println( "Usage:" );
          System.out.println();
          System.out.println( "% java -Xms256m -Xmx512m -cp forester.jar org.forester.applications." + surfacing.PRG_NAME
-                + " [options] <phylogen(y|ies) infile> [external node name 1] [name 2] ... [name n]" );
+                            + " [options] <phylogen(y|ies) infile> [external node name 1] [name 2] ... [name n]" );
          System.out.println();
          System.out.println( " Note: This software might need a significant amount of memory (heap space);" );
          System.out
-                .println( "       hence use \"-Xms128m -Xmx512m\" (or more) to prevent a \"java.lang.OutOfMemoryError\"." );
+        .println( "       hence use \"-Xms128m -Xmx512m\" (or more) to prevent a \"java.lang.OutOfMemoryError\"." );
          System.out.println();
          System.out.println( " Options: " );
          System.out.println( surfacing.DETAILEDNESS_OPTION + ": level of detail for similarities output file (default:"
                  + DETAILEDNESS_DEFAULT + ")" );
          System.out.println( surfacing.IGNORE_COMBINATION_WITH_SAME_OPTION
-                + ": to ignore combinations with self (default: not to ignore)" );
+                            + ": to ignore combinations with self (default: not to ignore)" );
          System.out
-                .println( surfacing.IGNORE_DOMAINS_WITHOUT_COMBINATIONS_IN_ALL_SPECIES_OPTION
-                        + ": to ignore domains without combinations in any species (for similarity calc purposes, not for parsimony analyses) (default: not to ignore)" );
+        .println( surfacing.IGNORE_DOMAINS_WITHOUT_COMBINATIONS_IN_ALL_SPECIES_OPTION
+                  + ": to ignore domains without combinations in any species (for similarity calc purposes, not for parsimony analyses) (default: not to ignore)" );
          System.out
-                .println( surfacing.IGNORE_DOMAINS_SPECIFIC_TO_ONE_SPECIES_OPTION
-                        + ": to ignore domains specific to one species (for similarity calc purposes, not for parsimony analyses) (default: not to ignore)" );
+        .println( surfacing.IGNORE_DOMAINS_SPECIFIC_TO_ONE_SPECIES_OPTION
+                  + ": to ignore domains specific to one species (for similarity calc purposes, not for parsimony analyses) (default: not to ignore)" );
          System.out.println( surfacing.NOT_IGNORE_DUFS_OPTION
-                + ": to _not_ ignore DUFs (domains with unknown function) (default: ignore DUFs)" );
+                            + ": to _not_ ignore DUFs (domains with unknown function) (default: ignore DUFs)" );
          System.out
-                .println( surfacing.IGNORE_VIRAL_IDS
-                        + ": to ignore domains with ids containing 'vir', 'retro', 'transpos', 'phage', or starting with 'rv' or 'gag_'" );
+        .println( surfacing.IGNORE_VIRAL_IDS
+                  + ": to ignore domains with ids containing 'vir', 'retro', 'transpos', 'phage', or starting with 'rv' or 'gag_'" );
          System.out.println( surfacing.DOMAIN_SIMILARITY_SORT_OPTION + ": sorting for similarities (default: "
                  + DOMAIN_SORT_FILD_DEFAULT + ")" );
          System.out.println( surfacing.OUTPUT_FILE_OPTION + ": name for (main) output file (mandatory)" );
@@ -2148,45 +2144,45 @@ public class surfacing {
                  + DOMAIN_SIMILARITY_PRINT_OPTION_DEFAULT + ")" );
          System.out.println( surfacing.CUTOFF_SCORE_FILE_OPTION + ": cutoff score file" );
          System.out.println( surfacing.DOMAIN_SIMILARITY_SORT_BY_SPECIES_COUNT_FIRST_OPTION
-                + ": sort by species count first" );
+                            + ": sort by species count first" );
          System.out.println( surfacing.OUTPUT_DIR_OPTION + ": output directory" );
          System.out.println( surfacing.PFAM_TO_GO_FILE_USE_OPTION + ": Pfam to GO mapping file" );
          System.out.println( surfacing.GO_OBO_FILE_USE_OPTION + ": GO terms file (OBO format)" );
          System.out.println( surfacing.GO_NAMESPACE_LIMIT_OPTION + ": limit GO term to one GO namespace" );
          System.out.println( surfacing.PAIRWISE_DOMAIN_COMPARISONS_OPTION
-                + "[=<suffix for pairwise comparison output files>]: to perform pairwise comparison based analyses" );
+                            + "[=<suffix for pairwise comparison output files>]: to perform pairwise comparison based analyses" );
          System.out.println( surfacing.INPUT_SPECIES_TREE_OPTION
-                + ": species tree, to perform (Dollo, Fitch) parismony analyses" );
+                            + ": species tree, to perform (Dollo, Fitch) parismony analyses" );
          System.out
-                .println( surfacing.INPUT_SPECIES_TREE_OPTION
-                        + "=<treefiles in phyloXML format, separated by #>: to infer domain/binary domain combination gains/losses on given species trees" );
+        .println( surfacing.INPUT_SPECIES_TREE_OPTION
+                  + "=<treefiles in phyloXML format, separated by #>: to infer domain/binary domain combination gains/losses on given species trees" );
          System.out.println( surfacing.FILTER_POSITIVE_OPTION
-                + "=<file>: to filter out proteins not containing at least one domain listed in <file>" );
+                            + "=<file>: to filter out proteins not containing at least one domain listed in <file>" );
          System.out.println( surfacing.FILTER_NEGATIVE_OPTION
-                + "=<file>: to filter out proteins containing at least one domain listed in <file>" );
+                            + "=<file>: to filter out proteins containing at least one domain listed in <file>" );
          System.out.println( surfacing.FILTER_NEGATIVE_DOMAINS_OPTION
-                + "=<file>: to filter out (ignore) domains listed in <file>" );
+                            + "=<file>: to filter out (ignore) domains listed in <file>" );
          System.out.println( surfacing.INPUT_GENOMES_FILE_OPTION + "=<file>: to read input files from <file>" );
          System.out
-                .println( surfacing.RANDOM_SEED_FOR_FITCH_PARSIMONY_OPTION
-                        + "=<seed>: seed for random number generator for Fitch Parsimony analysis (type: long, default: no randomization - given a choice, prefer absence" );
+        .println( surfacing.RANDOM_SEED_FOR_FITCH_PARSIMONY_OPTION
+                  + "=<seed>: seed for random number generator for Fitch Parsimony analysis (type: long, default: no randomization - given a choice, prefer absence" );
          System.out.println( surfacing.CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS
-                + ": to consider directedness in binary combinations: e.g. A-B != B-A" );
+                            + ": to consider directedness in binary combinations: e.g. A-B != B-A" );
          System.out.println( surfacing.CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS_AND_ADJACENCY
-                + ": to consider directedness and adjacency in binary combinations" );
+                            + ": to consider directedness and adjacency in binary combinations" );
          System.out
-                .println( surfacing.SEQ_EXTRACT_OPTION
-                        + "=<domain ids (Pfam names)>: to extract sequence names of sequences containing matching domains and/or domain-sequences (order N to C) (domain separator: '~', domain sequences speparator: '#', e.g. 'NACHT#BIR~CARD')" );
+        .println( surfacing.SEQ_EXTRACT_OPTION
+                  + "=<domain ids (Pfam names)>: to extract sequence names of sequences containing matching domains and/or domain-sequences (order N to C) (domain separator: '~', domain sequences speparator: '#', e.g. 'NACHT#BIR~CARD')" );
          System.out.println( surfacing.SECONDARY_FEATURES_PARSIMONY_MAP_FILE
-                + "=<file>: to perfom parsimony analysis on secondary features" );
+                            + "=<file>: to perfom parsimony analysis on secondary features" );
          System.out.println( surfacing.PLUS_MINUS_ANALYSIS_OPTION + "=<file>: to presence/absence genome analysis" );
          System.out.println( surfacing.DOMAIN_COMBINITONS_COUNTS_OUTPUT_OPTION
-                + ": to output binary domain counts (as individual files)" );
+                            + ": to output binary domain counts (as individual files)" );
          System.out.println( surfacing.DOMAIN_COMBINITONS_OUTPUT_OPTION_FOR_GRAPH_ANALYSIS
-                + ": to output binary domain combinations for (downstream) graph analysis" );
+                            + ": to output binary domain combinations for (downstream) graph analysis" );
          System.out.println( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_OPTIONS + ": to output all proteins per domain" );
          System.out.println( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION
-                + ": e value max per domain for output of all proteins per domain" );
+                            + ": e value max per domain for output of all proteins per domain" );
          System.out.println( surfacing.USE_LAST_IN_FITCH_OPTION + ": to use last in Fitch parsimony" );
          System.out.println( surfacing.WRITE_TO_NEXUS_OPTION + ": to output in Nexus format" );
          System.out.println( PERFORM_DC_FITCH + ": to perform DC Fitch parsimony" );
@@ -2194,16 +2190,12 @@ public class surfacing {
          System.out.println( DA_ANALYSIS_OPTION + ": to perform DA analysis" );
          System.out.println( PERFORM_DOMAIN_LENGTH_ANALYSIS_OPTION + ": to perform domain length analysis" );
          System.out.println();
-        System.out.println( "Example 1: java -Xms128m -Xmx512m -cp path/to/forester.jar"
-                + " org.forester.application.surfacing p2g=pfam2go_2012_02_07.txt -dufs -cos=Pfam_260_NC1"
-                + " -no_eo -mo=0 -genomes=eukaryotes.txt -out_dir=out -o=o "
-                + " -species_tree=tol.xml -obo=gene_ontology_2012_02_07.obo -pos_filter=f.txt -all_prot" );
          System.out.println();
-        System.out.println( "Example 2: java -Xms128m -Xmx512m -cp path/to/forester.jar"
-                + " org.forester.application.surfacing -detail=punctilious -o=TEST.html -pwc=TEST"
-                + " -cos=Pfam_ls_22_TC2 -p2g=pfam2go -obo=gene_ontology_edit.obo "
-                + "-dc_sort=dom -ignore_with_self -no_singles -ie=0.001 -mo=1 -no_eo -genomes=eukaryotes.txt "
-                + "-ds_output=detailed_html -scoring=domains -sort=alpha " );
+        System.out
+        .println( "Example 1: surfacing -p2g=pfam2go.txt -obo=go.obo -species_tree=tol_156.xml -no_eo -ie=0.01 -dufs -genomes=genomes_all.txt -pos_filter=tf_1.txt -out_dir=_tf1 -o=tf1" );
+        System.out.println();
+        System.out
+        .println( "Example 2: surfacing -p2g=pfam2go.txt -obo=go.obo -species_tree=tol_156.xml -last -ignore_viral_ids -no_eo -ie=0.1 -dufs -genomes=genomes_all.txt -pos_filter=tf_1.txt -all_prot -all_prot_e=0.1 -out_dir=_tf1_e01_ape01 -o=tf1_e01_ape01" );
          System.out.println();
      }
  }