inprogress
[jalview.git] / forester / java / src / org / forester / application / surfacing.java
index 162ef57..35346b1 100644 (file)
@@ -87,6 +87,7 @@ public class surfacing {
 
     private static final int                                        MINIMAL_NUMBER_OF_SIMILARITIES_FOR_SPLITTING                                  = 1000;
     public final static String                                      DOMAIN_COMBINITONS_OUTPUT_OPTION_FOR_GRAPH_ANALYSIS                           = "graph_analysis_out";
+    public final static String                                      DOMAIN_COMBINITONS_COUNTS_OUTPUT_OPTION                                       = "dcc";
     public final static String                                      DOMAIN_COMBINITONS_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS                       = "_dc.dot";
     public final static String                                      PARSIMONY_OUTPUT_FITCH_PRESENT_BC_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS        = "_fitch_present_dc.dot";
     public final static String                                      DOMAIN_COMBINITON_COUNTS_OUTPUTFILE_SUFFIX                                    = ".dcc";
@@ -171,12 +172,13 @@ public class surfacing {
     final static private String                                     DOMAIN_COUNT_SORT_COMBINATIONS_COUNT                                          = "comb";
     final static private String                                     CUTOFF_SCORE_FILE_OPTION                                                      = "cos";
     final static private String                                     NOT_IGNORE_DUFS_OPTION                                                        = "dufs";
-    final static private String                                     MAX_E_VALUE_OPTION                                                            = "e";
+    final static private String                                     MAX_FS_E_VALUE_OPTION                                                         = "fs_e";
+    final static private String                                     MAX_I_E_VALUE_OPTION                                                          = "ie";
     final static private String                                     MAX_ALLOWED_OVERLAP_OPTION                                                    = "mo";
     final static private String                                     NO_ENGULFING_OVERLAP_OPTION                                                   = "no_eo";
     final static private String                                     IGNORE_COMBINATION_WITH_SAME_OPTION                                           = "ignore_self_comb";
     final static private String                                     PERFORM_DC_REGAIN_PROTEINS_STATS_OPTION                                       = "dc_regain_stats";
-    final static private String                                     DA_ANALYSIS_OPTION                                                            = "DA_analyis";
+    final static private String                                     DA_ANALYSIS_OPTION                                                            = "da_analyis";
     final static private String                                     USE_LAST_IN_FITCH_OPTION                                                      = "last";
     public final static String                                      PAIRWISE_DOMAIN_COMPARISONS_PREFIX                                            = "pwc_";
     final static private String                                     PAIRWISE_DOMAIN_COMPARISONS_OPTION                                            = "pwc";
@@ -213,8 +215,8 @@ public class surfacing {
     final static private String                                     INPUT_GENOMES_FILE_OPTION                                                     = "genomes";
     final static private String                                     INPUT_SPECIES_TREE_OPTION                                                     = "species_tree";
     final static private String                                     SEQ_EXTRACT_OPTION                                                            = "prot_extract";
-    final static private String                                     PRG_VERSION                                                                   = "2.401";
-    final static private String                                     PRG_DATE                                                                      = "131125";
+    final static private String                                     PRG_VERSION                                                                   = "2.403";
+    final static private String                                     PRG_DATE                                                                      = "131127";
     final static private String                                     E_MAIL                                                                        = "czmasek@burnham.org";
     final static private String                                     WWW                                                                           = "https://sites.google.com/site/cmzmasek/home/software/forester/surfacing";
     final static private boolean                                    IGNORE_DUFS_DEFAULT                                                           = true;
@@ -239,7 +241,7 @@ public class surfacing {
     private static final String                                     OUTPUT_DOMAIN_COMBINATIONS_GAINED_MORE_THAN_ONCE_ANALYSIS_SUFFIX              = "_fitch_dc_gains_counts";
     private static final String                                     OUTPUT_DOMAIN_COMBINATIONS_LOST_MORE_THAN_ONCE_ANALYSIS_SUFFIX                = "_fitch_dc_losses_counts";
     private static final String                                     DOMAIN_LENGTHS_ANALYSIS_SUFFIX                                                = "_domain_lengths_analysis";
-    private static final boolean                                    PERFORM_DOMAIN_LENGTH_ANALYSIS                                                = true;
+    private static final String                                     PERFORM_DOMAIN_LENGTH_ANALYSIS_OPTION                                         = "dla";
     public static final String                                      ALL_PFAMS_ENCOUNTERED_SUFFIX                                                  = "_all_encountered_pfams";
     public static final String                                      ALL_PFAMS_ENCOUNTERED_WITH_GO_ANNOTATION_SUFFIX                               = "_all_encountered_pfams_with_go_annotation";
     public static final String                                      ENCOUNTERED_PFAMS_SUMMARY_SUFFIX                                              = "_encountered_pfams_summary";
@@ -254,6 +256,7 @@ public class surfacing {
     private static final String                                     DATA_FILE_SUFFIX                                                              = "_domain_combination_data.txt";
     private static final String                                     DATA_FILE_DESC                                                                = "#SPECIES\tPRTEIN_ID\tN_TERM_DOMAIN\tC_TERM_DOMAIN\tN_TERM_DOMAIN_PER_DOMAIN_E_VALUE\tC_TERM_DOMAIN_PER_DOMAIN_E_VALUE\tN_TERM_DOMAIN_COUNTS_PER_PROTEIN\tC_TERM_DOMAIN_COUNTS_PER_PROTEIN";
     private static final String                                     WRITE_TO_NEXUS_OPTION                                                         = "nexus";
+    private static final String                                     PERFORM_DC_FITCH                                                              = "dc_pars";
     private static final INDIVIDUAL_SCORE_CUTOFF                    INDIVIDUAL_SCORE_CUTOFF_DEFAULT                                               = INDIVIDUAL_SCORE_CUTOFF.FULL_SEQUENCE;                                                                                                                                                      //TODO look at me! change?
     public static final String                                      INDEPENDENT_DC_GAINS_FITCH_PARS_COUNTS_OUTPUT_SUFFIX                          = "_indep_dc_gains_fitch_counts.txt";
     public static final String                                      INDEPENDENT_DC_GAINS_FITCH_PARS_DC_OUTPUT_SUFFIX                              = "_indep_dc_gains_fitch_lists.txt";
@@ -301,7 +304,8 @@ public class surfacing {
         }
         final List<String> allowed_options = new ArrayList<String>();
         allowed_options.add( surfacing.NOT_IGNORE_DUFS_OPTION );
-        allowed_options.add( surfacing.MAX_E_VALUE_OPTION );
+        allowed_options.add( surfacing.MAX_FS_E_VALUE_OPTION );
+        allowed_options.add( surfacing.MAX_I_E_VALUE_OPTION );
         allowed_options.add( surfacing.DETAILEDNESS_OPTION );
         allowed_options.add( surfacing.OUTPUT_FILE_OPTION );
         allowed_options.add( surfacing.DOMAIN_SIMILARITY_SORT_OPTION );
@@ -333,15 +337,19 @@ public class surfacing {
         allowed_options.add( SECONDARY_FEATURES_PARSIMONY_MAP_FILE );
         allowed_options.add( PLUS_MINUS_ANALYSIS_OPTION );
         allowed_options.add( DOMAIN_COMBINITONS_OUTPUT_OPTION_FOR_GRAPH_ANALYSIS );
+        allowed_options.add( DOMAIN_COMBINITONS_COUNTS_OUTPUT_OPTION );
         allowed_options.add( OUTPUT_LIST_OF_ALL_PROTEINS_OPTIONS );
         allowed_options.add( CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS_AND_ADJACENCY );
         allowed_options.add( WRITE_TO_NEXUS_OPTION );
         allowed_options.add( PERFORM_DC_REGAIN_PROTEINS_STATS_OPTION );
         allowed_options.add( DA_ANALYSIS_OPTION );
         allowed_options.add( USE_LAST_IN_FITCH_OPTION );
+        allowed_options.add( PERFORM_DC_FITCH );
+        allowed_options.add( PERFORM_DOMAIN_LENGTH_ANALYSIS_OPTION );
         boolean ignore_dufs = surfacing.IGNORE_DUFS_DEFAULT;
         boolean ignore_combination_with_same = surfacing.IGNORE_COMBINATION_WITH_SAME_DEFAULLT;
-        double e_value_max = surfacing.MAX_E_VALUE_DEFAULT;
+        double fs_e_value_max = surfacing.MAX_E_VALUE_DEFAULT;
+        double ie_value_max = surfacing.MAX_E_VALUE_DEFAULT;
         int max_allowed_overlap = surfacing.MAX_ALLOWED_OVERLAP_DEFAULT;
         final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
         if ( dissallowed_options.length() > 0 ) {
@@ -355,6 +363,10 @@ public class surfacing {
         if ( cla.isOptionSet( WRITE_TO_NEXUS_OPTION ) ) {
             write_to_nexus = true;
         }
+        boolean perform_dc_fich = false;
+        if ( cla.isOptionSet( PERFORM_DC_FITCH ) ) {
+            perform_dc_fich = true;
+        }
         boolean perform_dc_regain_proteins_stats = false;
         if ( cla.isOptionSet( PERFORM_DC_REGAIN_PROTEINS_STATS_OPTION ) ) {
             perform_dc_regain_proteins_stats = true;
@@ -367,9 +379,21 @@ public class surfacing {
         if ( cla.isOptionSet( DOMAIN_COMBINITONS_OUTPUT_OPTION_FOR_GRAPH_ANALYSIS ) ) {
             output_binary_domain_combinationsfor_graph_analysis = true;
         }
-        if ( cla.isOptionSet( surfacing.MAX_E_VALUE_OPTION ) ) {
+        boolean output_binary_domain_combinationsfor_counts = false;
+        if ( cla.isOptionSet( DOMAIN_COMBINITONS_COUNTS_OUTPUT_OPTION ) ) {
+            output_binary_domain_combinationsfor_counts = true;
+        }
+        if ( cla.isOptionSet( surfacing.MAX_FS_E_VALUE_OPTION ) ) { // "fs_e": upper bound later handed to parser.setFsEValueMaximum()
+            try {
+                fs_e_value_max = cla.getOptionValueAsDouble( surfacing.MAX_FS_E_VALUE_OPTION );
+            }
+            catch ( final Exception e ) {
+                ForesterUtil.fatalError( surfacing.PRG_NAME, "no acceptable value for FS E-value maximum" ); // name the option so it is distinguishable from the iE-value error
+            }
+        }
+        if ( cla.isOptionSet( surfacing.MAX_I_E_VALUE_OPTION ) ) {
             try {
-                e_value_max = cla.getOptionValueAsDouble( surfacing.MAX_E_VALUE_OPTION );
+                ie_value_max = cla.getOptionValueAsDouble( surfacing.MAX_I_E_VALUE_OPTION );
             }
             catch ( final Exception e ) {
                 ForesterUtil.fatalError( surfacing.PRG_NAME, "no acceptable value for E-value maximum" );
@@ -397,6 +421,10 @@ public class surfacing {
         if ( cla.isOptionSet( surfacing.IGNORE_COMBINATION_WITH_SAME_OPTION ) ) {
             ignore_combination_with_same = true;
         }
+        boolean domain_length_analysis = false;
+        if ( cla.isOptionSet( surfacing.PERFORM_DOMAIN_LENGTH_ANALYSIS_OPTION ) ) {
+            domain_length_analysis = true;
+        }
         boolean ignore_domains_without_combs_in_all_spec = IGNORE_DOMAINS_WITHOUT_COMBINATIONS_IN_ALL_SPECIES_DEFAULT;
         if ( cla.isOptionSet( surfacing.IGNORE_DOMAINS_WITHOUT_COMBINATIONS_IN_ALL_SPECIES_OPTION ) ) {
             ignore_domains_without_combs_in_all_spec = true;
@@ -940,7 +968,7 @@ public class surfacing {
         File[] secondary_features_map_files = null;
         final File domain_lengths_analysis_outfile = new File( out_dir + ForesterUtil.FILE_SEPARATOR + output_file
                 + DOMAIN_LENGTHS_ANALYSIS_SUFFIX );
-        if ( PERFORM_DOMAIN_LENGTH_ANALYSIS ) {
+        if ( domain_length_analysis ) {
             SurfacingUtil.checkForOutputFileWriteability( domain_lengths_analysis_outfile );
         }
         if ( cla.isOptionSet( surfacing.SECONDARY_FEATURES_PARSIMONY_MAP_FILE ) ) {
@@ -1037,9 +1065,13 @@ public class surfacing {
             System.out.println( "Cutoff scores file          : " + cutoff_scores_file );
             html_desc.append( "<tr><td>Cutoff scores file:</td><td>" + cutoff_scores_file + "</td></tr>" + nl );
         }
-        if ( e_value_max >= 0.0 ) {
-            System.out.println( "E-value maximum (inclusive) : " + e_value_max );
-            html_desc.append( "<tr><td>E-value maximum (inclusive):</td><td>" + e_value_max + "</td></tr>" + nl );
+        if ( ie_value_max >= 0.0 ) {
+            System.out.println( "iE-value maximum (incl)     : " + ie_value_max );
+            html_desc.append( "<tr><td>iE-value maximum (inclusive):</td><td>" + ie_value_max + "</td></tr>" + nl );
+        }
+        if ( fs_e_value_max >= 0.0 ) {
+            System.out.println( "FS E-value maximum (incl)   : " + fs_e_value_max );
+            html_desc.append( "<tr><td>FS E-value maximum (inclusive):</td><td>" + fs_e_value_max + "</td></tr>" + nl );
         }
         if ( output_protein_lists_for_all_domains ) {
             System.out.println( "Domain E-value max          : " + output_list_of_all_proteins_per_domain_e_value_max );
@@ -1084,14 +1116,20 @@ public class surfacing {
                     + ( dc_type == BinaryDomainCombination.DomainCombinationType.DIRECTED_ADJACTANT ) + "</td></tr>"
                     + nl );
         }
-        System.out.println( "Use last in Fitch parimony  : " + use_last_in_fitch_parsimony );
-        html_desc.append( "<tr><td>Use last in Fitch parimon:</td><td>" + use_last_in_fitch_parsimony + "</td></tr>"
-                + nl );
+        System.out.println( "Fitch parsimony of DCs      : " + perform_dc_fich );
+        html_desc.append( "<tr><td>Fitch parsimony of DCs:</td><td>" + perform_dc_fich + "</td></tr>" + nl );
+        if ( perform_dc_fich ) {
+            System.out.println( "Use last in Fitch parsimony : " + use_last_in_fitch_parsimony );
+            html_desc.append( "<tr><td>Use last in Fitch parsimony:</td><td>" + use_last_in_fitch_parsimony
+                    + "</td></tr>" + nl );
+        }
         System.out.println( "Write to Nexus files        : " + write_to_nexus );
         html_desc.append( "<tr><td>Write to Nexus files:</td><td>" + write_to_nexus + "</td></tr>" + nl );
-        System.out.println( "DC regain prot stats        : " + perform_dc_regain_proteins_stats );
-        html_desc.append( "<tr><td>DC regain prot stats:</td><td>" + perform_dc_regain_proteins_stats + "</td></tr>"
-                + nl );
+        if ( perform_dc_fich ) {
+            System.out.println( "DC regain prot stats        : " + perform_dc_regain_proteins_stats );
+            html_desc.append( "<tr><td>DC regain prot stats:</td><td>" + perform_dc_regain_proteins_stats
+                    + "</td></tr>" + nl );
+        }
         System.out.println( "DA analysis                 : " + da_analysis );
         html_desc.append( "<tr><td>DA analysis :</td><td>" + da_analysis + "</td></tr>" + nl );
         System.out.print( "Domain counts sort order    : " );
@@ -1310,7 +1348,8 @@ public class surfacing {
             }
         } // if ( perform_pwc ) {
         System.out.println();
-        html_desc.append( "<tr><td>Command line:</td><td>\n" + cla.getCommandLineArgsAsString() + "\n</td></tr>" + nl );
+        html_desc.append( "<tr><td>Command line:</td><td>" + nl + nl + cla.getCommandLineArgsAsString() + nl + nl
+                + "</td></tr>" + nl );
         System.out.println( "Command line                : " + cla.getCommandLineArgsAsString() );
         BufferedWriter[] query_domains_writer_ary = null;
         List<String>[] query_domain_ids_array = null;
@@ -1357,7 +1396,6 @@ public class surfacing {
             all_bin_domain_combinations_gained_fitch = new ArrayList<BinaryDomainCombination>();
             all_bin_domain_combinations_lost_fitch = new ArrayList<BinaryDomainCombination>();
         }
-        DomainLengthsTable domain_lengths_table = new DomainLengthsTable();
         final File per_genome_domain_promiscuity_statistics_file = new File( out_dir + ForesterUtil.FILE_SEPARATOR
                 + output_file + D_PROMISCUITY_FILE_SUFFIX );
         BufferedWriter per_genome_domain_promiscuity_statistics_writer = null;
@@ -1393,8 +1431,8 @@ public class surfacing {
         catch ( final IOException e2 ) {
             ForesterUtil.fatalError( surfacing.PRG_NAME, e2.getMessage() );
         }
-        final DescriptiveStatistics protein_coverage_stats = new BasicDescriptiveStatistics();
-        final DescriptiveStatistics all_genomes_domains_per_potein_stats = new BasicDescriptiveStatistics();
+        DescriptiveStatistics protein_coverage_stats = new BasicDescriptiveStatistics();
+        DescriptiveStatistics all_genomes_domains_per_potein_stats = new BasicDescriptiveStatistics();
         final SortedMap<Integer, Integer> all_genomes_domains_per_potein_histo = new TreeMap<Integer, Integer>();
         final SortedSet<String> domains_which_are_always_single = new TreeSet<String>();
         final SortedSet<String> domains_which_are_sometimes_single_sometimes_not = new TreeSet<String>();
@@ -1428,6 +1466,10 @@ public class surfacing {
             protein_length_stats_by_dc = new HashMap<String, DescriptiveStatistics>();
             domain_number_stats_by_dc = new HashMap<String, DescriptiveStatistics>();
         }
+        DomainLengthsTable domain_lengths_table = null; // stays null unless the domain length analysis option ("dla") was given; every use must be guarded by domain_length_analysis
+        if ( domain_length_analysis ) {
+            domain_lengths_table = new DomainLengthsTable();
+        }
         // Main loop:
         final SortedMap<String, Set<String>> distinct_domain_architecutures_per_genome = new TreeMap<String, Set<String>>();
         final SortedMap<String, Integer> distinct_domain_architecuture_counts = new TreeMap<String, Integer>();
@@ -1469,8 +1511,11 @@ public class surfacing {
                                                           ind_score_cutoff,
                                                           true );
             }
-            if ( e_value_max >= 0.0 ) {
-                parser.setEValueMaximum( e_value_max );
+            if ( fs_e_value_max >= 0.0 ) {
+                parser.setFsEValueMaximum( fs_e_value_max );
+            }
+            if ( ie_value_max >= 0.0 ) {
+                parser.setIEValueMaximum( ie_value_max );
             }
             parser.setIgnoreDufs( ignore_dufs );
             parser.setIgnoreVirusLikeIds( ignore_virus_like_ids );
@@ -1533,10 +1578,15 @@ public class surfacing {
             SurfacingUtil.log( "Domains ignored due to individual score cutoffs: "
                                        + parser.getDomainsIgnoredDueToIndividualScoreCutoff(),
                                log_writer );
-            System.out.println( "Domains ignored due to E-value                 : "
-                    + parser.getDomainsIgnoredDueToEval() );
-            SurfacingUtil.log( "Domains ignored due to E-value                 : "
-                                       + parser.getDomainsIgnoredDueToEval(),
+            System.out.println( "Domains ignored due to FS E-value              : "
+                    + parser.getDomainsIgnoredDueToFsEval() );
+            SurfacingUtil.log( "Domains ignored due to FS E-value              : "
+                                       + parser.getDomainsIgnoredDueToFsEval(),
+                               log_writer );
+            System.out.println( "Domains ignored due to iE-value                : "
+                    + parser.getDomainsIgnoredDueToIEval() );
+            SurfacingUtil.log( "Domains ignored due to iE-value                : "
+                                       + parser.getDomainsIgnoredDueToIEval(),
                                log_writer );
             System.out.println( "Domains ignored due to DUF designation         : "
                     + parser.getDomainsIgnoredDueToDuf() );
@@ -1606,7 +1656,9 @@ public class surfacing {
                                                         domains_which_are_sometimes_single_sometimes_not,
                                                         domains_which_never_single,
                                                         domains_per_potein_stats_writer );
-            domain_lengths_table.addLengths( protein_list );
+            if ( domain_length_analysis ) {
+                domain_lengths_table.addLengths( protein_list );
+            }
             if ( !da_analysis ) {
                 gwcd_list.add( BasicGenomeWideCombinableDomains
                         .createInstance( protein_list,
@@ -1617,12 +1669,15 @@ public class surfacing {
                                          protein_length_stats_by_dc,
                                          domain_number_stats_by_dc ) );
                 if ( gwcd_list.get( i ).getSize() > 0 ) {
-                    SurfacingUtil.writeDomainCombinationsCountsFile( input_file_properties,
-                                                                     out_dir,
-                                                                     per_genome_domain_promiscuity_statistics_writer,
-                                                                     gwcd_list.get( i ),
-                                                                     i,
-                                                                     dc_sort_order );
+                    if ( output_binary_domain_combinationsfor_counts ) {
+                        SurfacingUtil
+                                .writeDomainCombinationsCountsFile( input_file_properties,
+                                                                    out_dir,
+                                                                    per_genome_domain_promiscuity_statistics_writer,
+                                                                    gwcd_list.get( i ),
+                                                                    i,
+                                                                    dc_sort_order );
+                    }
                     if ( output_binary_domain_combinationsfor_graph_analysis ) {
                         SurfacingUtil.writeBinaryDomainCombinationsFileForGraphAnalysis( input_file_properties,
                                                                                          out_dir,
@@ -1683,8 +1738,10 @@ public class surfacing {
             domains_per_potein_stats_writer.write( "\t" );
             domains_per_potein_stats_writer.write( all_genomes_domains_per_potein_stats.sampleStandardDeviation() + "" );
             domains_per_potein_stats_writer.write( "\t" );
-            domains_per_potein_stats_writer.write( all_genomes_domains_per_potein_stats.median() + "" );
-            domains_per_potein_stats_writer.write( "\t" );
+            if ( all_genomes_domains_per_potein_stats.getN() <= 300 ) { // the median is only written for samples of at most 300 values
+                domains_per_potein_stats_writer.write( all_genomes_domains_per_potein_stats.median() + "" );
+            }
+            domains_per_potein_stats_writer.write( "\t" ); // always emit the separator: an empty median field keeps N/min/max in their columns
             domains_per_potein_stats_writer.write( all_genomes_domains_per_potein_stats.getN() + "" );
             domains_per_potein_stats_writer.write( "\t" );
             domains_per_potein_stats_writer.write( all_genomes_domains_per_potein_stats.getMin() + "" );
@@ -1692,6 +1749,7 @@ public class surfacing {
             domains_per_potein_stats_writer.write( all_genomes_domains_per_potein_stats.getMax() + "" );
             domains_per_potein_stats_writer.write( "\n" );
             domains_per_potein_stats_writer.close();
+            all_genomes_domains_per_potein_stats = null;
             SurfacingUtil.printOutPercentageOfMultidomainProteins( all_genomes_domains_per_potein_histo, log_writer );
             ForesterUtil.map2file( new File( out_dir + ForesterUtil.FILE_SEPARATOR + output_file
                     + "_all_genomes_domains_per_potein_histo.txt" ), all_genomes_domains_per_potein_histo, "\t", "\n" );
@@ -1716,6 +1774,7 @@ public class surfacing {
                                        + ( 100 * protein_coverage_stats.getMin() ) + "%-"
                                        + ( 100 * protein_coverage_stats.getMax() ) + "%",
                                log_writer );
+            protein_coverage_stats = null;
         }
         catch ( final IOException e2 ) {
             ForesterUtil.fatalError( surfacing.PRG_NAME, e2.getLocalizedMessage() );
@@ -1738,7 +1797,7 @@ public class surfacing {
         catch ( final IOException e2 ) {
             ForesterUtil.fatalError( surfacing.PRG_NAME, e2.getLocalizedMessage() );
         }
-        if ( PERFORM_DOMAIN_LENGTH_ANALYSIS ) {
+        if ( domain_length_analysis ) {
             try {
                 SurfacingUtil.executeDomainLengthAnalysis( input_file_properties,
                                                            number_of_genomes,
@@ -1758,7 +1817,8 @@ public class surfacing {
         final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( domain_similarity_sort_field,
                                                                                      sort_by_species_count_first,
                                                                                      number_of_genomes == 2,
-                                                                                     CALC_SIMILARITY_SCORES );
+                                                                                     CALC_SIMILARITY_SCORES,
+                                                                                     true );
         switch ( scoring ) {
             case COMBINATIONS:
                 pw_calc = new CombinationsBasedPairwiseDomainSimilarityCalculator();
@@ -1908,7 +1968,8 @@ public class surfacing {
         }
         if ( ( ( intrees != null ) && ( intrees.length > 0 ) ) && ( number_of_genomes > 2 ) ) {
             final StringBuilder parameters_sb = SurfacingUtil.createParametersAsString( ignore_dufs,
-                                                                                        e_value_max,
+                                                                                        ie_value_max,
+                                                                                        fs_e_value_max,
                                                                                         max_allowed_overlap,
                                                                                         no_engulfing_overlaps,
                                                                                         cutoff_scores_file,
@@ -1943,7 +2004,8 @@ public class surfacing {
                                                         domain_length_stats_by_domain,
                                                         tax_code_to_id_map,
                                                         write_to_nexus,
-                                                        use_last_in_fitch_parsimony );
+                                                        use_last_in_fitch_parsimony,
+                                                        perform_dc_fich );
                 // Listing of all domain combinations gained is only done if only one input tree is used. 
                 if ( ( domain_id_to_secondary_features_maps != null )
                         && ( domain_id_to_secondary_features_maps.length > 0 ) ) {
@@ -2000,7 +2062,8 @@ public class surfacing {
             SurfacingUtil.writeProteinListsForAllSpecies( out_dir,
                                                           protein_lists_per_species,
                                                           gwcd_list,
-                                                          output_list_of_all_proteins_per_domain_e_value_max );
+                                                          output_list_of_all_proteins_per_domain_e_value_max,
+                                                          positive_filter_file != null ? filter : null );
         }
         gwcd_list = null;
         if ( all_bin_domain_combinations_gained_fitch != null ) {
@@ -2075,7 +2138,8 @@ public class surfacing {
         System.out.println( surfacing.DOMAIN_SIMILARITY_SORT_OPTION + ": sorting for similarities (default: "
                 + DOMAIN_SORT_FILD_DEFAULT + ")" );
         System.out.println( surfacing.OUTPUT_FILE_OPTION + ": name for (main) output file (mandatory)" );
-        System.out.println( surfacing.MAX_E_VALUE_OPTION + ": max (inclusive) E-value" );
+        System.out.println( surfacing.MAX_I_E_VALUE_OPTION + ": max (inclusive) iE-value" );
+        System.out.println( surfacing.MAX_FS_E_VALUE_OPTION + ": max (inclusive) FS E-value" );
         System.out.println( surfacing.MAX_ALLOWED_OVERLAP_OPTION + ": maximal allowed domain overlap" );
         System.out.println( surfacing.NO_ENGULFING_OVERLAP_OPTION + ": to ignore engulfed lower confidence domains" );
         System.out.println( surfacing.SPECIES_MATRIX_OPTION + ": species matrix" );
@@ -2118,6 +2182,8 @@ public class surfacing {
         System.out.println( surfacing.SECONDARY_FEATURES_PARSIMONY_MAP_FILE
                 + "=<file>: to perfom parsimony analysis on secondary features" );
         System.out.println( surfacing.PLUS_MINUS_ANALYSIS_OPTION + "=<file>: to presence/absence genome analysis" );
+        System.out.println( surfacing.DOMAIN_COMBINITONS_COUNTS_OUTPUT_OPTION
+                + ": to output binary domain counts (as individual files)" );
         System.out.println( surfacing.DOMAIN_COMBINITONS_OUTPUT_OPTION_FOR_GRAPH_ANALYSIS
                 + ": to output binary domain combinations for (downstream) graph analysis" );
         System.out.println( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_OPTIONS + ": to output all proteins per domain" );
@@ -2125,19 +2191,22 @@ public class surfacing {
                 + ": e value max per domain for output of all proteins per domain" );
         System.out.println( surfacing.USE_LAST_IN_FITCH_OPTION + ": to use last in Fitch parsimony" );
         System.out.println( surfacing.WRITE_TO_NEXUS_OPTION + ": to output in Nexus format" );
+        System.out.println( PERFORM_DC_FITCH + ": to perform DC Fitch parsimony" );
         System.out.println( PERFORM_DC_REGAIN_PROTEINS_STATS_OPTION + ": to perform DC regain protein statistics" );
-        System.out.println( DA_ANALYSIS_OPTION + ": to do DA analysis" );
+        System.out.println( DA_ANALYSIS_OPTION + ": to perform DA analysis" );
+        System.out.println( PERFORM_DOMAIN_LENGTH_ANALYSIS_OPTION + ": to perform domain length analysis" );
         System.out.println();
-        System.out.println( "Example 1: java -Xms128m -Xmx512m -cp path/to/forester.jar"
-                + " org.forester.application.surfacing p2g=pfam2go_2012_02_07.txt -dufs -cos=Pfam_260_NC1"
+        System.out.println( "Example 1: surfacing -p2g=pfam2go_2012_02_07.txt -dufs -cos=Pfam_260_NC1"
                 + " -no_eo -mo=0 -genomes=eukaryotes.txt -out_dir=out -o=o "
                 + " -species_tree=tol.xml -obo=gene_ontology_2012_02_07.obo -pos_filter=f.txt -all_prot" );
         System.out.println();
-        System.out.println( "Example 2: java -Xms128m -Xmx512m -cp path/to/forester.jar"
-                + " org.forester.application.surfacing -detail=punctilious -o=TEST.html -pwc=TEST"
+        System.out.println( "Example 2: surfacing -detail=punctilious -o=TEST.html -pwc=TEST"
                 + " -cos=Pfam_ls_22_TC2 -p2g=pfam2go -obo=gene_ontology_edit.obo "
-                + "-dc_sort=dom -ignore_with_self -no_singles -e=0.001 -mo=1 -no_eo -genomes=eukaryotes.txt "
+                + "-dc_sort=dom -ignore_with_self -no_singles -ie=0.001 -mo=1 -no_eo -genomes=eukaryotes.txt "
                 + "-ds_output=detailed_html -scoring=domains -sort=alpha " );
         System.out.println();
+        System.out // usage example 3: run with species tree, positive filter and per-domain protein lists
+                .println( "Example 3: surfacing -p2g=pfam2go_130621.txt -obo=gene_ontology_130621.obo -species_tree=tol_155.xml -last -detail=punctilious -ignore_viral_ids -no_eo -ie=1 -dufs -genomes=genomes_all.txt -pos_filter=tf_1.txt -all_prot -all_prot_e=1 -out_dir=_tf1_e1_ape1 -o=tf1_e1_ape1" );
+        System.out.println();
     }
 }