inprogress
[jalview.git] / forester / java / src / org / forester / application / surfacing.java
index 295844b..095f730 100644 (file)
@@ -225,10 +225,10 @@ public class surfacing {
     final static private String                               INPUT_GENOMES_FILE_OPTION                                                     = "genomes";
     final static private String                               INPUT_SPECIES_TREE_OPTION                                                     = "species_tree";
     final static private String                               SEQ_EXTRACT_OPTION                                                            = "prot_extract";
-    final static private String                               PRG_VERSION                                                                   = "2.280";
-    final static private String                               PRG_DATE                                                                      = "130701";
+    final static private String                               PRG_VERSION                                                                   = "2.302";
+    final static private String                               PRG_DATE                                                                      = "130715";
     final static private String                               E_MAIL                                                                        = "czmasek@burnham.org";
-    final static private String                               WWW                                                                           = "www.phylosoft.org/forester/applications/surfacing";
+    final static private String                               WWW                                                                           = "https://sites.google.com/site/cmzmasek/home/software/forester/surfacing";
     final static private boolean                              IGNORE_DUFS_DEFAULT                                                           = true;
     final static private boolean                              IGNORE_COMBINATION_WITH_SAME_DEFAULLT                                         = false;
     final static private double                               MAX_E_VALUE_DEFAULT                                                           = -1;
@@ -278,6 +278,7 @@ public class surfacing {
     public static final String                                INDEPENDENT_DC_GAINS_FITCH_PARS_DC_MAPPED_OUTPUT_SUFFIX                       = "_indep_dc_gains_fitch_lists_MAPPED.txt";
     public static final String                                INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_SUFFIX        = "_indep_dc_gains_fitch_lists_for_go_mapping_MAPPED.txt";
     public static final String                                INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique_MAPPED.txt";
+    private static final boolean                              CALC_SIMILARITY_SCORES                                                        = false;
 
     private static void checkWriteabilityForPairwiseComparisons( final PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option,
                                                                  final String[][] input_file_properties,
@@ -600,9 +601,6 @@ public class surfacing {
         allowed_options.add( surfacing.PAIRWISE_DOMAIN_COMPARISONS_OPTION );
         allowed_options.add( surfacing.IGNORE_DOMAINS_WITHOUT_COMBINATIONS_IN_ALL_SPECIES_OPTION );
         allowed_options.add( surfacing.CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS );
-        //allowed_options.add( JACKNIFE_OPTION );
-        // allowed_options.add( JACKNIFE_RANDOM_SEED_OPTION );
-        // allowed_options.add( JACKNIFE_RATIO_OPTION );
         allowed_options.add( INPUT_SPECIES_TREE_OPTION );
         allowed_options.add( FILTER_POSITIVE_OPTION );
         allowed_options.add( FILTER_NEGATIVE_OPTION );
@@ -1634,7 +1632,7 @@ public class surfacing {
             all_bin_domain_combinations_gained_fitch = new ArrayList<BinaryDomainCombination>();
             all_bin_domain_combinations_lost_fitch = new ArrayList<BinaryDomainCombination>();
         }
-        final DomainLengthsTable domain_lengths_table = new DomainLengthsTable();
+        DomainLengthsTable domain_lengths_table = new DomainLengthsTable();
         final File per_genome_domain_promiscuity_statistics_file = new File( out_dir + ForesterUtil.FILE_SEPARATOR
                 + output_file + D_PROMISCUITY_FILE_SUFFIX );
         BufferedWriter per_genome_domain_promiscuity_statistics_writer = null;
@@ -1844,23 +1842,6 @@ public class surfacing {
             }
             System.out.println( "Time for processing                            : " + parser.getTime() + "ms" );
             log( "", log_writer );
-            html_desc.append( "<tr><td>" + input_file_properties[ i ][ 0 ] + " [species: "
-                    + input_file_properties[ i ][ 1 ] + "]" + ":</td><td>domains analyzed: "
-                    + parser.getDomainsStored() + "; domains ignored: [ind score cutoffs: "
-                    + parser.getDomainsIgnoredDueToIndividualScoreCutoff() + "] [E-value cutoff: "
-                    + parser.getDomainsIgnoredDueToEval() + "] [DUF: " + parser.getDomainsIgnoredDueToDuf()
-                    + "] [virus like ids: " + parser.getDomainsIgnoredDueToVirusLikeIds()
-                    + "] [negative domain filter: " + parser.getDomainsIgnoredDueToNegativeDomainFilter()
-                    + "] [overlap: " + parser.getDomainsIgnoredDueToOverlap() + "]" );
-            if ( negative_filter_file != null ) {
-                html_desc.append( "; proteins ignored due to negative filter: "
-                        + parser.getProteinsIgnoredDueToFilter() );
-            }
-            if ( positive_filter_file != null ) {
-                html_desc.append( "; proteins ignored due to positive filter: "
-                        + parser.getProteinsIgnoredDueToFilter() );
-            }
-            html_desc.append( "</td></tr>" + nl );
             try {
                 int count = 0;
                 for( final Protein protein : protein_list ) {
@@ -2029,12 +2010,13 @@ public class surfacing {
             ForesterUtil.programMessage( PRG_NAME, "Wrote domain length data to: " + domain_lengths_analysis_outfile );
             System.out.println();
         }
+        domain_lengths_table = null;
         final long analysis_start_time = new Date().getTime();
         PairwiseDomainSimilarityCalculator pw_calc = null;
-        // double[] values_for_all_scores_histogram = null;
         final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( domain_similarity_sort_field,
                                                                                      sort_by_species_count_first,
-                                                                                     number_of_genomes == 2 );
+                                                                                     number_of_genomes == 2,
+                                                                                     CALC_SIMILARITY_SCORES );
         switch ( scoring ) {
             case COMBINATIONS:
                 pw_calc = new CombinationsBasedPairwiseDomainSimilarityCalculator();
@@ -2057,11 +2039,7 @@ public class surfacing {
                                         gwcd_list,
                                         ignore_domains_without_combs_in_all_spec,
                                         ignore_species_specific_domains );
-        SurfacingUtil.decoratePrintableDomainSimilarities( similarities,
-                                                           detailedness,
-                                                           go_annotation_output,
-                                                           go_id_to_term_map,
-                                                           go_namespace_limit );
+        SurfacingUtil.decoratePrintableDomainSimilarities( similarities, detailedness );
         final Map<String, Integer> tax_code_to_id_map = SurfacingUtil.createTaxCodeToIdMap( intrees[ 0 ] );
         try {
             String my_outfile = output_file.toString();
@@ -2093,19 +2071,21 @@ public class surfacing {
                     + new java.text.SimpleDateFormat( "yyyy.MM.dd HH:mm:ss" ).format( new java.util.Date() )
                     + "</td></tr>" + nl );
             html_desc.append( "</table>" + nl );
-            final DescriptiveStatistics pw_stats = SurfacingUtil
-                    .writeDomainSimilaritiesToFile( html_desc,
-                                                    new StringBuilder( number_of_genomes + " genomes" ),
-                                                    writer,
-                                                    split_writers,
-                                                    similarities,
-                                                    number_of_genomes == 2,
-                                                    species_order,
-                                                    domain_similarity_print_option,
-                                                    domain_similarity_sort_field,
-                                                    scoring,
-                                                    true,
-                                                    tax_code_to_id_map );
+            final Writer simple_tab_writer = new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR
+                    + my_outfile + ".tsv" ) );
+            SurfacingUtil.writeDomainSimilaritiesToFile( html_desc,
+                                                         new StringBuilder( number_of_genomes + " genomes" ),
+                                                         simple_tab_writer,
+                                                         writer,
+                                                         split_writers,
+                                                         similarities,
+                                                         number_of_genomes == 2,
+                                                         species_order,
+                                                         domain_similarity_print_option,
+                                                         scoring,
+                                                         true,
+                                                         tax_code_to_id_map );
+            simple_tab_writer.close();
             ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote main output (includes domain similarities) to: \""
                     + ( out_dir == null ? my_outfile : out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile ) + "\"" );
         }
@@ -2142,7 +2122,8 @@ public class surfacing {
                                              surfacing.PRG_NAME,
                                              out_dir,
                                              write_pwc_files,
-                                             tax_code_to_id_map );
+                                             tax_code_to_id_map,
+                                             CALC_SIMILARITY_SCORES );
             String matrix_output_file = new String( output_file.toString() );
             if ( matrix_output_file.indexOf( '.' ) > 1 ) {
                 matrix_output_file = matrix_output_file.substring( 0, matrix_output_file.indexOf( '.' ) );
@@ -2522,7 +2503,7 @@ public class surfacing {
         System.out.println( surfacing.USE_LAST_IN_FITCH_OPTION + ": to use last in Fitch parsimony" );
         System.out.println( surfacing.WRITE_TO_NEXUS_OPTION + ": to output in Nexus format" );
         System.out.println( PERFORM_DC_REGAIN_PROTEINS_STATS_OPTION + ": to perform DC regain protein statistics" );
-        System.out.println( DA_ANALYSIS_OPTION + ": to DA analysis" );
+        System.out.println( DA_ANALYSIS_OPTION + ": to do DA analysis" );
         System.out.println();
         System.out.println( "Example 1: java -Xms128m -Xmx512m -cp path/to/forester.jar"
                 + " org.forester.application.surfacing p2g=pfam2go_2012_02_07.txt -dufs -cos=Pfam_260_NC1"