final static private String INPUT_GENOMES_FILE_OPTION = "genomes";
final static private String INPUT_SPECIES_TREE_OPTION = "species_tree";
final static private String SEQ_EXTRACT_OPTION = "prot_extract";
- final static private String PRG_VERSION = "2.280";
- final static private String PRG_DATE = "130701";
+ final static private String PRG_VERSION = "2.302";
+ final static private String PRG_DATE = "130715";
final static private String E_MAIL = "czmasek@burnham.org";
- final static private String WWW = "www.phylosoft.org/forester/applications/surfacing";
+ final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester/surfacing";
final static private boolean IGNORE_DUFS_DEFAULT = true;
final static private boolean IGNORE_COMBINATION_WITH_SAME_DEFAULLT = false;
final static private double MAX_E_VALUE_DEFAULT = -1;
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_MAPPED.txt";
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_MAPPED.txt";
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique_MAPPED.txt";
+ private static final boolean CALC_SIMILARITY_SCORES = false;
private static void checkWriteabilityForPairwiseComparisons( final PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option,
final String[][] input_file_properties,
allowed_options.add( surfacing.PAIRWISE_DOMAIN_COMPARISONS_OPTION );
allowed_options.add( surfacing.IGNORE_DOMAINS_WITHOUT_COMBINATIONS_IN_ALL_SPECIES_OPTION );
allowed_options.add( surfacing.CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS );
- //allowed_options.add( JACKNIFE_OPTION );
- // allowed_options.add( JACKNIFE_RANDOM_SEED_OPTION );
- // allowed_options.add( JACKNIFE_RATIO_OPTION );
allowed_options.add( INPUT_SPECIES_TREE_OPTION );
allowed_options.add( FILTER_POSITIVE_OPTION );
allowed_options.add( FILTER_NEGATIVE_OPTION );
if ( need_protein_lists_per_species ) {
protein_lists_per_species = new TreeMap<Species, List<Protein>>();
}
- final List<GenomeWideCombinableDomains> gwcd_list = new ArrayList<GenomeWideCombinableDomains>( number_of_genomes );
+ List<GenomeWideCombinableDomains> gwcd_list = new ArrayList<GenomeWideCombinableDomains>( number_of_genomes );
final SortedSet<String> all_domains_encountered = new TreeSet<String>();
final SortedSet<BinaryDomainCombination> all_bin_domain_combinations_encountered = new TreeSet<BinaryDomainCombination>();
List<BinaryDomainCombination> all_bin_domain_combinations_gained_fitch = null;
all_bin_domain_combinations_gained_fitch = new ArrayList<BinaryDomainCombination>();
all_bin_domain_combinations_lost_fitch = new ArrayList<BinaryDomainCombination>();
}
- final DomainLengthsTable domain_lengths_table = new DomainLengthsTable();
+ DomainLengthsTable domain_lengths_table = new DomainLengthsTable();
final File per_genome_domain_promiscuity_statistics_file = new File( out_dir + ForesterUtil.FILE_SEPARATOR
+ output_file + D_PROMISCUITY_FILE_SUFFIX );
BufferedWriter per_genome_domain_promiscuity_statistics_writer = null;
}
System.out.println( "Time for processing : " + parser.getTime() + "ms" );
log( "", log_writer );
- html_desc.append( "<tr><td>" + input_file_properties[ i ][ 0 ] + " [species: "
- + input_file_properties[ i ][ 1 ] + "]" + ":</td><td>domains analyzed: "
- + parser.getDomainsStored() + "; domains ignored: [ind score cutoffs: "
- + parser.getDomainsIgnoredDueToIndividualScoreCutoff() + "] [E-value cutoff: "
- + parser.getDomainsIgnoredDueToEval() + "] [DUF: " + parser.getDomainsIgnoredDueToDuf()
- + "] [virus like ids: " + parser.getDomainsIgnoredDueToVirusLikeIds()
- + "] [negative domain filter: " + parser.getDomainsIgnoredDueToNegativeDomainFilter()
- + "] [overlap: " + parser.getDomainsIgnoredDueToOverlap() + "]" );
- if ( negative_filter_file != null ) {
- html_desc.append( "; proteins ignored due to negative filter: "
- + parser.getProteinsIgnoredDueToFilter() );
- }
- if ( positive_filter_file != null ) {
- html_desc.append( "; proteins ignored due to positive filter: "
- + parser.getProteinsIgnoredDueToFilter() );
- }
- html_desc.append( "</td></tr>" + nl );
try {
int count = 0;
for( final Protein protein : protein_list ) {
ForesterUtil.programMessage( PRG_NAME, "Wrote domain length data to: " + domain_lengths_analysis_outfile );
System.out.println();
}
+ domain_lengths_table = null;
final long analysis_start_time = new Date().getTime();
PairwiseDomainSimilarityCalculator pw_calc = null;
- // double[] values_for_all_scores_histogram = null;
final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( domain_similarity_sort_field,
sort_by_species_count_first,
- number_of_genomes == 2 );
+ number_of_genomes == 2,
+ CALC_SIMILARITY_SCORES );
switch ( scoring ) {
case COMBINATIONS:
pw_calc = new CombinationsBasedPairwiseDomainSimilarityCalculator();
gwcd_list,
ignore_domains_without_combs_in_all_spec,
ignore_species_specific_domains );
- SurfacingUtil.decoratePrintableDomainSimilarities( similarities,
- detailedness,
- go_annotation_output,
- go_id_to_term_map,
- go_namespace_limit );
+ SurfacingUtil.decoratePrintableDomainSimilarities( similarities, detailedness );
final Map<String, Integer> tax_code_to_id_map = SurfacingUtil.createTaxCodeToIdMap( intrees[ 0 ] );
try {
String my_outfile = output_file.toString();
+ new java.text.SimpleDateFormat( "yyyy.MM.dd HH:mm:ss" ).format( new java.util.Date() )
+ "</td></tr>" + nl );
html_desc.append( "</table>" + nl );
- final DescriptiveStatistics pw_stats = SurfacingUtil
- .writeDomainSimilaritiesToFile( html_desc,
- new StringBuilder( number_of_genomes + " genomes" ),
- writer,
- split_writers,
- similarities,
- number_of_genomes == 2,
- species_order,
- domain_similarity_print_option,
- domain_similarity_sort_field,
- scoring,
- true,
- tax_code_to_id_map );
+ final Writer simple_tab_writer = new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR
+ + my_outfile + ".tsv" ) );
+ SurfacingUtil.writeDomainSimilaritiesToFile( html_desc,
+ new StringBuilder( number_of_genomes + " genomes" ),
+ simple_tab_writer,
+ writer,
+ split_writers,
+ similarities,
+ number_of_genomes == 2,
+ species_order,
+ domain_similarity_print_option,
+ scoring,
+ true,
+ tax_code_to_id_map );
+ simple_tab_writer.close();
ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote main output (includes domain similarities) to: \""
+ ( out_dir == null ? my_outfile : out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile ) + "\"" );
}
surfacing.PRG_NAME,
out_dir,
write_pwc_files,
- tax_code_to_id_map );
+ tax_code_to_id_map,
+ CALC_SIMILARITY_SCORES );
String matrix_output_file = new String( output_file.toString() );
if ( matrix_output_file.indexOf( '.' ) > 1 ) {
matrix_output_file = matrix_output_file.substring( 0, matrix_output_file.indexOf( '.' ) );
gwcd_list,
output_list_of_all_proteins_per_domain_e_value_max );
}
+ gwcd_list = null;
if ( all_bin_domain_combinations_gained_fitch != null ) {
try {
executeFitchGainsAnalysis( new File( output_file
System.out.println( surfacing.USE_LAST_IN_FITCH_OPTION + ": to use last in Fitch parsimony" );
System.out.println( surfacing.WRITE_TO_NEXUS_OPTION + ": to output in Nexus format" );
System.out.println( PERFORM_DC_REGAIN_PROTEINS_STATS_OPTION + ": to perform DC regain protein statistics" );
- System.out.println( DA_ANALYSIS_OPTION + ": to DA analysis" );
+ System.out.println( DA_ANALYSIS_OPTION + ": to do DA analysis" );
System.out.println();
System.out.println( "Example 1: java -Xms128m -Xmx512m -cp path/to/forester.jar"
+ " org.forester.application.surfacing p2g=pfam2go_2012_02_07.txt -dufs -cos=Pfam_260_NC1"