// Option-name strings. The *_OPTION constants are queried through
// cla.isOptionSet(...) / cla.isOptionValueSet(...) in the code below
// (e.g. OUTPUT_FILE_OPTION and RANDOM_SEED_FOR_FITCH_PARSIMONY_OPTION).
final static private String PERFORM_DC_REGAIN_PROTEINS_STATS_OPTION = "dc_regain_stats";
// NOTE(review): the value "DA_analyis" looks like a misspelling of "DA_analysis";
// it is a runtime string, so confirm with existing users before changing it.
final static private String DA_ANALYSIS_OPTION = "DA_analyis";
final static private String USE_LAST_IN_FITCH_OPTION = "last";
- public final static String PAIRWISE_DOMAIN_COMPARISONS_PREFIX = "pwc_";
+ public final static String PAIRWISE_DOMAIN_COMPARISONS_PREFIX = "pwc_";
final static private String PAIRWISE_DOMAIN_COMPARISONS_OPTION = "pwc";
final static private String OUTPUT_FILE_OPTION = "o";
final static private String PFAM_TO_GO_FILE_USE_OPTION = "p2g";
// Default values.
final static private boolean IGNORE_DUFS_DEFAULT = true;
// NOTE(review): identifier is spelled "DEFAULLT"; renaming requires updating every usage.
final static private boolean IGNORE_COMBINATION_WITH_SAME_DEFAULLT = false;
// NOTE(review): -1 presumably acts as a "not set" sentinel for these two defaults -- confirm against the usages.
final static private double MAX_E_VALUE_DEFAULT = -1;
- public final static int MAX_ALLOWED_OVERLAP_DEFAULT = -1;
+ public final static int MAX_ALLOWED_OVERLAP_DEFAULT = -1;
private static final String RANDOM_SEED_FOR_FITCH_PARSIMONY_OPTION = "random_seed";
private static final String CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS = "consider_bdc_direction";
private static final String CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS_AND_ADJACENCY = "consider_bdc_adj";
// Public constants: sequence-extract suffix plus the option name, file-name suffixes and
// numeric defaults that, judging by their names, belong to the plus-minus analysis
// (presumably consumed by SurfacingUtil -- verify against callers).
- public static final String SEQ_EXTRACT_SUFFIX = ".prot";
- public static final String PLUS_MINUS_ANALYSIS_OPTION = "plus_minus";
- public static final String PLUS_MINUS_DOM_SUFFIX = "_plus_minus_dom.txt";
- public static final String PLUS_MINUS_DOM_SUFFIX_HTML = "_plus_minus_dom.html";
- public static final String PLUS_MINUS_DC_SUFFIX_HTML = "_plus_minus_dc.html";
- public static final int PLUS_MINUS_ANALYSIS_MIN_DIFF_DEFAULT = 0;
- public static final double PLUS_MINUS_ANALYSIS_FACTOR_DEFAULT = 1.0;
- public static final String PLUS_MINUS_ALL_GO_IDS_DOM_SUFFIX = "_plus_minus_go_ids_all.txt";
- public static final String PLUS_MINUS_PASSING_GO_IDS_DOM_SUFFIX = "_plus_minus_go_ids_passing.txt";
+ public static final String SEQ_EXTRACT_SUFFIX = ".prot";
+ public static final String PLUS_MINUS_ANALYSIS_OPTION = "plus_minus";
+ public static final String PLUS_MINUS_DOM_SUFFIX = "_plus_minus_dom.txt";
+ public static final String PLUS_MINUS_DOM_SUFFIX_HTML = "_plus_minus_dom.html";
+ public static final String PLUS_MINUS_DC_SUFFIX_HTML = "_plus_minus_dc.html";
+ public static final int PLUS_MINUS_ANALYSIS_MIN_DIFF_DEFAULT = 0;
+ public static final double PLUS_MINUS_ANALYSIS_FACTOR_DEFAULT = 1.0;
+ public static final String PLUS_MINUS_ALL_GO_IDS_DOM_SUFFIX = "_plus_minus_go_ids_all.txt";
+ public static final String PLUS_MINUS_PASSING_GO_IDS_DOM_SUFFIX = "_plus_minus_go_ids_passing.txt";
private static final String OUTPUT_LIST_OF_ALL_PROTEINS_OPTIONS = "all_prot";
final static private String OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION = "all_prot_e";
- public static final boolean VERBOSE = false;
+ public static final boolean VERBOSE = false;
// The GAINED/LOST suffixes are appended to output_file to name the files handed to
// SurfacingUtil.executeFitchGainsAnalysis (gains call passes true, losses call passes false).
private static final String OUTPUT_DOMAIN_COMBINATIONS_GAINED_MORE_THAN_ONCE_ANALYSIS_SUFFIX = "_fitch_dc_gains_counts";
private static final String OUTPUT_DOMAIN_COMBINATIONS_LOST_MORE_THAN_ONCE_ANALYSIS_SUFFIX = "_fitch_dc_losses_counts";
private static final String DOMAIN_LENGTHS_ANALYSIS_SUFFIX = "_domain_lengths_analysis";
if ( cla.isOptionSet( surfacing.IGNORE_DOMAINS_SPECIFIC_TO_ONE_SPECIES_OPTION ) ) {
ignore_species_specific_domains = true;
}
-
-
-
if ( !cla.isOptionValueSet( surfacing.INPUT_SPECIES_TREE_OPTION ) ) {
ForesterUtil.fatalError( surfacing.PRG_NAME, "no input species tree file given: "
+ surfacing.INPUT_SPECIES_TREE_OPTION + "=<file>" );
}
-
-
-
-
File output_file = null;
if ( cla.isOptionSet( surfacing.OUTPUT_FILE_OPTION ) ) {
if ( !cla.isOptionValueSet( surfacing.OUTPUT_FILE_OPTION ) ) {
final List<String> plus_minus_analysis_high_low_copy_species = new ArrayList<String>();
final List<Object> plus_minus_analysis_numbers = new ArrayList<Object>();
SurfacingUtil.processPlusMinusAnalysisOption( cla,
- plus_minus_analysis_high_copy_base_species,
- plus_minus_analysis_high_copy_target_species,
- plus_minus_analysis_high_low_copy_species,
- plus_minus_analysis_numbers );
+ plus_minus_analysis_high_copy_base_species,
+ plus_minus_analysis_high_copy_target_species,
+ plus_minus_analysis_high_low_copy_species,
+ plus_minus_analysis_numbers );
File input_genomes_file = null;
if ( cla.isOptionSet( surfacing.INPUT_GENOMES_FILE_OPTION ) ) {
if ( !cla.isOptionValueSet( surfacing.INPUT_GENOMES_FILE_OPTION ) ) {
ForesterUtil.fatalError( surfacing.PRG_NAME, "no input genomes file given: "
+ surfacing.INPUT_GENOMES_FILE_OPTION + "=<file>" );
}
-
-
-
DomainSimilarity.DomainSimilarityScoring scoring = SCORING_DEFAULT;
if ( cla.isOptionSet( surfacing.SCORING_OPTION ) ) {
if ( !cla.isOptionValueSet( surfacing.SCORING_OPTION ) ) {
+ "=<suffix> to turn on pairwise analyses with less than three input files" );
}
SurfacingUtil.checkWriteabilityForPairwiseComparisons( domain_similarity_print_option,
- input_file_properties,
- automated_pairwise_comparison_suffix,
- out_dir );
+ input_file_properties,
+ automated_pairwise_comparison_suffix,
+ out_dir );
for( int i = 0; i < number_of_genomes; i++ ) {
File dcc_outfile = new File( input_file_properties[ i ][ 1 ]
+ surfacing.DOMAIN_COMBINITON_COUNTS_OUTPUTFILE_SUFFIX );
intree_files[ 0 ] = new File( intrees_str );
}
intrees = SurfacingUtil.obtainAndPreProcessIntrees( intree_files, number_of_genomes, input_file_properties );
-
}
- final Phylogeny intree_0_orig = SurfacingUtil.obtainFirstIntree( intree_files[ 0 ]);
+ final Phylogeny intree_0_orig = SurfacingUtil.obtainFirstIntree( intree_files[ 0 ] );
long random_number_seed_for_fitch_parsimony = 0l;
boolean radomize_fitch_parsimony = false;
if ( cla.isOptionSet( surfacing.RANDOM_SEED_FOR_FITCH_PARSIMONY_OPTION ) ) {
SurfacingUtil.log( ( i + 1 ) + "/" + number_of_genomes, log_writer );
System.out.println( "Processing : " + input_file_properties[ i ][ 1 ]
+ " [" + input_file_properties[ i ][ 0 ] + "]" );
- SurfacingUtil.log( "Genome : " + input_file_properties[ i ][ 1 ] + " ["
- + input_file_properties[ i ][ 0 ] + "]", log_writer );
+ SurfacingUtil.log( "Genome : " + input_file_properties[ i ][ 1 ]
+ + " [" + input_file_properties[ i ][ 0 ] + "]", log_writer );
HmmscanPerDomainTableParser parser = null;
INDIVIDUAL_SCORE_CUTOFF ind_score_cutoff = INDIVIDUAL_SCORE_CUTOFF.NONE;
if ( individual_score_cutoffs != null ) {
distinct_domain_architecuture_counts );
}
System.out.println( "Number of proteins encountered : " + parser.getProteinsEncountered() );
- SurfacingUtil.log( "Number of proteins encountered : " + parser.getProteinsEncountered(), log_writer );
+ SurfacingUtil.log( "Number of proteins encountered : " + parser.getProteinsEncountered(),
+ log_writer );
System.out.println( "Number of proteins stored : " + protein_list.size() );
SurfacingUtil.log( "Number of proteins stored : " + protein_list.size(), log_writer );
System.out.println( "Coverage : "
+ ForesterUtil.roundToInt( 100.0 * coverage ) + "%" );
- SurfacingUtil.log( "Coverage : " + ForesterUtil.roundToInt( 100.0 * coverage )
- + "%", log_writer );
+ SurfacingUtil.log( "Coverage : "
+ + ForesterUtil.roundToInt( 100.0 * coverage ) + "%",
+ log_writer );
System.out.println( "Domains encountered : " + parser.getDomainsEncountered() );
- SurfacingUtil.log( "Domains encountered : " + parser.getDomainsEncountered(), log_writer );
+ SurfacingUtil.log( "Domains encountered : " + parser.getDomainsEncountered(),
+ log_writer );
System.out.println( "Domains stored : " + parser.getDomainsStored() );
- SurfacingUtil.log( "Domains stored : " + parser.getDomainsStored(), log_writer );
+ SurfacingUtil.log( "Domains stored : " + parser.getDomainsStored(),
+ log_writer );
System.out.println( "Distinct domains stored : "
+ parser.getDomainsStoredSet().size() );
- SurfacingUtil.log( "Distinct domains stored : " + parser.getDomainsStoredSet().size(), log_writer );
+ SurfacingUtil.log( "Distinct domains stored : "
+ + parser.getDomainsStoredSet().size(), log_writer );
System.out.println( "Domains ignored due to individual score cutoffs: "
+ parser.getDomainsIgnoredDueToIndividualScoreCutoff() );
SurfacingUtil.log( "Domains ignored due to individual score cutoffs: "
- + parser.getDomainsIgnoredDueToIndividualScoreCutoff(),
- log_writer );
+ + parser.getDomainsIgnoredDueToIndividualScoreCutoff(),
+ log_writer );
System.out.println( "Domains ignored due to E-value : "
+ parser.getDomainsIgnoredDueToEval() );
- SurfacingUtil.log( "Domains ignored due to E-value : " + parser.getDomainsIgnoredDueToEval(), log_writer );
+ SurfacingUtil.log( "Domains ignored due to E-value : "
+ + parser.getDomainsIgnoredDueToEval(),
+ log_writer );
System.out.println( "Domains ignored due to DUF designation : "
+ parser.getDomainsIgnoredDueToDuf() );
- SurfacingUtil.log( "Domains ignored due to DUF designation : " + parser.getDomainsIgnoredDueToDuf(), log_writer );
+ SurfacingUtil
+ .log( "Domains ignored due to DUF designation : " + parser.getDomainsIgnoredDueToDuf(),
+ log_writer );
if ( ignore_virus_like_ids ) {
System.out.println( "Domains ignored due virus like ids : "
+ parser.getDomainsIgnoredDueToVirusLikeIds() );
- SurfacingUtil.log( "Domains ignored due virus like ids : " + parser.getDomainsIgnoredDueToVirusLikeIds(),
- log_writer );
+ SurfacingUtil.log( "Domains ignored due virus like ids : "
+ + parser.getDomainsIgnoredDueToVirusLikeIds(),
+ log_writer );
}
System.out.println( "Domains ignored due negative domain filter : "
+ parser.getDomainsIgnoredDueToNegativeDomainFilter() );
SurfacingUtil.log( "Domains ignored due negative domain filter : "
- + parser.getDomainsIgnoredDueToNegativeDomainFilter(),
- log_writer );
+ + parser.getDomainsIgnoredDueToNegativeDomainFilter(),
+ log_writer );
System.out.println( "Domains ignored due to overlap : "
+ parser.getDomainsIgnoredDueToOverlap() );
- SurfacingUtil.log( "Domains ignored due to overlap : " + parser.getDomainsIgnoredDueToOverlap(),
- log_writer );
+ SurfacingUtil.log( "Domains ignored due to overlap : "
+ + parser.getDomainsIgnoredDueToOverlap(),
+ log_writer );
if ( negative_filter_file != null ) {
System.out.println( "Proteins ignored due to negative filter : "
+ parser.getProteinsIgnoredDueToFilter() );
- SurfacingUtil.log( "Proteins ignored due to negative filter : " + parser.getProteinsIgnoredDueToFilter(),
- log_writer );
+ SurfacingUtil.log( "Proteins ignored due to negative filter : "
+ + parser.getProteinsIgnoredDueToFilter(),
+ log_writer );
}
if ( positive_filter_file != null ) {
System.out.println( "Proteins ignored due to positive filter : "
+ parser.getProteinsIgnoredDueToFilter() );
- SurfacingUtil.log( "Proteins ignored due to positive filter : " + parser.getProteinsIgnoredDueToFilter(),
- log_writer );
+ SurfacingUtil.log( "Proteins ignored due to positive filter : "
+ + parser.getProteinsIgnoredDueToFilter(),
+ log_writer );
}
if ( da_analysis ) {
System.out.println( "Distinct domain architectures stored : " + distinct_das );
ForesterUtil.programMessage( PRG_NAME, "Range of proteins with a least one domain assigned: "
+ ( 100 * protein_coverage_stats.getMin() ) + "%-" + ( 100 * protein_coverage_stats.getMax() )
+ "%" );
- SurfacingUtil.log( "Average of prot with a least one dom assigned : " + ( 100 * protein_coverage_stats.arithmeticMean() )
- + "% (+/-" + ( 100 * protein_coverage_stats.sampleStandardDeviation() ) + "%)", log_writer );
- SurfacingUtil.log( "Range of prot with a least one dom assigned : " + ( 100 * protein_coverage_stats.getMin() ) + "%-"
- + ( 100 * protein_coverage_stats.getMax() ) + "%", log_writer );
+ SurfacingUtil.log( "Average of prot with a least one dom assigned : "
+ + ( 100 * protein_coverage_stats.arithmeticMean() ) + "% (+/-"
+ + ( 100 * protein_coverage_stats.sampleStandardDeviation() ) + "%)",
+ log_writer );
+ SurfacingUtil.log( "Range of prot with a least one dom assigned : "
+ + ( 100 * protein_coverage_stats.getMin() ) + "%-"
+ + ( 100 * protein_coverage_stats.getMax() ) + "%",
+ log_writer );
}
catch ( final IOException e2 ) {
ForesterUtil.fatalError( surfacing.PRG_NAME, e2.getLocalizedMessage() );
}
if ( ( ( intrees != null ) && ( intrees.length > 0 ) ) && ( number_of_genomes > 2 ) ) {
final StringBuilder parameters_sb = SurfacingUtil.createParametersAsString( ignore_dufs,
- e_value_max,
- max_allowed_overlap,
- no_engulfing_overlaps,
- cutoff_scores_file,
- dc_type );
+ e_value_max,
+ max_allowed_overlap,
+ no_engulfing_overlaps,
+ cutoff_scores_file,
+ dc_type );
String s = "_";
if ( radomize_fitch_parsimony ) {
s += random_number_seed_for_fitch_parsimony + "_";
}
if ( plus_minus_analysis_high_copy_base_species.size() > 0 ) {
SurfacingUtil.executePlusMinusAnalysis( output_file,
- plus_minus_analysis_high_copy_base_species,
- plus_minus_analysis_high_copy_target_species,
- plus_minus_analysis_high_low_copy_species,
- gwcd_list,
- protein_lists_per_species,
- domain_id_to_go_ids_map,
- go_id_to_term_map,
- plus_minus_analysis_numbers );
+ plus_minus_analysis_high_copy_base_species,
+ plus_minus_analysis_high_copy_target_species,
+ plus_minus_analysis_high_low_copy_species,
+ gwcd_list,
+ protein_lists_per_species,
+ domain_id_to_go_ids_map,
+ go_id_to_term_map,
+ plus_minus_analysis_numbers );
}
if ( output_protein_lists_for_all_domains ) {
SurfacingUtil.writeProteinListsForAllSpecies( out_dir,
- protein_lists_per_species,
- gwcd_list,
- output_list_of_all_proteins_per_domain_e_value_max );
+ protein_lists_per_species,
+ gwcd_list,
+ output_list_of_all_proteins_per_domain_e_value_max );
}
gwcd_list = null;
if ( all_bin_domain_combinations_gained_fitch != null ) {
try {
- SurfacingUtil.executeFitchGainsAnalysis( new File( output_file
- + surfacing.OUTPUT_DOMAIN_COMBINATIONS_GAINED_MORE_THAN_ONCE_ANALYSIS_SUFFIX ),
- all_bin_domain_combinations_gained_fitch,
- all_domains_encountered.size(),
- all_bin_domain_combinations_encountered,
- true );
+ SurfacingUtil
+ .executeFitchGainsAnalysis( new File( output_file
+ + surfacing.OUTPUT_DOMAIN_COMBINATIONS_GAINED_MORE_THAN_ONCE_ANALYSIS_SUFFIX ),
+ all_bin_domain_combinations_gained_fitch,
+ all_domains_encountered.size(),
+ all_bin_domain_combinations_encountered,
+ true );
}
catch ( final IOException e ) {
ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() );
}
if ( all_bin_domain_combinations_lost_fitch != null ) {
try {
- SurfacingUtil.executeFitchGainsAnalysis( new File( output_file
- + surfacing.OUTPUT_DOMAIN_COMBINATIONS_LOST_MORE_THAN_ONCE_ANALYSIS_SUFFIX ),
- all_bin_domain_combinations_lost_fitch,
- all_domains_encountered.size(),
- all_bin_domain_combinations_encountered,
- false );
+ SurfacingUtil
+ .executeFitchGainsAnalysis( new File( output_file
+ + surfacing.OUTPUT_DOMAIN_COMBINATIONS_LOST_MORE_THAN_ONCE_ANALYSIS_SUFFIX ),
+ all_bin_domain_combinations_lost_fitch,
+ all_domains_encountered.size(),
+ all_bin_domain_combinations_encountered,
+ false );
}
catch ( final IOException e ) {
ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() );