// ---
// Program name; passed as the first argument to the ForesterUtil message helpers
// (fatalError, unexpectedFatalError, programMessage) called from main.
public final static String PRG_NAME = "surfacing";
// Suffix strings for domain parsimony trees (Dollo and Fitch variants); each ends with
// ForesterConstants.PHYLO_XML_SUFFIX.
// NOTE(review): presumably appended to an output file base name -- the use site is not in view here.
public static final String DOMAINS_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO = "_d_dollo"
- + ForesterConstants.PHYLO_XML_SUFFIX;
+ + ForesterConstants.PHYLO_XML_SUFFIX;
public static final String DOMAINS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH = "_d_fitch"
- + ForesterConstants.PHYLO_XML_SUFFIX;
+ + ForesterConstants.PHYLO_XML_SUFFIX;
// Same pattern for binary domain combinations ("dc") parsimony trees.
public static final String BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO = "_dc_dollo"
- + ForesterConstants.PHYLO_XML_SUFFIX;
+ + ForesterConstants.PHYLO_XML_SUFFIX;
public static final String BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH = "_dc_fitch"
- + ForesterConstants.PHYLO_XML_SUFFIX;
+ + ForesterConstants.PHYLO_XML_SUFFIX;
// Suffixes ending in ".nex".
// NOTE(review): presumably for the Nexus files controlled by the write_to_nexus flag in main -- confirm at the use site.
public static final String NEXUS_EXTERNAL_DOMAINS = "_dom.nex";
public static final String NEXUS_EXTERNAL_DOMAIN_COMBINATIONS = "_dc.nex";
public static final String NEXUS_SECONDARY_FEATURES = "_secondary_features.nex";
// Suffixes related to the Dollo parsimony analysis on secondary features.
// NOTE(review): the first two carry no file extension; check whether callers append one.
public static final String PARSIMONY_OUTPUT_DOLLO_LOSSES_SECONDARY_FEATURES = "_dollo_losses_secondary_features";
public static final String PARSIMONY_OUTPUT_DOLLO_PRESENT_SECONDARY_FEATURES = "_dollo_present_secondary_features";
public static final String SECONDARY_FEATURES_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO = "_secondary_features_dollo"
- + ForesterConstants.PHYLO_XML_SUFFIX;
+ + ForesterConstants.PHYLO_XML_SUFFIX;
// Suffixes for GO-id based parsimony output.
// NOTE(review): the second constant is named "..._BC_..." while its value says "dc" -- confirm this is intentional.
public static final String PARSIMONY_OUTPUT_DOLLO_ALL_GOID_D_ALL_NAMESPACES = "_dollo_goid_d";
public static final String PARSIMONY_OUTPUT_FITCH_ALL_GOID_BC_ALL_NAMESPACES = "_fitch_goid_dc";
// NOTE(review): presumably the command-line option name that triggers the help text -- use site not in view.
final static private String HELP_OPTION_1 = "help";
// Suffixes appended to matrix_output_file in main when the shared-domains and
// shared-binary-combinations genome distance matrices are written via SurfacingUtil.writeMatrixToFile.
final static private String MATRIX_SHARED_DOMAINS_BASED_GENOME_DISTANCE_SUFFIX = "_domains.pwd";
final static private String MATRIX_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX = "_bin_combinations.pwd";
// Suffixes appended to matrix_output_file in main for the trees produced by
// SurfacingUtil.createNjTreeBasedOnMatrixToFile (mean score, shared domains, shared binary combinations).
final static private String NJ_TREE_MEAN_SCORE_BASED_GENOME_DISTANCE_SUFFIX = "_mean_score_NJ"
- + ForesterConstants.PHYLO_XML_SUFFIX;
+ + ForesterConstants.PHYLO_XML_SUFFIX;
final static private String NJ_TREE_SHARED_DOMAINS_BASED_GENOME_DISTANCE_SUFFIX = "_domains_NJ"
- + ForesterConstants.PHYLO_XML_SUFFIX;
+ + ForesterConstants.PHYLO_XML_SUFFIX;
final static private String NJ_TREE_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX = "_bin_combinations_NJ"
- + ForesterConstants.PHYLO_XML_SUFFIX;
+ + ForesterConstants.PHYLO_XML_SUFFIX;
// Command-line option names for the positive/negative protein filters; main queries them
// with cla.isOptionSet / cla.isOptionValueSet.
final static private String FILTER_POSITIVE_OPTION = "pos_filter";
final static private String FILTER_NEGATIVE_OPTION = "neg_filter";
// NOTE(review): presumably the option name for the negative domains filter (see negative_domains_filter_file in main) -- confirm.
final static private String FILTER_NEGATIVE_DOMAINS_OPTION = "neg_dom_filter";
// NOTE(review): presumably option names for the input genomes file and the species tree -- use sites not in view.
final static private String INPUT_GENOMES_FILE_OPTION = "genomes";
final static private String INPUT_SPECIES_TREE_OPTION = "species_tree";
// Command-line option whose value main stores in query_domain_ids; main's error message describes
// the value as domain ids separated by '~' and sequences separated by '#'.
final static private String SEQ_EXTRACT_OPTION = "prot_extract";
// Program version and date strings.
// NOTE(review): the date looks like yyMMdd -- confirm.
- final static private String PRG_VERSION = "2.403";
- final static private String PRG_DATE = "131127";
+ final static private String PRG_VERSION = "2.404";
+ final static private String PRG_DATE = "140709";
// Contact e-mail address and project web page.
final static private String E_MAIL = "czmasek@burnham.org";
final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester/surfacing";
// NOTE(review): presumably the default for ignoring "DUF" domains (main reports
// "Domains ignored due to DUF designation") -- confirm where this default is consumed.
final static private boolean IGNORE_DUFS_DEFAULT = true;
// Suffix ending in ".txt" for the unique independent domain-combination gains (Fitch parsimony) list
// intended for GO mapping.
// NOTE(review): meaning inferred from the name and value only -- confirm at the use site.
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique.txt";
// Optional species restriction; null (the default) means the feature is not used.
// NOTE(review): "PROT_EX" presumably refers to protein extraction -- confirm.
public static final String LIMIT_SPEC_FOR_PROT_EX = null; // e.g. "HUMAN"; set to null for not using this feature (default).
// "MAPPED" variants of the Fitch parsimony tree suffix and the independent-gains suffixes.
// NOTE(review): presumably used for results mapped to secondary features -- use sites not in view.
public static final String BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH_MAPPED = "_dc_MAPPED_secondary_features_fitch"
- + ForesterConstants.PHYLO_XML_SUFFIX;
+ + ForesterConstants.PHYLO_XML_SUFFIX;
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_COUNTS_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_counts_MAPPED.txt";
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_MAPPED.txt";
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_MAPPED.txt";
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique_MAPPED.txt";
// Compile-time switch passed by main to the BasicDomainSimilarityCalculator constructor; currently false.
private static final boolean CALC_SIMILARITY_SCORES = false;
-
+ @SuppressWarnings("unchecked")
public static void main( final String args[] ) {
final long start_time = new Date().getTime();
// final StringBuffer log = new StringBuffer();
if ( cla.isOptionSet( DOMAIN_COMBINITONS_OUTPUT_OPTION_FOR_GRAPH_ANALYSIS ) ) {
output_binary_domain_combinationsfor_graph_analysis = true;
}
- final boolean output_binary_domain_combinationsfor_counts = false;
+ boolean output_binary_domain_combinationsfor_counts = false;
if ( cla.isOptionSet( DOMAIN_COMBINITONS_COUNTS_OUTPUT_OPTION ) ) {
- output_binary_domain_combinationsfor_graph_analysis = true;
+ output_binary_domain_combinationsfor_counts = true;
}
if ( cla.isOptionSet( surfacing.MAX_FS_E_VALUE_OPTION ) ) {
try {
&& ( cla.isOptionSet( surfacing.FILTER_NEGATIVE_OPTION ) || cla
.isOptionSet( surfacing.FILTER_POSITIVE_OPTION ) ) ) {
ForesterUtil
- .fatalError( surfacing.PRG_NAME,
- "attempt to use both negative or positive protein filter together wirh a negative domains filter" );
+ .fatalError( surfacing.PRG_NAME,
+ "attempt to use both negative or positive protein filter together wirh a negative domains filter" );
}
if ( cla.isOptionSet( surfacing.FILTER_NEGATIVE_OPTION ) ) {
if ( !cla.isOptionValueSet( surfacing.FILTER_NEGATIVE_OPTION ) ) {
final String msg = ForesterUtil.isReadableFile( negative_domains_filter_file );
if ( !ForesterUtil.isEmpty( msg ) ) {
ForesterUtil.fatalError( surfacing.PRG_NAME, "can not read from \"" + negative_domains_filter_file
- + "\": " + msg );
+ + "\": " + msg );
}
}
final List<String> plus_minus_analysis_high_copy_base_species = new ArrayList<String>();
final String msg = ForesterUtil.isReadableFile( input_genomes_file );
if ( !ForesterUtil.isEmpty( msg ) ) {
ForesterUtil
- .fatalError( surfacing.PRG_NAME, "can not read from \"" + input_genomes_file + "\": " + msg );
+ .fatalError( surfacing.PRG_NAME, "can not read from \"" + input_genomes_file + "\": " + msg );
}
}
else {
}
else {
ForesterUtil.fatalError( surfacing.PRG_NAME, "unknown value \"" + scoring_str
- + "\" for scoring method for domain combinations similarity calculation: \"-"
- + surfacing.SCORING_OPTION + "=<" + surfacing.SCORING_DOMAIN_COUNT_BASED + "|"
- + surfacing.SCORING_PROTEIN_COUNT_BASED + "|" + surfacing.SCORING_COMBINATION_BASED + ">\"" );
+ + "\" for scoring method for domain combinations similarity calculation: \"-"
+ + surfacing.SCORING_OPTION + "=<" + surfacing.SCORING_DOMAIN_COUNT_BASED + "|"
+ + surfacing.SCORING_PROTEIN_COUNT_BASED + "|" + surfacing.SCORING_COMBINATION_BASED + ">\"" );
}
}
boolean sort_by_species_count_first = false;
if ( cla.isOptionSet( surfacing.SEQ_EXTRACT_OPTION ) ) {
if ( !cla.isOptionValueSet( surfacing.SEQ_EXTRACT_OPTION ) ) {
ForesterUtil
- .fatalError( surfacing.PRG_NAME,
- "no domain ids given for sequences with given domains to be extracted : -"
- + surfacing.SEQ_EXTRACT_OPTION
- + "=<ordered domain sequences, domain ids separated by '~', sequences separated by '#'>" );
+ .fatalError( surfacing.PRG_NAME,
+ "no domain ids given for sequences with given domains to be extracted : -"
+ + surfacing.SEQ_EXTRACT_OPTION
+ + "=<ordered domain sequences, domain ids separated by '~', sequences separated by '#'>" );
}
query_domain_ids = cla.getOptionValue( surfacing.SEQ_EXTRACT_OPTION );
}
}
else {
ForesterUtil.fatalError( surfacing.PRG_NAME, "unknown value \"" + sort_str
- + "\" for domain combinations similarities sorting: \"-"
- + surfacing.DOMAIN_SIMILARITY_SORT_OPTION + "=<" + surfacing.DOMAIN_SIMILARITY_SORT_ALPHA + "|"
- + surfacing.DOMAIN_SIMILARITY_SORT_MAX + "|" + surfacing.DOMAIN_SIMILARITY_SORT_MIN + "|"
- + surfacing.DOMAIN_SIMILARITY_SORT_MEAN + "|" + surfacing.DOMAIN_SIMILARITY_SORT_DIFF + "|"
- + surfacing.DOMAIN_SIMILARITY_SORT_ABS_COUNTS_DIFF + "|"
- + surfacing.DOMAIN_SIMILARITY_SORT_COUNTS_DIFF + "|" + "|"
- + surfacing.DOMAIN_SIMILARITY_SORT_SPECIES_COUNT + "|" + surfacing.DOMAIN_SIMILARITY_SORT_SD
- + ">\"" );
+ + "\" for domain combinations similarities sorting: \"-"
+ + surfacing.DOMAIN_SIMILARITY_SORT_OPTION + "=<" + surfacing.DOMAIN_SIMILARITY_SORT_ALPHA + "|"
+ + surfacing.DOMAIN_SIMILARITY_SORT_MAX + "|" + surfacing.DOMAIN_SIMILARITY_SORT_MIN + "|"
+ + surfacing.DOMAIN_SIMILARITY_SORT_MEAN + "|" + surfacing.DOMAIN_SIMILARITY_SORT_DIFF + "|"
+ + surfacing.DOMAIN_SIMILARITY_SORT_ABS_COUNTS_DIFF + "|"
+ + surfacing.DOMAIN_SIMILARITY_SORT_COUNTS_DIFF + "|" + "|"
+ + surfacing.DOMAIN_SIMILARITY_SORT_SPECIES_COUNT + "|" + surfacing.DOMAIN_SIMILARITY_SORT_SD
+ + ">\"" );
}
}
DomainSimilarity.PRINT_OPTION domain_similarity_print_option = DOMAIN_SIMILARITY_PRINT_OPTION_DEFAULT;
}
else {
ForesterUtil.fatalError( surfacing.PRG_NAME, "unknown value \"" + sort
- + "\" for sorting of domain counts: \"-" + surfacing.DOMAIN_COUNT_SORT_OPTION + "=<"
- + surfacing.DOMAIN_COUNT_SORT_ALPHA + "|" + surfacing.DOMAIN_COUNT_SORT_KEY_DOMAIN_COUNT + "|"
- + surfacing.DOMAIN_COUNT_SORT_KEY_DOMAIN_PROTEINS_COUNT + "|"
- + surfacing.DOMAIN_COUNT_SORT_COMBINATIONS_COUNT + ">\"" );
+ + "\" for sorting of domain counts: \"-" + surfacing.DOMAIN_COUNT_SORT_OPTION + "=<"
+ + surfacing.DOMAIN_COUNT_SORT_ALPHA + "|" + surfacing.DOMAIN_COUNT_SORT_KEY_DOMAIN_COUNT + "|"
+ + surfacing.DOMAIN_COUNT_SORT_KEY_DOMAIN_PROTEINS_COUNT + "|"
+ + surfacing.DOMAIN_COUNT_SORT_COMBINATIONS_COUNT + ">\"" );
}
}
final String[][] input_file_properties = SurfacingUtil.processInputGenomesFile( input_genomes_file );
}
SurfacingUtil.checkForOutputFileWriteability( dcc_outfile );
}
- File pfam_to_go_file = null;
- Map<String, List<GoId>> domain_id_to_go_ids_map = null;
- int domain_id_to_go_ids_count = 0;
+ File pfam_to_go_file = new File( "pfam2go.txt" );
if ( cla.isOptionSet( surfacing.PFAM_TO_GO_FILE_USE_OPTION ) ) {
if ( !cla.isOptionValueSet( surfacing.PFAM_TO_GO_FILE_USE_OPTION ) ) {
ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for Pfam to GO mapping file: -"
+ surfacing.PFAM_TO_GO_FILE_USE_OPTION + "=<file>" );
}
pfam_to_go_file = new File( cla.getOptionValue( surfacing.PFAM_TO_GO_FILE_USE_OPTION ) );
- final String error = ForesterUtil.isReadableFile( pfam_to_go_file );
- if ( !ForesterUtil.isEmpty( error ) ) {
- ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read Pfam to GO mapping file: " + error );
- }
- try {
- final PfamToGoParser parser = new PfamToGoParser( pfam_to_go_file );
- final List<PfamToGoMapping> pfam_to_go_mappings = parser.parse();
- domain_id_to_go_ids_map = SurfacingUtil.createDomainIdToGoIdMap( pfam_to_go_mappings );
- if ( parser.getMappingCount() < domain_id_to_go_ids_map.size() ) {
- ForesterUtil.unexpectedFatalError( surfacing.PRG_NAME,
- "parser.getMappingCount() < domain_id_to_go_ids_map.size()" );
- }
- domain_id_to_go_ids_count = parser.getMappingCount();
- }
- catch ( final IOException e ) {
- ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read from Pfam to GO mapping file: " + e );
+ }
+ final String error1 = ForesterUtil.isReadableFile( pfam_to_go_file );
+ if ( !ForesterUtil.isEmpty( error1 ) ) {
+ ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read Pfam to GO mapping file: " + error1 );
+ }
+ Map<String, List<GoId>> domain_id_to_go_ids_map = null;
+ int domain_id_to_go_ids_count = 0;
+ try {
+ final PfamToGoParser parser = new PfamToGoParser( pfam_to_go_file );
+ final List<PfamToGoMapping> pfam_to_go_mappings = parser.parse();
+ domain_id_to_go_ids_map = SurfacingUtil.createDomainIdToGoIdMap( pfam_to_go_mappings );
+ if ( parser.getMappingCount() < domain_id_to_go_ids_map.size() ) {
+ ForesterUtil.unexpectedFatalError( surfacing.PRG_NAME,
+ "parser.getMappingCount() < domain_id_to_go_ids_map.size()" );
}
+ domain_id_to_go_ids_count = parser.getMappingCount();
}
- File go_obo_file = null;
- List<GoTerm> go_terms = null;
+ catch ( final IOException e ) {
+ ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read from Pfam to GO mapping file: " + e );
+ }
+ File go_obo_file = new File( "go.obo" );
if ( cla.isOptionSet( surfacing.GO_OBO_FILE_USE_OPTION ) ) {
if ( !cla.isOptionValueSet( surfacing.GO_OBO_FILE_USE_OPTION ) ) {
ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for GO OBO file: -"
+ surfacing.GO_OBO_FILE_USE_OPTION + "=<file>" );
}
- if ( ( domain_id_to_go_ids_map == null ) || ( domain_id_to_go_ids_map.size() < 1 ) ) {
- ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot use GO OBO file (-"
- + surfacing.GO_OBO_FILE_USE_OPTION + "=<file>) without Pfam to GO mapping file ("
- + surfacing.PFAM_TO_GO_FILE_USE_OPTION + "=<file>)" );
- }
go_obo_file = new File( cla.getOptionValue( surfacing.GO_OBO_FILE_USE_OPTION ) );
- final String error = ForesterUtil.isReadableFile( go_obo_file );
- if ( !ForesterUtil.isEmpty( error ) ) {
- ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read GO OBO file: " + error );
- }
- try {
- final OBOparser parser = new OBOparser( go_obo_file, OBOparser.ReturnType.BASIC_GO_TERM );
- go_terms = parser.parse();
- if ( parser.getGoTermCount() != go_terms.size() ) {
- ForesterUtil
- .unexpectedFatalError( surfacing.PRG_NAME, "parser.getGoTermCount() != go_terms.size()" );
- }
- }
- catch ( final IOException e ) {
- ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read from GO OBO file: " + e );
+ }
+ final String error2 = ForesterUtil.isReadableFile( go_obo_file );
+ if ( !ForesterUtil.isEmpty( error2 ) ) {
+ ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read GO OBO file: " + error2 );
+ }
+ List<GoTerm> go_terms = null;
+ try {
+ final OBOparser parser = new OBOparser( go_obo_file, OBOparser.ReturnType.BASIC_GO_TERM );
+ go_terms = parser.parse();
+ if ( parser.getGoTermCount() != go_terms.size() ) {
+ ForesterUtil.unexpectedFatalError( surfacing.PRG_NAME, "parser.getGoTermCount() != go_terms.size()" );
}
}
+ catch ( final IOException e ) {
+ ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read from GO OBO file: " + e );
+ }
Map<GoId, GoTerm> go_id_to_term_map = null;
if ( ( ( domain_id_to_go_ids_map != null ) && ( domain_id_to_go_ids_map.size() > 0 ) )
&& ( ( go_terms != null ) && ( go_terms.size() > 0 ) ) ) {
}
else {
ForesterUtil.fatalError( surfacing.PRG_NAME, "unknown value \"" + go_namespace_limit_str
- + "\" for GO namespace limit: \"-" + surfacing.GO_NAMESPACE_LIMIT_OPTION + "=<"
- + surfacing.GO_NAMESPACE_LIMIT_OPTION_MOLECULAR_FUNCTION + "|"
- + surfacing.GO_NAMESPACE_LIMIT_OPTION_BIOLOGICAL_PROCESS + "|"
- + surfacing.GO_NAMESPACE_LIMIT_OPTION_CELLULAR_COMPONENT + ">\"" );
+ + "\" for GO namespace limit: \"-" + surfacing.GO_NAMESPACE_LIMIT_OPTION + "=<"
+ + surfacing.GO_NAMESPACE_LIMIT_OPTION_MOLECULAR_FUNCTION + "|"
+ + surfacing.GO_NAMESPACE_LIMIT_OPTION_BIOLOGICAL_PROCESS + "|"
+ + surfacing.GO_NAMESPACE_LIMIT_OPTION_CELLULAR_COMPONENT + ">\"" );
}
}
if ( ( domain_similarity_sort_field == DomainSimilarity.DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE )
Map<String, Set<String>>[] domain_id_to_secondary_features_maps = null;
File[] secondary_features_map_files = null;
final File domain_lengths_analysis_outfile = new File( out_dir + ForesterUtil.FILE_SEPARATOR + output_file
- + DOMAIN_LENGTHS_ANALYSIS_SUFFIX );
+ + DOMAIN_LENGTHS_ANALYSIS_SUFFIX );
if ( domain_length_analysis ) {
SurfacingUtil.checkForOutputFileWriteability( domain_lengths_analysis_outfile );
}
if ( positive_filter_file != null ) {
final int filter_size = filter.size();
System.out.println( "Positive protein filter : " + positive_filter_file + " [" + filter_size
- + " domain ids]" );
+ + " domain ids]" );
html_desc.append( "<tr><td>Positive protein filter:</td><td>" + positive_filter_file + " [" + filter_size
- + " domain ids]</td></tr>" + nl );
+ + " domain ids]</td></tr>" + nl );
}
if ( negative_filter_file != null ) {
final int filter_size = filter.size();
System.out.println( "Negative protein filter : " + negative_filter_file + " [" + filter_size
- + " domain ids]" );
+ + " domain ids]" );
html_desc.append( "<tr><td>Negative protein filter:</td><td>" + negative_filter_file + " [" + filter_size
- + " domain ids]</td></tr>" + nl );
+ + " domain ids]</td></tr>" + nl );
}
if ( negative_domains_filter_file != null ) {
final int filter_size = filter.size();
System.out.println( "Negative domain filter : " + negative_domains_filter_file + " [" + filter_size
- + " domain ids]" );
+ + " domain ids]" );
html_desc.append( "<tr><td>Negative domain filter:</td><td>" + negative_domains_filter_file + " ["
+ filter_size + " domain ids]</td></tr>" + nl );
}
}
System.out.println( "Plus-minus analysis : " + plus1 + "&& " + plus0 + "&& " + minus );
html_desc.append( "<tr><td>Plus-minus analysis:</td><td>" + plus1 + "&& " + plus0 + "&& " + minus
- + "</td></tr>" + nl );
+ + "</td></tr>" + nl );
}
if ( cutoff_scores_file != null ) {
System.out.println( "Cutoff scores file : " + cutoff_scores_file );
if ( no_engulfing_overlaps ) {
System.out.println( "Ignore engulfed domains : " + no_engulfing_overlaps );
html_desc.append( "<tr><td>Ignore (lower confidence) engulfed domains:</td><td>" + no_engulfing_overlaps
- + "</td></tr>" + nl );
+ + "</td></tr>" + nl );
}
System.out.println( "Ignore singlet domains : " + ignore_domains_without_combs_in_all_spec );
html_desc
- .append( "<tr><td>Ignore singlet domains for domain combination similarity analyses (not for parsimony analyses):</td><td>"
- + ignore_domains_without_combs_in_all_spec + "</td></tr>" + nl );
+ .append( "<tr><td>Ignore singlet domains for domain combination similarity analyses (not for parsimony analyses):</td><td>"
+ + ignore_domains_without_combs_in_all_spec + "</td></tr>" + nl );
System.out.println( "Ignore species specific doms: " + ignore_species_specific_domains );
html_desc
- .append( "<tr><td>Ignore species specific domains for domain combination similarity analyses (not for parsimony analyses):</td><td>"
- + ignore_species_specific_domains + "</td></tr>" + nl );
+ .append( "<tr><td>Ignore species specific domains for domain combination similarity analyses (not for parsimony analyses):</td><td>"
+ + ignore_species_specific_domains + "</td></tr>" + nl );
System.out.println( "Ignore combination with self: " + ignore_combination_with_same );
html_desc.append( "<tr><td>Ignore combination with self for domain combination similarity analyses:</td><td>"
+ ignore_combination_with_same + "</td></tr>" + nl );
if ( perform_dc_fich ) {
System.out.println( "Use last in Fitch parsimony : " + use_last_in_fitch_parsimony );
html_desc.append( "<tr><td>Use last in Fitch parsimony:</td><td>" + use_last_in_fitch_parsimony
- + "</td></tr>" + nl );
+ + "</td></tr>" + nl );
}
System.out.println( "Write to Nexus files : " + write_to_nexus );
html_desc.append( "<tr><td>Write to Nexus files:</td><td>" + write_to_nexus + "</td></tr>" + nl );
if ( perform_dc_fich ) {
System.out.println( "DC regain prot stats : " + perform_dc_regain_proteins_stats );
html_desc.append( "<tr><td>DC regain prot stats:</td><td>" + perform_dc_regain_proteins_stats
- + "</td></tr>" + nl );
+ + "</td></tr>" + nl );
}
System.out.println( "DA analysis : " + da_analysis );
html_desc.append( "<tr><td>DA analysis :</td><td>" + da_analysis + "</td></tr>" + nl );
}
if ( domain_id_to_go_ids_map != null ) {
System.out.println( "Pfam to GO mappings from : " + pfam_to_go_file + " [" + domain_id_to_go_ids_count
- + " mappings]" );
+ + " mappings]" );
html_desc.append( "<tr><td>Pfam to GO mappings from:</td><td>" + pfam_to_go_file + " ["
+ domain_id_to_go_ids_count + " mappings]" + "</td></tr>" + nl );
}
if ( perform_pwc ) {
System.out.println( "Suffix for PWC files : " + automated_pairwise_comparison_suffix );
html_desc.append( "<tr><td>Suffix for PWC files</td><td>" + automated_pairwise_comparison_suffix
- + "</td></tr>" + nl );
+ + "</td></tr>" + nl );
}
if ( out_dir != null ) {
System.out.println( "Output directory : " + out_dir );
break;
default:
ForesterUtil
- .unexpectedFatalError( surfacing.PRG_NAME, "unknown value for sorting for similarities" );
+ .unexpectedFatalError( surfacing.PRG_NAME, "unknown value for sorting for similarities" );
}
System.out.println();
html_desc.append( "</td></tr>" + nl );
if ( ( intrees != null ) && ( intrees.length > 0 ) ) {
for( final File intree_file : intree_files ) {
html_desc.append( "<tr><td>Intree for gain/loss parsimony analysis:</td><td>" + intree_file
- + "</td></tr>" + nl );
+ + "</td></tr>" + nl );
System.out.println( " Intree for gain/loss pars.: " + intree_file );
}
}
} // if ( perform_pwc ) {
System.out.println();
html_desc.append( "<tr><td>Command line:</td><td>" + nl + nl + cla.getCommandLineArgsAsString() + nl + nl
- + "</td></tr>" + nl );
+ + "</td></tr>" + nl );
System.out.println( "Command line : " + cla.getCommandLineArgsAsString() );
BufferedWriter[] query_domains_writer_ary = null;
List<String>[] query_domain_ids_array = null;
all_bin_domain_combinations_lost_fitch = new ArrayList<BinaryDomainCombination>();
}
final File per_genome_domain_promiscuity_statistics_file = new File( out_dir + ForesterUtil.FILE_SEPARATOR
- + output_file + D_PROMISCUITY_FILE_SUFFIX );
+ + output_file + D_PROMISCUITY_FILE_SUFFIX );
BufferedWriter per_genome_domain_promiscuity_statistics_writer = null;
try {
per_genome_domain_promiscuity_statistics_writer = new BufferedWriter( new FileWriter( per_genome_domain_promiscuity_statistics_file ) );
BufferedWriter domains_per_potein_stats_writer = null;
try {
domains_per_potein_stats_writer = new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR
- + output_file + "_domains_per_potein_stats.txt" ) );
+ + output_file + "_domains_per_potein_stats.txt" ) );
domains_per_potein_stats_writer.write( "Genome" );
domains_per_potein_stats_writer.write( "\t" );
domains_per_potein_stats_writer.write( "Mean" );
System.out.println( "Coverage : "
+ ForesterUtil.roundToInt( 100.0 * coverage ) + "%" );
SurfacingUtil.log( "Coverage : "
- + ForesterUtil.roundToInt( 100.0 * coverage ) + "%",
- log_writer );
+ + ForesterUtil.roundToInt( 100.0 * coverage ) + "%",
+ log_writer );
System.out.println( "Domains encountered : " + parser.getDomainsEncountered() );
SurfacingUtil.log( "Domains encountered : " + parser.getDomainsEncountered(),
log_writer );
System.out.println( "Domains ignored due to individual score cutoffs: "
+ parser.getDomainsIgnoredDueToIndividualScoreCutoff() );
SurfacingUtil.log( "Domains ignored due to individual score cutoffs: "
- + parser.getDomainsIgnoredDueToIndividualScoreCutoff(),
- log_writer );
+ + parser.getDomainsIgnoredDueToIndividualScoreCutoff(),
+ log_writer );
System.out.println( "Domains ignored due to FS E-value : "
+ parser.getDomainsIgnoredDueToFsEval() );
SurfacingUtil.log( "Domains ignored due to FS E-value : "
- + parser.getDomainsIgnoredDueToFsEval(),
- log_writer );
+ + parser.getDomainsIgnoredDueToFsEval(),
+ log_writer );
System.out.println( "Domains ignored due to iE-value : "
+ parser.getDomainsIgnoredDueToIEval() );
SurfacingUtil.log( "Domains ignored due to iE-value : "
- + parser.getDomainsIgnoredDueToIEval(),
- log_writer );
+ + parser.getDomainsIgnoredDueToIEval(),
+ log_writer );
System.out.println( "Domains ignored due to DUF designation : "
+ parser.getDomainsIgnoredDueToDuf() );
SurfacingUtil
- .log( "Domains ignored due to DUF designation : " + parser.getDomainsIgnoredDueToDuf(),
- log_writer );
+ .log( "Domains ignored due to DUF designation : " + parser.getDomainsIgnoredDueToDuf(),
+ log_writer );
if ( ignore_virus_like_ids ) {
System.out.println( "Domains ignored due virus like ids : "
+ parser.getDomainsIgnoredDueToVirusLikeIds() );
SurfacingUtil.log( "Domains ignored due virus like ids : "
- + parser.getDomainsIgnoredDueToVirusLikeIds(),
- log_writer );
+ + parser.getDomainsIgnoredDueToVirusLikeIds(),
+ log_writer );
}
System.out.println( "Domains ignored due negative domain filter : "
+ parser.getDomainsIgnoredDueToNegativeDomainFilter() );
SurfacingUtil.log( "Domains ignored due negative domain filter : "
- + parser.getDomainsIgnoredDueToNegativeDomainFilter(),
- log_writer );
+ + parser.getDomainsIgnoredDueToNegativeDomainFilter(),
+ log_writer );
System.out.println( "Domains ignored due to overlap : "
+ parser.getDomainsIgnoredDueToOverlap() );
SurfacingUtil.log( "Domains ignored due to overlap : "
- + parser.getDomainsIgnoredDueToOverlap(),
- log_writer );
+ + parser.getDomainsIgnoredDueToOverlap(),
+ log_writer );
if ( negative_filter_file != null ) {
System.out.println( "Proteins ignored due to negative filter : "
+ parser.getProteinsIgnoredDueToFilter() );
SurfacingUtil.log( "Proteins ignored due to negative filter : "
- + parser.getProteinsIgnoredDueToFilter(),
- log_writer );
+ + parser.getProteinsIgnoredDueToFilter(),
+ log_writer );
}
if ( positive_filter_file != null ) {
System.out.println( "Proteins ignored due to positive filter : "
+ parser.getProteinsIgnoredDueToFilter() );
SurfacingUtil.log( "Proteins ignored due to positive filter : "
- + parser.getProteinsIgnoredDueToFilter(),
- log_writer );
+ + parser.getProteinsIgnoredDueToFilter(),
+ log_writer );
}
if ( da_analysis ) {
System.out.println( "Distinct domain architectures stored : " + distinct_das );
int count = 0;
for( final Protein protein : protein_list ) {
dc_data_writer.write( SurfacingUtil.proteinToDomainCombinations( protein, count + "", "\t" )
- .toString() );
+ .toString() );
++count;
for( final Domain d : protein.getProteinDomains() ) {
final String d_str = d.getDomainId().toString();
}
if ( !da_analysis ) {
gwcd_list.add( BasicGenomeWideCombinableDomains
- .createInstance( protein_list,
- ignore_combination_with_same,
- new BasicSpecies( input_file_properties[ i ][ 1 ] ),
- domain_id_to_go_ids_map,
- dc_type,
- protein_length_stats_by_dc,
- domain_number_stats_by_dc ) );
+ .createInstance( protein_list,
+ ignore_combination_with_same,
+ new BasicSpecies( input_file_properties[ i ][ 1 ] ),
+ domain_id_to_go_ids_map,
+ dc_type,
+ protein_length_stats_by_dc,
+ domain_number_stats_by_dc ) );
if ( gwcd_list.get( i ).getSize() > 0 ) {
if ( output_binary_domain_combinationsfor_counts ) {
SurfacingUtil
- .writeDomainCombinationsCountsFile( input_file_properties,
- out_dir,
- per_genome_domain_promiscuity_statistics_writer,
- gwcd_list.get( i ),
- i,
- dc_sort_order );
+ .writeDomainCombinationsCountsFile( input_file_properties,
+ out_dir,
+ per_genome_domain_promiscuity_statistics_writer,
+ gwcd_list.get( i ),
+ i,
+ dc_sort_order );
}
if ( output_binary_domain_combinationsfor_graph_analysis ) {
SurfacingUtil.writeBinaryDomainCombinationsFileForGraphAnalysis( input_file_properties,
distinct_domain_architecuture_counts,
10,
new File( out_dir.toString() + "/" + output_file
- + "_DA_counts.txt" ),
- new File( out_dir.toString() + "/" + output_file
- + "_unique_DAs.txt" ) );
+ + "_DA_counts.txt" ),
+ new File( out_dir.toString() + "/" + output_file
+ + "_unique_DAs.txt" ) );
distinct_domain_architecutures_per_genome.clear();
distinct_domain_architecuture_counts.clear();
System.gc();
all_genomes_domains_per_potein_stats = null;
SurfacingUtil.printOutPercentageOfMultidomainProteins( all_genomes_domains_per_potein_histo, log_writer );
ForesterUtil.map2file( new File( out_dir + ForesterUtil.FILE_SEPARATOR + output_file
- + "_all_genomes_domains_per_potein_histo.txt" ), all_genomes_domains_per_potein_histo, "\t", "\n" );
+ + "_all_genomes_domains_per_potein_histo.txt" ), all_genomes_domains_per_potein_histo, "\t", "\n" );
ForesterUtil.collection2file( new File( out_dir + ForesterUtil.FILE_SEPARATOR + output_file
- + "_domains_always_single_.txt" ), domains_which_are_always_single, "\n" );
+ + "_domains_always_single_.txt" ), domains_which_are_always_single, "\n" );
ForesterUtil.collection2file( new File( out_dir + ForesterUtil.FILE_SEPARATOR + output_file
- + "_domains_single_or_combined.txt" ), domains_which_are_sometimes_single_sometimes_not, "\n" );
+ + "_domains_single_or_combined.txt" ), domains_which_are_sometimes_single_sometimes_not, "\n" );
ForesterUtil.collection2file( new File( out_dir + ForesterUtil.FILE_SEPARATOR + output_file
- + "_domains_always_combined.txt" ), domains_which_never_single, "\n" );
+ + "_domains_always_combined.txt" ), domains_which_never_single, "\n" );
ForesterUtil.programMessage( PRG_NAME,
"Average of proteins with a least one domain assigned: "
+ ( 100 * protein_coverage_stats.arithmeticMean() ) + "% (+/-"
+ ( 100 * protein_coverage_stats.getMin() ) + "%-" + ( 100 * protein_coverage_stats.getMax() )
+ "%" );
SurfacingUtil.log( "Average of prot with a least one dom assigned : "
- + ( 100 * protein_coverage_stats.arithmeticMean() ) + "% (+/-"
- + ( 100 * protein_coverage_stats.sampleStandardDeviation() ) + "%)",
- log_writer );
+ + ( 100 * protein_coverage_stats.arithmeticMean() ) + "% (+/-"
+ + ( 100 * protein_coverage_stats.sampleStandardDeviation() ) + "%)",
+ log_writer );
SurfacingUtil.log( "Range of prot with a least one dom assigned : "
- + ( 100 * protein_coverage_stats.getMin() ) + "%-"
- + ( 100 * protein_coverage_stats.getMax() ) + "%",
- log_writer );
+ + ( 100 * protein_coverage_stats.getMin() ) + "%-"
+ + ( 100 * protein_coverage_stats.getMax() ) + "%",
+ log_writer );
protein_coverage_stats = null;
}
catch ( final IOException e2 ) {
final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( domain_similarity_sort_field,
sort_by_species_count_first,
number_of_genomes == 2,
- CALC_SIMILARITY_SCORES );
+ CALC_SIMILARITY_SCORES,
+ true );
switch ( scoring ) {
case COMBINATIONS:
pw_calc = new CombinationsBasedPairwiseDomainSimilarityCalculator();
html_desc.append( "<tr><td>Sum of all distinct binary combinations:</td><td>"
+ all_bin_domain_combinations_encountered.size() + "</td></tr>" + nl );
html_desc.append( "<tr><td>Sum of all distinct domains:</td><td>" + all_domains_encountered.size()
- + "</td></tr>" + nl );
+ + "</td></tr>" + nl );
html_desc.append( "<tr><td>Analysis date/time:</td><td>"
+ new java.text.SimpleDateFormat( "yyyy.MM.dd HH:mm:ss" ).format( new java.util.Date() )
+ "</td></tr>" + nl );
html_desc.append( "</table>" + nl );
final Writer simple_tab_writer = new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR
- + my_outfile + ".tsv" ) );
+ + my_outfile + ".tsv" ) );
SurfacingUtil.writeDomainSimilaritiesToFile( html_desc,
new StringBuilder( number_of_genomes + " genomes" ),
simple_tab_writer,
output_file = new File( out_dir + ForesterUtil.FILE_SEPARATOR + output_file );
}
SurfacingUtil.writeMatrixToFile( new File( matrix_output_file
- + surfacing.MATRIX_MEAN_SCORE_BASED_GENOME_DISTANCE_SUFFIX ), pwgc.getDomainDistanceScoresMeans() );
+ + surfacing.MATRIX_MEAN_SCORE_BASED_GENOME_DISTANCE_SUFFIX ), pwgc.getDomainDistanceScoresMeans() );
SurfacingUtil
- .writeMatrixToFile( new File( matrix_output_file
- + surfacing.MATRIX_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX ),
- pwgc.getSharedBinaryCombinationsBasedDistances() );
+ .writeMatrixToFile( new File( matrix_output_file
+ + surfacing.MATRIX_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX ),
+ pwgc.getSharedBinaryCombinationsBasedDistances() );
SurfacingUtil.writeMatrixToFile( new File( matrix_output_file
- + surfacing.MATRIX_SHARED_DOMAINS_BASED_GENOME_DISTANCE_SUFFIX ),
- pwgc.getSharedDomainsBasedDistances() );
+ + surfacing.MATRIX_SHARED_DOMAINS_BASED_GENOME_DISTANCE_SUFFIX ),
+ pwgc.getSharedDomainsBasedDistances() );
final Phylogeny nj_gd = SurfacingUtil.createNjTreeBasedOnMatrixToFile( new File( matrix_output_file
- + surfacing.NJ_TREE_MEAN_SCORE_BASED_GENOME_DISTANCE_SUFFIX ), pwgc.getDomainDistanceScoresMeans()
- .get( 0 ) );
+ + surfacing.NJ_TREE_MEAN_SCORE_BASED_GENOME_DISTANCE_SUFFIX ), pwgc.getDomainDistanceScoresMeans()
+ .get( 0 ) );
final Phylogeny nj_bc = SurfacingUtil.createNjTreeBasedOnMatrixToFile( new File( matrix_output_file
- + surfacing.NJ_TREE_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX ), pwgc
- .getSharedBinaryCombinationsBasedDistances().get( 0 ) );
+ + surfacing.NJ_TREE_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX ), pwgc
+ .getSharedBinaryCombinationsBasedDistances().get( 0 ) );
final Phylogeny nj_d = SurfacingUtil.createNjTreeBasedOnMatrixToFile( new File( matrix_output_file
- + surfacing.NJ_TREE_SHARED_DOMAINS_BASED_GENOME_DISTANCE_SUFFIX ), pwgc
- .getSharedDomainsBasedDistances().get( 0 ) );
+ + surfacing.NJ_TREE_SHARED_DOMAINS_BASED_GENOME_DISTANCE_SUFFIX ), pwgc
+ .getSharedDomainsBasedDistances().get( 0 ) );
inferred_trees = new ArrayList<Phylogeny>();
inferred_trees.add( nj_gd );
inferred_trees.add( nj_bc );
parameters_sb.toString(),
domain_id_to_secondary_features_maps,
positive_filter_file == null ? null : filter,
- output_binary_domain_combinationsfor_graph_analysis,
- all_bin_domain_combinations_gained_fitch,
- all_bin_domain_combinations_lost_fitch,
- dc_type,
- protein_length_stats_by_dc,
- domain_number_stats_by_dc,
- domain_length_stats_by_domain,
- tax_code_to_id_map,
- write_to_nexus,
- use_last_in_fitch_parsimony,
- perform_dc_fich );
- // Listing of all domain combinations gained is only done if only one input tree is used.
+ output_binary_domain_combinationsfor_graph_analysis,
+ all_bin_domain_combinations_gained_fitch,
+ all_bin_domain_combinations_lost_fitch,
+ dc_type,
+ protein_length_stats_by_dc,
+ domain_number_stats_by_dc,
+ domain_length_stats_by_domain,
+ tax_code_to_id_map,
+ write_to_nexus,
+ use_last_in_fitch_parsimony,
+ perform_dc_fich );
+ // Listing of all domain combinations gained is only done if only one input tree is used.
if ( ( domain_id_to_secondary_features_maps != null )
&& ( domain_id_to_secondary_features_maps.length > 0 ) ) {
int j = 0;
final DomainParsimonyCalculator secondary_features_parsimony = DomainParsimonyCalculator
.createInstance( intree, gwcd_list, domain_id_to_secondary_features_map );
SurfacingUtil
- .executeParsimonyAnalysisForSecondaryFeatures( outfile_name
- + "_"
- + secondary_features_map_files[ j++ ],
- secondary_features_parsimony,
- intree,
- parameters_sb.toString(),
- mapping_results_map,
- use_last_in_fitch_parsimony );
+ .executeParsimonyAnalysisForSecondaryFeatures( outfile_name
+ + "_"
+ + secondary_features_map_files[ j++ ],
+ secondary_features_parsimony,
+ intree,
+ parameters_sb.toString(),
+ mapping_results_map,
+ use_last_in_fitch_parsimony );
if ( i == 0 ) {
System.out.println();
System.out.println( "Mapping to secondary features:" );
SurfacingUtil.writeProteinListsForAllSpecies( out_dir,
protein_lists_per_species,
gwcd_list,
- output_list_of_all_proteins_per_domain_e_value_max );
+ output_list_of_all_proteins_per_domain_e_value_max,
+ positive_filter_file != null ? filter : null );
}
gwcd_list = null;
if ( all_bin_domain_combinations_gained_fitch != null ) {
try {
SurfacingUtil
- .executeFitchGainsAnalysis( new File( output_file
- + surfacing.OUTPUT_DOMAIN_COMBINATIONS_GAINED_MORE_THAN_ONCE_ANALYSIS_SUFFIX ),
- all_bin_domain_combinations_gained_fitch,
- all_domains_encountered.size(),
- all_bin_domain_combinations_encountered,
- true );
+ .executeFitchGainsAnalysis( new File( output_file
+ + surfacing.OUTPUT_DOMAIN_COMBINATIONS_GAINED_MORE_THAN_ONCE_ANALYSIS_SUFFIX ),
+ all_bin_domain_combinations_gained_fitch,
+ all_domains_encountered.size(),
+ all_bin_domain_combinations_encountered,
+ true );
}
catch ( final IOException e ) {
ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() );
if ( all_bin_domain_combinations_lost_fitch != null ) {
try {
SurfacingUtil
- .executeFitchGainsAnalysis( new File( output_file
- + surfacing.OUTPUT_DOMAIN_COMBINATIONS_LOST_MORE_THAN_ONCE_ANALYSIS_SUFFIX ),
- all_bin_domain_combinations_lost_fitch,
- all_domains_encountered.size(),
- all_bin_domain_combinations_encountered,
- false );
+ .executeFitchGainsAnalysis( new File( output_file
+ + surfacing.OUTPUT_DOMAIN_COMBINATIONS_LOST_MORE_THAN_ONCE_ANALYSIS_SUFFIX ),
+ all_bin_domain_combinations_lost_fitch,
+ all_domains_encountered.size(),
+ all_bin_domain_combinations_encountered,
+ false );
}
catch ( final IOException e ) {
ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() );
final long free_memory = rt.freeMemory() / 1000000;
final long total_memory = rt.totalMemory() / 1000000;
ForesterUtil.programMessage( PRG_NAME, "Time for analysis : " + ( new Date().getTime() - analysis_start_time )
- + "ms" );
+ + "ms" );
ForesterUtil.programMessage( PRG_NAME, "Total running time: " + ( new Date().getTime() - start_time ) + "ms " );
ForesterUtil.programMessage( PRG_NAME, "Free memory : " + free_memory + "MB, total memory: "
+ total_memory + "MB" );
System.out.println( "Usage:" );
System.out.println();
System.out.println( "% java -Xms256m -Xmx512m -cp forester.jar org.forester.applications." + surfacing.PRG_NAME
- + " [options] <phylogen(y|ies) infile> [external node name 1] [name 2] ... [name n]" );
+ + " [options] <phylogen(y|ies) infile> [external node name 1] [name 2] ... [name n]" );
System.out.println();
System.out.println( " Note: This software might need a significant amount of memory (heap space);" );
System.out
- .println( " hence use \"-Xms128m -Xmx512m\" (or more) to prevent a \"java.lang.OutOfMemoryError\"." );
+ .println( " hence use \"-Xms128m -Xmx512m\" (or more) to prevent a \"java.lang.OutOfMemoryError\"." );
System.out.println();
System.out.println( " Options: " );
System.out.println( surfacing.DETAILEDNESS_OPTION + ": level of detail for similarities output file (default:"
+ DETAILEDNESS_DEFAULT + ")" );
System.out.println( surfacing.IGNORE_COMBINATION_WITH_SAME_OPTION
- + ": to ignore combinations with self (default: not to ignore)" );
+ + ": to ignore combinations with self (default: not to ignore)" );
System.out
- .println( surfacing.IGNORE_DOMAINS_WITHOUT_COMBINATIONS_IN_ALL_SPECIES_OPTION
- + ": to ignore domains without combinations in any species (for similarity calc purposes, not for parsimony analyses) (default: not to ignore)" );
+ .println( surfacing.IGNORE_DOMAINS_WITHOUT_COMBINATIONS_IN_ALL_SPECIES_OPTION
+ + ": to ignore domains without combinations in any species (for similarity calc purposes, not for parsimony analyses) (default: not to ignore)" );
System.out
- .println( surfacing.IGNORE_DOMAINS_SPECIFIC_TO_ONE_SPECIES_OPTION
- + ": to ignore domains specific to one species (for similarity calc purposes, not for parsimony analyses) (default: not to ignore)" );
+ .println( surfacing.IGNORE_DOMAINS_SPECIFIC_TO_ONE_SPECIES_OPTION
+ + ": to ignore domains specific to one species (for similarity calc purposes, not for parsimony analyses) (default: not to ignore)" );
System.out.println( surfacing.NOT_IGNORE_DUFS_OPTION
- + ": to _not_ ignore DUFs (domains with unknown function) (default: ignore DUFs)" );
+ + ": to _not_ ignore DUFs (domains with unknown function) (default: ignore DUFs)" );
System.out
- .println( surfacing.IGNORE_VIRAL_IDS
- + ": to ignore domains with ids containing 'vir', 'retro', 'transpos', 'phage', or starting with 'rv' or 'gag_'" );
+ .println( surfacing.IGNORE_VIRAL_IDS
+ + ": to ignore domains with ids containing 'vir', 'retro', 'transpos', 'phage', or starting with 'rv' or 'gag_'" );
System.out.println( surfacing.DOMAIN_SIMILARITY_SORT_OPTION + ": sorting for similarities (default: "
+ DOMAIN_SORT_FILD_DEFAULT + ")" );
System.out.println( surfacing.OUTPUT_FILE_OPTION + ": name for (main) output file (mandatory)" );
+ DOMAIN_SIMILARITY_PRINT_OPTION_DEFAULT + ")" );
System.out.println( surfacing.CUTOFF_SCORE_FILE_OPTION + ": cutoff score file" );
System.out.println( surfacing.DOMAIN_SIMILARITY_SORT_BY_SPECIES_COUNT_FIRST_OPTION
- + ": sort by species count first" );
+ + ": sort by species count first" );
System.out.println( surfacing.OUTPUT_DIR_OPTION + ": output directory" );
System.out.println( surfacing.PFAM_TO_GO_FILE_USE_OPTION + ": Pfam to GO mapping file" );
System.out.println( surfacing.GO_OBO_FILE_USE_OPTION + ": GO terms file (OBO format)" );
System.out.println( surfacing.GO_NAMESPACE_LIMIT_OPTION + ": limit GO term to one GO namespace" );
System.out.println( surfacing.PAIRWISE_DOMAIN_COMPARISONS_OPTION
- + "[=<suffix for pairwise comparison output files>]: to perform pairwise comparison based analyses" );
+ + "[=<suffix for pairwise comparison output files>]: to perform pairwise comparison based analyses" );
System.out.println( surfacing.INPUT_SPECIES_TREE_OPTION
- + ": species tree, to perform (Dollo, Fitch) parismony analyses" );
+ + ": species tree, to perform (Dollo, Fitch) parismony analyses" );
System.out
- .println( surfacing.INPUT_SPECIES_TREE_OPTION
- + "=<treefiles in phyloXML format, separated by #>: to infer domain/binary domain combination gains/losses on given species trees" );
+ .println( surfacing.INPUT_SPECIES_TREE_OPTION
+ + "=<treefiles in phyloXML format, separated by #>: to infer domain/binary domain combination gains/losses on given species trees" );
System.out.println( surfacing.FILTER_POSITIVE_OPTION
- + "=<file>: to filter out proteins not containing at least one domain listed in <file>" );
+ + "=<file>: to filter out proteins not containing at least one domain listed in <file>" );
System.out.println( surfacing.FILTER_NEGATIVE_OPTION
- + "=<file>: to filter out proteins containing at least one domain listed in <file>" );
+ + "=<file>: to filter out proteins containing at least one domain listed in <file>" );
System.out.println( surfacing.FILTER_NEGATIVE_DOMAINS_OPTION
- + "=<file>: to filter out (ignore) domains listed in <file>" );
+ + "=<file>: to filter out (ignore) domains listed in <file>" );
System.out.println( surfacing.INPUT_GENOMES_FILE_OPTION + "=<file>: to read input files from <file>" );
System.out
- .println( surfacing.RANDOM_SEED_FOR_FITCH_PARSIMONY_OPTION
- + "=<seed>: seed for random number generator for Fitch Parsimony analysis (type: long, default: no randomization - given a choice, prefer absence" );
+ .println( surfacing.RANDOM_SEED_FOR_FITCH_PARSIMONY_OPTION
+ + "=<seed>: seed for random number generator for Fitch Parsimony analysis (type: long, default: no randomization - given a choice, prefer absence" );
System.out.println( surfacing.CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS
- + ": to consider directedness in binary combinations: e.g. A-B != B-A" );
+ + ": to consider directedness in binary combinations: e.g. A-B != B-A" );
System.out.println( surfacing.CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS_AND_ADJACENCY
- + ": to consider directedness and adjacency in binary combinations" );
+ + ": to consider directedness and adjacency in binary combinations" );
System.out
- .println( surfacing.SEQ_EXTRACT_OPTION
- + "=<domain ids (Pfam names)>: to extract sequence names of sequences containing matching domains and/or domain-sequences (order N to C) (domain separator: '~', domain sequences speparator: '#', e.g. 'NACHT#BIR~CARD')" );
+ .println( surfacing.SEQ_EXTRACT_OPTION
+ + "=<domain ids (Pfam names)>: to extract sequence names of sequences containing matching domains and/or domain-sequences (order N to C) (domain separator: '~', domain sequences speparator: '#', e.g. 'NACHT#BIR~CARD')" );
System.out.println( surfacing.SECONDARY_FEATURES_PARSIMONY_MAP_FILE
- + "=<file>: to perfom parsimony analysis on secondary features" );
+ + "=<file>: to perfom parsimony analysis on secondary features" );
System.out.println( surfacing.PLUS_MINUS_ANALYSIS_OPTION + "=<file>: to presence/absence genome analysis" );
System.out.println( surfacing.DOMAIN_COMBINITONS_COUNTS_OUTPUT_OPTION
- + ": to output binary domain counts (as individual files)" );
+ + ": to output binary domain counts (as individual files)" );
System.out.println( surfacing.DOMAIN_COMBINITONS_OUTPUT_OPTION_FOR_GRAPH_ANALYSIS
- + ": to output binary domain combinations for (downstream) graph analysis" );
+ + ": to output binary domain combinations for (downstream) graph analysis" );
System.out.println( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_OPTIONS + ": to output all proteins per domain" );
System.out.println( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION
- + ": e value max per domain for output of all proteins per domain" );
+ + ": e value max per domain for output of all proteins per domain" );
System.out.println( surfacing.USE_LAST_IN_FITCH_OPTION + ": to use last in Fitch parsimony" );
System.out.println( surfacing.WRITE_TO_NEXUS_OPTION + ": to output in Nexus format" );
System.out.println( PERFORM_DC_FITCH + ": to perform DC Fitch parsimony" );
System.out.println( DA_ANALYSIS_OPTION + ": to perform DA analysis" );
System.out.println( PERFORM_DOMAIN_LENGTH_ANALYSIS_OPTION + ": to perform domain length analysis" );
System.out.println();
- System.out.println( "Example 1: java -Xms128m -Xmx512m -cp path/to/forester.jar"
- + " org.forester.application.surfacing p2g=pfam2go_2012_02_07.txt -dufs -cos=Pfam_260_NC1"
- + " -no_eo -mo=0 -genomes=eukaryotes.txt -out_dir=out -o=o "
- + " -species_tree=tol.xml -obo=gene_ontology_2012_02_07.obo -pos_filter=f.txt -all_prot" );
System.out.println();
- System.out.println( "Example 2: java -Xms128m -Xmx512m -cp path/to/forester.jar"
- + " org.forester.application.surfacing -detail=punctilious -o=TEST.html -pwc=TEST"
- + " -cos=Pfam_ls_22_TC2 -p2g=pfam2go -obo=gene_ontology_edit.obo "
- + "-dc_sort=dom -ignore_with_self -no_singles -ie=0.001 -mo=1 -no_eo -genomes=eukaryotes.txt "
- + "-ds_output=detailed_html -scoring=domains -sort=alpha " );
+ System.out
+ .println( "Example 1: surfacing -p2g=pfam2go.txt -obo=go.obo -species_tree=tol_156.xml -no_eo -ie=0.01 -dufs -genomes=genomes_all.txt -pos_filter=tf_1.txt -out_dir=_tf1 -o=tf1" );
+ System.out.println();
+ System.out
+ .println( "Example 2: surfacing -p2g=pfam2go.txt -obo=go.obo -species_tree=tol_156.xml -last -ignore_viral_ids -no_eo -ie=0.1 -dufs -genomes=genomes_all.txt -pos_filter=tf_1.txt -all_prot -all_prot_e=0.1 -out_dir=_tf1_e01_ape01 -o=tf1_e01_ape01" );
System.out.println();
}
}