import java.util.TreeMap;
import java.util.TreeSet;
-
import org.forester.evoinference.matrix.character.CharacterStateMatrix.Format;
import org.forester.go.GoId;
import org.forester.go.GoNameSpace;
final static private long JACKNIFE_RANDOM_SEED_DEFAULT = 19;
final static private double JACKNIFE_RATIO_DEFAULT = 0.5;
//final static private String INFER_SPECIES_TREES_OPTION = "species_tree_inference";
- final static private String FILTER_POSITIVE_OPTION = "pos_filter";
+ final static private String FILTER_POSITIVE_OPTION = "pos_filter";
// Command-line option names and output-file suffixes. Only the literal values are
// visible here; NOTE(review): how each option is parsed/consumed is outside this
// fragment -- confirm against the option-handling code before renaming any value.
final static private String FILTER_NEGATIVE_OPTION = "neg_filter";
final static private String FILTER_NEGATIVE_DOMAINS_OPTION = "neg_dom_filter";
final static private String INPUT_FILES_FROM_FILE_OPTION = "input";
private static final String PLUS_MINUS_ALL_GO_IDS_DOM_SUFFIX = "_plus_minus_go_ids_all.txt";
private static final String PLUS_MINUS_PASSING_GO_IDS_DOM_SUFFIX = "_plus_minus_go_ids_passing.txt";
// Described in the printed help text as the option "to output all proteins per domain".
private static final String OUTPUT_LIST_OF_ALL_PROTEINS_OPTIONS = "all_prot";
- final static private String OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION = "all_prot_e";
-
-
+ final static private String OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION = "all_prot_e";
// Compile-time verbosity switch; currently off.
private static final boolean VERBOSE = false;
// File-name suffixes (presumably appended to a common output base name -- confirm at the use sites).
private static final String OUTPUT_DOMAIN_COMBINATIONS_GAINED_MORE_THAN_ONCE_ANALYSIS_SUFFIX = "_fitch_dc_gains_counts";
private static final String OUTPUT_DOMAIN_COMBINATIONS_LOST_MORE_THAN_ONCE_ANALYSIS_SUFFIX = "_fitch_dc_losses_counts";
private static final String LOG_FILE_SUFFIX = "_log.txt";
private static final String DATA_FILE_SUFFIX = "_domain_combination_data.txt";
// Tab-separated, '#'-prefixed header line with eight column names.
// NOTE(review): "PRTEIN_ID" looks like a typo for "PROTEIN_ID"; it is runtime output, so
// check for downstream consumers that match on the exact header before correcting it.
private static final String DATA_FILE_DESC = "#SPECIES\tPRTEIN_ID\tN_TERM_DOMAIN\tC_TERM_DOMAIN\tN_TERM_DOMAIN_PER_DOMAIN_E_VALUE\tC_TERM_DOMAIN_PER_DOMAIN_E_VALUE\tN_TERM_DOMAIN_COUNTS_PER_PROTEIN\tC_TERM_DOMAIN_COUNTS_PER_PROTEIN";
- private static final INDIVIDUAL_SCORE_CUTOFF INDIVIDUAL_SCORE_CUTOFF_DEFAULT = INDIVIDUAL_SCORE_CUTOFF.FULL_SEQUENCE;
+ private static final INDIVIDUAL_SCORE_CUTOFF INDIVIDUAL_SCORE_CUTOFF_DEFAULT = INDIVIDUAL_SCORE_CUTOFF.FULL_SEQUENCE; //TODO look at me! change?
// Public file-name suffixes for three related text outputs (counts, lists, and lists
// for GO mapping). NOTE(review): judging by the names these belong to the independent
// domain-combination gains computed by Fitch parsimony -- confirm at the use sites.
// They are public, so code outside this class may reference them.
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_COUNTS_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_counts.txt";
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists.txt";
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping.txt";
return intrees;
}
-
-
private static void log( final String msg, final Writer w ) {
try {
w.write( msg );
//
if ( cla.isOptionSet( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION ) ) {
try {
- output_list_of_all_proteins_per_domain_e_value_max = cla.getOptionValueAsDouble( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION );
+ output_list_of_all_proteins_per_domain_e_value_max = cla
+ .getOptionValueAsDouble( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION );
}
catch ( final Exception e ) {
ForesterUtil.fatalError( surfacing.PRG_NAME, "no acceptable value for per domain E-value maximum" );
+ "=<ordered domain sequences, domain ids separated by '~', sequences separated by '#'>" );
}
query_domain_ids = cla.getOptionValue( surfacing.SEQ_EXTRACT_OPTION );
-
}
DomainSimilarity.DomainSimilaritySortField domain_similarity_sort_field = DOMAIN_SORT_FILD_DEFAULT;
DomainSimilarity.DomainSimilaritySortField domain_similarity_sort_field_for_automated_pwc = DOMAIN_SORT_FILD_DEFAULT;
System.out.println( "E-value maximum (inclusive) : " + e_value_max );
html_desc.append( "<tr><td>E-value maximum (inclusive):</td><td>" + e_value_max + "</td></tr>" + nl );
}
- if ( output_protein_lists_for_all_domains ) {
+ if ( output_protein_lists_for_all_domains ) {
System.out.println( "Domain E-value max : " + output_list_of_all_proteins_per_domain_e_value_max );
- html_desc.append( "<tr><td>Protein lists: E-value maximum per domain (inclusive):</td><td>" + output_list_of_all_proteins_per_domain_e_value_max + "</td></tr>" + nl );
+ html_desc.append( "<tr><td>Protein lists: E-value maximum per domain (inclusive):</td><td>"
+ + output_list_of_all_proteins_per_domain_e_value_max + "</td></tr>" + nl );
}
System.out.println( "Ignore DUFs : " + ignore_dufs );
if ( ignore_virus_like_ids ) {
"Average of proteins with a least one domain assigned: "
+ ( 100 * protein_coverage_stats.arithmeticMean() ) + "% (+/-"
+ ( 100 * protein_coverage_stats.sampleStandardDeviation() ) + "%)" );
- ForesterUtil.programMessage( PRG_NAME, "Range of proteins with a least one domain assigned: " + 100
- * protein_coverage_stats.getMin() + "%-" + 100 * protein_coverage_stats.getMax() + "%" );
+ ForesterUtil.programMessage( PRG_NAME, "Range of proteins with a least one domain assigned: "
+ + ( 100 * protein_coverage_stats.getMin() ) + "%-" + ( 100 * protein_coverage_stats.getMax() )
+ + "%" );
log( "Average of prot with a least one dom assigned : " + ( 100 * protein_coverage_stats.arithmeticMean() )
+ "% (+/-" + ( 100 * protein_coverage_stats.sampleStandardDeviation() ) + "%)", log_writer );
- log( "Range of prot with a least one dom assigned : " + 100 * protein_coverage_stats.getMin() + "%-"
- + 100 * protein_coverage_stats.getMax() + "%", log_writer );
+ log( "Range of prot with a least one dom assigned : " + ( 100 * protein_coverage_stats.getMin() ) + "%-"
+ + ( 100 * protein_coverage_stats.getMax() ) + "%", log_writer );
}
catch ( final IOException e2 ) {
ForesterUtil.fatalError( surfacing.PRG_NAME, e2.getLocalizedMessage() );
System.out.print( ", not mapped domains = " + mapping_results.getSumOfFailures() );
if ( total_domains > 0 ) {
System.out.println( ", mapped ratio = "
- + ( 100 * mapping_results.getSumOfSuccesses() / total_domains ) + "%" );
+ + ( ( 100 * mapping_results.getSumOfSuccesses() ) / total_domains ) + "%" );
}
else {
System.out.println( ", mapped ratio = n/a (total domains = 0 )" );
protein_lists_per_species,
domain_id_to_go_ids_map,
go_id_to_term_map,
- plus_minus_analysis_numbers
- );
+ plus_minus_analysis_numbers );
}
if ( output_protein_lists_for_all_domains ) {
- writeProteinListsForAllSpecies( out_dir, protein_lists_per_species, gwcd_list, output_list_of_all_proteins_per_domain_e_value_max );
+ writeProteinListsForAllSpecies( out_dir,
+ protein_lists_per_species,
+ gwcd_list,
+ output_list_of_all_proteins_per_domain_e_value_max );
}
-
if ( all_bin_domain_combinations_gained_fitch != null ) {
try {
executeFitchGainsAnalysis( new File( output_file
for( final Entry<Integer, Integer> entry : all_genomes_domains_per_potein_histo.entrySet() ) {
sum += entry.getValue();
}
- final double percentage = 100.0 * ( sum - all_genomes_domains_per_potein_histo.get( 1 ) ) / sum;
+ final double percentage = ( 100.0 * ( sum - all_genomes_domains_per_potein_histo.get( 1 ) ) ) / sum;
ForesterUtil.programMessage( PRG_NAME, "Percentage of multidomain proteins: " + percentage + "%" );
log( "Percentage of multidomain proteins: : " + percentage + "%", log_writer );
}
}
System.out.println( "--" );
}
- for( int i = 0; i < input_file_properties.length; ++i ) {
+ for( final String[] input_file_propertie : input_file_properties ) {
try {
- intree.getNode( input_file_properties[ i ][ 1 ] );
+ intree.getNode( input_file_propertie[ 1 ] );
}
catch ( final IllegalArgumentException e ) {
- ForesterUtil.fatalError( surfacing.PRG_NAME, "node named [" + input_file_properties[ i ][ 1 ]
+ ForesterUtil.fatalError( surfacing.PRG_NAME, "node named [" + input_file_propertie[ 1 ]
+ "] not present/not unique in input tree" );
}
}
}
-
private static void printHelp() {
System.out.println();
System.out.println( "Usage:" );
System.out.println( surfacing.DOMAIN_COMBINITONS_OUTPUT_OPTION_FOR_GRAPH_ANALYSIS
+ ": to output binary domain combinations for (downstream) graph analysis" );
System.out.println( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_OPTIONS + ": to output all proteins per domain" );
- System.out.println( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION + ": e value max per domain for output of all proteins per domain" );
-
+ System.out.println( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION
+ + ": e value max per domain for output of all proteins per domain" );
System.out.println();
System.out.println( "Example 1: java -Xms128m -Xmx512m -cp path/to/forester.jar"
+ " org.forester.application.surfacing p2g=pfam2go_2012_02_07.txt -dufs -cos=Pfam_260_NC1"
proteins_file_writer,
"\t",
LIMIT_SPEC_FOR_PROT_EX,
- domain_e_cutoff );
+ domain_e_cutoff );
proteins_file_writer.close();
}
catch ( final IOException e ) {