// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org/forester
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
package org.forester.application;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
+import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;
import java.util.TreeSet;
-import org.forester.evoinference.distance.NeighborJoining;
import org.forester.evoinference.matrix.character.CharacterStateMatrix.Format;
-import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
import org.forester.go.GoId;
import org.forester.go.GoNameSpace;
import org.forester.go.GoTerm;
import org.forester.io.parsers.HmmscanPerDomainTableParser;
import org.forester.io.parsers.HmmscanPerDomainTableParser.INDIVIDUAL_SCORE_CUTOFF;
import org.forester.io.parsers.util.ParserUtils;
-import org.forester.io.writers.PhylogenyWriter;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyMethods;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.protein.BasicProtein;
import org.forester.protein.BinaryDomainCombination;
import org.forester.protein.Domain;
import org.forester.protein.DomainId;
final static private long JACKNIFE_RANDOM_SEED_DEFAULT = 19;
final static private double JACKNIFE_RATIO_DEFAULT = 0.5;
//final static private String INFER_SPECIES_TREES_OPTION = "species_tree_inference";
- final static private String INFERRED_SD_BASED_NJ_SPECIES_TREE_SUFFIX = "_sd_nj.nh";
- final static private String INFERRED_SBC_BASED_NJ_SPECIES_TREE_SUFFIX = "_sbc_nj.nh";
final static private String FILTER_POSITIVE_OPTION = "pos_filter";
final static private String FILTER_NEGATIVE_OPTION = "neg_filter";
final static private String FILTER_NEGATIVE_DOMAINS_OPTION = "neg_dom_filter";
final static private String INPUT_SPECIES_TREE_OPTION = "species_tree";
final static private String SEQ_EXTRACT_OPTION = "prot_extract";
final static private char SEPARATOR_FOR_INPUT_VALUES = '#';
- final static private String PRG_VERSION = "2.250";
- final static private String PRG_DATE = "2012.05.07";
+ final static private String PRG_VERSION = "2.252";
+ final static private String PRG_DATE = "2012.08.01";
final static private String E_MAIL = "czmasek@burnham.org";
final static private String WWW = "www.phylosoft.org/forester/applications/surfacing";
final static private boolean IGNORE_DUFS_DEFAULT = true;
private static final String PLUS_MINUS_ALL_GO_IDS_DOM_SUFFIX = "_plus_minus_go_ids_all.txt";
private static final String PLUS_MINUS_PASSING_GO_IDS_DOM_SUFFIX = "_plus_minus_go_ids_passing.txt";
private static final String OUTPUT_LIST_OF_ALL_PROTEINS_OPTIONS = "all_prot";
+ final static private String OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION = "all_prot_e";
private static final boolean VERBOSE = false;
private static final String OUTPUT_DOMAIN_COMBINATIONS_GAINED_MORE_THAN_ONCE_ANALYSIS_SUFFIX = "_fitch_dc_gains_counts";
private static final String OUTPUT_DOMAIN_COMBINATIONS_LOST_MORE_THAN_ONCE_ANALYSIS_SUFFIX = "_fitch_dc_losses_counts";
private static final String LOG_FILE_SUFFIX = "_log.txt";
private static final String DATA_FILE_SUFFIX = "_domain_combination_data.txt";
private static final String DATA_FILE_DESC = "#SPECIES\tPRTEIN_ID\tN_TERM_DOMAIN\tC_TERM_DOMAIN\tN_TERM_DOMAIN_PER_DOMAIN_E_VALUE\tC_TERM_DOMAIN_PER_DOMAIN_E_VALUE\tN_TERM_DOMAIN_COUNTS_PER_PROTEIN\tC_TERM_DOMAIN_COUNTS_PER_PROTEIN";
- private static final INDIVIDUAL_SCORE_CUTOFF INDIVIDUAL_SCORE_CUTOFF_DEFAULT = INDIVIDUAL_SCORE_CUTOFF.FULL_SEQUENCE;
+ private static final INDIVIDUAL_SCORE_CUTOFF INDIVIDUAL_SCORE_CUTOFF_DEFAULT = INDIVIDUAL_SCORE_CUTOFF.FULL_SEQUENCE; //TODO look at me! change?
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_COUNTS_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_counts.txt";
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists.txt";
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping.txt";
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_MAPPED.txt";
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique_MAPPED.txt";
private static final boolean PERFORM_DC_REGAIN_PROTEINS_STATS = true;
+ private static final boolean DA_ANALYSIS = true;
private static void checkWriteabilityForPairwiseComparisons( final PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option,
final String[][] input_file_properties,
return intrees;
}
- private static List<Phylogeny> inferSpeciesTrees( final File outfile,
- final List<BasicSymmetricalDistanceMatrix> distances_list ) {
- final NeighborJoining nj = NeighborJoining.createInstance();
- final List<Phylogeny> phylogenies = nj.execute( distances_list );
- final PhylogenyWriter w = new PhylogenyWriter();
- try {
- w.toNewHampshire( phylogenies, true, true, outfile, ";" );
- }
- catch ( final IOException e ) {
- ForesterUtil.fatalError( PRG_NAME, "failed to write to outfile [" + outfile + "]: " + e.getMessage() );
- }
- return phylogenies;
- }
-
private static void log( final String msg, final Writer w ) {
try {
w.write( msg );
allowed_options.add( FILTER_NEGATIVE_DOMAINS_OPTION );
allowed_options.add( IGNORE_VIRAL_IDS );
allowed_options.add( SEQ_EXTRACT_OPTION );
+ allowed_options.add( OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION );
allowed_options.add( SECONDARY_FEATURES_PARSIMONY_MAP_FILE );
allowed_options.add( PLUS_MINUS_ANALYSIS_OPTION );
allowed_options.add( DOMAIN_COMBINITONS_OUTPUT_OPTION_FOR_GRAPH_ANALYSIS );
+ error );
}
try {
- final BasicTable<String> scores_table = BasicTableParser.parse( cutoff_scores_file, " " );
+ final BasicTable<String> scores_table = BasicTableParser.parse( cutoff_scores_file, ' ' );
individual_score_cutoffs = scores_table.getColumnsAsMapDouble( 0, 1 );
}
catch ( final IOException e ) {
species_matrix = true;
}
boolean output_protein_lists_for_all_domains = false;
+ double output_list_of_all_proteins_per_domain_e_value_max = -1;
if ( cla.isOptionSet( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_OPTIONS ) ) {
output_protein_lists_for_all_domains = true;
+ //
+ if ( cla.isOptionSet( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION ) ) {
+ try {
+ output_list_of_all_proteins_per_domain_e_value_max = cla
+ .getOptionValueAsDouble( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION );
+ }
+ catch ( final Exception e ) {
+ ForesterUtil.fatalError( surfacing.PRG_NAME, "no acceptable value for per domain E-value maximum" );
+ }
+ }
+ //
}
Detailedness detailedness = DETAILEDNESS_DEFAULT;
if ( cla.isOptionSet( surfacing.DETAILEDNESS_OPTION ) ) {
System.out.println( "E-value maximum (inclusive) : " + e_value_max );
html_desc.append( "<tr><td>E-value maximum (inclusive):</td><td>" + e_value_max + "</td></tr>" + nl );
}
+ if ( output_protein_lists_for_all_domains ) {
+ System.out.println( "Domain E-value max : " + output_list_of_all_proteins_per_domain_e_value_max );
+ html_desc.append( "<tr><td>Protein lists: E-value maximum per domain (inclusive):</td><td>"
+ + output_list_of_all_proteins_per_domain_e_value_max + "</td></tr>" + nl );
+ }
System.out.println( "Ignore DUFs : " + ignore_dufs );
if ( ignore_virus_like_ids ) {
System.out.println( "Ignore virus like ids : " + ignore_virus_like_ids );
domain_number_stats_by_dc = new HashMap<String, DescriptiveStatistics>();
}
// Main loop:
+ final SortedMap<String, Set<String>> distinct_domain_architecutures_per_genome = new TreeMap<String, Set<String>>();
+ final SortedMap<String, Integer> distinct_domain_architecuture_counts = new TreeMap<String, Integer>();
for( int i = 0; i < number_of_genomes; ++i ) {
System.out.println();
System.out.println( ( i + 1 ) + "/" + number_of_genomes );
}
final double coverage = ( double ) protein_list.size() / parser.getProteinsEncountered();
protein_coverage_stats.addValue( coverage );
+ int distinct_das = -1;
+ if ( DA_ANALYSIS ) {
+ final String genome = input_file_properties[ i ][ 0 ];
+ distinct_das = storeDomainArchitectures( genome,
+ distinct_domain_architecutures_per_genome,
+ protein_list,
+ distinct_domain_architecuture_counts );
+ }
System.out.println( "Number of proteins encountered : " + parser.getProteinsEncountered() );
log( "Number of proteins encountered : " + parser.getProteinsEncountered(), log_writer );
System.out.println( "Number of proteins stored : " + protein_list.size() );
log( "Proteins ignored due to positive filter : " + parser.getProteinsIgnoredDueToFilter(),
log_writer );
}
+ if ( DA_ANALYSIS ) {
+ System.out.println( "Distinct domain architectures stored : " + distinct_das );
+ log( "Distinct domain architectures stored : " + distinct_das, log_writer );
+ }
System.out.println( "Time for processing : " + parser.getTime() + "ms" );
log( "", log_writer );
html_desc.append( "<tr><td>" + input_file_properties[ i ][ 0 ] + " [species: "
domains_which_are_sometimes_single_sometimes_not,
domains_which_never_single,
domains_per_potein_stats_writer );
- gwcd_list.add( BasicGenomeWideCombinableDomains
- .createInstance( protein_list,
- ignore_combination_with_same,
- new BasicSpecies( input_file_properties[ i ][ 1 ] ),
- domain_id_to_go_ids_map,
- dc_type,
- protein_length_stats_by_dc,
- domain_number_stats_by_dc ) );
domain_lengths_table.addLengths( protein_list );
- if ( gwcd_list.get( i ).getSize() > 0 ) {
- SurfacingUtil.writeDomainCombinationsCountsFile( input_file_properties,
- out_dir,
- per_genome_domain_promiscuity_statistics_writer,
- gwcd_list.get( i ),
- i,
- dc_sort_order );
- if ( output_binary_domain_combinationsfor_graph_analysis ) {
- SurfacingUtil.writeBinaryDomainCombinationsFileForGraphAnalysis( input_file_properties,
- out_dir,
- gwcd_list.get( i ),
- i,
- dc_sort_order );
+ if ( !DA_ANALYSIS ) {
+ gwcd_list.add( BasicGenomeWideCombinableDomains
+ .createInstance( protein_list,
+ ignore_combination_with_same,
+ new BasicSpecies( input_file_properties[ i ][ 1 ] ),
+ domain_id_to_go_ids_map,
+ dc_type,
+ protein_length_stats_by_dc,
+ domain_number_stats_by_dc ) );
+ if ( gwcd_list.get( i ).getSize() > 0 ) {
+ SurfacingUtil.writeDomainCombinationsCountsFile( input_file_properties,
+ out_dir,
+ per_genome_domain_promiscuity_statistics_writer,
+ gwcd_list.get( i ),
+ i,
+ dc_sort_order );
+ if ( output_binary_domain_combinationsfor_graph_analysis ) {
+ SurfacingUtil.writeBinaryDomainCombinationsFileForGraphAnalysis( input_file_properties,
+ out_dir,
+ gwcd_list.get( i ),
+ i,
+ dc_sort_order );
+ }
+ SurfacingUtil.addAllDomainIdsToSet( gwcd_list.get( i ), all_domains_encountered );
+ SurfacingUtil.addAllBinaryDomainCombinationToSet( gwcd_list.get( i ),
+ all_bin_domain_combinations_encountered );
}
- SurfacingUtil.addAllDomainIdsToSet( gwcd_list.get( i ), all_domains_encountered );
- SurfacingUtil.addAllBinaryDomainCombinationToSet( gwcd_list.get( i ),
- all_bin_domain_combinations_encountered );
}
if ( query_domains_writer_ary != null ) {
for( int j = 0; j < query_domain_ids_array.length; j++ ) {
ForesterUtil.programMessage( PRG_NAME, "Wrote domain promiscuities to: "
+ per_genome_domain_promiscuity_statistics_file );
//
+ if ( DA_ANALYSIS ) {
+ performDomainArchitectureAnalysis( distinct_domain_architecutures_per_genome,
+ distinct_domain_architecuture_counts,
+ 10 );
+ distinct_domain_architecutures_per_genome.clear();
+ distinct_domain_architecuture_counts.clear();
+ System.gc();
+ }
try {
domains_per_potein_stats_writer.write( "ALL" );
domains_per_potein_stats_writer.write( "\t" );
"Average of proteins with a least one domain assigned: "
+ ( 100 * protein_coverage_stats.arithmeticMean() ) + "% (+/-"
+ ( 100 * protein_coverage_stats.sampleStandardDeviation() ) + "%)" );
- ForesterUtil.programMessage( PRG_NAME, "Range of proteins with a least one domain assigned: " + 100
- * protein_coverage_stats.getMin() + "%-" + 100 * protein_coverage_stats.getMax() + "%" );
+ ForesterUtil.programMessage( PRG_NAME, "Range of proteins with a least one domain assigned: "
+ + ( 100 * protein_coverage_stats.getMin() ) + "%-" + ( 100 * protein_coverage_stats.getMax() )
+ + "%" );
log( "Average of prot with a least one dom assigned : " + ( 100 * protein_coverage_stats.arithmeticMean() )
+ "% (+/-" + ( 100 * protein_coverage_stats.sampleStandardDeviation() ) + "%)", log_writer );
- log( "Range of prot with a least one dom assigned : " + 100 * protein_coverage_stats.getMin() + "%-"
- + 100 * protein_coverage_stats.getMax() + "%", log_writer );
+ log( "Range of prot with a least one dom assigned : " + ( 100 * protein_coverage_stats.getMin() ) + "%-"
+ + ( 100 * protein_coverage_stats.getMax() ) + "%", log_writer );
}
catch ( final IOException e2 ) {
ForesterUtil.fatalError( surfacing.PRG_NAME, e2.getLocalizedMessage() );
System.out.print( ", not mapped domains = " + mapping_results.getSumOfFailures() );
if ( total_domains > 0 ) {
System.out.println( ", mapped ratio = "
- + ( 100 * mapping_results.getSumOfSuccesses() / total_domains ) + "%" );
+ + ( ( 100 * mapping_results.getSumOfSuccesses() ) / total_domains ) + "%" );
}
else {
System.out.println( ", mapped ratio = n/a (total domains = 0 )" );
plus_minus_analysis_numbers );
}
if ( output_protein_lists_for_all_domains ) {
- writeProteinListsForAllSpecies( out_dir, protein_lists_per_species, gwcd_list );
- }
- // if ( ( intrees != null ) && ( intrees.length > 0 ) && ( inferred_trees != null ) && ( inferred_trees.size() > 0 ) ) {
- // final StringBuilder parameters_sb = createParametersAsString( ignore_dufs,
- // e_value_max,
- // max_allowed_overlap,
- // no_engulfing_overlaps,
- // cutoff_scores_file );
- // String s = "_";
- // if ( radomize_fitch_parsimony ) {
- // s += random_number_seed_for_fitch_parsimony + "_";
- // }
- // int i = 0;
- // for( final Phylogeny inferred_tree : inferred_trees ) {
- // if ( !inferred_tree.isRooted() ) {
- // intrees[ 0 ].getRoot().getName();
- // inferred_tree.r
- // }
- // final String outfile_name = ForesterUtil.removeSuffix( inferred_tree.getName() ) + s;
- // final DomainParsimonyCalculator domain_parsimony = DomainParsimonyCalculator
- // .createInstance( inferred_tree, gwcd_list );
- // SurfacingUtil.executeParsimonyAnalysis( random_number_seed_for_fitch_parsimony,
- // radomize_fitch_parsimony,
- // outfile_name,
- // domain_parsimony,
- // inferred_tree,
- // domain_id_to_go_ids_map,
- // go_id_to_term_map,
- // go_namespace_limit,
- // parameters_sb.toString() );
- // i++;
- // }
- // }
+ writeProteinListsForAllSpecies( out_dir,
+ protein_lists_per_species,
+ gwcd_list,
+ output_list_of_all_proteins_per_domain_e_value_max );
+ }
if ( all_bin_domain_combinations_gained_fitch != null ) {
try {
executeFitchGainsAnalysis( new File( output_file
System.out.println();
}
+ /**
+  * Prints a domain architecture report to standard output.
+  * <p>
+  * First, every architecture whose count is at least {@code min_count} is
+  * printed as {@code architecture<TAB>count}, in the iteration order of
+  * {@code domain_architecuture_counts}. Then, for every architecture whose
+  * count is exactly 1, each genome whose architecture set contains it is
+  * listed as {@code genome<TAB>architecture} under the heading
+  * "Unique Domain Architectures:".
+  *
+  * @param domain_architecutures genome name mapped to the set of architecture strings stored for it
+  * @param domain_architecuture_counts architecture string mapped to its count
+  * @param min_count minimum count (inclusive) for an architecture to appear in the counts listing
+  */
+ private static void performDomainArchitectureAnalysis( final SortedMap<String, Set<String>> domain_architecutures,
+                                                        final SortedMap<String, Integer> domain_architecuture_counts,
+                                                        final int min_count ) {
+     System.out.println( "Domain Architecture Counts (min count: " + min_count + " ):" );
+     // Collected while the counts are being printed, emitted afterwards as its own section.
+     final StringBuilder singletons = new StringBuilder();
+     for( final Entry<String, Integer> count_entry : domain_architecuture_counts.entrySet() ) {
+         final String architecture = count_entry.getKey();
+         final int occurrences = count_entry.getValue();
+         if ( occurrences >= min_count ) {
+             System.out.println( architecture + "\t" + occurrences );
+         }
+         if ( occurrences != 1 ) {
+             continue;
+         }
+         // Count of one: report every genome whose set holds this architecture.
+         for( final Entry<String, Set<String>> genome_entry : domain_architecutures.entrySet() ) {
+             if ( genome_entry.getValue().contains( architecture ) ) {
+                 singletons.append( genome_entry.getKey() )
+                           .append( "\t" )
+                           .append( architecture )
+                           .append( ForesterUtil.LINE_SEPARATOR );
+             }
+         }
+     }
+     System.out.println();
+     System.out.println();
+     System.out.println( "Unique Domain Architectures:" );
+     System.out.println( singletons );
+     System.out.println();
+     System.out.println();
+ }
+
+ private static int storeDomainArchitectures( final String genome,
+ final SortedMap<String, Set<String>> domain_architecutures,
+ final List<Protein> protein_list,
+ final Map<String, Integer> distinct_domain_architecuture_counts ) {
+ final Set<String> da = new HashSet<String>();
+ domain_architecutures.put( genome, da );
+ for( final Protein protein : protein_list ) {
+ final String da_str = ( ( BasicProtein ) protein ).toDomainArchitectureString( "~" );
+ if ( !da.contains( da_str ) ) {
+ if ( !distinct_domain_architecuture_counts.containsKey( da_str ) ) {
+ distinct_domain_architecuture_counts.put( da_str, 1 );
+ }
+ else {
+ distinct_domain_architecuture_counts.put( da_str,
+ distinct_domain_architecuture_counts.get( da_str ) + 1 );
+ }
+ da.add( da_str );
+ }
+ }
+ return da.size();
+ }
+
private static void createSplitWriters( final File out_dir,
final String my_outfile,
final Map<Character, Writer> split_writers ) throws IOException {
for( final Entry<Integer, Integer> entry : all_genomes_domains_per_potein_histo.entrySet() ) {
sum += entry.getValue();
}
- final double percentage = 100.0 * ( sum - all_genomes_domains_per_potein_histo.get( 1 ) ) / sum;
+ final double percentage = ( 100.0 * ( sum - all_genomes_domains_per_potein_histo.get( 1 ) ) ) / sum;
ForesterUtil.programMessage( PRG_NAME, "Percentage of multidomain proteins: " + percentage + "%" );
log( "Percentage of multidomain proteins: : " + percentage + "%", log_writer );
}
}
System.out.println( "--" );
}
- for( int i = 0; i < input_file_properties.length; ++i ) {
+ for( final String[] input_file_propertie : input_file_properties ) {
try {
- intree.getNode( input_file_properties[ i ][ 1 ] );
+ intree.getNode( input_file_propertie[ 1 ] );
}
catch ( final IllegalArgumentException e ) {
- ForesterUtil.fatalError( surfacing.PRG_NAME, "node named [" + input_file_properties[ i ][ 1 ]
+ ForesterUtil.fatalError( surfacing.PRG_NAME, "node named [" + input_file_propertie[ 1 ]
+ "] not present/not unique in input tree" );
}
}
}
- // public static StringBuffer stringCombinableDomainsMapToStringBuffer(
- // final SortedMap<String, CombinableDomains> map ) {
- // final StringBuffer sb = new StringBuffer();
- // for( final Iterator<String> iter = map.keySet().iterator();
- // iter.hasNext(); ) {
- // final Object key = iter.next();
- // sb.append( ForesterUtil.pad( new StringBuffer( key.toString() ), 18, ' ',
- // false ) );
- // final CombinableDomains domain_combination = map.get( key );
- // sb.append( ForesterUtil.pad( new StringBuffer( "" +
- // domain_combination.getNumberOfCombiningDomains() ), 8,
- // ' ', false ) );
- // sb.append( domain_combination.toStringBuffer() );
- // sb.append( ForesterUtil.getLineSeparator() );
- // }
- // return sb;
- // }
private static void printHelp() {
System.out.println();
System.out.println( "Usage:" );
System.out.println( surfacing.DOMAIN_COMBINITONS_OUTPUT_OPTION_FOR_GRAPH_ANALYSIS
+ ": to output binary domain combinations for (downstream) graph analysis" );
System.out.println( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_OPTIONS + ": to output all proteins per domain" );
+ System.out.println( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION
+ + ": e value max per domain for output of all proteins per domain" );
System.out.println();
+ System.out.println( "Example 1: java -Xms128m -Xmx512m -cp path/to/forester.jar"
+ + " org.forester.application.surfacing p2g=pfam2go_2012_02_07.txt -dufs -cos=Pfam_260_NC1"
+ + " -no_eo -mo=0 -input=genomes_limited.txt -out_dir=out -o=o "
+ + " -species_tree=tol.xml -obo=gene_ontology_2012_02_07.obo -pos_filter=f.txt -all_prot" );
System.out.println();
- System.out.println( "Example: java -Xms128m -Xmx512m -cp path/to/forester.jar"
+ System.out.println( "Example 2: java -Xms128m -Xmx512m -cp path/to/forester.jar"
+ " org.forester.application.surfacing -detail=punctilious -o=TEST.html -pwc=TEST"
+ " -cos=Pfam_ls_22_TC2 -p2g=pfam2go -obo=gene_ontology_edit.obo "
+ "-dc_sort=dom -ignore_with_self -no_singles -e=0.001 -mo=1 -no_eo "
- + "-ds_output=detailed_html -scoring=domains -sort=alpha -" + JACKNIFE_OPTION
- + "=50 human mouse brafl strpu" );
+ + "-ds_output=detailed_html -scoring=domains -sort=alpha human mouse brafl strpu" );
System.out.println();
}
private static void writeProteinListsForAllSpecies( final File output_dir,
final SortedMap<Species, List<Protein>> protein_lists_per_species,
- final List<GenomeWideCombinableDomains> gwcd_list ) {
+ final List<GenomeWideCombinableDomains> gwcd_list,
+ final double domain_e_cutoff ) {
final SortedSet<DomainId> all_domains = new TreeSet<DomainId>();
for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
all_domains.addAll( gwcd.getAllDomainIds() );
domain,
proteins_file_writer,
"\t",
- LIMIT_SPEC_FOR_PROT_EX );
+ LIMIT_SPEC_FOR_PROT_EX,
+ domain_e_cutoff );
proteins_file_writer.close();
}
catch ( final IOException e ) {