domains_which_are_sometimes_single_sometimes_not,
domains_which_never_single,
domains_per_potein_stats_writer );
- gwcd_list.add( BasicGenomeWideCombinableDomains
- .createInstance( protein_list,
- ignore_combination_with_same,
- new BasicSpecies( input_file_properties[ i ][ 1 ] ),
- domain_id_to_go_ids_map,
- dc_type,
- protein_length_stats_by_dc,
- domain_number_stats_by_dc ) );
domain_lengths_table.addLengths( protein_list );
- if ( gwcd_list.get( i ).getSize() > 0 ) {
- SurfacingUtil.writeDomainCombinationsCountsFile( input_file_properties,
- out_dir,
- per_genome_domain_promiscuity_statistics_writer,
- gwcd_list.get( i ),
- i,
- dc_sort_order );
- if ( output_binary_domain_combinationsfor_graph_analysis ) {
- SurfacingUtil.writeBinaryDomainCombinationsFileForGraphAnalysis( input_file_properties,
- out_dir,
- gwcd_list.get( i ),
- i,
- dc_sort_order );
+ if ( !DA_ANALYSIS ) {
+ gwcd_list.add( BasicGenomeWideCombinableDomains
+ .createInstance( protein_list,
+ ignore_combination_with_same,
+ new BasicSpecies( input_file_properties[ i ][ 1 ] ),
+ domain_id_to_go_ids_map,
+ dc_type,
+ protein_length_stats_by_dc,
+ domain_number_stats_by_dc ) );
+ if ( gwcd_list.get( i ).getSize() > 0 ) {
+ SurfacingUtil.writeDomainCombinationsCountsFile( input_file_properties,
+ out_dir,
+ per_genome_domain_promiscuity_statistics_writer,
+ gwcd_list.get( i ),
+ i,
+ dc_sort_order );
+ if ( output_binary_domain_combinationsfor_graph_analysis ) {
+ SurfacingUtil.writeBinaryDomainCombinationsFileForGraphAnalysis( input_file_properties,
+ out_dir,
+ gwcd_list.get( i ),
+ i,
+ dc_sort_order );
+ }
+ SurfacingUtil.addAllDomainIdsToSet( gwcd_list.get( i ), all_domains_encountered );
+ SurfacingUtil.addAllBinaryDomainCombinationToSet( gwcd_list.get( i ),
+ all_bin_domain_combinations_encountered );
}
- SurfacingUtil.addAllDomainIdsToSet( gwcd_list.get( i ), all_domains_encountered );
- SurfacingUtil.addAllBinaryDomainCombinationToSet( gwcd_list.get( i ),
- all_bin_domain_combinations_encountered );
}
if ( query_domains_writer_ary != null ) {
for( int j = 0; j < query_domain_ids_array.length; j++ ) {
if ( DA_ANALYSIS ) {
performDomainArchitectureAnalysis( distinct_domain_architecutures_per_genome,
distinct_domain_architecuture_counts,
- ForesterUtil.roundToInt( number_of_genomes / 2.0 ) );
+ 10 );
distinct_domain_architecutures_per_genome.clear();
distinct_domain_architecuture_counts.clear();
System.gc();
return sb.toString();
}
+ /**
+  * Builds a domain architecture string for this protein: the ids of the
+  * domains returned by getDomainsSortedByPosition(), in that order, joined
+  * by the given separator.
+  * <p>
+  * A run of consecutive domains sharing the same id is truncated to at most
+  * max_repeats entries; any further domains of that run are left out (and no
+  * separator is written for them).
+  *
+  * @param separator   string written between two emitted domain ids
+  * @param max_repeats maximum number of consecutive occurrences of one id
+  *                    to emit; must be at least 2
+  * @return the separator-joined domain ids, or an empty string if there are
+  *         no domains
+  * @throws IllegalArgumentException if max_repeats is smaller than 2
+  */
+ public String toDomainArchitectureString( final String separator, int max_repeats ) {
+     if ( max_repeats < 2 ) {
+         throw new IllegalArgumentException( "max repeats cannot be smaller than 2" );
+     }
+     final StringBuilder sb = new StringBuilder();
+     boolean first = true;
+     String prev_id = "";
+     // Length of the current run of identical consecutive ids, counting the current domain.
+     int run_length = 0;
+     for( final Domain d : getDomainsSortedByPosition() ) {
+         final String id = d.getDomainId().getId();
+         if ( prev_id.equals( id ) ) {
+             run_length++;
+         }
+         else {
+             run_length = 1;
+         }
+         prev_id = id;
+         if ( run_length > max_repeats ) {
+             // Run already holds max_repeats entries: drop this one, before
+             // any separator is written, so no dangling separators appear.
+             continue;
+         }
+         if ( first ) {
+             first = false;
+         }
+         else {
+             sb.append( separator );
+         }
+         sb.append( id );
+     }
+     return sb.toString();
+ }
+
private List<DomainId> getProteinDomainIds() {
final List<DomainId> ids = new ArrayList<DomainId>( getProteinDomains().size() );
for( final Domain domain : getProteinDomains() ) {