From 9a4a1484a2ae6b664aa0adee8cbbc529d029ceb1 Mon Sep 17 00:00:00 2001
From: "cmzmasek@gmail.com"
Date: Fri, 24 May 2013 02:47:21 +0000
Subject: [PATCH] inprogress

---
NOTE(review): this copy of the patch had been whitespace-collapsed. Line breaks
are restored so that each hunk matches the line counts in its header, but the
leading indentation of the Java lines is approximate -- TODO confirm against
the repository, or apply with whitespace-insensitive matching.
NOTE(review): as committed, toDomainArchitectureString() leaves the
"counter >= max_repeats" branch empty, so every domain id is still appended
and max_repeats only affects the argument check. surfacing.java now passes the
literal 10 to performDomainArchitectureAnalysis() in place of
ForesterUtil.roundToInt( number_of_genomes / 2.0 ).

 .../src/org/forester/application/surfacing.java | 52 ++++++++++----------
 .../src/org/forester/protein/BasicProtein.java  | 29 +++++++++++
 2 files changed, 56 insertions(+), 25 deletions(-)

diff --git a/forester/java/src/org/forester/application/surfacing.java b/forester/java/src/org/forester/application/surfacing.java
index d3a50cd..87712eb 100644
--- a/forester/java/src/org/forester/application/surfacing.java
+++ b/forester/java/src/org/forester/application/surfacing.java
@@ -1956,32 +1956,34 @@ public class surfacing {
                                                      domains_which_are_sometimes_single_sometimes_not,
                                                      domains_which_never_single,
                                                      domains_per_potein_stats_writer );
-            gwcd_list.add( BasicGenomeWideCombinableDomains
-                    .createInstance( protein_list,
-                                     ignore_combination_with_same,
-                                     new BasicSpecies( input_file_properties[ i ][ 1 ] ),
-                                     domain_id_to_go_ids_map,
-                                     dc_type,
-                                     protein_length_stats_by_dc,
-                                     domain_number_stats_by_dc ) );
             domain_lengths_table.addLengths( protein_list );
-            if ( gwcd_list.get( i ).getSize() > 0 ) {
-                SurfacingUtil.writeDomainCombinationsCountsFile( input_file_properties,
-                                                                 out_dir,
-                                                                 per_genome_domain_promiscuity_statistics_writer,
-                                                                 gwcd_list.get( i ),
-                                                                 i,
-                                                                 dc_sort_order );
-                if ( output_binary_domain_combinationsfor_graph_analysis ) {
-                    SurfacingUtil.writeBinaryDomainCombinationsFileForGraphAnalysis( input_file_properties,
-                                                                                     out_dir,
-                                                                                     gwcd_list.get( i ),
-                                                                                     i,
-                                                                                     dc_sort_order );
+            if ( !DA_ANALYSIS ) {
+                gwcd_list.add( BasicGenomeWideCombinableDomains
+                        .createInstance( protein_list,
+                                         ignore_combination_with_same,
+                                         new BasicSpecies( input_file_properties[ i ][ 1 ] ),
+                                         domain_id_to_go_ids_map,
+                                         dc_type,
+                                         protein_length_stats_by_dc,
+                                         domain_number_stats_by_dc ) );
+                if ( gwcd_list.get( i ).getSize() > 0 ) {
+                    SurfacingUtil.writeDomainCombinationsCountsFile( input_file_properties,
+                                                                     out_dir,
+                                                                     per_genome_domain_promiscuity_statistics_writer,
+                                                                     gwcd_list.get( i ),
+                                                                     i,
+                                                                     dc_sort_order );
+                    if ( output_binary_domain_combinationsfor_graph_analysis ) {
+                        SurfacingUtil.writeBinaryDomainCombinationsFileForGraphAnalysis( input_file_properties,
+                                                                                         out_dir,
+                                                                                         gwcd_list.get( i ),
+                                                                                         i,
+                                                                                         dc_sort_order );
+                    }
+                    SurfacingUtil.addAllDomainIdsToSet( gwcd_list.get( i ), all_domains_encountered );
+                    SurfacingUtil.addAllBinaryDomainCombinationToSet( gwcd_list.get( i ),
+                                                                      all_bin_domain_combinations_encountered );
                 }
-                SurfacingUtil.addAllDomainIdsToSet( gwcd_list.get( i ), all_domains_encountered );
-                SurfacingUtil.addAllBinaryDomainCombinationToSet( gwcd_list.get( i ),
-                                                                  all_bin_domain_combinations_encountered );
             }
             if ( query_domains_writer_ary != null ) {
                 for( int j = 0; j < query_domain_ids_array.length; j++ ) {
@@ -2015,7 +2017,7 @@ public class surfacing {
         if ( DA_ANALYSIS ) {
             performDomainArchitectureAnalysis( distinct_domain_architecutures_per_genome,
                                                distinct_domain_architecuture_counts,
-                                               ForesterUtil.roundToInt( number_of_genomes / 2.0 ) );
+                                               10 );
             distinct_domain_architecutures_per_genome.clear();
             distinct_domain_architecuture_counts.clear();
             System.gc();
diff --git a/forester/java/src/org/forester/protein/BasicProtein.java b/forester/java/src/org/forester/protein/BasicProtein.java
index b9c8cbe..b33082c 100644
--- a/forester/java/src/org/forester/protein/BasicProtein.java
+++ b/forester/java/src/org/forester/protein/BasicProtein.java
@@ -228,6 +228,35 @@ public class BasicProtein implements Protein {
         return sb.toString();
     }
 
+    public String toDomainArchitectureString( final String separator, int max_repeats ) {
+        if ( max_repeats < 2 ) {
+            throw new IllegalArgumentException( "max repeats cannot be smaller than 2" );
+        }
+        final StringBuilder sb = new StringBuilder();
+        boolean first = true;
+        String prev_id = "";
+        int counter = 0;
+        for( final Domain d : getDomainsSortedByPosition() ) {
+            if ( first ) {
+                first = false;
+            }
+            else {
+                sb.append( separator );
+            }
+            if ( prev_id.equals( d.getDomainId().getId() ) ) {
+                counter++;
+            }
+            else {
+                counter = 0;
+            }
+            if ( counter >= max_repeats ) {
+            }
+            sb.append( d.getDomainId().getId() );
+            prev_id = d.getDomainId().getId();
+        }
+        return sb.toString();
+    }
+
     private List getProteinDomainIds() {
         final List ids = new ArrayList( getProteinDomains().size() );
         for( final Domain domain : getProteinDomains() ) {
-- 
1.7.10.2