inprogress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Fri, 24 May 2013 02:47:21 +0000 (02:47 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Fri, 24 May 2013 02:47:21 +0000 (02:47 +0000)
forester/java/src/org/forester/application/surfacing.java
forester/java/src/org/forester/protein/BasicProtein.java

index d3a50cd..87712eb 100644 (file)
@@ -1956,32 +1956,34 @@ public class surfacing {
                                                         domains_which_are_sometimes_single_sometimes_not,
                                                         domains_which_never_single,
                                                         domains_per_potein_stats_writer );
-            gwcd_list.add( BasicGenomeWideCombinableDomains
-                    .createInstance( protein_list,
-                                     ignore_combination_with_same,
-                                     new BasicSpecies( input_file_properties[ i ][ 1 ] ),
-                                     domain_id_to_go_ids_map,
-                                     dc_type,
-                                     protein_length_stats_by_dc,
-                                     domain_number_stats_by_dc ) );
             domain_lengths_table.addLengths( protein_list );
-            if ( gwcd_list.get( i ).getSize() > 0 ) {
-                SurfacingUtil.writeDomainCombinationsCountsFile( input_file_properties,
-                                                                 out_dir,
-                                                                 per_genome_domain_promiscuity_statistics_writer,
-                                                                 gwcd_list.get( i ),
-                                                                 i,
-                                                                 dc_sort_order );
-                if ( output_binary_domain_combinationsfor_graph_analysis ) {
-                    SurfacingUtil.writeBinaryDomainCombinationsFileForGraphAnalysis( input_file_properties,
-                                                                                     out_dir,
-                                                                                     gwcd_list.get( i ),
-                                                                                     i,
-                                                                                     dc_sort_order );
+            if ( !DA_ANALYSIS ) {
+                gwcd_list.add( BasicGenomeWideCombinableDomains
+                        .createInstance( protein_list,
+                                         ignore_combination_with_same,
+                                         new BasicSpecies( input_file_properties[ i ][ 1 ] ),
+                                         domain_id_to_go_ids_map,
+                                         dc_type,
+                                         protein_length_stats_by_dc,
+                                         domain_number_stats_by_dc ) );
+                if ( gwcd_list.get( i ).getSize() > 0 ) {
+                    SurfacingUtil.writeDomainCombinationsCountsFile( input_file_properties,
+                                                                     out_dir,
+                                                                     per_genome_domain_promiscuity_statistics_writer,
+                                                                     gwcd_list.get( i ),
+                                                                     i,
+                                                                     dc_sort_order );
+                    if ( output_binary_domain_combinationsfor_graph_analysis ) {
+                        SurfacingUtil.writeBinaryDomainCombinationsFileForGraphAnalysis( input_file_properties,
+                                                                                         out_dir,
+                                                                                         gwcd_list.get( i ),
+                                                                                         i,
+                                                                                         dc_sort_order );
+                    }
+                    SurfacingUtil.addAllDomainIdsToSet( gwcd_list.get( i ), all_domains_encountered );
+                    SurfacingUtil.addAllBinaryDomainCombinationToSet( gwcd_list.get( i ),
+                                                                      all_bin_domain_combinations_encountered );
                 }
-                SurfacingUtil.addAllDomainIdsToSet( gwcd_list.get( i ), all_domains_encountered );
-                SurfacingUtil.addAllBinaryDomainCombinationToSet( gwcd_list.get( i ),
-                                                                  all_bin_domain_combinations_encountered );
             }
             if ( query_domains_writer_ary != null ) {
                 for( int j = 0; j < query_domain_ids_array.length; j++ ) {
@@ -2015,7 +2017,7 @@ public class surfacing {
         if ( DA_ANALYSIS ) {
             performDomainArchitectureAnalysis( distinct_domain_architecutures_per_genome,
                                                distinct_domain_architecuture_counts,
-                                               ForesterUtil.roundToInt( number_of_genomes / 2.0 ) );
+                                               10 );
             distinct_domain_architecutures_per_genome.clear();
             distinct_domain_architecuture_counts.clear();
             System.gc();
index b9c8cbe..b33082c 100644 (file)
@@ -228,6 +228,35 @@ public class BasicProtein implements Protein {
         return sb.toString();
     }
 
+    /**
+     * Builds a domain architecture string for this protein: the ids of the
+     * domains returned by getDomainsSortedByPosition(), in that order,
+     * joined by the given separator. A run of consecutive domains sharing
+     * the same id contributes at most max_repeats entries; any further
+     * members of the run are left out.
+     *
+     * @param separator
+     *            string placed between two emitted domain ids
+     * @param max_repeats
+     *            maximum number of consecutive identical ids to emit, must
+     *            be at least 2
+     * @return the domain architecture string, empty if there are no domains
+     * @throws IllegalArgumentException
+     *             if max_repeats is smaller than 2
+     */
+    public String toDomainArchitectureString( final String separator, final int max_repeats ) {
+        if ( max_repeats < 2 ) {
+            throw new IllegalArgumentException( "max repeats cannot be smaller than 2" );
+        }
+        final StringBuilder sb = new StringBuilder();
+        boolean first = true;
+        String prev_id = "";
+        // Number of immediately preceding domains with the same id as the
+        // current one (0 for the first member of a run).
+        int counter = 0;
+        for( final Domain d : getDomainsSortedByPosition() ) {
+            final String id = d.getDomainId().getId();
+            // "!first" keeps a leading domain whose id happens to equal the
+            // initial (empty) prev_id from being counted as a repeat.
+            if ( !first && prev_id.equals( id ) ) {
+                counter++;
+            }
+            else {
+                counter = 0;
+            }
+            prev_id = id;
+            if ( counter >= max_repeats ) {
+                // Run is already represented max_repeats times: drop this
+                // domain together with its separator.
+                continue;
+            }
+            if ( first ) {
+                first = false;
+            }
+            else {
+                sb.append( separator );
+            }
+            sb.append( id );
+        }
+        return sb.toString();
+    }
+
     private List<DomainId> getProteinDomainIds() {
         final List<DomainId> ids = new ArrayList<DomainId>( getProteinDomains().size() );
         for( final Domain domain : getProteinDomains() ) {