cleanup
[jalview.git] / forester / java / src / org / forester / surfacing / DomainCountsDifferenceUtil.java
index 6996ef3..8f9a249 100644 (file)
@@ -23,7 +23,7 @@
 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 //
 // Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org/forester
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
 
 package org.forester.surfacing;
 
@@ -32,8 +32,6 @@ import java.io.File;
 import java.io.FileWriter;
 import java.io.IOException;
 import java.io.Writer;
-import java.text.DecimalFormat;
-import java.text.NumberFormat;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -44,8 +42,12 @@ import java.util.SortedSet;
 import java.util.TreeMap;
 import java.util.TreeSet;
 
+import org.forester.application.surfacing;
 import org.forester.go.GoId;
 import org.forester.go.GoTerm;
+import org.forester.protein.BinaryDomainCombination;
+import org.forester.protein.Protein;
+import org.forester.species.Species;
 import org.forester.util.BasicDescriptiveStatistics;
 import org.forester.util.DescriptiveStatistics;
 import org.forester.util.ForesterUtil;
@@ -56,78 +58,11 @@ import org.forester.util.ForesterUtil;
  */
 public final class DomainCountsDifferenceUtil {
 
-    private final static NumberFormat          FORMATTER                                   = new DecimalFormat( "0.0E0" );
-    private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES = COPY_CALCULATION_MODE.MIN;
     private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES   = COPY_CALCULATION_MODE.MIN;
+    private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES = COPY_CALCULATION_MODE.MIN;
     private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_LOW_COPY_SPECIES         = COPY_CALCULATION_MODE.MAX;
     private static final String                PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX         = ".prot";
 
-    //FIXME really needs to be tested! 
-    private static void addCounts( final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
-                                   final BinaryDomainCombination dc,
-                                   final GenomeWideCombinableDomains genome,
-                                   final Set<BinaryDomainCombination> bdc ) {
-        if ( !copy_counts.containsKey( dc ) ) {
-            copy_counts.put( dc, new ArrayList<Integer>() );
-        }
-        if ( bdc.contains( dc )
-                && ( ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains().get( dc.getId1() ) != null ) ) {
-            final int count = ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains()
-                    .get( dc.getId1() );
-            copy_counts.get( dc ).add( count );
-        }
-        else {
-            copy_counts.get( dc ).add( 0 );
-        }
-    }
-
-    private static void addCounts( final SortedMap<DomainId, List<Integer>> copy_counts,
-                                   final DomainId domain,
-                                   final GenomeWideCombinableDomains genome ) {
-        if ( !copy_counts.containsKey( domain ) ) {
-            copy_counts.put( domain, new ArrayList<Integer>() );
-        }
-        if ( genome.contains( domain ) ) {
-            copy_counts.get( domain ).add( genome.get( domain ).getKeyDomainProteinsCount() );
-        }
-        else {
-            copy_counts.get( domain ).add( 0 );
-        }
-    }
-
-    private static StringBuilder addGoInformation( final DomainId d,
-                                                   final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
-                                                   final Map<GoId, GoTerm> go_id_to_term_map ) {
-        final StringBuilder sb = new StringBuilder();
-        if ( ( domain_id_to_go_ids_map == null ) || domain_id_to_go_ids_map.isEmpty()
-                || !domain_id_to_go_ids_map.containsKey( d ) ) {
-            return sb;
-        }
-        final List<GoId> go_ids = domain_id_to_go_ids_map.get( d );
-        for( int i = 0; i < go_ids.size(); ++i ) {
-            final GoId go_id = go_ids.get( i );
-            if ( go_id_to_term_map.containsKey( go_id ) ) {
-                appendGoTerm( sb, go_id_to_term_map.get( go_id ) );
-                sb.append( "<br>" );
-            }
-            else {
-                sb.append( "go id \"" + go_id + "\" not found [" + d.getId() + "]" );
-            }
-        }
-        return sb;
-    }
-
-    private static void appendGoTerm( final StringBuilder sb, final GoTerm go_term ) {
-        final GoId go_id = go_term.getGoId();
-        sb.append( "<a href=\"" + SurfacingConstants.AMIGO_LINK + go_id + "\" target=\"amigo_window\">" + go_id
-                + "</a>" );
-        sb.append( ":" );
-        sb.append( go_term.getName() );
-        sb.append( " [" );
-        sb.append( go_term.getGoNameSpace().toShortString() );
-        sb.append( "]" );
-    }
-
     public static void calculateCopyNumberDifferences( final List<GenomeWideCombinableDomains> genomes,
                                                        final SortedMap<Species, List<Protein>> protein_lists_per_species,
                                                        final List<String> high_copy_base_species,
@@ -138,7 +73,7 @@ public final class DomainCountsDifferenceUtil {
                                                        final File plain_output_dom,
                                                        final File html_output_dom,
                                                        final File html_output_dc,
-                                                       final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
+                                                       final Map<String, List<GoId>> domain_id_to_go_ids_map,
                                                        final Map<GoId, GoTerm> go_id_to_term_map,
                                                        final File all_domains_go_ids_out_dom,
                                                        final File passing_domains_go_ids_out_dom,
@@ -170,13 +105,13 @@ public final class DomainCountsDifferenceUtil {
         final Writer html_writer_dc = new BufferedWriter( new FileWriter( html_output_dc ) );
         final Writer all_gos_writer = new BufferedWriter( new FileWriter( all_domains_go_ids_out_dom ) );
         final Writer passing_gos_writer = new BufferedWriter( new FileWriter( passing_domains_go_ids_out_dom ) );
-        final SortedMap<DomainId, Double> high_copy_base_values = new TreeMap<DomainId, Double>();
-        final SortedMap<DomainId, Double> high_copy_target_values = new TreeMap<DomainId, Double>();
-        final SortedMap<DomainId, Double> low_copy_values = new TreeMap<DomainId, Double>();
-        final SortedMap<DomainId, List<Integer>> high_copy_base_copy_counts = new TreeMap<DomainId, List<Integer>>();
-        final SortedMap<DomainId, List<Integer>> high_copy_target_copy_counts = new TreeMap<DomainId, List<Integer>>();
-        final SortedMap<DomainId, List<Integer>> low_copy_copy_counts = new TreeMap<DomainId, List<Integer>>();
-        final SortedSet<DomainId> all_domains = new TreeSet<DomainId>();
+        final SortedMap<String, Double> high_copy_base_values = new TreeMap<String, Double>();
+        final SortedMap<String, Double> high_copy_target_values = new TreeMap<String, Double>();
+        final SortedMap<String, Double> low_copy_values = new TreeMap<String, Double>();
+        final SortedMap<String, List<Integer>> high_copy_base_copy_counts = new TreeMap<String, List<Integer>>();
+        final SortedMap<String, List<Integer>> high_copy_target_copy_counts = new TreeMap<String, List<Integer>>();
+        final SortedMap<String, List<Integer>> low_copy_copy_counts = new TreeMap<String, List<Integer>>();
+        final SortedSet<String> all_domains = new TreeSet<String>();
         final SortedMap<BinaryDomainCombination, Double> high_copy_base_values_dc = new TreeMap<BinaryDomainCombination, Double>();
         final SortedMap<BinaryDomainCombination, Double> high_copy_target_values_dc = new TreeMap<BinaryDomainCombination, Double>();
         final SortedMap<BinaryDomainCombination, Double> low_copy_values_dc = new TreeMap<BinaryDomainCombination, Double>();
@@ -188,11 +123,11 @@ public final class DomainCountsDifferenceUtil {
         final SortedSet<GoId> go_ids_of_passing_domains = new TreeSet<GoId>();
         final SortedSet<GoId> go_ids_all = new TreeSet<GoId>();
         for( final GenomeWideCombinableDomains genome : genomes ) {
-            final SortedSet<DomainId> domains = genome.getAllDomainIds();
+            final SortedSet<String> domains = genome.getAllDomainIds();
             final SortedSet<BinaryDomainCombination> dcs = genome.toBinaryDomainCombinations();
             final String species = genome.getSpecies().getSpeciesId();
             bdcs_per_genome.put( species, genome.toBinaryDomainCombinations() );
-            for( final DomainId d : domains ) {
+            for( final String d : domains ) {
                 all_domains.add( d );
                 if ( domain_id_to_go_ids_map.containsKey( d ) ) {
                     go_ids_all.addAll( domain_id_to_go_ids_map.get( d ) );
@@ -202,7 +137,7 @@ public final class DomainCountsDifferenceUtil {
                 all_dcs.add( dc );
             }
         }
-        for( final DomainId domain : all_domains ) {
+        for( final String domain : all_domains ) {
             for( final GenomeWideCombinableDomains genome : genomes ) {
                 final String species = genome.getSpecies().getSpeciesId();
                 if ( high_copy_base_species.contains( species ) ) {
@@ -239,7 +174,7 @@ public final class DomainCountsDifferenceUtil {
                 }
             }
         }
-        for( final DomainId domain : all_domains ) {
+        for( final String domain : all_domains ) {
             calculateDomainCountsBasedValue( high_copy_target_values,
                                              high_copy_target_copy_counts,
                                              domain,
@@ -300,6 +235,72 @@ public final class DomainCountsDifferenceUtil {
         writeGoIdsToFile( passing_gos_writer, go_ids_of_passing_domains );
     }
 
+    //FIXME really needs to be tested! 
+    /**
+     * Records, for the binary domain combination {@code dc}, the count that
+     * {@code genome} stores for it, appending the value to the list kept under
+     * {@code dc} in {@code copy_counts} (the list is created on first use).
+     * <p>
+     * Zero is appended when {@code bdc} does not contain {@code dc}, or when the
+     * combinable domains of the first domain hold no entry for the second one.
+     *
+     * @param copy_counts per-combination lists of counts; exactly one value is appended
+     * @param dc the binary domain combination to count
+     * @param genome the genome whose combinable domains are looked up
+     * @param bdc the binary domain combinations against which {@code dc} is checked
+     */
+    private static void addCounts( final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
+                                   final BinaryDomainCombination dc,
+                                   final GenomeWideCombinableDomains genome,
+                                   final Set<BinaryDomainCombination> bdc ) {
+        if ( !copy_counts.containsKey( dc ) ) {
+            copy_counts.put( dc, new ArrayList<Integer>() );
+        }
+        // Look the count up once; the genome is only consulted for combinations listed in bdc.
+        Integer count = null;
+        if ( bdc.contains( dc ) ) {
+            final BasicCombinableDomains key_domain = ( BasicCombinableDomains ) genome.get( dc.getId0() );
+            count = key_domain.getCombiningDomains().get( dc.getId1() );
+        }
+        copy_counts.get( dc ).add( count == null ? 0 : count );
+    }
+
+    /**
+     * Appends one value for {@code domain} to the list kept under that key in
+     * {@code copy_counts}, creating the list on first use. The value is the
+     * key-domain protein count reported by {@code genome}, or zero when the
+     * genome does not contain the domain.
+     *
+     * @param copy_counts per-domain lists of counts; exactly one value is appended
+     * @param domain the domain identifier to count
+     * @param genome the genome that is queried for the domain
+     */
+    private static void addCounts( final SortedMap<String, List<Integer>> copy_counts,
+                                   final String domain,
+                                   final GenomeWideCombinableDomains genome ) {
+        if ( !copy_counts.containsKey( domain ) ) {
+            copy_counts.put( domain, new ArrayList<Integer>() );
+        }
+        final List<Integer> counts_for_domain = copy_counts.get( domain );
+        if ( !genome.contains( domain ) ) {
+            // Absent from this genome: record an explicit zero.
+            counts_for_domain.add( 0 );
+            return;
+        }
+        counts_for_domain.add( genome.get( domain ).getKeyDomainProteinsCount() );
+    }
+
+    /**
+     * Builds the HTML fragment listing the GO ids mapped to domain {@code d}.
+     * <p>
+     * Each GO id found in {@code go_id_to_term_map} is rendered through
+     * {@code appendGoTerm} and followed by {@code <br>}; an id that is missing
+     * from that map yields a "not found" message instead. The fragment is empty
+     * when {@code domain_id_to_go_ids_map} is {@code null}, empty, or has no
+     * entry for {@code d}.
+     *
+     * @param d the domain identifier
+     * @param domain_id_to_go_ids_map GO ids per domain identifier; may be {@code null}
+     * @param go_id_to_term_map GO terms per GO id
+     * @return a new builder holding the fragment
+     */
+    private static StringBuilder addGoInformation( final String d,
+                                                   final Map<String, List<GoId>> domain_id_to_go_ids_map,
+                                                   final Map<GoId, GoTerm> go_id_to_term_map ) {
+        final StringBuilder html = new StringBuilder();
+        final boolean has_go_ids = ( domain_id_to_go_ids_map != null ) && !domain_id_to_go_ids_map.isEmpty()
+                && domain_id_to_go_ids_map.containsKey( d );
+        if ( has_go_ids ) {
+            for( final GoId go_id : domain_id_to_go_ids_map.get( d ) ) {
+                if ( !go_id_to_term_map.containsKey( go_id ) ) {
+                    html.append( "go id \"" + go_id + "\" not found [" + d + "]" );
+                    continue;
+                }
+                appendGoTerm( html, go_id_to_term_map.get( go_id ) );
+                html.append( "<br>" );
+            }
+        }
+        return html;
+    }
+
+    /**
+     * Appends one GO term to {@code sb} as: a hyperlink built from
+     * {@code SurfacingConstants.AMIGO_LINK} and the term's GO id, a colon, the
+     * term's name, and the short form of its namespace in square brackets.
+     *
+     * @param sb the builder that receives the text
+     * @param go_term the GO term to render
+     */
+    private static void appendGoTerm( final StringBuilder sb, final GoTerm go_term ) {
+        final GoId id = go_term.getGoId();
+        final String anchor = "<a href=\"" + SurfacingConstants.AMIGO_LINK + id + "\" target=\"amigo_window\">" + id
+                + "</a>";
+        sb.append( anchor ).append( ":" ).append( go_term.getName() );
+        sb.append( " [" ).append( go_term.getGoNameSpace().toShortString() ).append( "]" );
+    }
+
     private static void calculateDomainCountsBasedValue( final SortedMap<BinaryDomainCombination, Double> copy_values,
                                                          final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
                                                          final BinaryDomainCombination bdc,
@@ -327,9 +328,9 @@ public final class DomainCountsDifferenceUtil {
         }
     }
 
-    private static void calculateDomainCountsBasedValue( final SortedMap<DomainId, Double> copy_values,
-                                                         final SortedMap<DomainId, List<Integer>> copy_counts,
-                                                         final DomainId domain,
+    private static void calculateDomainCountsBasedValue( final SortedMap<String, Double> copy_values,
+                                                         final SortedMap<String, List<Integer>> copy_counts,
+                                                         final String domain,
                                                          final COPY_CALCULATION_MODE copy_calc_mode ) {
         if ( copy_counts.containsKey( domain ) ) {
             switch ( copy_calc_mode ) {
@@ -367,9 +368,9 @@ public final class DomainCountsDifferenceUtil {
         results.put( bdc, ( double ) max );
     }
 
-    private static void calculateMaxCount( final SortedMap<DomainId, Double> results,
-                                           final SortedMap<DomainId, List<Integer>> copy_counts,
-                                           final DomainId domain ) {
+    private static void calculateMaxCount( final SortedMap<String, Double> results,
+                                           final SortedMap<String, List<Integer>> copy_counts,
+                                           final String domain ) {
         final List<Integer> counts = copy_counts.get( domain );
         int max = 0;
         for( final Integer count : counts ) {
@@ -391,9 +392,9 @@ public final class DomainCountsDifferenceUtil {
         results.put( bdc, ( ( double ) sum ) / ( ( double ) counts.size() ) );
     }
 
-    private static void calculateMeanCount( final SortedMap<DomainId, Double> results,
-                                            final SortedMap<DomainId, List<Integer>> copy_counts,
-                                            final DomainId domain ) {
+    private static void calculateMeanCount( final SortedMap<String, Double> results,
+                                            final SortedMap<String, List<Integer>> copy_counts,
+                                            final String domain ) {
         final List<Integer> counts = copy_counts.get( domain );
         int sum = 0;
         for( final Integer count : counts ) {
@@ -413,9 +414,9 @@ public final class DomainCountsDifferenceUtil {
         results.put( bdc, stats.median() );
     }
 
-    private static void calculateMedianCount( final SortedMap<DomainId, Double> results,
-                                              final SortedMap<DomainId, List<Integer>> copy_counts,
-                                              final DomainId domain ) {
+    private static void calculateMedianCount( final SortedMap<String, Double> results,
+                                              final SortedMap<String, List<Integer>> copy_counts,
+                                              final String domain ) {
         final List<Integer> counts = copy_counts.get( domain );
         final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
         for( final Integer count : counts ) {
@@ -437,9 +438,9 @@ public final class DomainCountsDifferenceUtil {
         results.put( bdc, ( double ) min );
     }
 
-    private static void calculateMinCount( final SortedMap<DomainId, Double> results,
-                                           final SortedMap<DomainId, List<Integer>> copy_counts,
-                                           final DomainId domain ) {
+    private static void calculateMinCount( final SortedMap<String, Double> results,
+                                           final SortedMap<String, List<Integer>> copy_counts,
+                                           final String domain ) {
         final List<Integer> counts = copy_counts.get( domain );
         int min = Integer.MAX_VALUE;
         for( final Integer count : counts ) {
@@ -453,9 +454,6 @@ public final class DomainCountsDifferenceUtil {
     private static String combinableDomaindToString( final CombinableDomains cd ) {
+        // Plain-text form of cd: with the removed lines below gone, only the key-domain protein count is emitted.
         final StringBuilder sb = new StringBuilder();
         sb.append( cd.getKeyDomainProteinsCount() );
-        sb.append( "\t[" );
-        sb.append( FORMATTER.format( cd.getKeyDomainConfidenceDescriptiveStatistics().median() ) );
-        sb.append( "]" );
         return sb.toString();
     }
 
@@ -467,8 +465,6 @@ public final class DomainCountsDifferenceUtil {
         sb.append( cd.getKeyDomainProteinsCount() );
         sb.append( "</b>, " );
         sb.append( cd.getNumberOfCombinableDomains() );
-        sb.append( "]</td><td>[" );
-        sb.append( FORMATTER.format( cd.getKeyDomainConfidenceDescriptiveStatistics().median() ) );
         sb.append( "]</td><td>" );
         sb.append( cd.getCombiningDomainIdsAsStringBuilder() );
         return sb.toString();
@@ -501,8 +497,8 @@ public final class DomainCountsDifferenceUtil {
         html_writer.write( "</td>" );
     }
 
-    private static void writeCopyNumberValues( final SortedMap<DomainId, Double> copy_means,
-                                               final DomainId domain,
+    private static void writeCopyNumberValues( final SortedMap<String, Double> copy_means,
+                                               final String domain,
                                                final GenomeWideCombinableDomains genome,
                                                final String species,
                                                final Writer plain_writer,
@@ -546,12 +542,12 @@ public final class DomainCountsDifferenceUtil {
         int counter = 0;
         int total_absense_counter = 0;
         int not_total_absense_counter = 0;
-        SurfacingUtil.addHtmlHead( html_writer, "Binary Domain Combination Copy Differences" );
+        SurfacingUtil.writeHtmlHead( html_writer, "Binary Domain Combination Copy Differences" );
         html_writer.write( "<body><table>" );
         for( final BinaryDomainCombination bdc : all_bdcs ) {
             if ( ( high_copy_base_values.get( bdc ) > 0 ) && ( high_copy_target_values.get( bdc ) > 0 )
                     && ( high_copy_base_values.get( bdc ) >= low_copy_values.get( bdc ) ) ) {
-                if ( high_copy_target_values.get( bdc ) >= min_diff + ( factor * low_copy_values.get( bdc ) ) ) {
+                if ( high_copy_target_values.get( bdc ) >= ( min_diff + ( factor * low_copy_values.get( bdc ) ) ) ) {
                     if ( low_copy_values.get( bdc ) <= 0.0 ) {
                         ++total_absense_counter;
                     }
@@ -655,28 +651,28 @@ public final class DomainCountsDifferenceUtil {
                                                   final List<String> low_copy_species,
                                                   final int min_diff,
                                                   final Double factor,
-                                                  final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
+                                                  final Map<String, List<GoId>> domain_id_to_go_ids_map,
                                                   final Map<GoId, GoTerm> go_id_to_term_map,
                                                   final Writer plain_writer,
                                                   final Writer html_writer,
                                                   final File proteins_file_base,
-                                                  final SortedMap<DomainId, Double> high_copy_base_values,
-                                                  final SortedMap<DomainId, Double> high_copy_target_values,
-                                                  final SortedMap<DomainId, Double> low_copy_values,
-                                                  final SortedSet<DomainId> all_domains,
+                                                  final SortedMap<String, Double> high_copy_base_values,
+                                                  final SortedMap<String, Double> high_copy_target_values,
+                                                  final SortedMap<String, Double> low_copy_values,
+                                                  final SortedSet<String> all_domains,
                                                   final SortedSet<GoId> go_ids_of_passing_domains,
                                                   final SortedMap<Species, List<Protein>> protein_lists_per_species )
             throws IOException {
         int counter = 0;
         int total_absense_counter = 0;
         int not_total_absense_counter = 0;
-        SurfacingUtil.addHtmlHead( html_writer, "Domain Copy Differences" );
+        SurfacingUtil.writeHtmlHead( html_writer, "Domain Copy Differences" );
         html_writer.write( "<body><table>" );
-        for( final DomainId domain_id : all_domains ) {
+        for( final String domain_id : all_domains ) {
             if ( ( high_copy_base_values.get( domain_id ) > 0 ) && ( high_copy_target_values.get( domain_id ) > 0 )
                     && ( high_copy_base_values.get( domain_id ) >= low_copy_values.get( domain_id ) ) ) {
-                if ( high_copy_target_values.get( domain_id ) >= min_diff
-                        + ( factor * low_copy_values.get( domain_id ) ) ) {
+                if ( high_copy_target_values.get( domain_id ) >= ( min_diff + ( factor * low_copy_values
+                        .get( domain_id ) ) ) ) {
                     if ( low_copy_values.get( domain_id ) <= 0.0 ) {
                         ++total_absense_counter;
                     }
@@ -688,10 +684,10 @@ public final class DomainCountsDifferenceUtil {
                     if ( domain_id_to_go_ids_map.containsKey( domain_id ) ) {
                         go_ids_of_passing_domains.addAll( domain_id_to_go_ids_map.get( domain_id ) );
                     }
-                    plain_writer.write( domain_id.getId() );
+                    plain_writer.write( domain_id );
                     plain_writer.write( SurfacingConstants.NL );
-                    html_writer.write( "<tr><td><a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK
-                            + domain_id.getId() + "\">" + domain_id.getId() + "</a></td><td>" );
+                    html_writer.write( "<tr><td><a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain_id
+                            + "\">" + domain_id + "</a></td><td>" );
                     html_writer.write( addGoInformation( domain_id, domain_id_to_go_ids_map, go_id_to_term_map )
                             .toString() );
                     html_writer.write( "</td><td>" );
@@ -815,17 +811,22 @@ public final class DomainCountsDifferenceUtil {
 
     private static void writeProteinsToFile( final File proteins_file_base,
                                              final SortedMap<Species, List<Protein>> protein_lists_per_species,
-                                             final DomainId domain_id ) throws IOException {
+                                             final String domain_id ) throws IOException {
+        // Writes the proteins extracted for domain_id to "<domain_id>.prot" in the
+        // directory that contains proteins_file_base, then reports the file on stdout.
+        // NOTE(review): the writer is closed only on the normal path; if
+        // extractProteinNames throws, it stays open -- consider try/finally.
         final File my_proteins_file = new File( proteins_file_base.getParentFile() + ForesterUtil.FILE_SEPARATOR
                 + domain_id + PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX );
         SurfacingUtil.checkForOutputFileWriteability( my_proteins_file );
         final Writer proteins_file_writer = new BufferedWriter( new FileWriter( my_proteins_file ) );
-        SurfacingUtil.extractProteinNames( protein_lists_per_species, domain_id, proteins_file_writer, "\t" );
+        // NOTE(review): the meaning of the last two arguments is defined by
+        // SurfacingUtil.extractProteinNames, which is not visible here -- confirm
+        // what surfacing.LIMIT_SPEC_FOR_PROT_EX and -1 select.
+        SurfacingUtil.extractProteinNames( protein_lists_per_species,
+                                           domain_id,
+                                           proteins_file_writer,
+                                           "\t",
+                                           surfacing.LIMIT_SPEC_FOR_PROT_EX,
+                                           -1 );
         proteins_file_writer.close();
         System.out.println( "Wrote proteins list to \"" + my_proteins_file + "\"" );
     }
 
     public static enum COPY_CALCULATION_MODE {
+        // Selects how a list of copy counts is reduced to a single value; this class
+        // switches on the mode and has calculateMaxCount/MeanCount/MedianCount/MinCount helpers.
+        // NOTE(review): this change reorders the constants, which changes ordinal()
+        // values -- confirm nothing stores or compares ordinals.
-        MEAN, MEDIAN, MAX, MIN
+        MAX, MEAN, MEDIAN, MIN
     }
 }