inprogress
[jalview.git] / forester / java / src / org / forester / surfacing / BasicDomainSimilarityCalculator.java
index 69adcb9..81cef33 100644 (file)
 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 //
 // Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org/forester
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
 
 package org.forester.surfacing;
 
+import java.text.DecimalFormat;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.SortedMap;
@@ -34,23 +35,30 @@ import java.util.SortedSet;
 import java.util.TreeMap;
 import java.util.TreeSet;
 
-import org.forester.protein.DomainId;
 import org.forester.species.Species;
 import org.forester.util.BasicDescriptiveStatistics;
 import org.forester.util.DescriptiveStatistics;
+import org.forester.util.ForesterUtil;
 
 public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculator {
 
     final DomainSimilarity.DomainSimilaritySortField _sort;
     private final boolean                            _sort_by_species_count_first;
     private final boolean                            _treat_as_binary_comparison;
+    private final boolean                            _calc_similarity_score;
 
     public BasicDomainSimilarityCalculator( final DomainSimilarity.DomainSimilaritySortField sort,
                                             final boolean sort_by_species_count_first,
-                                            final boolean treat_as_binary_comparison ) {
+                                            final boolean treat_as_binary_comparison,
+                                            final boolean calc_similarity_score ) {
         _sort = sort;
         _sort_by_species_count_first = sort_by_species_count_first;
         _treat_as_binary_comparison = treat_as_binary_comparison;
+        _calc_similarity_score = calc_similarity_score;
+    }
+
+    public boolean isCalcSimilarityScore() {
+        return _calc_similarity_score;
     }
 
     @Override
@@ -62,11 +70,16 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat
             throw new IllegalArgumentException( "attempt to calculate multiple combinable domains similarity for less than two combinale domains collections" );
         }
         final SortedSet<DomainSimilarity> similarities = new TreeSet<DomainSimilarity>();
-        final SortedSet<DomainId> keys = new TreeSet<DomainId>();
+        final SortedSet<String> keys = new TreeSet<String>();
         for( final GenomeWideCombinableDomains cdc : cdc_list ) {
             keys.addAll( ( cdc ).getAllCombinableDomainsIds().keySet() );
         }
-        for( final DomainId key : keys ) {
+        final DecimalFormat pf = new java.text.DecimalFormat( "000000" );
+        int counter = 1;
+        System.out.println( keys.size() );
+        for( final String key : keys ) {
+            ForesterUtil.updateProgress( counter, pf );
+            counter++;
             final List<CombinableDomains> same_id_cd_list = new ArrayList<CombinableDomains>( cdc_list.size() );
             final List<Species> species_with_key_id_domain = new ArrayList<Species>();
             for( final GenomeWideCombinableDomains cdc : cdc_list ) {
@@ -87,9 +100,6 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat
                     continue;
                 }
             }
-            // BIG CHANGE IN LOGIC: Tuesday July 08, 0;55
-            // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-            // OLD: if ( same_id_cd_list.size() > 1 ) {
             if ( same_id_cd_list.size() > 0 ) {
                 if ( !ignore_domains_specific_to_one_genome || ( same_id_cd_list.size() > 1 ) ) {
                     final DomainSimilarity s = calculateSimilarity( pairwise_calculator, same_id_cd_list );
@@ -101,50 +111,48 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat
                     }
                 }
             }
-            // ~~~ NEW:
             else {
                 throw new RuntimeException( "this should not have happened" );
             }
-            // ~~~ OLD:
-            // else if ( same_id_cd_list.size() == 1 ) {
-            // TODO need to go in file
-            // System.out.println( "only in one species [" +
-            // species_with_key_id_domain.get( 0 ) + "]: " + key_id );
-            //}
-            //else {
-            //    throw new RuntimeException( "this should not have happened" );
-            // }
         }
+        System.out.println();
         return similarities;
     }
 
     private DomainSimilarity calculateSimilarity( final PairwiseDomainSimilarityCalculator pairwise_calculator,
                                                   final List<CombinableDomains> domains_list ) {
         if ( domains_list.size() == 1 ) {
-            // BIG CHANGE IN LOGIC: Tuesday July 08, 0;55
-            // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-            // ~~~OLD:
-            //throw new IllegalArgumentException( "attempt to calculate multiple combinable domains similarity for less than two combinable domains" );
-            // ~~~new: 
-            final SortedMap<Species, SpeciesSpecificDomainSimilariyData> species_data = new TreeMap<Species, SpeciesSpecificDomainSimilariyData>();
+            final SortedMap<Species, SpeciesSpecificDcData> species_data = new TreeMap<Species, SpeciesSpecificDcData>();
             species_data.put( domains_list.get( 0 ).getSpecies(),
                               createSpeciesSpecificDomainSimilariyData( domains_list.get( 0 ) ) );
-            return new PrintableDomainSimilarity( domains_list.get( 0 ),
-                                                  1.0,
-                                                  1.0,
-                                                  1.0,
-                                                  1.0,
-                                                  0.0,
-                                                  0,
-                                                  0,
-                                                  0,
-                                                  species_data,
-                                                  getSort(),
-                                                  isSortBySpeciesCountFirst(),
-                                                  isTreatAsBinaryComparison() );
+            if ( !isCalcSimilarityScore() ) {
+                return new PrintableDomainSimilarity( domains_list.get( 0 ),
+                                                      0,
+                                                      0,
+                                                      species_data,
+                                                      isSortBySpeciesCountFirst(),
+                                                      isTreatAsBinaryComparison() );
+            }
+            else {
+                return new PrintableDomainSimilarity( domains_list.get( 0 ),
+                                                      1.0,
+                                                      1.0,
+                                                      1.0,
+                                                      1.0,
+                                                      0.0,
+                                                      0,
+                                                      0,
+                                                      0,
+                                                      species_data,
+                                                      isSortBySpeciesCountFirst(),
+                                                      isTreatAsBinaryComparison() );
+            }
+        }
+        DescriptiveStatistics stat = null;
+        if ( isCalcSimilarityScore() ) {
+            stat = new BasicDescriptiveStatistics();
         }
-        final DescriptiveStatistics stat = new BasicDescriptiveStatistics();
-        final SortedMap<Species, SpeciesSpecificDomainSimilariyData> species_data = new TreeMap<Species, SpeciesSpecificDomainSimilariyData>();
+        final SortedMap<Species, SpeciesSpecificDcData> species_data = new TreeMap<Species, SpeciesSpecificDcData>();
         species_data.put( domains_list.get( 0 ).getSpecies(),
                           createSpeciesSpecificDomainSimilariyData( domains_list.get( 0 ) ) );
         int max_difference_in_counts = 0;
@@ -172,59 +180,67 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat
                 if ( Math.abs( difference ) > Math.abs( max_difference ) ) {
                     max_difference = difference;
                 }
-                stat.addValue( pairwise_similarity.getSimilarityScore() );
+                if ( isCalcSimilarityScore() ) {
+                    stat.addValue( pairwise_similarity.getSimilarityScore() );
+                }
             }
         }
-        if ( stat.getN() < 1 ) {
-            throw new AssertionError( "empty descriptive statistics: this should not have happened" );
-        }
-        if ( ( stat.getN() != 1 ) && isTreatAsBinaryComparison() ) {
-            throw new IllegalArgumentException( "attmpt to treat similarity with N not equal to one as binary comparison" );
+        if ( isCalcSimilarityScore() ) {
+            if ( stat.getN() < 1 ) {
+                throw new RuntimeException( "empty descriptive statistics: this should not have happened" );
+            }
+            if ( ( stat.getN() != 1 ) && isTreatAsBinaryComparison() ) {
+                throw new IllegalArgumentException( "attmpt to treat similarity with N not equal to one as binary comparison" );
+            }
         }
-        if ( ( /*stat.getN() != 1 ||*/!isTreatAsBinaryComparison() ) && ( max_difference_in_counts < 0 ) ) {
+        if ( !isTreatAsBinaryComparison() && ( max_difference_in_counts < 0 ) ) {
             max_difference_in_counts = Math.abs( max_difference_in_counts );
             if ( !is_domain_combination_based ) {
-                max_difference = Math.abs( max_difference ); //=max_difference_in_counts for !is_domain_combination_based.
+                max_difference = Math.abs( max_difference );
             }
         }
         DomainSimilarity similarity = null;
-        if ( stat.getN() == 1 ) {
+        if ( !isCalcSimilarityScore() ) {
             similarity = new PrintableDomainSimilarity( domains_list.get( 0 ),
-                                                        stat.getMin(),
-                                                        stat.getMax(),
-                                                        stat.arithmeticMean(),
-                                                        stat.median(),
-                                                        0.0,
-                                                        stat.getN(),
                                                         max_difference_in_counts,
                                                         max_difference,
                                                         species_data,
-                                                        getSort(),
                                                         isSortBySpeciesCountFirst(),
                                                         isTreatAsBinaryComparison() );
         }
         else {
-            similarity = new PrintableDomainSimilarity( domains_list.get( 0 ),
-                                                        stat.getMin(),
-                                                        stat.getMax(),
-                                                        stat.arithmeticMean(),
-                                                        stat.median(),
-                                                        stat.sampleStandardDeviation(),
-                                                        stat.getN(),
-                                                        max_difference_in_counts,
-                                                        max_difference,
-                                                        species_data,
-                                                        getSort(),
-                                                        isSortBySpeciesCountFirst(),
-                                                        isTreatAsBinaryComparison() );
+            if ( stat.getN() == 1 ) {
+                similarity = new PrintableDomainSimilarity( domains_list.get( 0 ),
+                                                            stat.getMin(),
+                                                            stat.getMax(),
+                                                            stat.arithmeticMean(),
+                                                            stat.median(),
+                                                            0.0,
+                                                            stat.getN(),
+                                                            max_difference_in_counts,
+                                                            max_difference,
+                                                            species_data,
+                                                            isSortBySpeciesCountFirst(),
+                                                            isTreatAsBinaryComparison() );
+            }
+            else {
+                similarity = new PrintableDomainSimilarity( domains_list.get( 0 ),
+                                                            stat.getMin(),
+                                                            stat.getMax(),
+                                                            stat.arithmeticMean(),
+                                                            stat.median(),
+                                                            stat.sampleStandardDeviation(),
+                                                            stat.getN(),
+                                                            max_difference_in_counts,
+                                                            max_difference,
+                                                            species_data,
+                                                            isSortBySpeciesCountFirst(),
+                                                            isTreatAsBinaryComparison() );
+            }
         }
         return similarity;
     }
 
-    private DomainSimilarity.DomainSimilaritySortField getSort() {
-        return _sort;
-    }
-
     private boolean isSortBySpeciesCountFirst() {
         return _sort_by_species_count_first;
     }
@@ -233,12 +249,13 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat
         return _treat_as_binary_comparison;
     }
 
-    private static SpeciesSpecificDomainSimilariyData createSpeciesSpecificDomainSimilariyData( final CombinableDomains cd ) {
-        final SpeciesSpecificDomainSimilariyData sd = new PrintableSpeciesSpecificDomainSimilariyData( cd.getKeyDomainProteinsCount(),
-                                                                                                       cd.getKeyDomainCount(),
-                                                                                                       cd.getNumberOfCombinableDomains(),
-                                                                                                       cd.getKeyDomainConfidenceDescriptiveStatistics() );
-        for( final DomainId domain : cd.getCombinableDomains() ) {
+    private static SpeciesSpecificDcData createSpeciesSpecificDomainSimilariyData( final CombinableDomains cd ) {
+        final SpeciesSpecificDcData sd = new PrintableSpeciesSpecificDcData( cd.getKeyDomainCount(),
+                                                                             cd.getNumberOfCombinableDomains() );
+        for( final String prot : cd.getKeyDomainProteins() ) {
+            sd.addKeyDomainProtein( prot );
+        }
+        for( final String domain : cd.getCombinableDomains() ) {
             sd.addProteinsExhibitingCombinationCount( domain, cd.getNumberOfProteinsExhibitingCombination( domain ) );
         }
         return sd;