(no commit message)
[jalview.git] / forester / java / src / org / forester / surfacing / DomainSimilarity.java
index be7273e..db25dd5 100644 (file)
 
 package org.forester.surfacing;
 
+import java.awt.Color;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
 import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
 import java.util.SortedMap;
 import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
 
+import org.forester.phylogeny.Phylogeny;
 import org.forester.species.Species;
-import org.forester.surfacing.PrintableDomainSimilarity.PRINT_OPTION;
+import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
+import org.forester.util.ForesterUtil;
 
-/*
- * This is to represent a measure of similarity between two or more domains from
- * different genomes.
- */
-public interface DomainSimilarity extends Comparable<DomainSimilarity> {
+public class DomainSimilarity implements Comparable<DomainSimilarity> {
 
-    static public enum DomainSimilarityScoring {
-        DOMAINS, PROTEINS, COMBINATIONS;
+    final public static String                              SPECIES_SEPARATOR          = "  ";
+    final private static int                                EQUAL                      = 0;
+    final private static String                             NO_SPECIES                 = "     ";
+    private static final boolean                            OUTPUT_TAXCODES_PER_DOMAIN = false;
+    final private CombinableDomains                         _combinable_domains;
+    private DomainSimilarityCalculator.Detailedness         _detailedness;
+    final private double                                    _max;
+    private final int                                       _max_difference;
+    private final int                                       _max_difference_in_counts;
+    final private double                                    _mean;
+    final private double                                    _min;
+    final private int                                       _n;
+    final private double                                    _sd;
+    final private SortedMap<Species, SpeciesSpecificDcData> _species_data;
+    private List<Species>                                   _species_order;
+    private final boolean                                   _treat_as_binary_comparison;
+
+    public DomainSimilarity( final CombinableDomains combinable_domains,
+                             final double min,
+                             final double max,
+                             final double mean,
+                             final double median,
+                             final double sd,
+                             final int n,
+                             final int max_difference_in_counts,
+                             final int max_difference,
+                             final SortedMap<Species, SpeciesSpecificDcData> species_data,
+                             final boolean sort_by_species_count_first,
+                             final boolean treat_as_binary_comparison ) {
+        if ( combinable_domains == null ) {
+            throw new IllegalArgumentException( "attempt to use null combinable domains" );
+        }
+        if ( species_data == null ) {
+            throw new IllegalArgumentException( "attempt to use null species data" );
+        }
+        if ( species_data.size() < 1 ) {
+            throw new IllegalArgumentException( "attempt to use empty species data" );
+        }
+        if ( n < 0 ) {
+            throw new IllegalArgumentException( "attempt to use N less than 0" );
+        }
+        if ( ( species_data.size() > 1 ) && ( n < 1 ) ) {
+            throw new IllegalArgumentException( "attempt to use N less than 1" );
+        }
+        if ( sd < 0.0 ) {
+            throw new IllegalArgumentException( "attempt to use negative SD" );
+        }
+        if ( max < min ) {
+            throw new IllegalArgumentException( "attempt to use max smaller than min" );
+        }
+        init();
+        _combinable_domains = combinable_domains;
+        _min = min;
+        _max = max;
+        _mean = mean;
+        _sd = sd;
+        _n = n;
+        _max_difference_in_counts = max_difference_in_counts;
+        _max_difference = max_difference;
+        _species_data = species_data;
+        _treat_as_binary_comparison = treat_as_binary_comparison;
+        final int s = species_data.size();
+        if ( ( ( s * s ) - s ) != ( getN() * 2 ) ) {
+            throw new IllegalArgumentException( "illegal species count and n: species count:" + s + ", n:" + _n
+                                                + " for domain " + combinable_domains.getKeyDomain() );
+        }
+        if ( s > 2 ) {
+            if ( getMaximalDifferenceInCounts() < 0 ) {
+                throw new IllegalArgumentException( "attempt to use negative max difference in counts with more than two species" );
+            }
+            if ( getMaximalDifference() < 0 ) {
+                throw new IllegalArgumentException( "attempt to use negative max difference with more than two species" );
+            }
+        }
     }
 
-    public static enum DomainSimilaritySortField {
-        MIN, MAX, SD, MEAN, ABS_MAX_COUNTS_DIFFERENCE, MAX_COUNTS_DIFFERENCE, MAX_DIFFERENCE, SPECIES_COUNT, DOMAIN_ID,
+    public DomainSimilarity( final CombinableDomains combinable_domains,
+                             final int max_difference_in_counts,
+                             final int max_difference,
+                             final SortedMap<Species, SpeciesSpecificDcData> species_data,
+                             final boolean sort_by_species_count_first,
+                             final boolean treat_as_binary_comparison ) {
+        if ( combinable_domains == null ) {
+            throw new IllegalArgumentException( "attempt to use null combinable domains" );
+        }
+        if ( species_data == null ) {
+            throw new IllegalArgumentException( "attempt to use null species data" );
+        }
+        if ( species_data.size() < 1 ) {
+            throw new IllegalArgumentException( "attempt to use empty species data" );
+        }
+        init();
+        _combinable_domains = combinable_domains;
+        _min = -1;
+        _max = -1;
+        _mean = -1;
+        _sd = -1;
+        _n = -1;
+        _max_difference_in_counts = max_difference_in_counts;
+        _max_difference = max_difference;
+        _species_data = species_data;
+        _treat_as_binary_comparison = treat_as_binary_comparison;
+        final int s = species_data.size();
+        if ( s > 2 ) {
+            if ( getMaximalDifferenceInCounts() < 0 ) {
+                throw new IllegalArgumentException( "attempt to use negative max difference in counts with more than two species" );
+            }
+            if ( getMaximalDifference() < 0 ) {
+                throw new IllegalArgumentException( "attempt to use negative max difference with more than two species" );
+            }
+        }
+    }
+
+    @Override
+    public int compareTo( final DomainSimilarity domain_similarity ) {
+        if ( this == domain_similarity ) {
+            return EQUAL;
+        }
+        else if ( domain_similarity == null ) {
+            throw new IllegalArgumentException( "attempt to compare " + this.getClass() + " to null" );
+        }
+        else if ( domain_similarity.getClass() != this.getClass() ) {
+            throw new IllegalArgumentException( "attempt to compare " + this.getClass() + " to "
+                    + domain_similarity.getClass() );
+        }
+        return compareByDomainId( domain_similarity );
     }
 
-    public SortedSet<String> getCombinableDomainIds( final Species species_of_combinable_domain );
+    public SortedSet<String> getCombinableDomainIds( final Species species_of_combinable_domain ) {
+        final SortedSet<String> sorted_ids = new TreeSet<String>();
+        if ( getSpeciesData().containsKey( species_of_combinable_domain ) ) {
+            for( final String id : getSpeciesData().get( species_of_combinable_domain )
+                    .getCombinableDomainIdToCountsMap().keySet() ) {
+                sorted_ids.add( id );
+            }
+        }
+        return sorted_ids;
+    }
 
-    public String getDomainId();
+    public String getDomainId() {
+        return getCombinableDomains().getKeyDomain();
+    }
 
     /**
      * For pairwise similarities, this should return the "difference"; for example the difference in counts
      * for copy number based features (the same as getMaximalDifferenceInCounts(), or the number
-     * of actually different domain combinations. 
+     * of actually different domain combinations.
      * For pairwise similarities, this should return the difference,
      * while for comparisons of more than two domains, this should return the maximal difference
-     * 
-     * 
-     * 
-     * @return
+     *
      */
-    public int getMaximalDifference();
+    public int getMaximalDifference() {
+        return _max_difference;
+    }
 
     /**
      * For pairwise similarities, this should return the difference in counts,
      * while for comparisons of more than two domains, this should return the maximal difference
      * in counts
-     * 
-     * 
+     *
+     *
      * @return the (maximal) difference in counts
      */
-    public int getMaximalDifferenceInCounts();
+    public int getMaximalDifferenceInCounts() {
+        return _max_difference_in_counts;
+    }
 
-    public double getMaximalSimilarityScore();
+    public double getMaximalSimilarityScore() {
+        return _max;
+    }
 
-    public double getMeanSimilarityScore();
+    public double getMeanSimilarityScore() {
+        return _mean;
+    }
 
-    public double getMinimalSimilarityScore();
+    public double getMinimalSimilarityScore() {
+        return _min;
+    }
 
     /**
      * This should return the number of pairwise distances used to calculate
      * this similarity score
-     * 
+     *
      * @return the number of pairwise distances
      */
-    public int getN();
+    public int getN() {
+        return _n;
+    }
+
+    public SortedSet<Species> getSpecies() {
+        final SortedSet<Species> species = new TreeSet<Species>();
+        for( final Species s : getSpeciesData().keySet() ) {
+            species.add( s );
+        }
+        return species;
+    }
 
-    public SortedSet<Species> getSpecies();
+    public List<Species> getSpeciesCustomOrder() {
+        return _species_order;
+    }
 
     /**
      * This should return a map, which maps species names to
      * SpeciesSpecificDomainSimilariyData
-     * 
-     * 
+     *
+     *
      * @return SortedMap<String, SpeciesSpecificDomainSimilariyData>
      */
-    public SortedMap<Species, SpeciesSpecificDomainSimilariyData> getSpeciesData();
+    public SortedMap<Species, SpeciesSpecificDcData> getSpeciesData() {
+        return _species_data;
+    }
+
+    public double getStandardDeviationOfSimilarityScore() {
+        return _sd;
+    }
+
+    public void setDetailedness( final Detailedness detailedness ) {
+        _detailedness = detailedness;
+    }
+
+    public void setSpeciesOrder( final List<Species> species_order ) {
+        if ( !species_order.containsAll( getSpeciesData().keySet() ) ) {
+            throw new IllegalArgumentException( "list to order species must contain all species of multiple combinable domains similarity" );
+        }
+        _species_order = species_order;
+    }
+
+    public StringBuffer toStringBuffer( final DomainSimilarity.PRINT_OPTION print_option,
+                                        final Map<String, Integer> tax_code_to_id_map,
+                                        final Phylogeny phy ) {
+        switch ( print_option ) {
+            case SIMPLE_TAB_DELIMITED:
+                return toStringBufferSimpleTabDelimited();
+            case HTML:
+                return toStringBufferDetailedHTML( tax_code_to_id_map, phy, OUTPUT_TAXCODES_PER_DOMAIN );
+            default:
+                throw new AssertionError( "Unknown print option: " + print_option );
+        }
+    }
+
+    private void addSpeciesSpecificDomainData( final StringBuffer sb,
+                                               final Species species,
+                                               final boolean html,
+                                               final Map<String, Integer> tax_code_to_id_map,
+                                               final Phylogeny phy ) {
+        if ( html ) {
+            sb.append( "<tr>" );
+            sb.append( "<td>" );
+            addTaxWithLink( sb, species.getSpeciesId(), tax_code_to_id_map, phy );
+            sb.append( "</td>" );
+        }
+        else {
+            sb.append( species.getSpeciesId() );
+        }
+        if ( getDetaildness() != DomainSimilarityCalculator.Detailedness.BASIC ) {
+            if ( html ) {
+                //sb.append( ":" );
+            }
+            else {
+                sb.append( "\t" );
+            }
+            sb.append( getSpeciesData().get( species ).toStringBuffer( getDetaildness(), html ) );
+        }
+        if ( html ) {
+            //sb.append( "<br>" );
+            sb.append( "</tr>" );
+        }
+        else {
+            sb.append( "\n\t" );
+        }
+    }
+
+    private void addTaxWithLink( final StringBuffer sb,
+                                 final String tax_code,
+                                 final Map<String, Integer> tax_code_to_id_map,
+                                 final Phylogeny phy ) {
+        String hex = null;
+        if ( ( phy != null ) && !phy.isEmpty() ) {
+            hex = SurfacingUtil.obtainHexColorStringDependingOnTaxonomyGroup( tax_code, phy );
+        }
+        sb.append( "<b>" );
+        if ( !ForesterUtil.isEmpty( tax_code )
+                && ( ( tax_code_to_id_map != null ) && tax_code_to_id_map.containsKey( tax_code ) ) ) {
+            if ( !ForesterUtil.isEmpty( hex ) ) {
+                sb.append( "<a href=\"" );
+                sb.append( SurfacingConstants.UNIPROT_TAXONOMY_ID_LINK );
+                sb.append( tax_code_to_id_map.get( tax_code ) );
+                sb.append( "\" target=\"tw\"><span style=\"color:" );
+                sb.append( hex );
+                sb.append( "\">" );
+                sb.append( tax_code );
+                sb.append( "</span></a>" );
+            }
+            else {
+                sb.append( "<a href=\"" );
+                sb.append( SurfacingConstants.UNIPROT_TAXONOMY_ID_LINK );
+                sb.append( tax_code_to_id_map.get( tax_code ) );
+                sb.append( "\" target=\"tw\">" );
+                sb.append( tax_code );
+                sb.append( "</a>" );
+            }
+        }
+        else {
+            sb.append( tax_code );
+        }
+        sb.append( "</b>" );
+    }
+
+    private int compareByDomainId( final DomainSimilarity other ) {
+        return getDomainId().compareToIgnoreCase( other.getDomainId() );
+    }
 
-    public double getStandardDeviationOfSimilarityScore();
+    private CombinableDomains getCombinableDomains() {
+        return _combinable_domains;
+    }
 
-    public StringBuffer toStringBuffer( PRINT_OPTION print_option, Map<String, Integer> tax_code_to_id_map );
+    private DomainSimilarityCalculator.Detailedness getDetaildness() {
+        return _detailedness;
+    }
+
+    private StringBuffer getDomainDataInAlphabeticalOrder() {
+        final SortedMap<String, SortedSet<String>> m = new TreeMap<String, SortedSet<String>>();
+        final StringBuffer sb = new StringBuffer();
+        for( final Species species : getSpeciesData().keySet() ) {
+            for( final String combable_dom : getCombinableDomainIds( species ) ) {
+                if ( !m.containsKey( combable_dom ) ) {
+                    m.put( combable_dom, new TreeSet<String>() );
+                }
+                m.get( combable_dom ).add( species.getSpeciesId() );
+            }
+        }
+        for( final Map.Entry<String, SortedSet<String>> e : m.entrySet() ) {
+            sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + e.getKey() + "\">" + e.getKey() + "</a>" );
+            sb.append( " " );
+            sb.append( "<span style=\"font-size:7px\">" );
+            for( final String tax : e.getValue() ) {
+                final String hex = SurfacingUtil.obtainHexColorStringDependingOnTaxonomyGroup( tax, null );
+                if ( !ForesterUtil.isEmpty( hex ) ) {
+                    sb.append( "<span style=\"color:" );
+                    sb.append( hex );
+                    sb.append( "\">" );
+                    sb.append( tax );
+                    sb.append( "</span>" );
+                }
+                else {
+                    sb.append( tax );
+                }
+                sb.append( " " );
+            }
+            sb.append( "</span>" );
+            sb.append( "<br>\n" );
+        }
+        return sb;
+    }
+
+    private StringBuffer getSpeciesDataInAlphabeticalOrder( final boolean html,
+                                                            final Map<String, Integer> tax_code_to_id_map,
+                                                            final Phylogeny phy ) {
+        final StringBuffer sb = new StringBuffer();
+        sb.append( "<table>" );
+        for( final Species species : getSpeciesData().keySet() ) {
+            addSpeciesSpecificDomainData( sb, species, html, tax_code_to_id_map, phy );
+        }
+        sb.append( "</table>" );
+        return sb;
+    }
+
+    private StringBuffer getSpeciesDataInCustomOrder( final boolean html,
+                                                      final Map<String, Integer> tax_code_to_id_map,
+                                                      final Phylogeny phy ) {
+        final StringBuffer sb = new StringBuffer();
+        for( final Species order_species : getSpeciesCustomOrder() ) {
+            if ( getSpeciesData().keySet().contains( order_species ) ) {
+                addSpeciesSpecificDomainData( sb, order_species, html, tax_code_to_id_map, phy );
+            }
+            else {
+                sb.append( DomainSimilarity.NO_SPECIES );
+                sb.append( DomainSimilarity.SPECIES_SEPARATOR );
+            }
+        }
+        return sb;
+    }
+
+    private StringBuffer getTaxonomyGroupDistribution( final Phylogeny tol ) {
+        final SortedMap<String, Set<String>> domain_to_species_set_map = new TreeMap<String, Set<String>>();
+        for( final Species species : getSpeciesData().keySet() ) {
+            for( final String combable_dom : getCombinableDomainIds( species ) ) {
+                if ( !domain_to_species_set_map.containsKey( combable_dom ) ) {
+                    domain_to_species_set_map.put( combable_dom, new HashSet<String>() );
+                }
+                domain_to_species_set_map.get( combable_dom ).add( species.getSpeciesId() );
+            }
+        }
+        final StringBuffer sb = new StringBuffer();
+        sb.append( "<table>" );
+        for( final Map.Entry<String, Set<String>> domain_to_species_set : domain_to_species_set_map.entrySet() ) {
+            final Map<String, Integer> counts = new HashMap<String, Integer>();
+            for( final String tax_code : domain_to_species_set.getValue() ) {
+                final String group = SurfacingUtil.obtainTaxonomyGroup( tax_code, tol );
+                if ( !ForesterUtil.isEmpty( group ) ) {
+                    if ( !counts.containsKey( group ) ) {
+                        counts.put( group, 1 );
+                    }
+                    else {
+                        counts.put( group, counts.get( group ) + 1 );
+                    }
+                }
+                else {
+                    return null;
+                }
+            }
+            final SortedMap<Integer, SortedSet<String>> counts_to_groups = new TreeMap<Integer, SortedSet<String>>( new Comparator<Integer>() {
+
+                @Override
+                public int compare( final Integer first, final Integer second ) {
+                    return second.compareTo( first );
+                }
+            } );
+            for( final Map.Entry<String, Integer> group_to_counts : counts.entrySet() ) {
+                final int c = group_to_counts.getValue();
+                if ( !counts_to_groups.containsKey( c ) ) {
+                    counts_to_groups.put( c, new TreeSet<String>() );
+                }
+                counts_to_groups.get( c ).add( group_to_counts.getKey() );
+            }
+            sb.append( "<tr>" );
+            sb.append( "<td>" );
+            sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain_to_species_set.getKey() + "\">"
+                    + domain_to_species_set.getKey() + "</a>" );
+            sb.append( " " );
+            sb.append( "</td>" );
+            boolean first = true;
+            for( final Entry<Integer, SortedSet<String>> count_to_groups : counts_to_groups.entrySet() ) {
+                if ( first ) {
+                    first = false;
+                }
+                else {
+                    sb.append( "<tr>" );
+                    sb.append( "<td>" );
+                    sb.append( "</td>" );
+                }
+                sb.append( "<td>" );
+                final SortedSet<String> groups = count_to_groups.getValue();
+                sb.append( count_to_groups.getKey() );
+                sb.append( " " );
+                for( final String group : groups ) {
+                    final Color color = ForesterUtil.obtainColorDependingOnTaxonomyGroup( group );
+                    if ( color == null ) {
+                        throw new IllegalArgumentException( "no color found for taxonomy group\"" + group + "\"" );
+                    }
+                    final String hex = String.format( "#%02x%02x%02x",
+                                                      color.getRed(),
+                                                      color.getGreen(),
+                                                      color.getBlue() );
+                    sb.append( "<span style=\"color:" );
+                    sb.append( hex );
+                    sb.append( "\">" );
+                    sb.append( " " );
+                    sb.append( group );
+                    sb.append( "</span>" );
+                }
+                sb.append( "</td>" );
+                sb.append( "</tr>" );
+            }
+            sb.append( ForesterUtil.getLineSeparator() );
+        }
+        sb.append( "</table>" );
+        return sb;
+    }
+
+    private void init() {
+        _detailedness = DomainSimilarityCalculator.Detailedness.PUNCTILIOUS;
+    }
+
+    private boolean isTreatAsBinaryComparison() {
+        return _treat_as_binary_comparison;
+    }
+
+    private StringBuffer toStringBufferDetailedHTML( final Map<String, Integer> tax_code_to_id_map,
+                                                     final Phylogeny phy,
+                                                     final boolean output_tax_codes_per_domain ) {
+        final StringBuffer sb = new StringBuffer();
+        sb.append( "<tr>" );
+        sb.append( "<td>" );
+        sb.append( "<b>" );
+        sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + getDomainId() + "\" target=\"pfam_window\">"
+                + getDomainId() + "</a>" );
+        sb.append( "</b>" );
+        sb.append( "<a name=\"" + getDomainId() + "\">" );
+        sb.append( "</td>" );
+        sb.append( "<td>" );
+        sb.append( "<a href=\"" + SurfacingConstants.GOOGLE_SCHOLAR_SEARCH + getDomainId()
+                   + "\" target=\"gs_window\">gs</a>" );
+        sb.append( "</td>" );
+        if ( getMaximalSimilarityScore() > 0 ) {
+            sb.append( "<td>" );
+            sb.append( ForesterUtil.round( getMeanSimilarityScore(), 3 ) );
+            sb.append( "</td>" );
+            if ( SurfacingConstants.PRINT_MORE_DOM_SIMILARITY_INFO ) {
+                if ( !isTreatAsBinaryComparison() ) {
+                    sb.append( "<td>" );
+                    sb.append( "(" );
+                    sb.append( ForesterUtil.round( getStandardDeviationOfSimilarityScore(), 3 ) );
+                    sb.append( ")" );
+                    sb.append( "</td>" );
+                    sb.append( "<td>" );
+                    sb.append( "[" );
+                    sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) );
+                    sb.append( "-" );
+                    sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) );
+                    sb.append( "]" );
+                    sb.append( "</td>" );
+                }
+            }
+        }
+        sb.append( "<td>" );
+        sb.append( getMaximalDifference() );
+        sb.append( "</td>" );
+        sb.append( "<td>" );
+        if ( isTreatAsBinaryComparison() ) {
+            sb.append( getMaximalDifferenceInCounts() );
+        }
+        else {
+            sb.append( Math.abs( getMaximalDifferenceInCounts() ) );
+        }
+        sb.append( "</td>" );
+        if ( !isTreatAsBinaryComparison() ) {
+            sb.append( "<td>" );
+            sb.append( "<b>" );
+            sb.append( getSpeciesData().size() );
+            sb.append( "</b>" );
+            sb.append( "</td>" );
+        }
+        if ( ( getSpeciesCustomOrder() == null ) || getSpeciesCustomOrder().isEmpty() ) {
+            sb.append( "<td>" );
+            sb.append( getSpeciesDataInAlphabeticalOrder( true, tax_code_to_id_map, phy ) );
+            if ( output_tax_codes_per_domain ) {
+                sb.append( getDomainDataInAlphabeticalOrder() );
+            }
+            sb.append( getTaxonomyGroupDistribution( phy ) );
+            sb.append( "</td>" );
+        }
+        else {
+            sb.append( "<td>" );
+            sb.append( getSpeciesDataInCustomOrder( true, tax_code_to_id_map, phy ) );
+            if ( output_tax_codes_per_domain ) {
+                sb.append( getDomainDataInAlphabeticalOrder() );
+            }
+            sb.append( getTaxonomyGroupDistribution( phy ) );
+            sb.append( "</td>" );
+        }
+        sb.append( "</tr>" );
+        return sb;
+    }
+
+    private StringBuffer toStringBufferSimpleTabDelimited() {
+        final StringBuffer sb = new StringBuffer();
+        sb.append( getDomainId() );
+        sb.append( "\t" );
+        sb.append( getSpeciesDataInAlphabeticalOrder( false, null, null ) );
+        sb.append( "\n" );
+        return sb;
+    }
+
+    static public enum DomainSimilarityScoring {
+        COMBINATIONS, DOMAINS, PROTEINS;
+    }
+
+    public static enum DomainSimilaritySortField {
+        ABS_MAX_COUNTS_DIFFERENCE, DOMAIN_ID, MAX, MAX_COUNTS_DIFFERENCE, MAX_DIFFERENCE, MEAN, MIN, SD, SPECIES_COUNT,
+    }
+
+    public static enum PRINT_OPTION {
+        HTML, SIMPLE_TAB_DELIMITED;
+    }
+
+    class ValueComparator implements Comparator<String> {
+
+        final private Map<String, Integer> _base;
+
+        public ValueComparator( final Map<String, Integer> base ) {
+            _base = base;
+        }
+
+        @Override
+        public int compare( final String a, final String b ) {
+            if ( _base.get( a ) >= _base.get( b ) ) {
+                return -1;
+            }
+            else {
+                return 1;
+            } // returning 0 would merge keys
+        }
+    }
 }