+ /**
+  * Accessor for the combinable domains stored in {@code _combinable_domains}.
+  *
+  * @return the current value of the {@code _combinable_domains} field
+  */
+ private CombinableDomains getCombinableDomains() {
+     return this._combinable_domains;
+ }
+
+ /**
+  * Accessor for the level of detail stored in {@code _detailedness}.
+  *
+  * @return the current value of the {@code _detailedness} field
+  */
+ private DomainSimilarityCalculator.Detailedness getDetaildness() {
+     return this._detailedness;
+ }
+
+ /**
+  * Builds an HTML fragment with one line per combinable domain id.
+  * <p>
+  * Domain ids are gathered from every species in {@code getSpeciesData()} and
+  * kept in a {@link TreeMap}, so they are emitted in natural {@code String}
+  * order. Each line consists of a link (prefix
+  * {@code SurfacingConstants.PFAM_FAMILY_ID_LINK}) followed by the sorted ids of
+  * the species in which the domain occurs, rendered in a small font and
+  * terminated by {@code <br>}.
+  *
+  * @return the buffer holding the generated fragment (empty if there are no domains)
+  */
+ private StringBuffer getDomainDataInAlphabeticalOrder() {
+     // Group species ids by domain id; both levels are kept sorted.
+     final SortedMap<String, SortedSet<String>> species_by_domain = new TreeMap<String, SortedSet<String>>();
+     for( final Species species : getSpeciesData().keySet() ) {
+         for( final String domain_id : getCombinableDomainIds( species ) ) {
+             SortedSet<String> species_ids = species_by_domain.get( domain_id );
+             if ( species_ids == null ) {
+                 species_ids = new TreeSet<String>();
+                 species_by_domain.put( domain_id, species_ids );
+             }
+             species_ids.add( species.getSpeciesId() );
+         }
+     }
+     final StringBuffer html = new StringBuffer();
+     for( final Map.Entry<String, SortedSet<String>> entry : species_by_domain.entrySet() ) {
+         final String domain_id = entry.getKey();
+         html.append( "<a href=\"" )
+                 .append( SurfacingConstants.PFAM_FAMILY_ID_LINK )
+                 .append( domain_id )
+                 .append( "\">" )
+                 .append( domain_id )
+                 .append( "</a>" );
+         html.append( " " ).append( "<span style=\"font-size:7px\">" );
+         for( final String tax : entry.getValue() ) {
+             final String hex = SurfacingUtil.obtainHexColorStringDependingOnTaxonomyGroup( tax, null );
+             if ( ForesterUtil.isEmpty( hex ) ) {
+                 // No color available for this code: emit it undecorated.
+                 html.append( tax );
+             }
+             else {
+                 html.append( "<span style=\"color:" )
+                         .append( hex )
+                         .append( "\">" )
+                         .append( tax )
+                         .append( "</span>" );
+             }
+             html.append( " " );
+         }
+         html.append( "</span>" ).append( "<br>\n" );
+     }
+     return html;
+ }
+
+ /**
+  * Collects the species-specific domain data of every species in
+  * {@code getSpeciesData()}, in the iteration order of that map's key set
+  * (presumably alphabetical, as the method name suggests -- confirm against the
+  * map type).
+  * <p>
+  * The data is wrapped in an HTML {@code <table>} element only when
+  * {@code html} is true. The tags used to be emitted unconditionally, which
+  * put markup into the plain-text output requested with {@code html == false}.
+  *
+  * @param html               whether HTML output is requested; forwarded to
+  *                           {@code addSpeciesSpecificDomainData}
+  * @param tax_code_to_id_map forwarded unchanged to {@code addSpeciesSpecificDomainData}
+  * @param phy                forwarded unchanged to {@code addSpeciesSpecificDomainData}
+  * @return a new buffer holding the collected data
+  */
+ private StringBuffer getSpeciesDataInAlphabeticalOrder( final boolean html,
+                                                         final Map<String, Integer> tax_code_to_id_map,
+                                                         final Phylogeny phy ) {
+     final StringBuffer sb = new StringBuffer();
+     if ( html ) {
+         sb.append( "<table>" );
+     }
+     for( final Species species : getSpeciesData().keySet() ) {
+         addSpeciesSpecificDomainData( sb, species, html, tax_code_to_id_map, phy );
+     }
+     if ( html ) {
+         sb.append( "</table>" );
+     }
+     return sb;
+ }
+
+ /**
+  * Collects species-specific domain data following the order given by
+  * {@code getSpeciesCustomOrder()}.
+  * <p>
+  * For a species of the custom order that is not a key of
+  * {@code getSpeciesData()}, the placeholder {@code DomainSimilarity.NO_SPECIES}
+  * followed by {@code DomainSimilarity.SPECIES_SEPARATOR} is written instead.
+  *
+  * @param html               forwarded unchanged to {@code addSpeciesSpecificDomainData}
+  * @param tax_code_to_id_map forwarded unchanged to {@code addSpeciesSpecificDomainData}
+  * @param phy                forwarded unchanged to {@code addSpeciesSpecificDomainData}
+  * @return a new buffer holding the collected data
+  */
+ private StringBuffer getSpeciesDataInCustomOrder( final boolean html,
+                                                   final Map<String, Integer> tax_code_to_id_map,
+                                                   final Phylogeny phy ) {
+     final StringBuffer out = new StringBuffer();
+     for( final Species ordered : getSpeciesCustomOrder() ) {
+         if ( !getSpeciesData().keySet().contains( ordered ) ) {
+             // Species is requested by the custom order but has no data here.
+             out.append( DomainSimilarity.NO_SPECIES ).append( DomainSimilarity.SPECIES_SEPARATOR );
+             continue;
+         }
+         addSpeciesSpecificDomainData( out, ordered, html, tax_code_to_id_map, phy );
+     }
+     return out;
+ }
+
+ /**
+  * Builds an HTML table showing, for every combinable domain id, how many of
+  * the species containing it fall into each taxonomy group.
+  * <p>
+  * Domains are listed in natural {@code String} order. For each domain the
+  * groups are bucketed by their species count and the buckets are emitted from
+  * the highest count to the lowest, one table row per bucket; only the first
+  * row of a domain carries the domain link, later rows start with an empty
+  * cell. Group names are colored with the color returned by
+  * {@code ForesterUtil.obtainColorDependingOnTaxonomyGroup}.
+  *
+  * @param tol passed to {@code SurfacingUtil.obtainTaxonomyGroup} to resolve the
+  *            taxonomy group of each species id
+  * @return the generated table, or {@code null} as soon as a species id resolves
+  *         to an empty taxonomy group
+  * @throws IllegalArgumentException if no color is available for a taxonomy group
+  */
+ private StringBuffer getTaxonomyGroupDistribution( final Phylogeny tol ) {
+     // Step 1: domain id -> ids of the species containing that domain.
+     final SortedMap<String, Set<String>> species_by_domain = new TreeMap<String, Set<String>>();
+     for( final Species species : getSpeciesData().keySet() ) {
+         for( final String domain_id : getCombinableDomainIds( species ) ) {
+             Set<String> species_ids = species_by_domain.get( domain_id );
+             if ( species_ids == null ) {
+                 species_ids = new HashSet<String>();
+                 species_by_domain.put( domain_id, species_ids );
+             }
+             species_ids.add( species.getSpeciesId() );
+         }
+     }
+     final StringBuffer html = new StringBuffer();
+     html.append( "<table>" );
+     for( final Map.Entry<String, Set<String>> domain_entry : species_by_domain.entrySet() ) {
+         // Step 2: taxonomy group -> number of species of this domain in that group.
+         final Map<String, Integer> tally = new HashMap<String, Integer>();
+         for( final String tax_code : domain_entry.getValue() ) {
+             final String group = SurfacingUtil.obtainTaxonomyGroup( tax_code, tol );
+             if ( ForesterUtil.isEmpty( group ) ) {
+                 // A single unresolvable species aborts the whole distribution.
+                 return null;
+             }
+             final Integer seen = tally.get( group );
+             tally.put( group, seen == null ? 1 : seen + 1 );
+         }
+         // Step 3: invert to count -> groups, iterated from largest to smallest count.
+         final SortedMap<Integer, SortedSet<String>> groups_by_count = new TreeMap<Integer, SortedSet<String>>( new Comparator<Integer>() {
+
+             @Override
+             public int compare( final Integer left, final Integer right ) {
+                 return right.compareTo( left );
+             }
+         } );
+         for( final Map.Entry<String, Integer> tally_entry : tally.entrySet() ) {
+             final Integer count = tally_entry.getValue();
+             SortedSet<String> same_count = groups_by_count.get( count );
+             if ( same_count == null ) {
+                 same_count = new TreeSet<String>();
+                 groups_by_count.put( count, same_count );
+             }
+             same_count.add( tally_entry.getKey() );
+         }
+         // Step 4: render. The first row holds the domain link in its leading cell.
+         final String domain_id = domain_entry.getKey();
+         html.append( "<tr>" ).append( "<td>" );
+         html.append( "<a href=\"" )
+                 .append( SurfacingConstants.PFAM_FAMILY_ID_LINK )
+                 .append( domain_id )
+                 .append( "\">" )
+                 .append( domain_id )
+                 .append( "</a>" );
+         html.append( " " ).append( "</td>" );
+         boolean is_first_row = true;
+         for( final Entry<Integer, SortedSet<String>> row : groups_by_count.entrySet() ) {
+             if ( !is_first_row ) {
+                 // Continuation row: open it and leave the domain cell empty.
+                 html.append( "<tr>" ).append( "<td>" ).append( "</td>" );
+             }
+             is_first_row = false;
+             html.append( "<td>" ).append( row.getKey() ).append( " " );
+             for( final String group : row.getValue() ) {
+                 final Color color = ForesterUtil.obtainColorDependingOnTaxonomyGroup( group );
+                 if ( color == null ) {
+                     throw new IllegalArgumentException( "no color found for taxonomy group\"" + group + "\"" );
+                 }
+                 final String hex = String.format( "#%02x%02x%02x",
+                                                   color.getRed(),
+                                                   color.getGreen(),
+                                                   color.getBlue() );
+                 html.append( "<span style=\"color:" )
+                         .append( hex )
+                         .append( "\">" )
+                         .append( " " )
+                         .append( group )
+                         .append( "</span>" );
+             }
+             html.append( "</td>" ).append( "</tr>" );
+         }
+         html.append( ForesterUtil.getLineSeparator() );
+     }
+     html.append( "</table>" );
+     return html;
+ }
+
+ /**
+  * Sets {@code _detailedness} to
+  * {@code DomainSimilarityCalculator.Detailedness.PUNCTILIOUS}.
+  */
+ private void init() {
+     this._detailedness = DomainSimilarityCalculator.Detailedness.PUNCTILIOUS;
+ }
+
+ /**
+  * Accessor for the {@code _treat_as_binary_comparison} flag.
+  *
+  * @return the current value of {@code _treat_as_binary_comparison}
+  */
+ private boolean isTreatAsBinaryComparison() {
+     return this._treat_as_binary_comparison;
+ }
+
+ /**
+  * Renders this domain similarity as one HTML table row ({@code <tr>...</tr>}).
+  * <p>
+  * Cells, in order:
+  * <ol>
+  * <li>the domain id as a bold link (prefix {@code SurfacingConstants.PFAM_FAMILY_ID_LINK})
+  * plus a named anchor for the domain id;</li>
+  * <li>a "gs" link (prefix {@code SurfacingConstants.GOOGLE_SCHOLAR_SEARCH});</li>
+  * <li>only if the maximal similarity score is positive: the mean score and, when
+  * {@code SurfacingConstants.PRINT_MORE_DOM_SIMILARITY_INFO} is set and this is not
+  * a binary comparison, the standard deviation and the [min-max] range;</li>
+  * <li>the maximal difference;</li>
+  * <li>the maximal difference in counts (absolute value unless this is a binary
+  * comparison);</li>
+  * <li>unless this is a binary comparison: the number of species;</li>
+  * <li>the species data (custom order if one is set, otherwise the alphabetical
+  * variant), optionally the per-domain tax codes, and the taxonomy group
+  * distribution.</li>
+  * </ol>
+  * Compared with the earlier version, the named anchor is now closed and a
+  * {@code null} taxonomy group distribution is skipped instead of being
+  * written out as the text "null".
+  *
+  * @param tax_code_to_id_map          forwarded to the species-data helpers
+  * @param phy                         forwarded to the species-data helpers and to
+  *                                    {@code getTaxonomyGroupDistribution}
+  * @param output_tax_codes_per_domain whether to include the output of
+  *                                    {@code getDomainDataInAlphabeticalOrder()}
+  * @return a new buffer holding the table row
+  */
+ private StringBuffer toStringBufferDetailedHTML( final Map<String, Integer> tax_code_to_id_map,
+                                                  final Phylogeny phy,
+                                                  final boolean output_tax_codes_per_domain ) {
+     final StringBuffer sb = new StringBuffer();
+     sb.append( "<tr>" );
+     sb.append( "<td>" );
+     sb.append( "<b>" );
+     sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + getDomainId() + "\" target=\"pfam_window\">"
+             + getDomainId() + "</a>" );
+     sb.append( "</b>" );
+     // Named anchor used as a jump target; it must be closed so that it does
+     // not swallow the content that follows.
+     sb.append( "<a name=\"" + getDomainId() + "\"></a>" );
+     sb.append( "</td>" );
+     sb.append( "<td>" );
+     sb.append( "<a href=\"" + SurfacingConstants.GOOGLE_SCHOLAR_SEARCH + getDomainId()
+             + "\" target=\"gs_window\">gs</a>" );
+     sb.append( "</td>" );
+     if ( getMaximalSimilarityScore() > 0 ) {
+         sb.append( "<td>" );
+         sb.append( ForesterUtil.round( getMeanSimilarityScore(), 3 ) );
+         sb.append( "</td>" );
+         if ( SurfacingConstants.PRINT_MORE_DOM_SIMILARITY_INFO && !isTreatAsBinaryComparison() ) {
+             sb.append( "<td>" );
+             sb.append( "(" );
+             sb.append( ForesterUtil.round( getStandardDeviationOfSimilarityScore(), 3 ) );
+             sb.append( ")" );
+             sb.append( "</td>" );
+             sb.append( "<td>" );
+             sb.append( "[" );
+             sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) );
+             sb.append( "-" );
+             sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) );
+             sb.append( "]" );
+             sb.append( "</td>" );
+         }
+     }
+     sb.append( "<td>" );
+     sb.append( getMaximalDifference() );
+     sb.append( "</td>" );
+     sb.append( "<td>" );
+     if ( isTreatAsBinaryComparison() ) {
+         // Binary comparison: the sign of the difference is kept.
+         sb.append( getMaximalDifferenceInCounts() );
+     }
+     else {
+         sb.append( Math.abs( getMaximalDifferenceInCounts() ) );
+     }
+     sb.append( "</td>" );
+     if ( !isTreatAsBinaryComparison() ) {
+         sb.append( "<td>" );
+         sb.append( "<b>" );
+         sb.append( getSpeciesData().size() );
+         sb.append( "</b>" );
+         sb.append( "</td>" );
+     }
+     sb.append( "<td>" );
+     // Only the species listing depends on whether a custom order is set;
+     // everything after it is shared by both cases.
+     if ( ( getSpeciesCustomOrder() == null ) || getSpeciesCustomOrder().isEmpty() ) {
+         sb.append( getSpeciesDataInAlphabeticalOrder( true, tax_code_to_id_map, phy ) );
+     }
+     else {
+         sb.append( getSpeciesDataInCustomOrder( true, tax_code_to_id_map, phy ) );
+     }
+     if ( output_tax_codes_per_domain ) {
+         sb.append( getDomainDataInAlphabeticalOrder() );
+     }
+     // getTaxonomyGroupDistribution may return null; appending a null
+     // StringBuffer would write the four characters "null" into the page.
+     final StringBuffer taxonomy_distribution = getTaxonomyGroupDistribution( phy );
+     if ( taxonomy_distribution != null ) {
+         sb.append( taxonomy_distribution );
+     }
+     sb.append( "</td>" );
+     sb.append( "</tr>" );
+     return sb;
+ }
+
+ /**
+  * Renders this domain similarity as a single text line: the domain id, a tab,
+  * the species data obtained from
+  * {@code getSpeciesDataInAlphabeticalOrder( false, null, null )}, and a
+  * trailing newline.
+  *
+  * @return a new buffer holding the line
+  */
+ private StringBuffer toStringBufferSimpleTabDelimited() {
+     return new StringBuffer().append( getDomainId() )
+             .append( "\t" )
+             .append( getSpeciesDataInAlphabeticalOrder( false, null, null ) )
+             .append( "\n" );
+ }
+
+ /**
+  * Available domain similarity scoring modes (presumably the unit on which a
+  * similarity score is based -- confirm against the calculators using it).
+  */
+ public static enum DomainSimilarityScoring {
+     COMBINATIONS,
+     DOMAINS,
+     PROTEINS;
+ }
+
+ /**
+  * Fields by which domain similarities can be sorted (as the name suggests;
+  * the sorting code itself is not part of this section).
+  */
+ public static enum DomainSimilaritySortField {
+     ABS_MAX_COUNTS_DIFFERENCE,
+     DOMAIN_ID,
+     MAX,
+     MAX_COUNTS_DIFFERENCE,
+     MAX_DIFFERENCE,
+     MEAN,
+     MIN,
+     SD,
+     SPECIES_COUNT,
+ }
+
+ /**
+  * Output formats: {@code HTML} and {@code SIMPLE_TAB_DELIMITED}. The dispatch
+  * on these values is not part of this section; by name they appear to match
+  * the HTML and tab-delimited renderers of the enclosing class.
+  */
+ public static enum PRINT_OPTION {
+     HTML,
+     SIMPLE_TAB_DELIMITED;
+ }
+
+ /**
+  * Orders {@code String} keys by the {@code Integer} value they map to in a
+  * backing map, largest value first.
+  * <p>
+  * Keys with equal values are ordered by the keys themselves. Distinct keys
+  * therefore never compare as equal (so a sorted map using this comparator does
+  * not merge them), while a key still compares as equal to itself. The earlier
+  * version never returned 0 and answered -1 in both directions for equal
+  * values, which violates the {@link Comparator} contract and makes lookups in
+  * a {@code TreeMap} built on it fail.
+  * <p>
+  * Every compared key must be present in the backing map with a non-null
+  * value; otherwise a {@link NullPointerException} is thrown.
+  */
+ class ValueComparator implements Comparator<String> {
+
+     final private Map<String, Integer> _base;
+
+     /**
+      * @param base the map supplying the values to order by; it is referenced,
+      *             not copied
+      */
+     public ValueComparator( final Map<String, Integer> base ) {
+         _base = base;
+     }
+
+     /**
+      * Compares two keys by descending mapped value, breaking ties on the keys.
+      *
+      * @param a the first key
+      * @param b the second key
+      * @return a negative number if {@code a} sorts before {@code b}, a positive
+      *         number if it sorts after, and 0 only if the values are equal and
+      *         the keys are equal
+      * @throws NullPointerException if either key has no (or a null) value in
+      *                              the backing map
+      */
+     @Override
+     public int compare( final String a, final String b ) {
+         final Integer value_a = _base.get( a );
+         final Integer value_b = _base.get( b );
+         // Reversed operands give descending order by value.
+         final int by_value = value_b.compareTo( value_a );
+         if ( by_value != 0 ) {
+             return by_value;
+         }
+         // Equal values: fall back to the keys so that only identical keys
+         // yield 0. Null keys sort first.
+         if ( a == null ) {
+             return ( b == null ) ? 0 : -1;
+         }
+         if ( b == null ) {
+             return 1;
+         }
+         return a.compareTo( b );
+     }
+ }