+ /**
+  * Stores the given list in {@code _species_order} after verifying that it
+  * contains every species that is a key of the species data.
+  *
+  * @param species_order the list to store; it is kept as passed in, no copy
+  *            is made
+  * @throws IllegalArgumentException if at least one key of the species data
+  *             is not contained in {@code species_order}
+  */
+ public void setSpeciesOrder( final List<Species> species_order ) {
+     final boolean covers_all_species = species_order.containsAll( getSpeciesData().keySet() );
+     if ( covers_all_species ) {
+         _species_order = species_order;
+         return;
+     }
+     throw new IllegalArgumentException( "list to order species must contain all species of multiple combinable domains similarity" );
+ }
+
+ /**
+  * Renders this similarity in the format selected by {@code print_option}.
+  *
+  * @param print_option {@code SIMPLE_TAB_DELIMITED} or {@code HTML}
+  * @param tax_code_to_id_map forwarded to the HTML rendering; not used for
+  *            the tab-delimited rendering
+  * @param phy forwarded to the HTML rendering; not used for the
+  *            tab-delimited rendering
+  * @return the rendered text
+  * @throws AssertionError for any other print option
+  */
+ public StringBuffer toStringBuffer( final DomainSimilarity.PRINT_OPTION print_option,
+                                     final Map<String, Integer> tax_code_to_id_map,
+                                     final Phylogeny phy ) {
+     final StringBuffer rendered;
+     switch ( print_option ) {
+         case SIMPLE_TAB_DELIMITED:
+             rendered = toStringBufferSimpleTabDelimited();
+             break;
+         case HTML:
+             rendered = toStringBufferDetailedHTML( tax_code_to_id_map, phy, OUTPUT_TAXCODES_PER_DOMAIN );
+             break;
+         default:
+             throw new AssertionError( "Unknown print option: " + print_option );
+     }
+     return rendered;
+ }
+
+ private void addSpeciesSpecificDomainData( final StringBuffer sb,
+ final Species species,
+ final boolean html,
+ final Map<String, Integer> tax_code_to_id_map,
+ final Phylogeny phy ) {
+ if ( html ) {
+ sb.append( "<tr>" );
+ sb.append( "<td>" );
+ addTaxWithLink( sb, species.getSpeciesId(), tax_code_to_id_map, phy );
+ sb.append( "</td>" );
+ }
+ else {
+ sb.append( species.getSpeciesId() );
+ }
+ if ( getDetaildness() != DomainSimilarityCalculator.Detailedness.BASIC ) {
+ if ( html ) {
+ //sb.append( ":" );
+ }
+ else {
+ sb.append( "\t" );
+ }
+ sb.append( getSpeciesData().get( species ).toStringBuffer( getDetaildness(), html ) );
+ }
+ if ( html ) {
+ //sb.append( "<br>" );
+ sb.append( "</tr>" );
+ }
+ else {
+ sb.append( "\n\t" );
+ }
+ }
+
+ private void addTaxWithLink( final StringBuffer sb,
+ final String tax_code,
+ final Map<String, Integer> tax_code_to_id_map,
+ final Phylogeny phy ) {
+ String hex = null;
+ if ( ( phy != null ) && !phy.isEmpty() ) {
+ hex = SurfacingUtil.obtainHexColorStringDependingOnTaxonomyGroup( tax_code, phy );
+ }
+ sb.append( "<b>" );
+ if ( !ForesterUtil.isEmpty( tax_code )
+ && ( ( tax_code_to_id_map != null ) && tax_code_to_id_map.containsKey( tax_code ) ) ) {
+ if ( !ForesterUtil.isEmpty( hex ) ) {
+ sb.append( "<a href=\"" );
+ sb.append( SurfacingConstants.UNIPROT_TAXONOMY_ID_LINK );
+ sb.append( tax_code_to_id_map.get( tax_code ) );
+ sb.append( "\" target=\"tw\"><span style=\"color:" );
+ sb.append( hex );
+ sb.append( "\">" );
+ sb.append( tax_code );
+ sb.append( "</span></a>" );
+ }
+ else {
+ sb.append( "<a href=\"" );
+ sb.append( SurfacingConstants.UNIPROT_TAXONOMY_ID_LINK );
+ sb.append( tax_code_to_id_map.get( tax_code ) );
+ sb.append( "\" target=\"tw\">" );
+ sb.append( tax_code );
+ sb.append( "</a>" );
+ }
+ }
+ else {
+ sb.append( tax_code );
+ }
+ sb.append( "</b>" );
+ }
+
+ private int compareByDomainId( final DomainSimilarity other ) {
+ return getDomainId().compareToIgnoreCase( other.getDomainId() );
+ }
+
+ /**
+  * @return the value of the {@code _combinable_domains} field
+  */
+ private CombinableDomains getCombinableDomains() {
+     return this._combinable_domains;
+ }
+
+ /**
+  * @return the value of the {@code _detailedness} field
+  */
+ private DomainSimilarityCalculator.Detailedness getDetaildness() {
+     return this._detailedness;
+ }
+
+ /**
+  * Builds an HTML fragment that lists every combinable domain id together
+  * with the ids of the species it was collected from.
+  * <p>
+  * Domain ids, and the species ids per domain, appear in their natural
+  * string order. Each domain produces one line: a link built from
+  * {@code SurfacingConstants.PFAM_FAMILY_ID_LINK}, then the species ids in a
+  * 7px font (colored where a color string is available), then a line break.
+  *
+  * @return the HTML fragment; empty when there is no species data
+  */
+ private StringBuffer getDomainDataInAlphabeticalOrder() {
+     // domain id -> sorted ids of the species listing that domain
+     final SortedMap<String, SortedSet<String>> species_by_domain = new TreeMap<String, SortedSet<String>>();
+     for( final Species species : getSpeciesData().keySet() ) {
+         for( final String domain_id : getCombinableDomainIds( species ) ) {
+             SortedSet<String> species_ids = species_by_domain.get( domain_id );
+             if ( species_ids == null ) {
+                 species_ids = new TreeSet<String>();
+                 species_by_domain.put( domain_id, species_ids );
+             }
+             species_ids.add( species.getSpeciesId() );
+         }
+     }
+     final StringBuffer html = new StringBuffer();
+     for( final Map.Entry<String, SortedSet<String>> entry : species_by_domain.entrySet() ) {
+         final String domain_id = entry.getKey();
+         html.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain_id + "\">" + domain_id + "</a>" );
+         html.append( " " );
+         html.append( "<span style=\"font-size:7px\">" );
+         for( final String tax_code : entry.getValue() ) {
+             // NOTE(review): the phylogeny argument is null here, whereas addTaxWithLink
+             // only performs this lookup with a non-empty phylogeny - confirm the helper
+             // accepts null.
+             final String hex = SurfacingUtil.obtainHexColorStringDependingOnTaxonomyGroup( tax_code, null );
+             if ( ForesterUtil.isEmpty( hex ) ) {
+                 html.append( tax_code );
+             }
+             else {
+                 html.append( "<span style=\"color:" );
+                 html.append( hex );
+                 html.append( "\">" );
+                 html.append( tax_code );
+                 html.append( "</span>" );
+             }
+             html.append( " " );
+         }
+         html.append( "</span>" );
+         html.append( "<br>\n" );
+     }
+     return html;
+ }
+
+ /**
+  * Concatenates the per-species entries of all species in the species data,
+  * in the iteration order of its key set (presumably sorted - confirm
+  * against the map type used for the species data).
+  * <p>
+  * In HTML mode the entries are table rows and are therefore enclosed in
+  * {@code <table>}/{@code </table>}. In text mode no markup is added, so
+  * that the tab-delimited output stays free of HTML tags.
+  *
+  * @param html {@code true} for HTML output, {@code false} for plain text
+  * @param tax_code_to_id_map forwarded to {@code addSpeciesSpecificDomainData}
+  * @param phy forwarded to {@code addSpeciesSpecificDomainData}
+  * @return the concatenated entries
+  */
+ private StringBuffer getSpeciesDataInAlphabeticalOrder( final boolean html,
+                                                         final Map<String, Integer> tax_code_to_id_map,
+                                                         final Phylogeny phy ) {
+     final StringBuffer sb = new StringBuffer();
+     if ( html ) {
+         sb.append( "<table>" );
+     }
+     for( final Species species : getSpeciesData().keySet() ) {
+         addSpeciesSpecificDomainData( sb, species, html, tax_code_to_id_map, phy );
+     }
+     if ( html ) {
+         sb.append( "</table>" );
+     }
+     return sb;
+ }
+
+ /**
+  * Concatenates the per-species entries following the custom species order.
+  * <p>
+  * For every species of the custom order that is a key of the species data,
+  * its entry is appended; for every other species of the custom order the
+  * placeholder {@code DomainSimilarity.NO_SPECIES} followed by
+  * {@code DomainSimilarity.SPECIES_SEPARATOR} is appended instead.
+  * <p>
+  * In HTML mode the entries are table rows, so the result is enclosed in
+  * {@code <table>}/{@code </table>} and each placeholder gets a row of its
+  * own. In text mode no markup is added.
+  *
+  * @param html {@code true} for HTML output, {@code false} for plain text
+  * @param tax_code_to_id_map forwarded to {@code addSpeciesSpecificDomainData}
+  * @param phy forwarded to {@code addSpeciesSpecificDomainData}
+  * @return the concatenated entries
+  */
+ private StringBuffer getSpeciesDataInCustomOrder( final boolean html,
+                                                   final Map<String, Integer> tax_code_to_id_map,
+                                                   final Phylogeny phy ) {
+     final StringBuffer sb = new StringBuffer();
+     if ( html ) {
+         sb.append( "<table>" );
+     }
+     for( final Species order_species : getSpeciesCustomOrder() ) {
+         if ( getSpeciesData().containsKey( order_species ) ) {
+             addSpeciesSpecificDomainData( sb, order_species, html, tax_code_to_id_map, phy );
+         }
+         else {
+             // Bare text is not allowed between table rows, hence the wrapping row.
+             if ( html ) {
+                 sb.append( "<tr><td>" );
+             }
+             sb.append( DomainSimilarity.NO_SPECIES );
+             sb.append( DomainSimilarity.SPECIES_SEPARATOR );
+             if ( html ) {
+                 sb.append( "</td></tr>" );
+             }
+         }
+     }
+     if ( html ) {
+         sb.append( "</table>" );
+     }
+     return sb;
+ }
+
+ private StringBuffer getTaxonomyGroupDistribution( final Phylogeny tol ) {
+ final SortedMap<String, Set<String>> domain_to_species_set_map = new TreeMap<String, Set<String>>();
+ for( final Species species : getSpeciesData().keySet() ) {
+ for( final String combable_dom : getCombinableDomainIds( species ) ) {
+ if ( !domain_to_species_set_map.containsKey( combable_dom ) ) {
+ domain_to_species_set_map.put( combable_dom, new HashSet<String>() );
+ }
+ domain_to_species_set_map.get( combable_dom ).add( species.getSpeciesId() );
+ }
+ }
+ final StringBuffer sb = new StringBuffer();
+ sb.append( "<table>" );
+ for( final Map.Entry<String, Set<String>> domain_to_species_set : domain_to_species_set_map.entrySet() ) {
+ final Map<String, Integer> counts = new HashMap<String, Integer>();
+ for( final String tax_code : domain_to_species_set.getValue() ) {
+ final String group = SurfacingUtil.obtainTaxonomyGroup( tax_code, tol );
+ if ( !ForesterUtil.isEmpty( group ) ) {
+ if ( !counts.containsKey( group ) ) {
+ counts.put( group, 1 );
+ }
+ else {
+ counts.put( group, counts.get( group ) + 1 );
+ }
+ }
+ else {
+ return null;
+ }
+ }
+ final SortedMap<Integer, SortedSet<String>> counts_to_groups = new TreeMap<Integer, SortedSet<String>>( new Comparator<Integer>() {
+
+ @Override
+ public int compare( final Integer first, final Integer second ) {
+ return second.compareTo( first );
+ }
+ } );
+ for( final Map.Entry<String, Integer> group_to_counts : counts.entrySet() ) {
+ final int c = group_to_counts.getValue();
+ if ( !counts_to_groups.containsKey( c ) ) {
+ counts_to_groups.put( c, new TreeSet<String>() );
+ }
+ counts_to_groups.get( c ).add( group_to_counts.getKey() );
+ }
+ sb.append( "<tr>" );
+ sb.append( "<td>" );
+ sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain_to_species_set.getKey() + "\">"
+ + domain_to_species_set.getKey() + "</a>" );
+ sb.append( " " );
+ sb.append( "</td>" );
+ boolean first = true;
+ for( final Entry<Integer, SortedSet<String>> count_to_groups : counts_to_groups.entrySet() ) {
+ if ( first ) {
+ first = false;
+ }
+ else {
+ sb.append( "<tr>" );
+ sb.append( "<td>" );
+ sb.append( "</td>" );
+ }
+ sb.append( "<td>" );
+ final SortedSet<String> groups = count_to_groups.getValue();
+ sb.append( count_to_groups.getKey() );
+ sb.append( " " );
+ for( final String group : groups ) {
+ final Color color = ForesterUtil.obtainColorDependingOnTaxonomyGroup( group );
+ if ( color == null ) {
+ throw new IllegalArgumentException( "no color found for taxonomy group\"" + group + "\"" );
+ }
+ final String hex = String.format( "#%02x%02x%02x",
+ color.getRed(),
+ color.getGreen(),
+ color.getBlue() );
+ sb.append( "<span style=\"color:" );
+ sb.append( hex );
+ sb.append( "\">" );
+ sb.append( " " );
+ sb.append( group );
+ sb.append( "</span>" );
+ }
+ sb.append( "</td>" );
+ sb.append( "</tr>" );
+ }
+ sb.append( ForesterUtil.getLineSeparator() );
+ }
+ sb.append( "</table>" );
+ return sb;
+ }
+
+ /**
+  * Sets the {@code _detailedness} field to {@code PUNCTILIOUS}.
+  */
+ private void init() {
+     this._detailedness = DomainSimilarityCalculator.Detailedness.PUNCTILIOUS;
+ }
+
+ /**
+  * @return the value of the {@code _treat_as_binary_comparison} field
+  */
+ private boolean isTreatAsBinaryComparison() {
+     return this._treat_as_binary_comparison;
+ }
+
+ /**
+  * Renders this similarity as one HTML table row.
+  * <p>
+  * Cells, in order: the domain id as a link (plus a named anchor), a Google
+  * Scholar link, the mean similarity score (only if the maximal score is
+  * positive; optionally followed by standard deviation and min-max range),
+  * the maximal difference, the maximal difference in counts (absolute value
+  * unless this is treated as a binary comparison), the number of species
+  * (not for binary comparisons), and finally a cell with the per-species
+  * data, the optional per-domain taxonomy codes and the taxonomy group
+  * distribution.
+  *
+  * @param tax_code_to_id_map forwarded to the per-species rendering
+  * @param phy forwarded to the per-species rendering and to the taxonomy
+  *            group distribution
+  * @param output_tax_codes_per_domain whether to include the output of
+  *            {@code getDomainDataInAlphabeticalOrder}
+  * @return the HTML row
+  */
+ private StringBuffer toStringBufferDetailedHTML( final Map<String, Integer> tax_code_to_id_map,
+                                                  final Phylogeny phy,
+                                                  final boolean output_tax_codes_per_domain ) {
+     final StringBuffer sb = new StringBuffer();
+     final String domain_id = getDomainId();
+     sb.append( "<tr>" );
+     sb.append( "<td>" );
+     sb.append( "<b>" );
+     sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain_id + "\" target=\"pfam_window\">"
+             + domain_id + "</a>" );
+     sb.append( "</b>" );
+     // The named anchor is closed right away so that it cannot swallow the content that follows.
+     sb.append( "<a name=\"" + domain_id + "\"></a>" );
+     sb.append( "</td>" );
+     sb.append( "<td>" );
+     sb.append( "<a href=\"" + SurfacingConstants.GOOGLE_SCHOLAR_SEARCH + domain_id
+             + "\" target=\"gs_window\">gs</a>" );
+     sb.append( "</td>" );
+     if ( getMaximalSimilarityScore() > 0 ) {
+         sb.append( "<td>" );
+         sb.append( ForesterUtil.round( getMeanSimilarityScore(), 3 ) );
+         sb.append( "</td>" );
+         if ( SurfacingConstants.PRINT_MORE_DOM_SIMILARITY_INFO && !isTreatAsBinaryComparison() ) {
+             sb.append( "<td>" );
+             sb.append( "(" );
+             sb.append( ForesterUtil.round( getStandardDeviationOfSimilarityScore(), 3 ) );
+             sb.append( ")" );
+             sb.append( "</td>" );
+             sb.append( "<td>" );
+             sb.append( "[" );
+             sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) );
+             sb.append( "-" );
+             sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) );
+             sb.append( "]" );
+             sb.append( "</td>" );
+         }
+     }
+     sb.append( "<td>" );
+     sb.append( getMaximalDifference() );
+     sb.append( "</td>" );
+     sb.append( "<td>" );
+     if ( isTreatAsBinaryComparison() ) {
+         sb.append( getMaximalDifferenceInCounts() );
+     }
+     else {
+         sb.append( Math.abs( getMaximalDifferenceInCounts() ) );
+     }
+     sb.append( "</td>" );
+     if ( !isTreatAsBinaryComparison() ) {
+         sb.append( "<td>" );
+         sb.append( "<b>" );
+         sb.append( getSpeciesData().size() );
+         sb.append( "</b>" );
+         sb.append( "</td>" );
+     }
+     sb.append( "<td>" );
+     // Only the species ordering differs between the two variants.
+     final boolean use_custom_order = ( getSpeciesCustomOrder() != null ) && !getSpeciesCustomOrder().isEmpty();
+     if ( use_custom_order ) {
+         sb.append( getSpeciesDataInCustomOrder( true, tax_code_to_id_map, phy ) );
+     }
+     else {
+         sb.append( getSpeciesDataInAlphabeticalOrder( true, tax_code_to_id_map, phy ) );
+     }
+     if ( output_tax_codes_per_domain ) {
+         sb.append( getDomainDataInAlphabeticalOrder() );
+     }
+     // getTaxonomyGroupDistribution returns null when a taxonomy group cannot be
+     // resolved; appending null would write the literal text "null" into the page.
+     final StringBuffer group_distribution = getTaxonomyGroupDistribution( phy );
+     if ( group_distribution != null ) {
+         sb.append( group_distribution );
+     }
+     sb.append( "</td>" );
+     sb.append( "</tr>" );
+     return sb;
+ }
+
+ /**
+  * Renders this similarity as plain text: the domain id, a tab, the output of
+  * {@code getSpeciesDataInAlphabeticalOrder} in non-HTML mode (without
+  * taxonomy map and phylogeny), and a terminating newline.
+  *
+  * @return the rendered text
+  */
+ private StringBuffer toStringBufferSimpleTabDelimited() {
+     final StringBuffer line = new StringBuffer();
+     line.append( getDomainId() ).append( "\t" );
+     line.append( getSpeciesDataInAlphabeticalOrder( false, null, null ) ).append( "\n" );
+     return line;
+ }