inprogress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Tue, 9 Jul 2013 22:43:56 +0000 (22:43 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Tue, 9 Jul 2013 22:43:56 +0000 (22:43 +0000)
forester/java/src/org/forester/application/surfacing.java
forester/java/src/org/forester/surfacing/BasicDomainSimilarityCalculator.java
forester/java/src/org/forester/surfacing/BasicGenomeWideCombinableDomains.java
forester/java/src/org/forester/surfacing/DomainSimilarity.java
forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java
forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java
forester/java/src/org/forester/surfacing/PrintableSpeciesSpecificDomainSimilariyData.java
forester/java/src/org/forester/surfacing/SurfacingUtil.java

index 10a615f..0c548af 100644 (file)
@@ -225,8 +225,8 @@ public class surfacing {
     final static private String                               INPUT_GENOMES_FILE_OPTION                                                     = "genomes";
     final static private String                               INPUT_SPECIES_TREE_OPTION                                                     = "species_tree";
     final static private String                               SEQ_EXTRACT_OPTION                                                            = "prot_extract";
-    final static private String                               PRG_VERSION                                                                   = "2.280";
-    final static private String                               PRG_DATE                                                                      = "130701";
+    final static private String                               PRG_VERSION                                                                   = "2.290";
+    final static private String                               PRG_DATE                                                                      = "130709";
     final static private String                               E_MAIL                                                                        = "czmasek@burnham.org";
     final static private String                               WWW                                                                           = "https://sites.google.com/site/cmzmasek/home/software/forester/surfacing";
     final static private boolean                              IGNORE_DUFS_DEFAULT                                                           = true;
@@ -600,9 +600,6 @@ public class surfacing {
         allowed_options.add( surfacing.PAIRWISE_DOMAIN_COMPARISONS_OPTION );
         allowed_options.add( surfacing.IGNORE_DOMAINS_WITHOUT_COMBINATIONS_IN_ALL_SPECIES_OPTION );
         allowed_options.add( surfacing.CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS );
-        //allowed_options.add( JACKNIFE_OPTION );
-        // allowed_options.add( JACKNIFE_RANDOM_SEED_OPTION );
-        // allowed_options.add( JACKNIFE_RATIO_OPTION );
         allowed_options.add( INPUT_SPECIES_TREE_OPTION );
         allowed_options.add( FILTER_POSITIVE_OPTION );
         allowed_options.add( FILTER_NEGATIVE_OPTION );
@@ -1844,22 +1841,6 @@ public class surfacing {
             }
             System.out.println( "Time for processing                            : " + parser.getTime() + "ms" );
             log( "", log_writer );
-            html_desc.append( "<tr><td>" + input_file_properties[ i ][ 0 ] + ":</td><td>doms analyzed: "
-                    + parser.getDomainsStored() + "; doms ignored: [ind score cutoffs: "
-                    + parser.getDomainsIgnoredDueToIndividualScoreCutoff() + "] [E-value cutoff: "
-                    + parser.getDomainsIgnoredDueToEval() + "] [DUF: " + parser.getDomainsIgnoredDueToDuf()
-                    + "] [virus like ids: " + parser.getDomainsIgnoredDueToVirusLikeIds() + "] [negative dom filter: "
-                    + parser.getDomainsIgnoredDueToNegativeDomainFilter() + "] [overlap: "
-                    + parser.getDomainsIgnoredDueToOverlap() + "]" );
-            if ( negative_filter_file != null ) {
-                html_desc.append( "; proteins ignored due to negative filter: "
-                        + parser.getProteinsIgnoredDueToFilter() );
-            }
-            if ( positive_filter_file != null ) {
-                html_desc.append( "; proteins ignored due to positive filter: "
-                        + parser.getProteinsIgnoredDueToFilter() );
-            }
-            html_desc.append( "</td></tr>" + nl );
             try {
                 int count = 0;
                 for( final Protein protein : protein_list ) {
@@ -2097,10 +2078,10 @@ public class surfacing {
                                                     number_of_genomes == 2,
                                                     species_order,
                                                     domain_similarity_print_option,
-                                                    domain_similarity_sort_field,
                                                     scoring,
                                                     true,
-                                                    tax_code_to_id_map );
+                                                    tax_code_to_id_map,
+                                                    false );
             ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote main output (includes domain similarities) to: \""
                     + ( out_dir == null ? my_outfile : out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile ) + "\"" );
         }
index 846aa7a..0e8406a 100644 (file)
@@ -138,7 +138,6 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat
                                                   0,
                                                   0,
                                                   species_data,
-                                                  getSort(),
                                                   isSortBySpeciesCountFirst(),
                                                   isTreatAsBinaryComparison() );
         }
@@ -198,7 +197,6 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat
                                                         max_difference_in_counts,
                                                         max_difference,
                                                         species_data,
-                                                        getSort(),
                                                         isSortBySpeciesCountFirst(),
                                                         isTreatAsBinaryComparison() );
         }
@@ -213,17 +211,12 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat
                                                         max_difference_in_counts,
                                                         max_difference,
                                                         species_data,
-                                                        getSort(),
                                                         isSortBySpeciesCountFirst(),
                                                         isTreatAsBinaryComparison() );
         }
         return similarity;
     }
 
-    private DomainSimilarity.DomainSimilaritySortField getSort() {
-        return _sort;
-    }
-
     private boolean isSortBySpeciesCountFirst() {
         return _sort_by_species_count_first;
     }
@@ -235,8 +228,7 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat
     private static SpeciesSpecificDomainSimilariyData createSpeciesSpecificDomainSimilariyData( final CombinableDomains cd ) {
         final SpeciesSpecificDomainSimilariyData sd = new PrintableSpeciesSpecificDomainSimilariyData( cd.getKeyDomainProteinsCount(),
                                                                                                        cd.getKeyDomainCount(),
-                                                                                                       cd.getNumberOfCombinableDomains(),
-                                                                                                       cd.getKeyDomainConfidenceDescriptiveStatistics() );
+                                                                                                       cd.getNumberOfCombinableDomains() );
         for( final String domain : cd.getCombinableDomains() ) {
             sd.addProteinsExhibitingCombinationCount( domain, cd.getNumberOfProteinsExhibitingCombination( domain ) );
         }
index c6d205f..6e782a3 100644 (file)
@@ -315,13 +315,6 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom
                         else {
                             domain_combination = new BasicCombinableDomains( pd_i.getDomainId(), species );
                         }
-                        // ^^       if ( ( domain_id_to_go_ids_map != null )
-                        // ^^             && domain_id_to_go_ids_map.containsKey( pd_i.getDomainId() ) ) {
-                        // ^^        final List<GoId> go_ids = domain_id_to_go_ids_map.get( pd_i.getDomainId() );
-                        // ^^        for( final GoId go_id : go_ids ) {
-                        // ^^           domain_combination.getKeyDomain().addGoId( go_id );
-                        // ^^       }
-                        // ^^  }
                         instance.add( id_i, domain_combination );
                     }
                     final Set<String> saw_j = new HashSet<String>();
index 0787439..be7273e 100644 (file)
@@ -47,7 +47,7 @@ public interface DomainSimilarity extends Comparable<DomainSimilarity> {
         MIN, MAX, SD, MEAN, ABS_MAX_COUNTS_DIFFERENCE, MAX_COUNTS_DIFFERENCE, MAX_DIFFERENCE, SPECIES_COUNT, DOMAIN_ID,
     }
 
-    public SortedSet<String> getCombinableDomainIds( final Species species_of_combinable_domain );;
+    public SortedSet<String> getCombinableDomainIds( final Species species_of_combinable_domain );
 
     public String getDomainId();
 
index eae12fc..e9ee8b3 100644 (file)
@@ -217,10 +217,10 @@ public class PairwiseGenomeComparator {
                                                                                 true,
                                                                                 null,
                                                                                 domain_similarity_print_option,
-                                                                                domain_similarity_sort_field,
                                                                                 scoring,
                                                                                 false,
-                                                                                tax_code_to_id_map );
+                                                                                tax_code_to_id_map,
+                                                                                false );
                     }
                     catch ( final IOException e ) {
                         ForesterUtil.fatalError( command_line_prg_name, "Failed to write similarites to: \""
index 891fba5..c785ff7 100644 (file)
@@ -30,6 +30,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.SortedMap;
 import java.util.SortedSet;
+import java.util.TreeMap;
 import java.util.TreeSet;
 
 import org.forester.species.Species;
@@ -39,11 +40,9 @@ import org.forester.util.ForesterUtil;
 public class PrintableDomainSimilarity implements DomainSimilarity {
 
     final public static String                                           SPECIES_SEPARATOR = "  ";
-    final private static char                                            TAB               = '\t';
-    final private static int                                             BEFORE            = -1;
     final private static int                                             EQUAL             = 0;
-    final private static int                                             AFTER             = 1;
     final private static String                                          NO_SPECIES        = "     ";
+    private static final boolean                                         PRINT_MORE_INFO   = false;
     final private double                                                 _min;
     final private double                                                 _max;
     final private double                                                 _mean;
@@ -53,18 +52,10 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
     private final int                                                    _max_difference;
     final private CombinableDomains                                      _combinable_domains;
     final private SortedMap<Species, SpeciesSpecificDomainSimilariyData> _species_data;
-    final private DomainSimilaritySortField                              _sort_field;
     private List<Species>                                                _species_order;
-    private final boolean                                                _sort_by_species_count_first;
     private DomainSimilarityCalculator.Detailedness                      _detailedness;
     private final boolean                                                _treat_as_binary_comparison;
 
-    /**
-     * If go_id_to_term_map not null, detailed GO information is written,
-     * only GO ids otherwise.
-     * 
-     * 
-     */
     public PrintableDomainSimilarity( final CombinableDomains combinable_domains,
                                       final double min,
                                       final double max,
@@ -75,15 +66,11 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
                                       final int max_difference_in_counts,
                                       final int max_difference,
                                       final SortedMap<Species, SpeciesSpecificDomainSimilariyData> species_data,
-                                      final DomainSimilaritySortField sort_field,
                                       final boolean sort_by_species_count_first,
                                       final boolean treat_as_binary_comparison ) {
         if ( combinable_domains == null ) {
             throw new IllegalArgumentException( "attempt to use null combinable domains" );
         }
-        if ( sort_field == null ) {
-            throw new IllegalArgumentException( "attempt to use null sorting" );
-        }
         if ( species_data == null ) {
             throw new IllegalArgumentException( "attempt to use null species data" );
         }
@@ -112,8 +99,6 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
         _max_difference_in_counts = max_difference_in_counts;
         _max_difference = max_difference;
         _species_data = species_data;
-        _sort_field = sort_field;
-        _sort_by_species_count_first = sort_by_species_count_first;
         _treat_as_binary_comparison = treat_as_binary_comparison;
         final int s = species_data.size();
         if ( ( ( s * s ) - s ) != ( getN() * 2 ) ) {
@@ -134,21 +119,8 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
                                                final Species species,
                                                final boolean html,
                                                final Map<String, Integer> tax_code_to_id_map ) {
-        if ( getDetaildness() != DomainSimilarityCalculator.Detailedness.BASIC ) {
-            sb.append( "[" );
-        }
         if ( html ) {
-            sb.append( "<b>" );
-            final String tax_code = species.getSpeciesId();
-            if ( !ForesterUtil.isEmpty( tax_code )
-                    && ( ( tax_code_to_id_map != null ) && tax_code_to_id_map.containsKey( tax_code ) ) ) {
-                sb.append( "<a href=\"" + SurfacingConstants.UNIPROT_TAXONOMY_ID_LINK
-                        + tax_code_to_id_map.get( tax_code ) + "\" target=\"taxonomy_window\">" + tax_code + "</a>" );
-            }
-            else {
-                sb.append( tax_code );
-            }
-            sb.append( "</b>" );
+            addTaxWithLink( sb, species.getSpeciesId(), tax_code_to_id_map );
         }
         else {
             sb.append( species.getSpeciesId() );
@@ -156,48 +128,38 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
         if ( getDetaildness() != DomainSimilarityCalculator.Detailedness.BASIC ) {
             sb.append( ":" );
             sb.append( getSpeciesData().get( species ).toStringBuffer( getDetaildness(), html ) );
-            sb.append( "]" );
         }
         if ( html ) {
             sb.append( "<br>" );
         }
-        sb.append( PrintableDomainSimilarity.SPECIES_SEPARATOR );
-    }
-
-    private void boldEndIfSortedBy( final DomainSimilaritySortField sort_field, final StringBuffer sb ) {
-        if ( getSortField() == sort_field ) {
-            sb.append( "</b>" );
+        else {
+            sb.append( PrintableDomainSimilarity.SPECIES_SEPARATOR );
         }
     }
 
-    private void boldStartIfSortedBy( final DomainSimilaritySortField sort_field, final StringBuffer sb ) {
-        if ( getSortField() == sort_field ) {
-            sb.append( "<b>" );
+    private void addTaxWithLink( final StringBuffer sb,
+                                 final String tax_code,
+                                 final Map<String, Integer> tax_code_to_id_map ) {
+        sb.append( "<b>" );
+        if ( !ForesterUtil.isEmpty( tax_code )
+                && ( ( tax_code_to_id_map != null ) && tax_code_to_id_map.containsKey( tax_code ) ) ) {
+            sb.append( "<a href=\"" + SurfacingConstants.UNIPROT_TAXONOMY_ID_LINK + tax_code_to_id_map.get( tax_code )
+                    + "\" target=\"taxonomy_window\">" + tax_code + "</a>" );
+        }
+        else {
+            sb.append( tax_code );
         }
+        sb.append( "</b>" );
     }
 
     private int compareByDomainId( final DomainSimilarity other ) {
-        return getDomainId().compareTo( other.getDomainId() );
-    }
-
-    private int compareBySpeciesCount( final DomainSimilarity domain_similarity ) {
-        final int s_this = getSpeciesData().size();
-        final int s_other = domain_similarity.getSpeciesData().size();
-        if ( s_this < s_other ) {
-            return PrintableDomainSimilarity.BEFORE;
-        }
-        else if ( s_this > s_other ) {
-            return PrintableDomainSimilarity.AFTER;
-        }
-        else {
-            return PrintableDomainSimilarity.EQUAL;
-        }
+        return getDomainId().compareToIgnoreCase( other.getDomainId() );
     }
 
     @Override
     public int compareTo( final DomainSimilarity domain_similarity ) {
         if ( this == domain_similarity ) {
-            return PrintableDomainSimilarity.EQUAL;
+            return EQUAL;
         }
         else if ( domain_similarity == null ) {
             throw new IllegalArgumentException( "attempt to compare " + this.getClass() + " to null" );
@@ -206,138 +168,7 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
             throw new IllegalArgumentException( "attempt to compare " + this.getClass() + " to "
                     + domain_similarity.getClass() );
         }
-        switch ( getSortField() ) {
-            case MIN:
-                if ( isSortBySpeciesCountFirst() ) {
-                    final int i = compareBySpeciesCount( domain_similarity );
-                    if ( i != PrintableDomainSimilarity.EQUAL ) {
-                        return i;
-                    }
-                }
-                if ( getMinimalSimilarityScore() < domain_similarity.getMinimalSimilarityScore() ) {
-                    return PrintableDomainSimilarity.BEFORE;
-                }
-                else if ( getMinimalSimilarityScore() > domain_similarity.getMinimalSimilarityScore() ) {
-                    return PrintableDomainSimilarity.AFTER;
-                }
-                else {
-                    return compareByDomainId( domain_similarity );
-                }
-            case MAX:
-                if ( isSortBySpeciesCountFirst() ) {
-                    final int i = compareBySpeciesCount( domain_similarity );
-                    if ( i != PrintableDomainSimilarity.EQUAL ) {
-                        return i;
-                    }
-                }
-                if ( getMaximalSimilarityScore() < domain_similarity.getMaximalSimilarityScore() ) {
-                    return PrintableDomainSimilarity.BEFORE;
-                }
-                else if ( getMaximalSimilarityScore() > domain_similarity.getMaximalSimilarityScore() ) {
-                    return PrintableDomainSimilarity.AFTER;
-                }
-                else {
-                    return compareByDomainId( domain_similarity );
-                }
-            case MEAN:
-                if ( isSortBySpeciesCountFirst() ) {
-                    final int i = compareBySpeciesCount( domain_similarity );
-                    if ( i != PrintableDomainSimilarity.EQUAL ) {
-                        return i;
-                    }
-                }
-                if ( getMeanSimilarityScore() < domain_similarity.getMeanSimilarityScore() ) {
-                    return PrintableDomainSimilarity.BEFORE;
-                }
-                else if ( getMeanSimilarityScore() > domain_similarity.getMeanSimilarityScore() ) {
-                    return PrintableDomainSimilarity.AFTER;
-                }
-                else {
-                    return compareByDomainId( domain_similarity );
-                }
-            case SD:
-                if ( isSortBySpeciesCountFirst() ) {
-                    final int i = compareBySpeciesCount( domain_similarity );
-                    if ( i != PrintableDomainSimilarity.EQUAL ) {
-                        return i;
-                    }
-                }
-                if ( getStandardDeviationOfSimilarityScore() < domain_similarity
-                        .getStandardDeviationOfSimilarityScore() ) {
-                    return PrintableDomainSimilarity.BEFORE;
-                }
-                else if ( getStandardDeviationOfSimilarityScore() > domain_similarity
-                        .getStandardDeviationOfSimilarityScore() ) {
-                    return PrintableDomainSimilarity.AFTER;
-                }
-                else {
-                    return compareByDomainId( domain_similarity );
-                }
-            case MAX_DIFFERENCE:
-                if ( isSortBySpeciesCountFirst() ) {
-                    final int i = compareBySpeciesCount( domain_similarity );
-                    if ( i != PrintableDomainSimilarity.EQUAL ) {
-                        return i;
-                    }
-                }
-                if ( getMaximalDifference() > domain_similarity.getMaximalDifference() ) {
-                    return PrintableDomainSimilarity.BEFORE;
-                }
-                else if ( getMaximalDifference() < domain_similarity.getMaximalDifference() ) {
-                    return PrintableDomainSimilarity.AFTER;
-                }
-                else {
-                    return compareByDomainId( domain_similarity );
-                }
-            case ABS_MAX_COUNTS_DIFFERENCE:
-                if ( isSortBySpeciesCountFirst() ) {
-                    final int i = compareBySpeciesCount( domain_similarity );
-                    if ( i != PrintableDomainSimilarity.EQUAL ) {
-                        return i;
-                    }
-                }
-                if ( Math.abs( getMaximalDifferenceInCounts() ) > Math.abs( domain_similarity
-                        .getMaximalDifferenceInCounts() ) ) {
-                    return PrintableDomainSimilarity.BEFORE;
-                }
-                else if ( Math.abs( getMaximalDifferenceInCounts() ) < Math.abs( domain_similarity
-                        .getMaximalDifferenceInCounts() ) ) {
-                    return PrintableDomainSimilarity.AFTER;
-                }
-                else {
-                    return compareByDomainId( domain_similarity );
-                }
-            case MAX_COUNTS_DIFFERENCE:
-                if ( getSpeciesData().size() != 2 ) {
-                    throw new RuntimeException( "attempt to sort by maximal difference with species not equal to two" );
-                }
-                if ( isSortBySpeciesCountFirst() ) {
-                    final int i = compareBySpeciesCount( domain_similarity );
-                    if ( i != PrintableDomainSimilarity.EQUAL ) {
-                        return i;
-                    }
-                }
-                if ( getMaximalDifferenceInCounts() > domain_similarity.getMaximalDifferenceInCounts() ) {
-                    return PrintableDomainSimilarity.BEFORE;
-                }
-                else if ( getMaximalDifferenceInCounts() < domain_similarity.getMaximalDifferenceInCounts() ) {
-                    return PrintableDomainSimilarity.AFTER;
-                }
-                else {
-                    return compareByDomainId( domain_similarity );
-                }
-            case SPECIES_COUNT:
-                final int i = compareBySpeciesCount( domain_similarity );
-                if ( i != PrintableDomainSimilarity.EQUAL ) {
-                    return i;
-                }
-                else {
-                    return compareByDomainId( domain_similarity );
-                }
-            case DOMAIN_ID:
-                return compareByDomainId( domain_similarity );
-        }
-        throw new AssertionError( "Unknown sort method: " + getSortField() );
+        return compareByDomainId( domain_similarity );
     }
 
     @Override
@@ -395,10 +226,6 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
         return _n;
     }
 
-    private DomainSimilaritySortField getSortField() {
-        return _sort_field;
-    }
-
     @Override
     public SortedSet<Species> getSpecies() {
         final SortedSet<Species> species = new TreeSet<Species>();
@@ -426,6 +253,29 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
         return sb;
     }
 
+    private StringBuffer getDomainDataInAlphabeticalOrder() {
+        final SortedMap<String, SortedSet<String>> m = new TreeMap<String, SortedSet<String>>();
+        final StringBuffer sb = new StringBuffer();
+        for( final Species species : getSpeciesData().keySet() ) {
+            for( final String combable_dom : getCombinableDomainIds( species ) ) {
+                if ( !m.containsKey( combable_dom ) ) {
+                    m.put( combable_dom, new TreeSet<String>() );
+                }
+                m.get( combable_dom ).add( species.getSpeciesId() );
+            }
+        }
+        for( final Map.Entry<String, SortedSet<String>> e : m.entrySet() ) {
+            sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + e.getKey() + "\">" + e.getKey() + "</a>" );
+            sb.append( ": " );
+            for( final String s : e.getValue() ) {
+                sb.append( s );
+                sb.append( " " );
+            }
+            sb.append( "<br>" );
+        }
+        return sb;
+    }
+
     private StringBuffer getSpeciesDataInCustomOrder( final boolean html, final Map<String, Integer> tax_code_to_id_map ) {
         final StringBuffer sb = new StringBuffer();
         for( final Species order_species : getSpeciesCustomOrder() ) {
@@ -449,10 +299,6 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
         _detailedness = DomainSimilarityCalculator.Detailedness.PUNCTILIOUS;
     }
 
-    private boolean isSortBySpeciesCountFirst() {
-        return _sort_by_species_count_first;
-    }
-
     private boolean isTreatAsBinaryComparison() {
         return _treat_as_binary_comparison;
     }
@@ -485,10 +331,10 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
         final StringBuffer sb = new StringBuffer();
         sb.append( "<tr>" );
         sb.append( "<td>" );
-        boldStartIfSortedBy( DomainSimilaritySortField.DOMAIN_ID, sb );
+        sb.append( "<b>" );
         sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + getDomainId() + "\" target=\"pfam_window\">"
                 + getDomainId() + "</a>" );
-        boldEndIfSortedBy( DomainSimilaritySortField.DOMAIN_ID, sb );
+        sb.append( "</b>" );
         sb.append( "<a name=\"" + getDomainId() + "\">" );
         sb.append( "</td>" );
         sb.append( "<td>" );
@@ -496,70 +342,52 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
                 + "\" target=\"gs_window\">gs</a>" );
         sb.append( "</td>" );
         sb.append( "<td>" );
-        boldStartIfSortedBy( DomainSimilaritySortField.MEAN, sb );
         sb.append( ForesterUtil.round( getMeanSimilarityScore(), 3 ) );
-        boldEndIfSortedBy( DomainSimilaritySortField.MEAN, sb );
         sb.append( "</td>" );
-        if ( !isTreatAsBinaryComparison() ) {
-            sb.append( "<td>" );
-            sb.append( "(" );
-            boldStartIfSortedBy( DomainSimilaritySortField.SD, sb );
-            sb.append( ForesterUtil.round( getStandardDeviationOfSimilarityScore(), 3 ) );
-            boldEndIfSortedBy( DomainSimilaritySortField.SD, sb );
-            sb.append( ")" );
-            sb.append( "</td>" );
-            sb.append( "<td>" );
-            sb.append( "[" );
-            boldStartIfSortedBy( DomainSimilaritySortField.MIN, sb );
-            sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) );
-            boldEndIfSortedBy( DomainSimilaritySortField.MIN, sb );
-            sb.append( "-" );
-            boldStartIfSortedBy( DomainSimilaritySortField.MAX, sb );
-            sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) );
-            boldEndIfSortedBy( DomainSimilaritySortField.MAX, sb );
-            sb.append( "]" );
-            sb.append( "</td>" );
+        if ( PRINT_MORE_INFO ) {
+            if ( !isTreatAsBinaryComparison() ) {
+                sb.append( "<td>" );
+                sb.append( "(" );
+                sb.append( ForesterUtil.round( getStandardDeviationOfSimilarityScore(), 3 ) );
+                sb.append( ")" );
+                sb.append( "</td>" );
+                sb.append( "<td>" );
+                sb.append( "[" );
+                sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) );
+                sb.append( "-" );
+                sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) );
+                sb.append( "]" );
+                sb.append( "</td>" );
+            }
         }
         sb.append( "<td>" );
-        boldStartIfSortedBy( DomainSimilaritySortField.MAX_DIFFERENCE, sb );
         sb.append( getMaximalDifference() );
-        boldEndIfSortedBy( DomainSimilaritySortField.MAX_DIFFERENCE, sb );
         sb.append( "</td>" );
         sb.append( "<td>" );
         if ( isTreatAsBinaryComparison() ) {
-            boldStartIfSortedBy( DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE, sb );
-            boldStartIfSortedBy( DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE, sb );
             sb.append( getMaximalDifferenceInCounts() );
-            boldEndIfSortedBy( DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE, sb );
-            boldStartIfSortedBy( DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE, sb );
         }
         else {
-            boldStartIfSortedBy( DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE, sb );
-            boldStartIfSortedBy( DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE, sb );
             sb.append( Math.abs( getMaximalDifferenceInCounts() ) );
-            boldEndIfSortedBy( DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE, sb );
-            boldStartIfSortedBy( DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE, sb );
         }
         sb.append( "</td>" );
         if ( !isTreatAsBinaryComparison() ) {
             sb.append( "<td>" );
-            if ( ( getSortField() == DomainSimilaritySortField.SPECIES_COUNT ) || isSortBySpeciesCountFirst() ) {
-                sb.append( "<b>" );
-            }
+            sb.append( "<b>" );
             sb.append( getSpeciesData().size() );
-            if ( ( getSortField() == DomainSimilaritySortField.SPECIES_COUNT ) || isSortBySpeciesCountFirst() ) {
-                sb.append( "</b>" );
-            }
+            sb.append( "</b>" );
             sb.append( "</td>" );
         }
         if ( ( getSpeciesCustomOrder() == null ) || getSpeciesCustomOrder().isEmpty() ) {
             sb.append( "<td>" );
             sb.append( getSpeciesDataInAlphabeticalOrder( true, tax_code_to_id_map ) );
+            sb.append( getDomainDataInAlphabeticalOrder() );
             sb.append( "</td>" );
         }
         else {
             sb.append( "<td>" );
             sb.append( getSpeciesDataInCustomOrder( true, tax_code_to_id_map ) );
+            sb.append( getDomainDataInAlphabeticalOrder() );
             sb.append( "</td>" );
         }
         sb.append( "</tr>" );
@@ -569,49 +397,6 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
     private StringBuffer toStringBufferSimpleTabDelimited() { // Simple text form; with the switch below removed, only the domain id is emitted.
         final StringBuffer sb = new StringBuffer();
         sb.append( getDomainId() );
-        switch ( getSortField() ) {
-            case MIN:
-                sb.append( TAB );
-                sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) );
-                break;
-            case MAX:
-                sb.append( TAB );
-                sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) );
-                break;
-            case MEAN:
-                sb.append( TAB );
-                sb.append( ForesterUtil.round( getMeanSimilarityScore(), 3 ) );
-                break;
-            case SD:
-                sb.append( TAB );
-                sb.append( ForesterUtil.round( getStandardDeviationOfSimilarityScore(), 3 ) );
-                break;
-            case MAX_DIFFERENCE:
-                sb.append( TAB );
-                sb.append( getMaximalDifference() );
-            case ABS_MAX_COUNTS_DIFFERENCE:
-            case MAX_COUNTS_DIFFERENCE:
-                sb.append( TAB );
-                if ( isTreatAsBinaryComparison() ) {
-                    sb.append( getMaximalDifferenceInCounts() );
-                }
-                else {
-                    sb.append( Math.abs( getMaximalDifferenceInCounts() ) );
-                }
-                break;
-            case SPECIES_COUNT:
-                sb.append( TAB );
-                sb.append( getSpeciesData().size() );
-                break;
-            case DOMAIN_ID:
-                break;
-            default:
-                throw new AssertionError( "Unknown sort method: " + getSortField() );
-        }
-        // ^^     if ( getGoAnnotationOutput() != DomainSimilarityCalculator.GoAnnotationOutput.NONE ) {
-        // ^^       sb.append( TAB );
-        // ^^       addGoInformation( sb, true, false );
-        // ^^   }
         return sb;
     }
 
index d361573..74e168d 100644 (file)
 
 package org.forester.surfacing;
 
-import java.text.DecimalFormat;
-import java.text.NumberFormat;
 import java.util.Set;
 import java.util.SortedMap;
 import java.util.TreeMap;
 
-import org.forester.util.DescriptiveStatistics;
-
 class PrintableSpeciesSpecificDomainSimilariyData implements SpeciesSpecificDomainSimilariyData {
 
-    private final static NumberFormat   FORMATTER = new DecimalFormat( "0.0E0" );
-    final SortedMap<String, Integer>    _combinable_domain_id_to_count_map;
-    final private int                   _key_domain_proteins_count;
-    final private int                   _key_domain_domains_count;
-    final private int                   _combinable_domains_count;
-    final private DescriptiveStatistics _key_domain_confidence_descriptive_statistics;
+    final SortedMap<String, Integer> _combinable_domain_id_to_count_map;
+    final private int                _key_domain_proteins_count;
+    final private int                _key_domain_domains_count;
+    final private int                _combinable_domains_count;
 
     public PrintableSpeciesSpecificDomainSimilariyData( final int key_domain_proteins_count, // Stores the given counts and creates an empty id-to-count map.
                                                         final int key_domain_domains_count,
-                                                        final int combinable_domains,
-                                                        final DescriptiveStatistics key_domain_confidence_descriptive_statistics ) {
+                                                        final int combinable_domains ) {
         _key_domain_proteins_count = key_domain_proteins_count;
         _key_domain_domains_count = key_domain_domains_count;
         _combinable_domains_count = combinable_domains;
-        _key_domain_confidence_descriptive_statistics = key_domain_confidence_descriptive_statistics;
         _combinable_domain_id_to_count_map = new TreeMap<String, Integer>(); // starts empty; a TreeMap iterates its String keys in sorted order
     }
 
@@ -72,10 +64,6 @@ class PrintableSpeciesSpecificDomainSimilariyData implements SpeciesSpecificDoma
         return _combinable_domains_count;
     }
 
-    private DescriptiveStatistics getKeyDomainConfidenceDescriptiveStatistics() {
-        return _key_domain_confidence_descriptive_statistics;
-    }
-
     private int getKeyDomainDomainsCount() {
         return _key_domain_domains_count;
     }
@@ -108,14 +96,6 @@ class PrintableSpeciesSpecificDomainSimilariyData implements SpeciesSpecificDoma
             sb.append( getKeyDomainProteinsCount() );
             sb.append( ", " );
             sb.append( getCombinableDomainsCount() );
-            sb.append( ", " );
-            if ( html ) {
-                sb.append( "<i>" );
-            }
-            sb.append( FORMATTER.format( getKeyDomainConfidenceDescriptiveStatistics().arithmeticMean() ) );
-            if ( html ) {
-                sb.append( "</i>" );
-            }
             if ( !getCombinableDomainIdToCountsMap().isEmpty() ) {
                 sb.append( ":" );
             }
@@ -136,9 +116,6 @@ class PrintableSpeciesSpecificDomainSimilariyData implements SpeciesSpecificDoma
                 sb.append( ":" );
                 sb.append( getCombinableDomainIdToCountsMap().get( domain_id ) );
             }
-            if ( i < ( ids.size() - 1 ) ) {
-                sb.append( "," );
-            }
         }
         return sb;
     }
index 3191312..9dfc40f 100644 (file)
@@ -451,10 +451,6 @@ public final class SurfacingUtil {
                     out.write( species + "\t" );
                 }
                 out.write( ForesterUtil.LINE_SEPARATOR );
-                // DescriptiveStatistics stats_for_domain = domain_lengths
-                //         .calculateMeanBasedStatistics();
-                //AsciiHistogram histo = new AsciiHistogram( stats_for_domain );
-                //System.out.println( histo.toStringBuffer( 40, '=', 60, 4 ).toString() );
             }
         }
         out.write( ForesterUtil.LINE_SEPARATOR );
@@ -488,16 +484,6 @@ public final class SurfacingUtil {
             }
         }
         out.close();
-        //        final List<HistogramData> histogram_datas = new ArrayList<HistogramData>();
-        //        for( int i = 0; i < number_of_genomes; ++i ) {
-        //            final Species species = new BasicSpecies( input_file_properties[ i ][ 0 ] );
-        //            histogram_datas
-        //                    .add( new HistogramData( species.toString(), domain_lengths_table
-        //                            .calculateMeanBasedStatisticsForSpecies( species )
-        //                            .getDataAsDoubleArray(), 5, 600, null, 60 ) );
-        //        }
-        //        final HistogramsFrame hf = new HistogramsFrame( histogram_datas );
-        //        hf.setVisible( true );
         System.gc();
     }
 
@@ -1668,101 +1654,28 @@ public final class SurfacingUtil {
                                                                        final boolean treat_as_binary,
                                                                        final List<Species> species_order,
                                                                        final PrintableDomainSimilarity.PRINT_OPTION print_option,
-                                                                       final DomainSimilarity.DomainSimilaritySortField sort_field,
                                                                        final DomainSimilarity.DomainSimilarityScoring scoring,
                                                                        final boolean verbose,
-                                                                       final Map<String, Integer> tax_code_to_id_map )
+                                                                       final Map<String, Integer> tax_code_to_id_map,
+                                                                       final boolean print_some_stats )
             throws IOException {
-        final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
-        String histogram_title = null;
-        switch ( sort_field ) {
-            case ABS_MAX_COUNTS_DIFFERENCE:
-                if ( treat_as_binary ) {
-                    histogram_title = "absolute counts difference:";
-                }
-                else {
-                    histogram_title = "absolute (maximal) counts difference:";
-                }
-                break;
-            case MAX_COUNTS_DIFFERENCE:
-                if ( treat_as_binary ) {
-                    histogram_title = "counts difference:";
-                }
-                else {
-                    histogram_title = "(maximal) counts difference:";
-                }
-                break;
-            case DOMAIN_ID:
-                histogram_title = "score mean:";
-                break;
-            case MIN:
-                histogram_title = "score minimum:";
-                break;
-            case MAX:
-                histogram_title = "score maximum:";
-                break;
-            case MAX_DIFFERENCE:
-                if ( treat_as_binary ) {
-                    histogram_title = "difference:";
-                }
-                else {
-                    histogram_title = "(maximal) difference:";
+        DescriptiveStatistics stats = null;
+        AsciiHistogram histo = null;
+        if ( print_some_stats ) {
+            stats = new BasicDescriptiveStatistics();
+            final String histogram_title = "score mean distribution:";
+            for( final DomainSimilarity similarity : similarities ) {
+                stats.addValue( similarity.getMeanSimilarityScore() );
+            }
+            try {
+                if ( stats.getMin() < stats.getMax() ) {
+                    histo = new AsciiHistogram( stats, histogram_title );
                 }
-                break;
-            case MEAN:
-                histogram_title = "score mean:";
-                break;
-            case SD:
-                histogram_title = "score standard deviation:";
-                break;
-            case SPECIES_COUNT:
-                histogram_title = "species number:";
-                break;
-            default:
-                throw new AssertionError( "Unknown sort field: " + sort_field );
-        }
-        for( final DomainSimilarity similarity : similarities ) {
-            switch ( sort_field ) {
-                case ABS_MAX_COUNTS_DIFFERENCE:
-                    stats.addValue( Math.abs( similarity.getMaximalDifferenceInCounts() ) );
-                    break;
-                case MAX_COUNTS_DIFFERENCE:
-                    stats.addValue( similarity.getMaximalDifferenceInCounts() );
-                    break;
-                case DOMAIN_ID:
-                    stats.addValue( similarity.getMeanSimilarityScore() );
-                    break;
-                case MIN:
-                    stats.addValue( similarity.getMinimalSimilarityScore() );
-                    break;
-                case MAX:
-                    stats.addValue( similarity.getMaximalSimilarityScore() );
-                    break;
-                case MAX_DIFFERENCE:
-                    stats.addValue( similarity.getMaximalDifference() );
-                    break;
-                case MEAN:
-                    stats.addValue( similarity.getMeanSimilarityScore() );
-                    break;
-                case SD:
-                    stats.addValue( similarity.getStandardDeviationOfSimilarityScore() );
-                    break;
-                case SPECIES_COUNT:
-                    stats.addValue( similarity.getSpecies().size() );
-                    break;
-                default:
-                    throw new AssertionError( "Unknown sort field: " + sort_field );
             }
-        }
-        AsciiHistogram histo = null;
-        try {
-            if ( stats.getMin() < stats.getMax() ) {
-                histo = new AsciiHistogram( stats, histogram_title );
+            catch ( final Exception e ) {
+                histo = null;
             }
         }
-        catch ( Exception e ) {
-            histo = null;
-        }
         if ( ( single_writer != null ) && ( ( split_writers == null ) || split_writers.isEmpty() ) ) {
             split_writers = new HashMap<Character, Writer>();
             split_writers.put( '_', single_writer );
@@ -1776,70 +1689,38 @@ public final class SurfacingUtil {
                     w.write( "<html>" );
                     w.write( SurfacingConstants.NL );
                     if ( key != '_' ) {
-                        addHtmlHead( w, "DCs (" + html_title + ") " + key.toString().toUpperCase() );
+                        addHtmlHead( w, "DC analysis (" + html_title + ") " + key.toString().toUpperCase() );
                     }
                     else {
-                        addHtmlHead( w, "DCs (" + html_title + ")" );
+                        addHtmlHead( w, "DC analysis (" + html_title + ")" );
                     }
                     w.write( SurfacingConstants.NL );
                     w.write( "<body>" );
                     w.write( SurfacingConstants.NL );
                     w.write( html_desc.toString() );
                     w.write( SurfacingConstants.NL );
-                    w.write( "<hr>" );
-                    w.write( "<br>" );
-                    w.write( SurfacingConstants.NL );
-                    w.write( "<tt><pre>" );
-                    w.write( SurfacingConstants.NL );
-                    if ( histo != null ) {
-                        w.write( histo.toStringBuffer( 20, '|', 40, 5 ).toString() );
-                        w.write( SurfacingConstants.NL );
-                    }
-                    w.write( "</pre></tt>" );
-                    w.write( SurfacingConstants.NL );
-                    w.write( "<table>" );
-                    w.write( SurfacingConstants.NL );
-                    w.write( "<tr><td>N: </td><td>" + stats.getN() + "</td></tr>" );
-                    w.write( SurfacingConstants.NL );
-                    w.write( "<tr><td>Min: </td><td>" + stats.getMin() + "</td></tr>" );
-                    w.write( SurfacingConstants.NL );
-                    w.write( "<tr><td>Max: </td><td>" + stats.getMax() + "</td></tr>" );
-                    w.write( SurfacingConstants.NL );
-                    w.write( "<tr><td>Mean: </td><td>" + stats.arithmeticMean() + "</td></tr>" );
-                    w.write( SurfacingConstants.NL );
-                    if ( stats.getN() > 1 ) {
-                        w.write( "<tr><td>SD: </td><td>" + stats.sampleStandardDeviation() + "</td></tr>" );
+                    if ( print_some_stats ) {
+                        printSomeStats( stats, histo, w );
                     }
-                    else {
-                        w.write( "<tr><td>SD: </td><td>n/a</td></tr>" );
-                    }
-                    w.write( SurfacingConstants.NL );
-                    w.write( "<tr><td>Median: </td><td>" + stats.median() + "</td></tr>" );
-                    w.write( SurfacingConstants.NL );
-                    w.write( "</table>" );
-                    w.write( SurfacingConstants.NL );
-                    w.write( "<br>" );
-                    w.write( SurfacingConstants.NL );
                     w.write( "<hr>" );
                     w.write( SurfacingConstants.NL );
                     w.write( "<br>" );
                     w.write( SurfacingConstants.NL );
                     w.write( "<table>" );
                     w.write( SurfacingConstants.NL );
+                    w.write( "<tr><td><b>Domains:</b></td></tr>" );
+                    w.write( SurfacingConstants.NL );
                 }
                 break;
         }
-        for( final Writer w : split_writers.values() ) {
-            w.write( SurfacingConstants.NL );
-        }
         //
         for( final DomainSimilarity similarity : similarities ) {
             if ( ( species_order != null ) && !species_order.isEmpty() ) {
                 ( ( PrintableDomainSimilarity ) similarity ).setSpeciesOrder( species_order );
             }
             if ( single_writer != null ) {
-                single_writer.write( "<a href=\"#" + similarity.getDomainId() + "\">" + similarity.getDomainId()
-                        + "</a><br>" );
+                single_writer.write( "<tr><td><b><a href=\"#" + similarity.getDomainId() + "\">"
+                        + similarity.getDomainId() + "</a></b></td></tr>" );
                 single_writer.write( SurfacingConstants.NL );
             }
             else {
@@ -1848,13 +1729,19 @@ public final class SurfacingUtil {
                 if ( local_writer == null ) {
                     local_writer = split_writers.get( '0' );
                 }
-                local_writer.write( "<a href=\"#" + similarity.getDomainId() + "\">" + similarity.getDomainId()
-                        + "</a><br>" );
+                local_writer.write( "<tr><td><b><a href=\"#" + similarity.getDomainId() + "\">"
+                        + similarity.getDomainId() + "</a></b></td></tr>" );
                 local_writer.write( SurfacingConstants.NL );
             }
         }
-        // w.write( "<hr>" );
-        // w.write( SurfacingConstants.NL );
+        for( final Writer w : split_writers.values() ) {
+            w.write( "</table>" );
+            w.write( SurfacingConstants.NL );
+            w.write( "<hr>" );
+            w.write( SurfacingConstants.NL );
+            w.write( "<table>" );
+            w.write( SurfacingConstants.NL );
+        }
         //
         for( final DomainSimilarity similarity : similarities ) {
             if ( ( species_order != null ) && !species_order.isEmpty() ) {
@@ -1895,6 +1782,42 @@ public final class SurfacingUtil {
         return stats;
     }
 
+    private static void printSomeStats( final DescriptiveStatistics stats, final AsciiHistogram histo, final Writer w )
+            throws IOException { // Writes an HTML statistics section to w: optional ASCII histogram, then an N/Min/Max/Mean/SD table.
+        w.write( "<hr>" );
+        w.write( "<br>" );
+        w.write( SurfacingConstants.NL );
+        w.write( "<tt><pre>" );
+        w.write( SurfacingConstants.NL );
+        if ( histo != null ) { // the histogram is optional; the surrounding <pre> block is written either way
+            w.write( histo.toStringBuffer( 20, '|', 40, 5 ).toString() );
+            w.write( SurfacingConstants.NL );
+        }
+        w.write( "</pre></tt>" );
+        w.write( SurfacingConstants.NL );
+        w.write( "<table>" );
+        w.write( SurfacingConstants.NL );
+        w.write( "<tr><td>N: </td><td>" + stats.getN() + "</td></tr>" ); // stats is dereferenced without a null check
+        w.write( SurfacingConstants.NL );
+        w.write( "<tr><td>Min: </td><td>" + stats.getMin() + "</td></tr>" );
+        w.write( SurfacingConstants.NL );
+        w.write( "<tr><td>Max: </td><td>" + stats.getMax() + "</td></tr>" );
+        w.write( SurfacingConstants.NL );
+        w.write( "<tr><td>Mean: </td><td>" + stats.arithmeticMean() + "</td></tr>" );
+        w.write( SurfacingConstants.NL );
+        if ( stats.getN() > 1 ) { // the sample SD row shows a value only for more than one data point
+            w.write( "<tr><td>SD: </td><td>" + stats.sampleStandardDeviation() + "</td></tr>" );
+        }
+        else {
+            w.write( "<tr><td>SD: </td><td>n/a</td></tr>" );
+        }
+        w.write( SurfacingConstants.NL );
+        w.write( "</table>" );
+        w.write( SurfacingConstants.NL );
+        w.write( "<br>" );
+        w.write( SurfacingConstants.NL );
+    }
+
     public static void writeMatrixToFile( final CharacterStateMatrix<?> matrix,
                                           final String filename,
                                           final Format format ) {