inprogress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Wed, 6 Nov 2013 20:28:27 +0000 (20:28 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Wed, 6 Nov 2013 20:28:27 +0000 (20:28 +0000)
24 files changed:
forester/java/src/org/forester/application/surfacing.java
forester/java/src/org/forester/surfacing/BasicBinaryDomainCombination.java
forester/java/src/org/forester/surfacing/BasicCombinableDomains.java
forester/java/src/org/forester/surfacing/BasicDomainSimilarityCalculator.java
forester/java/src/org/forester/surfacing/BasicGenomeWideCombinableDomains.java
forester/java/src/org/forester/surfacing/CombinableDomains.java
forester/java/src/org/forester/surfacing/CombinationsBasedPairwiseDomainSimilarity.java
forester/java/src/org/forester/surfacing/DomainArchitectureBasedGenomeSimilarityCalculator.java
forester/java/src/org/forester/surfacing/DomainCountsDifferenceUtil.java
forester/java/src/org/forester/surfacing/DomainLengths.java
forester/java/src/org/forester/surfacing/DomainLengthsTable.java
forester/java/src/org/forester/surfacing/DomainParsimonyCalculator.java
forester/java/src/org/forester/surfacing/DomainSimilarity.java [deleted file]
forester/java/src/org/forester/surfacing/DomainSimilarityCalculator.java
forester/java/src/org/forester/surfacing/GenomeWideCombinableDomains.java
forester/java/src/org/forester/surfacing/MappingResults.java
forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java
forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java
forester/java/src/org/forester/surfacing/PrintableSpeciesSpecificDcData.java
forester/java/src/org/forester/surfacing/SpeciesSpecificDcData.java
forester/java/src/org/forester/surfacing/SurfacingConstants.java
forester/java/src/org/forester/surfacing/SurfacingUtil.java
forester/java/src/org/forester/surfacing/TestSurfacing.java
forester/java/src/org/forester/test/Test.java

index 413f45e..302800d 100644 (file)
@@ -63,9 +63,6 @@ import org.forester.surfacing.CombinationsBasedPairwiseDomainSimilarityCalculato
 import org.forester.surfacing.DomainCountsBasedPairwiseSimilarityCalculator;
 import org.forester.surfacing.DomainLengthsTable;
 import org.forester.surfacing.DomainParsimonyCalculator;
-import org.forester.surfacing.DomainSimilarity;
-import org.forester.surfacing.DomainSimilarity.DomainSimilarityScoring;
-import org.forester.surfacing.DomainSimilarity.DomainSimilaritySortField;
 import org.forester.surfacing.DomainSimilarityCalculator;
 import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
 import org.forester.surfacing.GenomeWideCombinableDomains;
@@ -74,6 +71,7 @@ import org.forester.surfacing.MappingResults;
 import org.forester.surfacing.PairwiseDomainSimilarityCalculator;
 import org.forester.surfacing.PairwiseGenomeComparator;
 import org.forester.surfacing.PrintableDomainSimilarity;
+import org.forester.surfacing.PrintableDomainSimilarity.DomainSimilarityScoring;
 import org.forester.surfacing.PrintableDomainSimilarity.PRINT_OPTION;
 import org.forester.surfacing.ProteinCountsBasedPairwiseDomainSimilarityCalculator;
 import org.forester.surfacing.SurfacingUtil;
@@ -87,188 +85,188 @@ import org.forester.util.ForesterUtil;
 
 public class surfacing {
 
-    private static final int                                  MINIMAL_NUMBER_OF_SIMILARITIES_FOR_SPLITTING                                  = 1000;
-    public final static String                                DOMAIN_COMBINITONS_OUTPUT_OPTION_FOR_GRAPH_ANALYSIS                           = "graph_analysis_out";
-    public final static String                                DOMAIN_COMBINITONS_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS                       = "_dc.dot";
-    public final static String                                PARSIMONY_OUTPUT_FITCH_PRESENT_BC_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS        = "_fitch_present_dc.dot";
-    public final static String                                DOMAIN_COMBINITON_COUNTS_OUTPUTFILE_SUFFIX                                    = ".dcc";
+    private static final int                                                 MINIMAL_NUMBER_OF_SIMILARITIES_FOR_SPLITTING                                  = 1000;
+    public final static String                                               DOMAIN_COMBINITONS_OUTPUT_OPTION_FOR_GRAPH_ANALYSIS                           = "graph_analysis_out";
+    public final static String                                               DOMAIN_COMBINITONS_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS                       = "_dc.dot";
+    public final static String                                               PARSIMONY_OUTPUT_FITCH_PRESENT_BC_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS        = "_fitch_present_dc.dot";
+    public final static String                                               DOMAIN_COMBINITON_COUNTS_OUTPUTFILE_SUFFIX                                    = ".dcc";
     // gain/loss:
-    public final static String                                PARSIMONY_OUTPUT_GL_SUFFIX_DOLLO_DOMAINS                                      = "_dollo_gl_d";
-    public final static String                                PARSIMONY_OUTPUT_GL_SUFFIX_DOLLO_BINARY_COMBINATIONS                          = "_dollo_gl_dc";
-    public final static String                                PARSIMONY_OUTPUT_GL_SUFFIX_FITCH_DOMAINS                                      = "_fitch_gl_d";
-    public final static String                                PARSIMONY_OUTPUT_GL_SUFFIX_FITCH_BINARY_COMBINATIONS                          = "_fitch_gl_dc";
+    public final static String                                               PARSIMONY_OUTPUT_GL_SUFFIX_DOLLO_DOMAINS                                      = "_dollo_gl_d";
+    public final static String                                               PARSIMONY_OUTPUT_GL_SUFFIX_DOLLO_BINARY_COMBINATIONS                          = "_dollo_gl_dc";
+    public final static String                                               PARSIMONY_OUTPUT_GL_SUFFIX_FITCH_DOMAINS                                      = "_fitch_gl_d";
+    public final static String                                               PARSIMONY_OUTPUT_GL_SUFFIX_FITCH_BINARY_COMBINATIONS                          = "_fitch_gl_dc";
     // gain/loss counts:
-    public final static String                                PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_DOLLO_DOMAINS                               = "_dollo_glc_d";
-    public final static String                                PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_DOLLO_BINARY_COMBINATIONS                   = "_dollo_glc_dc";
-    public final static String                                PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_FITCH_DOMAINS                               = "_fitch_glc_d";
-    public final static String                                PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_FITCH_BINARY_COMBINATIONS                   = "_fitch_glc_dc";
+    public final static String                                               PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_DOLLO_DOMAINS                               = "_dollo_glc_d";
+    public final static String                                               PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_DOLLO_BINARY_COMBINATIONS                   = "_dollo_glc_dc";
+    public final static String                                               PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_FITCH_DOMAINS                               = "_fitch_glc_d";
+    public final static String                                               PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_FITCH_BINARY_COMBINATIONS                   = "_fitch_glc_dc";
     // tables:
-    public final static String                                PARSIMONY_OUTPUT_FITCH_GAINS_BC                                               = "_fitch_gains_dc";
-    public final static String                                PARSIMONY_OUTPUT_FITCH_GAINS_HTML_BC                                          = "_fitch_gains_dc.html";
-    public final static String                                PARSIMONY_OUTPUT_FITCH_LOSSES_BC                                              = "_fitch_losses_dc";
-    public final static String                                PARSIMONY_OUTPUT_FITCH_LOSSES_HTML_BC                                         = "_fitch_losses_dc.html";
-    public final static String                                PARSIMONY_OUTPUT_FITCH_PRESENT_BC                                             = "_fitch_present_dc";
-    public final static String                                PARSIMONY_OUTPUT_FITCH_PRESENT_HTML_BC                                        = "_fitch_present_dc.html";
-    public final static String                                PARSIMONY_OUTPUT_DOLLO_GAINS_D                                                = "_dollo_gains_d";
-    public final static String                                PARSIMONY_OUTPUT_DOLLO_GAINS_HTML_D                                           = "_dollo_gains_d.html";
-    public final static String                                PARSIMONY_OUTPUT_DOLLO_LOSSES_D                                               = "_dollo_losses_d";
-    public final static String                                PARSIMONY_OUTPUT_DOLLO_LOSSES_HTML_D                                          = "_dollo_losses_d.html";
-    public final static String                                PARSIMONY_OUTPUT_DOLLO_PRESENT_D                                              = "_dollo_present_d";
-    public final static String                                PARSIMONY_OUTPUT_DOLLO_PRESENT_HTML_D                                         = "_dollo_present_d.html";
-    public final static String                                DOMAINS_PRESENT_NEXUS                                                         = "_dom.nex";
-    public final static String                                BDC_PRESENT_NEXUS                                                             = "_dc.nex";
+    public final static String                                               PARSIMONY_OUTPUT_FITCH_GAINS_BC                                               = "_fitch_gains_dc";
+    public final static String                                               PARSIMONY_OUTPUT_FITCH_GAINS_HTML_BC                                          = "_fitch_gains_dc.html";
+    public final static String                                               PARSIMONY_OUTPUT_FITCH_LOSSES_BC                                              = "_fitch_losses_dc";
+    public final static String                                               PARSIMONY_OUTPUT_FITCH_LOSSES_HTML_BC                                         = "_fitch_losses_dc.html";
+    public final static String                                               PARSIMONY_OUTPUT_FITCH_PRESENT_BC                                             = "_fitch_present_dc";
+    public final static String                                               PARSIMONY_OUTPUT_FITCH_PRESENT_HTML_BC                                        = "_fitch_present_dc.html";
+    public final static String                                               PARSIMONY_OUTPUT_DOLLO_GAINS_D                                                = "_dollo_gains_d";
+    public final static String                                               PARSIMONY_OUTPUT_DOLLO_GAINS_HTML_D                                           = "_dollo_gains_d.html";
+    public final static String                                               PARSIMONY_OUTPUT_DOLLO_LOSSES_D                                               = "_dollo_losses_d";
+    public final static String                                               PARSIMONY_OUTPUT_DOLLO_LOSSES_HTML_D                                          = "_dollo_losses_d.html";
+    public final static String                                               PARSIMONY_OUTPUT_DOLLO_PRESENT_D                                              = "_dollo_present_d";
+    public final static String                                               PARSIMONY_OUTPUT_DOLLO_PRESENT_HTML_D                                         = "_dollo_present_d.html";
+    public final static String                                               DOMAINS_PRESENT_NEXUS                                                         = "_dom.nex";
+    public final static String                                               BDC_PRESENT_NEXUS                                                             = "_dc.nex";
     // ---
-    public final static String                                PRG_NAME                                                                      = "surfacing";
-    public static final String                                DOMAINS_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO                                    = "_d_dollo"
-                                                                                                                                                    + ForesterConstants.PHYLO_XML_SUFFIX;
-    public static final String                                DOMAINS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH                                    = "_d_fitch"
-                                                                                                                                                    + ForesterConstants.PHYLO_XML_SUFFIX;
-    public static final String                                BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO                 = "_dc_dollo"
-                                                                                                                                                    + ForesterConstants.PHYLO_XML_SUFFIX;
-    public static final String                                BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH                 = "_dc_fitch"
-                                                                                                                                                    + ForesterConstants.PHYLO_XML_SUFFIX;
-    public static final String                                NEXUS_EXTERNAL_DOMAINS                                                        = "_dom.nex";
-    public static final String                                NEXUS_EXTERNAL_DOMAIN_COMBINATIONS                                            = "_dc.nex";
-    public static final String                                NEXUS_SECONDARY_FEATURES                                                      = "_secondary_features.nex";
-    public static final String                                PARSIMONY_OUTPUT_GL_SUFFIX_DOLLO_SECONDARY_FEATURES                           = "_dollo_gl_secondary_features";
-    public static final String                                PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_DOLLO_SECONDARY_FEATURES                    = "_dollo_glc_secondary_features";
-    public static final String                                PARSIMONY_OUTPUT_DOLLO_GAINS_SECONDARY_FEATURES                               = "_dollo_gains_secondary_features";
-    public static final String                                PARSIMONY_OUTPUT_DOLLO_LOSSES_SECONDARY_FEATURES                              = "_dollo_losses_secondary_features";
-    public static final String                                PARSIMONY_OUTPUT_DOLLO_PRESENT_SECONDARY_FEATURES                             = "_dollo_present_secondary_features";
-    public static final String                                SECONDARY_FEATURES_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO                         = "_secondary_features_dollo"
-                                                                                                                                                    + ForesterConstants.PHYLO_XML_SUFFIX;
-    public static final String                                PARSIMONY_OUTPUT_DOLLO_ALL_GOID_D_ALL_NAMESPACES                              = "_dollo_goid_d";
-    public static final String                                PARSIMONY_OUTPUT_FITCH_ALL_GOID_BC_ALL_NAMESPACES                             = "_fitch_goid_dc";
-    final static private String                               HELP_OPTION_1                                                                 = "help";
-    final static private String                               HELP_OPTION_2                                                                 = "h";
-    final static private String                               OUTPUT_DIR_OPTION                                                             = "out_dir";
-    final static private String                               SCORING_OPTION                                                                = "scoring";
-    private static final DomainSimilarityScoring              SCORING_DEFAULT                                                               = DomainSimilarity.DomainSimilarityScoring.COMBINATIONS;
-    final static private String                               SCORING_DOMAIN_COUNT_BASED                                                    = "domains";
-    final static private String                               SCORING_PROTEIN_COUNT_BASED                                                   = "proteins";
-    final static private String                               SCORING_COMBINATION_BASED                                                     = "combinations";
-    final static private String                               DETAILEDNESS_OPTION                                                           = "detail";
-    private final static Detailedness                         DETAILEDNESS_DEFAULT                                                          = DomainSimilarityCalculator.Detailedness.PUNCTILIOUS;
-    final static private String                               SPECIES_MATRIX_OPTION                                                         = "smatrix";
-    final static private String                               DETAILEDNESS_BASIC                                                            = "basic";
-    final static private String                               DETAILEDNESS_LIST_IDS                                                         = "list_ids";
-    final static private String                               DETAILEDNESS_PUNCTILIOUS                                                      = "punctilious";
-    final static private String                               DOMAIN_SIMILARITY_SORT_OPTION                                                 = "sort";
-    private static final DomainSimilaritySortField            DOMAIN_SORT_FILD_DEFAULT                                                      = DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID;
-    final static private String                               DOMAIN_SIMILARITY_SORT_MIN                                                    = "min";
-    final static private String                               DOMAIN_SIMILARITY_SORT_MAX                                                    = "max";
-    final static private String                               DOMAIN_SIMILARITY_SORT_SD                                                     = "sd";
-    final static private String                               DOMAIN_SIMILARITY_SORT_MEAN                                                   = "mean";
-    final static private String                               DOMAIN_SIMILARITY_SORT_DIFF                                                   = "diff";
-    final static private String                               DOMAIN_SIMILARITY_SORT_COUNTS_DIFF                                            = "count_diff";
-    final static private String                               DOMAIN_SIMILARITY_SORT_ABS_COUNTS_DIFF                                        = "abs_count_diff";
-    final static private String                               DOMAIN_SIMILARITY_SORT_SPECIES_COUNT                                          = "species";
-    final static private String                               DOMAIN_SIMILARITY_SORT_ALPHA                                                  = "alpha";
-    final static private String                               DOMAIN_SIMILARITY_SORT_BY_SPECIES_COUNT_FIRST_OPTION                          = "species_first";
-    final static private String                               DOMAIN_COUNT_SORT_OPTION                                                      = "dc_sort";
-    private static final GenomeWideCombinableDomainsSortOrder DOMAINS_SORT_ORDER_DEFAULT                                                    = GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder.ALPHABETICAL_KEY_ID;
-    final static private String                               DOMAIN_COUNT_SORT_ALPHA                                                       = "alpha";
-    final static private String                               DOMAIN_COUNT_SORT_KEY_DOMAIN_COUNT                                            = "dom";
-    final static private String                               DOMAIN_COUNT_SORT_KEY_DOMAIN_PROTEINS_COUNT                                   = "prot";
-    final static private String                               DOMAIN_COUNT_SORT_COMBINATIONS_COUNT                                          = "comb";
-    final static private String                               CUTOFF_SCORE_FILE_OPTION                                                      = "cos";
-    final static private String                               NOT_IGNORE_DUFS_OPTION                                                        = "dufs";
-    final static private String                               MAX_E_VALUE_OPTION                                                            = "e";
-    final static private String                               MAX_ALLOWED_OVERLAP_OPTION                                                    = "mo";
-    final static private String                               NO_ENGULFING_OVERLAP_OPTION                                                   = "no_eo";
-    final static private String                               IGNORE_COMBINATION_WITH_SAME_OPTION                                           = "ignore_self_comb";
-    final static private String                               PERFORM_DC_REGAIN_PROTEINS_STATS_OPTION                                       = "dc_regain_stats";
-    final static private String                               DA_ANALYSIS_OPTION                                                            = "DA_analyis";
-    final static private String                               USE_LAST_IN_FITCH_OPTION                                                      = "last";
-    public final static String                                PAIRWISE_DOMAIN_COMPARISONS_PREFIX                                            = "pwc_";
-    final static private String                               PAIRWISE_DOMAIN_COMPARISONS_OPTION                                            = "pwc";
-    final static private String                               OUTPUT_FILE_OPTION                                                            = "o";
-    final static private String                               PFAM_TO_GO_FILE_USE_OPTION                                                    = "p2g";
-    final static private String                               GO_OBO_FILE_USE_OPTION                                                        = "obo";
-    final static private String                               GO_NAMESPACE_LIMIT_OPTION                                                     = "go_namespace";
-    final static private String                               GO_NAMESPACE_LIMIT_OPTION_MOLECULAR_FUNCTION                                  = "molecular_function";
-    final static private String                               GO_NAMESPACE_LIMIT_OPTION_BIOLOGICAL_PROCESS                                  = "biological_process";
-    final static private String                               GO_NAMESPACE_LIMIT_OPTION_CELLULAR_COMPONENT                                  = "cellular_component";
-    final static private String                               SECONDARY_FEATURES_PARSIMONY_MAP_FILE                                         = "secondary";
-    final static private String                               DOMAIN_SIMILARITY_PRINT_OPTION_SIMPLE_TAB_DELIMITED                           = "simple_tab";
-    final static private String                               DOMAIN_SIMILARITY_PRINT_OPTION_SIMPLE_HTML                                    = "simple_html";
-    final static private String                               DOMAIN_SIMILARITY_PRINT_OPTION_DETAILED_HTML                                  = "detailed_html";
-    final static private String                               DOMAIN_SIMILARITY_PRINT_OPTION                                                = "ds_output";
-    private static final PRINT_OPTION                         DOMAIN_SIMILARITY_PRINT_OPTION_DEFAULT                                        = PrintableDomainSimilarity.PRINT_OPTION.HTML;
-    final static private String                               IGNORE_DOMAINS_WITHOUT_COMBINATIONS_IN_ALL_SPECIES_OPTION                     = "ignore_singlet_domains";
-    final static private String                               IGNORE_VIRAL_IDS                                                              = "ignore_viral_ids";
-    final static private boolean                              IGNORE_DOMAINS_WITHOUT_COMBINATIONS_IN_ALL_SPECIES_DEFAULT                    = false;
-    final static private String                               IGNORE_DOMAINS_SPECIFIC_TO_ONE_SPECIES_OPTION                                 = "ignore_species_specific_domains";
-    final static private boolean                              IGNORE_DOMAINS_SPECIFIC_TO_ONE_SPECIES_OPTION_DEFAULT                         = false;
-    final static private String                               MATRIX_MEAN_SCORE_BASED_GENOME_DISTANCE_SUFFIX                                = "_mean_score.pwd";
-    final static private String                               MATRIX_SHARED_DOMAINS_BASED_GENOME_DISTANCE_SUFFIX                            = "_domains.pwd";
-    final static private String                               MATRIX_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX                   = "_bin_combinations.pwd";
-    final static private String                               NJ_TREE_MEAN_SCORE_BASED_GENOME_DISTANCE_SUFFIX                               = "_mean_score_NJ"
-                                                                                                                                                    + ForesterConstants.PHYLO_XML_SUFFIX;
-    final static private String                               NJ_TREE_SHARED_DOMAINS_BASED_GENOME_DISTANCE_SUFFIX                           = "_domains_NJ"
-                                                                                                                                                    + ForesterConstants.PHYLO_XML_SUFFIX;
-    final static private String                               NJ_TREE_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX                  = "_bin_combinations_NJ"
-                                                                                                                                                    + ForesterConstants.PHYLO_XML_SUFFIX;
-    final static private String                               FILTER_POSITIVE_OPTION                                                        = "pos_filter";
-    final static private String                               FILTER_NEGATIVE_OPTION                                                        = "neg_filter";
-    final static private String                               FILTER_NEGATIVE_DOMAINS_OPTION                                                = "neg_dom_filter";
-    final static private String                               INPUT_GENOMES_FILE_OPTION                                                     = "genomes";
-    final static private String                               INPUT_SPECIES_TREE_OPTION                                                     = "species_tree";
-    final static private String                               SEQ_EXTRACT_OPTION                                                            = "prot_extract";
-    final static private String                               PRG_VERSION                                                                   = "2.304";
-    final static private String                               PRG_DATE                                                                      = "131024";
-    final static private String                               E_MAIL                                                                        = "czmasek@burnham.org";
-    final static private String                               WWW                                                                           = "https://sites.google.com/site/cmzmasek/home/software/forester/surfacing";
-    final static private boolean                              IGNORE_DUFS_DEFAULT                                                           = true;
-    final static private boolean                              IGNORE_COMBINATION_WITH_SAME_DEFAULLT                                         = false;
-    final static private double                               MAX_E_VALUE_DEFAULT                                                           = -1;
-    public final static int                                   MAX_ALLOWED_OVERLAP_DEFAULT                                                   = -1;
-    private static final String                               RANDOM_SEED_FOR_FITCH_PARSIMONY_OPTION                                        = "random_seed";
-    private static final String                               CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS                                      = "consider_bdc_direction";
-    private static final String                               CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS_AND_ADJACENCY                        = "consider_bdc_adj";
-    public static final String                                SEQ_EXTRACT_SUFFIX                                                            = ".prot";
-    public static final String                                PLUS_MINUS_ANALYSIS_OPTION                                                    = "plus_minus";
-    public static final String                                PLUS_MINUS_DOM_SUFFIX                                                         = "_plus_minus_dom.txt";
-    public static final String                                PLUS_MINUS_DOM_SUFFIX_HTML                                                    = "_plus_minus_dom.html";
-    public static final String                                PLUS_MINUS_DC_SUFFIX_HTML                                                     = "_plus_minus_dc.html";
-    public static final int                                   PLUS_MINUS_ANALYSIS_MIN_DIFF_DEFAULT                                          = 0;
-    public static final double                                PLUS_MINUS_ANALYSIS_FACTOR_DEFAULT                                            = 1.0;
-    public static final String                                PLUS_MINUS_ALL_GO_IDS_DOM_SUFFIX                                              = "_plus_minus_go_ids_all.txt";
-    public static final String                                PLUS_MINUS_PASSING_GO_IDS_DOM_SUFFIX                                          = "_plus_minus_go_ids_passing.txt";
-    private static final String                               OUTPUT_LIST_OF_ALL_PROTEINS_OPTIONS                                           = "all_prot";
-    final static private String                               OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION                         = "all_prot_e";
-    public static final boolean                               VERBOSE                                                                       = false;
-    private static final String                               OUTPUT_DOMAIN_COMBINATIONS_GAINED_MORE_THAN_ONCE_ANALYSIS_SUFFIX              = "_fitch_dc_gains_counts";
-    private static final String                               OUTPUT_DOMAIN_COMBINATIONS_LOST_MORE_THAN_ONCE_ANALYSIS_SUFFIX                = "_fitch_dc_losses_counts";
-    private static final String                               DOMAIN_LENGTHS_ANALYSIS_SUFFIX                                                = "_domain_lengths_analysis";
-    private static final boolean                              PERFORM_DOMAIN_LENGTH_ANALYSIS                                                = true;
-    public static final String                                ALL_PFAMS_ENCOUNTERED_SUFFIX                                                  = "_all_encountered_pfams";
-    public static final String                                ALL_PFAMS_ENCOUNTERED_WITH_GO_ANNOTATION_SUFFIX                               = "_all_encountered_pfams_with_go_annotation";
-    public static final String                                ENCOUNTERED_PFAMS_SUMMARY_SUFFIX                                              = "_encountered_pfams_summary";
-    public static final String                                ALL_PFAMS_GAINED_AS_DOMAINS_SUFFIX                                            = "_all_pfams_gained_as_domains";
-    public static final String                                ALL_PFAMS_LOST_AS_DOMAINS_SUFFIX                                              = "_all_pfams_lost_as_domains";
-    public static final String                                ALL_PFAMS_GAINED_AS_DC_SUFFIX                                                 = "_all_pfams_gained_as_dc";
-    public static final String                                ALL_PFAMS_LOST_AS_DC_SUFFIX                                                   = "_all_pfams_lost_as_dc";
-    public static final String                                BASE_DIRECTORY_PER_NODE_DOMAIN_GAIN_LOSS_FILES                                = "PER_NODE_EVENTS";
-    public static final String                                BASE_DIRECTORY_PER_SUBTREE_DOMAIN_GAIN_LOSS_FILES                             = "PER_SUBTREE_EVENTS";
-    public static final String                                D_PROMISCUITY_FILE_SUFFIX                                                     = "_domain_promiscuities";
-    private static final String                               LOG_FILE_SUFFIX                                                               = "_log.txt";
-    private static final String                               DATA_FILE_SUFFIX                                                              = "_domain_combination_data.txt";
-    private static final String                               DATA_FILE_DESC                                                                = "#SPECIES\tPRTEIN_ID\tN_TERM_DOMAIN\tC_TERM_DOMAIN\tN_TERM_DOMAIN_PER_DOMAIN_E_VALUE\tC_TERM_DOMAIN_PER_DOMAIN_E_VALUE\tN_TERM_DOMAIN_COUNTS_PER_PROTEIN\tC_TERM_DOMAIN_COUNTS_PER_PROTEIN";
-    private static final String                               WRITE_TO_NEXUS_OPTION                                                         = "nexus";
-    private static final INDIVIDUAL_SCORE_CUTOFF              INDIVIDUAL_SCORE_CUTOFF_DEFAULT                                               = INDIVIDUAL_SCORE_CUTOFF.FULL_SEQUENCE;                                                                                                                                                      //TODO look at me! change?
-    public static final String                                INDEPENDENT_DC_GAINS_FITCH_PARS_COUNTS_OUTPUT_SUFFIX                          = "_indep_dc_gains_fitch_counts.txt";
-    public static final String                                INDEPENDENT_DC_GAINS_FITCH_PARS_DC_OUTPUT_SUFFIX                              = "_indep_dc_gains_fitch_lists.txt";
-    public static final String                                INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_SUFFIX               = "_indep_dc_gains_fitch_lists_for_go_mapping.txt";
-    public static final String                                INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_UNIQUE_SUFFIX        = "_indep_dc_gains_fitch_lists_for_go_mapping_unique.txt";
-    public static final String                                LIMIT_SPEC_FOR_PROT_EX                                                        = null;                                                                                                                                                                                       // e.g. "HUMAN"; set to null for not using this feature (default).
-    public static final String                                BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH_MAPPED          = "_dc_MAPPED_secondary_features_fitch"
-                                                                                                                                                    + ForesterConstants.PHYLO_XML_SUFFIX;
-    public static final String                                INDEPENDENT_DC_GAINS_FITCH_PARS_COUNTS_MAPPED_OUTPUT_SUFFIX                   = "_indep_dc_gains_fitch_counts_MAPPED.txt";
-    public static final String                                INDEPENDENT_DC_GAINS_FITCH_PARS_DC_MAPPED_OUTPUT_SUFFIX                       = "_indep_dc_gains_fitch_lists_MAPPED.txt";
-    public static final String                                INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_SUFFIX        = "_indep_dc_gains_fitch_lists_for_go_mapping_MAPPED.txt";
-    public static final String                                INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique_MAPPED.txt";
-    private static final boolean                              CALC_SIMILARITY_SCORES                                                        = false;
+    public final static String                                               PRG_NAME                                                                      = "surfacing";
+    public static final String                                               DOMAINS_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO                                    = "_d_dollo"
+                                                                                                                                                                   + ForesterConstants.PHYLO_XML_SUFFIX;
+    public static final String                                               DOMAINS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH                                    = "_d_fitch"
+                                                                                                                                                                   + ForesterConstants.PHYLO_XML_SUFFIX;
+    public static final String                                               BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO                 = "_dc_dollo"
+                                                                                                                                                                   + ForesterConstants.PHYLO_XML_SUFFIX;
+    public static final String                                               BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH                 = "_dc_fitch"
+                                                                                                                                                                   + ForesterConstants.PHYLO_XML_SUFFIX;
+    public static final String                                               NEXUS_EXTERNAL_DOMAINS                                                        = "_dom.nex";
+    public static final String                                               NEXUS_EXTERNAL_DOMAIN_COMBINATIONS                                            = "_dc.nex";
+    public static final String                                               NEXUS_SECONDARY_FEATURES                                                      = "_secondary_features.nex";
+    public static final String                                               PARSIMONY_OUTPUT_GL_SUFFIX_DOLLO_SECONDARY_FEATURES                           = "_dollo_gl_secondary_features";
+    public static final String                                               PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_DOLLO_SECONDARY_FEATURES                    = "_dollo_glc_secondary_features";
+    public static final String                                               PARSIMONY_OUTPUT_DOLLO_GAINS_SECONDARY_FEATURES                               = "_dollo_gains_secondary_features";
+    public static final String                                               PARSIMONY_OUTPUT_DOLLO_LOSSES_SECONDARY_FEATURES                              = "_dollo_losses_secondary_features";
+    public static final String                                               PARSIMONY_OUTPUT_DOLLO_PRESENT_SECONDARY_FEATURES                             = "_dollo_present_secondary_features";
+    public static final String                                               SECONDARY_FEATURES_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO                         = "_secondary_features_dollo"
+                                                                                                                                                                   + ForesterConstants.PHYLO_XML_SUFFIX;
+    public static final String                                               PARSIMONY_OUTPUT_DOLLO_ALL_GOID_D_ALL_NAMESPACES                              = "_dollo_goid_d";
+    public static final String                                               PARSIMONY_OUTPUT_FITCH_ALL_GOID_BC_ALL_NAMESPACES                             = "_fitch_goid_dc";
+    final static private String                                              HELP_OPTION_1                                                                 = "help";
+    final static private String                                              HELP_OPTION_2                                                                 = "h";
+    final static private String                                              OUTPUT_DIR_OPTION                                                             = "out_dir";
+    final static private String                                              SCORING_OPTION                                                                = "scoring";
+    private static final DomainSimilarityScoring                             SCORING_DEFAULT                                                               = PrintableDomainSimilarity.DomainSimilarityScoring.COMBINATIONS;
+    final static private String                                              SCORING_DOMAIN_COUNT_BASED                                                    = "domains";
+    final static private String                                              SCORING_PROTEIN_COUNT_BASED                                                   = "proteins";
+    final static private String                                              SCORING_COMBINATION_BASED                                                     = "combinations";
+    final static private String                                              DETAILEDNESS_OPTION                                                           = "detail";
+    private final static Detailedness                                        DETAILEDNESS_DEFAULT                                                          = DomainSimilarityCalculator.Detailedness.PUNCTILIOUS;
+    final static private String                                              SPECIES_MATRIX_OPTION                                                         = "smatrix";
+    final static private String                                              DETAILEDNESS_BASIC                                                            = "basic";
+    final static private String                                              DETAILEDNESS_LIST_IDS                                                         = "list_ids";
+    final static private String                                              DETAILEDNESS_PUNCTILIOUS                                                      = "punctilious";
+    final static private String                                              DOMAIN_SIMILARITY_SORT_OPTION                                                 = "sort";
+    private static final PrintableDomainSimilarity.DomainSimilaritySortField DOMAIN_SORT_FILD_DEFAULT                                                      = PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID;
+    final static private String                                              DOMAIN_SIMILARITY_SORT_MIN                                                    = "min";
+    final static private String                                              DOMAIN_SIMILARITY_SORT_MAX                                                    = "max";
+    final static private String                                              DOMAIN_SIMILARITY_SORT_SD                                                     = "sd";
+    final static private String                                              DOMAIN_SIMILARITY_SORT_MEAN                                                   = "mean";
+    final static private String                                              DOMAIN_SIMILARITY_SORT_DIFF                                                   = "diff";
+    final static private String                                              DOMAIN_SIMILARITY_SORT_COUNTS_DIFF                                            = "count_diff";
+    final static private String                                              DOMAIN_SIMILARITY_SORT_ABS_COUNTS_DIFF                                        = "abs_count_diff";
+    final static private String                                              DOMAIN_SIMILARITY_SORT_SPECIES_COUNT                                          = "species";
+    final static private String                                              DOMAIN_SIMILARITY_SORT_ALPHA                                                  = "alpha";
+    final static private String                                              DOMAIN_SIMILARITY_SORT_BY_SPECIES_COUNT_FIRST_OPTION                          = "species_first";
+    final static private String                                              DOMAIN_COUNT_SORT_OPTION                                                      = "dc_sort";
+    private static final GenomeWideCombinableDomainsSortOrder                DOMAINS_SORT_ORDER_DEFAULT                                                    = GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder.ALPHABETICAL_KEY_ID;
+    final static private String                                              DOMAIN_COUNT_SORT_ALPHA                                                       = "alpha";
+    final static private String                                              DOMAIN_COUNT_SORT_KEY_DOMAIN_COUNT                                            = "dom";
+    final static private String                                              DOMAIN_COUNT_SORT_KEY_DOMAIN_PROTEINS_COUNT                                   = "prot";
+    final static private String                                              DOMAIN_COUNT_SORT_COMBINATIONS_COUNT                                          = "comb";
+    final static private String                                              CUTOFF_SCORE_FILE_OPTION                                                      = "cos";
+    final static private String                                              NOT_IGNORE_DUFS_OPTION                                                        = "dufs";
+    final static private String                                              MAX_E_VALUE_OPTION                                                            = "e";
+    final static private String                                              MAX_ALLOWED_OVERLAP_OPTION                                                    = "mo";
+    final static private String                                              NO_ENGULFING_OVERLAP_OPTION                                                   = "no_eo";
+    final static private String                                              IGNORE_COMBINATION_WITH_SAME_OPTION                                           = "ignore_self_comb";
+    final static private String                                              PERFORM_DC_REGAIN_PROTEINS_STATS_OPTION                                       = "dc_regain_stats";
+    final static private String                                              DA_ANALYSIS_OPTION                                                            = "DA_analyis";
+    final static private String                                              USE_LAST_IN_FITCH_OPTION                                                      = "last";
+    public final static String                                               PAIRWISE_DOMAIN_COMPARISONS_PREFIX                                            = "pwc_";
+    final static private String                                              PAIRWISE_DOMAIN_COMPARISONS_OPTION                                            = "pwc";
+    final static private String                                              OUTPUT_FILE_OPTION                                                            = "o";
+    final static private String                                              PFAM_TO_GO_FILE_USE_OPTION                                                    = "p2g";
+    final static private String                                              GO_OBO_FILE_USE_OPTION                                                        = "obo";
+    final static private String                                              GO_NAMESPACE_LIMIT_OPTION                                                     = "go_namespace";
+    final static private String                                              GO_NAMESPACE_LIMIT_OPTION_MOLECULAR_FUNCTION                                  = "molecular_function";
+    final static private String                                              GO_NAMESPACE_LIMIT_OPTION_BIOLOGICAL_PROCESS                                  = "biological_process";
+    final static private String                                              GO_NAMESPACE_LIMIT_OPTION_CELLULAR_COMPONENT                                  = "cellular_component";
+    final static private String                                              SECONDARY_FEATURES_PARSIMONY_MAP_FILE                                         = "secondary";
+    final static private String                                              DOMAIN_SIMILARITY_PRINT_OPTION_SIMPLE_TAB_DELIMITED                           = "simple_tab";
+    final static private String                                              DOMAIN_SIMILARITY_PRINT_OPTION_SIMPLE_HTML                                    = "simple_html";
+    final static private String                                              DOMAIN_SIMILARITY_PRINT_OPTION_DETAILED_HTML                                  = "detailed_html";
+    final static private String                                              DOMAIN_SIMILARITY_PRINT_OPTION                                                = "ds_output";
+    private static final PRINT_OPTION                                        DOMAIN_SIMILARITY_PRINT_OPTION_DEFAULT                                        = PrintableDomainSimilarity.PRINT_OPTION.HTML;
+    final static private String                                              IGNORE_DOMAINS_WITHOUT_COMBINATIONS_IN_ALL_SPECIES_OPTION                     = "ignore_singlet_domains";
+    final static private String                                              IGNORE_VIRAL_IDS                                                              = "ignore_viral_ids";
+    final static private boolean                                             IGNORE_DOMAINS_WITHOUT_COMBINATIONS_IN_ALL_SPECIES_DEFAULT                    = false;
+    final static private String                                              IGNORE_DOMAINS_SPECIFIC_TO_ONE_SPECIES_OPTION                                 = "ignore_species_specific_domains";
+    final static private boolean                                             IGNORE_DOMAINS_SPECIFIC_TO_ONE_SPECIES_OPTION_DEFAULT                         = false;
+    final static private String                                              MATRIX_MEAN_SCORE_BASED_GENOME_DISTANCE_SUFFIX                                = "_mean_score.pwd";
+    final static private String                                              MATRIX_SHARED_DOMAINS_BASED_GENOME_DISTANCE_SUFFIX                            = "_domains.pwd";
+    final static private String                                              MATRIX_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX                   = "_bin_combinations.pwd";
+    final static private String                                              NJ_TREE_MEAN_SCORE_BASED_GENOME_DISTANCE_SUFFIX                               = "_mean_score_NJ"
+                                                                                                                                                                   + ForesterConstants.PHYLO_XML_SUFFIX;
+    final static private String                                              NJ_TREE_SHARED_DOMAINS_BASED_GENOME_DISTANCE_SUFFIX                           = "_domains_NJ"
+                                                                                                                                                                   + ForesterConstants.PHYLO_XML_SUFFIX;
+    final static private String                                              NJ_TREE_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX                  = "_bin_combinations_NJ"
+                                                                                                                                                                   + ForesterConstants.PHYLO_XML_SUFFIX;
+    final static private String                                              FILTER_POSITIVE_OPTION                                                        = "pos_filter";
+    final static private String                                              FILTER_NEGATIVE_OPTION                                                        = "neg_filter";
+    final static private String                                              FILTER_NEGATIVE_DOMAINS_OPTION                                                = "neg_dom_filter";
+    final static private String                                              INPUT_GENOMES_FILE_OPTION                                                     = "genomes";
+    final static private String                                              INPUT_SPECIES_TREE_OPTION                                                     = "species_tree";
+    final static private String                                              SEQ_EXTRACT_OPTION                                                            = "prot_extract";
+    final static private String                                              PRG_VERSION                                                                   = "2.400";
+    final static private String                                              PRG_DATE                                                                      = "131106";
+    final static private String                                              E_MAIL                                                                        = "czmasek@burnham.org";
+    final static private String                                              WWW                                                                           = "https://sites.google.com/site/cmzmasek/home/software/forester/surfacing";
+    final static private boolean                                             IGNORE_DUFS_DEFAULT                                                           = true;
+    final static private boolean                                             IGNORE_COMBINATION_WITH_SAME_DEFAULLT                                         = false;
+    final static private double                                              MAX_E_VALUE_DEFAULT                                                           = -1;
+    public final static int                                                  MAX_ALLOWED_OVERLAP_DEFAULT                                                   = -1;
+    private static final String                                              RANDOM_SEED_FOR_FITCH_PARSIMONY_OPTION                                        = "random_seed";
+    private static final String                                              CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS                                      = "consider_bdc_direction";
+    private static final String                                              CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS_AND_ADJACENCY                        = "consider_bdc_adj";
+    public static final String                                               SEQ_EXTRACT_SUFFIX                                                            = ".prot";
+    public static final String                                               PLUS_MINUS_ANALYSIS_OPTION                                                    = "plus_minus";
+    public static final String                                               PLUS_MINUS_DOM_SUFFIX                                                         = "_plus_minus_dom.txt";
+    public static final String                                               PLUS_MINUS_DOM_SUFFIX_HTML                                                    = "_plus_minus_dom.html";
+    public static final String                                               PLUS_MINUS_DC_SUFFIX_HTML                                                     = "_plus_minus_dc.html";
+    public static final int                                                  PLUS_MINUS_ANALYSIS_MIN_DIFF_DEFAULT                                          = 0;
+    public static final double                                               PLUS_MINUS_ANALYSIS_FACTOR_DEFAULT                                            = 1.0;
+    public static final String                                               PLUS_MINUS_ALL_GO_IDS_DOM_SUFFIX                                              = "_plus_minus_go_ids_all.txt";
+    public static final String                                               PLUS_MINUS_PASSING_GO_IDS_DOM_SUFFIX                                          = "_plus_minus_go_ids_passing.txt";
+    private static final String                                              OUTPUT_LIST_OF_ALL_PROTEINS_OPTIONS                                           = "all_prot";
+    final static private String                                              OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION                         = "all_prot_e";
+    public static final boolean                                              VERBOSE                                                                       = false;
+    private static final String                                              OUTPUT_DOMAIN_COMBINATIONS_GAINED_MORE_THAN_ONCE_ANALYSIS_SUFFIX              = "_fitch_dc_gains_counts";
+    private static final String                                              OUTPUT_DOMAIN_COMBINATIONS_LOST_MORE_THAN_ONCE_ANALYSIS_SUFFIX                = "_fitch_dc_losses_counts";
+    private static final String                                              DOMAIN_LENGTHS_ANALYSIS_SUFFIX                                                = "_domain_lengths_analysis";
+    private static final boolean                                             PERFORM_DOMAIN_LENGTH_ANALYSIS                                                = true;
+    public static final String                                               ALL_PFAMS_ENCOUNTERED_SUFFIX                                                  = "_all_encountered_pfams";
+    public static final String                                               ALL_PFAMS_ENCOUNTERED_WITH_GO_ANNOTATION_SUFFIX                               = "_all_encountered_pfams_with_go_annotation";
+    public static final String                                               ENCOUNTERED_PFAMS_SUMMARY_SUFFIX                                              = "_encountered_pfams_summary";
+    public static final String                                               ALL_PFAMS_GAINED_AS_DOMAINS_SUFFIX                                            = "_all_pfams_gained_as_domains";
+    public static final String                                               ALL_PFAMS_LOST_AS_DOMAINS_SUFFIX                                              = "_all_pfams_lost_as_domains";
+    public static final String                                               ALL_PFAMS_GAINED_AS_DC_SUFFIX                                                 = "_all_pfams_gained_as_dc";
+    public static final String                                               ALL_PFAMS_LOST_AS_DC_SUFFIX                                                   = "_all_pfams_lost_as_dc";
+    public static final String                                               BASE_DIRECTORY_PER_NODE_DOMAIN_GAIN_LOSS_FILES                                = "PER_NODE_EVENTS";
+    public static final String                                               BASE_DIRECTORY_PER_SUBTREE_DOMAIN_GAIN_LOSS_FILES                             = "PER_SUBTREE_EVENTS";
+    public static final String                                               D_PROMISCUITY_FILE_SUFFIX                                                     = "_domain_promiscuities";
+    private static final String                                              LOG_FILE_SUFFIX                                                               = "_log.txt";
+    private static final String                                              DATA_FILE_SUFFIX                                                              = "_domain_combination_data.txt";
+    private static final String                                              DATA_FILE_DESC                                                                = "#SPECIES\tPRTEIN_ID\tN_TERM_DOMAIN\tC_TERM_DOMAIN\tN_TERM_DOMAIN_PER_DOMAIN_E_VALUE\tC_TERM_DOMAIN_PER_DOMAIN_E_VALUE\tN_TERM_DOMAIN_COUNTS_PER_PROTEIN\tC_TERM_DOMAIN_COUNTS_PER_PROTEIN";
+    private static final String                                              WRITE_TO_NEXUS_OPTION                                                         = "nexus";
+    private static final INDIVIDUAL_SCORE_CUTOFF                             INDIVIDUAL_SCORE_CUTOFF_DEFAULT                                               = INDIVIDUAL_SCORE_CUTOFF.FULL_SEQUENCE;                                                                                                                                                      //TODO look at me! change?
+    public static final String                                               INDEPENDENT_DC_GAINS_FITCH_PARS_COUNTS_OUTPUT_SUFFIX                          = "_indep_dc_gains_fitch_counts.txt";
+    public static final String                                               INDEPENDENT_DC_GAINS_FITCH_PARS_DC_OUTPUT_SUFFIX                              = "_indep_dc_gains_fitch_lists.txt";
+    public static final String                                               INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_SUFFIX               = "_indep_dc_gains_fitch_lists_for_go_mapping.txt";
+    public static final String                                               INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_UNIQUE_SUFFIX        = "_indep_dc_gains_fitch_lists_for_go_mapping_unique.txt";
+    public static final String                                               LIMIT_SPEC_FOR_PROT_EX                                                        = null;                                                                                                                                                                                       // e.g. "HUMAN"; set to null for not using this feature (default).
+    public static final String                                               BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH_MAPPED          = "_dc_MAPPED_secondary_features_fitch"
+                                                                                                                                                                   + ForesterConstants.PHYLO_XML_SUFFIX;
+    public static final String                                               INDEPENDENT_DC_GAINS_FITCH_PARS_COUNTS_MAPPED_OUTPUT_SUFFIX                   = "_indep_dc_gains_fitch_counts_MAPPED.txt";
+    public static final String                                               INDEPENDENT_DC_GAINS_FITCH_PARS_DC_MAPPED_OUTPUT_SUFFIX                       = "_indep_dc_gains_fitch_lists_MAPPED.txt";
+    public static final String                                               INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_SUFFIX        = "_indep_dc_gains_fitch_lists_for_go_mapping_MAPPED.txt";
+    public static final String                                               INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique_MAPPED.txt";
+    private static final boolean                                             CALC_SIMILARITY_SCORES                                                        = false;
 
     public static void main( final String args[] ) {
         final long start_time = new Date().getTime();
@@ -544,7 +542,7 @@ public class surfacing {
             ForesterUtil.fatalError( surfacing.PRG_NAME, "no input genomes file given: "
                     + surfacing.INPUT_GENOMES_FILE_OPTION + "=<file>" );
         }
-        DomainSimilarity.DomainSimilarityScoring scoring = SCORING_DEFAULT;
+        PrintableDomainSimilarity.DomainSimilarityScoring scoring = SCORING_DEFAULT;
         if ( cla.isOptionSet( surfacing.SCORING_OPTION ) ) {
             if ( !cla.isOptionValueSet( surfacing.SCORING_OPTION ) ) {
                 ForesterUtil.fatalError( surfacing.PRG_NAME,
@@ -556,13 +554,13 @@ public class surfacing {
             }
             final String scoring_str = cla.getOptionValue( surfacing.SCORING_OPTION );
             if ( scoring_str.equals( surfacing.SCORING_DOMAIN_COUNT_BASED ) ) {
-                scoring = DomainSimilarity.DomainSimilarityScoring.DOMAINS;
+                scoring = PrintableDomainSimilarity.DomainSimilarityScoring.DOMAINS;
             }
             else if ( scoring_str.equals( surfacing.SCORING_COMBINATION_BASED ) ) {
-                scoring = DomainSimilarity.DomainSimilarityScoring.COMBINATIONS;
+                scoring = PrintableDomainSimilarity.DomainSimilarityScoring.COMBINATIONS;
             }
             else if ( scoring_str.equals( surfacing.SCORING_PROTEIN_COUNT_BASED ) ) {
-                scoring = DomainSimilarity.DomainSimilarityScoring.PROTEINS;
+                scoring = PrintableDomainSimilarity.DomainSimilarityScoring.PROTEINS;
             }
             else {
                 ForesterUtil.fatalError( surfacing.PRG_NAME, "unknown value \"" + scoring_str
@@ -641,8 +639,8 @@ public class surfacing {
             }
             query_domain_ids = cla.getOptionValue( surfacing.SEQ_EXTRACT_OPTION );
         }
-        DomainSimilarity.DomainSimilaritySortField domain_similarity_sort_field = DOMAIN_SORT_FILD_DEFAULT;
-        DomainSimilarity.DomainSimilaritySortField domain_similarity_sort_field_for_automated_pwc = DOMAIN_SORT_FILD_DEFAULT;
+        PrintableDomainSimilarity.DomainSimilaritySortField domain_similarity_sort_field = DOMAIN_SORT_FILD_DEFAULT;
+        PrintableDomainSimilarity.DomainSimilaritySortField domain_similarity_sort_field_for_automated_pwc = DOMAIN_SORT_FILD_DEFAULT;
         if ( cla.isOptionSet( surfacing.DOMAIN_SIMILARITY_SORT_OPTION ) ) {
             if ( !cla.isOptionValueSet( surfacing.DOMAIN_SIMILARITY_SORT_OPTION ) ) {
                 ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for domain combinations similarities sorting: -"
@@ -656,40 +654,40 @@ public class surfacing {
             }
             final String sort_str = cla.getOptionValue( surfacing.DOMAIN_SIMILARITY_SORT_OPTION ).toLowerCase();
             if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_ALPHA ) ) {
-                domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID;
-                domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID;
+                domain_similarity_sort_field = PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID;
+                domain_similarity_sort_field_for_automated_pwc = PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID;
             }
             else if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_MAX ) ) {
-                domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.MAX;
-                domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID;
+                domain_similarity_sort_field = PrintableDomainSimilarity.DomainSimilaritySortField.MAX;
+                domain_similarity_sort_field_for_automated_pwc = PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID;
             }
             else if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_MIN ) ) {
-                domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.MIN;
-                domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID;
+                domain_similarity_sort_field = PrintableDomainSimilarity.DomainSimilaritySortField.MIN;
+                domain_similarity_sort_field_for_automated_pwc = PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID;
             }
             else if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_MEAN ) ) {
-                domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.MEAN;
-                domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.MEAN;
+                domain_similarity_sort_field = PrintableDomainSimilarity.DomainSimilaritySortField.MEAN;
+                domain_similarity_sort_field_for_automated_pwc = PrintableDomainSimilarity.DomainSimilaritySortField.MEAN;
             }
             else if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_SPECIES_COUNT ) ) {
-                domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.SPECIES_COUNT;
-                domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID;
+                domain_similarity_sort_field = PrintableDomainSimilarity.DomainSimilaritySortField.SPECIES_COUNT;
+                domain_similarity_sort_field_for_automated_pwc = PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID;
             }
             else if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_SD ) ) {
-                domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.SD;
-                domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID;
+                domain_similarity_sort_field = PrintableDomainSimilarity.DomainSimilaritySortField.SD;
+                domain_similarity_sort_field_for_automated_pwc = PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID;
             }
             else if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_DIFF ) ) {
-                domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.MAX_DIFFERENCE;
-                domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.MAX_DIFFERENCE;
+                domain_similarity_sort_field = PrintableDomainSimilarity.DomainSimilaritySortField.MAX_DIFFERENCE;
+                domain_similarity_sort_field_for_automated_pwc = PrintableDomainSimilarity.DomainSimilaritySortField.MAX_DIFFERENCE;
             }
             else if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_ABS_COUNTS_DIFF ) ) {
-                domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE;
-                domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE;
+                domain_similarity_sort_field = PrintableDomainSimilarity.DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE;
+                domain_similarity_sort_field_for_automated_pwc = PrintableDomainSimilarity.DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE;
             }
             else if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_COUNTS_DIFF ) ) {
-                domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE;
-                domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE;
+                domain_similarity_sort_field = PrintableDomainSimilarity.DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE;
+                domain_similarity_sort_field_for_automated_pwc = PrintableDomainSimilarity.DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE;
             }
             else {
                 ForesterUtil.fatalError( surfacing.PRG_NAME, "unknown value \"" + sort_str
@@ -875,9 +873,9 @@ public class surfacing {
                         + surfacing.GO_NAMESPACE_LIMIT_OPTION_CELLULAR_COMPONENT + ">\"" );
             }
         }
-        if ( ( domain_similarity_sort_field == DomainSimilarity.DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE )
+        if ( ( domain_similarity_sort_field == PrintableDomainSimilarity.DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE )
                 && ( number_of_genomes > 2 ) ) {
-            domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE;
+            domain_similarity_sort_field = PrintableDomainSimilarity.DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE;
         }
         File[] intree_files = null;
         Phylogeny[] intrees = null;
@@ -1778,7 +1776,7 @@ public class surfacing {
         if ( domain_id_to_go_ids_map != null ) {
             go_annotation_output = DomainSimilarityCalculator.GoAnnotationOutput.ALL;
         }
-        final SortedSet<DomainSimilarity> similarities = calc
+        final SortedSet<PrintableDomainSimilarity> similarities = calc
                 .calculateSimilarities( pw_calc,
                                         gwcd_list,
                                         ignore_domains_without_combs_in_all_spec,
index 3f799d1..518c32e 100644 (file)
@@ -35,15 +35,6 @@ public class BasicBinaryDomainCombination implements BinaryDomainCombination {
     String _id1;
     String _str;
 
-    BasicBinaryDomainCombination() {
-        _id0 = null;
-        _id1 = null;
-    }
-
-    private String getAsStr() {
-        return _id0 + SEPARATOR + _id1;
-    }
-
     public BasicBinaryDomainCombination( final String id0, final String id1 ) {
         if ( ( id0 == null ) || ( id1 == null ) ) {
             throw new IllegalArgumentException( "attempt to create binary domain combination using null" );
@@ -58,6 +49,11 @@ public class BasicBinaryDomainCombination implements BinaryDomainCombination {
         }
     }
 
+    BasicBinaryDomainCombination() {
+        _id0 = null;
+        _id1 = null;
+    }
+
     @Override
     public int compareTo( final BinaryDomainCombination binary_domain_combination ) {
         if ( binary_domain_combination.getClass() != this.getClass() ) {
@@ -158,6 +154,10 @@ public class BasicBinaryDomainCombination implements BinaryDomainCombination {
         return getAsStr();
     }
 
+    private String getAsStr() {
+        return _id0 + SEPARATOR + _id1;
+    }
+
     public static BinaryDomainCombination createInstance( final String ids ) {
         if ( ids.indexOf( BinaryDomainCombination.SEPARATOR ) < 1 ) {
             throw new IllegalArgumentException( "Unexpected format for binary domain combination [" + ids + "]" );
index c04333c..2f48a64 100644 (file)
@@ -40,11 +40,11 @@ import org.forester.util.ForesterUtil;
 
 public class BasicCombinableDomains implements CombinableDomains {
 
+    final private TreeMap<String, Integer> _combining_domains;
     final private String                   _key_domain;
     private int                            _key_domain_count;
-    final private Species                  _species;
-    final private TreeMap<String, Integer> _combining_domains;
     final private Set<String>              _proteins_with_key_domain;
+    final private Species                  _species;
 
     public BasicCombinableDomains( final String key_domain, final Species species ) {
         _key_domain = key_domain;
@@ -117,10 +117,6 @@ public class BasicCombinableDomains implements CombinableDomains {
         return sb;
     }
 
-    protected TreeMap<String, Integer> getCombiningDomains() {
-        return _combining_domains;
-    }
-
     @Override
     public String getKeyDomain() {
         return _key_domain;
@@ -132,6 +128,11 @@ public class BasicCombinableDomains implements CombinableDomains {
     }
 
     @Override
+    public Set<String> getKeyDomainProteins() {
+        return _proteins_with_key_domain;
+    }
+
+    @Override
     public int getKeyDomainProteinsCount() {
         return getKeyDomainProteins().size();
     }
@@ -190,8 +191,7 @@ public class BasicCombinableDomains implements CombinableDomains {
         return sb.toString();
     }
 
-    @Override
-    public Set<String> getKeyDomainProteins() {
-        return _proteins_with_key_domain;
+    protected TreeMap<String, Integer> getCombiningDomains() {
+        return _combining_domains;
     }
 }
index 81cef33..c4c5ab7 100644 (file)
@@ -42,12 +42,12 @@ import org.forester.util.ForesterUtil;
 
 public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculator {
 
-    final DomainSimilarity.DomainSimilaritySortField _sort;
-    private final boolean                            _sort_by_species_count_first;
-    private final boolean                            _treat_as_binary_comparison;
-    private final boolean                            _calc_similarity_score;
+    final PrintableDomainSimilarity.DomainSimilaritySortField _sort;
+    private final boolean                                     _calc_similarity_score;
+    private final boolean                                     _sort_by_species_count_first;
+    private final boolean                                     _treat_as_binary_comparison;
 
-    public BasicDomainSimilarityCalculator( final DomainSimilarity.DomainSimilaritySortField sort,
+    public BasicDomainSimilarityCalculator( final PrintableDomainSimilarity.DomainSimilaritySortField sort,
                                             final boolean sort_by_species_count_first,
                                             final boolean treat_as_binary_comparison,
                                             final boolean calc_similarity_score ) {
@@ -57,19 +57,15 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat
         _calc_similarity_score = calc_similarity_score;
     }
 
-    public boolean isCalcSimilarityScore() {
-        return _calc_similarity_score;
-    }
-
     @Override
-    public SortedSet<DomainSimilarity> calculateSimilarities( final PairwiseDomainSimilarityCalculator pairwise_calculator,
-                                                              final List<GenomeWideCombinableDomains> cdc_list,
-                                                              final boolean ignore_domains_without_combinations_in_any_genome,
-                                                              final boolean ignore_domains_specific_to_one_genome ) {
+    public SortedSet<PrintableDomainSimilarity> calculateSimilarities( final PairwiseDomainSimilarityCalculator pairwise_calculator,
+                                                                       final List<GenomeWideCombinableDomains> cdc_list,
+                                                                       final boolean ignore_domains_without_combinations_in_any_genome,
+                                                                       final boolean ignore_domains_specific_to_one_genome ) {
         if ( cdc_list.size() < 2 ) {
             throw new IllegalArgumentException( "attempt to calculate multiple combinable domains similarity for less than two combinale domains collections" );
         }
-        final SortedSet<DomainSimilarity> similarities = new TreeSet<DomainSimilarity>();
+        final SortedSet<PrintableDomainSimilarity> similarities = new TreeSet<PrintableDomainSimilarity>();
         final SortedSet<String> keys = new TreeSet<String>();
         for( final GenomeWideCombinableDomains cdc : cdc_list ) {
             keys.addAll( ( cdc ).getAllCombinableDomainsIds().keySet() );
@@ -102,7 +98,7 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat
             }
             if ( same_id_cd_list.size() > 0 ) {
                 if ( !ignore_domains_specific_to_one_genome || ( same_id_cd_list.size() > 1 ) ) {
-                    final DomainSimilarity s = calculateSimilarity( pairwise_calculator, same_id_cd_list );
+                    final PrintableDomainSimilarity s = calculateSimilarity( pairwise_calculator, same_id_cd_list );
                     if ( s != null ) {
                         similarities.add( s );
                     }
@@ -119,8 +115,12 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat
         return similarities;
     }
 
-    private DomainSimilarity calculateSimilarity( final PairwiseDomainSimilarityCalculator pairwise_calculator,
-                                                  final List<CombinableDomains> domains_list ) {
+    public boolean isCalcSimilarityScore() {
+        return _calc_similarity_score;
+    }
+
+    private PrintableDomainSimilarity calculateSimilarity( final PairwiseDomainSimilarityCalculator pairwise_calculator,
+                                                           final List<CombinableDomains> domains_list ) {
         if ( domains_list.size() == 1 ) {
             final SortedMap<Species, SpeciesSpecificDcData> species_data = new TreeMap<Species, SpeciesSpecificDcData>();
             species_data.put( domains_list.get( 0 ).getSpecies(),
@@ -199,7 +199,7 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat
                 max_difference = Math.abs( max_difference );
             }
         }
-        DomainSimilarity similarity = null;
+        PrintableDomainSimilarity similarity = null;
         if ( !isCalcSimilarityScore() ) {
             similarity = new PrintableDomainSimilarity( domains_list.get( 0 ),
                                                         max_difference_in_counts,
index 3e59603..1d115b2 100644 (file)
@@ -26,18 +26,18 @@ import org.forester.util.ForesterUtil;
 
 public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDomains {
 
-    private static final Comparator<CombinableDomains> DESCENDING_KEY_DOMAIN_COUNT_ORDER          = new Comparator<CombinableDomains>() {
+    private static final Comparator<CombinableDomains> DESCENDING_COMBINATIONS_COUNT_ORDER        = new Comparator<CombinableDomains>() {
 
                                                                                                       @Override
                                                                                                       public int compare( final CombinableDomains d1,
                                                                                                                           final CombinableDomains d2 ) {
-                                                                                                          if ( d1.getKeyDomainCount() < d2
-                                                                                                                  .getKeyDomainCount() ) {
+                                                                                                          if ( d1.getNumberOfCombinableDomains() < d2
+                                                                                                                  .getNumberOfCombinableDomains() ) {
                                                                                                               return 1;
                                                                                                           }
                                                                                                           else if ( d1
-                                                                                                                  .getKeyDomainCount() > d2
-                                                                                                                  .getKeyDomainCount() ) {
+                                                                                                                  .getNumberOfCombinableDomains() > d2
+                                                                                                                  .getNumberOfCombinableDomains() ) {
                                                                                                               return -1;
                                                                                                           }
                                                                                                           else {
@@ -48,18 +48,18 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom
                                                                                                           }
                                                                                                       }
                                                                                                   };
-    private static final Comparator<CombinableDomains> DESCENDING_KEY_DOMAIN_PROTEINS_COUNT_ORDER = new Comparator<CombinableDomains>() {
+    private static final Comparator<CombinableDomains> DESCENDING_KEY_DOMAIN_COUNT_ORDER          = new Comparator<CombinableDomains>() {
 
                                                                                                       @Override
                                                                                                       public int compare( final CombinableDomains d1,
                                                                                                                           final CombinableDomains d2 ) {
-                                                                                                          if ( d1.getKeyDomainProteinsCount() < d2
-                                                                                                                  .getKeyDomainProteinsCount() ) {
+                                                                                                          if ( d1.getKeyDomainCount() < d2
+                                                                                                                  .getKeyDomainCount() ) {
                                                                                                               return 1;
                                                                                                           }
                                                                                                           else if ( d1
-                                                                                                                  .getKeyDomainProteinsCount() > d2
-                                                                                                                  .getKeyDomainProteinsCount() ) {
+                                                                                                                  .getKeyDomainCount() > d2
+                                                                                                                  .getKeyDomainCount() ) {
                                                                                                               return -1;
                                                                                                           }
                                                                                                           else {
@@ -70,18 +70,18 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom
                                                                                                           }
                                                                                                       }
                                                                                                   };
-    private static final Comparator<CombinableDomains> DESCENDING_COMBINATIONS_COUNT_ORDER        = new Comparator<CombinableDomains>() {
+    private static final Comparator<CombinableDomains> DESCENDING_KEY_DOMAIN_PROTEINS_COUNT_ORDER = new Comparator<CombinableDomains>() {
 
                                                                                                       @Override
                                                                                                       public int compare( final CombinableDomains d1,
                                                                                                                           final CombinableDomains d2 ) {
-                                                                                                          if ( d1.getNumberOfCombinableDomains() < d2
-                                                                                                                  .getNumberOfCombinableDomains() ) {
+                                                                                                          if ( d1.getKeyDomainProteinsCount() < d2
+                                                                                                                  .getKeyDomainProteinsCount() ) {
                                                                                                               return 1;
                                                                                                           }
                                                                                                           else if ( d1
-                                                                                                                  .getNumberOfCombinableDomains() > d2
-                                                                                                                  .getNumberOfCombinableDomains() ) {
+                                                                                                                  .getKeyDomainProteinsCount() > d2
+                                                                                                                  .getKeyDomainProteinsCount() ) {
                                                                                                               return -1;
                                                                                                           }
                                                                                                           else {
@@ -93,8 +93,8 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom
                                                                                                       }
                                                                                                   };
     final private SortedMap<String, CombinableDomains> _combinable_domains_map;
-    final private Species                              _species;
     final private DomainCombinationType                _dc_type;
+    final private Species                              _species;
 
     private BasicGenomeWideCombinableDomains( final Species species, final DomainCombinationType dc_type ) {
         _combinable_domains_map = new TreeMap<String, CombinableDomains>();
@@ -102,10 +102,6 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom
         _dc_type = dc_type;
     }
 
-    private void add( final String key, final CombinableDomains cdc ) {
-        _combinable_domains_map.put( key, cdc );
-    }
-
     @Override
     public boolean contains( final String key_id ) {
         return _combinable_domains_map.containsKey( key_id );
@@ -220,16 +216,8 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom
         return sb;
     }
 
-    private static void countDomains( final Map<String, Integer> domain_counts,
-                                      final Set<String> saw_c,
-                                      final String id_i ) {
-        if ( domain_counts.containsKey( id_i ) ) {
-            domain_counts.put( id_i, 1 + domain_counts.get( ( id_i ) ) );
-        }
-        else {
-            domain_counts.put( id_i, 1 );
-        }
-        saw_c.add( id_i );
+    private void add( final String key, final CombinableDomains cdc ) {
+        _combinable_domains_map.put( key, cdc );
     }
 
     public static BasicGenomeWideCombinableDomains createInstance( final List<Protein> protein_list,
@@ -355,4 +343,16 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom
         }
         return instance;
     }
+
+    private static void countDomains( final Map<String, Integer> domain_counts,
+                                      final Set<String> saw_c,
+                                      final String id_i ) {
+        if ( domain_counts.containsKey( id_i ) ) {
+            domain_counts.put( id_i, 1 + domain_counts.get( ( id_i ) ) );
+        }
+        else {
+            domain_counts.put( id_i, 1 );
+        }
+        saw_c.add( id_i );
+    }
 }
index 028810e..28fa0e2 100644 (file)
@@ -43,17 +43,6 @@ public interface CombinableDomains {
     public void addCombinableDomain( final String protein_domain );
 
     /**
-     * 
-     * This must return all domains in this set of combinable domains (i.e.
-     * the key domain and all domains which can combine with the key domain).
-     * 
-     *  @return all domains
-     */
-    List<String> getAllDomains();
-
-    List<String> getCombinableDomains();
-
-    /**
      * Returns the combinable domain identifiers sorted in alphabetical manner: -
      * keys are the combinable domain identifiers - values are the counts of
      * proteins exhibiting a particular combination
@@ -80,6 +69,8 @@ public interface CombinableDomains {
      */
     public int getKeyDomainCount();
 
+    public Set<String> getKeyDomainProteins();
+
     /**
      * Returns how many proteins with the key domain are present in a given
      * species genome.
@@ -88,8 +79,6 @@ public interface CombinableDomains {
      */
     public int getKeyDomainProteinsCount();
 
-    public Set<String> getKeyDomainProteins();
-
     public int getNumberOfCombinableDomains();
 
     public int getNumberOfProteinsExhibitingCombination( final String protein_domain );
@@ -103,6 +92,21 @@ public interface CombinableDomains {
 
     public boolean isCombinable( final String protein_domain );
 
+    public List<BinaryDomainCombination> toBinaryDomainCombinations();
+
+    void addKeyDomainProtein( String protein );
+
+    /**
+     * 
+     * This must return all domains in this set of combinable domains (i.e.
+     * the key domain and all domains which can combine with the key domain).
+     * 
+     *  @return all domains
+     */
+    List<String> getAllDomains();
+
+    List<String> getCombinableDomains();
+
     /**
      * Sets how many times the key domain is present in a given species genome.
      * 
@@ -110,8 +114,4 @@ public interface CombinableDomains {
      *            key domain count in species
      */
     void setKeyDomainCount( final int key_domain_count );
-
-    public List<BinaryDomainCombination> toBinaryDomainCombinations();
-
-    void addKeyDomainProtein( String protein );
 }
\ No newline at end of file
index f271afa..a98298d 100644 (file)
@@ -29,9 +29,9 @@ package org.forester.surfacing;
 
 public class CombinationsBasedPairwiseDomainSimilarity implements PairwiseDomainSimilarity {
 
-    private final int    _same_domains;
-    private final int    _different_domains;
     private final int    _difference_in_counts;
+    private final int    _different_domains;
+    private final int    _same_domains;
     private final double _score;
 
     public CombinationsBasedPairwiseDomainSimilarity( final int same_domains,
index caf6cb0..a61ba30 100644 (file)
@@ -35,18 +35,18 @@ public class DomainArchitectureBasedGenomeSimilarityCalculator {
 
     public static final double                MAX_SIMILARITY_SCORE = 1.0;
     public static final double                MIN_SIMILARITY_SCORE = 0.0;
+    private Set<BinaryDomainCombination>      _all_binary_domain_combinations;
+    private Set<String>                       _all_domains;
+    private boolean                           _allow_domains_to_be_ignored;
+    private Set<BinaryDomainCombination>      _binary_domain_combinations_specific_to_0;
+    private Set<BinaryDomainCombination>      _binary_domain_combinations_specific_to_1;
     final private GenomeWideCombinableDomains _combinable_domains_genome_0;
     final private GenomeWideCombinableDomains _combinable_domains_genome_1;
     private Set<String>                       _domain_ids_to_ignore;
-    private boolean                           _allow_domains_to_be_ignored;
-    private Set<String>                       _all_domains;
-    private Set<String>                       _shared_domains;
     private Set<String>                       _domains_specific_to_0;
     private Set<String>                       _domains_specific_to_1;
-    private Set<BinaryDomainCombination>      _all_binary_domain_combinations;
     private Set<BinaryDomainCombination>      _shared_binary_domain_combinations;
-    private Set<BinaryDomainCombination>      _binary_domain_combinations_specific_to_0;
-    private Set<BinaryDomainCombination>      _binary_domain_combinations_specific_to_1;
+    private Set<String>                       _shared_domains;
 
     public DomainArchitectureBasedGenomeSimilarityCalculator( final GenomeWideCombinableDomains combinable_domains_genome_0,
                                                               final GenomeWideCombinableDomains combinable_domains_genome_1 ) {
@@ -115,17 +115,6 @@ public class DomainArchitectureBasedGenomeSimilarityCalculator {
         setDomainIdsToIgnore( new HashSet<String>() );
     }
 
-    private void forceRecalculation() {
-        _all_domains = null;
-        _shared_domains = null;
-        _domains_specific_to_0 = null;
-        _domains_specific_to_1 = null;
-        _all_binary_domain_combinations = null;
-        _shared_binary_domain_combinations = null;
-        _binary_domain_combinations_specific_to_0 = null;
-        _binary_domain_combinations_specific_to_1 = null;
-    }
-
     /**
      * Does not return binary combinations which contain one or two domains
      * to be ignored -- if ignoring is allowed.
@@ -169,30 +158,6 @@ public class DomainArchitectureBasedGenomeSimilarityCalculator {
         return _all_domains;
     }
 
-    private Set<BinaryDomainCombination> getBinaryDomainCombinationsSpecificToGenome( final boolean specific_to_genome_0 ) {
-        final Set<BinaryDomainCombination> specific = new HashSet<BinaryDomainCombination>();
-        final Set<BinaryDomainCombination> bc0 = getCombinableDomainsGenome0().toBinaryDomainCombinations();
-        final Set<BinaryDomainCombination> bc1 = getCombinableDomainsGenome1().toBinaryDomainCombinations();
-        if ( specific_to_genome_0 ) {
-            for( final BinaryDomainCombination binary_domain_combination0 : bc0 ) {
-                if ( !bc1.contains( binary_domain_combination0 ) ) {
-                    specific.add( binary_domain_combination0 );
-                }
-            }
-        }
-        else {
-            for( final BinaryDomainCombination binary_domain_combination1 : bc1 ) {
-                if ( !bc0.contains( binary_domain_combination1 ) ) {
-                    specific.add( binary_domain_combination1 );
-                }
-            }
-        }
-        if ( isAllowDomainsToBeIgnored() && !getDomainIdsToIgnore().isEmpty() ) {
-            return pruneBinaryCombinations( specific );
-        }
-        return specific;
-    }
-
     public Set<BinaryDomainCombination> getBinaryDomainCombinationsSpecificToGenome0() {
         if ( _binary_domain_combinations_specific_to_0 == null ) {
             _binary_domain_combinations_specific_to_0 = getBinaryDomainCombinationsSpecificToGenome( true );
@@ -207,42 +172,6 @@ public class DomainArchitectureBasedGenomeSimilarityCalculator {
         return _binary_domain_combinations_specific_to_1;
     }
 
-    private GenomeWideCombinableDomains getCombinableDomainsGenome0() {
-        return _combinable_domains_genome_0;
-    }
-
-    private GenomeWideCombinableDomains getCombinableDomainsGenome1() {
-        return _combinable_domains_genome_1;
-    }
-
-    private Set<String> getDomainIdsToIgnore() {
-        return _domain_ids_to_ignore;
-    }
-
-    private Set<String> getDomainsSpecificToGenome( final boolean specific_to_genome_0 ) {
-        final Set<String> specific = new HashSet<String>();
-        final Set<String> d0 = getCombinableDomainsGenome0().getAllDomainIds();
-        final Set<String> d1 = getCombinableDomainsGenome1().getAllDomainIds();
-        if ( specific_to_genome_0 ) {
-            for( final String domain0 : d0 ) {
-                if ( !d1.contains( domain0 ) ) {
-                    specific.add( domain0 );
-                }
-            }
-        }
-        else {
-            for( final String domain1 : d1 ) {
-                if ( !d0.contains( domain1 ) ) {
-                    specific.add( domain1 );
-                }
-            }
-        }
-        if ( isAllowDomainsToBeIgnored() && !getDomainIdsToIgnore().isEmpty() ) {
-            return pruneDomains( specific );
-        }
-        return specific;
-    }
-
     public Set<String> getDomainsSpecificToGenome0() {
         if ( _domains_specific_to_0 == null ) {
             _domains_specific_to_0 = getDomainsSpecificToGenome( true );
@@ -293,6 +222,87 @@ public class DomainArchitectureBasedGenomeSimilarityCalculator {
         return _shared_domains;
     }
 
+    public void setAllowDomainsToBeIgnored( final boolean allow_domains_to_be_ignored ) {
+        forceRecalculation();
+        _allow_domains_to_be_ignored = allow_domains_to_be_ignored;
+    }
+
+    void setDomainIdsToIgnore( final Set<String> domain_ids_to_ignore ) {
+        forceRecalculation();
+        _domain_ids_to_ignore = domain_ids_to_ignore;
+    }
+
+    private void forceRecalculation() {
+        _all_domains = null;
+        _shared_domains = null;
+        _domains_specific_to_0 = null;
+        _domains_specific_to_1 = null;
+        _all_binary_domain_combinations = null;
+        _shared_binary_domain_combinations = null;
+        _binary_domain_combinations_specific_to_0 = null;
+        _binary_domain_combinations_specific_to_1 = null;
+    }
+
+    private Set<BinaryDomainCombination> getBinaryDomainCombinationsSpecificToGenome( final boolean specific_to_genome_0 ) {
+        final Set<BinaryDomainCombination> specific = new HashSet<BinaryDomainCombination>();
+        final Set<BinaryDomainCombination> bc0 = getCombinableDomainsGenome0().toBinaryDomainCombinations();
+        final Set<BinaryDomainCombination> bc1 = getCombinableDomainsGenome1().toBinaryDomainCombinations();
+        if ( specific_to_genome_0 ) {
+            for( final BinaryDomainCombination binary_domain_combination0 : bc0 ) {
+                if ( !bc1.contains( binary_domain_combination0 ) ) {
+                    specific.add( binary_domain_combination0 );
+                }
+            }
+        }
+        else {
+            for( final BinaryDomainCombination binary_domain_combination1 : bc1 ) {
+                if ( !bc0.contains( binary_domain_combination1 ) ) {
+                    specific.add( binary_domain_combination1 );
+                }
+            }
+        }
+        if ( isAllowDomainsToBeIgnored() && !getDomainIdsToIgnore().isEmpty() ) {
+            return pruneBinaryCombinations( specific );
+        }
+        return specific;
+    }
+
+    private GenomeWideCombinableDomains getCombinableDomainsGenome0() {
+        return _combinable_domains_genome_0;
+    }
+
+    private GenomeWideCombinableDomains getCombinableDomainsGenome1() {
+        return _combinable_domains_genome_1;
+    }
+
+    private Set<String> getDomainIdsToIgnore() {
+        return _domain_ids_to_ignore;
+    }
+
+    private Set<String> getDomainsSpecificToGenome( final boolean specific_to_genome_0 ) {
+        final Set<String> specific = new HashSet<String>();
+        final Set<String> d0 = getCombinableDomainsGenome0().getAllDomainIds();
+        final Set<String> d1 = getCombinableDomainsGenome1().getAllDomainIds();
+        if ( specific_to_genome_0 ) {
+            for( final String domain0 : d0 ) {
+                if ( !d1.contains( domain0 ) ) {
+                    specific.add( domain0 );
+                }
+            }
+        }
+        else {
+            for( final String domain1 : d1 ) {
+                if ( !d0.contains( domain1 ) ) {
+                    specific.add( domain1 );
+                }
+            }
+        }
+        if ( isAllowDomainsToBeIgnored() && !getDomainIdsToIgnore().isEmpty() ) {
+            return pruneDomains( specific );
+        }
+        return specific;
+    }
+
     private void init() {
         deleteAllDomainIdsToIgnore();
         setAllowDomainsToBeIgnored( false );
@@ -322,14 +332,4 @@ public class DomainArchitectureBasedGenomeSimilarityCalculator {
         }
         return pruned;
     }
-
-    public void setAllowDomainsToBeIgnored( final boolean allow_domains_to_be_ignored ) {
-        forceRecalculation();
-        _allow_domains_to_be_ignored = allow_domains_to_be_ignored;
-    }
-
-    void setDomainIdsToIgnore( final Set<String> domain_ids_to_ignore ) {
-        forceRecalculation();
-        _domain_ids_to_ignore = domain_ids_to_ignore;
-    }
 }
\ No newline at end of file
index e172659..8f9a249 100644 (file)
@@ -58,77 +58,11 @@ import org.forester.util.ForesterUtil;
  */
 public final class DomainCountsDifferenceUtil {
 
-    private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES = COPY_CALCULATION_MODE.MIN;
     private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES   = COPY_CALCULATION_MODE.MIN;
+    private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES = COPY_CALCULATION_MODE.MIN;
     private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_LOW_COPY_SPECIES         = COPY_CALCULATION_MODE.MAX;
     private static final String                PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX         = ".prot";
 
-    //FIXME really needs to be tested! 
-    private static void addCounts( final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
-                                   final BinaryDomainCombination dc,
-                                   final GenomeWideCombinableDomains genome,
-                                   final Set<BinaryDomainCombination> bdc ) {
-        if ( !copy_counts.containsKey( dc ) ) {
-            copy_counts.put( dc, new ArrayList<Integer>() );
-        }
-        if ( bdc.contains( dc )
-                && ( ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains().get( dc.getId1() ) != null ) ) {
-            final int count = ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains()
-                    .get( dc.getId1() );
-            copy_counts.get( dc ).add( count );
-        }
-        else {
-            copy_counts.get( dc ).add( 0 );
-        }
-    }
-
-    private static void addCounts( final SortedMap<String, List<Integer>> copy_counts,
-                                   final String domain,
-                                   final GenomeWideCombinableDomains genome ) {
-        if ( !copy_counts.containsKey( domain ) ) {
-            copy_counts.put( domain, new ArrayList<Integer>() );
-        }
-        if ( genome.contains( domain ) ) {
-            copy_counts.get( domain ).add( genome.get( domain ).getKeyDomainProteinsCount() );
-        }
-        else {
-            copy_counts.get( domain ).add( 0 );
-        }
-    }
-
-    private static StringBuilder addGoInformation( final String d,
-                                                   final Map<String, List<GoId>> domain_id_to_go_ids_map,
-                                                   final Map<GoId, GoTerm> go_id_to_term_map ) {
-        final StringBuilder sb = new StringBuilder();
-        if ( ( domain_id_to_go_ids_map == null ) || domain_id_to_go_ids_map.isEmpty()
-                || !domain_id_to_go_ids_map.containsKey( d ) ) {
-            return sb;
-        }
-        final List<GoId> go_ids = domain_id_to_go_ids_map.get( d );
-        for( int i = 0; i < go_ids.size(); ++i ) {
-            final GoId go_id = go_ids.get( i );
-            if ( go_id_to_term_map.containsKey( go_id ) ) {
-                appendGoTerm( sb, go_id_to_term_map.get( go_id ) );
-                sb.append( "<br>" );
-            }
-            else {
-                sb.append( "go id \"" + go_id + "\" not found [" + d + "]" );
-            }
-        }
-        return sb;
-    }
-
-    private static void appendGoTerm( final StringBuilder sb, final GoTerm go_term ) {
-        final GoId go_id = go_term.getGoId();
-        sb.append( "<a href=\"" + SurfacingConstants.AMIGO_LINK + go_id + "\" target=\"amigo_window\">" + go_id
-                + "</a>" );
-        sb.append( ":" );
-        sb.append( go_term.getName() );
-        sb.append( " [" );
-        sb.append( go_term.getGoNameSpace().toShortString() );
-        sb.append( "]" );
-    }
-
     public static void calculateCopyNumberDifferences( final List<GenomeWideCombinableDomains> genomes,
                                                        final SortedMap<Species, List<Protein>> protein_lists_per_species,
                                                        final List<String> high_copy_base_species,
@@ -301,6 +235,72 @@ public final class DomainCountsDifferenceUtil {
         writeGoIdsToFile( passing_gos_writer, go_ids_of_passing_domains );
     }
 
+    //FIXME really needs to be tested! 
+    private static void addCounts( final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
+                                   final BinaryDomainCombination dc,
+                                   final GenomeWideCombinableDomains genome,
+                                   final Set<BinaryDomainCombination> bdc ) {
+        if ( !copy_counts.containsKey( dc ) ) {
+            copy_counts.put( dc, new ArrayList<Integer>() );
+        }
+        if ( bdc.contains( dc )
+                && ( ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains().get( dc.getId1() ) != null ) ) {
+            final int count = ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains()
+                    .get( dc.getId1() );
+            copy_counts.get( dc ).add( count );
+        }
+        else {
+            copy_counts.get( dc ).add( 0 );
+        }
+    }
+
+    private static void addCounts( final SortedMap<String, List<Integer>> copy_counts,
+                                   final String domain,
+                                   final GenomeWideCombinableDomains genome ) {
+        if ( !copy_counts.containsKey( domain ) ) {
+            copy_counts.put( domain, new ArrayList<Integer>() );
+        }
+        if ( genome.contains( domain ) ) {
+            copy_counts.get( domain ).add( genome.get( domain ).getKeyDomainProteinsCount() );
+        }
+        else {
+            copy_counts.get( domain ).add( 0 );
+        }
+    }
+
+    private static StringBuilder addGoInformation( final String d,
+                                                   final Map<String, List<GoId>> domain_id_to_go_ids_map,
+                                                   final Map<GoId, GoTerm> go_id_to_term_map ) {
+        final StringBuilder sb = new StringBuilder();
+        if ( ( domain_id_to_go_ids_map == null ) || domain_id_to_go_ids_map.isEmpty()
+                || !domain_id_to_go_ids_map.containsKey( d ) ) {
+            return sb;
+        }
+        final List<GoId> go_ids = domain_id_to_go_ids_map.get( d );
+        for( int i = 0; i < go_ids.size(); ++i ) {
+            final GoId go_id = go_ids.get( i );
+            if ( go_id_to_term_map.containsKey( go_id ) ) {
+                appendGoTerm( sb, go_id_to_term_map.get( go_id ) );
+                sb.append( "<br>" );
+            }
+            else {
+                sb.append( "go id \"" + go_id + "\" not found [" + d + "]" );
+            }
+        }
+        return sb;
+    }
+
+    private static void appendGoTerm( final StringBuilder sb, final GoTerm go_term ) {
+        final GoId go_id = go_term.getGoId();
+        sb.append( "<a href=\"" + SurfacingConstants.AMIGO_LINK + go_id + "\" target=\"amigo_window\">" + go_id
+                + "</a>" );
+        sb.append( ":" );
+        sb.append( go_term.getName() );
+        sb.append( " [" );
+        sb.append( go_term.getGoNameSpace().toShortString() );
+        sb.append( "]" );
+    }
+
     private static void calculateDomainCountsBasedValue( final SortedMap<BinaryDomainCombination, Double> copy_values,
                                                          final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
                                                          final BinaryDomainCombination bdc,
@@ -827,6 +827,6 @@ public final class DomainCountsDifferenceUtil {
     }
 
     public static enum COPY_CALCULATION_MODE {
-        MEAN, MEDIAN, MAX, MIN
+        MAX, MEAN, MEDIAN, MIN
     }
 }
index 1018aea..5bb71cf 100644 (file)
@@ -52,13 +52,6 @@ public class DomainLengths {
         getLengthStatistic( species ).addValue( domain_length );
     }
 
-    private void addLengthStatistics( final Species species, final DescriptiveStatistics length_statistic ) {
-        if ( getLengthStatistics().containsKey( species ) ) {
-            throw new IllegalArgumentException( "length statistics for [" + species.getSpeciesId() + "] already added" );
-        }
-        getLengthStatistics().put( species, length_statistic );
-    }
-
     /**
      * Returns descriptive statistics based on the arithmetic means
      * for each species.  
@@ -98,10 +91,6 @@ public class DomainLengths {
         return getLengthStatistics().get( species );
     }
 
-    private SortedMap<Species, DescriptiveStatistics> getLengthStatistics() {
-        return _length_statistics;
-    }
-
     public List<DescriptiveStatistics> getLengthStatisticsList() {
         final List<DescriptiveStatistics> list = new ArrayList<DescriptiveStatistics>();
         for( final DescriptiveStatistics stats : _length_statistics.values() ) {
@@ -141,4 +130,15 @@ public class DomainLengths {
     public boolean isHasLengthStatistic( final Species species ) {
         return getLengthStatistics().containsKey( species );
     }
+
+    private void addLengthStatistics( final Species species, final DescriptiveStatistics length_statistic ) {
+        if ( getLengthStatistics().containsKey( species ) ) {
+            throw new IllegalArgumentException( "length statistics for [" + species.getSpeciesId() + "] already added" );
+        }
+        getLengthStatistics().put( species, length_statistic );
+    }
+
+    private SortedMap<Species, DescriptiveStatistics> getLengthStatistics() {
+        return _length_statistics;
+    }
 }
index 44b6247..9ef11cc 100644 (file)
@@ -50,21 +50,6 @@ public class DomainLengthsTable {
         _species = new ArrayList<Species>();
     }
 
-    private void addDomainLengths( final DomainLengths domain_lengths ) {
-        if ( getDomainLengths().containsKey( domain_lengths.getDomainId() ) ) {
-            throw new IllegalArgumentException( "domain lengths for [" + domain_lengths.getDomainId()
-                    + "] already added" );
-        }
-        getDomainLengths().put( domain_lengths.getDomainId(), domain_lengths );
-    }
-
-    private void addLength( final String domain_id, final Species species, final int domain_length ) {
-        if ( !getDomainLengths().containsKey( domain_id ) ) {
-            addDomainLengths( new DomainLengths( domain_id ) );
-        }
-        getDomainLengths().get( domain_id ).addLength( species, domain_length );
-    }
-
     public void addLengths( final List<Protein> protein_list ) {
         for( final Protein protein : protein_list ) {
             final Species species = protein.getSpecies();
@@ -142,10 +127,6 @@ public class DomainLengthsTable {
         return sb;
     }
 
-    private SortedMap<String, DomainLengths> getDomainLengths() {
-        return _domain_lengths;
-    }
-
     public DomainLengths getDomainLengths( final String domain_id ) {
         return getDomainLengths().get( domain_id );
     }
@@ -165,4 +146,23 @@ public class DomainLengthsTable {
     public List<Species> getSpecies() {
         return _species;
     }
+
+    private void addDomainLengths( final DomainLengths domain_lengths ) {
+        if ( getDomainLengths().containsKey( domain_lengths.getDomainId() ) ) {
+            throw new IllegalArgumentException( "domain lengths for [" + domain_lengths.getDomainId()
+                    + "] already added" );
+        }
+        getDomainLengths().put( domain_lengths.getDomainId(), domain_lengths );
+    }
+
+    private void addLength( final String domain_id, final Species species, final int domain_length ) {
+        if ( !getDomainLengths().containsKey( domain_id ) ) {
+            addDomainLengths( new DomainLengths( domain_id ) );
+        }
+        getDomainLengths().get( domain_id ).addLength( species, domain_length );
+    }
+
+    private SortedMap<String, DomainLengths> getDomainLengths() {
+        return _domain_lengths;
+    }
 }
index 4de0b35..e667d39 100644 (file)
@@ -55,16 +55,16 @@ import org.forester.util.ForesterUtil;
 public final class DomainParsimonyCalculator {
 
     private static final String                     TYPE_FORBINARY_CHARACTERS = "parsimony inferred";
-    private CharacterStateMatrix<GainLossStates>    _gain_loss_matrix;
     private CharacterStateMatrix<BinaryStates>      _binary_internal_states_matrix;
+    private int                                     _cost;
+    private Map<String, Set<String>>                _domain_id_to_secondary_features_map;
+    private CharacterStateMatrix<GainLossStates>    _gain_loss_matrix;
     private final List<GenomeWideCombinableDomains> _gwcd_list;
     private final Phylogeny                         _phylogeny;
-    private int                                     _total_losses;
+    private SortedSet<String>                       _positive_filter;
     private int                                     _total_gains;
+    private int                                     _total_losses;
     private int                                     _total_unchanged;
-    private int                                     _cost;
-    private Map<String, Set<String>>                _domain_id_to_secondary_features_map;
-    private SortedSet<String>                       _positive_filter;
 
     private DomainParsimonyCalculator( final Phylogeny phylogeny ) {
         init();
@@ -87,70 +87,6 @@ public final class DomainParsimonyCalculator {
         setDomainIdToSecondaryFeaturesMap( domain_id_to_secondary_features_map );
     }
 
-    int calculateNumberOfBinaryDomainCombination() {
-        if ( getGenomeWideCombinableDomainsList().isEmpty() ) {
-            throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
-        }
-        final Set<BinaryDomainCombination> all_binary_combinations = new HashSet<BinaryDomainCombination>();
-        for( final GenomeWideCombinableDomains gwcd : getGenomeWideCombinableDomainsList() ) {
-            for( final BinaryDomainCombination bc : gwcd.toBinaryDomainCombinations() ) {
-                all_binary_combinations.add( bc );
-            }
-        }
-        return all_binary_combinations.size();
-    }
-
-    CharacterStateMatrix<BinaryStates> createMatrixOfBinaryDomainCombinationPresenceOrAbsence() {
-        return createMatrixOfBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList() );
-    }
-
-    CharacterStateMatrix<BinaryStates> createMatrixOfDomainPresenceOrAbsence() {
-        return createMatrixOfDomainPresenceOrAbsence( getGenomeWideCombinableDomainsList(), getPositiveFilter() );
-    }
-
-    CharacterStateMatrix<BinaryStates> createMatrixOfSecondaryFeaturePresenceOrAbsence( final Map<Species, MappingResults> mapping_results_map ) {
-        return createMatrixOfSecondaryFeaturePresenceOrAbsence( getGenomeWideCombinableDomainsList(),
-                                                                getDomainIdToSecondaryFeaturesMap(),
-                                                                mapping_results_map );
-    }
-
-    Phylogeny decoratePhylogenyWithDomains( final Phylogeny phylogeny ) {
-        for( final PhylogenyNodeIterator it = phylogeny.iteratorPostorder(); it.hasNext(); ) {
-            final PhylogenyNode node = it.next();
-            final String node_identifier = node.getName();
-            final BinaryCharacters bc = new BinaryCharacters( getUnitsOnNode( node_identifier ),
-                                                              getUnitsGainedOnNode( node_identifier ),
-                                                              getUnitsLostOnNode( node_identifier ),
-                                                              TYPE_FORBINARY_CHARACTERS,
-                                                              getSumOfPresentOnNode( node_identifier ),
-                                                              getSumOfGainsOnNode( node_identifier ),
-                                                              getSumOfLossesOnNode( node_identifier ) );
-            node.getNodeData().setBinaryCharacters( bc );
-        }
-        return phylogeny;
-    }
-
-    private void executeDolloParsimony( final boolean on_domain_presence ) {
-        reset();
-        final DolloParsimony dollo = DolloParsimony.createInstance();
-        dollo.setReturnGainLossMatrix( true );
-        dollo.setReturnInternalStates( true );
-        CharacterStateMatrix<BinaryStates> states = null;
-        if ( on_domain_presence ) {
-            states = createMatrixOfDomainPresenceOrAbsence();
-        }
-        else {
-            states = createMatrixOfBinaryDomainCombinationPresenceOrAbsence();
-        }
-        dollo.execute( getPhylogeny(), states );
-        setGainLossMatrix( dollo.getGainLossMatrix() );
-        setBinaryInternalStatesMatrix( dollo.getInternalStatesMatrix() );
-        setCost( dollo.getCost() );
-        setTotalGains( dollo.getTotalGains() );
-        setTotalLosses( dollo.getTotalLosses() );
-        setTotalUnchanged( dollo.getTotalUnchanged() );
-    }
-
     public void executeDolloParsimonyOnBinaryDomainCombintionPresence() {
         executeDolloParsimony( false );
     }
@@ -183,86 +119,18 @@ public final class DomainParsimonyCalculator {
         setTotalUnchanged( dollo.getTotalUnchanged() );
     }
 
-    private void executeFitchParsimony( final boolean on_domain_presence,
-                                        final boolean use_last,
-                                        final boolean randomize,
-                                        final long random_number_seed ) {
-        reset();
-        if ( use_last ) {
-            System.out.println( "   Fitch parsimony: use_last = true" );
-        }
-        final FitchParsimony<BinaryStates> fitch = new FitchParsimony<BinaryStates>();
-        fitch.setRandomize( randomize );
-        if ( randomize ) {
-            fitch.setRandomNumberSeed( random_number_seed );
-        }
-        fitch.setUseLast( use_last );
-        fitch.setReturnGainLossMatrix( true );
-        fitch.setReturnInternalStates( true );
-        CharacterStateMatrix<BinaryStates> states = null;
-        if ( on_domain_presence ) {
-            states = createMatrixOfDomainPresenceOrAbsence( getGenomeWideCombinableDomainsList() );
-        }
-        else {
-            states = createMatrixOfBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList() );
-        }
-        fitch.execute( getPhylogeny(), states, true );
-        setGainLossMatrix( fitch.getGainLossMatrix() );
-        setBinaryInternalStatesMatrix( fitch.getInternalStatesMatrix() );
-        setCost( fitch.getCost() );
-        setTotalGains( fitch.getTotalGains() );
-        setTotalLosses( fitch.getTotalLosses() );
-        setTotalUnchanged( fitch.getTotalUnchanged() );
-    }
-
-    private void executeFitchParsimonyOnSecondaryFeatures( final boolean use_last,
-                                                           final boolean randomize,
-                                                           final long random_number_seed ) {
-        reset();
-        if ( use_last ) {
-            System.out.println( "   Fitch parsimony: use_last = true" );
-        }
-        final FitchParsimony<BinaryStates> fitch = new FitchParsimony<BinaryStates>();
-        fitch.setRandomize( randomize );
-        if ( randomize ) {
-            fitch.setRandomNumberSeed( random_number_seed );
-        }
-        fitch.setUseLast( use_last );
-        fitch.setReturnGainLossMatrix( true );
-        fitch.setReturnInternalStates( true );
-        final Map<String, Set<String>> map = getDomainIdToSecondaryFeaturesMap();
-        final Map<String, String> newmap = new HashMap<String, String>();
-        final Iterator<Entry<String, Set<String>>> it = map.entrySet().iterator();
-        while ( it.hasNext() ) {
-            final Map.Entry<String, Set<String>> pair = it.next();
-            if ( pair.getValue().size() != 1 ) {
-                throw new IllegalArgumentException( pair.getKey() + " mapps to " + pair.getValue().size() + " items" );
-            }
-            newmap.put( pair.getKey(), ( String ) pair.getValue().toArray()[ 0 ] );
-        }
-        final CharacterStateMatrix<BinaryStates> states = createMatrixOfSecondaryFeatureBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList(),
-                                                                                                                                  newmap );
-        fitch.execute( getPhylogeny(), states, true );
-        setGainLossMatrix( fitch.getGainLossMatrix() );
-        setBinaryInternalStatesMatrix( fitch.getInternalStatesMatrix() );
-        setCost( fitch.getCost() );
-        setTotalGains( fitch.getTotalGains() );
-        setTotalLosses( fitch.getTotalLosses() );
-        setTotalUnchanged( fitch.getTotalUnchanged() );
-    }
-
     public void executeFitchParsimonyOnBinaryDomainCombintion( final boolean use_last ) {
         executeFitchParsimony( false, use_last, false, 0 );
     }
 
-    public void executeFitchParsimonyOnBinaryDomainCombintionOnSecondaryFeatures( final boolean use_last ) {
-        executeFitchParsimonyOnSecondaryFeatures( use_last, false, 0 );
-    }
-
     public void executeFitchParsimonyOnBinaryDomainCombintion( final long random_number_seed ) {
         executeFitchParsimony( false, false, true, random_number_seed );
     }
 
+    public void executeFitchParsimonyOnBinaryDomainCombintionOnSecondaryFeatures( final boolean use_last ) {
+        executeFitchParsimonyOnSecondaryFeatures( use_last, false, 0 );
+    }
+
     public void executeFitchParsimonyOnDomainPresence( final boolean use_last ) {
         executeFitchParsimony( true, use_last, false, 0 );
     }
@@ -348,10 +216,6 @@ public final class DomainParsimonyCalculator {
         return _cost;
     }
 
-    private Map<String, Set<String>> getDomainIdToSecondaryFeaturesMap() {
-        return _domain_id_to_secondary_features_map;
-    }
-
     public CharacterStateMatrix<Integer> getGainLossCountsMatrix() {
         final CharacterStateMatrix<Integer> matrix = new BasicCharacterStateMatrix<Integer>( getGainLossMatrix()
                 .getNumberOfIdentifiers(), 3 );
@@ -384,10 +248,6 @@ public final class DomainParsimonyCalculator {
         return _gain_loss_matrix;
     }
 
-    private List<GenomeWideCombinableDomains> getGenomeWideCombinableDomainsList() {
-        return _gwcd_list;
-    }
-
     public CharacterStateMatrix<BinaryStates> getInternalStatesMatrix() {
         return _binary_internal_states_matrix;
     }
@@ -409,14 +269,6 @@ public final class DomainParsimonyCalculator {
         return net;
     }
 
-    private Phylogeny getPhylogeny() {
-        return _phylogeny;
-    }
-
-    private SortedSet<String> getPositiveFilter() {
-        return _positive_filter;
-    }
-
     public int getSumOfGainsOnNode( final String node_identifier ) {
         return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.GAIN );
     }
@@ -429,18 +281,6 @@ public final class DomainParsimonyCalculator {
         return getSumOfGainsOnNode( node_identifier ) + getSumOfUnchangedPresentOnNode( node_identifier );
     }
 
-    int getSumOfUnchangedAbsentOnNode( final String node_identifier ) {
-        return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_ABSENT );
-    }
-
-    int getSumOfUnchangedOnNode( final String node_identifier ) {
-        return getSumOfUnchangedPresentOnNode( node_identifier ) + getSumOfUnchangedAbsentOnNode( node_identifier );
-    }
-
-    int getSumOfUnchangedPresentOnNode( final String node_identifier ) {
-        return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_PRESENT );
-    }
-
     public int getTotalGains() {
         return _total_gains;
     }
@@ -467,59 +307,219 @@ public final class DomainParsimonyCalculator {
         return present;
     }
 
-    SortedSet<String> getUnitsUnchangedAbsentOnNode( final String node_identifier ) {
-        return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_ABSENT );
-    }
-
-    SortedSet<String> getUnitsUnchangedPresentOnNode( final String node_identifier ) {
-        return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_PRESENT );
-    }
-
-    private void init() {
-        setDomainIdToSecondaryFeaturesMap( null );
-        setPositiveFilter( null );
-        reset();
+    int calculateNumberOfBinaryDomainCombination() {
+        if ( getGenomeWideCombinableDomainsList().isEmpty() ) {
+            throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
+        }
+        final Set<BinaryDomainCombination> all_binary_combinations = new HashSet<BinaryDomainCombination>();
+        for( final GenomeWideCombinableDomains gwcd : getGenomeWideCombinableDomainsList() ) {
+            for( final BinaryDomainCombination bc : gwcd.toBinaryDomainCombinations() ) {
+                all_binary_combinations.add( bc );
+            }
+        }
+        return all_binary_combinations.size();
     }
 
-    private void reset() {
-        setGainLossMatrix( null );
-        setBinaryInternalStatesMatrix( null );
-        setCost( -1 );
-        setTotalGains( -1 );
-        setTotalLosses( -1 );
-        setTotalUnchanged( -1 );
+    CharacterStateMatrix<BinaryStates> createMatrixOfBinaryDomainCombinationPresenceOrAbsence() {
+        return createMatrixOfBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList() );
     }
 
-    private void setBinaryInternalStatesMatrix( final CharacterStateMatrix<BinaryStates> binary_states_matrix ) {
-        _binary_internal_states_matrix = binary_states_matrix;
+    CharacterStateMatrix<BinaryStates> createMatrixOfDomainPresenceOrAbsence() {
+        return createMatrixOfDomainPresenceOrAbsence( getGenomeWideCombinableDomainsList(), getPositiveFilter() );
     }
 
-    private void setCost( final int cost ) {
-        _cost = cost;
+    CharacterStateMatrix<BinaryStates> createMatrixOfSecondaryFeaturePresenceOrAbsence( final Map<Species, MappingResults> mapping_results_map ) {
+        return createMatrixOfSecondaryFeaturePresenceOrAbsence( getGenomeWideCombinableDomainsList(),
+                                                                getDomainIdToSecondaryFeaturesMap(),
+                                                                mapping_results_map );
     }
 
-    private void setDomainIdToSecondaryFeaturesMap( final Map<String, Set<String>> domain_id_to_secondary_features_map ) {
-        _domain_id_to_secondary_features_map = domain_id_to_secondary_features_map;
+    Phylogeny decoratePhylogenyWithDomains( final Phylogeny phylogeny ) {
+        for( final PhylogenyNodeIterator it = phylogeny.iteratorPostorder(); it.hasNext(); ) {
+            final PhylogenyNode node = it.next();
+            final String node_identifier = node.getName();
+            final BinaryCharacters bc = new BinaryCharacters( getUnitsOnNode( node_identifier ),
+                                                              getUnitsGainedOnNode( node_identifier ),
+                                                              getUnitsLostOnNode( node_identifier ),
+                                                              TYPE_FORBINARY_CHARACTERS,
+                                                              getSumOfPresentOnNode( node_identifier ),
+                                                              getSumOfGainsOnNode( node_identifier ),
+                                                              getSumOfLossesOnNode( node_identifier ) );
+            node.getNodeData().setBinaryCharacters( bc );
+        }
+        return phylogeny;
     }
 
-    private void setGainLossMatrix( final CharacterStateMatrix<GainLossStates> gain_loss_matrix ) {
-        _gain_loss_matrix = gain_loss_matrix;
+    int getSumOfUnchangedAbsentOnNode( final String node_identifier ) {
+        return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_ABSENT );
     }
 
-    private void setPositiveFilter( final SortedSet<String> positive_filter ) {
-        _positive_filter = positive_filter;
+    int getSumOfUnchangedOnNode( final String node_identifier ) {
+        return getSumOfUnchangedPresentOnNode( node_identifier ) + getSumOfUnchangedAbsentOnNode( node_identifier );
     }
 
-    private void setTotalGains( final int total_gains ) {
-        _total_gains = total_gains;
+    int getSumOfUnchangedPresentOnNode( final String node_identifier ) {
+        return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_PRESENT );
     }
 
-    private void setTotalLosses( final int total_losses ) {
-        _total_losses = total_losses;
+    SortedSet<String> getUnitsUnchangedAbsentOnNode( final String node_identifier ) {
+        return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_ABSENT );
     }
 
-    private void setTotalUnchanged( final int total_unchanged ) {
-        _total_unchanged = total_unchanged;
+    SortedSet<String> getUnitsUnchangedPresentOnNode( final String node_identifier ) {
+        return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_PRESENT );
+    }
+
+    private void executeDolloParsimony( final boolean on_domain_presence ) {
+        reset();
+        final DolloParsimony dollo = DolloParsimony.createInstance();
+        dollo.setReturnGainLossMatrix( true );
+        dollo.setReturnInternalStates( true );
+        CharacterStateMatrix<BinaryStates> states = null;
+        if ( on_domain_presence ) {
+            states = createMatrixOfDomainPresenceOrAbsence();
+        }
+        else {
+            states = createMatrixOfBinaryDomainCombinationPresenceOrAbsence();
+        }
+        dollo.execute( getPhylogeny(), states );
+        setGainLossMatrix( dollo.getGainLossMatrix() );
+        setBinaryInternalStatesMatrix( dollo.getInternalStatesMatrix() );
+        setCost( dollo.getCost() );
+        setTotalGains( dollo.getTotalGains() );
+        setTotalLosses( dollo.getTotalLosses() );
+        setTotalUnchanged( dollo.getTotalUnchanged() );
+    }
+
+    private void executeFitchParsimony( final boolean on_domain_presence,
+                                        final boolean use_last,
+                                        final boolean randomize,
+                                        final long random_number_seed ) {
+        reset();
+        if ( use_last ) {
+            System.out.println( "   Fitch parsimony: use_last = true" );
+        }
+        final FitchParsimony<BinaryStates> fitch = new FitchParsimony<BinaryStates>();
+        fitch.setRandomize( randomize );
+        if ( randomize ) {
+            fitch.setRandomNumberSeed( random_number_seed );
+        }
+        fitch.setUseLast( use_last );
+        fitch.setReturnGainLossMatrix( true );
+        fitch.setReturnInternalStates( true );
+        CharacterStateMatrix<BinaryStates> states = null;
+        if ( on_domain_presence ) {
+            states = createMatrixOfDomainPresenceOrAbsence( getGenomeWideCombinableDomainsList() );
+        }
+        else {
+            states = createMatrixOfBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList() );
+        }
+        fitch.execute( getPhylogeny(), states, true );
+        setGainLossMatrix( fitch.getGainLossMatrix() );
+        setBinaryInternalStatesMatrix( fitch.getInternalStatesMatrix() );
+        setCost( fitch.getCost() );
+        setTotalGains( fitch.getTotalGains() );
+        setTotalLosses( fitch.getTotalLosses() );
+        setTotalUnchanged( fitch.getTotalUnchanged() );
+    }
+
+    private void executeFitchParsimonyOnSecondaryFeatures( final boolean use_last,
+                                                           final boolean randomize,
+                                                           final long random_number_seed ) {
+        reset();
+        if ( use_last ) {
+            System.out.println( "   Fitch parsimony: use_last = true" );
+        }
+        final FitchParsimony<BinaryStates> fitch = new FitchParsimony<BinaryStates>();
+        fitch.setRandomize( randomize );
+        if ( randomize ) {
+            fitch.setRandomNumberSeed( random_number_seed );
+        }
+        fitch.setUseLast( use_last );
+        fitch.setReturnGainLossMatrix( true );
+        fitch.setReturnInternalStates( true );
+        final Map<String, Set<String>> map = getDomainIdToSecondaryFeaturesMap();
+        final Map<String, String> newmap = new HashMap<String, String>();
+        final Iterator<Entry<String, Set<String>>> it = map.entrySet().iterator();
+        while ( it.hasNext() ) {
+            final Map.Entry<String, Set<String>> pair = it.next();
+            if ( pair.getValue().size() != 1 ) {
+                throw new IllegalArgumentException( pair.getKey() + " mapps to " + pair.getValue().size() + " items" );
+            }
+            newmap.put( pair.getKey(), ( String ) pair.getValue().toArray()[ 0 ] );
+        }
+        final CharacterStateMatrix<BinaryStates> states = createMatrixOfSecondaryFeatureBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList(),
+                                                                                                                                  newmap );
+        fitch.execute( getPhylogeny(), states, true );
+        setGainLossMatrix( fitch.getGainLossMatrix() );
+        setBinaryInternalStatesMatrix( fitch.getInternalStatesMatrix() );
+        setCost( fitch.getCost() );
+        setTotalGains( fitch.getTotalGains() );
+        setTotalLosses( fitch.getTotalLosses() );
+        setTotalUnchanged( fitch.getTotalUnchanged() );
+    }
+
+    private Map<String, Set<String>> getDomainIdToSecondaryFeaturesMap() {
+        return _domain_id_to_secondary_features_map;
+    }
+
+    private List<GenomeWideCombinableDomains> getGenomeWideCombinableDomainsList() {
+        return _gwcd_list;
+    }
+
+    private Phylogeny getPhylogeny() {
+        return _phylogeny;
+    }
+
+    private SortedSet<String> getPositiveFilter() {
+        return _positive_filter;
+    }
+
+    private void init() {
+        setDomainIdToSecondaryFeaturesMap( null );
+        setPositiveFilter( null );
+        reset();
+    }
+
+    private void reset() {
+        setGainLossMatrix( null );
+        setBinaryInternalStatesMatrix( null );
+        setCost( -1 );
+        setTotalGains( -1 );
+        setTotalLosses( -1 );
+        setTotalUnchanged( -1 );
+    }
+
+    private void setBinaryInternalStatesMatrix( final CharacterStateMatrix<BinaryStates> binary_states_matrix ) {
+        _binary_internal_states_matrix = binary_states_matrix;
+    }
+
+    private void setCost( final int cost ) {
+        _cost = cost;
+    }
+
+    private void setDomainIdToSecondaryFeaturesMap( final Map<String, Set<String>> domain_id_to_secondary_features_map ) {
+        _domain_id_to_secondary_features_map = domain_id_to_secondary_features_map;
+    }
+
+    private void setGainLossMatrix( final CharacterStateMatrix<GainLossStates> gain_loss_matrix ) {
+        _gain_loss_matrix = gain_loss_matrix;
+    }
+
+    private void setPositiveFilter( final SortedSet<String> positive_filter ) {
+        _positive_filter = positive_filter;
+    }
+
+    private void setTotalGains( final int total_gains ) {
+        _total_gains = total_gains;
+    }
+
+    private void setTotalLosses( final int total_losses ) {
+        _total_losses = total_losses;
+    }
+
+    private void setTotalUnchanged( final int total_unchanged ) {
+        _total_unchanged = total_unchanged;
     }
 
     public static DomainParsimonyCalculator createInstance( final Phylogeny phylogeny ) {
@@ -544,68 +544,50 @@ public final class DomainParsimonyCalculator {
         return new DomainParsimonyCalculator( phylogeny, gwcd_list, domain_id_to_secondary_features_map );
     }
 
-    /**
-     * For folds instead of Pfam-domains, for example
-     * 
-     * 
-     * @param gwcd_list
-     * @return
-     */
-    static CharacterStateMatrix<BinaryStates> createMatrixOfSecondaryFeaturePresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list,
-                                                                                               final Map<String, Set<String>> domain_id_to_second_features_map,
-                                                                                               final Map<Species, MappingResults> mapping_results_map ) {
+    public static CharacterStateMatrix<BinaryStates> createMatrixOfBinaryDomainCombinationPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list ) {
         if ( gwcd_list.isEmpty() ) {
             throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
         }
-        if ( ( domain_id_to_second_features_map == null ) || domain_id_to_second_features_map.isEmpty() ) {
-            throw new IllegalArgumentException( "domain id to secondary features map is null or empty" );
-        }
         final int number_of_identifiers = gwcd_list.size();
-        final SortedSet<String> all_secondary_features = new TreeSet<String>();
+        final SortedSet<BinaryDomainCombination> all_binary_combinations = new TreeSet<BinaryDomainCombination>();
+        final Set<BinaryDomainCombination>[] binary_combinations_per_genome = new HashSet[ number_of_identifiers ];
+        int identifier_index = 0;
         for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
-            int mapped = 0;
-            int not_mapped = 0;
-            for( final String domain : gwcd.getAllDomainIds() ) {
-                if ( domain_id_to_second_features_map.containsKey( domain ) ) {
-                    all_secondary_features.addAll( domain_id_to_second_features_map.get( domain ) );
-                    mapped++;
-                }
-                else {
-                    not_mapped++;
-                }
-            }
-            if ( mapping_results_map != null ) {
-                final MappingResults mr = new MappingResults();
-                mr.setDescription( gwcd.getSpecies().getSpeciesId() );
-                mr.setSumOfSuccesses( mapped );
-                mr.setSumOfFailures( not_mapped );
-                mapping_results_map.put( gwcd.getSpecies(), mr );
+            binary_combinations_per_genome[ identifier_index ] = new HashSet<BinaryDomainCombination>();
+            for( final BinaryDomainCombination bc : gwcd.toBinaryDomainCombinations() ) {
+                all_binary_combinations.add( bc );
+                binary_combinations_per_genome[ identifier_index ].add( bc );
             }
+            ++identifier_index;
         }
-        final int number_of_characters = all_secondary_features.size();
+        final int number_of_characters = all_binary_combinations.size();
         final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
                                                                                                                                                  number_of_characters );
         int character_index = 0;
-        for( final String second_id : all_secondary_features ) {
-            matrix.setCharacter( character_index++, second_id );
+        for( final BinaryDomainCombination bc : all_binary_combinations ) {
+            matrix.setCharacter( character_index++, bc.toString() );
         }
-        int identifier_index = 0;
+        identifier_index = 0;
         final Set<String> all_identifiers = new HashSet<String>();
         for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
             final String species_id = gwcd.getSpecies().getSpeciesId();
             if ( all_identifiers.contains( species_id ) ) {
-                throw new IllegalArgumentException( "species [" + species_id + "] is not unique" );
+                throw new AssertionError( "species [" + species_id + "] is not unique" );
             }
             all_identifiers.add( species_id );
             matrix.setIdentifier( identifier_index, species_id );
-            final Set<String> all_second_per_gwcd = new HashSet<String>();
-            for( final String domain : gwcd.getAllDomainIds() ) {
-                if ( domain_id_to_second_features_map.containsKey( domain ) ) {
-                    all_second_per_gwcd.addAll( domain_id_to_second_features_map.get( domain ) );
-                }
-            }
             for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
-                if ( all_second_per_gwcd.contains( matrix.getCharacter( ci ) ) ) {
+                BinaryDomainCombination bc = null;
+                if ( gwcd.getDomainCombinationType() == DomainCombinationType.DIRECTED_ADJACTANT ) {
+                    bc = AdjactantDirectedBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
+                }
+                else if ( gwcd.getDomainCombinationType() == DomainCombinationType.DIRECTED ) {
+                    bc = DirectedBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
+                }
+                else {
+                    bc = BasicBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
+                }
+                if ( binary_combinations_per_genome[ identifier_index ].contains( bc ) ) {
                     matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
                 }
                 else {
@@ -617,65 +599,59 @@ public final class DomainParsimonyCalculator {
         return matrix;
     }
 
-    public static CharacterStateMatrix<BinaryStates> createMatrixOfSecondaryFeatureBinaryDomainCombinationPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list,
-                                                                                                                             final Map<String, String> domain_id_to_second_features_map ) {
+    public static CharacterStateMatrix<BinaryStates> createMatrixOfDomainPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list,
+                                                                                            final SortedSet<String> positive_filter ) {
         if ( gwcd_list.isEmpty() ) {
             throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
         }
-        if ( ( domain_id_to_second_features_map == null ) || domain_id_to_second_features_map.isEmpty() ) {
-            throw new IllegalArgumentException( "domain id to secondary features map is null or empty" );
+        if ( ( positive_filter != null ) && ( positive_filter.size() < 1 ) ) {
+            throw new IllegalArgumentException( "positive filter is empty" );
         }
         final int number_of_identifiers = gwcd_list.size();
-        final SortedSet<BinaryDomainCombination> all_binary_combinations_mapped = new TreeSet<BinaryDomainCombination>();
-        final Set<BinaryDomainCombination>[] binary_combinations_per_genome_mapped = new HashSet[ number_of_identifiers ];
-        int identifier_index = 0;
-        final SortedSet<String> no_mappings = new TreeSet<String>();
+        final SortedSet<String> all_domain_ids = new TreeSet<String>();
         for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
-            binary_combinations_per_genome_mapped[ identifier_index ] = new HashSet<BinaryDomainCombination>();
-            for( final BinaryDomainCombination bc : gwcd.toBinaryDomainCombinations() ) {
-                final BinaryDomainCombination mapped_bc = mapBinaryDomainCombination( domain_id_to_second_features_map,
-                                                                                      bc,
-                                                                                      no_mappings );
-                all_binary_combinations_mapped.add( mapped_bc );
-                binary_combinations_per_genome_mapped[ identifier_index ].add( mapped_bc );
+            for( final String domain : gwcd.getAllDomainIds() ) {
+                all_domain_ids.add( domain );
             }
-            ++identifier_index;
         }
-        if ( !no_mappings.isEmpty() ) {
-            ForesterUtil.programMessage( surfacing.PRG_NAME, "No mappings for the following (" + no_mappings.size()
-                    + "):" );
-            for( final String id : no_mappings ) {
-                ForesterUtil.programMessage( surfacing.PRG_NAME, id );
+        int number_of_characters = all_domain_ids.size();
+        if ( positive_filter != null ) {
+            //number_of_characters = positive_filter.size(); -- bad if doms in filter but not in genomes 
+            number_of_characters = 0;
+            for( final String id : all_domain_ids ) {
+                if ( positive_filter.contains( id ) ) {
+                    number_of_characters++;
+                }
             }
         }
-        final int number_of_characters = all_binary_combinations_mapped.size();
         final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
                                                                                                                                                  number_of_characters );
         int character_index = 0;
-        for( final BinaryDomainCombination bc : all_binary_combinations_mapped ) {
-            matrix.setCharacter( character_index++, bc.toString() );
+        for( final String id : all_domain_ids ) {
+            if ( positive_filter == null ) {
+                matrix.setCharacter( character_index++, id );
+            }
+            else {
+                if ( positive_filter.contains( id ) ) {
+                    matrix.setCharacter( character_index++, id );
+                }
+            }
         }
-        identifier_index = 0;
+        int identifier_index = 0;
         final Set<String> all_identifiers = new HashSet<String>();
         for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
             final String species_id = gwcd.getSpecies().getSpeciesId();
             if ( all_identifiers.contains( species_id ) ) {
-                throw new AssertionError( "species [" + species_id + "] is not unique" );
+                throw new IllegalArgumentException( "species [" + species_id + "] is not unique" );
             }
             all_identifiers.add( species_id );
             matrix.setIdentifier( identifier_index, species_id );
             for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
-                BinaryDomainCombination bc = null;
-                if ( gwcd.getDomainCombinationType() == DomainCombinationType.DIRECTED_ADJACTANT ) {
-                    bc = AdjactantDirectedBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
-                }
-                else if ( gwcd.getDomainCombinationType() == DomainCombinationType.DIRECTED ) {
-                    bc = DirectedBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
-                }
-                else {
-                    bc = BasicBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
+                if ( ForesterUtil.isEmpty( matrix.getCharacter( ci ) ) ) {
+                    throw new RuntimeException( "this should not have happened: problem with character #" + ci );
                 }
-                if ( binary_combinations_per_genome_mapped[ identifier_index ].contains( bc ) ) {
+                final String id = matrix.getCharacter( ci );
+                if ( gwcd.contains( id ) ) {
                     matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
                 }
                 else {
@@ -687,49 +663,42 @@ public final class DomainParsimonyCalculator {
         return matrix;
     }
 
-    private static BinaryDomainCombination mapBinaryDomainCombination( final Map<String, String> domain_id_to_second_features_map,
-                                                                       final BinaryDomainCombination bc,
-                                                                       final SortedSet<String> no_mappings ) {
-        String id0 = "";
-        String id1 = "";
-        if ( !domain_id_to_second_features_map.containsKey( bc.getId0() ) ) {
-            no_mappings.add( bc.getId0() );
-            id0 = bc.getId0();
-        }
-        else {
-            id0 = domain_id_to_second_features_map.get( bc.getId0() );
-        }
-        if ( !domain_id_to_second_features_map.containsKey( bc.getId1() ) ) {
-            no_mappings.add( bc.getId1() );
-            id1 = bc.getId1();
-        }
-        else {
-            id1 = domain_id_to_second_features_map.get( bc.getId1() );
-        }
-        return new BasicBinaryDomainCombination( id0, id1 );
-    }
-
-    public static CharacterStateMatrix<BinaryStates> createMatrixOfBinaryDomainCombinationPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list ) {
+    public static CharacterStateMatrix<BinaryStates> createMatrixOfSecondaryFeatureBinaryDomainCombinationPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list,
+                                                                                                                             final Map<String, String> domain_id_to_second_features_map ) {
         if ( gwcd_list.isEmpty() ) {
             throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
         }
+        if ( ( domain_id_to_second_features_map == null ) || domain_id_to_second_features_map.isEmpty() ) {
+            throw new IllegalArgumentException( "domain id to secondary features map is null or empty" );
+        }
         final int number_of_identifiers = gwcd_list.size();
-        final SortedSet<BinaryDomainCombination> all_binary_combinations = new TreeSet<BinaryDomainCombination>();
-        final Set<BinaryDomainCombination>[] binary_combinations_per_genome = new HashSet[ number_of_identifiers ];
+        final SortedSet<BinaryDomainCombination> all_binary_combinations_mapped = new TreeSet<BinaryDomainCombination>();
+        final Set<BinaryDomainCombination>[] binary_combinations_per_genome_mapped = new HashSet[ number_of_identifiers ];
         int identifier_index = 0;
+        final SortedSet<String> no_mappings = new TreeSet<String>();
         for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
-            binary_combinations_per_genome[ identifier_index ] = new HashSet<BinaryDomainCombination>();
+            binary_combinations_per_genome_mapped[ identifier_index ] = new HashSet<BinaryDomainCombination>();
             for( final BinaryDomainCombination bc : gwcd.toBinaryDomainCombinations() ) {
-                all_binary_combinations.add( bc );
-                binary_combinations_per_genome[ identifier_index ].add( bc );
+                final BinaryDomainCombination mapped_bc = mapBinaryDomainCombination( domain_id_to_second_features_map,
+                                                                                      bc,
+                                                                                      no_mappings );
+                all_binary_combinations_mapped.add( mapped_bc );
+                binary_combinations_per_genome_mapped[ identifier_index ].add( mapped_bc );
             }
             ++identifier_index;
         }
-        final int number_of_characters = all_binary_combinations.size();
+        if ( !no_mappings.isEmpty() ) {
+            ForesterUtil.programMessage( surfacing.PRG_NAME, "No mappings for the following (" + no_mappings.size()
+                    + "):" );
+            for( final String id : no_mappings ) {
+                ForesterUtil.programMessage( surfacing.PRG_NAME, id );
+            }
+        }
+        final int number_of_characters = all_binary_combinations_mapped.size();
         final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
                                                                                                                                                  number_of_characters );
         int character_index = 0;
-        for( final BinaryDomainCombination bc : all_binary_combinations ) {
+        for( final BinaryDomainCombination bc : all_binary_combinations_mapped ) {
             matrix.setCharacter( character_index++, bc.toString() );
         }
         identifier_index = 0;
@@ -752,7 +721,7 @@ public final class DomainParsimonyCalculator {
                 else {
                     bc = BasicBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
                 }
-                if ( binary_combinations_per_genome[ identifier_index ].contains( bc ) ) {
+                if ( binary_combinations_per_genome_mapped[ identifier_index ].contains( bc ) ) {
                     matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
                 }
                 else {
@@ -768,43 +737,50 @@ public final class DomainParsimonyCalculator {
         return createMatrixOfDomainPresenceOrAbsence( gwcd_list, null );
     }
 
-    public static CharacterStateMatrix<BinaryStates> createMatrixOfDomainPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list,
-                                                                                            final SortedSet<String> positive_filter ) {
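+    /**
+     * Creates a presence/absence matrix of secondary features (for example,
+     * folds instead of Pfam domains) over the genomes in gwcd_list, using
+     * domain_id_to_second_features_map to translate domain ids. If
+     * mapping_results_map is not null, per-species mapping counts are stored in it.
+     * @throws IllegalArgumentException if gwcd_list is empty or the map is null or empty
+     */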
+    static CharacterStateMatrix<BinaryStates> createMatrixOfSecondaryFeaturePresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list,
+                                                                                               final Map<String, Set<String>> domain_id_to_second_features_map,
+                                                                                               final Map<Species, MappingResults> mapping_results_map ) {
         if ( gwcd_list.isEmpty() ) {
             throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
         }
-        if ( ( positive_filter != null ) && ( positive_filter.size() < 1 ) ) {
-            throw new IllegalArgumentException( "positive filter is empty" );
+        if ( ( domain_id_to_second_features_map == null ) || domain_id_to_second_features_map.isEmpty() ) {
+            throw new IllegalArgumentException( "domain id to secondary features map is null or empty" );
         }
         final int number_of_identifiers = gwcd_list.size();
-        final SortedSet<String> all_domain_ids = new TreeSet<String>();
+        final SortedSet<String> all_secondary_features = new TreeSet<String>();
         for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
+            int mapped = 0;
+            int not_mapped = 0;
             for( final String domain : gwcd.getAllDomainIds() ) {
-                all_domain_ids.add( domain );
-            }
-        }
-        int number_of_characters = all_domain_ids.size();
-        if ( positive_filter != null ) {
-            //number_of_characters = positive_filter.size(); -- bad if doms in filter but not in genomes 
-            number_of_characters = 0;
-            for( final String id : all_domain_ids ) {
-                if ( positive_filter.contains( id ) ) {
-                    number_of_characters++;
+                if ( domain_id_to_second_features_map.containsKey( domain ) ) {
+                    all_secondary_features.addAll( domain_id_to_second_features_map.get( domain ) );
+                    mapped++;
+                }
+                else {
+                    not_mapped++;
                 }
             }
+            if ( mapping_results_map != null ) {
+                final MappingResults mr = new MappingResults();
+                mr.setDescription( gwcd.getSpecies().getSpeciesId() );
+                mr.setSumOfSuccesses( mapped );
+                mr.setSumOfFailures( not_mapped );
+                mapping_results_map.put( gwcd.getSpecies(), mr );
+            }
         }
+        final int number_of_characters = all_secondary_features.size();
         final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
                                                                                                                                                  number_of_characters );
         int character_index = 0;
-        for( final String id : all_domain_ids ) {
-            if ( positive_filter == null ) {
-                matrix.setCharacter( character_index++, id );
-            }
-            else {
-                if ( positive_filter.contains( id ) ) {
-                    matrix.setCharacter( character_index++, id );
-                }
-            }
+        for( final String second_id : all_secondary_features ) {
+            matrix.setCharacter( character_index++, second_id );
         }
         int identifier_index = 0;
         final Set<String> all_identifiers = new HashSet<String>();
@@ -815,12 +791,14 @@ public final class DomainParsimonyCalculator {
             }
             all_identifiers.add( species_id );
             matrix.setIdentifier( identifier_index, species_id );
-            for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
-                if ( ForesterUtil.isEmpty( matrix.getCharacter( ci ) ) ) {
-                    throw new RuntimeException( "this should not have happened: problem with character #" + ci );
+            final Set<String> all_second_per_gwcd = new HashSet<String>();
+            for( final String domain : gwcd.getAllDomainIds() ) {
+                if ( domain_id_to_second_features_map.containsKey( domain ) ) {
+                    all_second_per_gwcd.addAll( domain_id_to_second_features_map.get( domain ) );
                 }
-                final String id = matrix.getCharacter( ci );
-                if ( gwcd.contains( id ) ) {
+            }
+            for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
+                if ( all_second_per_gwcd.contains( matrix.getCharacter( ci ) ) ) {
                     matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
                 }
                 else {
@@ -879,4 +857,26 @@ public final class DomainParsimonyCalculator {
         }
         return d;
     }
+
+    private static BinaryDomainCombination mapBinaryDomainCombination( final Map<String, String> domain_id_to_second_features_map,
+                                                                       final BinaryDomainCombination bc,
+                                                                       final SortedSet<String> no_mappings ) {
+        // Translate each of the two domain ids of bc through the map. An id
+        // without a map entry is kept unchanged and is recorded in no_mappings
+        // (which is therefore modified by this call).
+        final String[] ids = { bc.getId0(), bc.getId1() };
+        for( int i = 0; i < ids.length; ++i ) {
+            final String original_id = ids[ i ];
+            if ( domain_id_to_second_features_map.containsKey( original_id ) ) {
+                // Mapped: substitute the secondary feature for the domain id.
+                ids[ i ] = domain_id_to_second_features_map.get( original_id );
+            }
+            else {
+                // Not mapped: remember the id and fall back to the id itself.
+                no_mappings.add( original_id );
+            }
+        }
+        // Built from the (possibly substituted) ids, in their original order.
+        return new BasicBinaryDomainCombination( ids[ 0 ], ids[ 1 ] );
+    }
 }
diff --git a/forester/java/src/org/forester/surfacing/DomainSimilarity.java b/forester/java/src/org/forester/surfacing/DomainSimilarity.java
deleted file mode 100644 (file)
index 5a0735e..0000000
+++ /dev/null
@@ -1,108 +0,0 @@
-// $Id:
-//
-// FORESTER -- software libraries and applications
-// for evolutionary biology research and applications.
-//
-// Copyright (C) 2008-2009 Christian M. Zmasek
-// Copyright (C) 2008-2009 Burnham Institute for Medical Research
-// All rights reserved
-//
-// This library is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// Lesser General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License along with this library; if not, write to the Free Software
-// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
-//
-// Contact: phylosoft @ gmail . com
-// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
-
-package org.forester.surfacing;
-
-import java.util.Map;
-import java.util.SortedMap;
-import java.util.SortedSet;
-
-import org.forester.phylogeny.Phylogeny;
-import org.forester.species.Species;
-import org.forester.surfacing.PrintableDomainSimilarity.PRINT_OPTION;
-
-/*
- * This is to represent a measure of similarity between two or more domains from
- * different genomes.
- */
-public interface DomainSimilarity extends Comparable<DomainSimilarity> {
-
-    static public enum DomainSimilarityScoring {
-        DOMAINS, PROTEINS, COMBINATIONS;
-    }
-
-    public static enum DomainSimilaritySortField {
-        MIN, MAX, SD, MEAN, ABS_MAX_COUNTS_DIFFERENCE, MAX_COUNTS_DIFFERENCE, MAX_DIFFERENCE, SPECIES_COUNT, DOMAIN_ID,
-    }
-
-    public SortedSet<String> getCombinableDomainIds( final Species species_of_combinable_domain );
-
-    public String getDomainId();
-
-    /**
-     * For pairwise similarities, this should return the "difference"; for example the difference in counts
-     * for copy number based features (the same as getMaximalDifferenceInCounts(), or the number
-     * of actually different domain combinations. 
-     * For pairwise similarities, this should return the difference,
-     * while for comparisons of more than two domains, this should return the maximal difference
-     * 
-     * 
-     * 
-     * @return
-     */
-    public int getMaximalDifference();
-
-    /**
-     * For pairwise similarities, this should return the difference in counts,
-     * while for comparisons of more than two domains, this should return the maximal difference
-     * in counts
-     * 
-     * 
-     * @return the (maximal) difference in counts
-     */
-    public int getMaximalDifferenceInCounts();
-
-    public double getMaximalSimilarityScore();
-
-    public double getMeanSimilarityScore();
-
-    public double getMinimalSimilarityScore();
-
-    /**
-     * This should return the number of pairwise distances used to calculate
-     * this similarity score
-     * 
-     * @return the number of pairwise distances
-     */
-    public int getN();
-
-    public SortedSet<Species> getSpecies();
-
-    /**
-     * This should return a map, which maps species names to
-     * SpeciesSpecificDomainSimilariyData
-     * 
-     * 
-     * @return SortedMap<String, SpeciesSpecificDomainSimilariyData>
-     */
-    public SortedMap<Species, SpeciesSpecificDcData> getSpeciesData();
-
-    public double getStandardDeviationOfSimilarityScore();
-
-    public StringBuffer toStringBuffer( PRINT_OPTION print_option,
-                                        Map<String, Integer> tax_code_to_id_map,
-                                        Phylogeny phy );
-}
index fa0ea62..ad72c45 100644 (file)
@@ -32,16 +32,16 @@ import java.util.SortedSet;
 
 public interface DomainSimilarityCalculator {
 
-    public SortedSet<DomainSimilarity> calculateSimilarities( final PairwiseDomainSimilarityCalculator pairwise_calculator,
-                                                              final List<GenomeWideCombinableDomains> cdc_list,
-                                                              final boolean ignore_domains_without_combinations_in_any_genome,
-                                                              final boolean ignore_domains_specific_to_one_genome );;
+    public SortedSet<PrintableDomainSimilarity> calculateSimilarities( final PairwiseDomainSimilarityCalculator pairwise_calculator,
+                                                                       final List<GenomeWideCombinableDomains> cdc_list,
+                                                                       final boolean ignore_domains_without_combinations_in_any_genome,
+                                                                       final boolean ignore_domains_specific_to_one_genome );;
 
     public static enum Detailedness {
         BASIC, LIST_COMBINING_DOMAIN_FOR_EACH_SPECIES, PUNCTILIOUS
     }
 
     public static enum GoAnnotationOutput {
-        NONE, ALL
+        ALL, NONE
     }
 }
index 4dbe718..810e409 100644 (file)
@@ -52,8 +52,6 @@ public interface GenomeWideCombinableDomains {
 
     public DomainCombinationType getDomainCombinationType();
 
-    SortedSet<String> getMostPromiscuosDomain();
-
     /**
      * This should return a statistic for per domain 
      * promiscuity in a genome.
@@ -75,7 +73,9 @@ public interface GenomeWideCombinableDomains {
 
     public StringBuilder toStringBuilder( GenomeWideCombinableDomainsSortOrder order );
 
+    SortedSet<String> getMostPromiscuosDomain();
+
     public static enum GenomeWideCombinableDomainsSortOrder {
-        ALPHABETICAL_KEY_ID, KEY_DOMAIN_PROTEINS_COUNT, KEY_DOMAIN_COUNT, COMBINATIONS_COUNT
+        ALPHABETICAL_KEY_ID, COMBINATIONS_COUNT, KEY_DOMAIN_COUNT, KEY_DOMAIN_PROTEINS_COUNT
     }
 }
index 69a6a31..2589ed3 100644 (file)
@@ -29,8 +29,8 @@ package org.forester.surfacing;
 public class MappingResults {
 
     private String _description;
-    private int    _sum_of_successes;
     private int    _sum_of_failures;
+    private int    _sum_of_successes;
 
     public String getDescription() {
         return _description;
index 4a78a13..9c6e1b5 100644 (file)
@@ -52,8 +52,8 @@ import org.forester.util.ForesterUtil;
 public class PairwiseGenomeComparator {
 
     private List<DistanceMatrix> _domain_distance_scores_means;
-    private List<DistanceMatrix> _shared_domains_based_distances;
     private List<DistanceMatrix> _shared_binary_combinations_based_distances;
+    private List<DistanceMatrix> _shared_domains_based_distances;
 
     public PairwiseGenomeComparator() {
         init();
@@ -71,20 +71,14 @@ public class PairwiseGenomeComparator {
         return _shared_domains_based_distances;
     }
 
-    private void init() {
-        _domain_distance_scores_means = new ArrayList<DistanceMatrix>();
-        _shared_domains_based_distances = new ArrayList<DistanceMatrix>();
-        _shared_binary_combinations_based_distances = new ArrayList<DistanceMatrix>();
-    }
-
     public void performPairwiseComparisons( final StringBuilder html_desc,
                                             final boolean sort_by_species_count_first,
                                             final Detailedness detailedness,
                                             final boolean ignore_domains_without_combs_in_all_spec,
                                             final boolean ignore_domains_specific_to_one_species,
-                                            final DomainSimilarity.DomainSimilaritySortField domain_similarity_sort_field,
+                                            final PrintableDomainSimilarity.DomainSimilaritySortField domain_similarity_sort_field,
                                             final PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option,
-                                            final DomainSimilarity.DomainSimilarityScoring scoring,
+                                            final PrintableDomainSimilarity.DomainSimilarityScoring scoring,
                                             final Map<String, List<GoId>> domain_id_to_go_ids_map,
                                             final Map<GoId, GoTerm> go_id_to_term_map,
                                             final GoNameSpace go_namespace_limit,
@@ -146,7 +140,7 @@ public class PairwiseGenomeComparator {
                                                                                              sort_by_species_count_first,
                                                                                              true,
                                                                                              calc_similarity_scores );
-                final SortedSet<DomainSimilarity> similarities = calc
+                final SortedSet<PrintableDomainSimilarity> similarities = calc
                         .calculateSimilarities( pw_calc,
                                                 genome_pair,
                                                 ignore_domains_without_combs_in_all_spec,
@@ -299,6 +293,12 @@ public class PairwiseGenomeComparator {
         }
     }
 
+    private void init() {
+        _domain_distance_scores_means = new ArrayList<DistanceMatrix>();
+        _shared_domains_based_distances = new ArrayList<DistanceMatrix>();
+        _shared_binary_combinations_based_distances = new ArrayList<DistanceMatrix>();
+    }
+
     static private String[] getAllUniqueDomainIdAsArray( final List<GenomeWideCombinableDomains> list_of_genome_wide_combinable_domains ) {
         String[] all_domain_ids_array;
         final SortedSet<String> all_domain_ids = new TreeSet<String>();
index 6247714..7fe04aa 100644 (file)
@@ -44,11 +44,12 @@ import org.forester.species.Species;
 import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
 import org.forester.util.ForesterUtil;
 
-public class PrintableDomainSimilarity implements DomainSimilarity {
+public class PrintableDomainSimilarity implements Comparable<PrintableDomainSimilarity> {
 
-    final public static String                              SPECIES_SEPARATOR = "  ";
-    final private static int                                EQUAL             = 0;
-    final private static String                             NO_SPECIES        = "     ";
+    final public static String                              SPECIES_SEPARATOR          = "  ";
+    final private static int                                EQUAL                      = 0;
+    final private static String                             NO_SPECIES                 = "     ";
+    private static final boolean                            OUTPUT_TAXCODES_PER_DOMAIN = false;
     final private CombinableDomains                         _combinable_domains;
     private DomainSimilarityCalculator.Detailedness         _detailedness;
     final private double                                    _max;
@@ -158,8 +159,7 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
         }
     }
 
-    @Override
-    public int compareTo( final DomainSimilarity domain_similarity ) {
+    public int compareTo( final PrintableDomainSimilarity domain_similarity ) {
         if ( this == domain_similarity ) {
             return EQUAL;
         }
@@ -173,7 +173,6 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
         return compareByDomainId( domain_similarity );
     }
 
-    @Override
     public SortedSet<String> getCombinableDomainIds( final Species species_of_combinable_domain ) {
         final SortedSet<String> sorted_ids = new TreeSet<String>();
         if ( getSpeciesData().containsKey( species_of_combinable_domain ) ) {
@@ -185,42 +184,56 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
         return sorted_ids;
     }
 
-    @Override
     public String getDomainId() {
         return getCombinableDomains().getKeyDomain();
     }
 
-    @Override
+    /**
+     * For pairwise similarities, this should return the "difference"; for example the difference in counts
+     * for copy number based features (the same as getMaximalDifferenceInCounts()), or the number
+     * of actually different domain combinations.
+     * For pairwise similarities, this should return the difference,
+     * while for comparisons of more than two domains, this should return the maximal difference.
+     *
+     */
     public int getMaximalDifference() {
         return _max_difference;
     }
 
-    @Override
+    /**
+     * For pairwise similarities, this should return the difference in counts,
+     * while for comparisons of more than two domains, this should return the maximal difference
+     * in counts
+     * 
+     * 
+     * @return the (maximal) difference in counts
+     */
     public int getMaximalDifferenceInCounts() {
         return _max_difference_in_counts;
     }
 
-    @Override
     public double getMaximalSimilarityScore() {
         return _max;
     }
 
-    @Override
     public double getMeanSimilarityScore() {
         return _mean;
     }
 
-    @Override
     public double getMinimalSimilarityScore() {
         return _min;
     }
 
-    @Override
+    /**
+     * This should return the number of pairwise distances used to calculate
+     * this similarity score
+     * 
+     * @return the number of pairwise distances
+     */
     public int getN() {
         return _n;
     }
 
-    @Override
     public SortedSet<Species> getSpecies() {
         final SortedSet<Species> species = new TreeSet<Species>();
         for( final Species s : getSpeciesData().keySet() ) {
@@ -233,12 +246,17 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
         return _species_order;
     }
 
-    @Override
+    /**
+     * This should return a map, which maps species to
+     * SpeciesSpecificDcData
+     * 
+     * 
+     * @return SortedMap<Species, SpeciesSpecificDcData>
+     */
     public SortedMap<Species, SpeciesSpecificDcData> getSpeciesData() {
         return _species_data;
     }
 
-    @Override
     public double getStandardDeviationOfSimilarityScore() {
         return _sd;
     }
@@ -254,7 +272,6 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
         _species_order = species_order;
     }
 
-    @Override
     public StringBuffer toStringBuffer( final PrintableDomainSimilarity.PRINT_OPTION print_option,
                                         final Map<String, Integer> tax_code_to_id_map,
                                         final Phylogeny phy ) {
@@ -262,7 +279,7 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
             case SIMPLE_TAB_DELIMITED:
                 return toStringBufferSimpleTabDelimited();
             case HTML:
-                return toStringBufferDetailedHTML( tax_code_to_id_map, phy );
+                return toStringBufferDetailedHTML( tax_code_to_id_map, phy, OUTPUT_TAXCODES_PER_DOMAIN );
             default:
                 throw new AssertionError( "Unknown print option: " + print_option );
         }
@@ -274,14 +291,17 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
                                                final Map<String, Integer> tax_code_to_id_map,
                                                final Phylogeny phy ) {
         if ( html ) {
+            sb.append( "<tr>" );
+            sb.append( "<td>" );
             addTaxWithLink( sb, species.getSpeciesId(), tax_code_to_id_map, phy );
+            sb.append( "</td>" );
         }
         else {
             sb.append( species.getSpeciesId() );
         }
         if ( getDetaildness() != DomainSimilarityCalculator.Detailedness.BASIC ) {
             if ( html ) {
-                sb.append( ":" );
+                //sb.append( ":" );
             }
             else {
                 sb.append( "\t" );
@@ -289,7 +309,8 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
             sb.append( getSpeciesData().get( species ).toStringBuffer( getDetaildness(), html ) );
         }
         if ( html ) {
-            sb.append( "<br>" );
+            //sb.append( "<br>" );
+            sb.append( "</tr>" );
         }
         else {
             sb.append( "\n\t" );
@@ -332,7 +353,7 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
         sb.append( "</b>" );
     }
 
-    private int compareByDomainId( final DomainSimilarity other ) {
+    private int compareByDomainId( final PrintableDomainSimilarity other ) {
         return getDomainId().compareToIgnoreCase( other.getDomainId() );
     }
 
@@ -357,7 +378,7 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
         }
         for( final Map.Entry<String, SortedSet<String>> e : m.entrySet() ) {
             sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + e.getKey() + "\">" + e.getKey() + "</a>" );
-            sb.append( ": " );
+            sb.append( " " );
             sb.append( "<span style=\"font-size:7px\">" );
             for( final String tax : e.getValue() ) {
                 final String hex = SurfacingUtil.obtainHexColorStringDependingOnTaxonomyGroup( tax, null );
@@ -379,8 +400,35 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
         return sb;
     }
 
+    private StringBuffer getSpeciesDataInAlphabeticalOrder( final boolean html,
+                                                            final Map<String, Integer> tax_code_to_id_map,
+                                                            final Phylogeny phy ) {
+        // Per-species data is emitted as table rows only in HTML mode, so the
+        // enclosing table element is written only then (plain text stays tag-free).
+        final StringBuffer sb = new StringBuffer( html ? "<table>" : "" );
+        for( final Species species : getSpeciesData().keySet() ) {
+            addSpeciesSpecificDomainData( sb, species, html, tax_code_to_id_map, phy );
+        }
+        return sb.append( html ? "</table>" : "" );
+    }
+
+    private StringBuffer getSpeciesDataInCustomOrder( final boolean html,
+                                                      final Map<String, Integer> tax_code_to_id_map,
+                                                      final Phylogeny phy ) {
+        // In HTML mode every species (or missing-species placeholder) is a table row, so wrap the rows in a table.
+        final StringBuffer sb = new StringBuffer( html ? "<table>" : "" );
+        for( final Species order_species : getSpeciesCustomOrder() ) {
+            if ( getSpeciesData().keySet().contains( order_species ) ) {
+                addSpeciesSpecificDomainData( sb, order_species, html, tax_code_to_id_map, phy );
+            }
+            else {
+                sb.append( html ? "<tr><td>" : "" ).append( PrintableDomainSimilarity.NO_SPECIES );
+                sb.append( html ? "</td></tr>" : PrintableDomainSimilarity.SPECIES_SEPARATOR );
+            }
+        }
+        return sb.append( html ? "</table>" : "" );
+    }
+
     private StringBuffer getTaxonomyGroupDistribution( final Phylogeny tol ) {
-        //TODO work on me    
         final SortedMap<String, Set<String>> domain_to_species_set_map = new TreeMap<String, Set<String>>();
         for( final Species species : getSpeciesData().keySet() ) {
             for( final String combable_dom : getCombinableDomainIds( species ) ) {
@@ -394,8 +442,6 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
         sb.append( "<table>" );
         for( final Map.Entry<String, Set<String>> domain_to_species_set : domain_to_species_set_map.entrySet() ) {
             final Map<String, Integer> counts = new HashMap<String, Integer>();
-            //  final ValueComparator bvc = new ValueComparator( counts );
-            //  final SortedMap<String, Integer> sorted_counts = new TreeMap<String, Integer>( bvc );
             for( final String tax_code : domain_to_species_set.getValue() ) {
                 final String group = SurfacingUtil.obtainTaxonomyGroup( tax_code, tol );
                 if ( !ForesterUtil.isEmpty( group ) ) {
@@ -424,14 +470,12 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
                 }
                 counts_to_groups.get( c ).add( group_to_counts.getKey() );
             }
-            // sorted_counts.putAll( counts );
             sb.append( "<tr>" );
             sb.append( "<td>" );
             sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain_to_species_set.getKey() + "\">"
                     + domain_to_species_set.getKey() + "</a>" );
-            sb.append( ": " );
+            sb.append( " " );
             sb.append( "</td>" );
-            // sb.append( "<span style=\"font-size:9px\">" );
             boolean first = true;
             for( final Entry<Integer, SortedSet<String>> count_to_groups : counts_to_groups.entrySet() ) {
                 if ( first ) {
@@ -445,7 +489,7 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
                 sb.append( "<td>" );
                 final SortedSet<String> groups = count_to_groups.getValue();
                 sb.append( count_to_groups.getKey() );
-                sb.append( ":" );
+                sb.append( " " );
                 for( final String group : groups ) {
                     final Color color = ForesterUtil.obtainColorDependingOnTaxonomyGroup( group );
                     if ( color == null ) {
@@ -465,83 +509,9 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
                 sb.append( "</td>" );
                 sb.append( "</tr>" );
             }
-            // sb.append( "</span>" );
             sb.append( ForesterUtil.getLineSeparator() );
         }
         sb.append( "</table>" );
-        // i am just a template and need to be modified for "printout" TODO
-        //        for( final Map.Entry<String, SortedSet<String>> e : m.entrySet() ) {
-        //            sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + e.getKey() + "\">" + e.getKey() + "</a>" );
-        //            sb.append( ": " );
-        //            sb.append( "<span style=\"font-size:8px\">" );
-        //            for( final String tax : e.getValue() ) {
-        //                final String hex = SurfacingUtil.obtainHexColorStringDependingOnTaxonomyGroup( tax, null );
-        //                if ( !ForesterUtil.isEmpty( hex ) ) {
-        //                    sb.append( "<span style=\"color:" );
-        //                    sb.append( hex );
-        //                    sb.append( "\">" );
-        //                    sb.append( tax );
-        //                    sb.append( "</span>" );
-        //                }
-        //                else {
-        //                    sb.append( tax );
-        //                }
-        //                sb.append( " " );
-        //            }
-        //            sb.append( "</span>" );
-        //            sb.append( "<br>\n" );
-        //        }
-        return sb;
-    }
-
-    /*
-     public class Testing {
-
-    public static void main(String[] args) {
-
-        HashMap<String,Double> map = new HashMap<String,Double>();
-        ValueComparator bvc =  new ValueComparator(map);
-        TreeMap<String,Double> sorted_map = new TreeMap<String,Double>(bvc);
-
-        map.put("A",99.5);
-        map.put("B",67.4);
-        map.put("C",67.4);
-        map.put("D",67.3);
-
-        System.out.println("unsorted map: "+map);
-
-        sorted_map.putAll(map);
-
-        System.out.println("results: "+sorted_map);
-    }
-    }
-
-       
-      
-     */
-    private StringBuffer getSpeciesDataInAlphabeticalOrder( final boolean html,
-                                                            final Map<String, Integer> tax_code_to_id_map,
-                                                            final Phylogeny phy ) {
-        final StringBuffer sb = new StringBuffer();
-        for( final Species species : getSpeciesData().keySet() ) {
-            addSpeciesSpecificDomainData( sb, species, html, tax_code_to_id_map, phy );
-        }
-        return sb;
-    }
-
-    private StringBuffer getSpeciesDataInCustomOrder( final boolean html,
-                                                      final Map<String, Integer> tax_code_to_id_map,
-                                                      final Phylogeny phy ) {
-        final StringBuffer sb = new StringBuffer();
-        for( final Species order_species : getSpeciesCustomOrder() ) {
-            if ( getSpeciesData().keySet().contains( order_species ) ) {
-                addSpeciesSpecificDomainData( sb, order_species, html, tax_code_to_id_map, phy );
-            }
-            else {
-                sb.append( PrintableDomainSimilarity.NO_SPECIES );
-                sb.append( PrintableDomainSimilarity.SPECIES_SEPARATOR );
-            }
-        }
         return sb;
     }
 
@@ -553,7 +523,9 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
         return _treat_as_binary_comparison;
     }
 
-    private StringBuffer toStringBufferDetailedHTML( final Map<String, Integer> tax_code_to_id_map, final Phylogeny phy ) {
+    private StringBuffer toStringBufferDetailedHTML( final Map<String, Integer> tax_code_to_id_map,
+                                                     final Phylogeny phy,
+                                                     final boolean output_tax_codes_per_domain ) {
         final StringBuffer sb = new StringBuffer();
         sb.append( "<tr>" );
         sb.append( "<td>" );
@@ -609,14 +581,18 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
         if ( ( getSpeciesCustomOrder() == null ) || getSpeciesCustomOrder().isEmpty() ) {
             sb.append( "<td>" );
             sb.append( getSpeciesDataInAlphabeticalOrder( true, tax_code_to_id_map, phy ) );
-            sb.append( getDomainDataInAlphabeticalOrder() );
+            if ( output_tax_codes_per_domain ) {
+                sb.append( getDomainDataInAlphabeticalOrder() );
+            }
             sb.append( getTaxonomyGroupDistribution( phy ) );
             sb.append( "</td>" );
         }
         else {
             sb.append( "<td>" );
             sb.append( getSpeciesDataInCustomOrder( true, tax_code_to_id_map, phy ) );
-            sb.append( getDomainDataInAlphabeticalOrder() );
+            if ( output_tax_codes_per_domain ) {
+                sb.append( getDomainDataInAlphabeticalOrder() );
+            }
             sb.append( getTaxonomyGroupDistribution( phy ) );
             sb.append( "</td>" );
         }
@@ -633,6 +609,14 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
         return sb;
     }
 
+    static public enum DomainSimilarityScoring {
+        COMBINATIONS, DOMAINS, PROTEINS;
+    }
+
+    public static enum DomainSimilaritySortField {
+        ABS_MAX_COUNTS_DIFFERENCE, DOMAIN_ID, MAX, MAX_COUNTS_DIFFERENCE, MAX_DIFFERENCE, MEAN, MIN, SD, SPECIES_COUNT,
+    }
+
     public static enum PRINT_OPTION {
         HTML, SIMPLE_TAB_DELIMITED;
     }
index 92de951..be2fcc7 100644 (file)
@@ -98,25 +98,29 @@ class PrintableSpeciesSpecificDcData implements SpeciesSpecificDcData {
         final StringBuffer sb = new StringBuffer();
         if ( detailedness == DomainSimilarityCalculator.Detailedness.PUNCTILIOUS ) {
             if ( html ) {
-                sb.append( " " );
+                //sb.append( " " );
+                sb.append( "<td>" );
             }
             sb.append( getKeyDomainDomainsCount() );
             if ( html ) {
-                sb.append( ", " );
+                //sb.append( ", " );
+                sb.append( "</td><td>" );
             }
             else {
                 sb.append( "\t" );
             }
             sb.append( getKeyDomainProteinsCount() );
             if ( html ) {
-                sb.append( ", " );
+                // sb.append( ", " );
+                sb.append( "</td><td>" );
             }
             else {
                 sb.append( "\t" );
             }
             sb.append( getCombinableDomainsCount() );
-            if ( html && !getCombinableDomainIdToCountsMap().isEmpty() ) {
-                sb.append( ":" );
+            if ( html /*&& !getCombinableDomainIdToCountsMap().isEmpty()*/) {
+                // sb.append( ":" );
+                sb.append( "</td><td>" );
             }
         }
         if ( html ) {
@@ -148,6 +152,7 @@ class PrintableSpeciesSpecificDcData implements SpeciesSpecificDcData {
                 sb.append( link );
             }
             sb.append( "]" );
+            sb.append( "</td>" );
         }
         return sb;
     }
index eb75a9b..be08c39 100644 (file)
@@ -45,11 +45,11 @@ interface SpeciesSpecificDcData {
      */
     public SortedMap<String, Integer> getCombinableDomainIdToCountsMap();
 
+    public SortedSet<String> getKeyDomainProteins();
+
     public int getNumberOfProteinsExhibitingCombinationWith( final String domain_id );
 
     public StringBuffer toStringBuffer( final DomainSimilarityCalculator.Detailedness detailedness, boolean html );
 
-    public SortedSet<String> getKeyDomainProteins();
-
     void addKeyDomainProtein( String protein );
 }
index aa588a0..697c84e 100644 (file)
@@ -39,7 +39,7 @@ public class SurfacingConstants {
     public static final String NONE                           = "[none]";
     public static final String PFAM_FAMILY_ID_LINK            = "http://pfam.janelia.org/family/";
     public static final String UNIPROT_TAXONOMY_ID_LINK       = "http://www.uniprot.org/taxonomy/";
+    static final boolean       PRINT_MORE_DOM_SIMILARITY_INFO = false;
     static final boolean       SECONDARY_FEATURES_ARE_SCOP    = true;
     static final String        SECONDARY_FEATURES_SCOP_LINK   = "http://scop.mrc-lmb.cam.ac.uk/scop/search.cgi?key=";
-    static final boolean       PRINT_MORE_DOM_SIMILARITY_INFO = false;
 }
index 73cee01..c4f98d6 100644 (file)
@@ -143,9 +143,9 @@ public final class SurfacingUtil {
         }
     }
 
-    public static DescriptiveStatistics calculateDescriptiveStatisticsForMeanValues( final Set<DomainSimilarity> similarities ) {
+    public static DescriptiveStatistics calculateDescriptiveStatisticsForMeanValues( final Set<PrintableDomainSimilarity> similarities ) {
         final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
-        for( final DomainSimilarity similarity : similarities ) {
+        for( final PrintableDomainSimilarity similarity : similarities ) {
             stats.addValue( similarity.getMeanSimilarityScore() );
         }
         return stats;
@@ -375,11 +375,11 @@ public final class SurfacingUtil {
         return m;
     }
 
-    public static void decoratePrintableDomainSimilarities( final SortedSet<DomainSimilarity> domain_similarities,
+    public static void decoratePrintableDomainSimilarities( final SortedSet<PrintableDomainSimilarity> domain_similarities,
                                                             final Detailedness detailedness ) {
-        for( final DomainSimilarity domain_similarity : domain_similarities ) {
+        for( final PrintableDomainSimilarity domain_similarity : domain_similarities ) {
             if ( domain_similarity instanceof PrintableDomainSimilarity ) {
-                final PrintableDomainSimilarity printable_domain_similarity = ( PrintableDomainSimilarity ) domain_similarity;
+                final PrintableDomainSimilarity printable_domain_similarity = domain_similarity;
                 printable_domain_similarity.setDetailedness( detailedness );
             }
         }
@@ -2218,11 +2218,11 @@ public final class SurfacingUtil {
                                                       final Writer simple_tab_writer,
                                                       final Writer single_writer,
                                                       Map<Character, Writer> split_writers,
-                                                      final SortedSet<DomainSimilarity> similarities,
+                                                      final SortedSet<PrintableDomainSimilarity> similarities,
                                                       final boolean treat_as_binary,
                                                       final List<Species> species_order,
                                                       final PrintableDomainSimilarity.PRINT_OPTION print_option,
-                                                      final DomainSimilarity.DomainSimilarityScoring scoring,
+                                                      final PrintableDomainSimilarity.DomainSimilarityScoring scoring,
                                                       final boolean verbose,
                                                       final Map<String, Integer> tax_code_to_id_map,
                                                       final Phylogeny phy,
@@ -2262,9 +2262,9 @@ public final class SurfacingUtil {
                 break;
         }
         //
-        for( final DomainSimilarity similarity : similarities ) {
+        for( final PrintableDomainSimilarity similarity : similarities ) {
             if ( ( species_order != null ) && !species_order.isEmpty() ) {
-                ( ( PrintableDomainSimilarity ) similarity ).setSpeciesOrder( species_order );
+                ( similarity ).setSpeciesOrder( species_order );
             }
             if ( single_writer != null ) {
                 if ( !ForesterUtil.isEmpty( pos_filter_doms ) && pos_filter_doms.contains( similarity.getDomainId() ) ) {
@@ -2347,9 +2347,9 @@ public final class SurfacingUtil {
             w.write( SurfacingConstants.NL );
         }
         //
-        for( final DomainSimilarity similarity : similarities ) {
+        for( final PrintableDomainSimilarity similarity : similarities ) {
             if ( ( species_order != null ) && !species_order.isEmpty() ) {
-                ( ( PrintableDomainSimilarity ) similarity ).setSpeciesOrder( species_order );
+                ( similarity ).setSpeciesOrder( species_order );
             }
             if ( simple_tab_writer != null ) {
                 simple_tab_writer.write( similarity.toStringBuffer( PRINT_OPTION.SIMPLE_TAB_DELIMITED,
index 168b6a4..dabbb06 100644 (file)
@@ -313,17 +313,17 @@ public class TestSurfacing {
             cdc_list.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve,
                                                                            true,
                                                                            new BasicSpecies( "nemve" ) ) );
-            final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
+            final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
                                                                                          false,
                                                                                          false,
                                                                                          true );
-            final SortedSet<DomainSimilarity> sims = calc
+            final SortedSet<PrintableDomainSimilarity> sims = calc
                     .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
                                             cdc_list,
                                             true,
                                             true );
-            final Iterator<DomainSimilarity> sims_it = sims.iterator();
-            final DomainSimilarity sa = sims_it.next();
+            final Iterator<PrintableDomainSimilarity> sims_it = sims.iterator();
+            final PrintableDomainSimilarity sa = sims_it.next();
             if ( !sa.getDomainId().equals( "A" ) ) {
                 return false;
             }
@@ -364,7 +364,7 @@ public class TestSurfacing {
             if ( sa.getMaximalDifferenceInCounts() != 3 ) {
                 return false;
             }
-            final DomainSimilarity sb = sims_it.next();
+            final PrintableDomainSimilarity sb = sims_it.next();
             if ( !sb.getDomainId().equals( "B" ) ) {
                 return false;
             }
@@ -398,7 +398,7 @@ public class TestSurfacing {
             if ( sb.getMaximalDifferenceInCounts() != 2 ) {
                 return false;
             }
-            final DomainSimilarity sc = sims_it.next();
+            final PrintableDomainSimilarity sc = sims_it.next();
             if ( !sc.getDomainId().equals( "C" ) ) {
                 return false;
             }
@@ -464,17 +464,17 @@ public class TestSurfacing {
             cdc_list2.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve,
                                                                             false,
                                                                             new BasicSpecies( "nemve" ) ) );
-            final DomainSimilarityCalculator calc2 = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
+            final DomainSimilarityCalculator calc2 = new BasicDomainSimilarityCalculator( PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
                                                                                           false,
                                                                                           false,
                                                                                           true );
-            final SortedSet<DomainSimilarity> sims2 = calc2
+            final SortedSet<PrintableDomainSimilarity> sims2 = calc2
                     .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
                                             cdc_list2,
                                             false,
                                             true );
-            final Iterator<DomainSimilarity> sims_it2 = sims2.iterator();
-            final DomainSimilarity sa2 = sims_it2.next();
+            final Iterator<PrintableDomainSimilarity> sims_it2 = sims2.iterator();
+            final PrintableDomainSimilarity sa2 = sims_it2.next();
             if ( !sa2.getDomainId().equals( "A" ) ) {
                 return false;
             }
@@ -552,17 +552,17 @@ public class TestSurfacing {
             cdc_list3.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve,
                                                                             true,
                                                                             new BasicSpecies( "nemve" ) ) );
-            final DomainSimilarityCalculator calc3 = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
+            final DomainSimilarityCalculator calc3 = new BasicDomainSimilarityCalculator( PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
                                                                                           false,
                                                                                           false,
                                                                                           true );
-            final SortedSet<DomainSimilarity> sims3 = calc3
+            final SortedSet<PrintableDomainSimilarity> sims3 = calc3
                     .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
                                             cdc_list3,
                                             false,
                                             true );
-            final Iterator<DomainSimilarity> sims_it3 = sims3.iterator();
-            final DomainSimilarity sa3 = sims_it3.next();
+            final Iterator<PrintableDomainSimilarity> sims_it3 = sims3.iterator();
+            final PrintableDomainSimilarity sa3 = sims_it3.next();
             if ( !sa3.getDomainId().equals( "A" ) ) {
                 return false;
             }
@@ -596,17 +596,17 @@ public class TestSurfacing {
             cdc_list4.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve,
                                                                             false,
                                                                             new BasicSpecies( "nemve" ) ) );
-            final DomainSimilarityCalculator calc4 = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
+            final DomainSimilarityCalculator calc4 = new BasicDomainSimilarityCalculator( PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
                                                                                           true,
                                                                                           false,
                                                                                           true );
-            final SortedSet<DomainSimilarity> sims4 = calc4
+            final SortedSet<PrintableDomainSimilarity> sims4 = calc4
                     .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
                                             cdc_list4,
                                             false,
                                             true );
-            final Iterator<DomainSimilarity> sims_it4 = sims4.iterator();
-            final DomainSimilarity sa4 = sims_it4.next();
+            final Iterator<PrintableDomainSimilarity> sims_it4 = sims4.iterator();
+            final PrintableDomainSimilarity sa4 = sims_it4.next();
             if ( !sa4.getDomainId().equals( "A" ) ) {
                 return false;
             }
@@ -629,10 +629,10 @@ public class TestSurfacing {
             if ( ssdsd4.getNumberOfProteinsExhibitingCombinationWith( "X" ) != 3 ) {
                 return false;
             }
-            final SortedSet<DomainSimilarity> sims4_d = calc4
+            final SortedSet<PrintableDomainSimilarity> sims4_d = calc4
                     .calculateSimilarities( new DomainCountsBasedPairwiseSimilarityCalculator(), cdc_list4, false, true );
-            final Iterator<DomainSimilarity> sims_it4_d = sims4_d.iterator();
-            final DomainSimilarity sa4_d = sims_it4_d.next();
+            final Iterator<PrintableDomainSimilarity> sims_it4_d = sims4_d.iterator();
+            final PrintableDomainSimilarity sa4_d = sims_it4_d.next();
             if ( !sa4_d.getDomainId().equals( "A" ) ) {
                 return false;
             }
@@ -653,13 +653,13 @@ public class TestSurfacing {
             if ( sa4_d.getN() != 6 ) {
                 return false;
             }
-            final SortedSet<DomainSimilarity> sims4_p = calc4
+            final SortedSet<PrintableDomainSimilarity> sims4_p = calc4
                     .calculateSimilarities( new ProteinCountsBasedPairwiseDomainSimilarityCalculator(),
                                             cdc_list4,
                                             false,
                                             true );
-            final Iterator<DomainSimilarity> sims_it4_p = sims4_p.iterator();
-            final DomainSimilarity sa4_p = sims_it4_p.next();
+            final Iterator<PrintableDomainSimilarity> sims_it4_p = sims4_p.iterator();
+            final PrintableDomainSimilarity sa4_p = sims_it4_p.next();
             if ( !sa4_p.getDomainId().equals( "A" ) ) {
                 return false;
             }
@@ -708,10 +708,10 @@ public class TestSurfacing {
             cdc_list5.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve,
                                                                             true,
                                                                             new BasicSpecies( "nemve" ) ) );
-            final SortedSet<DomainSimilarity> sims5_d = calc4
+            final SortedSet<PrintableDomainSimilarity> sims5_d = calc4
                     .calculateSimilarities( new DomainCountsBasedPairwiseSimilarityCalculator(), cdc_list5, false, true );
-            final Iterator<DomainSimilarity> sims_it5_d = sims5_d.iterator();
-            final DomainSimilarity sa5_d = sims_it5_d.next();
+            final Iterator<PrintableDomainSimilarity> sims_it5_d = sims5_d.iterator();
+            final PrintableDomainSimilarity sa5_d = sims_it5_d.next();
             if ( sa5_d.getSpecies().size() != 4 ) {
                 return false;
             }
@@ -779,13 +779,13 @@ public class TestSurfacing {
             if ( sa5_d.getMaximalDifferenceInCounts() != 11 ) {
                 return false;
             }
-            final SortedSet<DomainSimilarity> sims5_p = calc4
+            final SortedSet<PrintableDomainSimilarity> sims5_p = calc4
                     .calculateSimilarities( new ProteinCountsBasedPairwiseDomainSimilarityCalculator(),
                                             cdc_list5,
                                             false,
                                             true );
-            final Iterator<DomainSimilarity> sims_it5_p = sims5_p.iterator();
-            final DomainSimilarity sa5_p = sims_it5_p.next();
+            final Iterator<PrintableDomainSimilarity> sims_it5_p = sims5_p.iterator();
+            final PrintableDomainSimilarity sa5_p = sims_it5_p.next();
             if ( !sa5_p.getDomainId().equals( "A" ) ) {
                 return false;
             }
@@ -843,10 +843,10 @@ public class TestSurfacing {
             cdc_list6.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve,
                                                                             false,
                                                                             new BasicSpecies( "nemve" ) ) );
-            final SortedSet<DomainSimilarity> sims6_d = calc4
+            final SortedSet<PrintableDomainSimilarity> sims6_d = calc4
                     .calculateSimilarities( new DomainCountsBasedPairwiseSimilarityCalculator(), cdc_list6, false, true );
-            final Iterator<DomainSimilarity> sims_it6_d = sims6_d.iterator();
-            final DomainSimilarity sa6_d = sims_it6_d.next();
+            final Iterator<PrintableDomainSimilarity> sims_it6_d = sims6_d.iterator();
+            final PrintableDomainSimilarity sa6_d = sims_it6_d.next();
             if ( sa6_d.getSpecies().size() != 4 ) {
                 return false;
             }
@@ -914,13 +914,13 @@ public class TestSurfacing {
             if ( sa6_d.getMaximalDifferenceInCounts() != 11 ) {
                 return false;
             }
-            final SortedSet<DomainSimilarity> sims6_p = calc4
+            final SortedSet<PrintableDomainSimilarity> sims6_p = calc4
                     .calculateSimilarities( new ProteinCountsBasedPairwiseDomainSimilarityCalculator(),
                                             cdc_list6,
                                             false,
                                             true );
-            final Iterator<DomainSimilarity> sims_it6_p = sims6_p.iterator();
-            final DomainSimilarity sa6_p = sims_it6_p.next();
+            final Iterator<PrintableDomainSimilarity> sims_it6_p = sims6_p.iterator();
+            final PrintableDomainSimilarity sa6_p = sims_it6_p.next();
             if ( !sa6_p.getDomainId().equals( "A" ) ) {
                 return false;
             }
@@ -1028,17 +1028,17 @@ public class TestSurfacing {
             cdc_list.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve,
                                                                            true,
                                                                            new BasicSpecies( "nemve" ) ) );
-            final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
+            final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
                                                                                          false,
                                                                                          false,
                                                                                          true );
-            final SortedSet<DomainSimilarity> sims = calc
+            final SortedSet<PrintableDomainSimilarity> sims = calc
                     .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
                                             cdc_list,
                                             true,
                                             false );
-            final Iterator<DomainSimilarity> sims_it = sims.iterator();
-            final DomainSimilarity sa = sims_it.next();
+            final Iterator<PrintableDomainSimilarity> sims_it = sims.iterator();
+            final PrintableDomainSimilarity sa = sims_it.next();
             if ( !sa.getDomainId().equals( "A" ) ) {
                 return false;
             }
@@ -1069,7 +1069,7 @@ public class TestSurfacing {
             if ( sa.getMaximalDifferenceInCounts() != 0 ) {
                 return false;
             }
-            final DomainSimilarity sb = sims_it.next();
+            final PrintableDomainSimilarity sb = sims_it.next();
             if ( !sb.getDomainId().equals( "B" ) ) {
                 return false;
             }
@@ -1079,13 +1079,13 @@ public class TestSurfacing {
             if ( !sb.getSpecies().contains( new BasicSpecies( "rabbit" ) ) ) {
                 return false;
             }
-            final SortedSet<DomainSimilarity> sims2 = calc
+            final SortedSet<PrintableDomainSimilarity> sims2 = calc
                     .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
                                             cdc_list,
                                             true,
                                             true );
-            final Iterator<DomainSimilarity> sims_it2 = sims2.iterator();
-            final DomainSimilarity sa2 = sims_it2.next();
+            final Iterator<PrintableDomainSimilarity> sims_it2 = sims2.iterator();
+            final PrintableDomainSimilarity sa2 = sims_it2.next();
             if ( !sa2.getDomainId().equals( "D" ) ) {
                 return false;
             }
@@ -1137,11 +1137,11 @@ public class TestSurfacing {
             cdc_list.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve,
                                                                            true,
                                                                            new BasicSpecies( "nemve" ) ) );
-            final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
+            final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
                                                                                          false,
                                                                                          false,
                                                                                          true );
-            final SortedSet<DomainSimilarity> sims = calc
+            final SortedSet<PrintableDomainSimilarity> sims = calc
                     .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
                                             cdc_list,
                                             false,
@@ -1149,8 +1149,8 @@ public class TestSurfacing {
             if ( sims.size() != 1 ) {
                 return false;
             }
-            final Iterator<DomainSimilarity> sims_it = sims.iterator();
-            final DomainSimilarity sa = sims_it.next();
+            final Iterator<PrintableDomainSimilarity> sims_it = sims.iterator();
+            final PrintableDomainSimilarity sa = sims_it.next();
             if ( !sa.getDomainId().equals( "A" ) ) {
                 return false;
             }
@@ -1169,7 +1169,7 @@ public class TestSurfacing {
             if ( !sa.getSpecies().contains( new BasicSpecies( "rabbit" ) ) ) {
                 return false;
             }
-            final SortedSet<DomainSimilarity> sims_ns = calc
+            final SortedSet<PrintableDomainSimilarity> sims_ns = calc
                     .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
                                             cdc_list,
                                             true,
@@ -1211,7 +1211,7 @@ public class TestSurfacing {
             cdc_list2.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve2,
                                                                             true,
                                                                             new BasicSpecies( "nemve" ) ) );
-            final SortedSet<DomainSimilarity> sims2 = calc
+            final SortedSet<PrintableDomainSimilarity> sims2 = calc
                     .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
                                             cdc_list2,
                                             true,
index 7391763..92cd3d8 100644 (file)
@@ -127,7 +127,7 @@ import org.forester.ws.wabi.TxSearch.TAX_RANK;
 @SuppressWarnings( "unused")
 public final class Test {
 
-    private final static boolean PERFORM_DB_TESTS          = true;
+    private final static boolean PERFORM_DB_TESTS          = false;
     private final static double  ZERO_DIFF                 = 1.0E-9;
     private final static String  PATH_TO_TEST_DATA         = System.getProperty( "user.dir" )
                                                                    + ForesterUtil.getFileSeparator() + "test_data"
@@ -501,7 +501,7 @@ public final class Test {
                 failed++;
             }
         }
-        System.exit( 0 );
+        /////////////////////System.exit( 0 );
         System.out.print( "UniProtKB id extraction: " );
         if ( Test.testExtractUniProtKbProteinSeqIdentifier() ) {
             System.out.println( "OK." );