From: cmzmasek@gmail.com Date: Wed, 6 Nov 2013 20:31:10 +0000 (+0000) Subject: inprogress X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=bd7bf76e7b7d9f4f643f05f6aadc4f517f875254;p=jalview.git inprogress --- diff --git a/forester/java/src/org/forester/application/surfacing.java b/forester/java/src/org/forester/application/surfacing.java index 302800d..2a2169e 100644 --- a/forester/java/src/org/forester/application/surfacing.java +++ b/forester/java/src/org/forester/application/surfacing.java @@ -63,6 +63,9 @@ import org.forester.surfacing.CombinationsBasedPairwiseDomainSimilarityCalculato import org.forester.surfacing.DomainCountsBasedPairwiseSimilarityCalculator; import org.forester.surfacing.DomainLengthsTable; import org.forester.surfacing.DomainParsimonyCalculator; +import org.forester.surfacing.DomainSimilarity; +import org.forester.surfacing.DomainSimilarity.DomainSimilarityScoring; +import org.forester.surfacing.DomainSimilarity.PRINT_OPTION; import org.forester.surfacing.DomainSimilarityCalculator; import org.forester.surfacing.DomainSimilarityCalculator.Detailedness; import org.forester.surfacing.GenomeWideCombinableDomains; @@ -70,9 +73,6 @@ import org.forester.surfacing.GenomeWideCombinableDomains.GenomeWideCombinableDo import org.forester.surfacing.MappingResults; import org.forester.surfacing.PairwiseDomainSimilarityCalculator; import org.forester.surfacing.PairwiseGenomeComparator; -import org.forester.surfacing.PrintableDomainSimilarity; -import org.forester.surfacing.PrintableDomainSimilarity.DomainSimilarityScoring; -import org.forester.surfacing.PrintableDomainSimilarity.PRINT_OPTION; import org.forester.surfacing.ProteinCountsBasedPairwiseDomainSimilarityCalculator; import org.forester.surfacing.SurfacingUtil; import org.forester.util.BasicDescriptiveStatistics; @@ -85,188 +85,188 @@ import org.forester.util.ForesterUtil; public class surfacing { - private static final int MINIMAL_NUMBER_OF_SIMILARITIES_FOR_SPLITTING = 1000; - public final static String DOMAIN_COMBINITONS_OUTPUT_OPTION_FOR_GRAPH_ANALYSIS = "graph_analysis_out"; - public final static String DOMAIN_COMBINITONS_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS = "_dc.dot"; - public final static String PARSIMONY_OUTPUT_FITCH_PRESENT_BC_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS = "_fitch_present_dc.dot"; - public final static String DOMAIN_COMBINITON_COUNTS_OUTPUTFILE_SUFFIX = ".dcc"; + private static final int MINIMAL_NUMBER_OF_SIMILARITIES_FOR_SPLITTING = 1000; + public final static String DOMAIN_COMBINITONS_OUTPUT_OPTION_FOR_GRAPH_ANALYSIS = "graph_analysis_out"; + public final static String DOMAIN_COMBINITONS_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS = "_dc.dot"; + public final static String PARSIMONY_OUTPUT_FITCH_PRESENT_BC_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS = "_fitch_present_dc.dot"; + public final static String DOMAIN_COMBINITON_COUNTS_OUTPUTFILE_SUFFIX = ".dcc"; // gain/loss: - public final static String PARSIMONY_OUTPUT_GL_SUFFIX_DOLLO_DOMAINS = "_dollo_gl_d"; - public final static String PARSIMONY_OUTPUT_GL_SUFFIX_DOLLO_BINARY_COMBINATIONS = "_dollo_gl_dc"; - public final static String PARSIMONY_OUTPUT_GL_SUFFIX_FITCH_DOMAINS = "_fitch_gl_d"; - public final static String PARSIMONY_OUTPUT_GL_SUFFIX_FITCH_BINARY_COMBINATIONS = "_fitch_gl_dc"; + public final static String PARSIMONY_OUTPUT_GL_SUFFIX_DOLLO_DOMAINS = "_dollo_gl_d"; + public final static String PARSIMONY_OUTPUT_GL_SUFFIX_DOLLO_BINARY_COMBINATIONS = "_dollo_gl_dc"; + public final static String PARSIMONY_OUTPUT_GL_SUFFIX_FITCH_DOMAINS = "_fitch_gl_d"; + public final static String PARSIMONY_OUTPUT_GL_SUFFIX_FITCH_BINARY_COMBINATIONS = "_fitch_gl_dc"; // gain/loss counts: - public final static String PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_DOLLO_DOMAINS = "_dollo_glc_d"; - public final static String PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_DOLLO_BINARY_COMBINATIONS = "_dollo_glc_dc"; - public final static String PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_FITCH_DOMAINS = "_fitch_glc_d"; - public final static String PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_FITCH_BINARY_COMBINATIONS = "_fitch_glc_dc"; + public final static String PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_DOLLO_DOMAINS = "_dollo_glc_d"; + public final static String PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_DOLLO_BINARY_COMBINATIONS = "_dollo_glc_dc"; + public final static String PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_FITCH_DOMAINS = "_fitch_glc_d"; + public final static String PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_FITCH_BINARY_COMBINATIONS = "_fitch_glc_dc"; // tables: - public final static String PARSIMONY_OUTPUT_FITCH_GAINS_BC = "_fitch_gains_dc"; - public final static String PARSIMONY_OUTPUT_FITCH_GAINS_HTML_BC = "_fitch_gains_dc.html"; - public final static String PARSIMONY_OUTPUT_FITCH_LOSSES_BC = "_fitch_losses_dc"; - public final static String PARSIMONY_OUTPUT_FITCH_LOSSES_HTML_BC = "_fitch_losses_dc.html"; - public final static String PARSIMONY_OUTPUT_FITCH_PRESENT_BC = "_fitch_present_dc"; - public final static String PARSIMONY_OUTPUT_FITCH_PRESENT_HTML_BC = "_fitch_present_dc.html"; - public final static String PARSIMONY_OUTPUT_DOLLO_GAINS_D = "_dollo_gains_d"; - public final static String PARSIMONY_OUTPUT_DOLLO_GAINS_HTML_D = "_dollo_gains_d.html"; - public final static String PARSIMONY_OUTPUT_DOLLO_LOSSES_D = "_dollo_losses_d"; - public final static String PARSIMONY_OUTPUT_DOLLO_LOSSES_HTML_D = "_dollo_losses_d.html"; - public final static String PARSIMONY_OUTPUT_DOLLO_PRESENT_D = "_dollo_present_d"; - public final static String PARSIMONY_OUTPUT_DOLLO_PRESENT_HTML_D = "_dollo_present_d.html"; - public final static String DOMAINS_PRESENT_NEXUS = "_dom.nex"; - public final static String BDC_PRESENT_NEXUS = "_dc.nex"; + public final static String PARSIMONY_OUTPUT_FITCH_GAINS_BC = "_fitch_gains_dc"; + public final static String PARSIMONY_OUTPUT_FITCH_GAINS_HTML_BC = "_fitch_gains_dc.html"; + public final static String PARSIMONY_OUTPUT_FITCH_LOSSES_BC = "_fitch_losses_dc"; + public final static String PARSIMONY_OUTPUT_FITCH_LOSSES_HTML_BC = "_fitch_losses_dc.html"; + public final static String PARSIMONY_OUTPUT_FITCH_PRESENT_BC = "_fitch_present_dc"; + public final static String PARSIMONY_OUTPUT_FITCH_PRESENT_HTML_BC = "_fitch_present_dc.html"; + public final static String PARSIMONY_OUTPUT_DOLLO_GAINS_D = "_dollo_gains_d"; + public final static String PARSIMONY_OUTPUT_DOLLO_GAINS_HTML_D = "_dollo_gains_d.html"; + public final static String PARSIMONY_OUTPUT_DOLLO_LOSSES_D = "_dollo_losses_d"; + public final static String PARSIMONY_OUTPUT_DOLLO_LOSSES_HTML_D = "_dollo_losses_d.html"; + public final static String PARSIMONY_OUTPUT_DOLLO_PRESENT_D = "_dollo_present_d"; + public final static String PARSIMONY_OUTPUT_DOLLO_PRESENT_HTML_D = "_dollo_present_d.html"; + public final static String DOMAINS_PRESENT_NEXUS = "_dom.nex"; + public final static String BDC_PRESENT_NEXUS = "_dc.nex"; // --- - public final static String PRG_NAME = "surfacing"; - public static final String DOMAINS_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO = "_d_dollo" - + ForesterConstants.PHYLO_XML_SUFFIX; - public static final String DOMAINS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH = "_d_fitch" - + ForesterConstants.PHYLO_XML_SUFFIX; - public static final String BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO = "_dc_dollo" - + ForesterConstants.PHYLO_XML_SUFFIX; - public static final String BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH = "_dc_fitch" - + ForesterConstants.PHYLO_XML_SUFFIX; - public static final String NEXUS_EXTERNAL_DOMAINS = "_dom.nex"; - public static final String NEXUS_EXTERNAL_DOMAIN_COMBINATIONS = "_dc.nex"; - public static final String NEXUS_SECONDARY_FEATURES = "_secondary_features.nex"; - public static final String PARSIMONY_OUTPUT_GL_SUFFIX_DOLLO_SECONDARY_FEATURES = "_dollo_gl_secondary_features"; - public static final String PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_DOLLO_SECONDARY_FEATURES = "_dollo_glc_secondary_features"; - public static final String PARSIMONY_OUTPUT_DOLLO_GAINS_SECONDARY_FEATURES = "_dollo_gains_secondary_features"; - public static final String PARSIMONY_OUTPUT_DOLLO_LOSSES_SECONDARY_FEATURES = "_dollo_losses_secondary_features"; - public static final String PARSIMONY_OUTPUT_DOLLO_PRESENT_SECONDARY_FEATURES = "_dollo_present_secondary_features"; - public static final String SECONDARY_FEATURES_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO = "_secondary_features_dollo" - + ForesterConstants.PHYLO_XML_SUFFIX; - public static final String PARSIMONY_OUTPUT_DOLLO_ALL_GOID_D_ALL_NAMESPACES = "_dollo_goid_d"; - public static final String PARSIMONY_OUTPUT_FITCH_ALL_GOID_BC_ALL_NAMESPACES = "_fitch_goid_dc"; - final static private String HELP_OPTION_1 = "help"; - final static private String HELP_OPTION_2 = "h"; - final static private String OUTPUT_DIR_OPTION = "out_dir"; - final static private String SCORING_OPTION = "scoring"; - private static final DomainSimilarityScoring SCORING_DEFAULT = PrintableDomainSimilarity.DomainSimilarityScoring.COMBINATIONS; - final static private String SCORING_DOMAIN_COUNT_BASED = "domains"; - final static private String SCORING_PROTEIN_COUNT_BASED = "proteins"; - final static private String SCORING_COMBINATION_BASED = "combinations"; - final static private String DETAILEDNESS_OPTION = "detail"; - private final static Detailedness DETAILEDNESS_DEFAULT = DomainSimilarityCalculator.Detailedness.PUNCTILIOUS; - final static private String SPECIES_MATRIX_OPTION = "smatrix"; - final static private String DETAILEDNESS_BASIC = "basic"; - final static private String DETAILEDNESS_LIST_IDS = "list_ids"; - final static private String DETAILEDNESS_PUNCTILIOUS = "punctilious"; - final static private String DOMAIN_SIMILARITY_SORT_OPTION = "sort"; - private static final PrintableDomainSimilarity.DomainSimilaritySortField DOMAIN_SORT_FILD_DEFAULT = PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID; - final static private String DOMAIN_SIMILARITY_SORT_MIN = "min"; - final static private String DOMAIN_SIMILARITY_SORT_MAX = "max"; - final static private String DOMAIN_SIMILARITY_SORT_SD = "sd"; - final static private String DOMAIN_SIMILARITY_SORT_MEAN = "mean"; - final static private String DOMAIN_SIMILARITY_SORT_DIFF = "diff"; - final static private String DOMAIN_SIMILARITY_SORT_COUNTS_DIFF = "count_diff"; - final static private String DOMAIN_SIMILARITY_SORT_ABS_COUNTS_DIFF = "abs_count_diff"; - final static private String DOMAIN_SIMILARITY_SORT_SPECIES_COUNT = "species"; - final static private String DOMAIN_SIMILARITY_SORT_ALPHA = "alpha"; - final static private String DOMAIN_SIMILARITY_SORT_BY_SPECIES_COUNT_FIRST_OPTION = "species_first"; - final static private String DOMAIN_COUNT_SORT_OPTION = "dc_sort"; - private static final GenomeWideCombinableDomainsSortOrder DOMAINS_SORT_ORDER_DEFAULT = GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder.ALPHABETICAL_KEY_ID; - final static private String DOMAIN_COUNT_SORT_ALPHA = "alpha"; - final static private String DOMAIN_COUNT_SORT_KEY_DOMAIN_COUNT = "dom"; - final static private String DOMAIN_COUNT_SORT_KEY_DOMAIN_PROTEINS_COUNT = "prot"; - final static private String DOMAIN_COUNT_SORT_COMBINATIONS_COUNT = "comb"; - final static private String CUTOFF_SCORE_FILE_OPTION = "cos"; - final static private String NOT_IGNORE_DUFS_OPTION = "dufs"; - final static private String MAX_E_VALUE_OPTION = "e"; - final static private String MAX_ALLOWED_OVERLAP_OPTION = "mo"; - final static private String NO_ENGULFING_OVERLAP_OPTION = "no_eo"; - final static private String IGNORE_COMBINATION_WITH_SAME_OPTION = "ignore_self_comb"; - final static private String PERFORM_DC_REGAIN_PROTEINS_STATS_OPTION = "dc_regain_stats"; - final static private String DA_ANALYSIS_OPTION = "DA_analyis"; - final static private String USE_LAST_IN_FITCH_OPTION = "last"; - public final static String PAIRWISE_DOMAIN_COMPARISONS_PREFIX = "pwc_"; - final static private String PAIRWISE_DOMAIN_COMPARISONS_OPTION = "pwc"; - final static private String OUTPUT_FILE_OPTION = "o"; - final static private String PFAM_TO_GO_FILE_USE_OPTION = "p2g"; - final static private String GO_OBO_FILE_USE_OPTION = "obo"; - final static private String GO_NAMESPACE_LIMIT_OPTION = "go_namespace"; - final static private String GO_NAMESPACE_LIMIT_OPTION_MOLECULAR_FUNCTION = "molecular_function"; - final static private String GO_NAMESPACE_LIMIT_OPTION_BIOLOGICAL_PROCESS = "biological_process"; - final static private String GO_NAMESPACE_LIMIT_OPTION_CELLULAR_COMPONENT = "cellular_component"; - final static private String SECONDARY_FEATURES_PARSIMONY_MAP_FILE = "secondary"; - final static private String DOMAIN_SIMILARITY_PRINT_OPTION_SIMPLE_TAB_DELIMITED = "simple_tab"; - final static private String DOMAIN_SIMILARITY_PRINT_OPTION_SIMPLE_HTML = "simple_html"; - final static private String DOMAIN_SIMILARITY_PRINT_OPTION_DETAILED_HTML = "detailed_html"; - final static private String DOMAIN_SIMILARITY_PRINT_OPTION = "ds_output"; - private static final PRINT_OPTION DOMAIN_SIMILARITY_PRINT_OPTION_DEFAULT = PrintableDomainSimilarity.PRINT_OPTION.HTML; - final static private String IGNORE_DOMAINS_WITHOUT_COMBINATIONS_IN_ALL_SPECIES_OPTION = "ignore_singlet_domains"; - final static private String IGNORE_VIRAL_IDS = "ignore_viral_ids"; - final static private boolean IGNORE_DOMAINS_WITHOUT_COMBINATIONS_IN_ALL_SPECIES_DEFAULT = false; - final static private String IGNORE_DOMAINS_SPECIFIC_TO_ONE_SPECIES_OPTION = "ignore_species_specific_domains"; - final static private boolean IGNORE_DOMAINS_SPECIFIC_TO_ONE_SPECIES_OPTION_DEFAULT = false; - final static private String MATRIX_MEAN_SCORE_BASED_GENOME_DISTANCE_SUFFIX = "_mean_score.pwd"; - final static private String MATRIX_SHARED_DOMAINS_BASED_GENOME_DISTANCE_SUFFIX = "_domains.pwd"; - final static private String MATRIX_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX = "_bin_combinations.pwd"; - final static private String NJ_TREE_MEAN_SCORE_BASED_GENOME_DISTANCE_SUFFIX = "_mean_score_NJ" - + ForesterConstants.PHYLO_XML_SUFFIX; - final static private String NJ_TREE_SHARED_DOMAINS_BASED_GENOME_DISTANCE_SUFFIX = "_domains_NJ" - + ForesterConstants.PHYLO_XML_SUFFIX; - final static private String NJ_TREE_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX = "_bin_combinations_NJ" - + ForesterConstants.PHYLO_XML_SUFFIX; - final static private String FILTER_POSITIVE_OPTION = "pos_filter"; - final static private String FILTER_NEGATIVE_OPTION = "neg_filter"; - final static private String FILTER_NEGATIVE_DOMAINS_OPTION = "neg_dom_filter"; - final static private String INPUT_GENOMES_FILE_OPTION = "genomes"; - final static private String INPUT_SPECIES_TREE_OPTION = "species_tree"; - final static private String SEQ_EXTRACT_OPTION = "prot_extract"; - final static private String PRG_VERSION = "2.400"; - final static private String PRG_DATE = "131106"; - final static private String E_MAIL = "czmasek@burnham.org"; - final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester/surfacing"; - final static private boolean IGNORE_DUFS_DEFAULT = true; - final static private boolean IGNORE_COMBINATION_WITH_SAME_DEFAULLT = false; - final static private double MAX_E_VALUE_DEFAULT = -1; - public final static int MAX_ALLOWED_OVERLAP_DEFAULT = -1; - private static final String RANDOM_SEED_FOR_FITCH_PARSIMONY_OPTION = "random_seed"; - private static final String CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS = "consider_bdc_direction"; - private static final String CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS_AND_ADJACENCY = "consider_bdc_adj"; - public static final String SEQ_EXTRACT_SUFFIX = ".prot"; - public static final String PLUS_MINUS_ANALYSIS_OPTION = "plus_minus"; - public static final String PLUS_MINUS_DOM_SUFFIX = "_plus_minus_dom.txt"; - public static final String PLUS_MINUS_DOM_SUFFIX_HTML = "_plus_minus_dom.html"; - public static final String PLUS_MINUS_DC_SUFFIX_HTML = "_plus_minus_dc.html"; - public static final int PLUS_MINUS_ANALYSIS_MIN_DIFF_DEFAULT = 0; - public static final double PLUS_MINUS_ANALYSIS_FACTOR_DEFAULT = 1.0; - public static final String PLUS_MINUS_ALL_GO_IDS_DOM_SUFFIX = "_plus_minus_go_ids_all.txt"; - public static final String PLUS_MINUS_PASSING_GO_IDS_DOM_SUFFIX = "_plus_minus_go_ids_passing.txt"; - private static final String OUTPUT_LIST_OF_ALL_PROTEINS_OPTIONS = "all_prot"; - final static private String OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION = "all_prot_e"; - public static final boolean VERBOSE = false; - private static final String OUTPUT_DOMAIN_COMBINATIONS_GAINED_MORE_THAN_ONCE_ANALYSIS_SUFFIX = "_fitch_dc_gains_counts"; - private static final String OUTPUT_DOMAIN_COMBINATIONS_LOST_MORE_THAN_ONCE_ANALYSIS_SUFFIX = "_fitch_dc_losses_counts"; - private static final String DOMAIN_LENGTHS_ANALYSIS_SUFFIX = "_domain_lengths_analysis"; - private static final boolean PERFORM_DOMAIN_LENGTH_ANALYSIS = true; - public static final String ALL_PFAMS_ENCOUNTERED_SUFFIX = "_all_encountered_pfams"; - public static final String ALL_PFAMS_ENCOUNTERED_WITH_GO_ANNOTATION_SUFFIX = "_all_encountered_pfams_with_go_annotation"; - public static final String ENCOUNTERED_PFAMS_SUMMARY_SUFFIX = "_encountered_pfams_summary"; - public static final String ALL_PFAMS_GAINED_AS_DOMAINS_SUFFIX = "_all_pfams_gained_as_domains"; - public static final String ALL_PFAMS_LOST_AS_DOMAINS_SUFFIX = "_all_pfams_lost_as_domains"; - public static final String ALL_PFAMS_GAINED_AS_DC_SUFFIX = "_all_pfams_gained_as_dc"; - public static final String ALL_PFAMS_LOST_AS_DC_SUFFIX = "_all_pfams_lost_as_dc"; - public static final String BASE_DIRECTORY_PER_NODE_DOMAIN_GAIN_LOSS_FILES = "PER_NODE_EVENTS"; - public static final String BASE_DIRECTORY_PER_SUBTREE_DOMAIN_GAIN_LOSS_FILES = "PER_SUBTREE_EVENTS"; - public static final String D_PROMISCUITY_FILE_SUFFIX = "_domain_promiscuities"; - private static final String LOG_FILE_SUFFIX = "_log.txt"; - private static final String DATA_FILE_SUFFIX = "_domain_combination_data.txt"; - private static final String DATA_FILE_DESC = "#SPECIES\tPRTEIN_ID\tN_TERM_DOMAIN\tC_TERM_DOMAIN\tN_TERM_DOMAIN_PER_DOMAIN_E_VALUE\tC_TERM_DOMAIN_PER_DOMAIN_E_VALUE\tN_TERM_DOMAIN_COUNTS_PER_PROTEIN\tC_TERM_DOMAIN_COUNTS_PER_PROTEIN"; - private static final String WRITE_TO_NEXUS_OPTION = "nexus"; - private static final INDIVIDUAL_SCORE_CUTOFF INDIVIDUAL_SCORE_CUTOFF_DEFAULT = INDIVIDUAL_SCORE_CUTOFF.FULL_SEQUENCE; //TODO look at me! change? - public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_COUNTS_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_counts.txt"; - public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists.txt"; - public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping.txt"; - public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique.txt"; - public static final String LIMIT_SPEC_FOR_PROT_EX = null; // e.g. "HUMAN"; set to null for not using this feature (default). - public static final String BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH_MAPPED = "_dc_MAPPED_secondary_features_fitch" - + ForesterConstants.PHYLO_XML_SUFFIX; - public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_COUNTS_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_counts_MAPPED.txt"; - public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_MAPPED.txt"; - public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_MAPPED.txt"; - public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique_MAPPED.txt"; - private static final boolean CALC_SIMILARITY_SCORES = false; + public final static String PRG_NAME = "surfacing"; + public static final String DOMAINS_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO = "_d_dollo" + + ForesterConstants.PHYLO_XML_SUFFIX; + public static final String DOMAINS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH = "_d_fitch" + + ForesterConstants.PHYLO_XML_SUFFIX; + public static final String BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO = "_dc_dollo" + + ForesterConstants.PHYLO_XML_SUFFIX; + public static final String BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH = "_dc_fitch" + + ForesterConstants.PHYLO_XML_SUFFIX; + public static final String NEXUS_EXTERNAL_DOMAINS = "_dom.nex"; + public static final String NEXUS_EXTERNAL_DOMAIN_COMBINATIONS = "_dc.nex"; + public static final String NEXUS_SECONDARY_FEATURES = "_secondary_features.nex"; + public static final String PARSIMONY_OUTPUT_GL_SUFFIX_DOLLO_SECONDARY_FEATURES = "_dollo_gl_secondary_features"; + public static final String PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_DOLLO_SECONDARY_FEATURES = "_dollo_glc_secondary_features"; + public static final String PARSIMONY_OUTPUT_DOLLO_GAINS_SECONDARY_FEATURES = "_dollo_gains_secondary_features"; + public static final String PARSIMONY_OUTPUT_DOLLO_LOSSES_SECONDARY_FEATURES = "_dollo_losses_secondary_features"; + public static final String PARSIMONY_OUTPUT_DOLLO_PRESENT_SECONDARY_FEATURES = "_dollo_present_secondary_features"; + public static final String SECONDARY_FEATURES_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO = "_secondary_features_dollo" + + ForesterConstants.PHYLO_XML_SUFFIX; + public static final String PARSIMONY_OUTPUT_DOLLO_ALL_GOID_D_ALL_NAMESPACES = "_dollo_goid_d"; + public static final String PARSIMONY_OUTPUT_FITCH_ALL_GOID_BC_ALL_NAMESPACES = "_fitch_goid_dc"; + final static private String HELP_OPTION_1 = "help"; + final static private String HELP_OPTION_2 = "h"; + final static private String OUTPUT_DIR_OPTION = "out_dir"; + final static private String SCORING_OPTION = "scoring"; + private static final DomainSimilarityScoring SCORING_DEFAULT = DomainSimilarity.DomainSimilarityScoring.COMBINATIONS; + final static private String SCORING_DOMAIN_COUNT_BASED = "domains"; + final static private String SCORING_PROTEIN_COUNT_BASED = "proteins"; + final static private String SCORING_COMBINATION_BASED = "combinations"; + final static private String DETAILEDNESS_OPTION = "detail"; + private final static Detailedness DETAILEDNESS_DEFAULT = DomainSimilarityCalculator.Detailedness.PUNCTILIOUS; + final static private String SPECIES_MATRIX_OPTION = "smatrix"; + final static private String DETAILEDNESS_BASIC = "basic"; + final static private String DETAILEDNESS_LIST_IDS = "list_ids"; + final static private String DETAILEDNESS_PUNCTILIOUS = "punctilious"; + final static private String DOMAIN_SIMILARITY_SORT_OPTION = "sort"; + private static final DomainSimilarity.DomainSimilaritySortField DOMAIN_SORT_FILD_DEFAULT = DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID; + final static private String DOMAIN_SIMILARITY_SORT_MIN = "min"; + final static private String DOMAIN_SIMILARITY_SORT_MAX = "max"; + final static private String DOMAIN_SIMILARITY_SORT_SD = "sd"; + final static private String DOMAIN_SIMILARITY_SORT_MEAN = "mean"; + final static private String DOMAIN_SIMILARITY_SORT_DIFF = "diff"; + final static private String DOMAIN_SIMILARITY_SORT_COUNTS_DIFF = "count_diff"; + final static private String DOMAIN_SIMILARITY_SORT_ABS_COUNTS_DIFF = "abs_count_diff"; + final static private String DOMAIN_SIMILARITY_SORT_SPECIES_COUNT = "species"; + final static private String DOMAIN_SIMILARITY_SORT_ALPHA = "alpha"; + final static private String DOMAIN_SIMILARITY_SORT_BY_SPECIES_COUNT_FIRST_OPTION = "species_first"; + final static private String DOMAIN_COUNT_SORT_OPTION = "dc_sort"; + private static final GenomeWideCombinableDomainsSortOrder DOMAINS_SORT_ORDER_DEFAULT = GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder.ALPHABETICAL_KEY_ID; + final static private String DOMAIN_COUNT_SORT_ALPHA = "alpha"; + final static private String DOMAIN_COUNT_SORT_KEY_DOMAIN_COUNT = "dom"; + final static private String DOMAIN_COUNT_SORT_KEY_DOMAIN_PROTEINS_COUNT = "prot"; + final static private String DOMAIN_COUNT_SORT_COMBINATIONS_COUNT = "comb"; + final static private String CUTOFF_SCORE_FILE_OPTION = "cos"; + final static private String NOT_IGNORE_DUFS_OPTION = "dufs"; + final static private String MAX_E_VALUE_OPTION = "e"; + final static private String MAX_ALLOWED_OVERLAP_OPTION = "mo"; + final static private String NO_ENGULFING_OVERLAP_OPTION = "no_eo"; + final static private String IGNORE_COMBINATION_WITH_SAME_OPTION = "ignore_self_comb"; + final static private String PERFORM_DC_REGAIN_PROTEINS_STATS_OPTION = "dc_regain_stats"; + final static private String DA_ANALYSIS_OPTION = "DA_analyis"; + final static private String USE_LAST_IN_FITCH_OPTION = "last"; + public final static String PAIRWISE_DOMAIN_COMPARISONS_PREFIX = "pwc_"; + final static private String PAIRWISE_DOMAIN_COMPARISONS_OPTION = "pwc"; + final static private String OUTPUT_FILE_OPTION = "o"; + final static private String PFAM_TO_GO_FILE_USE_OPTION = "p2g"; + final static private String GO_OBO_FILE_USE_OPTION = "obo"; + final static private String GO_NAMESPACE_LIMIT_OPTION = "go_namespace"; + final static private String GO_NAMESPACE_LIMIT_OPTION_MOLECULAR_FUNCTION = "molecular_function"; + final static private String GO_NAMESPACE_LIMIT_OPTION_BIOLOGICAL_PROCESS = "biological_process"; + final static private String GO_NAMESPACE_LIMIT_OPTION_CELLULAR_COMPONENT = "cellular_component"; + final static private String SECONDARY_FEATURES_PARSIMONY_MAP_FILE = "secondary"; + final static private String DOMAIN_SIMILARITY_PRINT_OPTION_SIMPLE_TAB_DELIMITED = "simple_tab"; + final static private String DOMAIN_SIMILARITY_PRINT_OPTION_SIMPLE_HTML = "simple_html"; + final static private String DOMAIN_SIMILARITY_PRINT_OPTION_DETAILED_HTML = "detailed_html"; + final static private String DOMAIN_SIMILARITY_PRINT_OPTION = "ds_output"; + private static final PRINT_OPTION DOMAIN_SIMILARITY_PRINT_OPTION_DEFAULT = DomainSimilarity.PRINT_OPTION.HTML; + final static private String IGNORE_DOMAINS_WITHOUT_COMBINATIONS_IN_ALL_SPECIES_OPTION = "ignore_singlet_domains"; + final static private String IGNORE_VIRAL_IDS = "ignore_viral_ids"; + final static private boolean IGNORE_DOMAINS_WITHOUT_COMBINATIONS_IN_ALL_SPECIES_DEFAULT = false; + final static private String IGNORE_DOMAINS_SPECIFIC_TO_ONE_SPECIES_OPTION = "ignore_species_specific_domains"; + final static private boolean IGNORE_DOMAINS_SPECIFIC_TO_ONE_SPECIES_OPTION_DEFAULT = false; + final static private String MATRIX_MEAN_SCORE_BASED_GENOME_DISTANCE_SUFFIX = "_mean_score.pwd"; + final static private String MATRIX_SHARED_DOMAINS_BASED_GENOME_DISTANCE_SUFFIX = "_domains.pwd"; + final static private String MATRIX_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX = "_bin_combinations.pwd"; + final static private String NJ_TREE_MEAN_SCORE_BASED_GENOME_DISTANCE_SUFFIX = "_mean_score_NJ" + + ForesterConstants.PHYLO_XML_SUFFIX; + final static private String NJ_TREE_SHARED_DOMAINS_BASED_GENOME_DISTANCE_SUFFIX = "_domains_NJ" + + ForesterConstants.PHYLO_XML_SUFFIX; + final static private String NJ_TREE_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX = "_bin_combinations_NJ" + + ForesterConstants.PHYLO_XML_SUFFIX; + final static private String FILTER_POSITIVE_OPTION = "pos_filter"; + final static private String FILTER_NEGATIVE_OPTION = "neg_filter"; + final static private String FILTER_NEGATIVE_DOMAINS_OPTION = "neg_dom_filter"; + final static private String INPUT_GENOMES_FILE_OPTION = "genomes"; + final static private String INPUT_SPECIES_TREE_OPTION = "species_tree"; + final static private String SEQ_EXTRACT_OPTION = "prot_extract"; + final static private String PRG_VERSION = "2.400"; + final static private String PRG_DATE = "131106"; + final static private String E_MAIL = "czmasek@burnham.org"; + final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester/surfacing"; + final static private boolean IGNORE_DUFS_DEFAULT = true; + final static private boolean IGNORE_COMBINATION_WITH_SAME_DEFAULLT = false; + final static private double MAX_E_VALUE_DEFAULT = -1; + public final static int MAX_ALLOWED_OVERLAP_DEFAULT = -1; + private static final String RANDOM_SEED_FOR_FITCH_PARSIMONY_OPTION = "random_seed"; + private static final String CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS = "consider_bdc_direction"; + private static final String CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS_AND_ADJACENCY = "consider_bdc_adj"; + public static final String SEQ_EXTRACT_SUFFIX = ".prot"; + public static final String PLUS_MINUS_ANALYSIS_OPTION = "plus_minus"; + public static final String PLUS_MINUS_DOM_SUFFIX = "_plus_minus_dom.txt"; + public static final String PLUS_MINUS_DOM_SUFFIX_HTML = "_plus_minus_dom.html"; + public static final String PLUS_MINUS_DC_SUFFIX_HTML = "_plus_minus_dc.html"; + public static final int PLUS_MINUS_ANALYSIS_MIN_DIFF_DEFAULT = 0; + public static final double PLUS_MINUS_ANALYSIS_FACTOR_DEFAULT = 1.0; + public static final String PLUS_MINUS_ALL_GO_IDS_DOM_SUFFIX = "_plus_minus_go_ids_all.txt"; + public static final String PLUS_MINUS_PASSING_GO_IDS_DOM_SUFFIX = "_plus_minus_go_ids_passing.txt"; + private static final String OUTPUT_LIST_OF_ALL_PROTEINS_OPTIONS = "all_prot"; + final static private String OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION = "all_prot_e"; + public static final boolean VERBOSE = false; + private static final String OUTPUT_DOMAIN_COMBINATIONS_GAINED_MORE_THAN_ONCE_ANALYSIS_SUFFIX = "_fitch_dc_gains_counts"; + private static final String OUTPUT_DOMAIN_COMBINATIONS_LOST_MORE_THAN_ONCE_ANALYSIS_SUFFIX = "_fitch_dc_losses_counts"; + private static final String DOMAIN_LENGTHS_ANALYSIS_SUFFIX = "_domain_lengths_analysis"; + private static final boolean PERFORM_DOMAIN_LENGTH_ANALYSIS = true; + public static final String ALL_PFAMS_ENCOUNTERED_SUFFIX = "_all_encountered_pfams"; + public static final String ALL_PFAMS_ENCOUNTERED_WITH_GO_ANNOTATION_SUFFIX = "_all_encountered_pfams_with_go_annotation"; + public static final String ENCOUNTERED_PFAMS_SUMMARY_SUFFIX = "_encountered_pfams_summary"; + public static final String ALL_PFAMS_GAINED_AS_DOMAINS_SUFFIX = "_all_pfams_gained_as_domains"; + public static final String ALL_PFAMS_LOST_AS_DOMAINS_SUFFIX = "_all_pfams_lost_as_domains"; + public static final String ALL_PFAMS_GAINED_AS_DC_SUFFIX = "_all_pfams_gained_as_dc"; + public static final String ALL_PFAMS_LOST_AS_DC_SUFFIX = "_all_pfams_lost_as_dc"; + public static final String BASE_DIRECTORY_PER_NODE_DOMAIN_GAIN_LOSS_FILES = "PER_NODE_EVENTS"; + public static final String BASE_DIRECTORY_PER_SUBTREE_DOMAIN_GAIN_LOSS_FILES = "PER_SUBTREE_EVENTS"; + public static final String D_PROMISCUITY_FILE_SUFFIX = "_domain_promiscuities"; + private static final String LOG_FILE_SUFFIX = "_log.txt"; + private static final String DATA_FILE_SUFFIX = "_domain_combination_data.txt"; + private static final String DATA_FILE_DESC = "#SPECIES\tPRTEIN_ID\tN_TERM_DOMAIN\tC_TERM_DOMAIN\tN_TERM_DOMAIN_PER_DOMAIN_E_VALUE\tC_TERM_DOMAIN_PER_DOMAIN_E_VALUE\tN_TERM_DOMAIN_COUNTS_PER_PROTEIN\tC_TERM_DOMAIN_COUNTS_PER_PROTEIN"; + private static final String WRITE_TO_NEXUS_OPTION = "nexus"; + private static final INDIVIDUAL_SCORE_CUTOFF INDIVIDUAL_SCORE_CUTOFF_DEFAULT = INDIVIDUAL_SCORE_CUTOFF.FULL_SEQUENCE; //TODO look at me! change? + public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_COUNTS_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_counts.txt"; + public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists.txt"; + public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping.txt"; + public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique.txt"; + public static final String LIMIT_SPEC_FOR_PROT_EX = null; // e.g. "HUMAN"; set to null for not using this feature (default). + public static final String BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH_MAPPED = "_dc_MAPPED_secondary_features_fitch" + + ForesterConstants.PHYLO_XML_SUFFIX; + public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_COUNTS_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_counts_MAPPED.txt"; + public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_MAPPED.txt"; + public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_MAPPED.txt"; + public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique_MAPPED.txt"; + private static final boolean CALC_SIMILARITY_SCORES = false; public static void main( final String args[] ) { final long start_time = new Date().getTime(); @@ -542,7 +542,7 @@ public class surfacing { ForesterUtil.fatalError( surfacing.PRG_NAME, "no input genomes file given: " + surfacing.INPUT_GENOMES_FILE_OPTION + "=" ); } - PrintableDomainSimilarity.DomainSimilarityScoring scoring = SCORING_DEFAULT; + DomainSimilarity.DomainSimilarityScoring scoring = SCORING_DEFAULT; if ( cla.isOptionSet( surfacing.SCORING_OPTION ) ) { if ( !cla.isOptionValueSet( surfacing.SCORING_OPTION ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, @@ -554,13 +554,13 @@ public class surfacing { } final String scoring_str = cla.getOptionValue( surfacing.SCORING_OPTION ); if ( scoring_str.equals( surfacing.SCORING_DOMAIN_COUNT_BASED ) ) { - scoring = PrintableDomainSimilarity.DomainSimilarityScoring.DOMAINS; + scoring = DomainSimilarity.DomainSimilarityScoring.DOMAINS; } else if ( scoring_str.equals( surfacing.SCORING_COMBINATION_BASED ) ) { - scoring = PrintableDomainSimilarity.DomainSimilarityScoring.COMBINATIONS; + scoring = DomainSimilarity.DomainSimilarityScoring.COMBINATIONS; } else if ( scoring_str.equals( surfacing.SCORING_PROTEIN_COUNT_BASED ) ) { - scoring = PrintableDomainSimilarity.DomainSimilarityScoring.PROTEINS; + scoring = DomainSimilarity.DomainSimilarityScoring.PROTEINS; } else { ForesterUtil.fatalError( surfacing.PRG_NAME, "unknown value \"" + scoring_str @@ -639,8 +639,8 @@ public class surfacing { } query_domain_ids = cla.getOptionValue( surfacing.SEQ_EXTRACT_OPTION ); } - PrintableDomainSimilarity.DomainSimilaritySortField domain_similarity_sort_field = DOMAIN_SORT_FILD_DEFAULT; - PrintableDomainSimilarity.DomainSimilaritySortField domain_similarity_sort_field_for_automated_pwc = DOMAIN_SORT_FILD_DEFAULT; + DomainSimilarity.DomainSimilaritySortField domain_similarity_sort_field = DOMAIN_SORT_FILD_DEFAULT; + DomainSimilarity.DomainSimilaritySortField domain_similarity_sort_field_for_automated_pwc = DOMAIN_SORT_FILD_DEFAULT; if ( cla.isOptionSet( surfacing.DOMAIN_SIMILARITY_SORT_OPTION ) ) { if ( !cla.isOptionValueSet( surfacing.DOMAIN_SIMILARITY_SORT_OPTION ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for domain combinations similarities sorting: -" @@ -654,40 +654,40 @@ public class surfacing { } final String sort_str = cla.getOptionValue( surfacing.DOMAIN_SIMILARITY_SORT_OPTION ).toLowerCase(); if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_ALPHA ) ) { - domain_similarity_sort_field = PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID; - domain_similarity_sort_field_for_automated_pwc = PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID; + domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID; + domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID; } else if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_MAX ) ) { - domain_similarity_sort_field = PrintableDomainSimilarity.DomainSimilaritySortField.MAX; - domain_similarity_sort_field_for_automated_pwc = PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID; + domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.MAX; + domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID; } else if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_MIN ) ) { - domain_similarity_sort_field = PrintableDomainSimilarity.DomainSimilaritySortField.MIN; - domain_similarity_sort_field_for_automated_pwc = PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID; + domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.MIN; + domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID; } else if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_MEAN ) ) { - domain_similarity_sort_field = PrintableDomainSimilarity.DomainSimilaritySortField.MEAN; - domain_similarity_sort_field_for_automated_pwc = PrintableDomainSimilarity.DomainSimilaritySortField.MEAN; + domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.MEAN; + domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.MEAN; } else if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_SPECIES_COUNT ) ) { - domain_similarity_sort_field = PrintableDomainSimilarity.DomainSimilaritySortField.SPECIES_COUNT; - domain_similarity_sort_field_for_automated_pwc = PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID; + domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.SPECIES_COUNT; + domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID; } else if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_SD ) ) { - domain_similarity_sort_field = PrintableDomainSimilarity.DomainSimilaritySortField.SD; - domain_similarity_sort_field_for_automated_pwc = PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID; + domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.SD; + domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID; } else if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_DIFF ) ) { - domain_similarity_sort_field = PrintableDomainSimilarity.DomainSimilaritySortField.MAX_DIFFERENCE; - domain_similarity_sort_field_for_automated_pwc = PrintableDomainSimilarity.DomainSimilaritySortField.MAX_DIFFERENCE; + domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.MAX_DIFFERENCE; + domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.MAX_DIFFERENCE; } else if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_ABS_COUNTS_DIFF ) ) { - domain_similarity_sort_field = PrintableDomainSimilarity.DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE; - domain_similarity_sort_field_for_automated_pwc = PrintableDomainSimilarity.DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE; + domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE; + domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE; } else if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_COUNTS_DIFF ) ) { - domain_similarity_sort_field = PrintableDomainSimilarity.DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE; - domain_similarity_sort_field_for_automated_pwc = PrintableDomainSimilarity.DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE; + domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE; + domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE; } else { ForesterUtil.fatalError( surfacing.PRG_NAME, "unknown value \"" + sort_str @@ -701,7 +701,7 @@ public class surfacing { + ">\"" ); } } - PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option = DOMAIN_SIMILARITY_PRINT_OPTION_DEFAULT; + DomainSimilarity.PRINT_OPTION domain_similarity_print_option = DOMAIN_SIMILARITY_PRINT_OPTION_DEFAULT; if ( cla.isOptionSet( surfacing.DOMAIN_SIMILARITY_PRINT_OPTION ) ) { if ( !cla.isOptionValueSet( surfacing.DOMAIN_SIMILARITY_PRINT_OPTION ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for print option: -" @@ -711,13 +711,13 @@ public class surfacing { } final String sort = cla.getOptionValue( surfacing.DOMAIN_SIMILARITY_PRINT_OPTION ).toLowerCase(); if ( sort.equals( surfacing.DOMAIN_SIMILARITY_PRINT_OPTION_DETAILED_HTML ) ) { - domain_similarity_print_option = PrintableDomainSimilarity.PRINT_OPTION.HTML; + domain_similarity_print_option = DomainSimilarity.PRINT_OPTION.HTML; } else if ( sort.equals( surfacing.DOMAIN_SIMILARITY_PRINT_OPTION_SIMPLE_HTML ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "simple HTML output not implemented yet :(" ); } else if ( sort.equals( surfacing.DOMAIN_SIMILARITY_PRINT_OPTION_SIMPLE_TAB_DELIMITED ) ) { - domain_similarity_print_option = PrintableDomainSimilarity.PRINT_OPTION.SIMPLE_TAB_DELIMITED; + domain_similarity_print_option = DomainSimilarity.PRINT_OPTION.SIMPLE_TAB_DELIMITED; } else { ForesterUtil.fatalError( surfacing.PRG_NAME, "unknown value \"" + sort + "\" for print option: -" @@ -873,9 +873,9 @@ public class surfacing { + surfacing.GO_NAMESPACE_LIMIT_OPTION_CELLULAR_COMPONENT + ">\"" ); } } - if ( ( domain_similarity_sort_field == PrintableDomainSimilarity.DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE ) + if ( ( domain_similarity_sort_field == DomainSimilarity.DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE ) && ( number_of_genomes > 2 ) ) { - domain_similarity_sort_field = PrintableDomainSimilarity.DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE; + domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE; } File[] intree_files = null; Phylogeny[] intrees = null; @@ -1776,7 +1776,7 @@ public class surfacing { if ( domain_id_to_go_ids_map != null ) { go_annotation_output = DomainSimilarityCalculator.GoAnnotationOutput.ALL; } - final SortedSet similarities = calc + final SortedSet similarities = calc .calculateSimilarities( pw_calc, gwcd_list, ignore_domains_without_combs_in_all_spec, diff --git a/forester/java/src/org/forester/surfacing/BasicDomainSimilarityCalculator.java b/forester/java/src/org/forester/surfacing/BasicDomainSimilarityCalculator.java index c4c5ab7..b245cbc 100644 --- a/forester/java/src/org/forester/surfacing/BasicDomainSimilarityCalculator.java +++ b/forester/java/src/org/forester/surfacing/BasicDomainSimilarityCalculator.java @@ -42,12 +42,12 @@ import org.forester.util.ForesterUtil; public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculator { - final PrintableDomainSimilarity.DomainSimilaritySortField _sort; - private final boolean _calc_similarity_score; - private final boolean _sort_by_species_count_first; - private final boolean _treat_as_binary_comparison; + final DomainSimilarity.DomainSimilaritySortField _sort; + private final boolean _calc_similarity_score; + private final boolean _sort_by_species_count_first; + private final boolean _treat_as_binary_comparison; - public BasicDomainSimilarityCalculator( final PrintableDomainSimilarity.DomainSimilaritySortField sort, + public BasicDomainSimilarityCalculator( final DomainSimilarity.DomainSimilaritySortField sort, final boolean sort_by_species_count_first, final boolean treat_as_binary_comparison, final boolean calc_similarity_score ) { @@ -58,14 +58,14 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat } @Override - public SortedSet calculateSimilarities( final PairwiseDomainSimilarityCalculator pairwise_calculator, - final List cdc_list, - final boolean ignore_domains_without_combinations_in_any_genome, - final boolean ignore_domains_specific_to_one_genome ) { + public SortedSet calculateSimilarities( final PairwiseDomainSimilarityCalculator pairwise_calculator, + final List cdc_list, + final boolean ignore_domains_without_combinations_in_any_genome, + final boolean ignore_domains_specific_to_one_genome ) { if ( cdc_list.size() < 2 ) { throw new IllegalArgumentException( "attempt to calculate multiple combinable domains similarity for less than two combinale domains collections" ); } - final SortedSet similarities = new TreeSet(); + final SortedSet similarities = new TreeSet(); final SortedSet keys = new TreeSet(); for( final GenomeWideCombinableDomains cdc : cdc_list ) { keys.addAll( ( cdc ).getAllCombinableDomainsIds().keySet() ); @@ -98,7 +98,7 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat } if ( same_id_cd_list.size() > 0 ) { if ( !ignore_domains_specific_to_one_genome || ( same_id_cd_list.size() > 1 ) ) { - final PrintableDomainSimilarity s = calculateSimilarity( pairwise_calculator, same_id_cd_list ); + final DomainSimilarity s = calculateSimilarity( pairwise_calculator, same_id_cd_list ); if ( s != null ) { similarities.add( s ); } @@ -119,33 +119,33 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat return _calc_similarity_score; } - private PrintableDomainSimilarity calculateSimilarity( final PairwiseDomainSimilarityCalculator pairwise_calculator, - final List domains_list ) { + private DomainSimilarity calculateSimilarity( final PairwiseDomainSimilarityCalculator pairwise_calculator, + final List domains_list ) { if ( domains_list.size() == 1 ) { final SortedMap species_data = new TreeMap(); species_data.put( domains_list.get( 0 ).getSpecies(), createSpeciesSpecificDomainSimilariyData( domains_list.get( 0 ) ) ); if ( !isCalcSimilarityScore() ) { - return new PrintableDomainSimilarity( domains_list.get( 0 ), - 0, - 0, - species_data, - isSortBySpeciesCountFirst(), - isTreatAsBinaryComparison() ); + return new DomainSimilarity( domains_list.get( 0 ), + 0, + 0, + species_data, + isSortBySpeciesCountFirst(), + isTreatAsBinaryComparison() ); } else { - return new PrintableDomainSimilarity( domains_list.get( 0 ), - 1.0, - 1.0, - 1.0, - 1.0, - 0.0, - 0, - 0, - 0, - species_data, - isSortBySpeciesCountFirst(), - isTreatAsBinaryComparison() ); + return new DomainSimilarity( domains_list.get( 0 ), + 1.0, + 1.0, + 1.0, + 1.0, + 0.0, + 0, + 0, + 0, + species_data, + isSortBySpeciesCountFirst(), + isTreatAsBinaryComparison() ); } } DescriptiveStatistics stat = null; @@ -199,43 +199,43 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat max_difference = Math.abs( max_difference ); } } - PrintableDomainSimilarity similarity = null; + DomainSimilarity similarity = null; if ( !isCalcSimilarityScore() ) { - similarity = new PrintableDomainSimilarity( domains_list.get( 0 ), - max_difference_in_counts, - max_difference, - species_data, - isSortBySpeciesCountFirst(), - isTreatAsBinaryComparison() ); + similarity = new DomainSimilarity( domains_list.get( 0 ), + max_difference_in_counts, + max_difference, + species_data, + isSortBySpeciesCountFirst(), + isTreatAsBinaryComparison() ); } else { if ( stat.getN() == 1 ) { - similarity = new PrintableDomainSimilarity( domains_list.get( 0 ), - stat.getMin(), - stat.getMax(), - stat.arithmeticMean(), - stat.median(), - 0.0, - stat.getN(), - max_difference_in_counts, - max_difference, - species_data, - isSortBySpeciesCountFirst(), - isTreatAsBinaryComparison() ); + similarity = new DomainSimilarity( domains_list.get( 0 ), + stat.getMin(), + stat.getMax(), + stat.arithmeticMean(), + stat.median(), + 0.0, + stat.getN(), + max_difference_in_counts, + max_difference, + species_data, + isSortBySpeciesCountFirst(), + isTreatAsBinaryComparison() ); } else { - similarity = new PrintableDomainSimilarity( domains_list.get( 0 ), - stat.getMin(), - stat.getMax(), - stat.arithmeticMean(), - stat.median(), - stat.sampleStandardDeviation(), - stat.getN(), - max_difference_in_counts, - max_difference, - species_data, - isSortBySpeciesCountFirst(), - isTreatAsBinaryComparison() ); + similarity = new DomainSimilarity( domains_list.get( 0 ), + stat.getMin(), + stat.getMax(), + stat.arithmeticMean(), + stat.median(), + stat.sampleStandardDeviation(), + stat.getN(), + max_difference_in_counts, + max_difference, + species_data, + isSortBySpeciesCountFirst(), + isTreatAsBinaryComparison() ); } } return similarity; diff --git a/forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java b/forester/java/src/org/forester/surfacing/DomainSimilarity.java similarity index 93% rename from forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java rename to forester/java/src/org/forester/surfacing/DomainSimilarity.java index 7fe04aa..72d03f1 100644 --- a/forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java +++ b/forester/java/src/org/forester/surfacing/DomainSimilarity.java @@ -44,7 +44,7 @@ import org.forester.species.Species; import org.forester.surfacing.DomainSimilarityCalculator.Detailedness; import org.forester.util.ForesterUtil; -public class PrintableDomainSimilarity implements Comparable { +public class DomainSimilarity implements Comparable { final public static String SPECIES_SEPARATOR = " "; final private static int EQUAL = 0; @@ -63,18 +63,18 @@ public class PrintableDomainSimilarity implements Comparable _species_order; private final boolean _treat_as_binary_comparison; - public PrintableDomainSimilarity( final CombinableDomains combinable_domains, - final double min, - final double max, - final double mean, - final double median, - final double sd, - final int n, - final int max_difference_in_counts, - final int max_difference, - final SortedMap species_data, - final boolean sort_by_species_count_first, - final boolean treat_as_binary_comparison ) { + public DomainSimilarity( final CombinableDomains combinable_domains, + final double min, + final double max, + final double mean, + final double median, + final double sd, + final int n, + final int max_difference_in_counts, + final int max_difference, + final SortedMap species_data, + final boolean sort_by_species_count_first, + final boolean treat_as_binary_comparison ) { if ( combinable_domains == null ) { throw new IllegalArgumentException( "attempt to use null combinable domains" ); } @@ -122,12 +122,12 @@ public class PrintableDomainSimilarity implements Comparable species_data, - final boolean sort_by_species_count_first, - final boolean treat_as_binary_comparison ) { + public DomainSimilarity( final CombinableDomains combinable_domains, + final int max_difference_in_counts, + final int max_difference, + final SortedMap species_data, + final boolean sort_by_species_count_first, + final boolean treat_as_binary_comparison ) { if ( combinable_domains == null ) { throw new IllegalArgumentException( "attempt to use null combinable domains" ); } @@ -159,7 +159,7 @@ public class PrintableDomainSimilarity implements Comparable tax_code_to_id_map, final Phylogeny phy ) { switch ( print_option ) { @@ -353,7 +353,7 @@ public class PrintableDomainSimilarity implements Comparable" ); } - private int compareByDomainId( final PrintableDomainSimilarity other ) { + private int compareByDomainId( final DomainSimilarity other ) { return getDomainId().compareToIgnoreCase( other.getDomainId() ); } @@ -421,8 +421,8 @@ public class PrintableDomainSimilarity implements Comparable calculateSimilarities( final PairwiseDomainSimilarityCalculator pairwise_calculator, - final List cdc_list, - final boolean ignore_domains_without_combinations_in_any_genome, - final boolean ignore_domains_specific_to_one_genome );; + public SortedSet calculateSimilarities( final PairwiseDomainSimilarityCalculator pairwise_calculator, + final List cdc_list, + final boolean ignore_domains_without_combinations_in_any_genome, + final boolean ignore_domains_specific_to_one_genome );; public static enum Detailedness { BASIC, LIST_COMBINING_DOMAIN_FOR_EACH_SPECIES, PUNCTILIOUS diff --git a/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java b/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java index 9c6e1b5..a3b0e66 100644 --- a/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java +++ b/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java @@ -76,9 +76,9 @@ public class PairwiseGenomeComparator { final Detailedness detailedness, final boolean ignore_domains_without_combs_in_all_spec, final boolean ignore_domains_specific_to_one_species, - final PrintableDomainSimilarity.DomainSimilaritySortField domain_similarity_sort_field, - final PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option, - final PrintableDomainSimilarity.DomainSimilarityScoring scoring, + final DomainSimilarity.DomainSimilaritySortField domain_similarity_sort_field, + final DomainSimilarity.PRINT_OPTION domain_similarity_print_option, + final DomainSimilarity.DomainSimilarityScoring scoring, final Map> domain_id_to_go_ids_map, final Map go_id_to_term_map, final GoNameSpace go_namespace_limit, @@ -140,7 +140,7 @@ public class PairwiseGenomeComparator { sort_by_species_count_first, true, calc_similarity_scores ); - final SortedSet similarities = calc + final SortedSet similarities = calc .calculateSimilarities( pw_calc, genome_pair, ignore_domains_without_combs_in_all_spec, diff --git a/forester/java/src/org/forester/surfacing/SurfacingUtil.java b/forester/java/src/org/forester/surfacing/SurfacingUtil.java index c4f98d6..55bfb6f 100644 --- a/forester/java/src/org/forester/surfacing/SurfacingUtil.java +++ b/forester/java/src/org/forester/surfacing/SurfacingUtil.java @@ -85,9 +85,9 @@ import org.forester.protein.BinaryDomainCombination; import org.forester.protein.Domain; import org.forester.protein.Protein; import org.forester.species.Species; +import org.forester.surfacing.DomainSimilarity.PRINT_OPTION; import org.forester.surfacing.DomainSimilarityCalculator.Detailedness; import org.forester.surfacing.GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder; -import org.forester.surfacing.PrintableDomainSimilarity.PRINT_OPTION; import org.forester.util.AsciiHistogram; import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.BasicTable; @@ -143,9 +143,9 @@ public final class SurfacingUtil { } } - public static DescriptiveStatistics calculateDescriptiveStatisticsForMeanValues( final Set similarities ) { + public static DescriptiveStatistics calculateDescriptiveStatisticsForMeanValues( final Set similarities ) { final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); - for( final PrintableDomainSimilarity similarity : similarities ) { + for( final DomainSimilarity similarity : similarities ) { stats.addValue( similarity.getMeanSimilarityScore() ); } return stats; @@ -158,7 +158,7 @@ public final class SurfacingUtil { } } - public static void checkWriteabilityForPairwiseComparisons( final PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option, + public static void checkWriteabilityForPairwiseComparisons( final DomainSimilarity.PRINT_OPTION domain_similarity_print_option, final String[][] input_file_properties, final String automated_pairwise_comparison_suffix, final File outdir ) { @@ -375,11 +375,11 @@ public final class SurfacingUtil { return m; } - public static void decoratePrintableDomainSimilarities( final SortedSet domain_similarities, + public static void decoratePrintableDomainSimilarities( final SortedSet domain_similarities, final Detailedness detailedness ) { - for( final PrintableDomainSimilarity domain_similarity : domain_similarities ) { - if ( domain_similarity instanceof PrintableDomainSimilarity ) { - final PrintableDomainSimilarity printable_domain_similarity = domain_similarity; + for( final DomainSimilarity domain_similarity : domain_similarities ) { + if ( domain_similarity instanceof DomainSimilarity ) { + final DomainSimilarity printable_domain_similarity = domain_similarity; printable_domain_similarity.setDetailedness( detailedness ); } } @@ -2218,11 +2218,11 @@ public final class SurfacingUtil { final Writer simple_tab_writer, final Writer single_writer, Map split_writers, - final SortedSet similarities, + final SortedSet similarities, final boolean treat_as_binary, final List species_order, - final PrintableDomainSimilarity.PRINT_OPTION print_option, - final PrintableDomainSimilarity.DomainSimilarityScoring scoring, + final DomainSimilarity.PRINT_OPTION print_option, + final DomainSimilarity.DomainSimilarityScoring scoring, final boolean verbose, final Map tax_code_to_id_map, final Phylogeny phy, @@ -2262,7 +2262,7 @@ public final class SurfacingUtil { break; } // - for( final PrintableDomainSimilarity similarity : similarities ) { + for( final DomainSimilarity similarity : similarities ) { if ( ( species_order != null ) && !species_order.isEmpty() ) { ( similarity ).setSpeciesOrder( species_order ); } @@ -2347,7 +2347,7 @@ public final class SurfacingUtil { w.write( SurfacingConstants.NL ); } // - for( final PrintableDomainSimilarity similarity : similarities ) { + for( final DomainSimilarity similarity : similarities ) { if ( ( species_order != null ) && !species_order.isEmpty() ) { ( similarity ).setSpeciesOrder( species_order ); } diff --git a/forester/java/src/org/forester/surfacing/TestSurfacing.java b/forester/java/src/org/forester/surfacing/TestSurfacing.java index dabbb06..168b6a4 100644 --- a/forester/java/src/org/forester/surfacing/TestSurfacing.java +++ b/forester/java/src/org/forester/surfacing/TestSurfacing.java @@ -313,17 +313,17 @@ public class TestSurfacing { cdc_list.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve, true, new BasicSpecies( "nemve" ) ) ); - final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID, + final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID, false, false, true ); - final SortedSet sims = calc + final SortedSet sims = calc .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(), cdc_list, true, true ); - final Iterator sims_it = sims.iterator(); - final PrintableDomainSimilarity sa = sims_it.next(); + final Iterator sims_it = sims.iterator(); + final DomainSimilarity sa = sims_it.next(); if ( !sa.getDomainId().equals( "A" ) ) { return false; } @@ -364,7 +364,7 @@ public class TestSurfacing { if ( sa.getMaximalDifferenceInCounts() != 3 ) { return false; } - final PrintableDomainSimilarity sb = sims_it.next(); + final DomainSimilarity sb = sims_it.next(); if ( !sb.getDomainId().equals( "B" ) ) { return false; } @@ -398,7 +398,7 @@ public class TestSurfacing { if ( sb.getMaximalDifferenceInCounts() != 2 ) { return false; } - final PrintableDomainSimilarity sc = sims_it.next(); + final DomainSimilarity sc = sims_it.next(); if ( !sc.getDomainId().equals( "C" ) ) { return false; } @@ -464,17 +464,17 @@ public class TestSurfacing { cdc_list2.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve, false, new BasicSpecies( "nemve" ) ) ); - final DomainSimilarityCalculator calc2 = new BasicDomainSimilarityCalculator( PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID, + final DomainSimilarityCalculator calc2 = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID, false, false, true ); - final SortedSet sims2 = calc2 + final SortedSet sims2 = calc2 .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(), cdc_list2, false, true ); - final Iterator sims_it2 = sims2.iterator(); - final PrintableDomainSimilarity sa2 = sims_it2.next(); + final Iterator sims_it2 = sims2.iterator(); + final DomainSimilarity sa2 = sims_it2.next(); if ( !sa2.getDomainId().equals( "A" ) ) { return false; } @@ -552,17 +552,17 @@ public class TestSurfacing { cdc_list3.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve, true, new BasicSpecies( "nemve" ) ) ); - final DomainSimilarityCalculator calc3 = new BasicDomainSimilarityCalculator( PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID, + final DomainSimilarityCalculator calc3 = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID, false, false, true ); - final SortedSet sims3 = calc3 + final SortedSet sims3 = calc3 .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(), cdc_list3, false, true ); - final Iterator sims_it3 = sims3.iterator(); - final PrintableDomainSimilarity sa3 = sims_it3.next(); + final Iterator sims_it3 = sims3.iterator(); + final DomainSimilarity sa3 = sims_it3.next(); if ( !sa3.getDomainId().equals( "A" ) ) { return false; } @@ -596,17 +596,17 @@ public class TestSurfacing { cdc_list4.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve, false, new BasicSpecies( "nemve" ) ) ); - final DomainSimilarityCalculator calc4 = new BasicDomainSimilarityCalculator( PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID, + final DomainSimilarityCalculator calc4 = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID, true, false, true ); - final SortedSet sims4 = calc4 + final SortedSet sims4 = calc4 .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(), cdc_list4, false, true ); - final Iterator sims_it4 = sims4.iterator(); - final PrintableDomainSimilarity sa4 = sims_it4.next(); + final Iterator sims_it4 = sims4.iterator(); + final DomainSimilarity sa4 = sims_it4.next(); if ( !sa4.getDomainId().equals( "A" ) ) { return false; } @@ -629,10 +629,10 @@ public class TestSurfacing { if ( ssdsd4.getNumberOfProteinsExhibitingCombinationWith( "X" ) != 3 ) { return false; } - final SortedSet sims4_d = calc4 + final SortedSet sims4_d = calc4 .calculateSimilarities( new DomainCountsBasedPairwiseSimilarityCalculator(), cdc_list4, false, true ); - final Iterator sims_it4_d = sims4_d.iterator(); - final PrintableDomainSimilarity sa4_d = sims_it4_d.next(); + final Iterator sims_it4_d = sims4_d.iterator(); + final DomainSimilarity sa4_d = sims_it4_d.next(); if ( !sa4_d.getDomainId().equals( "A" ) ) { return false; } @@ -653,13 +653,13 @@ public class TestSurfacing { if ( sa4_d.getN() != 6 ) { return false; } - final SortedSet sims4_p = calc4 + final SortedSet sims4_p = calc4 .calculateSimilarities( new ProteinCountsBasedPairwiseDomainSimilarityCalculator(), cdc_list4, false, true ); - final Iterator sims_it4_p = sims4_p.iterator(); - final PrintableDomainSimilarity sa4_p = sims_it4_p.next(); + final Iterator sims_it4_p = sims4_p.iterator(); + final DomainSimilarity sa4_p = sims_it4_p.next(); if ( !sa4_p.getDomainId().equals( "A" ) ) { return false; } @@ -708,10 +708,10 @@ public class TestSurfacing { cdc_list5.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve, true, new BasicSpecies( "nemve" ) ) ); - final SortedSet sims5_d = calc4 + final SortedSet sims5_d = calc4 .calculateSimilarities( new DomainCountsBasedPairwiseSimilarityCalculator(), cdc_list5, false, true ); - final Iterator sims_it5_d = sims5_d.iterator(); - final PrintableDomainSimilarity sa5_d = sims_it5_d.next(); + final Iterator sims_it5_d = sims5_d.iterator(); + final DomainSimilarity sa5_d = sims_it5_d.next(); if ( sa5_d.getSpecies().size() != 4 ) { return false; } @@ -779,13 +779,13 @@ public class TestSurfacing { if ( sa5_d.getMaximalDifferenceInCounts() != 11 ) { return false; } - final SortedSet sims5_p = calc4 + final SortedSet sims5_p = calc4 .calculateSimilarities( new ProteinCountsBasedPairwiseDomainSimilarityCalculator(), cdc_list5, false, true ); - final Iterator sims_it5_p = sims5_p.iterator(); - final PrintableDomainSimilarity sa5_p = sims_it5_p.next(); + final Iterator sims_it5_p = sims5_p.iterator(); + final DomainSimilarity sa5_p = sims_it5_p.next(); if ( !sa5_p.getDomainId().equals( "A" ) ) { return false; } @@ -843,10 +843,10 @@ public class TestSurfacing { cdc_list6.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve, false, new BasicSpecies( "nemve" ) ) ); - final SortedSet sims6_d = calc4 + final SortedSet sims6_d = calc4 .calculateSimilarities( new DomainCountsBasedPairwiseSimilarityCalculator(), cdc_list6, false, true ); - final Iterator sims_it6_d = sims6_d.iterator(); - final PrintableDomainSimilarity sa6_d = sims_it6_d.next(); + final Iterator sims_it6_d = sims6_d.iterator(); + final DomainSimilarity sa6_d = sims_it6_d.next(); if ( sa6_d.getSpecies().size() != 4 ) { return false; } @@ -914,13 +914,13 @@ public class TestSurfacing { if ( sa6_d.getMaximalDifferenceInCounts() != 11 ) { return false; } - final SortedSet sims6_p = calc4 + final SortedSet sims6_p = calc4 .calculateSimilarities( new ProteinCountsBasedPairwiseDomainSimilarityCalculator(), cdc_list6, false, true ); - final Iterator sims_it6_p = sims6_p.iterator(); - final PrintableDomainSimilarity sa6_p = sims_it6_p.next(); + final Iterator sims_it6_p = sims6_p.iterator(); + final DomainSimilarity sa6_p = sims_it6_p.next(); if ( !sa6_p.getDomainId().equals( "A" ) ) { return false; } @@ -1028,17 +1028,17 @@ public class TestSurfacing { cdc_list.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve, true, new BasicSpecies( "nemve" ) ) ); - final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID, + final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID, false, false, true ); - final SortedSet sims = calc + final SortedSet sims = calc .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(), cdc_list, true, false ); - final Iterator sims_it = sims.iterator(); - final PrintableDomainSimilarity sa = sims_it.next(); + final Iterator sims_it = sims.iterator(); + final DomainSimilarity sa = sims_it.next(); if ( !sa.getDomainId().equals( "A" ) ) { return false; } @@ -1069,7 +1069,7 @@ public class TestSurfacing { if ( sa.getMaximalDifferenceInCounts() != 0 ) { return false; } - final PrintableDomainSimilarity sb = sims_it.next(); + final DomainSimilarity sb = sims_it.next(); if ( !sb.getDomainId().equals( "B" ) ) { return false; } @@ -1079,13 +1079,13 @@ public class TestSurfacing { if ( !sb.getSpecies().contains( new BasicSpecies( "rabbit" ) ) ) { return false; } - final SortedSet sims2 = calc + final SortedSet sims2 = calc .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(), cdc_list, true, true ); - final Iterator sims_it2 = sims2.iterator(); - final PrintableDomainSimilarity sa2 = sims_it2.next(); + final Iterator sims_it2 = sims2.iterator(); + final DomainSimilarity sa2 = sims_it2.next(); if ( !sa2.getDomainId().equals( "D" ) ) { return false; } @@ -1137,11 +1137,11 @@ public class TestSurfacing { cdc_list.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve, true, new BasicSpecies( "nemve" ) ) ); - final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID, + final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID, false, false, true ); - final SortedSet sims = calc + final SortedSet sims = calc .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(), cdc_list, false, @@ -1149,8 +1149,8 @@ public class TestSurfacing { if ( sims.size() != 1 ) { return false; } - final Iterator sims_it = sims.iterator(); - final PrintableDomainSimilarity sa = sims_it.next(); + final Iterator sims_it = sims.iterator(); + final DomainSimilarity sa = sims_it.next(); if ( !sa.getDomainId().equals( "A" ) ) { return false; } @@ -1169,7 +1169,7 @@ public class TestSurfacing { if ( !sa.getSpecies().contains( new BasicSpecies( "rabbit" ) ) ) { return false; } - final SortedSet sims_ns = calc + final SortedSet sims_ns = calc .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(), cdc_list, true, @@ -1211,7 +1211,7 @@ public class TestSurfacing { cdc_list2.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve2, true, new BasicSpecies( "nemve" ) ) ); - final SortedSet sims2 = calc + final SortedSet sims2 = calc .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(), cdc_list2, true,