import org.forester.surfacing.DomainCountsBasedPairwiseSimilarityCalculator;
import org.forester.surfacing.DomainLengthsTable;
import org.forester.surfacing.DomainParsimonyCalculator;
-import org.forester.surfacing.DomainSimilarity;
-import org.forester.surfacing.DomainSimilarity.DomainSimilarityScoring;
-import org.forester.surfacing.DomainSimilarity.DomainSimilaritySortField;
import org.forester.surfacing.DomainSimilarityCalculator;
import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
import org.forester.surfacing.GenomeWideCombinableDomains;
import org.forester.surfacing.PairwiseDomainSimilarityCalculator;
import org.forester.surfacing.PairwiseGenomeComparator;
import org.forester.surfacing.PrintableDomainSimilarity;
+import org.forester.surfacing.PrintableDomainSimilarity.DomainSimilarityScoring;
import org.forester.surfacing.PrintableDomainSimilarity.PRINT_OPTION;
import org.forester.surfacing.ProteinCountsBasedPairwiseDomainSimilarityCalculator;
import org.forester.surfacing.SurfacingUtil;
public class surfacing {
- private static final int MINIMAL_NUMBER_OF_SIMILARITIES_FOR_SPLITTING = 1000;
- public final static String DOMAIN_COMBINITONS_OUTPUT_OPTION_FOR_GRAPH_ANALYSIS = "graph_analysis_out";
- public final static String DOMAIN_COMBINITONS_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS = "_dc.dot";
- public final static String PARSIMONY_OUTPUT_FITCH_PRESENT_BC_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS = "_fitch_present_dc.dot";
- public final static String DOMAIN_COMBINITON_COUNTS_OUTPUTFILE_SUFFIX = ".dcc";
+ private static final int MINIMAL_NUMBER_OF_SIMILARITIES_FOR_SPLITTING = 1000;
+ public final static String DOMAIN_COMBINITONS_OUTPUT_OPTION_FOR_GRAPH_ANALYSIS = "graph_analysis_out";
+ public final static String DOMAIN_COMBINITONS_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS = "_dc.dot";
+ public final static String PARSIMONY_OUTPUT_FITCH_PRESENT_BC_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS = "_fitch_present_dc.dot";
+ public final static String DOMAIN_COMBINITON_COUNTS_OUTPUTFILE_SUFFIX = ".dcc";
// gain/loss:
- public final static String PARSIMONY_OUTPUT_GL_SUFFIX_DOLLO_DOMAINS = "_dollo_gl_d";
- public final static String PARSIMONY_OUTPUT_GL_SUFFIX_DOLLO_BINARY_COMBINATIONS = "_dollo_gl_dc";
- public final static String PARSIMONY_OUTPUT_GL_SUFFIX_FITCH_DOMAINS = "_fitch_gl_d";
- public final static String PARSIMONY_OUTPUT_GL_SUFFIX_FITCH_BINARY_COMBINATIONS = "_fitch_gl_dc";
+ public final static String PARSIMONY_OUTPUT_GL_SUFFIX_DOLLO_DOMAINS = "_dollo_gl_d";
+ public final static String PARSIMONY_OUTPUT_GL_SUFFIX_DOLLO_BINARY_COMBINATIONS = "_dollo_gl_dc";
+ public final static String PARSIMONY_OUTPUT_GL_SUFFIX_FITCH_DOMAINS = "_fitch_gl_d";
+ public final static String PARSIMONY_OUTPUT_GL_SUFFIX_FITCH_BINARY_COMBINATIONS = "_fitch_gl_dc";
// gain/loss counts:
- public final static String PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_DOLLO_DOMAINS = "_dollo_glc_d";
- public final static String PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_DOLLO_BINARY_COMBINATIONS = "_dollo_glc_dc";
- public final static String PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_FITCH_DOMAINS = "_fitch_glc_d";
- public final static String PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_FITCH_BINARY_COMBINATIONS = "_fitch_glc_dc";
+ public final static String PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_DOLLO_DOMAINS = "_dollo_glc_d";
+ public final static String PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_DOLLO_BINARY_COMBINATIONS = "_dollo_glc_dc";
+ public final static String PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_FITCH_DOMAINS = "_fitch_glc_d";
+ public final static String PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_FITCH_BINARY_COMBINATIONS = "_fitch_glc_dc";
// tables:
- public final static String PARSIMONY_OUTPUT_FITCH_GAINS_BC = "_fitch_gains_dc";
- public final static String PARSIMONY_OUTPUT_FITCH_GAINS_HTML_BC = "_fitch_gains_dc.html";
- public final static String PARSIMONY_OUTPUT_FITCH_LOSSES_BC = "_fitch_losses_dc";
- public final static String PARSIMONY_OUTPUT_FITCH_LOSSES_HTML_BC = "_fitch_losses_dc.html";
- public final static String PARSIMONY_OUTPUT_FITCH_PRESENT_BC = "_fitch_present_dc";
- public final static String PARSIMONY_OUTPUT_FITCH_PRESENT_HTML_BC = "_fitch_present_dc.html";
- public final static String PARSIMONY_OUTPUT_DOLLO_GAINS_D = "_dollo_gains_d";
- public final static String PARSIMONY_OUTPUT_DOLLO_GAINS_HTML_D = "_dollo_gains_d.html";
- public final static String PARSIMONY_OUTPUT_DOLLO_LOSSES_D = "_dollo_losses_d";
- public final static String PARSIMONY_OUTPUT_DOLLO_LOSSES_HTML_D = "_dollo_losses_d.html";
- public final static String PARSIMONY_OUTPUT_DOLLO_PRESENT_D = "_dollo_present_d";
- public final static String PARSIMONY_OUTPUT_DOLLO_PRESENT_HTML_D = "_dollo_present_d.html";
- public final static String DOMAINS_PRESENT_NEXUS = "_dom.nex";
- public final static String BDC_PRESENT_NEXUS = "_dc.nex";
+ public final static String PARSIMONY_OUTPUT_FITCH_GAINS_BC = "_fitch_gains_dc";
+ public final static String PARSIMONY_OUTPUT_FITCH_GAINS_HTML_BC = "_fitch_gains_dc.html";
+ public final static String PARSIMONY_OUTPUT_FITCH_LOSSES_BC = "_fitch_losses_dc";
+ public final static String PARSIMONY_OUTPUT_FITCH_LOSSES_HTML_BC = "_fitch_losses_dc.html";
+ public final static String PARSIMONY_OUTPUT_FITCH_PRESENT_BC = "_fitch_present_dc";
+ public final static String PARSIMONY_OUTPUT_FITCH_PRESENT_HTML_BC = "_fitch_present_dc.html";
+ public final static String PARSIMONY_OUTPUT_DOLLO_GAINS_D = "_dollo_gains_d";
+ public final static String PARSIMONY_OUTPUT_DOLLO_GAINS_HTML_D = "_dollo_gains_d.html";
+ public final static String PARSIMONY_OUTPUT_DOLLO_LOSSES_D = "_dollo_losses_d";
+ public final static String PARSIMONY_OUTPUT_DOLLO_LOSSES_HTML_D = "_dollo_losses_d.html";
+ public final static String PARSIMONY_OUTPUT_DOLLO_PRESENT_D = "_dollo_present_d";
+ public final static String PARSIMONY_OUTPUT_DOLLO_PRESENT_HTML_D = "_dollo_present_d.html";
+ public final static String DOMAINS_PRESENT_NEXUS = "_dom.nex";
+ public final static String BDC_PRESENT_NEXUS = "_dc.nex";
// ---
- public final static String PRG_NAME = "surfacing";
- public static final String DOMAINS_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO = "_d_dollo"
- + ForesterConstants.PHYLO_XML_SUFFIX;
- public static final String DOMAINS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH = "_d_fitch"
- + ForesterConstants.PHYLO_XML_SUFFIX;
- public static final String BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO = "_dc_dollo"
- + ForesterConstants.PHYLO_XML_SUFFIX;
- public static final String BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH = "_dc_fitch"
- + ForesterConstants.PHYLO_XML_SUFFIX;
- public static final String NEXUS_EXTERNAL_DOMAINS = "_dom.nex";
- public static final String NEXUS_EXTERNAL_DOMAIN_COMBINATIONS = "_dc.nex";
- public static final String NEXUS_SECONDARY_FEATURES = "_secondary_features.nex";
- public static final String PARSIMONY_OUTPUT_GL_SUFFIX_DOLLO_SECONDARY_FEATURES = "_dollo_gl_secondary_features";
- public static final String PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_DOLLO_SECONDARY_FEATURES = "_dollo_glc_secondary_features";
- public static final String PARSIMONY_OUTPUT_DOLLO_GAINS_SECONDARY_FEATURES = "_dollo_gains_secondary_features";
- public static final String PARSIMONY_OUTPUT_DOLLO_LOSSES_SECONDARY_FEATURES = "_dollo_losses_secondary_features";
- public static final String PARSIMONY_OUTPUT_DOLLO_PRESENT_SECONDARY_FEATURES = "_dollo_present_secondary_features";
- public static final String SECONDARY_FEATURES_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO = "_secondary_features_dollo"
- + ForesterConstants.PHYLO_XML_SUFFIX;
- public static final String PARSIMONY_OUTPUT_DOLLO_ALL_GOID_D_ALL_NAMESPACES = "_dollo_goid_d";
- public static final String PARSIMONY_OUTPUT_FITCH_ALL_GOID_BC_ALL_NAMESPACES = "_fitch_goid_dc";
- final static private String HELP_OPTION_1 = "help";
- final static private String HELP_OPTION_2 = "h";
- final static private String OUTPUT_DIR_OPTION = "out_dir";
- final static private String SCORING_OPTION = "scoring";
- private static final DomainSimilarityScoring SCORING_DEFAULT = DomainSimilarity.DomainSimilarityScoring.COMBINATIONS;
- final static private String SCORING_DOMAIN_COUNT_BASED = "domains";
- final static private String SCORING_PROTEIN_COUNT_BASED = "proteins";
- final static private String SCORING_COMBINATION_BASED = "combinations";
- final static private String DETAILEDNESS_OPTION = "detail";
- private final static Detailedness DETAILEDNESS_DEFAULT = DomainSimilarityCalculator.Detailedness.PUNCTILIOUS;
- final static private String SPECIES_MATRIX_OPTION = "smatrix";
- final static private String DETAILEDNESS_BASIC = "basic";
- final static private String DETAILEDNESS_LIST_IDS = "list_ids";
- final static private String DETAILEDNESS_PUNCTILIOUS = "punctilious";
- final static private String DOMAIN_SIMILARITY_SORT_OPTION = "sort";
- private static final DomainSimilaritySortField DOMAIN_SORT_FILD_DEFAULT = DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID;
- final static private String DOMAIN_SIMILARITY_SORT_MIN = "min";
- final static private String DOMAIN_SIMILARITY_SORT_MAX = "max";
- final static private String DOMAIN_SIMILARITY_SORT_SD = "sd";
- final static private String DOMAIN_SIMILARITY_SORT_MEAN = "mean";
- final static private String DOMAIN_SIMILARITY_SORT_DIFF = "diff";
- final static private String DOMAIN_SIMILARITY_SORT_COUNTS_DIFF = "count_diff";
- final static private String DOMAIN_SIMILARITY_SORT_ABS_COUNTS_DIFF = "abs_count_diff";
- final static private String DOMAIN_SIMILARITY_SORT_SPECIES_COUNT = "species";
- final static private String DOMAIN_SIMILARITY_SORT_ALPHA = "alpha";
- final static private String DOMAIN_SIMILARITY_SORT_BY_SPECIES_COUNT_FIRST_OPTION = "species_first";
- final static private String DOMAIN_COUNT_SORT_OPTION = "dc_sort";
- private static final GenomeWideCombinableDomainsSortOrder DOMAINS_SORT_ORDER_DEFAULT = GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder.ALPHABETICAL_KEY_ID;
- final static private String DOMAIN_COUNT_SORT_ALPHA = "alpha";
- final static private String DOMAIN_COUNT_SORT_KEY_DOMAIN_COUNT = "dom";
- final static private String DOMAIN_COUNT_SORT_KEY_DOMAIN_PROTEINS_COUNT = "prot";
- final static private String DOMAIN_COUNT_SORT_COMBINATIONS_COUNT = "comb";
- final static private String CUTOFF_SCORE_FILE_OPTION = "cos";
- final static private String NOT_IGNORE_DUFS_OPTION = "dufs";
- final static private String MAX_E_VALUE_OPTION = "e";
- final static private String MAX_ALLOWED_OVERLAP_OPTION = "mo";
- final static private String NO_ENGULFING_OVERLAP_OPTION = "no_eo";
- final static private String IGNORE_COMBINATION_WITH_SAME_OPTION = "ignore_self_comb";
- final static private String PERFORM_DC_REGAIN_PROTEINS_STATS_OPTION = "dc_regain_stats";
- final static private String DA_ANALYSIS_OPTION = "DA_analyis";
- final static private String USE_LAST_IN_FITCH_OPTION = "last";
- public final static String PAIRWISE_DOMAIN_COMPARISONS_PREFIX = "pwc_";
- final static private String PAIRWISE_DOMAIN_COMPARISONS_OPTION = "pwc";
- final static private String OUTPUT_FILE_OPTION = "o";
- final static private String PFAM_TO_GO_FILE_USE_OPTION = "p2g";
- final static private String GO_OBO_FILE_USE_OPTION = "obo";
- final static private String GO_NAMESPACE_LIMIT_OPTION = "go_namespace";
- final static private String GO_NAMESPACE_LIMIT_OPTION_MOLECULAR_FUNCTION = "molecular_function";
- final static private String GO_NAMESPACE_LIMIT_OPTION_BIOLOGICAL_PROCESS = "biological_process";
- final static private String GO_NAMESPACE_LIMIT_OPTION_CELLULAR_COMPONENT = "cellular_component";
- final static private String SECONDARY_FEATURES_PARSIMONY_MAP_FILE = "secondary";
- final static private String DOMAIN_SIMILARITY_PRINT_OPTION_SIMPLE_TAB_DELIMITED = "simple_tab";
- final static private String DOMAIN_SIMILARITY_PRINT_OPTION_SIMPLE_HTML = "simple_html";
- final static private String DOMAIN_SIMILARITY_PRINT_OPTION_DETAILED_HTML = "detailed_html";
- final static private String DOMAIN_SIMILARITY_PRINT_OPTION = "ds_output";
- private static final PRINT_OPTION DOMAIN_SIMILARITY_PRINT_OPTION_DEFAULT = PrintableDomainSimilarity.PRINT_OPTION.HTML;
- final static private String IGNORE_DOMAINS_WITHOUT_COMBINATIONS_IN_ALL_SPECIES_OPTION = "ignore_singlet_domains";
- final static private String IGNORE_VIRAL_IDS = "ignore_viral_ids";
- final static private boolean IGNORE_DOMAINS_WITHOUT_COMBINATIONS_IN_ALL_SPECIES_DEFAULT = false;
- final static private String IGNORE_DOMAINS_SPECIFIC_TO_ONE_SPECIES_OPTION = "ignore_species_specific_domains";
- final static private boolean IGNORE_DOMAINS_SPECIFIC_TO_ONE_SPECIES_OPTION_DEFAULT = false;
- final static private String MATRIX_MEAN_SCORE_BASED_GENOME_DISTANCE_SUFFIX = "_mean_score.pwd";
- final static private String MATRIX_SHARED_DOMAINS_BASED_GENOME_DISTANCE_SUFFIX = "_domains.pwd";
- final static private String MATRIX_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX = "_bin_combinations.pwd";
- final static private String NJ_TREE_MEAN_SCORE_BASED_GENOME_DISTANCE_SUFFIX = "_mean_score_NJ"
- + ForesterConstants.PHYLO_XML_SUFFIX;
- final static private String NJ_TREE_SHARED_DOMAINS_BASED_GENOME_DISTANCE_SUFFIX = "_domains_NJ"
- + ForesterConstants.PHYLO_XML_SUFFIX;
- final static private String NJ_TREE_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX = "_bin_combinations_NJ"
- + ForesterConstants.PHYLO_XML_SUFFIX;
- final static private String FILTER_POSITIVE_OPTION = "pos_filter";
- final static private String FILTER_NEGATIVE_OPTION = "neg_filter";
- final static private String FILTER_NEGATIVE_DOMAINS_OPTION = "neg_dom_filter";
- final static private String INPUT_GENOMES_FILE_OPTION = "genomes";
- final static private String INPUT_SPECIES_TREE_OPTION = "species_tree";
- final static private String SEQ_EXTRACT_OPTION = "prot_extract";
- final static private String PRG_VERSION = "2.304";
- final static private String PRG_DATE = "131024";
- final static private String E_MAIL = "czmasek@burnham.org";
- final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester/surfacing";
- final static private boolean IGNORE_DUFS_DEFAULT = true;
- final static private boolean IGNORE_COMBINATION_WITH_SAME_DEFAULLT = false;
- final static private double MAX_E_VALUE_DEFAULT = -1;
- public final static int MAX_ALLOWED_OVERLAP_DEFAULT = -1;
- private static final String RANDOM_SEED_FOR_FITCH_PARSIMONY_OPTION = "random_seed";
- private static final String CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS = "consider_bdc_direction";
- private static final String CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS_AND_ADJACENCY = "consider_bdc_adj";
- public static final String SEQ_EXTRACT_SUFFIX = ".prot";
- public static final String PLUS_MINUS_ANALYSIS_OPTION = "plus_minus";
- public static final String PLUS_MINUS_DOM_SUFFIX = "_plus_minus_dom.txt";
- public static final String PLUS_MINUS_DOM_SUFFIX_HTML = "_plus_minus_dom.html";
- public static final String PLUS_MINUS_DC_SUFFIX_HTML = "_plus_minus_dc.html";
- public static final int PLUS_MINUS_ANALYSIS_MIN_DIFF_DEFAULT = 0;
- public static final double PLUS_MINUS_ANALYSIS_FACTOR_DEFAULT = 1.0;
- public static final String PLUS_MINUS_ALL_GO_IDS_DOM_SUFFIX = "_plus_minus_go_ids_all.txt";
- public static final String PLUS_MINUS_PASSING_GO_IDS_DOM_SUFFIX = "_plus_minus_go_ids_passing.txt";
- private static final String OUTPUT_LIST_OF_ALL_PROTEINS_OPTIONS = "all_prot";
- final static private String OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION = "all_prot_e";
- public static final boolean VERBOSE = false;
- private static final String OUTPUT_DOMAIN_COMBINATIONS_GAINED_MORE_THAN_ONCE_ANALYSIS_SUFFIX = "_fitch_dc_gains_counts";
- private static final String OUTPUT_DOMAIN_COMBINATIONS_LOST_MORE_THAN_ONCE_ANALYSIS_SUFFIX = "_fitch_dc_losses_counts";
- private static final String DOMAIN_LENGTHS_ANALYSIS_SUFFIX = "_domain_lengths_analysis";
- private static final boolean PERFORM_DOMAIN_LENGTH_ANALYSIS = true;
- public static final String ALL_PFAMS_ENCOUNTERED_SUFFIX = "_all_encountered_pfams";
- public static final String ALL_PFAMS_ENCOUNTERED_WITH_GO_ANNOTATION_SUFFIX = "_all_encountered_pfams_with_go_annotation";
- public static final String ENCOUNTERED_PFAMS_SUMMARY_SUFFIX = "_encountered_pfams_summary";
- public static final String ALL_PFAMS_GAINED_AS_DOMAINS_SUFFIX = "_all_pfams_gained_as_domains";
- public static final String ALL_PFAMS_LOST_AS_DOMAINS_SUFFIX = "_all_pfams_lost_as_domains";
- public static final String ALL_PFAMS_GAINED_AS_DC_SUFFIX = "_all_pfams_gained_as_dc";
- public static final String ALL_PFAMS_LOST_AS_DC_SUFFIX = "_all_pfams_lost_as_dc";
- public static final String BASE_DIRECTORY_PER_NODE_DOMAIN_GAIN_LOSS_FILES = "PER_NODE_EVENTS";
- public static final String BASE_DIRECTORY_PER_SUBTREE_DOMAIN_GAIN_LOSS_FILES = "PER_SUBTREE_EVENTS";
- public static final String D_PROMISCUITY_FILE_SUFFIX = "_domain_promiscuities";
- private static final String LOG_FILE_SUFFIX = "_log.txt";
- private static final String DATA_FILE_SUFFIX = "_domain_combination_data.txt";
- private static final String DATA_FILE_DESC = "#SPECIES\tPRTEIN_ID\tN_TERM_DOMAIN\tC_TERM_DOMAIN\tN_TERM_DOMAIN_PER_DOMAIN_E_VALUE\tC_TERM_DOMAIN_PER_DOMAIN_E_VALUE\tN_TERM_DOMAIN_COUNTS_PER_PROTEIN\tC_TERM_DOMAIN_COUNTS_PER_PROTEIN";
- private static final String WRITE_TO_NEXUS_OPTION = "nexus";
- private static final INDIVIDUAL_SCORE_CUTOFF INDIVIDUAL_SCORE_CUTOFF_DEFAULT = INDIVIDUAL_SCORE_CUTOFF.FULL_SEQUENCE; //TODO look at me! change?
- public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_COUNTS_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_counts.txt";
- public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists.txt";
- public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping.txt";
- public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique.txt";
- public static final String LIMIT_SPEC_FOR_PROT_EX = null; // e.g. "HUMAN"; set to null for not using this feature (default).
- public static final String BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH_MAPPED = "_dc_MAPPED_secondary_features_fitch"
- + ForesterConstants.PHYLO_XML_SUFFIX;
- public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_COUNTS_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_counts_MAPPED.txt";
- public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_MAPPED.txt";
- public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_MAPPED.txt";
- public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique_MAPPED.txt";
- private static final boolean CALC_SIMILARITY_SCORES = false;
+ public final static String PRG_NAME = "surfacing";
+ public static final String DOMAINS_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO = "_d_dollo"
+ + ForesterConstants.PHYLO_XML_SUFFIX;
+ public static final String DOMAINS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH = "_d_fitch"
+ + ForesterConstants.PHYLO_XML_SUFFIX;
+ public static final String BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO = "_dc_dollo"
+ + ForesterConstants.PHYLO_XML_SUFFIX;
+ public static final String BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH = "_dc_fitch"
+ + ForesterConstants.PHYLO_XML_SUFFIX;
+ public static final String NEXUS_EXTERNAL_DOMAINS = "_dom.nex";
+ public static final String NEXUS_EXTERNAL_DOMAIN_COMBINATIONS = "_dc.nex";
+ public static final String NEXUS_SECONDARY_FEATURES = "_secondary_features.nex";
+ public static final String PARSIMONY_OUTPUT_GL_SUFFIX_DOLLO_SECONDARY_FEATURES = "_dollo_gl_secondary_features";
+ public static final String PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_DOLLO_SECONDARY_FEATURES = "_dollo_glc_secondary_features";
+ public static final String PARSIMONY_OUTPUT_DOLLO_GAINS_SECONDARY_FEATURES = "_dollo_gains_secondary_features";
+ public static final String PARSIMONY_OUTPUT_DOLLO_LOSSES_SECONDARY_FEATURES = "_dollo_losses_secondary_features";
+ public static final String PARSIMONY_OUTPUT_DOLLO_PRESENT_SECONDARY_FEATURES = "_dollo_present_secondary_features";
+ public static final String SECONDARY_FEATURES_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO = "_secondary_features_dollo"
+ + ForesterConstants.PHYLO_XML_SUFFIX;
+ public static final String PARSIMONY_OUTPUT_DOLLO_ALL_GOID_D_ALL_NAMESPACES = "_dollo_goid_d";
+ public static final String PARSIMONY_OUTPUT_FITCH_ALL_GOID_BC_ALL_NAMESPACES = "_fitch_goid_dc";
+ final static private String HELP_OPTION_1 = "help";
+ final static private String HELP_OPTION_2 = "h";
+ final static private String OUTPUT_DIR_OPTION = "out_dir";
+ final static private String SCORING_OPTION = "scoring";
+ private static final DomainSimilarityScoring SCORING_DEFAULT = PrintableDomainSimilarity.DomainSimilarityScoring.COMBINATIONS;
+ final static private String SCORING_DOMAIN_COUNT_BASED = "domains";
+ final static private String SCORING_PROTEIN_COUNT_BASED = "proteins";
+ final static private String SCORING_COMBINATION_BASED = "combinations";
+ final static private String DETAILEDNESS_OPTION = "detail";
+ private final static Detailedness DETAILEDNESS_DEFAULT = DomainSimilarityCalculator.Detailedness.PUNCTILIOUS;
+ final static private String SPECIES_MATRIX_OPTION = "smatrix";
+ final static private String DETAILEDNESS_BASIC = "basic";
+ final static private String DETAILEDNESS_LIST_IDS = "list_ids";
+ final static private String DETAILEDNESS_PUNCTILIOUS = "punctilious";
+ final static private String DOMAIN_SIMILARITY_SORT_OPTION = "sort";
+ private static final PrintableDomainSimilarity.DomainSimilaritySortField DOMAIN_SORT_FILD_DEFAULT = PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID;
+ final static private String DOMAIN_SIMILARITY_SORT_MIN = "min";
+ final static private String DOMAIN_SIMILARITY_SORT_MAX = "max";
+ final static private String DOMAIN_SIMILARITY_SORT_SD = "sd";
+ final static private String DOMAIN_SIMILARITY_SORT_MEAN = "mean";
+ final static private String DOMAIN_SIMILARITY_SORT_DIFF = "diff";
+ final static private String DOMAIN_SIMILARITY_SORT_COUNTS_DIFF = "count_diff";
+ final static private String DOMAIN_SIMILARITY_SORT_ABS_COUNTS_DIFF = "abs_count_diff";
+ final static private String DOMAIN_SIMILARITY_SORT_SPECIES_COUNT = "species";
+ final static private String DOMAIN_SIMILARITY_SORT_ALPHA = "alpha";
+ final static private String DOMAIN_SIMILARITY_SORT_BY_SPECIES_COUNT_FIRST_OPTION = "species_first";
+ final static private String DOMAIN_COUNT_SORT_OPTION = "dc_sort";
+ private static final GenomeWideCombinableDomainsSortOrder DOMAINS_SORT_ORDER_DEFAULT = GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder.ALPHABETICAL_KEY_ID;
+ final static private String DOMAIN_COUNT_SORT_ALPHA = "alpha";
+ final static private String DOMAIN_COUNT_SORT_KEY_DOMAIN_COUNT = "dom";
+ final static private String DOMAIN_COUNT_SORT_KEY_DOMAIN_PROTEINS_COUNT = "prot";
+ final static private String DOMAIN_COUNT_SORT_COMBINATIONS_COUNT = "comb";
+ final static private String CUTOFF_SCORE_FILE_OPTION = "cos";
+ final static private String NOT_IGNORE_DUFS_OPTION = "dufs";
+ final static private String MAX_E_VALUE_OPTION = "e";
+ final static private String MAX_ALLOWED_OVERLAP_OPTION = "mo";
+ final static private String NO_ENGULFING_OVERLAP_OPTION = "no_eo";
+ final static private String IGNORE_COMBINATION_WITH_SAME_OPTION = "ignore_self_comb";
+ final static private String PERFORM_DC_REGAIN_PROTEINS_STATS_OPTION = "dc_regain_stats";
+ final static private String DA_ANALYSIS_OPTION = "DA_analyis";
+ final static private String USE_LAST_IN_FITCH_OPTION = "last";
+ public final static String PAIRWISE_DOMAIN_COMPARISONS_PREFIX = "pwc_";
+ final static private String PAIRWISE_DOMAIN_COMPARISONS_OPTION = "pwc";
+ final static private String OUTPUT_FILE_OPTION = "o";
+ final static private String PFAM_TO_GO_FILE_USE_OPTION = "p2g";
+ final static private String GO_OBO_FILE_USE_OPTION = "obo";
+ final static private String GO_NAMESPACE_LIMIT_OPTION = "go_namespace";
+ final static private String GO_NAMESPACE_LIMIT_OPTION_MOLECULAR_FUNCTION = "molecular_function";
+ final static private String GO_NAMESPACE_LIMIT_OPTION_BIOLOGICAL_PROCESS = "biological_process";
+ final static private String GO_NAMESPACE_LIMIT_OPTION_CELLULAR_COMPONENT = "cellular_component";
+ final static private String SECONDARY_FEATURES_PARSIMONY_MAP_FILE = "secondary";
+ final static private String DOMAIN_SIMILARITY_PRINT_OPTION_SIMPLE_TAB_DELIMITED = "simple_tab";
+ final static private String DOMAIN_SIMILARITY_PRINT_OPTION_SIMPLE_HTML = "simple_html";
+ final static private String DOMAIN_SIMILARITY_PRINT_OPTION_DETAILED_HTML = "detailed_html";
+ final static private String DOMAIN_SIMILARITY_PRINT_OPTION = "ds_output";
+ private static final PRINT_OPTION DOMAIN_SIMILARITY_PRINT_OPTION_DEFAULT = PrintableDomainSimilarity.PRINT_OPTION.HTML;
+ final static private String IGNORE_DOMAINS_WITHOUT_COMBINATIONS_IN_ALL_SPECIES_OPTION = "ignore_singlet_domains";
+ final static private String IGNORE_VIRAL_IDS = "ignore_viral_ids";
+ final static private boolean IGNORE_DOMAINS_WITHOUT_COMBINATIONS_IN_ALL_SPECIES_DEFAULT = false;
+ final static private String IGNORE_DOMAINS_SPECIFIC_TO_ONE_SPECIES_OPTION = "ignore_species_specific_domains";
+ final static private boolean IGNORE_DOMAINS_SPECIFIC_TO_ONE_SPECIES_OPTION_DEFAULT = false;
+ final static private String MATRIX_MEAN_SCORE_BASED_GENOME_DISTANCE_SUFFIX = "_mean_score.pwd";
+ final static private String MATRIX_SHARED_DOMAINS_BASED_GENOME_DISTANCE_SUFFIX = "_domains.pwd";
+ final static private String MATRIX_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX = "_bin_combinations.pwd";
+ final static private String NJ_TREE_MEAN_SCORE_BASED_GENOME_DISTANCE_SUFFIX = "_mean_score_NJ"
+ + ForesterConstants.PHYLO_XML_SUFFIX;
+ final static private String NJ_TREE_SHARED_DOMAINS_BASED_GENOME_DISTANCE_SUFFIX = "_domains_NJ"
+ + ForesterConstants.PHYLO_XML_SUFFIX;
+ final static private String NJ_TREE_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX = "_bin_combinations_NJ"
+ + ForesterConstants.PHYLO_XML_SUFFIX;
+ final static private String FILTER_POSITIVE_OPTION = "pos_filter";
+ final static private String FILTER_NEGATIVE_OPTION = "neg_filter";
+ final static private String FILTER_NEGATIVE_DOMAINS_OPTION = "neg_dom_filter";
+ final static private String INPUT_GENOMES_FILE_OPTION = "genomes";
+ final static private String INPUT_SPECIES_TREE_OPTION = "species_tree";
+ final static private String SEQ_EXTRACT_OPTION = "prot_extract";
+ final static private String PRG_VERSION = "2.400";
+ final static private String PRG_DATE = "131106";
+ final static private String E_MAIL = "czmasek@burnham.org";
+ final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester/surfacing";
+ final static private boolean IGNORE_DUFS_DEFAULT = true;
+ final static private boolean IGNORE_COMBINATION_WITH_SAME_DEFAULLT = false;
+ final static private double MAX_E_VALUE_DEFAULT = -1;
+ public final static int MAX_ALLOWED_OVERLAP_DEFAULT = -1;
+ private static final String RANDOM_SEED_FOR_FITCH_PARSIMONY_OPTION = "random_seed";
+ private static final String CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS = "consider_bdc_direction";
+ private static final String CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS_AND_ADJACENCY = "consider_bdc_adj";
+ public static final String SEQ_EXTRACT_SUFFIX = ".prot";
+ public static final String PLUS_MINUS_ANALYSIS_OPTION = "plus_minus";
+ public static final String PLUS_MINUS_DOM_SUFFIX = "_plus_minus_dom.txt";
+ public static final String PLUS_MINUS_DOM_SUFFIX_HTML = "_plus_minus_dom.html";
+ public static final String PLUS_MINUS_DC_SUFFIX_HTML = "_plus_minus_dc.html";
+ public static final int PLUS_MINUS_ANALYSIS_MIN_DIFF_DEFAULT = 0;
+ public static final double PLUS_MINUS_ANALYSIS_FACTOR_DEFAULT = 1.0;
+ public static final String PLUS_MINUS_ALL_GO_IDS_DOM_SUFFIX = "_plus_minus_go_ids_all.txt";
+ public static final String PLUS_MINUS_PASSING_GO_IDS_DOM_SUFFIX = "_plus_minus_go_ids_passing.txt";
+ private static final String OUTPUT_LIST_OF_ALL_PROTEINS_OPTIONS = "all_prot";
+ final static private String OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION = "all_prot_e";
+ public static final boolean VERBOSE = false;
+ private static final String OUTPUT_DOMAIN_COMBINATIONS_GAINED_MORE_THAN_ONCE_ANALYSIS_SUFFIX = "_fitch_dc_gains_counts";
+ private static final String OUTPUT_DOMAIN_COMBINATIONS_LOST_MORE_THAN_ONCE_ANALYSIS_SUFFIX = "_fitch_dc_losses_counts";
+ private static final String DOMAIN_LENGTHS_ANALYSIS_SUFFIX = "_domain_lengths_analysis";
+ private static final boolean PERFORM_DOMAIN_LENGTH_ANALYSIS = true;
+ public static final String ALL_PFAMS_ENCOUNTERED_SUFFIX = "_all_encountered_pfams";
+ public static final String ALL_PFAMS_ENCOUNTERED_WITH_GO_ANNOTATION_SUFFIX = "_all_encountered_pfams_with_go_annotation";
+ public static final String ENCOUNTERED_PFAMS_SUMMARY_SUFFIX = "_encountered_pfams_summary";
+ public static final String ALL_PFAMS_GAINED_AS_DOMAINS_SUFFIX = "_all_pfams_gained_as_domains";
+ public static final String ALL_PFAMS_LOST_AS_DOMAINS_SUFFIX = "_all_pfams_lost_as_domains";
+ public static final String ALL_PFAMS_GAINED_AS_DC_SUFFIX = "_all_pfams_gained_as_dc";
+ public static final String ALL_PFAMS_LOST_AS_DC_SUFFIX = "_all_pfams_lost_as_dc";
+ public static final String BASE_DIRECTORY_PER_NODE_DOMAIN_GAIN_LOSS_FILES = "PER_NODE_EVENTS";
+ public static final String BASE_DIRECTORY_PER_SUBTREE_DOMAIN_GAIN_LOSS_FILES = "PER_SUBTREE_EVENTS";
+ public static final String D_PROMISCUITY_FILE_SUFFIX = "_domain_promiscuities";
+ private static final String LOG_FILE_SUFFIX = "_log.txt";
+ private static final String DATA_FILE_SUFFIX = "_domain_combination_data.txt";
+ private static final String DATA_FILE_DESC = "#SPECIES\tPRTEIN_ID\tN_TERM_DOMAIN\tC_TERM_DOMAIN\tN_TERM_DOMAIN_PER_DOMAIN_E_VALUE\tC_TERM_DOMAIN_PER_DOMAIN_E_VALUE\tN_TERM_DOMAIN_COUNTS_PER_PROTEIN\tC_TERM_DOMAIN_COUNTS_PER_PROTEIN";
+ private static final String WRITE_TO_NEXUS_OPTION = "nexus";
+ private static final INDIVIDUAL_SCORE_CUTOFF INDIVIDUAL_SCORE_CUTOFF_DEFAULT = INDIVIDUAL_SCORE_CUTOFF.FULL_SEQUENCE; //TODO look at me! change?
+ public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_COUNTS_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_counts.txt";
+ public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists.txt";
+ public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping.txt";
+ public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique.txt";
+ public static final String LIMIT_SPEC_FOR_PROT_EX = null; // e.g. "HUMAN"; set to null for not using this feature (default).
+ public static final String BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH_MAPPED = "_dc_MAPPED_secondary_features_fitch"
+ + ForesterConstants.PHYLO_XML_SUFFIX;
+ public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_COUNTS_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_counts_MAPPED.txt";
+ public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_MAPPED.txt";
+ public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_MAPPED.txt";
+ public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique_MAPPED.txt";
+ private static final boolean CALC_SIMILARITY_SCORES = false;
public static void main( final String args[] ) {
final long start_time = new Date().getTime();
ForesterUtil.fatalError( surfacing.PRG_NAME, "no input genomes file given: "
+ surfacing.INPUT_GENOMES_FILE_OPTION + "=<file>" );
}
- DomainSimilarity.DomainSimilarityScoring scoring = SCORING_DEFAULT;
+ PrintableDomainSimilarity.DomainSimilarityScoring scoring = SCORING_DEFAULT;
if ( cla.isOptionSet( surfacing.SCORING_OPTION ) ) {
if ( !cla.isOptionValueSet( surfacing.SCORING_OPTION ) ) {
ForesterUtil.fatalError( surfacing.PRG_NAME,
}
final String scoring_str = cla.getOptionValue( surfacing.SCORING_OPTION );
if ( scoring_str.equals( surfacing.SCORING_DOMAIN_COUNT_BASED ) ) {
- scoring = DomainSimilarity.DomainSimilarityScoring.DOMAINS;
+ scoring = PrintableDomainSimilarity.DomainSimilarityScoring.DOMAINS;
}
else if ( scoring_str.equals( surfacing.SCORING_COMBINATION_BASED ) ) {
- scoring = DomainSimilarity.DomainSimilarityScoring.COMBINATIONS;
+ scoring = PrintableDomainSimilarity.DomainSimilarityScoring.COMBINATIONS;
}
else if ( scoring_str.equals( surfacing.SCORING_PROTEIN_COUNT_BASED ) ) {
- scoring = DomainSimilarity.DomainSimilarityScoring.PROTEINS;
+ scoring = PrintableDomainSimilarity.DomainSimilarityScoring.PROTEINS;
}
else {
ForesterUtil.fatalError( surfacing.PRG_NAME, "unknown value \"" + scoring_str
}
query_domain_ids = cla.getOptionValue( surfacing.SEQ_EXTRACT_OPTION );
}
- DomainSimilarity.DomainSimilaritySortField domain_similarity_sort_field = DOMAIN_SORT_FILD_DEFAULT;
- DomainSimilarity.DomainSimilaritySortField domain_similarity_sort_field_for_automated_pwc = DOMAIN_SORT_FILD_DEFAULT;
+ PrintableDomainSimilarity.DomainSimilaritySortField domain_similarity_sort_field = DOMAIN_SORT_FILD_DEFAULT;
+ PrintableDomainSimilarity.DomainSimilaritySortField domain_similarity_sort_field_for_automated_pwc = DOMAIN_SORT_FILD_DEFAULT;
if ( cla.isOptionSet( surfacing.DOMAIN_SIMILARITY_SORT_OPTION ) ) {
if ( !cla.isOptionValueSet( surfacing.DOMAIN_SIMILARITY_SORT_OPTION ) ) {
ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for domain combinations similarities sorting: -"
}
final String sort_str = cla.getOptionValue( surfacing.DOMAIN_SIMILARITY_SORT_OPTION ).toLowerCase();
if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_ALPHA ) ) {
- domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID;
- domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID;
+ domain_similarity_sort_field = PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID;
+ domain_similarity_sort_field_for_automated_pwc = PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID;
}
else if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_MAX ) ) {
- domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.MAX;
- domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID;
+ domain_similarity_sort_field = PrintableDomainSimilarity.DomainSimilaritySortField.MAX;
+ domain_similarity_sort_field_for_automated_pwc = PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID;
}
else if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_MIN ) ) {
- domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.MIN;
- domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID;
+ domain_similarity_sort_field = PrintableDomainSimilarity.DomainSimilaritySortField.MIN;
+ domain_similarity_sort_field_for_automated_pwc = PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID;
}
else if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_MEAN ) ) {
- domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.MEAN;
- domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.MEAN;
+ domain_similarity_sort_field = PrintableDomainSimilarity.DomainSimilaritySortField.MEAN;
+ domain_similarity_sort_field_for_automated_pwc = PrintableDomainSimilarity.DomainSimilaritySortField.MEAN;
}
else if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_SPECIES_COUNT ) ) {
- domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.SPECIES_COUNT;
- domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID;
+ domain_similarity_sort_field = PrintableDomainSimilarity.DomainSimilaritySortField.SPECIES_COUNT;
+ domain_similarity_sort_field_for_automated_pwc = PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID;
}
else if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_SD ) ) {
- domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.SD;
- domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID;
+ domain_similarity_sort_field = PrintableDomainSimilarity.DomainSimilaritySortField.SD;
+ domain_similarity_sort_field_for_automated_pwc = PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID;
}
else if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_DIFF ) ) {
- domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.MAX_DIFFERENCE;
- domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.MAX_DIFFERENCE;
+ domain_similarity_sort_field = PrintableDomainSimilarity.DomainSimilaritySortField.MAX_DIFFERENCE;
+ domain_similarity_sort_field_for_automated_pwc = PrintableDomainSimilarity.DomainSimilaritySortField.MAX_DIFFERENCE;
}
else if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_ABS_COUNTS_DIFF ) ) {
- domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE;
- domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE;
+ domain_similarity_sort_field = PrintableDomainSimilarity.DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE;
+ domain_similarity_sort_field_for_automated_pwc = PrintableDomainSimilarity.DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE;
}
else if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_COUNTS_DIFF ) ) {
- domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE;
- domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE;
+ domain_similarity_sort_field = PrintableDomainSimilarity.DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE;
+ domain_similarity_sort_field_for_automated_pwc = PrintableDomainSimilarity.DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE;
}
else {
ForesterUtil.fatalError( surfacing.PRG_NAME, "unknown value \"" + sort_str
+ surfacing.GO_NAMESPACE_LIMIT_OPTION_CELLULAR_COMPONENT + ">\"" );
}
}
- if ( ( domain_similarity_sort_field == DomainSimilarity.DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE )
+ if ( ( domain_similarity_sort_field == PrintableDomainSimilarity.DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE )
&& ( number_of_genomes > 2 ) ) {
- domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE;
+ domain_similarity_sort_field = PrintableDomainSimilarity.DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE;
}
File[] intree_files = null;
Phylogeny[] intrees = null;
if ( domain_id_to_go_ids_map != null ) {
go_annotation_output = DomainSimilarityCalculator.GoAnnotationOutput.ALL;
}
- final SortedSet<DomainSimilarity> similarities = calc
+ final SortedSet<PrintableDomainSimilarity> similarities = calc
.calculateSimilarities( pw_calc,
gwcd_list,
ignore_domains_without_combs_in_all_spec,
String _id1;
String _str;
- BasicBinaryDomainCombination() {
- _id0 = null;
- _id1 = null;
- }
-
- private String getAsStr() {
- return _id0 + SEPARATOR + _id1;
- }
-
public BasicBinaryDomainCombination( final String id0, final String id1 ) {
if ( ( id0 == null ) || ( id1 == null ) ) {
throw new IllegalArgumentException( "attempt to create binary domain combination using null" );
}
}
+ BasicBinaryDomainCombination() {
+ _id0 = null;
+ _id1 = null;
+ }
+
@Override
public int compareTo( final BinaryDomainCombination binary_domain_combination ) {
if ( binary_domain_combination.getClass() != this.getClass() ) {
return getAsStr();
}
+ private String getAsStr() {
+ return _id0 + SEPARATOR + _id1;
+ }
+
public static BinaryDomainCombination createInstance( final String ids ) {
if ( ids.indexOf( BinaryDomainCombination.SEPARATOR ) < 1 ) {
throw new IllegalArgumentException( "Unexpected format for binary domain combination [" + ids + "]" );
public class BasicCombinableDomains implements CombinableDomains {
+ final private TreeMap<String, Integer> _combining_domains;
final private String _key_domain;
private int _key_domain_count;
- final private Species _species;
- final private TreeMap<String, Integer> _combining_domains;
final private Set<String> _proteins_with_key_domain;
+ final private Species _species;
public BasicCombinableDomains( final String key_domain, final Species species ) {
_key_domain = key_domain;
return sb;
}
- protected TreeMap<String, Integer> getCombiningDomains() {
- return _combining_domains;
- }
-
@Override
public String getKeyDomain() {
return _key_domain;
}
@Override
+ public Set<String> getKeyDomainProteins() {
+ return _proteins_with_key_domain;
+ }
+
+ @Override
public int getKeyDomainProteinsCount() {
return getKeyDomainProteins().size();
}
return sb.toString();
}
- @Override
- public Set<String> getKeyDomainProteins() {
- return _proteins_with_key_domain;
+ protected TreeMap<String, Integer> getCombiningDomains() {
+ return _combining_domains;
}
}
public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculator {
- final DomainSimilarity.DomainSimilaritySortField _sort;
- private final boolean _sort_by_species_count_first;
- private final boolean _treat_as_binary_comparison;
- private final boolean _calc_similarity_score;
+ final PrintableDomainSimilarity.DomainSimilaritySortField _sort;
+ private final boolean _calc_similarity_score;
+ private final boolean _sort_by_species_count_first;
+ private final boolean _treat_as_binary_comparison;
- public BasicDomainSimilarityCalculator( final DomainSimilarity.DomainSimilaritySortField sort,
+ public BasicDomainSimilarityCalculator( final PrintableDomainSimilarity.DomainSimilaritySortField sort,
final boolean sort_by_species_count_first,
final boolean treat_as_binary_comparison,
final boolean calc_similarity_score ) {
_calc_similarity_score = calc_similarity_score;
}
- public boolean isCalcSimilarityScore() {
- return _calc_similarity_score;
- }
-
@Override
- public SortedSet<DomainSimilarity> calculateSimilarities( final PairwiseDomainSimilarityCalculator pairwise_calculator,
- final List<GenomeWideCombinableDomains> cdc_list,
- final boolean ignore_domains_without_combinations_in_any_genome,
- final boolean ignore_domains_specific_to_one_genome ) {
+ public SortedSet<PrintableDomainSimilarity> calculateSimilarities( final PairwiseDomainSimilarityCalculator pairwise_calculator,
+ final List<GenomeWideCombinableDomains> cdc_list,
+ final boolean ignore_domains_without_combinations_in_any_genome,
+ final boolean ignore_domains_specific_to_one_genome ) {
if ( cdc_list.size() < 2 ) {
throw new IllegalArgumentException( "attempt to calculate multiple combinable domains similarity for less than two combinale domains collections" );
}
- final SortedSet<DomainSimilarity> similarities = new TreeSet<DomainSimilarity>();
+ final SortedSet<PrintableDomainSimilarity> similarities = new TreeSet<PrintableDomainSimilarity>();
final SortedSet<String> keys = new TreeSet<String>();
for( final GenomeWideCombinableDomains cdc : cdc_list ) {
keys.addAll( ( cdc ).getAllCombinableDomainsIds().keySet() );
}
if ( same_id_cd_list.size() > 0 ) {
if ( !ignore_domains_specific_to_one_genome || ( same_id_cd_list.size() > 1 ) ) {
- final DomainSimilarity s = calculateSimilarity( pairwise_calculator, same_id_cd_list );
+ final PrintableDomainSimilarity s = calculateSimilarity( pairwise_calculator, same_id_cd_list );
if ( s != null ) {
similarities.add( s );
}
return similarities;
}
- private DomainSimilarity calculateSimilarity( final PairwiseDomainSimilarityCalculator pairwise_calculator,
- final List<CombinableDomains> domains_list ) {
+ public boolean isCalcSimilarityScore() {
+ return _calc_similarity_score;
+ }
+
+ private PrintableDomainSimilarity calculateSimilarity( final PairwiseDomainSimilarityCalculator pairwise_calculator,
+ final List<CombinableDomains> domains_list ) {
if ( domains_list.size() == 1 ) {
final SortedMap<Species, SpeciesSpecificDcData> species_data = new TreeMap<Species, SpeciesSpecificDcData>();
species_data.put( domains_list.get( 0 ).getSpecies(),
max_difference = Math.abs( max_difference );
}
}
- DomainSimilarity similarity = null;
+ PrintableDomainSimilarity similarity = null;
if ( !isCalcSimilarityScore() ) {
similarity = new PrintableDomainSimilarity( domains_list.get( 0 ),
max_difference_in_counts,
public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDomains {
- private static final Comparator<CombinableDomains> DESCENDING_KEY_DOMAIN_COUNT_ORDER = new Comparator<CombinableDomains>() {
+ private static final Comparator<CombinableDomains> DESCENDING_COMBINATIONS_COUNT_ORDER = new Comparator<CombinableDomains>() {
@Override
public int compare( final CombinableDomains d1,
final CombinableDomains d2 ) {
- if ( d1.getKeyDomainCount() < d2
- .getKeyDomainCount() ) {
+ if ( d1.getNumberOfCombinableDomains() < d2
+ .getNumberOfCombinableDomains() ) {
return 1;
}
else if ( d1
- .getKeyDomainCount() > d2
- .getKeyDomainCount() ) {
+ .getNumberOfCombinableDomains() > d2
+ .getNumberOfCombinableDomains() ) {
return -1;
}
else {
}
}
};
- private static final Comparator<CombinableDomains> DESCENDING_KEY_DOMAIN_PROTEINS_COUNT_ORDER = new Comparator<CombinableDomains>() {
+ private static final Comparator<CombinableDomains> DESCENDING_KEY_DOMAIN_COUNT_ORDER = new Comparator<CombinableDomains>() {
@Override
public int compare( final CombinableDomains d1,
final CombinableDomains d2 ) {
- if ( d1.getKeyDomainProteinsCount() < d2
- .getKeyDomainProteinsCount() ) {
+ if ( d1.getKeyDomainCount() < d2
+ .getKeyDomainCount() ) {
return 1;
}
else if ( d1
- .getKeyDomainProteinsCount() > d2
- .getKeyDomainProteinsCount() ) {
+ .getKeyDomainCount() > d2
+ .getKeyDomainCount() ) {
return -1;
}
else {
}
}
};
- private static final Comparator<CombinableDomains> DESCENDING_COMBINATIONS_COUNT_ORDER = new Comparator<CombinableDomains>() {
+ private static final Comparator<CombinableDomains> DESCENDING_KEY_DOMAIN_PROTEINS_COUNT_ORDER = new Comparator<CombinableDomains>() {
@Override
public int compare( final CombinableDomains d1,
final CombinableDomains d2 ) {
- if ( d1.getNumberOfCombinableDomains() < d2
- .getNumberOfCombinableDomains() ) {
+ if ( d1.getKeyDomainProteinsCount() < d2
+ .getKeyDomainProteinsCount() ) {
return 1;
}
else if ( d1
- .getNumberOfCombinableDomains() > d2
- .getNumberOfCombinableDomains() ) {
+ .getKeyDomainProteinsCount() > d2
+ .getKeyDomainProteinsCount() ) {
return -1;
}
else {
}
};
final private SortedMap<String, CombinableDomains> _combinable_domains_map;
- final private Species _species;
final private DomainCombinationType _dc_type;
+ final private Species _species;
private BasicGenomeWideCombinableDomains( final Species species, final DomainCombinationType dc_type ) {
_combinable_domains_map = new TreeMap<String, CombinableDomains>();
_dc_type = dc_type;
}
- private void add( final String key, final CombinableDomains cdc ) {
- _combinable_domains_map.put( key, cdc );
- }
-
@Override
public boolean contains( final String key_id ) {
return _combinable_domains_map.containsKey( key_id );
return sb;
}
- private static void countDomains( final Map<String, Integer> domain_counts,
- final Set<String> saw_c,
- final String id_i ) {
- if ( domain_counts.containsKey( id_i ) ) {
- domain_counts.put( id_i, 1 + domain_counts.get( ( id_i ) ) );
- }
- else {
- domain_counts.put( id_i, 1 );
- }
- saw_c.add( id_i );
+ private void add( final String key, final CombinableDomains cdc ) {
+ _combinable_domains_map.put( key, cdc );
}
public static BasicGenomeWideCombinableDomains createInstance( final List<Protein> protein_list,
}
return instance;
}
+
+ private static void countDomains( final Map<String, Integer> domain_counts,
+ final Set<String> saw_c,
+ final String id_i ) {
+ if ( domain_counts.containsKey( id_i ) ) {
+ domain_counts.put( id_i, 1 + domain_counts.get( ( id_i ) ) );
+ }
+ else {
+ domain_counts.put( id_i, 1 );
+ }
+ saw_c.add( id_i );
+ }
}
public void addCombinableDomain( final String protein_domain );
/**
- *
- * This must return all domains in this set of combinable domains (i.e.
- * the key domain and all domains which can combine with the key domain).
- *
- * @return all domains
- */
- List<String> getAllDomains();
-
- List<String> getCombinableDomains();
-
- /**
* Returns the combinable domain identifiers sorted in alphabetical manner: -
* keys are the combinable domain identifiers - values are the counts of
* proteins exhibiting a particular combination
*/
public int getKeyDomainCount();
+ public Set<String> getKeyDomainProteins();
+
/**
* Returns how many proteins with the key domain are present in a given
* species genome.
*/
public int getKeyDomainProteinsCount();
- public Set<String> getKeyDomainProteins();
-
public int getNumberOfCombinableDomains();
public int getNumberOfProteinsExhibitingCombination( final String protein_domain );
public boolean isCombinable( final String protein_domain );
+ public List<BinaryDomainCombination> toBinaryDomainCombinations();
+
+ void addKeyDomainProtein( String protein );
+
+ /**
+ *
+ * This must return all domains in this set of combinable domains (i.e.
+ * the key domain and all domains which can combine with the key domain).
+ *
+ * @return all domains
+ */
+ List<String> getAllDomains();
+
+ List<String> getCombinableDomains();
+
/**
* Sets how many times the key domain is present in a given species genome.
*
* key domain count in species
*/
void setKeyDomainCount( final int key_domain_count );
-
- public List<BinaryDomainCombination> toBinaryDomainCombinations();
-
- void addKeyDomainProtein( String protein );
}
\ No newline at end of file
public class CombinationsBasedPairwiseDomainSimilarity implements PairwiseDomainSimilarity {
- private final int _same_domains;
- private final int _different_domains;
private final int _difference_in_counts;
+ private final int _different_domains;
+ private final int _same_domains;
private final double _score;
public CombinationsBasedPairwiseDomainSimilarity( final int same_domains,
public static final double MAX_SIMILARITY_SCORE = 1.0;
public static final double MIN_SIMILARITY_SCORE = 0.0;
+ private Set<BinaryDomainCombination> _all_binary_domain_combinations;
+ private Set<String> _all_domains;
+ private boolean _allow_domains_to_be_ignored;
+ private Set<BinaryDomainCombination> _binary_domain_combinations_specific_to_0;
+ private Set<BinaryDomainCombination> _binary_domain_combinations_specific_to_1;
final private GenomeWideCombinableDomains _combinable_domains_genome_0;
final private GenomeWideCombinableDomains _combinable_domains_genome_1;
private Set<String> _domain_ids_to_ignore;
- private boolean _allow_domains_to_be_ignored;
- private Set<String> _all_domains;
- private Set<String> _shared_domains;
private Set<String> _domains_specific_to_0;
private Set<String> _domains_specific_to_1;
- private Set<BinaryDomainCombination> _all_binary_domain_combinations;
private Set<BinaryDomainCombination> _shared_binary_domain_combinations;
- private Set<BinaryDomainCombination> _binary_domain_combinations_specific_to_0;
- private Set<BinaryDomainCombination> _binary_domain_combinations_specific_to_1;
+ private Set<String> _shared_domains;
public DomainArchitectureBasedGenomeSimilarityCalculator( final GenomeWideCombinableDomains combinable_domains_genome_0,
final GenomeWideCombinableDomains combinable_domains_genome_1 ) {
setDomainIdsToIgnore( new HashSet<String>() );
}
- private void forceRecalculation() {
- _all_domains = null;
- _shared_domains = null;
- _domains_specific_to_0 = null;
- _domains_specific_to_1 = null;
- _all_binary_domain_combinations = null;
- _shared_binary_domain_combinations = null;
- _binary_domain_combinations_specific_to_0 = null;
- _binary_domain_combinations_specific_to_1 = null;
- }
-
/**
* Does not return binary combinations which contain one or two domains
* to be ignored -- if ignoring is allowed.
return _all_domains;
}
- private Set<BinaryDomainCombination> getBinaryDomainCombinationsSpecificToGenome( final boolean specific_to_genome_0 ) {
- final Set<BinaryDomainCombination> specific = new HashSet<BinaryDomainCombination>();
- final Set<BinaryDomainCombination> bc0 = getCombinableDomainsGenome0().toBinaryDomainCombinations();
- final Set<BinaryDomainCombination> bc1 = getCombinableDomainsGenome1().toBinaryDomainCombinations();
- if ( specific_to_genome_0 ) {
- for( final BinaryDomainCombination binary_domain_combination0 : bc0 ) {
- if ( !bc1.contains( binary_domain_combination0 ) ) {
- specific.add( binary_domain_combination0 );
- }
- }
- }
- else {
- for( final BinaryDomainCombination binary_domain_combination1 : bc1 ) {
- if ( !bc0.contains( binary_domain_combination1 ) ) {
- specific.add( binary_domain_combination1 );
- }
- }
- }
- if ( isAllowDomainsToBeIgnored() && !getDomainIdsToIgnore().isEmpty() ) {
- return pruneBinaryCombinations( specific );
- }
- return specific;
- }
-
public Set<BinaryDomainCombination> getBinaryDomainCombinationsSpecificToGenome0() {
if ( _binary_domain_combinations_specific_to_0 == null ) {
_binary_domain_combinations_specific_to_0 = getBinaryDomainCombinationsSpecificToGenome( true );
return _binary_domain_combinations_specific_to_1;
}
- private GenomeWideCombinableDomains getCombinableDomainsGenome0() {
- return _combinable_domains_genome_0;
- }
-
- private GenomeWideCombinableDomains getCombinableDomainsGenome1() {
- return _combinable_domains_genome_1;
- }
-
- private Set<String> getDomainIdsToIgnore() {
- return _domain_ids_to_ignore;
- }
-
- private Set<String> getDomainsSpecificToGenome( final boolean specific_to_genome_0 ) {
- final Set<String> specific = new HashSet<String>();
- final Set<String> d0 = getCombinableDomainsGenome0().getAllDomainIds();
- final Set<String> d1 = getCombinableDomainsGenome1().getAllDomainIds();
- if ( specific_to_genome_0 ) {
- for( final String domain0 : d0 ) {
- if ( !d1.contains( domain0 ) ) {
- specific.add( domain0 );
- }
- }
- }
- else {
- for( final String domain1 : d1 ) {
- if ( !d0.contains( domain1 ) ) {
- specific.add( domain1 );
- }
- }
- }
- if ( isAllowDomainsToBeIgnored() && !getDomainIdsToIgnore().isEmpty() ) {
- return pruneDomains( specific );
- }
- return specific;
- }
-
public Set<String> getDomainsSpecificToGenome0() {
if ( _domains_specific_to_0 == null ) {
_domains_specific_to_0 = getDomainsSpecificToGenome( true );
return _shared_domains;
}
+ /**
+  * Stores whether domains may be ignored and resets all cached result sets
+  * to null via forceRecalculation().
+  *
+  * @param allow_domains_to_be_ignored the new value of the flag
+  */
+ public void setAllowDomainsToBeIgnored( final boolean allow_domains_to_be_ignored ) {
+     _allow_domains_to_be_ignored = allow_domains_to_be_ignored;
+     forceRecalculation();
+ }
+
+ /**
+  * Replaces the set of domain ids to ignore (the passed set is stored as is,
+  * not copied) and resets all cached result sets to null via forceRecalculation().
+  *
+  * @param domain_ids_to_ignore the set of domain ids to store
+  */
+ void setDomainIdsToIgnore( final Set<String> domain_ids_to_ignore ) {
+     _domain_ids_to_ignore = domain_ids_to_ignore;
+     forceRecalculation();
+ }
+
+ /**
+  * Sets every cached result set of this calculator to null. The getters that
+  * test their field for null then recompute the value on their next call.
+  */
+ private void forceRecalculation() {
+     // Cached sets of domain ids.
+     _all_domains = null;
+     _domains_specific_to_0 = null;
+     _domains_specific_to_1 = null;
+     _shared_domains = null;
+     // Cached sets of binary domain combinations.
+     _all_binary_domain_combinations = null;
+     _binary_domain_combinations_specific_to_0 = null;
+     _binary_domain_combinations_specific_to_1 = null;
+     _shared_binary_domain_combinations = null;
+ }
+
+ /**
+  * Collects the binary domain combinations which occur in one genome but not
+  * in the other. If ignoring is allowed and the set of domain ids to ignore
+  * is not empty, the result is passed through pruneBinaryCombinations().
+  *
+  * @param specific_to_genome_0 true to collect combinations found only in genome 0,
+  *        false to collect combinations found only in genome 1
+  * @return the (possibly pruned) set of genome specific combinations
+  */
+ private Set<BinaryDomainCombination> getBinaryDomainCombinationsSpecificToGenome( final boolean specific_to_genome_0 ) {
+     final Set<BinaryDomainCombination> specific = new HashSet<BinaryDomainCombination>();
+     final Set<BinaryDomainCombination> bc0 = getCombinableDomainsGenome0().toBinaryDomainCombinations();
+     final Set<BinaryDomainCombination> bc1 = getCombinableDomainsGenome1().toBinaryDomainCombinations();
+     // Choose which genome supplies the candidates and which one they are checked against.
+     final Set<BinaryDomainCombination> candidates = specific_to_genome_0 ? bc0 : bc1;
+     final Set<BinaryDomainCombination> counterpart = specific_to_genome_0 ? bc1 : bc0;
+     for( final BinaryDomainCombination candidate : candidates ) {
+         if ( !counterpart.contains( candidate ) ) {
+             specific.add( candidate );
+         }
+     }
+     if ( isAllowDomainsToBeIgnored() && !getDomainIdsToIgnore().isEmpty() ) {
+         return pruneBinaryCombinations( specific );
+     }
+     return specific;
+ }
+
+ private GenomeWideCombinableDomains getCombinableDomainsGenome0() {
+ return _combinable_domains_genome_0;
+ }
+
+ private GenomeWideCombinableDomains getCombinableDomainsGenome1() {
+ return _combinable_domains_genome_1;
+ }
+
+ private Set<String> getDomainIdsToIgnore() {
+ return _domain_ids_to_ignore;
+ }
+
+ /**
+  * Collects the domain ids which occur in one genome but not in the other.
+  * If ignoring is allowed and the set of domain ids to ignore is not empty,
+  * the result is passed through pruneDomains().
+  *
+  * @param specific_to_genome_0 true to collect domains found only in genome 0,
+  *        false to collect domains found only in genome 1
+  * @return the (possibly pruned) set of genome specific domain ids
+  */
+ private Set<String> getDomainsSpecificToGenome( final boolean specific_to_genome_0 ) {
+     final Set<String> specific = new HashSet<String>();
+     final Set<String> d0 = getCombinableDomainsGenome0().getAllDomainIds();
+     final Set<String> d1 = getCombinableDomainsGenome1().getAllDomainIds();
+     // Choose which genome supplies the candidates and which one they are checked against.
+     final Set<String> candidates = specific_to_genome_0 ? d0 : d1;
+     final Set<String> counterpart = specific_to_genome_0 ? d1 : d0;
+     for( final String candidate : candidates ) {
+         if ( !counterpart.contains( candidate ) ) {
+             specific.add( candidate );
+         }
+     }
+     if ( isAllowDomainsToBeIgnored() && !getDomainIdsToIgnore().isEmpty() ) {
+         return pruneDomains( specific );
+     }
+     return specific;
+ }
+
private void init() {
deleteAllDomainIdsToIgnore();
setAllowDomainsToBeIgnored( false );
}
return pruned;
}
-
- public void setAllowDomainsToBeIgnored( final boolean allow_domains_to_be_ignored ) {
- forceRecalculation();
- _allow_domains_to_be_ignored = allow_domains_to_be_ignored;
- }
-
- void setDomainIdsToIgnore( final Set<String> domain_ids_to_ignore ) {
- forceRecalculation();
- _domain_ids_to_ignore = domain_ids_to_ignore;
- }
}
\ No newline at end of file
*/
public final class DomainCountsDifferenceUtil {
- private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES = COPY_CALCULATION_MODE.MIN;
private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES = COPY_CALCULATION_MODE.MIN;
+ private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES = COPY_CALCULATION_MODE.MIN;
private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_LOW_COPY_SPECIES = COPY_CALCULATION_MODE.MAX;
private static final String PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX = ".prot";
- //FIXME really needs to be tested!
- private static void addCounts( final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
- final BinaryDomainCombination dc,
- final GenomeWideCombinableDomains genome,
- final Set<BinaryDomainCombination> bdc ) {
- if ( !copy_counts.containsKey( dc ) ) {
- copy_counts.put( dc, new ArrayList<Integer>() );
- }
- if ( bdc.contains( dc )
- && ( ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains().get( dc.getId1() ) != null ) ) {
- final int count = ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains()
- .get( dc.getId1() );
- copy_counts.get( dc ).add( count );
- }
- else {
- copy_counts.get( dc ).add( 0 );
- }
- }
-
- private static void addCounts( final SortedMap<String, List<Integer>> copy_counts,
- final String domain,
- final GenomeWideCombinableDomains genome ) {
- if ( !copy_counts.containsKey( domain ) ) {
- copy_counts.put( domain, new ArrayList<Integer>() );
- }
- if ( genome.contains( domain ) ) {
- copy_counts.get( domain ).add( genome.get( domain ).getKeyDomainProteinsCount() );
- }
- else {
- copy_counts.get( domain ).add( 0 );
- }
- }
-
- private static StringBuilder addGoInformation( final String d,
- final Map<String, List<GoId>> domain_id_to_go_ids_map,
- final Map<GoId, GoTerm> go_id_to_term_map ) {
- final StringBuilder sb = new StringBuilder();
- if ( ( domain_id_to_go_ids_map == null ) || domain_id_to_go_ids_map.isEmpty()
- || !domain_id_to_go_ids_map.containsKey( d ) ) {
- return sb;
- }
- final List<GoId> go_ids = domain_id_to_go_ids_map.get( d );
- for( int i = 0; i < go_ids.size(); ++i ) {
- final GoId go_id = go_ids.get( i );
- if ( go_id_to_term_map.containsKey( go_id ) ) {
- appendGoTerm( sb, go_id_to_term_map.get( go_id ) );
- sb.append( "<br>" );
- }
- else {
- sb.append( "go id \"" + go_id + "\" not found [" + d + "]" );
- }
- }
- return sb;
- }
-
- private static void appendGoTerm( final StringBuilder sb, final GoTerm go_term ) {
- final GoId go_id = go_term.getGoId();
- sb.append( "<a href=\"" + SurfacingConstants.AMIGO_LINK + go_id + "\" target=\"amigo_window\">" + go_id
- + "</a>" );
- sb.append( ":" );
- sb.append( go_term.getName() );
- sb.append( " [" );
- sb.append( go_term.getGoNameSpace().toShortString() );
- sb.append( "]" );
- }
-
public static void calculateCopyNumberDifferences( final List<GenomeWideCombinableDomains> genomes,
final SortedMap<Species, List<Protein>> protein_lists_per_species,
final List<String> high_copy_base_species,
writeGoIdsToFile( passing_gos_writer, go_ids_of_passing_domains );
}
+ //FIXME really needs to be tested!
+ /**
+  * Appends one count for a binary domain combination to its list in copy_counts,
+  * creating the list first if the combination has no entry yet.
+  * The appended value is the count stored under id1 in the combining domains of
+  * the genome entry for id0, provided bdc contains the combination and such a
+  * count exists; otherwise 0 is appended.
+  *
+  * @param copy_counts map from combination to its list of counts; updated in place
+  * @param dc the binary domain combination to count
+  * @param genome the genome the count is read from
+  * @param bdc set of combinations; dc is only looked up in genome if it is in this set
+  */
+ private static void addCounts( final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
+                                final BinaryDomainCombination dc,
+                                final GenomeWideCombinableDomains genome,
+                                final Set<BinaryDomainCombination> bdc ) {
+     if ( !copy_counts.containsKey( dc ) ) {
+         copy_counts.put( dc, new ArrayList<Integer>() );
+     }
+     // Stays null when dc is not in bdc or no count is stored for id1.
+     Integer stored_count = null;
+     if ( bdc.contains( dc ) ) {
+         final BasicCombinableDomains key_domain_data = ( BasicCombinableDomains ) genome.get( dc.getId0() );
+         stored_count = key_domain_data.getCombiningDomains().get( dc.getId1() );
+     }
+     copy_counts.get( dc ).add( stored_count == null ? 0 : stored_count );
+ }
+
+ private static void addCounts( final SortedMap<String, List<Integer>> copy_counts,
+ final String domain,
+ final GenomeWideCombinableDomains genome ) {
+ if ( !copy_counts.containsKey( domain ) ) {
+ copy_counts.put( domain, new ArrayList<Integer>() );
+ }
+ if ( genome.contains( domain ) ) {
+ copy_counts.get( domain ).add( genome.get( domain ).getKeyDomainProteinsCount() );
+ }
+ else {
+ copy_counts.get( domain ).add( 0 );
+ }
+ }
+
+ private static StringBuilder addGoInformation( final String d,
+ final Map<String, List<GoId>> domain_id_to_go_ids_map,
+ final Map<GoId, GoTerm> go_id_to_term_map ) {
+ final StringBuilder sb = new StringBuilder();
+ if ( ( domain_id_to_go_ids_map == null ) || domain_id_to_go_ids_map.isEmpty()
+ || !domain_id_to_go_ids_map.containsKey( d ) ) {
+ return sb;
+ }
+ final List<GoId> go_ids = domain_id_to_go_ids_map.get( d );
+ for( int i = 0; i < go_ids.size(); ++i ) {
+ final GoId go_id = go_ids.get( i );
+ if ( go_id_to_term_map.containsKey( go_id ) ) {
+ appendGoTerm( sb, go_id_to_term_map.get( go_id ) );
+ sb.append( "<br>" );
+ }
+ else {
+ sb.append( "go id \"" + go_id + "\" not found [" + d + "]" );
+ }
+ }
+ return sb;
+ }
+
+ /**
+  * Appends a GO term to the builder as a hyperlink on the GO id (the target
+  * address is SurfacingConstants.AMIGO_LINK followed by the id), then a colon,
+  * the term name, and the short form of the GO namespace in square brackets.
+  *
+  * @param sb the builder to append to
+  * @param go_term the GO term to render
+  */
+ private static void appendGoTerm( final StringBuilder sb, final GoTerm go_term ) {
+     final GoId go_id = go_term.getGoId();
+     sb.append( "<a href=\"" ).append( SurfacingConstants.AMIGO_LINK ).append( go_id )
+             .append( "\" target=\"amigo_window\">" ).append( go_id ).append( "</a>" );
+     sb.append( ":" ).append( go_term.getName() );
+     sb.append( " [" ).append( go_term.getGoNameSpace().toShortString() ).append( "]" );
+ }
+
private static void calculateDomainCountsBasedValue( final SortedMap<BinaryDomainCombination, Double> copy_values,
final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
final BinaryDomainCombination bdc,
}
public static enum COPY_CALCULATION_MODE {
- MEAN, MEDIAN, MAX, MIN
+ MAX, MEAN, MEDIAN, MIN
}
}
getLengthStatistic( species ).addValue( domain_length );
}
- private void addLengthStatistics( final Species species, final DescriptiveStatistics length_statistic ) {
- if ( getLengthStatistics().containsKey( species ) ) {
- throw new IllegalArgumentException( "length statistics for [" + species.getSpeciesId() + "] already added" );
- }
- getLengthStatistics().put( species, length_statistic );
- }
-
/**
* Returns descriptive statistics based on the arithmetic means
* for each species.
return getLengthStatistics().get( species );
}
- private SortedMap<Species, DescriptiveStatistics> getLengthStatistics() {
- return _length_statistics;
- }
-
public List<DescriptiveStatistics> getLengthStatisticsList() {
final List<DescriptiveStatistics> list = new ArrayList<DescriptiveStatistics>();
for( final DescriptiveStatistics stats : _length_statistics.values() ) {
public boolean isHasLengthStatistic( final Species species ) {
return getLengthStatistics().containsKey( species );
}
+
+ /**
+  * Registers the length statistics for a species that has none yet.
+  *
+  * @param species the species to register
+  * @param length_statistic the statistics object to store for it
+  * @throws IllegalArgumentException if statistics for the species were already added
+  */
+ private void addLengthStatistics( final Species species, final DescriptiveStatistics length_statistic ) {
+     final SortedMap<Species, DescriptiveStatistics> statistics_by_species = getLengthStatistics();
+     if ( statistics_by_species.containsKey( species ) ) {
+         throw new IllegalArgumentException( "length statistics for [" + species.getSpeciesId() + "] already added" );
+     }
+     statistics_by_species.put( species, length_statistic );
+ }
+
+ /**
+  * Returns the internal species-to-statistics map itself (not a copy).
+  */
+ private SortedMap<Species, DescriptiveStatistics> getLengthStatistics() {
+ return _length_statistics;
+ }
}
_species = new ArrayList<Species>();
}
- private void addDomainLengths( final DomainLengths domain_lengths ) {
- if ( getDomainLengths().containsKey( domain_lengths.getDomainId() ) ) {
- throw new IllegalArgumentException( "domain lengths for [" + domain_lengths.getDomainId()
- + "] already added" );
- }
- getDomainLengths().put( domain_lengths.getDomainId(), domain_lengths );
- }
-
- private void addLength( final String domain_id, final Species species, final int domain_length ) {
- if ( !getDomainLengths().containsKey( domain_id ) ) {
- addDomainLengths( new DomainLengths( domain_id ) );
- }
- getDomainLengths().get( domain_id ).addLength( species, domain_length );
- }
-
public void addLengths( final List<Protein> protein_list ) {
for( final Protein protein : protein_list ) {
final Species species = protein.getSpecies();
return sb;
}
- private SortedMap<String, DomainLengths> getDomainLengths() {
- return _domain_lengths;
- }
-
public DomainLengths getDomainLengths( final String domain_id ) {
return getDomainLengths().get( domain_id );
}
public List<Species> getSpecies() {
return _species;
}
+
+ /**
+  * Registers a <code>DomainLengths</code> entry under its own domain id.
+  *
+  * @param domain_lengths the entry to add
+  * @throws IllegalArgumentException if an entry for the same domain id already exists
+  */
+ private void addDomainLengths( final DomainLengths domain_lengths ) {
+     final SortedMap<String, DomainLengths> lengths_by_domain = getDomainLengths();
+     if ( lengths_by_domain.containsKey( domain_lengths.getDomainId() ) ) {
+         throw new IllegalArgumentException( "domain lengths for [" + domain_lengths.getDomainId()
+                 + "] already added" );
+     }
+     lengths_by_domain.put( domain_lengths.getDomainId(), domain_lengths );
+ }
+
+ /**
+  * Records one observed domain length for a species, creating the
+  * <code>DomainLengths</code> entry for the domain on first use.
+  *
+  * @param domain_id the domain the length belongs to
+  * @param species the species the length was observed in
+  * @param domain_length the length to record
+  */
+ private void addLength( final String domain_id, final Species species, final int domain_length ) {
+     final SortedMap<String, DomainLengths> lengths_by_domain = getDomainLengths();
+     if ( !lengths_by_domain.containsKey( domain_id ) ) {
+         addDomainLengths( new DomainLengths( domain_id ) );
+     }
+     final DomainLengths lengths_for_domain = lengths_by_domain.get( domain_id );
+     lengths_for_domain.addLength( species, domain_length );
+ }
+
+ /**
+  * Returns the internal domain-id-to-lengths map itself (not a copy).
+  */
+ private SortedMap<String, DomainLengths> getDomainLengths() {
+ return _domain_lengths;
+ }
}
public final class DomainParsimonyCalculator {
private static final String TYPE_FORBINARY_CHARACTERS = "parsimony inferred";
- private CharacterStateMatrix<GainLossStates> _gain_loss_matrix;
private CharacterStateMatrix<BinaryStates> _binary_internal_states_matrix;
+ private int _cost;
+ private Map<String, Set<String>> _domain_id_to_secondary_features_map;
+ private CharacterStateMatrix<GainLossStates> _gain_loss_matrix;
private final List<GenomeWideCombinableDomains> _gwcd_list;
private final Phylogeny _phylogeny;
- private int _total_losses;
+ private SortedSet<String> _positive_filter;
private int _total_gains;
+ private int _total_losses;
private int _total_unchanged;
- private int _cost;
- private Map<String, Set<String>> _domain_id_to_secondary_features_map;
- private SortedSet<String> _positive_filter;
private DomainParsimonyCalculator( final Phylogeny phylogeny ) {
init();
setDomainIdToSecondaryFeaturesMap( domain_id_to_secondary_features_map );
}
- int calculateNumberOfBinaryDomainCombination() {
- if ( getGenomeWideCombinableDomainsList().isEmpty() ) {
- throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
- }
- final Set<BinaryDomainCombination> all_binary_combinations = new HashSet<BinaryDomainCombination>();
- for( final GenomeWideCombinableDomains gwcd : getGenomeWideCombinableDomainsList() ) {
- for( final BinaryDomainCombination bc : gwcd.toBinaryDomainCombinations() ) {
- all_binary_combinations.add( bc );
- }
- }
- return all_binary_combinations.size();
- }
-
- CharacterStateMatrix<BinaryStates> createMatrixOfBinaryDomainCombinationPresenceOrAbsence() {
- return createMatrixOfBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList() );
- }
-
- CharacterStateMatrix<BinaryStates> createMatrixOfDomainPresenceOrAbsence() {
- return createMatrixOfDomainPresenceOrAbsence( getGenomeWideCombinableDomainsList(), getPositiveFilter() );
- }
-
- CharacterStateMatrix<BinaryStates> createMatrixOfSecondaryFeaturePresenceOrAbsence( final Map<Species, MappingResults> mapping_results_map ) {
- return createMatrixOfSecondaryFeaturePresenceOrAbsence( getGenomeWideCombinableDomainsList(),
- getDomainIdToSecondaryFeaturesMap(),
- mapping_results_map );
- }
-
- Phylogeny decoratePhylogenyWithDomains( final Phylogeny phylogeny ) {
- for( final PhylogenyNodeIterator it = phylogeny.iteratorPostorder(); it.hasNext(); ) {
- final PhylogenyNode node = it.next();
- final String node_identifier = node.getName();
- final BinaryCharacters bc = new BinaryCharacters( getUnitsOnNode( node_identifier ),
- getUnitsGainedOnNode( node_identifier ),
- getUnitsLostOnNode( node_identifier ),
- TYPE_FORBINARY_CHARACTERS,
- getSumOfPresentOnNode( node_identifier ),
- getSumOfGainsOnNode( node_identifier ),
- getSumOfLossesOnNode( node_identifier ) );
- node.getNodeData().setBinaryCharacters( bc );
- }
- return phylogeny;
- }
-
- private void executeDolloParsimony( final boolean on_domain_presence ) {
- reset();
- final DolloParsimony dollo = DolloParsimony.createInstance();
- dollo.setReturnGainLossMatrix( true );
- dollo.setReturnInternalStates( true );
- CharacterStateMatrix<BinaryStates> states = null;
- if ( on_domain_presence ) {
- states = createMatrixOfDomainPresenceOrAbsence();
- }
- else {
- states = createMatrixOfBinaryDomainCombinationPresenceOrAbsence();
- }
- dollo.execute( getPhylogeny(), states );
- setGainLossMatrix( dollo.getGainLossMatrix() );
- setBinaryInternalStatesMatrix( dollo.getInternalStatesMatrix() );
- setCost( dollo.getCost() );
- setTotalGains( dollo.getTotalGains() );
- setTotalLosses( dollo.getTotalLosses() );
- setTotalUnchanged( dollo.getTotalUnchanged() );
- }
-
public void executeDolloParsimonyOnBinaryDomainCombintionPresence() {
executeDolloParsimony( false );
}
setTotalUnchanged( dollo.getTotalUnchanged() );
}
- private void executeFitchParsimony( final boolean on_domain_presence,
- final boolean use_last,
- final boolean randomize,
- final long random_number_seed ) {
- reset();
- if ( use_last ) {
- System.out.println( " Fitch parsimony: use_last = true" );
- }
- final FitchParsimony<BinaryStates> fitch = new FitchParsimony<BinaryStates>();
- fitch.setRandomize( randomize );
- if ( randomize ) {
- fitch.setRandomNumberSeed( random_number_seed );
- }
- fitch.setUseLast( use_last );
- fitch.setReturnGainLossMatrix( true );
- fitch.setReturnInternalStates( true );
- CharacterStateMatrix<BinaryStates> states = null;
- if ( on_domain_presence ) {
- states = createMatrixOfDomainPresenceOrAbsence( getGenomeWideCombinableDomainsList() );
- }
- else {
- states = createMatrixOfBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList() );
- }
- fitch.execute( getPhylogeny(), states, true );
- setGainLossMatrix( fitch.getGainLossMatrix() );
- setBinaryInternalStatesMatrix( fitch.getInternalStatesMatrix() );
- setCost( fitch.getCost() );
- setTotalGains( fitch.getTotalGains() );
- setTotalLosses( fitch.getTotalLosses() );
- setTotalUnchanged( fitch.getTotalUnchanged() );
- }
-
- private void executeFitchParsimonyOnSecondaryFeatures( final boolean use_last,
- final boolean randomize,
- final long random_number_seed ) {
- reset();
- if ( use_last ) {
- System.out.println( " Fitch parsimony: use_last = true" );
- }
- final FitchParsimony<BinaryStates> fitch = new FitchParsimony<BinaryStates>();
- fitch.setRandomize( randomize );
- if ( randomize ) {
- fitch.setRandomNumberSeed( random_number_seed );
- }
- fitch.setUseLast( use_last );
- fitch.setReturnGainLossMatrix( true );
- fitch.setReturnInternalStates( true );
- final Map<String, Set<String>> map = getDomainIdToSecondaryFeaturesMap();
- final Map<String, String> newmap = new HashMap<String, String>();
- final Iterator<Entry<String, Set<String>>> it = map.entrySet().iterator();
- while ( it.hasNext() ) {
- final Map.Entry<String, Set<String>> pair = it.next();
- if ( pair.getValue().size() != 1 ) {
- throw new IllegalArgumentException( pair.getKey() + " mapps to " + pair.getValue().size() + " items" );
- }
- newmap.put( pair.getKey(), ( String ) pair.getValue().toArray()[ 0 ] );
- }
- final CharacterStateMatrix<BinaryStates> states = createMatrixOfSecondaryFeatureBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList(),
- newmap );
- fitch.execute( getPhylogeny(), states, true );
- setGainLossMatrix( fitch.getGainLossMatrix() );
- setBinaryInternalStatesMatrix( fitch.getInternalStatesMatrix() );
- setCost( fitch.getCost() );
- setTotalGains( fitch.getTotalGains() );
- setTotalLosses( fitch.getTotalLosses() );
- setTotalUnchanged( fitch.getTotalUnchanged() );
- }
-
public void executeFitchParsimonyOnBinaryDomainCombintion( final boolean use_last ) {
executeFitchParsimony( false, use_last, false, 0 );
}
- public void executeFitchParsimonyOnBinaryDomainCombintionOnSecondaryFeatures( final boolean use_last ) {
- executeFitchParsimonyOnSecondaryFeatures( use_last, false, 0 );
- }
-
public void executeFitchParsimonyOnBinaryDomainCombintion( final long random_number_seed ) {
executeFitchParsimony( false, false, true, random_number_seed );
}
+ /**
+  * Runs the secondary-features Fitch parsimony with randomization switched off
+  * (the random number seed argument is passed as 0).
+  *
+  * @param use_last forwarded unchanged to the Fitch parsimony run
+  */
+ public void executeFitchParsimonyOnBinaryDomainCombintionOnSecondaryFeatures( final boolean use_last ) {
+ executeFitchParsimonyOnSecondaryFeatures( use_last, false, 0 );
+ }
+
public void executeFitchParsimonyOnDomainPresence( final boolean use_last ) {
executeFitchParsimony( true, use_last, false, 0 );
}
return _cost;
}
- private Map<String, Set<String>> getDomainIdToSecondaryFeaturesMap() {
- return _domain_id_to_secondary_features_map;
- }
-
public CharacterStateMatrix<Integer> getGainLossCountsMatrix() {
final CharacterStateMatrix<Integer> matrix = new BasicCharacterStateMatrix<Integer>( getGainLossMatrix()
.getNumberOfIdentifiers(), 3 );
return _gain_loss_matrix;
}
- private List<GenomeWideCombinableDomains> getGenomeWideCombinableDomainsList() {
- return _gwcd_list;
- }
-
public CharacterStateMatrix<BinaryStates> getInternalStatesMatrix() {
return _binary_internal_states_matrix;
}
return net;
}
- private Phylogeny getPhylogeny() {
- return _phylogeny;
- }
-
- private SortedSet<String> getPositiveFilter() {
- return _positive_filter;
- }
-
public int getSumOfGainsOnNode( final String node_identifier ) {
return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.GAIN );
}
return getSumOfGainsOnNode( node_identifier ) + getSumOfUnchangedPresentOnNode( node_identifier );
}
- int getSumOfUnchangedAbsentOnNode( final String node_identifier ) {
- return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_ABSENT );
- }
-
- int getSumOfUnchangedOnNode( final String node_identifier ) {
- return getSumOfUnchangedPresentOnNode( node_identifier ) + getSumOfUnchangedAbsentOnNode( node_identifier );
- }
-
- int getSumOfUnchangedPresentOnNode( final String node_identifier ) {
- return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_PRESENT );
- }
-
public int getTotalGains() {
return _total_gains;
}
return present;
}
- SortedSet<String> getUnitsUnchangedAbsentOnNode( final String node_identifier ) {
- return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_ABSENT );
- }
-
- SortedSet<String> getUnitsUnchangedPresentOnNode( final String node_identifier ) {
- return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_PRESENT );
- }
-
- private void init() {
- setDomainIdToSecondaryFeaturesMap( null );
- setPositiveFilter( null );
- reset();
+ int calculateNumberOfBinaryDomainCombination() {
+ if ( getGenomeWideCombinableDomainsList().isEmpty() ) {
+ throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
+ }
+ final Set<BinaryDomainCombination> all_binary_combinations = new HashSet<BinaryDomainCombination>();
+ for( final GenomeWideCombinableDomains gwcd : getGenomeWideCombinableDomainsList() ) {
+ for( final BinaryDomainCombination bc : gwcd.toBinaryDomainCombinations() ) {
+ all_binary_combinations.add( bc );
+ }
+ }
+ return all_binary_combinations.size();
}
- private void reset() {
- setGainLossMatrix( null );
- setBinaryInternalStatesMatrix( null );
- setCost( -1 );
- setTotalGains( -1 );
- setTotalLosses( -1 );
- setTotalUnchanged( -1 );
+ CharacterStateMatrix<BinaryStates> createMatrixOfBinaryDomainCombinationPresenceOrAbsence() {
+ return createMatrixOfBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList() );
}
- private void setBinaryInternalStatesMatrix( final CharacterStateMatrix<BinaryStates> binary_states_matrix ) {
- _binary_internal_states_matrix = binary_states_matrix;
+ CharacterStateMatrix<BinaryStates> createMatrixOfDomainPresenceOrAbsence() {
+ return createMatrixOfDomainPresenceOrAbsence( getGenomeWideCombinableDomainsList(), getPositiveFilter() );
}
- private void setCost( final int cost ) {
- _cost = cost;
+ CharacterStateMatrix<BinaryStates> createMatrixOfSecondaryFeaturePresenceOrAbsence( final Map<Species, MappingResults> mapping_results_map ) {
+ return createMatrixOfSecondaryFeaturePresenceOrAbsence( getGenomeWideCombinableDomainsList(),
+ getDomainIdToSecondaryFeaturesMap(),
+ mapping_results_map );
}
- private void setDomainIdToSecondaryFeaturesMap( final Map<String, Set<String>> domain_id_to_secondary_features_map ) {
- _domain_id_to_secondary_features_map = domain_id_to_secondary_features_map;
+ Phylogeny decoratePhylogenyWithDomains( final Phylogeny phylogeny ) {
+ for( final PhylogenyNodeIterator it = phylogeny.iteratorPostorder(); it.hasNext(); ) {
+ final PhylogenyNode node = it.next();
+ final String node_identifier = node.getName();
+ final BinaryCharacters bc = new BinaryCharacters( getUnitsOnNode( node_identifier ),
+ getUnitsGainedOnNode( node_identifier ),
+ getUnitsLostOnNode( node_identifier ),
+ TYPE_FORBINARY_CHARACTERS,
+ getSumOfPresentOnNode( node_identifier ),
+ getSumOfGainsOnNode( node_identifier ),
+ getSumOfLossesOnNode( node_identifier ) );
+ node.getNodeData().setBinaryCharacters( bc );
+ }
+ return phylogeny;
}
- private void setGainLossMatrix( final CharacterStateMatrix<GainLossStates> gain_loss_matrix ) {
- _gain_loss_matrix = gain_loss_matrix;
+ int getSumOfUnchangedAbsentOnNode( final String node_identifier ) {
+ return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_ABSENT );
}
- private void setPositiveFilter( final SortedSet<String> positive_filter ) {
- _positive_filter = positive_filter;
+ int getSumOfUnchangedOnNode( final String node_identifier ) {
+ return getSumOfUnchangedPresentOnNode( node_identifier ) + getSumOfUnchangedAbsentOnNode( node_identifier );
}
- private void setTotalGains( final int total_gains ) {
- _total_gains = total_gains;
+ int getSumOfUnchangedPresentOnNode( final String node_identifier ) {
+ return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_PRESENT );
}
- private void setTotalLosses( final int total_losses ) {
- _total_losses = total_losses;
+ SortedSet<String> getUnitsUnchangedAbsentOnNode( final String node_identifier ) {
+ return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_ABSENT );
}
- private void setTotalUnchanged( final int total_unchanged ) {
- _total_unchanged = total_unchanged;
+ SortedSet<String> getUnitsUnchangedPresentOnNode( final String node_identifier ) {
+ return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_PRESENT );
+ }
+
+ /**
+  * Runs Dollo parsimony on this calculator's phylogeny and stores the outcome
+  * (gain/loss matrix, internal states matrix, cost, and total gains, losses
+  * and unchanged) on this instance. Previous results are cleared first.
+  *
+  * @param on_domain_presence if true the input matrix is domain presence/absence,
+  *        otherwise binary domain combination presence/absence
+  */
+ private void executeDolloParsimony( final boolean on_domain_presence ) {
+     reset();
+     final DolloParsimony parsimony = DolloParsimony.createInstance();
+     parsimony.setReturnGainLossMatrix( true );
+     parsimony.setReturnInternalStates( true );
+     final CharacterStateMatrix<BinaryStates> presence_matrix = on_domain_presence
+             ? createMatrixOfDomainPresenceOrAbsence()
+             : createMatrixOfBinaryDomainCombinationPresenceOrAbsence();
+     parsimony.execute( getPhylogeny(), presence_matrix );
+     // Copy everything the run produced onto this calculator.
+     setGainLossMatrix( parsimony.getGainLossMatrix() );
+     setBinaryInternalStatesMatrix( parsimony.getInternalStatesMatrix() );
+     setCost( parsimony.getCost() );
+     setTotalGains( parsimony.getTotalGains() );
+     setTotalLosses( parsimony.getTotalLosses() );
+     setTotalUnchanged( parsimony.getTotalUnchanged() );
+ }
+
+ /**
+  * Runs Fitch parsimony on this calculator's phylogeny and stores the outcome
+  * (gain/loss matrix, internal states matrix, cost, and total gains, losses
+  * and unchanged) on this instance. Previous results are cleared first.
+  * <p>
+  * NOTE(review): the domain-presence matrix is built from the genome list only;
+  * the positive filter is not passed here, whereas the Dollo path passes it.
+  * Confirm this is intended.
+  *
+  * @param on_domain_presence if true the input matrix is domain presence/absence,
+  *        otherwise binary domain combination presence/absence
+  * @param use_last forwarded to the Fitch parsimony; a notice is printed when true
+  * @param randomize forwarded to the Fitch parsimony
+  * @param random_number_seed seed handed to the Fitch parsimony, only when <code>randomize</code> is true
+  */
+ private void executeFitchParsimony( final boolean on_domain_presence,
+                                     final boolean use_last,
+                                     final boolean randomize,
+                                     final long random_number_seed ) {
+     reset();
+     if ( use_last ) {
+         System.out.println( " Fitch parsimony: use_last = true" );
+     }
+     final FitchParsimony<BinaryStates> parsimony = new FitchParsimony<BinaryStates>();
+     parsimony.setRandomize( randomize );
+     if ( randomize ) {
+         parsimony.setRandomNumberSeed( random_number_seed );
+     }
+     parsimony.setUseLast( use_last );
+     parsimony.setReturnGainLossMatrix( true );
+     parsimony.setReturnInternalStates( true );
+     final CharacterStateMatrix<BinaryStates> presence_matrix = on_domain_presence
+             ? createMatrixOfDomainPresenceOrAbsence( getGenomeWideCombinableDomainsList() )
+             : createMatrixOfBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList() );
+     parsimony.execute( getPhylogeny(), presence_matrix, true );
+     // Copy everything the run produced onto this calculator.
+     setGainLossMatrix( parsimony.getGainLossMatrix() );
+     setBinaryInternalStatesMatrix( parsimony.getInternalStatesMatrix() );
+     setCost( parsimony.getCost() );
+     setTotalGains( parsimony.getTotalGains() );
+     setTotalLosses( parsimony.getTotalLosses() );
+     setTotalUnchanged( parsimony.getTotalUnchanged() );
+ }
+
+ /**
+  * Runs Fitch parsimony on binary domain combinations after mapping every
+  * domain id to its single secondary feature, and stores the outcome
+  * (gain/loss matrix, internal states matrix, cost, and total gains, losses
+  * and unchanged) on this instance. Previous results are cleared first.
+  *
+  * @param use_last forwarded to the Fitch parsimony; a notice is printed when true
+  * @param randomize forwarded to the Fitch parsimony
+  * @param random_number_seed seed handed to the Fitch parsimony, only when <code>randomize</code> is true
+  * @throws IllegalArgumentException if no domain id to secondary features map has been set,
+  *         or if any domain id maps to a number of secondary features other than one
+  */
+ private void executeFitchParsimonyOnSecondaryFeatures( final boolean use_last,
+                                                        final boolean randomize,
+                                                        final long random_number_seed ) {
+     reset();
+     if ( use_last ) {
+         System.out.println( " Fitch parsimony: use_last = true" );
+     }
+     final FitchParsimony<BinaryStates> fitch = new FitchParsimony<BinaryStates>();
+     fitch.setRandomize( randomize );
+     if ( randomize ) {
+         fitch.setRandomNumberSeed( random_number_seed );
+     }
+     fitch.setUseLast( use_last );
+     fitch.setReturnGainLossMatrix( true );
+     fitch.setReturnInternalStates( true );
+     final Map<String, Set<String>> map = getDomainIdToSecondaryFeaturesMap();
+     if ( map == null ) {
+         // init() leaves the map null; report that as an argument error instead of
+         // failing with a NullPointerException on map.entrySet() below.
+         throw new IllegalArgumentException( "domain id to secondary features map is null or empty" );
+     }
+     // Flatten the one-element sets into a plain id -> feature map.
+     final Map<String, String> newmap = new HashMap<String, String>();
+     for( final Entry<String, Set<String>> pair : map.entrySet() ) {
+         if ( pair.getValue().size() != 1 ) {
+             throw new IllegalArgumentException( pair.getKey() + " mapps to " + pair.getValue().size() + " items" );
+         }
+         newmap.put( pair.getKey(), pair.getValue().iterator().next() );
+     }
+     final CharacterStateMatrix<BinaryStates> states = createMatrixOfSecondaryFeatureBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList(),
+                                                                                                                              newmap );
+     fitch.execute( getPhylogeny(), states, true );
+     setGainLossMatrix( fitch.getGainLossMatrix() );
+     setBinaryInternalStatesMatrix( fitch.getInternalStatesMatrix() );
+     setCost( fitch.getCost() );
+     setTotalGains( fitch.getTotalGains() );
+     setTotalLosses( fitch.getTotalLosses() );
+     setTotalUnchanged( fitch.getTotalUnchanged() );
+ }
+
+ /**
+  * Returns the stored domain id to secondary features map; null until one has been set.
+  */
+ private Map<String, Set<String>> getDomainIdToSecondaryFeaturesMap() {
+ return _domain_id_to_secondary_features_map;
+ }
+
+ /**
+  * Returns the stored list of genome wide combinable domains (the list itself, not a copy).
+  */
+ private List<GenomeWideCombinableDomains> getGenomeWideCombinableDomainsList() {
+ return _gwcd_list;
+ }
+
+ /**
+  * Returns the phylogeny this calculator runs parsimony on.
+  */
+ private Phylogeny getPhylogeny() {
+ return _phylogeny;
+ }
+
+ /**
+  * Returns the stored positive filter; null until one has been set.
+  */
+ private SortedSet<String> getPositiveFilter() {
+ return _positive_filter;
+ }
+
+ /**
+  * Puts the calculator into its initial state: no secondary features map,
+  * no positive filter, and all parsimony results cleared.
+  */
+ private void init() {
+     _domain_id_to_secondary_features_map = null;
+     _positive_filter = null;
+     reset();
+ }
+
+ /**
+  * Clears the results of any previous parsimony run: both matrices become
+  * null, and cost and the gain/loss/unchanged totals become -1.
+  */
+ private void reset() {
+     _gain_loss_matrix = null;
+     _binary_internal_states_matrix = null;
+     _cost = -1;
+     _total_gains = -1;
+     _total_losses = -1;
+     _total_unchanged = -1;
+ }
+
+ /**
+  * Stores the internal states matrix produced by a parsimony run (null clears it).
+  */
+ private void setBinaryInternalStatesMatrix( final CharacterStateMatrix<BinaryStates> binary_states_matrix ) {
+ _binary_internal_states_matrix = binary_states_matrix;
+ }
+
+ /**
+  * Stores the cost reported by a parsimony run; reset() stores -1 here.
+  */
+ private void setCost( final int cost ) {
+ _cost = cost;
+ }
+
+ /**
+  * Stores the domain id to secondary features map (the reference itself, not a copy); null clears it.
+  */
+ private void setDomainIdToSecondaryFeaturesMap( final Map<String, Set<String>> domain_id_to_secondary_features_map ) {
+ _domain_id_to_secondary_features_map = domain_id_to_secondary_features_map;
+ }
+
+ /**
+  * Stores the gain/loss matrix produced by a parsimony run (null clears it).
+  */
+ private void setGainLossMatrix( final CharacterStateMatrix<GainLossStates> gain_loss_matrix ) {
+ _gain_loss_matrix = gain_loss_matrix;
+ }
+
+ /**
+  * Stores the positive filter (the reference itself, not a copy); null clears it.
+  */
+ private void setPositiveFilter( final SortedSet<String> positive_filter ) {
+ _positive_filter = positive_filter;
+ }
+
+ /**
+  * Stores the total number of gains reported by a parsimony run; reset() stores -1 here.
+  */
+ private void setTotalGains( final int total_gains ) {
+ _total_gains = total_gains;
+ }
+
+ /**
+  * Stores the total number of losses reported by a parsimony run; reset() stores -1 here.
+  */
+ private void setTotalLosses( final int total_losses ) {
+ _total_losses = total_losses;
+ }
+
+ private void setTotalUnchanged( final int total_unchanged ) {
+ _total_unchanged = total_unchanged;
}
public static DomainParsimonyCalculator createInstance( final Phylogeny phylogeny ) {
return new DomainParsimonyCalculator( phylogeny, gwcd_list, domain_id_to_secondary_features_map );
}
- /**
- * For folds instead of Pfam-domains, for example
- *
- *
- * @param gwcd_list
- * @return
- */
- static CharacterStateMatrix<BinaryStates> createMatrixOfSecondaryFeaturePresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list,
- final Map<String, Set<String>> domain_id_to_second_features_map,
- final Map<Species, MappingResults> mapping_results_map ) {
+ public static CharacterStateMatrix<BinaryStates> createMatrixOfBinaryDomainCombinationPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list ) {
if ( gwcd_list.isEmpty() ) {
throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
}
- if ( ( domain_id_to_second_features_map == null ) || domain_id_to_second_features_map.isEmpty() ) {
- throw new IllegalArgumentException( "domain id to secondary features map is null or empty" );
- }
final int number_of_identifiers = gwcd_list.size();
- final SortedSet<String> all_secondary_features = new TreeSet<String>();
+ final SortedSet<BinaryDomainCombination> all_binary_combinations = new TreeSet<BinaryDomainCombination>();
+ final Set<BinaryDomainCombination>[] binary_combinations_per_genome = new HashSet[ number_of_identifiers ];
+ int identifier_index = 0;
for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
- int mapped = 0;
- int not_mapped = 0;
- for( final String domain : gwcd.getAllDomainIds() ) {
- if ( domain_id_to_second_features_map.containsKey( domain ) ) {
- all_secondary_features.addAll( domain_id_to_second_features_map.get( domain ) );
- mapped++;
- }
- else {
- not_mapped++;
- }
- }
- if ( mapping_results_map != null ) {
- final MappingResults mr = new MappingResults();
- mr.setDescription( gwcd.getSpecies().getSpeciesId() );
- mr.setSumOfSuccesses( mapped );
- mr.setSumOfFailures( not_mapped );
- mapping_results_map.put( gwcd.getSpecies(), mr );
+ binary_combinations_per_genome[ identifier_index ] = new HashSet<BinaryDomainCombination>();
+ for( final BinaryDomainCombination bc : gwcd.toBinaryDomainCombinations() ) {
+ all_binary_combinations.add( bc );
+ binary_combinations_per_genome[ identifier_index ].add( bc );
}
+ ++identifier_index;
}
- final int number_of_characters = all_secondary_features.size();
+ final int number_of_characters = all_binary_combinations.size();
final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
number_of_characters );
int character_index = 0;
- for( final String second_id : all_secondary_features ) {
- matrix.setCharacter( character_index++, second_id );
+ for( final BinaryDomainCombination bc : all_binary_combinations ) {
+ matrix.setCharacter( character_index++, bc.toString() );
}
- int identifier_index = 0;
+ identifier_index = 0;
final Set<String> all_identifiers = new HashSet<String>();
for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
final String species_id = gwcd.getSpecies().getSpeciesId();
if ( all_identifiers.contains( species_id ) ) {
- throw new IllegalArgumentException( "species [" + species_id + "] is not unique" );
+ throw new AssertionError( "species [" + species_id + "] is not unique" );
}
all_identifiers.add( species_id );
matrix.setIdentifier( identifier_index, species_id );
- final Set<String> all_second_per_gwcd = new HashSet<String>();
- for( final String domain : gwcd.getAllDomainIds() ) {
- if ( domain_id_to_second_features_map.containsKey( domain ) ) {
- all_second_per_gwcd.addAll( domain_id_to_second_features_map.get( domain ) );
- }
- }
for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
- if ( all_second_per_gwcd.contains( matrix.getCharacter( ci ) ) ) {
+ BinaryDomainCombination bc = null;
+ if ( gwcd.getDomainCombinationType() == DomainCombinationType.DIRECTED_ADJACTANT ) {
+ bc = AdjactantDirectedBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
+ }
+ else if ( gwcd.getDomainCombinationType() == DomainCombinationType.DIRECTED ) {
+ bc = DirectedBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
+ }
+ else {
+ bc = BasicBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
+ }
+ if ( binary_combinations_per_genome[ identifier_index ].contains( bc ) ) {
matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
}
else {
return matrix;
}
- public static CharacterStateMatrix<BinaryStates> createMatrixOfSecondaryFeatureBinaryDomainCombinationPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list,
- final Map<String, String> domain_id_to_second_features_map ) {
+ public static CharacterStateMatrix<BinaryStates> createMatrixOfDomainPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list,
+ final SortedSet<String> positive_filter ) {
if ( gwcd_list.isEmpty() ) {
throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
}
- if ( ( domain_id_to_second_features_map == null ) || domain_id_to_second_features_map.isEmpty() ) {
- throw new IllegalArgumentException( "domain id to secondary features map is null or empty" );
+ if ( ( positive_filter != null ) && ( positive_filter.size() < 1 ) ) {
+ throw new IllegalArgumentException( "positive filter is empty" );
}
final int number_of_identifiers = gwcd_list.size();
- final SortedSet<BinaryDomainCombination> all_binary_combinations_mapped = new TreeSet<BinaryDomainCombination>();
- final Set<BinaryDomainCombination>[] binary_combinations_per_genome_mapped = new HashSet[ number_of_identifiers ];
- int identifier_index = 0;
- final SortedSet<String> no_mappings = new TreeSet<String>();
+ final SortedSet<String> all_domain_ids = new TreeSet<String>();
for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
- binary_combinations_per_genome_mapped[ identifier_index ] = new HashSet<BinaryDomainCombination>();
- for( final BinaryDomainCombination bc : gwcd.toBinaryDomainCombinations() ) {
- final BinaryDomainCombination mapped_bc = mapBinaryDomainCombination( domain_id_to_second_features_map,
- bc,
- no_mappings );
- all_binary_combinations_mapped.add( mapped_bc );
- binary_combinations_per_genome_mapped[ identifier_index ].add( mapped_bc );
+ for( final String domain : gwcd.getAllDomainIds() ) {
+ all_domain_ids.add( domain );
}
- ++identifier_index;
}
- if ( !no_mappings.isEmpty() ) {
- ForesterUtil.programMessage( surfacing.PRG_NAME, "No mappings for the following (" + no_mappings.size()
- + "):" );
- for( final String id : no_mappings ) {
- ForesterUtil.programMessage( surfacing.PRG_NAME, id );
+ int number_of_characters = all_domain_ids.size();
+ if ( positive_filter != null ) {
+ //number_of_characters = positive_filter.size(); -- bad if doms in filter but not in genomes
+ number_of_characters = 0;
+ for( final String id : all_domain_ids ) {
+ if ( positive_filter.contains( id ) ) {
+ number_of_characters++;
+ }
}
}
- final int number_of_characters = all_binary_combinations_mapped.size();
final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
number_of_characters );
int character_index = 0;
- for( final BinaryDomainCombination bc : all_binary_combinations_mapped ) {
- matrix.setCharacter( character_index++, bc.toString() );
+ for( final String id : all_domain_ids ) {
+ if ( positive_filter == null ) {
+ matrix.setCharacter( character_index++, id );
+ }
+ else {
+ if ( positive_filter.contains( id ) ) {
+ matrix.setCharacter( character_index++, id );
+ }
+ }
}
- identifier_index = 0;
+ int identifier_index = 0;
final Set<String> all_identifiers = new HashSet<String>();
for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
final String species_id = gwcd.getSpecies().getSpeciesId();
if ( all_identifiers.contains( species_id ) ) {
- throw new AssertionError( "species [" + species_id + "] is not unique" );
+ throw new IllegalArgumentException( "species [" + species_id + "] is not unique" );
}
all_identifiers.add( species_id );
matrix.setIdentifier( identifier_index, species_id );
for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
- BinaryDomainCombination bc = null;
- if ( gwcd.getDomainCombinationType() == DomainCombinationType.DIRECTED_ADJACTANT ) {
- bc = AdjactantDirectedBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
- }
- else if ( gwcd.getDomainCombinationType() == DomainCombinationType.DIRECTED ) {
- bc = DirectedBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
- }
- else {
- bc = BasicBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
+ if ( ForesterUtil.isEmpty( matrix.getCharacter( ci ) ) ) {
+ throw new RuntimeException( "this should not have happened: problem with character #" + ci );
}
- if ( binary_combinations_per_genome_mapped[ identifier_index ].contains( bc ) ) {
+ final String id = matrix.getCharacter( ci );
+ if ( gwcd.contains( id ) ) {
matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
}
else {
return matrix;
}
- private static BinaryDomainCombination mapBinaryDomainCombination( final Map<String, String> domain_id_to_second_features_map,
- final BinaryDomainCombination bc,
- final SortedSet<String> no_mappings ) {
- String id0 = "";
- String id1 = "";
- if ( !domain_id_to_second_features_map.containsKey( bc.getId0() ) ) {
- no_mappings.add( bc.getId0() );
- id0 = bc.getId0();
- }
- else {
- id0 = domain_id_to_second_features_map.get( bc.getId0() );
- }
- if ( !domain_id_to_second_features_map.containsKey( bc.getId1() ) ) {
- no_mappings.add( bc.getId1() );
- id1 = bc.getId1();
- }
- else {
- id1 = domain_id_to_second_features_map.get( bc.getId1() );
- }
- return new BasicBinaryDomainCombination( id0, id1 );
- }
-
- public static CharacterStateMatrix<BinaryStates> createMatrixOfBinaryDomainCombinationPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list ) {
+ public static CharacterStateMatrix<BinaryStates> createMatrixOfSecondaryFeatureBinaryDomainCombinationPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list,
+ final Map<String, String> domain_id_to_second_features_map ) {
if ( gwcd_list.isEmpty() ) {
throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
}
+ if ( ( domain_id_to_second_features_map == null ) || domain_id_to_second_features_map.isEmpty() ) {
+ throw new IllegalArgumentException( "domain id to secondary features map is null or empty" );
+ }
final int number_of_identifiers = gwcd_list.size();
- final SortedSet<BinaryDomainCombination> all_binary_combinations = new TreeSet<BinaryDomainCombination>();
- final Set<BinaryDomainCombination>[] binary_combinations_per_genome = new HashSet[ number_of_identifiers ];
+ final SortedSet<BinaryDomainCombination> all_binary_combinations_mapped = new TreeSet<BinaryDomainCombination>();
+ final Set<BinaryDomainCombination>[] binary_combinations_per_genome_mapped = new HashSet[ number_of_identifiers ];
int identifier_index = 0;
+ final SortedSet<String> no_mappings = new TreeSet<String>();
for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
- binary_combinations_per_genome[ identifier_index ] = new HashSet<BinaryDomainCombination>();
+ binary_combinations_per_genome_mapped[ identifier_index ] = new HashSet<BinaryDomainCombination>();
for( final BinaryDomainCombination bc : gwcd.toBinaryDomainCombinations() ) {
- all_binary_combinations.add( bc );
- binary_combinations_per_genome[ identifier_index ].add( bc );
+ final BinaryDomainCombination mapped_bc = mapBinaryDomainCombination( domain_id_to_second_features_map,
+ bc,
+ no_mappings );
+ all_binary_combinations_mapped.add( mapped_bc );
+ binary_combinations_per_genome_mapped[ identifier_index ].add( mapped_bc );
}
++identifier_index;
}
- final int number_of_characters = all_binary_combinations.size();
+ if ( !no_mappings.isEmpty() ) {
+ ForesterUtil.programMessage( surfacing.PRG_NAME, "No mappings for the following (" + no_mappings.size()
+ + "):" );
+ for( final String id : no_mappings ) {
+ ForesterUtil.programMessage( surfacing.PRG_NAME, id );
+ }
+ }
+ final int number_of_characters = all_binary_combinations_mapped.size();
final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
number_of_characters );
int character_index = 0;
- for( final BinaryDomainCombination bc : all_binary_combinations ) {
+ for( final BinaryDomainCombination bc : all_binary_combinations_mapped ) {
matrix.setCharacter( character_index++, bc.toString() );
}
identifier_index = 0;
else {
bc = BasicBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
}
- if ( binary_combinations_per_genome[ identifier_index ].contains( bc ) ) {
+ if ( binary_combinations_per_genome_mapped[ identifier_index ].contains( bc ) ) {
matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
}
else {
return createMatrixOfDomainPresenceOrAbsence( gwcd_list, null );
}
- public static CharacterStateMatrix<BinaryStates> createMatrixOfDomainPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list,
- final SortedSet<String> positive_filter ) {
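+ /**
+ * Creates a presence/absence matrix of secondary features (for example,
+ * folds instead of Pfam-domains), with one identifier per genome and one
+ * character per secondary feature.
+ *
+ * @param gwcd_list the genome wide combinable domains, must not be empty
+ * @param domain_id_to_second_features_map maps domain ids to their secondary features, must be neither null nor empty
+ * @param mapping_results_map if not null, receives the per-species counts of mapped and unmapped domains
+ * @return the matrix of secondary feature presence or absence
+ */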
+ static CharacterStateMatrix<BinaryStates> createMatrixOfSecondaryFeaturePresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list,
+ final Map<String, Set<String>> domain_id_to_second_features_map,
+ final Map<Species, MappingResults> mapping_results_map ) {
if ( gwcd_list.isEmpty() ) {
throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
}
- if ( ( positive_filter != null ) && ( positive_filter.size() < 1 ) ) {
- throw new IllegalArgumentException( "positive filter is empty" );
+ if ( ( domain_id_to_second_features_map == null ) || domain_id_to_second_features_map.isEmpty() ) {
+ throw new IllegalArgumentException( "domain id to secondary features map is null or empty" );
}
final int number_of_identifiers = gwcd_list.size();
- final SortedSet<String> all_domain_ids = new TreeSet<String>();
+ final SortedSet<String> all_secondary_features = new TreeSet<String>();
for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
+ int mapped = 0;
+ int not_mapped = 0;
for( final String domain : gwcd.getAllDomainIds() ) {
- all_domain_ids.add( domain );
- }
- }
- int number_of_characters = all_domain_ids.size();
- if ( positive_filter != null ) {
- //number_of_characters = positive_filter.size(); -- bad if doms in filter but not in genomes
- number_of_characters = 0;
- for( final String id : all_domain_ids ) {
- if ( positive_filter.contains( id ) ) {
- number_of_characters++;
+ if ( domain_id_to_second_features_map.containsKey( domain ) ) {
+ all_secondary_features.addAll( domain_id_to_second_features_map.get( domain ) );
+ mapped++;
+ }
+ else {
+ not_mapped++;
}
}
+ if ( mapping_results_map != null ) {
+ final MappingResults mr = new MappingResults();
+ mr.setDescription( gwcd.getSpecies().getSpeciesId() );
+ mr.setSumOfSuccesses( mapped );
+ mr.setSumOfFailures( not_mapped );
+ mapping_results_map.put( gwcd.getSpecies(), mr );
+ }
}
+ final int number_of_characters = all_secondary_features.size();
final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
number_of_characters );
int character_index = 0;
- for( final String id : all_domain_ids ) {
- if ( positive_filter == null ) {
- matrix.setCharacter( character_index++, id );
- }
- else {
- if ( positive_filter.contains( id ) ) {
- matrix.setCharacter( character_index++, id );
- }
- }
+ for( final String second_id : all_secondary_features ) {
+ matrix.setCharacter( character_index++, second_id );
}
int identifier_index = 0;
final Set<String> all_identifiers = new HashSet<String>();
}
all_identifiers.add( species_id );
matrix.setIdentifier( identifier_index, species_id );
- for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
- if ( ForesterUtil.isEmpty( matrix.getCharacter( ci ) ) ) {
- throw new RuntimeException( "this should not have happened: problem with character #" + ci );
+ final Set<String> all_second_per_gwcd = new HashSet<String>();
+ for( final String domain : gwcd.getAllDomainIds() ) {
+ if ( domain_id_to_second_features_map.containsKey( domain ) ) {
+ all_second_per_gwcd.addAll( domain_id_to_second_features_map.get( domain ) );
}
- final String id = matrix.getCharacter( ci );
- if ( gwcd.contains( id ) ) {
+ }
+ for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
+ if ( all_second_per_gwcd.contains( matrix.getCharacter( ci ) ) ) {
matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
}
else {
}
return d;
}
+
+ /**
+ * Translates both domain ids of a binary domain combination into their
+ * secondary features. An id without an entry in the map is kept as it is
+ * and recorded in no_mappings.
+ *
+ * @param domain_id_to_second_features_map maps domain ids to secondary features
+ * @param bc the binary domain combination to translate
+ * @param no_mappings collects the domain ids for which the map has no entry
+ * @return a new BasicBinaryDomainCombination built from the translated ids
+ */
+ private static BinaryDomainCombination mapBinaryDomainCombination( final Map<String, String> domain_id_to_second_features_map,
+                                                                    final BinaryDomainCombination bc,
+                                                                    final SortedSet<String> no_mappings ) {
+     final String first;
+     if ( domain_id_to_second_features_map.containsKey( bc.getId0() ) ) {
+         first = domain_id_to_second_features_map.get( bc.getId0() );
+     }
+     else {
+         // No mapping: remember the id and fall back to the original id.
+         no_mappings.add( bc.getId0() );
+         first = bc.getId0();
+     }
+     final String second;
+     if ( domain_id_to_second_features_map.containsKey( bc.getId1() ) ) {
+         second = domain_id_to_second_features_map.get( bc.getId1() );
+     }
+     else {
+         no_mappings.add( bc.getId1() );
+         second = bc.getId1();
+     }
+     return new BasicBinaryDomainCombination( first, second );
+ }
}
+++ /dev/null
-// $Id:
-//
-// FORESTER -- software libraries and applications
-// for evolutionary biology research and applications.
-//
-// Copyright (C) 2008-2009 Christian M. Zmasek
-// Copyright (C) 2008-2009 Burnham Institute for Medical Research
-// All rights reserved
-//
-// This library is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// Lesser General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License along with this library; if not, write to the Free Software
-// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
-//
-// Contact: phylosoft @ gmail . com
-// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
-
-package org.forester.surfacing;
-
-import java.util.Map;
-import java.util.SortedMap;
-import java.util.SortedSet;
-
-import org.forester.phylogeny.Phylogeny;
-import org.forester.species.Species;
-import org.forester.surfacing.PrintableDomainSimilarity.PRINT_OPTION;
-
-/*
- * This is to represent a measure of similarity between two or more domains from
- * different genomes.
- */
-public interface DomainSimilarity extends Comparable<DomainSimilarity> {
-
- static public enum DomainSimilarityScoring {
- DOMAINS, PROTEINS, COMBINATIONS;
- }
-
- public static enum DomainSimilaritySortField {
- MIN, MAX, SD, MEAN, ABS_MAX_COUNTS_DIFFERENCE, MAX_COUNTS_DIFFERENCE, MAX_DIFFERENCE, SPECIES_COUNT, DOMAIN_ID,
- }
-
- public SortedSet<String> getCombinableDomainIds( final Species species_of_combinable_domain );
-
- public String getDomainId();
-
- /**
- * For pairwise similarities, this should return the "difference"; for example the difference in counts
- * for copy number based features (the same as getMaximalDifferenceInCounts(), or the number
- * of actually different domain combinations.
- * For pairwise similarities, this should return the difference,
- * while for comparisons of more than two domains, this should return the maximal difference
- *
- *
- *
- * @return
- */
- public int getMaximalDifference();
-
- /**
- * For pairwise similarities, this should return the difference in counts,
- * while for comparisons of more than two domains, this should return the maximal difference
- * in counts
- *
- *
- * @return the (maximal) difference in counts
- */
- public int getMaximalDifferenceInCounts();
-
- public double getMaximalSimilarityScore();
-
- public double getMeanSimilarityScore();
-
- public double getMinimalSimilarityScore();
-
- /**
- * This should return the number of pairwise distances used to calculate
- * this similarity score
- *
- * @return the number of pairwise distances
- */
- public int getN();
-
- public SortedSet<Species> getSpecies();
-
- /**
- * This should return a map, which maps species names to
- * SpeciesSpecificDomainSimilariyData
- *
- *
- * @return SortedMap<String, SpeciesSpecificDomainSimilariyData>
- */
- public SortedMap<Species, SpeciesSpecificDcData> getSpeciesData();
-
- public double getStandardDeviationOfSimilarityScore();
-
- public StringBuffer toStringBuffer( PRINT_OPTION print_option,
- Map<String, Integer> tax_code_to_id_map,
- Phylogeny phy );
-}
public interface DomainSimilarityCalculator {
- public SortedSet<DomainSimilarity> calculateSimilarities( final PairwiseDomainSimilarityCalculator pairwise_calculator,
- final List<GenomeWideCombinableDomains> cdc_list,
- final boolean ignore_domains_without_combinations_in_any_genome,
- final boolean ignore_domains_specific_to_one_genome );;
+ /**
+ * Calculates the domain similarities for the given list of genome wide
+ * combinable domains, using the supplied pairwise calculator.
+ * NOTE(review): the exact effect of the two "ignore" flags is defined by
+ * the implementations, which are not visible here -- confirm there.
+ *
+ * @param pairwise_calculator the calculator used for pairwise comparisons
+ * @param cdc_list the genome wide combinable domains to compare
+ * @param ignore_domains_without_combinations_in_any_genome see note above
+ * @param ignore_domains_specific_to_one_genome see note above
+ * @return the calculated similarities as a sorted set
+ */
+ public SortedSet<PrintableDomainSimilarity> calculateSimilarities( final PairwiseDomainSimilarityCalculator pairwise_calculator,
+                                                                    final List<GenomeWideCombinableDomains> cdc_list,
+                                                                    final boolean ignore_domains_without_combinations_in_any_genome,
+                                                                    final boolean ignore_domains_specific_to_one_genome );
public static enum Detailedness {
BASIC, LIST_COMBINING_DOMAIN_FOR_EACH_SPECIES, PUNCTILIOUS
}
public static enum GoAnnotationOutput {
- NONE, ALL
+ ALL, NONE
}
}
public DomainCombinationType getDomainCombinationType();
- SortedSet<String> getMostPromiscuosDomain();
-
/**
* This should return a statistic for per domain
* promiscuity in a genome.
public StringBuilder toStringBuilder( GenomeWideCombinableDomainsSortOrder order );
+ SortedSet<String> getMostPromiscuosDomain();
+
public static enum GenomeWideCombinableDomainsSortOrder {
- ALPHABETICAL_KEY_ID, KEY_DOMAIN_PROTEINS_COUNT, KEY_DOMAIN_COUNT, COMBINATIONS_COUNT
+ ALPHABETICAL_KEY_ID, COMBINATIONS_COUNT, KEY_DOMAIN_COUNT, KEY_DOMAIN_PROTEINS_COUNT
}
}
public class MappingResults {
private String _description;
- private int _sum_of_successes;
private int _sum_of_failures;
+ private int _sum_of_successes;
public String getDescription() {
return _description;
public class PairwiseGenomeComparator {
private List<DistanceMatrix> _domain_distance_scores_means;
- private List<DistanceMatrix> _shared_domains_based_distances;
private List<DistanceMatrix> _shared_binary_combinations_based_distances;
+ private List<DistanceMatrix> _shared_domains_based_distances;
public PairwiseGenomeComparator() {
init();
return _shared_domains_based_distances;
}
- private void init() {
- _domain_distance_scores_means = new ArrayList<DistanceMatrix>();
- _shared_domains_based_distances = new ArrayList<DistanceMatrix>();
- _shared_binary_combinations_based_distances = new ArrayList<DistanceMatrix>();
- }
-
public void performPairwiseComparisons( final StringBuilder html_desc,
final boolean sort_by_species_count_first,
final Detailedness detailedness,
final boolean ignore_domains_without_combs_in_all_spec,
final boolean ignore_domains_specific_to_one_species,
- final DomainSimilarity.DomainSimilaritySortField domain_similarity_sort_field,
+ final PrintableDomainSimilarity.DomainSimilaritySortField domain_similarity_sort_field,
final PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option,
- final DomainSimilarity.DomainSimilarityScoring scoring,
+ final PrintableDomainSimilarity.DomainSimilarityScoring scoring,
final Map<String, List<GoId>> domain_id_to_go_ids_map,
final Map<GoId, GoTerm> go_id_to_term_map,
final GoNameSpace go_namespace_limit,
sort_by_species_count_first,
true,
calc_similarity_scores );
- final SortedSet<DomainSimilarity> similarities = calc
+ final SortedSet<PrintableDomainSimilarity> similarities = calc
.calculateSimilarities( pw_calc,
genome_pair,
ignore_domains_without_combs_in_all_spec,
}
}
+ private void init() {
+ _domain_distance_scores_means = new ArrayList<DistanceMatrix>();
+ _shared_domains_based_distances = new ArrayList<DistanceMatrix>();
+ _shared_binary_combinations_based_distances = new ArrayList<DistanceMatrix>();
+ }
+
static private String[] getAllUniqueDomainIdAsArray( final List<GenomeWideCombinableDomains> list_of_genome_wide_combinable_domains ) {
String[] all_domain_ids_array;
final SortedSet<String> all_domain_ids = new TreeSet<String>();
import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
import org.forester.util.ForesterUtil;
-public class PrintableDomainSimilarity implements DomainSimilarity {
+public class PrintableDomainSimilarity implements Comparable<PrintableDomainSimilarity> {
- final public static String SPECIES_SEPARATOR = " ";
- final private static int EQUAL = 0;
- final private static String NO_SPECIES = " ";
+ final public static String SPECIES_SEPARATOR = " ";
+ final private static int EQUAL = 0;
+ final private static String NO_SPECIES = " ";
+ private static final boolean OUTPUT_TAXCODES_PER_DOMAIN = false;
final private CombinableDomains _combinable_domains;
private DomainSimilarityCalculator.Detailedness _detailedness;
final private double _max;
}
}
- @Override
- public int compareTo( final DomainSimilarity domain_similarity ) {
+ public int compareTo( final PrintableDomainSimilarity domain_similarity ) {
if ( this == domain_similarity ) {
return EQUAL;
}
return compareByDomainId( domain_similarity );
}
- @Override
public SortedSet<String> getCombinableDomainIds( final Species species_of_combinable_domain ) {
final SortedSet<String> sorted_ids = new TreeSet<String>();
if ( getSpeciesData().containsKey( species_of_combinable_domain ) ) {
return sorted_ids;
}
- @Override
public String getDomainId() {
return getCombinableDomains().getKeyDomain();
}
- @Override
+ /**
+ * For pairwise similarities, this should return the "difference"; for example the difference in counts
+ * for copy number based features (the same as getMaximalDifferenceInCounts()), or the number
+ * of actually different domain combinations.
+ * For comparisons of more than two domains, this should return the maximal difference.
+ *
+ */
public int getMaximalDifference() {
return _max_difference;
}
- @Override
+ /**
+ * For pairwise similarities, this should return the difference in counts,
+ * while for comparisons of more than two domains, this should return the maximal difference
+ * in counts
+ *
+ *
+ * @return the (maximal) difference in counts
+ */
public int getMaximalDifferenceInCounts() {
return _max_difference_in_counts;
}
- @Override
public double getMaximalSimilarityScore() {
return _max;
}
- @Override
public double getMeanSimilarityScore() {
return _mean;
}
- @Override
public double getMinimalSimilarityScore() {
return _min;
}
- @Override
+ /**
+ * This should return the number of pairwise distances used to calculate
+ * this similarity score
+ *
+ * @return the number of pairwise distances
+ */
public int getN() {
return _n;
}
- @Override
public SortedSet<Species> getSpecies() {
final SortedSet<Species> species = new TreeSet<Species>();
for( final Species s : getSpeciesData().keySet() ) {
return _species_order;
}
- @Override
+ /**
+ * This should return a map, which maps species to
+ * SpeciesSpecificDcData
+ *
+ *
+ * @return SortedMap<Species, SpeciesSpecificDcData>
+ */
public SortedMap<Species, SpeciesSpecificDcData> getSpeciesData() {
return _species_data;
}
- @Override
public double getStandardDeviationOfSimilarityScore() {
return _sd;
}
_species_order = species_order;
}
- @Override
public StringBuffer toStringBuffer( final PrintableDomainSimilarity.PRINT_OPTION print_option,
final Map<String, Integer> tax_code_to_id_map,
final Phylogeny phy ) {
case SIMPLE_TAB_DELIMITED:
return toStringBufferSimpleTabDelimited();
case HTML:
- return toStringBufferDetailedHTML( tax_code_to_id_map, phy );
+ return toStringBufferDetailedHTML( tax_code_to_id_map, phy, OUTPUT_TAXCODES_PER_DOMAIN );
default:
throw new AssertionError( "Unknown print option: " + print_option );
}
final Map<String, Integer> tax_code_to_id_map,
final Phylogeny phy ) {
if ( html ) {
+ sb.append( "<tr>" );
+ sb.append( "<td>" );
addTaxWithLink( sb, species.getSpeciesId(), tax_code_to_id_map, phy );
+ sb.append( "</td>" );
}
else {
sb.append( species.getSpeciesId() );
}
if ( getDetaildness() != DomainSimilarityCalculator.Detailedness.BASIC ) {
if ( html ) {
- sb.append( ":" );
+ //sb.append( ":" );
}
else {
sb.append( "\t" );
sb.append( getSpeciesData().get( species ).toStringBuffer( getDetaildness(), html ) );
}
if ( html ) {
- sb.append( "<br>" );
+ //sb.append( "<br>" );
+ sb.append( "</tr>" );
}
else {
sb.append( "\n\t" );
sb.append( "</b>" );
}
- private int compareByDomainId( final DomainSimilarity other ) {
+ private int compareByDomainId( final PrintableDomainSimilarity other ) {
return getDomainId().compareToIgnoreCase( other.getDomainId() );
}
}
for( final Map.Entry<String, SortedSet<String>> e : m.entrySet() ) {
sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + e.getKey() + "\">" + e.getKey() + "</a>" );
- sb.append( ": " );
+ sb.append( " " );
sb.append( "<span style=\"font-size:7px\">" );
for( final String tax : e.getValue() ) {
final String hex = SurfacingUtil.obtainHexColorStringDependingOnTaxonomyGroup( tax, null );
return sb;
}
+ /**
+ * Concatenates the species specific domain data of all species, in the
+ * key order of the sorted species data map.
+ * In HTML mode the per-species rows are enclosed in a table element; in
+ * plain text mode no markup is emitted.
+ *
+ * @param html true to produce HTML output
+ * @param tax_code_to_id_map passed through to the per-species output
+ * @param phy passed through to the per-species output
+ * @return the concatenated species data
+ */
+ private StringBuffer getSpeciesDataInAlphabeticalOrder( final boolean html,
+                                                         final Map<String, Integer> tax_code_to_id_map,
+                                                         final Phylogeny phy ) {
+     final StringBuffer sb = new StringBuffer();
+     // Table markup belongs to the HTML output only.
+     if ( html ) {
+         sb.append( "<table>" );
+     }
+     for( final Species species : getSpeciesData().keySet() ) {
+         addSpeciesSpecificDomainData( sb, species, html, tax_code_to_id_map, phy );
+     }
+     if ( html ) {
+         sb.append( "</table>" );
+     }
+     return sb;
+ }
+
+ /**
+ * Concatenates the species specific domain data following the custom
+ * species order. Species of the custom order without data contribute the
+ * NO_SPECIES placeholder followed by SPECIES_SEPARATOR.
+ * In HTML mode the per-species rows are enclosed in a table element, as
+ * in the alphabetical variant, because each species is written as a
+ * table row.
+ *
+ * @param html true to produce HTML output
+ * @param tax_code_to_id_map passed through to the per-species output
+ * @param phy passed through to the per-species output
+ * @return the concatenated species data
+ */
+ private StringBuffer getSpeciesDataInCustomOrder( final boolean html,
+                                                   final Map<String, Integer> tax_code_to_id_map,
+                                                   final Phylogeny phy ) {
+     final StringBuffer sb = new StringBuffer();
+     if ( html ) {
+         sb.append( "<table>" );
+     }
+     for( final Species order_species : getSpeciesCustomOrder() ) {
+         if ( getSpeciesData().containsKey( order_species ) ) {
+             addSpeciesSpecificDomainData( sb, order_species, html, tax_code_to_id_map, phy );
+         }
+         else {
+             sb.append( PrintableDomainSimilarity.NO_SPECIES );
+             sb.append( PrintableDomainSimilarity.SPECIES_SEPARATOR );
+         }
+     }
+     if ( html ) {
+         sb.append( "</table>" );
+     }
+     return sb;
+ }
+
private StringBuffer getTaxonomyGroupDistribution( final Phylogeny tol ) {
- //TODO work on me
final SortedMap<String, Set<String>> domain_to_species_set_map = new TreeMap<String, Set<String>>();
for( final Species species : getSpeciesData().keySet() ) {
for( final String combable_dom : getCombinableDomainIds( species ) ) {
sb.append( "<table>" );
for( final Map.Entry<String, Set<String>> domain_to_species_set : domain_to_species_set_map.entrySet() ) {
final Map<String, Integer> counts = new HashMap<String, Integer>();
- // final ValueComparator bvc = new ValueComparator( counts );
- // final SortedMap<String, Integer> sorted_counts = new TreeMap<String, Integer>( bvc );
for( final String tax_code : domain_to_species_set.getValue() ) {
final String group = SurfacingUtil.obtainTaxonomyGroup( tax_code, tol );
if ( !ForesterUtil.isEmpty( group ) ) {
}
counts_to_groups.get( c ).add( group_to_counts.getKey() );
}
- // sorted_counts.putAll( counts );
sb.append( "<tr>" );
sb.append( "<td>" );
sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain_to_species_set.getKey() + "\">"
+ domain_to_species_set.getKey() + "</a>" );
- sb.append( ": " );
+ sb.append( " " );
sb.append( "</td>" );
- // sb.append( "<span style=\"font-size:9px\">" );
boolean first = true;
for( final Entry<Integer, SortedSet<String>> count_to_groups : counts_to_groups.entrySet() ) {
if ( first ) {
sb.append( "<td>" );
final SortedSet<String> groups = count_to_groups.getValue();
sb.append( count_to_groups.getKey() );
- sb.append( ":" );
+ sb.append( " " );
for( final String group : groups ) {
final Color color = ForesterUtil.obtainColorDependingOnTaxonomyGroup( group );
if ( color == null ) {
sb.append( "</td>" );
sb.append( "</tr>" );
}
- // sb.append( "</span>" );
sb.append( ForesterUtil.getLineSeparator() );
}
sb.append( "</table>" );
- // i am just a template and need to be modified for "printout" TODO
- // for( final Map.Entry<String, SortedSet<String>> e : m.entrySet() ) {
- // sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + e.getKey() + "\">" + e.getKey() + "</a>" );
- // sb.append( ": " );
- // sb.append( "<span style=\"font-size:8px\">" );
- // for( final String tax : e.getValue() ) {
- // final String hex = SurfacingUtil.obtainHexColorStringDependingOnTaxonomyGroup( tax, null );
- // if ( !ForesterUtil.isEmpty( hex ) ) {
- // sb.append( "<span style=\"color:" );
- // sb.append( hex );
- // sb.append( "\">" );
- // sb.append( tax );
- // sb.append( "</span>" );
- // }
- // else {
- // sb.append( tax );
- // }
- // sb.append( " " );
- // }
- // sb.append( "</span>" );
- // sb.append( "<br>\n" );
- // }
- return sb;
- }
-
- /*
- public class Testing {
-
- public static void main(String[] args) {
-
- HashMap<String,Double> map = new HashMap<String,Double>();
- ValueComparator bvc = new ValueComparator(map);
- TreeMap<String,Double> sorted_map = new TreeMap<String,Double>(bvc);
-
- map.put("A",99.5);
- map.put("B",67.4);
- map.put("C",67.4);
- map.put("D",67.3);
-
- System.out.println("unsorted map: "+map);
-
- sorted_map.putAll(map);
-
- System.out.println("results: "+sorted_map);
- }
- }
-
-
-
- */
- private StringBuffer getSpeciesDataInAlphabeticalOrder( final boolean html,
- final Map<String, Integer> tax_code_to_id_map,
- final Phylogeny phy ) {
- final StringBuffer sb = new StringBuffer();
- for( final Species species : getSpeciesData().keySet() ) {
- addSpeciesSpecificDomainData( sb, species, html, tax_code_to_id_map, phy );
- }
- return sb;
- }
-
- private StringBuffer getSpeciesDataInCustomOrder( final boolean html,
- final Map<String, Integer> tax_code_to_id_map,
- final Phylogeny phy ) {
- final StringBuffer sb = new StringBuffer();
- for( final Species order_species : getSpeciesCustomOrder() ) {
- if ( getSpeciesData().keySet().contains( order_species ) ) {
- addSpeciesSpecificDomainData( sb, order_species, html, tax_code_to_id_map, phy );
- }
- else {
- sb.append( PrintableDomainSimilarity.NO_SPECIES );
- sb.append( PrintableDomainSimilarity.SPECIES_SEPARATOR );
- }
- }
return sb;
}
return _treat_as_binary_comparison;
}
- private StringBuffer toStringBufferDetailedHTML( final Map<String, Integer> tax_code_to_id_map, final Phylogeny phy ) {
+ private StringBuffer toStringBufferDetailedHTML( final Map<String, Integer> tax_code_to_id_map,
+ final Phylogeny phy,
+ final boolean output_tax_codes_per_domain ) {
final StringBuffer sb = new StringBuffer();
sb.append( "<tr>" );
sb.append( "<td>" );
if ( ( getSpeciesCustomOrder() == null ) || getSpeciesCustomOrder().isEmpty() ) {
sb.append( "<td>" );
sb.append( getSpeciesDataInAlphabeticalOrder( true, tax_code_to_id_map, phy ) );
- sb.append( getDomainDataInAlphabeticalOrder() );
+ if ( output_tax_codes_per_domain ) {
+ sb.append( getDomainDataInAlphabeticalOrder() );
+ }
sb.append( getTaxonomyGroupDistribution( phy ) );
sb.append( "</td>" );
}
else {
sb.append( "<td>" );
sb.append( getSpeciesDataInCustomOrder( true, tax_code_to_id_map, phy ) );
- sb.append( getDomainDataInAlphabeticalOrder() );
+ if ( output_tax_codes_per_domain ) {
+ sb.append( getDomainDataInAlphabeticalOrder() );
+ }
sb.append( getTaxonomyGroupDistribution( phy ) );
sb.append( "</td>" );
}
return sb;
}
+ /**
+ * The available scoring modes for domain similarities.
+ * NOTE(review): presumably selects whether scores are based on domain
+ * combinations, domain counts, or protein counts -- confirm against the
+ * calculators using it.
+ */
+ public static enum DomainSimilarityScoring {
+     COMBINATIONS,
+     DOMAINS,
+     PROTEINS
+ }
+
+ /**
+ * The fields selectable for sorting domain similarities.
+ * NOTE(review): how each field is applied is decided by the comparison
+ * code, which is not visible here -- confirm there.
+ */
+ public static enum DomainSimilaritySortField {
+     ABS_MAX_COUNTS_DIFFERENCE,
+     DOMAIN_ID,
+     MAX,
+     MAX_COUNTS_DIFFERENCE,
+     MAX_DIFFERENCE,
+     MEAN,
+     MIN,
+     SD,
+     SPECIES_COUNT
+ }
+
public static enum PRINT_OPTION {
HTML, SIMPLE_TAB_DELIMITED;
}
final StringBuffer sb = new StringBuffer();
if ( detailedness == DomainSimilarityCalculator.Detailedness.PUNCTILIOUS ) {
if ( html ) {
- sb.append( " " );
+ //sb.append( " " );
+ sb.append( "<td>" );
}
sb.append( getKeyDomainDomainsCount() );
if ( html ) {
- sb.append( ", " );
+ //sb.append( ", " );
+ sb.append( "</td><td>" );
}
else {
sb.append( "\t" );
}
sb.append( getKeyDomainProteinsCount() );
if ( html ) {
- sb.append( ", " );
+ // sb.append( ", " );
+ sb.append( "</td><td>" );
}
else {
sb.append( "\t" );
}
sb.append( getCombinableDomainsCount() );
- if ( html && !getCombinableDomainIdToCountsMap().isEmpty() ) {
- sb.append( ":" );
+ if ( html /*&& !getCombinableDomainIdToCountsMap().isEmpty()*/) {
+ // sb.append( ":" );
+ sb.append( "</td><td>" );
}
}
if ( html ) {
sb.append( link );
}
sb.append( "]" );
+ sb.append( "</td>" );
}
return sb;
}
*/
public SortedMap<String, Integer> getCombinableDomainIdToCountsMap();
+ public SortedSet<String> getKeyDomainProteins();
+
public int getNumberOfProteinsExhibitingCombinationWith( final String domain_id );
public StringBuffer toStringBuffer( final DomainSimilarityCalculator.Detailedness detailedness, boolean html );
- public SortedSet<String> getKeyDomainProteins();
-
void addKeyDomainProtein( String protein );
}
public static final String NONE = "[none]";
public static final String PFAM_FAMILY_ID_LINK = "http://pfam.janelia.org/family/";
public static final String UNIPROT_TAXONOMY_ID_LINK = "http://www.uniprot.org/taxonomy/";
+ static final boolean PRINT_MORE_DOM_SIMILARITY_INFO = false;
static final boolean SECONDARY_FEATURES_ARE_SCOP = true;
static final String SECONDARY_FEATURES_SCOP_LINK = "http://scop.mrc-lmb.cam.ac.uk/scop/search.cgi?key=";
- static final boolean PRINT_MORE_DOM_SIMILARITY_INFO = false;
}
}
}
- public static DescriptiveStatistics calculateDescriptiveStatisticsForMeanValues( final Set<DomainSimilarity> similarities ) {
+ public static DescriptiveStatistics calculateDescriptiveStatisticsForMeanValues( final Set<PrintableDomainSimilarity> similarities ) {
final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
- for( final DomainSimilarity similarity : similarities ) {
+ for( final PrintableDomainSimilarity similarity : similarities ) {
stats.addValue( similarity.getMeanSimilarityScore() );
}
return stats;
return m;
}
- public static void decoratePrintableDomainSimilarities( final SortedSet<DomainSimilarity> domain_similarities,
+ public static void decoratePrintableDomainSimilarities( final SortedSet<PrintableDomainSimilarity> domain_similarities,
final Detailedness detailedness ) {
- for( final DomainSimilarity domain_similarity : domain_similarities ) {
+ for( final PrintableDomainSimilarity domain_similarity : domain_similarities ) {
if ( domain_similarity instanceof PrintableDomainSimilarity ) {
- final PrintableDomainSimilarity printable_domain_similarity = ( PrintableDomainSimilarity ) domain_similarity;
+ final PrintableDomainSimilarity printable_domain_similarity = domain_similarity;
printable_domain_similarity.setDetailedness( detailedness );
}
}
final Writer simple_tab_writer,
final Writer single_writer,
Map<Character, Writer> split_writers,
- final SortedSet<DomainSimilarity> similarities,
+ final SortedSet<PrintableDomainSimilarity> similarities,
final boolean treat_as_binary,
final List<Species> species_order,
final PrintableDomainSimilarity.PRINT_OPTION print_option,
- final DomainSimilarity.DomainSimilarityScoring scoring,
+ final PrintableDomainSimilarity.DomainSimilarityScoring scoring,
final boolean verbose,
final Map<String, Integer> tax_code_to_id_map,
final Phylogeny phy,
break;
}
//
- for( final DomainSimilarity similarity : similarities ) {
+ for( final PrintableDomainSimilarity similarity : similarities ) {
if ( ( species_order != null ) && !species_order.isEmpty() ) {
- ( ( PrintableDomainSimilarity ) similarity ).setSpeciesOrder( species_order );
+ ( similarity ).setSpeciesOrder( species_order );
}
if ( single_writer != null ) {
if ( !ForesterUtil.isEmpty( pos_filter_doms ) && pos_filter_doms.contains( similarity.getDomainId() ) ) {
w.write( SurfacingConstants.NL );
}
//
- for( final DomainSimilarity similarity : similarities ) {
+ for( final PrintableDomainSimilarity similarity : similarities ) {
if ( ( species_order != null ) && !species_order.isEmpty() ) {
- ( ( PrintableDomainSimilarity ) similarity ).setSpeciesOrder( species_order );
+ ( similarity ).setSpeciesOrder( species_order );
}
if ( simple_tab_writer != null ) {
simple_tab_writer.write( similarity.toStringBuffer( PRINT_OPTION.SIMPLE_TAB_DELIMITED,
cdc_list.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve,
true,
new BasicSpecies( "nemve" ) ) );
- final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
+ final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
false,
false,
true );
- final SortedSet<DomainSimilarity> sims = calc
+ final SortedSet<PrintableDomainSimilarity> sims = calc
.calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
cdc_list,
true,
true );
- final Iterator<DomainSimilarity> sims_it = sims.iterator();
- final DomainSimilarity sa = sims_it.next();
+ final Iterator<PrintableDomainSimilarity> sims_it = sims.iterator();
+ final PrintableDomainSimilarity sa = sims_it.next();
if ( !sa.getDomainId().equals( "A" ) ) {
return false;
}
if ( sa.getMaximalDifferenceInCounts() != 3 ) {
return false;
}
- final DomainSimilarity sb = sims_it.next();
+ final PrintableDomainSimilarity sb = sims_it.next();
if ( !sb.getDomainId().equals( "B" ) ) {
return false;
}
if ( sb.getMaximalDifferenceInCounts() != 2 ) {
return false;
}
- final DomainSimilarity sc = sims_it.next();
+ final PrintableDomainSimilarity sc = sims_it.next();
if ( !sc.getDomainId().equals( "C" ) ) {
return false;
}
cdc_list2.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve,
false,
new BasicSpecies( "nemve" ) ) );
- final DomainSimilarityCalculator calc2 = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
+ final DomainSimilarityCalculator calc2 = new BasicDomainSimilarityCalculator( PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
false,
false,
true );
- final SortedSet<DomainSimilarity> sims2 = calc2
+ final SortedSet<PrintableDomainSimilarity> sims2 = calc2
.calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
cdc_list2,
false,
true );
- final Iterator<DomainSimilarity> sims_it2 = sims2.iterator();
- final DomainSimilarity sa2 = sims_it2.next();
+ final Iterator<PrintableDomainSimilarity> sims_it2 = sims2.iterator();
+ final PrintableDomainSimilarity sa2 = sims_it2.next();
if ( !sa2.getDomainId().equals( "A" ) ) {
return false;
}
cdc_list3.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve,
true,
new BasicSpecies( "nemve" ) ) );
- final DomainSimilarityCalculator calc3 = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
+ final DomainSimilarityCalculator calc3 = new BasicDomainSimilarityCalculator( PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
false,
false,
true );
- final SortedSet<DomainSimilarity> sims3 = calc3
+ final SortedSet<PrintableDomainSimilarity> sims3 = calc3
.calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
cdc_list3,
false,
true );
- final Iterator<DomainSimilarity> sims_it3 = sims3.iterator();
- final DomainSimilarity sa3 = sims_it3.next();
+ final Iterator<PrintableDomainSimilarity> sims_it3 = sims3.iterator();
+ final PrintableDomainSimilarity sa3 = sims_it3.next();
if ( !sa3.getDomainId().equals( "A" ) ) {
return false;
}
cdc_list4.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve,
false,
new BasicSpecies( "nemve" ) ) );
- final DomainSimilarityCalculator calc4 = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
+ final DomainSimilarityCalculator calc4 = new BasicDomainSimilarityCalculator( PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
true,
false,
true );
- final SortedSet<DomainSimilarity> sims4 = calc4
+ final SortedSet<PrintableDomainSimilarity> sims4 = calc4
.calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
cdc_list4,
false,
true );
- final Iterator<DomainSimilarity> sims_it4 = sims4.iterator();
- final DomainSimilarity sa4 = sims_it4.next();
+ final Iterator<PrintableDomainSimilarity> sims_it4 = sims4.iterator();
+ final PrintableDomainSimilarity sa4 = sims_it4.next();
if ( !sa4.getDomainId().equals( "A" ) ) {
return false;
}
if ( ssdsd4.getNumberOfProteinsExhibitingCombinationWith( "X" ) != 3 ) {
return false;
}
- final SortedSet<DomainSimilarity> sims4_d = calc4
+ final SortedSet<PrintableDomainSimilarity> sims4_d = calc4
.calculateSimilarities( new DomainCountsBasedPairwiseSimilarityCalculator(), cdc_list4, false, true );
- final Iterator<DomainSimilarity> sims_it4_d = sims4_d.iterator();
- final DomainSimilarity sa4_d = sims_it4_d.next();
+ final Iterator<PrintableDomainSimilarity> sims_it4_d = sims4_d.iterator();
+ final PrintableDomainSimilarity sa4_d = sims_it4_d.next();
if ( !sa4_d.getDomainId().equals( "A" ) ) {
return false;
}
if ( sa4_d.getN() != 6 ) {
return false;
}
- final SortedSet<DomainSimilarity> sims4_p = calc4
+ final SortedSet<PrintableDomainSimilarity> sims4_p = calc4
.calculateSimilarities( new ProteinCountsBasedPairwiseDomainSimilarityCalculator(),
cdc_list4,
false,
true );
- final Iterator<DomainSimilarity> sims_it4_p = sims4_p.iterator();
- final DomainSimilarity sa4_p = sims_it4_p.next();
+ final Iterator<PrintableDomainSimilarity> sims_it4_p = sims4_p.iterator();
+ final PrintableDomainSimilarity sa4_p = sims_it4_p.next();
if ( !sa4_p.getDomainId().equals( "A" ) ) {
return false;
}
cdc_list5.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve,
true,
new BasicSpecies( "nemve" ) ) );
- final SortedSet<DomainSimilarity> sims5_d = calc4
+ final SortedSet<PrintableDomainSimilarity> sims5_d = calc4
.calculateSimilarities( new DomainCountsBasedPairwiseSimilarityCalculator(), cdc_list5, false, true );
- final Iterator<DomainSimilarity> sims_it5_d = sims5_d.iterator();
- final DomainSimilarity sa5_d = sims_it5_d.next();
+ final Iterator<PrintableDomainSimilarity> sims_it5_d = sims5_d.iterator();
+ final PrintableDomainSimilarity sa5_d = sims_it5_d.next();
if ( sa5_d.getSpecies().size() != 4 ) {
return false;
}
if ( sa5_d.getMaximalDifferenceInCounts() != 11 ) {
return false;
}
- final SortedSet<DomainSimilarity> sims5_p = calc4
+ final SortedSet<PrintableDomainSimilarity> sims5_p = calc4
.calculateSimilarities( new ProteinCountsBasedPairwiseDomainSimilarityCalculator(),
cdc_list5,
false,
true );
- final Iterator<DomainSimilarity> sims_it5_p = sims5_p.iterator();
- final DomainSimilarity sa5_p = sims_it5_p.next();
+ final Iterator<PrintableDomainSimilarity> sims_it5_p = sims5_p.iterator();
+ final PrintableDomainSimilarity sa5_p = sims_it5_p.next();
if ( !sa5_p.getDomainId().equals( "A" ) ) {
return false;
}
cdc_list6.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve,
false,
new BasicSpecies( "nemve" ) ) );
- final SortedSet<DomainSimilarity> sims6_d = calc4
+ final SortedSet<PrintableDomainSimilarity> sims6_d = calc4
.calculateSimilarities( new DomainCountsBasedPairwiseSimilarityCalculator(), cdc_list6, false, true );
- final Iterator<DomainSimilarity> sims_it6_d = sims6_d.iterator();
- final DomainSimilarity sa6_d = sims_it6_d.next();
+ final Iterator<PrintableDomainSimilarity> sims_it6_d = sims6_d.iterator();
+ final PrintableDomainSimilarity sa6_d = sims_it6_d.next();
if ( sa6_d.getSpecies().size() != 4 ) {
return false;
}
if ( sa6_d.getMaximalDifferenceInCounts() != 11 ) {
return false;
}
- final SortedSet<DomainSimilarity> sims6_p = calc4
+ final SortedSet<PrintableDomainSimilarity> sims6_p = calc4
.calculateSimilarities( new ProteinCountsBasedPairwiseDomainSimilarityCalculator(),
cdc_list6,
false,
true );
- final Iterator<DomainSimilarity> sims_it6_p = sims6_p.iterator();
- final DomainSimilarity sa6_p = sims_it6_p.next();
+ final Iterator<PrintableDomainSimilarity> sims_it6_p = sims6_p.iterator();
+ final PrintableDomainSimilarity sa6_p = sims_it6_p.next();
if ( !sa6_p.getDomainId().equals( "A" ) ) {
return false;
}
cdc_list.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve,
true,
new BasicSpecies( "nemve" ) ) );
- final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
+ final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
false,
false,
true );
- final SortedSet<DomainSimilarity> sims = calc
+ final SortedSet<PrintableDomainSimilarity> sims = calc
.calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
cdc_list,
true,
false );
- final Iterator<DomainSimilarity> sims_it = sims.iterator();
- final DomainSimilarity sa = sims_it.next();
+ final Iterator<PrintableDomainSimilarity> sims_it = sims.iterator();
+ final PrintableDomainSimilarity sa = sims_it.next();
if ( !sa.getDomainId().equals( "A" ) ) {
return false;
}
if ( sa.getMaximalDifferenceInCounts() != 0 ) {
return false;
}
- final DomainSimilarity sb = sims_it.next();
+ final PrintableDomainSimilarity sb = sims_it.next();
if ( !sb.getDomainId().equals( "B" ) ) {
return false;
}
if ( !sb.getSpecies().contains( new BasicSpecies( "rabbit" ) ) ) {
return false;
}
- final SortedSet<DomainSimilarity> sims2 = calc
+ final SortedSet<PrintableDomainSimilarity> sims2 = calc
.calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
cdc_list,
true,
true );
- final Iterator<DomainSimilarity> sims_it2 = sims2.iterator();
- final DomainSimilarity sa2 = sims_it2.next();
+ final Iterator<PrintableDomainSimilarity> sims_it2 = sims2.iterator();
+ final PrintableDomainSimilarity sa2 = sims_it2.next();
if ( !sa2.getDomainId().equals( "D" ) ) {
return false;
}
cdc_list.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve,
true,
new BasicSpecies( "nemve" ) ) );
- final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
+ final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( PrintableDomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
false,
false,
true );
- final SortedSet<DomainSimilarity> sims = calc
+ final SortedSet<PrintableDomainSimilarity> sims = calc
.calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
cdc_list,
false,
if ( sims.size() != 1 ) {
return false;
}
- final Iterator<DomainSimilarity> sims_it = sims.iterator();
- final DomainSimilarity sa = sims_it.next();
+ final Iterator<PrintableDomainSimilarity> sims_it = sims.iterator();
+ final PrintableDomainSimilarity sa = sims_it.next();
if ( !sa.getDomainId().equals( "A" ) ) {
return false;
}
if ( !sa.getSpecies().contains( new BasicSpecies( "rabbit" ) ) ) {
return false;
}
- final SortedSet<DomainSimilarity> sims_ns = calc
+ final SortedSet<PrintableDomainSimilarity> sims_ns = calc
.calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
cdc_list,
true,
cdc_list2.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve2,
true,
new BasicSpecies( "nemve" ) ) );
- final SortedSet<DomainSimilarity> sims2 = calc
+ final SortedSet<PrintableDomainSimilarity> sims2 = calc
.calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
cdc_list2,
true,
@SuppressWarnings( "unused")
public final class Test {
- private final static boolean PERFORM_DB_TESTS = true;
+ private final static boolean PERFORM_DB_TESTS = false;
private final static double ZERO_DIFF = 1.0E-9;
private final static String PATH_TO_TEST_DATA = System.getProperty( "user.dir" )
+ ForesterUtil.getFileSeparator() + "test_data"
failed++;
}
}
- System.exit( 0 );
+ /////////////////////System.exit( 0 );
System.out.print( "UniProtKB id extraction: " );
if ( Test.testExtractUniProtKbProteinSeqIdentifier() ) {
System.out.println( "OK." );