import org.forester.go.GoId;
import org.forester.go.GoNameSpace;
import org.forester.go.GoTerm;
+import org.forester.phylogeny.Phylogeny;
import org.forester.species.Species;
import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
import org.forester.util.DescriptiveStatistics;
public class PairwiseGenomeComparator {
private List<DistanceMatrix> _domain_distance_scores_means;
- private List<DistanceMatrix> _shared_domains_based_distances;
private List<DistanceMatrix> _shared_binary_combinations_based_distances;
+ private List<DistanceMatrix> _shared_domains_based_distances;
- //private List<HistogramData> _histogram_datas;
/**
 * Creates a comparator; the only work done here is the call to init().
 */
public PairwiseGenomeComparator() {
init();
}
return _domain_distance_scores_means;
}
- //public List<HistogramData> getHistogramDatas() {
- // return _histogram_datas;
- //}
/**
 * Returns the list held in the _shared_binary_combinations_based_distances field.
 * The field itself is returned, not a copy.
 *
 * @return the list of shared-binary-combinations-based distance matrices
 */
public List<DistanceMatrix> getSharedBinaryCombinationsBasedDistances() {
return _shared_binary_combinations_based_distances;
}
return _shared_domains_based_distances;
}
- private void init() {
- //_histogram_datas = new ArrayList<HistogramData>();
- _domain_distance_scores_means = new ArrayList<DistanceMatrix>();
- _shared_domains_based_distances = new ArrayList<DistanceMatrix>();
- _shared_binary_combinations_based_distances = new ArrayList<DistanceMatrix>();
- }
-
public void performPairwiseComparisons( final StringBuilder html_desc,
final boolean sort_by_species_count_first,
final Detailedness detailedness,
final boolean ignore_domains_without_combs_in_all_spec,
final boolean ignore_domains_specific_to_one_species,
final DomainSimilarity.DomainSimilaritySortField domain_similarity_sort_field,
- final PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option,
+ final DomainSimilarity.PRINT_OPTION domain_similarity_print_option,
final DomainSimilarity.DomainSimilarityScoring scoring,
final Map<String, List<GoId>> domain_id_to_go_ids_map,
final Map<GoId, GoTerm> go_id_to_term_map,
final String command_line_prg_name,
final File out_dir,
final boolean write_pairwise_comparisons,
- final Map<String, Integer> tax_code_to_id_map ) {
+ final Map<String, Integer> tax_code_to_id_map,
+ final boolean calc_similarity_scores,
+ final Phylogeny phy ) {
init();
- final BasicSymmetricalDistanceMatrix domain_distance_scores_means = new BasicSymmetricalDistanceMatrix( number_of_genomes );
- final BasicSymmetricalDistanceMatrix shared_domains_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes );
- final BasicSymmetricalDistanceMatrix shared_binary_combinations_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes );
+ final DistanceMatrix domain_distance_scores_means = new BasicSymmetricalDistanceMatrix( number_of_genomes );
+ final DistanceMatrix shared_domains_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes );
+ final DistanceMatrix shared_binary_combinations_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes );
if ( verbose ) {
System.out.println();
System.out.println( "Pairwise genome distances:" );
if ( ( list_of_genome_wide_combinable_domains.get( i ).getSize() < 1 )
|| ( list_of_genome_wide_combinable_domains.get( j ).getSize() < 1 ) ) {
domain_distance_scores_means
- .setValue( i, j, DomainArchitectureBasedGenomeSimilarityCalculator.MAX_SIMILARITY_SCORE );
+ .setValue( i, j, DomainArchitectureBasedGenomeSimilarityCalculator.MAX_SIMILARITY_SCORE );
shared_domains_based_distances
- .setValue( i, j, DomainArchitectureBasedGenomeSimilarityCalculator.MAX_SIMILARITY_SCORE );
+ .setValue( i, j, DomainArchitectureBasedGenomeSimilarityCalculator.MAX_SIMILARITY_SCORE );
shared_binary_combinations_based_distances
- .setValue( i, j, DomainArchitectureBasedGenomeSimilarityCalculator.MAX_SIMILARITY_SCORE );
+ .setValue( i, j, DomainArchitectureBasedGenomeSimilarityCalculator.MAX_SIMILARITY_SCORE );
continue;
}
final List<GenomeWideCombinableDomains> genome_pair = new ArrayList<GenomeWideCombinableDomains>( 2 );
}
final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( domain_similarity_sort_field,
sort_by_species_count_first,
+ true,
+ calc_similarity_scores,
true );
final SortedSet<DomainSimilarity> similarities = calc
.calculateSimilarities( pw_calc,
.calculateDescriptiveStatisticsForMeanValues( similarities );
final String species_j = species[ j ].getSpeciesId();
final DomainArchitectureBasedGenomeSimilarityCalculator genome_similarity_calculator = new DomainArchitectureBasedGenomeSimilarityCalculator( list_of_genome_wide_combinable_domains
- .get( i ),
+ .get( i ),
list_of_genome_wide_combinable_domains
- .get( j ) );
+ .get( j ) );
genome_similarity_calculator.setAllowDomainsToBeIgnored( false );
double dissimilarity_score_mean;
if ( stats.getN() < 1 ) {
}
break;
}
- DescriptiveStatistics pw_stats = null;
if ( write_pairwise_comparisons ) {
try {
final Writer writer = new BufferedWriter( new FileWriter( out_dir == null ? pairwise_similarities_output_file_str
: out_dir + ForesterUtil.FILE_SEPARATOR + pairwise_similarities_output_file_str ) );
- pw_stats = SurfacingUtil.writeDomainSimilaritiesToFile( html_desc,
- new StringBuilder( species_i + "-"
- + species_j ),
- writer,
- null,
- similarities,
- true,
- null,
- domain_similarity_print_option,
- scoring,
- false,
- tax_code_to_id_map,
- false );
+ SurfacingUtil.writeDomainSimilaritiesToFile( html_desc,
+ new StringBuilder( species_i + "-" + species_j ),
+ null,
+ writer,
+ null,
+ similarities,
+ true,
+ null,
+ domain_similarity_print_option,
+ scoring,
+ false,
+ tax_code_to_id_map,
+ phy,
+ null );
}
catch ( final IOException e ) {
ForesterUtil.fatalError( command_line_prg_name, "Failed to write similarites to: \""
+ pairwise_similarities_output_file_str + "\" [" + e.getMessage() + "]" );
}
}
- if ( pw_stats != null ) {
- if ( pw_stats.getMin() >= pw_stats.getMax() ) {
- ForesterUtil
- .printWarningMessage( command_line_prg_name, "for [" + species_i + "-" + species_j
- + "] score minimum is [" + pw_stats.getMin() + "] while score maximum is ["
- + pw_stats.getMax()
- + "], possibly indicating that a genome is compared to itself" );
- }
- }
}
}
getDomainDistanceScoresMeans().add( domain_distance_scores_means );
final SortedSet<String> domain_ids_to_ignore = randomlyPickDomainIds( all_unique_domain_ids,
jacknife_ratio,
generator );
- final BasicSymmetricalDistanceMatrix shared_domains_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes );
- final BasicSymmetricalDistanceMatrix shared_binary_combinations_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes );
+ final DistanceMatrix shared_domains_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes );
+ final DistanceMatrix shared_binary_combinations_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes );
for( int i = 0; i < number_of_genomes; ++i ) {
final String species_i = species[ i ].getSpeciesId();
shared_domains_based_distances.setIdentifier( i, species_i );
genome_pair.add( list_of_genome_wide_combinable_domains.get( i ) );
genome_pair.add( list_of_genome_wide_combinable_domains.get( j ) );
final DomainArchitectureBasedGenomeSimilarityCalculator genome_simiarity_calculator = new DomainArchitectureBasedGenomeSimilarityCalculator( list_of_genome_wide_combinable_domains
- .get( i ),
+ .get( i ),
list_of_genome_wide_combinable_domains
- .get( j ) );
+ .get( j ) );
genome_simiarity_calculator.setAllowDomainsToBeIgnored( true );
genome_simiarity_calculator.setDomainIdsToIgnore( domain_ids_to_ignore );
shared_domains_based_distances.setValue( i, j, 1.0 - genome_simiarity_calculator
- .calculateSharedDomainsBasedGenomeSimilarityScore() );
+ .calculateSharedDomainsBasedGenomeSimilarityScore() );
shared_binary_combinations_based_distances.setValue( i, j, 1.0 - genome_simiarity_calculator
- .calculateSharedBinaryDomainCombinationBasedGenomeSimilarityScore() );
+ .calculateSharedBinaryDomainCombinationBasedGenomeSimilarityScore() );
}
}
getSharedDomainsBasedDistances().add( shared_domains_based_distances );
}
}
+ private void init() { // assigns a new, empty ArrayList to each of the three result-list fields
+ _domain_distance_scores_means = new ArrayList<DistanceMatrix>();
+ _shared_domains_based_distances = new ArrayList<DistanceMatrix>();
+ _shared_binary_combinations_based_distances = new ArrayList<DistanceMatrix>();
+ }
+
static private String[] getAllUniqueDomainIdAsArray( final List<GenomeWideCombinableDomains> list_of_genome_wide_combinable_domains ) {
String[] all_domain_ids_array;
final SortedSet<String> all_domain_ids = new TreeSet<String>();