// 19:38:35 cmzmasek Exp $
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: www.phylosoft.org/forester
package org.forester.surfacing;

import java.util.HashSet;
import java.util.Set;
32 public class DomainArchitectureBasedGenomeSimilarityCalculator {
34 public static final double MAX_SIMILARITY_SCORE = 1.0;
35 public static final double MIN_SIMILARITY_SCORE = 0.0;
36 final private GenomeWideCombinableDomains _combinable_domains_genome_0;
37 final private GenomeWideCombinableDomains _combinable_domains_genome_1;
38 private Set<DomainId> _domain_ids_to_ignore;
39 private boolean _allow_domains_to_be_ignored;
40 private Set<DomainId> _all_domains;
41 private Set<DomainId> _shared_domains;
42 private Set<DomainId> _domains_specific_to_0;
43 private Set<DomainId> _domains_specific_to_1;
44 private Set<BinaryDomainCombination> _all_binary_domain_combinations;
45 private Set<BinaryDomainCombination> _shared_binary_domain_combinations;
46 private Set<BinaryDomainCombination> _binary_domain_combinations_specific_to_0;
47 private Set<BinaryDomainCombination> _binary_domain_combinations_specific_to_1;
49 public DomainArchitectureBasedGenomeSimilarityCalculator( final GenomeWideCombinableDomains combinable_domains_genome_0,
50 final GenomeWideCombinableDomains combinable_domains_genome_1 ) {
51 if ( ( combinable_domains_genome_0 == null ) || ( combinable_domains_genome_0.getSize() < 1 )
52 || ( combinable_domains_genome_1 == null ) || ( combinable_domains_genome_1.getSize() < 1 ) ) {
53 throw new IllegalArgumentException( "attempt to compare null or empty combinable domains collection" );
55 if ( combinable_domains_genome_0.getSpecies().equals( combinable_domains_genome_1.getSpecies() ) ) {
56 throw new IllegalArgumentException( "attempt to compare combinable domains collection from the same species" );
58 _combinable_domains_genome_0 = combinable_domains_genome_0;
59 _combinable_domains_genome_1 = combinable_domains_genome_1;
64 public void addDomainIdToIgnore( final DomainId domain_id_to_ignore ) {
66 getDomainIdsToIgnore().add( domain_id_to_ignore );
70 * This returns a score between 0.0 (no binary domain combination in common)
71 * and 1.0 (all binary domain combinations in common) measuring the similarity between two
72 * genomes based on the number of shared binary domain combinations:
74 * t: sum of (distinct) binary domain combinations
75 * s: sum of shared (distinct) binary domain combinations
77 * 1 - ( ( t - s ) / t )
79 * @return shared binary domain combinations based similarity score
81 public double calculateSharedBinaryDomainCombinationBasedGenomeSimilarityScore() {
82 final double t = getAllBinaryDomainCombinations().size();
83 final double s = getSharedBinaryDomainCombinations().size();
85 return MIN_SIMILARITY_SCORE;
87 return ( MAX_SIMILARITY_SCORE - ( ( t - s ) / t ) );
91 * This returns a score between 0.0 (no domains in common)
92 * and 1.0 (all domains in common) measuring the similarity between two
93 * genomes based on the number of shared domains:
95 * t: sum of (distinct) domains
96 * s: sum of shared (distinct) domains
98 * 1 - ( ( t - s ) / t )
100 * @return shared domains based similarity score
102 public double calculateSharedDomainsBasedGenomeSimilarityScore() {
103 final double t = getAllDomains().size();
104 final double s = getSharedDomains().size();
106 return MIN_SIMILARITY_SCORE;
108 return ( MAX_SIMILARITY_SCORE - ( ( t - s ) / t ) );
111 public void deleteAllDomainIdsToIgnore() {
112 forceRecalculation();
113 setDomainIdsToIgnore( new HashSet<DomainId>() );
116 private void forceRecalculation() {
118 _shared_domains = null;
119 _domains_specific_to_0 = null;
120 _domains_specific_to_1 = null;
121 _all_binary_domain_combinations = null;
122 _shared_binary_domain_combinations = null;
123 _binary_domain_combinations_specific_to_0 = null;
124 _binary_domain_combinations_specific_to_1 = null;
128 * Does not return binary combinations which contain one or two domains
129 * to be ignored -- if ignoring is allowed.
131 * @return SortedSet<BinaryDomainCombination>
133 public Set<BinaryDomainCombination> getAllBinaryDomainCombinations() {
134 if ( _all_binary_domain_combinations == null ) {
135 final Set<BinaryDomainCombination> all = new HashSet<BinaryDomainCombination>();
136 all.addAll( getCombinableDomainsGenome0().toBinaryDomainCombinations() );
137 all.addAll( getCombinableDomainsGenome1().toBinaryDomainCombinations() );
138 if ( isAllowDomainsToBeIgnored() && !getDomainIdsToIgnore().isEmpty() ) {
139 _all_binary_domain_combinations = pruneBinaryCombinations( all );
142 _all_binary_domain_combinations = all;
145 return _all_binary_domain_combinations;
149 * Does not return domains which are to be
150 * ignored -- if ignoring is allowed.
155 public Set<DomainId> getAllDomains() {
156 if ( _all_domains == null ) {
157 final Set<DomainId> all = new HashSet<DomainId>();
158 all.addAll( getCombinableDomainsGenome0().getAllDomainIds() );
159 all.addAll( getCombinableDomainsGenome1().getAllDomainIds() );
160 if ( isAllowDomainsToBeIgnored() && !getDomainIdsToIgnore().isEmpty() ) {
161 _all_domains = pruneDomains( all );
170 private Set<BinaryDomainCombination> getBinaryDomainCombinationsSpecificToGenome( final boolean specific_to_genome_0 ) {
171 final Set<BinaryDomainCombination> specific = new HashSet<BinaryDomainCombination>();
172 final Set<BinaryDomainCombination> bc0 = getCombinableDomainsGenome0().toBinaryDomainCombinations();
173 final Set<BinaryDomainCombination> bc1 = getCombinableDomainsGenome1().toBinaryDomainCombinations();
174 if ( specific_to_genome_0 ) {
175 for( final BinaryDomainCombination binary_domain_combination0 : bc0 ) {
176 if ( !bc1.contains( binary_domain_combination0 ) ) {
177 specific.add( binary_domain_combination0 );
182 for( final BinaryDomainCombination binary_domain_combination1 : bc1 ) {
183 if ( !bc0.contains( binary_domain_combination1 ) ) {
184 specific.add( binary_domain_combination1 );
188 if ( isAllowDomainsToBeIgnored() && !getDomainIdsToIgnore().isEmpty() ) {
189 return pruneBinaryCombinations( specific );
194 public Set<BinaryDomainCombination> getBinaryDomainCombinationsSpecificToGenome0() {
195 if ( _binary_domain_combinations_specific_to_0 == null ) {
196 _binary_domain_combinations_specific_to_0 = getBinaryDomainCombinationsSpecificToGenome( true );
198 return _binary_domain_combinations_specific_to_0;
201 public Set<BinaryDomainCombination> getBinaryDomainCombinationsSpecificToGenome1() {
202 if ( _binary_domain_combinations_specific_to_1 == null ) {
203 _binary_domain_combinations_specific_to_1 = getBinaryDomainCombinationsSpecificToGenome( false );
205 return _binary_domain_combinations_specific_to_1;
208 private GenomeWideCombinableDomains getCombinableDomainsGenome0() {
209 return _combinable_domains_genome_0;
212 private GenomeWideCombinableDomains getCombinableDomainsGenome1() {
213 return _combinable_domains_genome_1;
216 private Set<DomainId> getDomainIdsToIgnore() {
217 return _domain_ids_to_ignore;
220 private Set<DomainId> getDomainsSpecificToGenome( final boolean specific_to_genome_0 ) {
221 final Set<DomainId> specific = new HashSet<DomainId>();
222 final Set<DomainId> d0 = getCombinableDomainsGenome0().getAllDomainIds();
223 final Set<DomainId> d1 = getCombinableDomainsGenome1().getAllDomainIds();
224 if ( specific_to_genome_0 ) {
225 for( final DomainId domain0 : d0 ) {
226 if ( !d1.contains( domain0 ) ) {
227 specific.add( domain0 );
232 for( final DomainId domain1 : d1 ) {
233 if ( !d0.contains( domain1 ) ) {
234 specific.add( domain1 );
238 if ( isAllowDomainsToBeIgnored() && !getDomainIdsToIgnore().isEmpty() ) {
239 return pruneDomains( specific );
244 public Set<DomainId> getDomainsSpecificToGenome0() {
245 if ( _domains_specific_to_0 == null ) {
246 _domains_specific_to_0 = getDomainsSpecificToGenome( true );
248 return _domains_specific_to_0;
251 public Set<DomainId> getDomainsSpecificToGenome1() {
252 if ( _domains_specific_to_1 == null ) {
253 _domains_specific_to_1 = getDomainsSpecificToGenome( false );
255 return _domains_specific_to_1;
258 public Set<BinaryDomainCombination> getSharedBinaryDomainCombinations() {
259 if ( _shared_binary_domain_combinations == null ) {
260 final Set<BinaryDomainCombination> shared = new HashSet<BinaryDomainCombination>();
261 final Set<BinaryDomainCombination> bc0 = getCombinableDomainsGenome0().toBinaryDomainCombinations();
262 final Set<BinaryDomainCombination> bc1 = getCombinableDomainsGenome1().toBinaryDomainCombinations();
263 for( final BinaryDomainCombination binary_domain_combination0 : bc0 ) {
264 if ( bc1.contains( binary_domain_combination0 ) ) {
265 shared.add( binary_domain_combination0 );
268 _shared_binary_domain_combinations = shared;
269 if ( isAllowDomainsToBeIgnored() && !getDomainIdsToIgnore().isEmpty() ) {
270 _shared_binary_domain_combinations = pruneBinaryCombinations( shared );
273 return _shared_binary_domain_combinations;
276 public Set<DomainId> getSharedDomains() {
277 if ( _shared_domains == null ) {
278 final Set<DomainId> shared = new HashSet<DomainId>();
279 final Set<DomainId> d0 = getCombinableDomainsGenome0().getAllDomainIds();
280 final Set<DomainId> d1 = getCombinableDomainsGenome1().getAllDomainIds();
281 for( final DomainId domain0 : d0 ) {
282 if ( d1.contains( domain0 ) ) {
283 shared.add( domain0 );
286 _shared_domains = shared;
287 if ( isAllowDomainsToBeIgnored() && !getDomainIdsToIgnore().isEmpty() ) {
288 _shared_domains = pruneDomains( shared );
291 return _shared_domains;
294 private void init() {
295 deleteAllDomainIdsToIgnore();
296 setAllowDomainsToBeIgnored( false );
299 private boolean isAllowDomainsToBeIgnored() {
300 return _allow_domains_to_be_ignored;
303 private Set<BinaryDomainCombination> pruneBinaryCombinations( final Set<BinaryDomainCombination> all ) {
304 final Set<BinaryDomainCombination> pruned = new HashSet<BinaryDomainCombination>();
305 for( final BinaryDomainCombination bc : all ) {
306 if ( ( !getDomainIdsToIgnore().contains( bc.getId0() ) )
307 && ( !getDomainIdsToIgnore().contains( bc.getId1() ) ) ) {
314 private Set<DomainId> pruneDomains( final Set<DomainId> all ) {
315 final Set<DomainId> pruned = new HashSet<DomainId>();
316 for( final DomainId d : all ) {
317 if ( !getDomainIdsToIgnore().contains( d ) ) {
324 public void setAllowDomainsToBeIgnored( final boolean allow_domains_to_be_ignored ) {
325 forceRecalculation();
326 _allow_domains_to_be_ignored = allow_domains_to_be_ignored;
329 void setDomainIdsToIgnore( final Set<DomainId> domain_ids_to_ignore ) {
330 forceRecalculation();
331 _domain_ids_to_ignore = domain_ids_to_ignore;