2 // 19:38:35 cmzmasek Exp $
3 // FORESTER -- software libraries and applications
4 // for evolutionary biology research and applications.
6 // Copyright (C) 2008-2009 Christian M. Zmasek
7 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 // Contact: phylosoft @ gmail . com
25 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
27 package org.forester.surfacing;
import java.util.HashSet;
import java.util.Set;

import org.forester.protein.BinaryDomainCombination;
34 public class DomainArchitectureBasedGenomeSimilarityCalculator {
36 public static final double MAX_SIMILARITY_SCORE = 1.0;
37 public static final double MIN_SIMILARITY_SCORE = 0.0;
38 private Set<BinaryDomainCombination> _all_binary_domain_combinations;
39 private Set<String> _all_domains;
40 private boolean _allow_domains_to_be_ignored;
41 private Set<BinaryDomainCombination> _binary_domain_combinations_specific_to_0;
42 private Set<BinaryDomainCombination> _binary_domain_combinations_specific_to_1;
43 final private GenomeWideCombinableDomains _combinable_domains_genome_0;
44 final private GenomeWideCombinableDomains _combinable_domains_genome_1;
45 private Set<String> _domain_ids_to_ignore;
46 private Set<String> _domains_specific_to_0;
47 private Set<String> _domains_specific_to_1;
48 private Set<BinaryDomainCombination> _shared_binary_domain_combinations;
49 private Set<String> _shared_domains;
/**
 * Creates a calculator comparing the combinable domains of two genomes.
 * Initially no domain ids are ignored and ignoring is switched off.
 *
 * @param combinable_domains_genome_0 combinable domains of genome 0
 * @param combinable_domains_genome_1 combinable domains of genome 1
 * @throws IllegalArgumentException if either argument is null or reports a
 *         size below 1, or if both arguments report equal species
 */
public DomainArchitectureBasedGenomeSimilarityCalculator( final GenomeWideCombinableDomains combinable_domains_genome_0,
                                                          final GenomeWideCombinableDomains combinable_domains_genome_1 ) {
    if ( ( combinable_domains_genome_0 == null ) || ( combinable_domains_genome_0.getSize() < 1 )
            || ( combinable_domains_genome_1 == null ) || ( combinable_domains_genome_1.getSize() < 1 ) ) {
        throw new IllegalArgumentException( "attempt to compare null or empty combinable domains collection" );
    }
    if ( combinable_domains_genome_0.getSpecies().equals( combinable_domains_genome_1.getSpecies() ) ) {
        throw new IllegalArgumentException( "attempt to compare combinable domains collection from the same species" );
    }
    _combinable_domains_genome_0 = combinable_domains_genome_0;
    _combinable_domains_genome_1 = combinable_domains_genome_1;
    // Installs an empty ignore set and disables ignoring; without this the
    // ignore set would still be null when addDomainIdToIgnore() is called.
    init();
}
66 public void addDomainIdToIgnore( final String domain_id_to_ignore ) {
68 getDomainIdsToIgnore().add( domain_id_to_ignore );
72 * This returns a score between 0.0 (no binary domain combination in common)
73 * and 1.0 (all binary domain combinations in common) measuring the similarity between two
74 * genomes based on the number of shared binary domain combinations:
76 * t: sum of (distinct) binary domain combinations
77 * s: sum of shared (distinct) binary domain combinations
79 * 1 - ( ( t - s ) / t )
81 * @return shared binary domain combinations based similarity score
83 public double calculateSharedBinaryDomainCombinationBasedGenomeSimilarityScore() {
84 final double t = getAllBinaryDomainCombinations().size();
85 final double s = getSharedBinaryDomainCombinations().size();
87 return MIN_SIMILARITY_SCORE;
89 return ( MAX_SIMILARITY_SCORE - ( ( t - s ) / t ) );
93 * This returns a score between 0.0 (no domains in common)
94 * and 1.0 (all domains in common) measuring the similarity between two
95 * genomes based on the number of shared domains:
97 * t: sum of (distinct) domains
98 * s: sum of shared (distinct) domains
100 * 1 - ( ( t - s ) / t )
102 * @return shared domains based similarity score
104 public double calculateSharedDomainsBasedGenomeSimilarityScore() {
105 final double t = getAllDomains().size();
106 final double s = getSharedDomains().size();
108 return MIN_SIMILARITY_SCORE;
110 return ( MAX_SIMILARITY_SCORE - ( ( t - s ) / t ) );
113 public void deleteAllDomainIdsToIgnore() {
114 forceRecalculation();
115 setDomainIdsToIgnore( new HashSet<String>() );
119 * Does not return binary combinations which contain one or two domains
120 * to be ignored -- if ignoring is allowed.
122 * @return SortedSet<BinaryDomainCombination>
124 public Set<BinaryDomainCombination> getAllBinaryDomainCombinations() {
125 if ( _all_binary_domain_combinations == null ) {
126 final Set<BinaryDomainCombination> all = new HashSet<BinaryDomainCombination>();
127 all.addAll( getCombinableDomainsGenome0().toBinaryDomainCombinations() );
128 all.addAll( getCombinableDomainsGenome1().toBinaryDomainCombinations() );
129 if ( isAllowDomainsToBeIgnored() && !getDomainIdsToIgnore().isEmpty() ) {
130 _all_binary_domain_combinations = pruneBinaryCombinations( all );
133 _all_binary_domain_combinations = all;
136 return _all_binary_domain_combinations;
140 * Does not return domains which are to be
141 * ignored -- if ignoring is allowed.
146 public Set<String> getAllDomains() {
147 if ( _all_domains == null ) {
148 final Set<String> all = new HashSet<String>();
149 all.addAll( getCombinableDomainsGenome0().getAllDomainIds() );
150 all.addAll( getCombinableDomainsGenome1().getAllDomainIds() );
151 if ( isAllowDomainsToBeIgnored() && !getDomainIdsToIgnore().isEmpty() ) {
152 _all_domains = pruneDomains( all );
161 public Set<BinaryDomainCombination> getBinaryDomainCombinationsSpecificToGenome0() {
162 if ( _binary_domain_combinations_specific_to_0 == null ) {
163 _binary_domain_combinations_specific_to_0 = getBinaryDomainCombinationsSpecificToGenome( true );
165 return _binary_domain_combinations_specific_to_0;
168 public Set<BinaryDomainCombination> getBinaryDomainCombinationsSpecificToGenome1() {
169 if ( _binary_domain_combinations_specific_to_1 == null ) {
170 _binary_domain_combinations_specific_to_1 = getBinaryDomainCombinationsSpecificToGenome( false );
172 return _binary_domain_combinations_specific_to_1;
175 public Set<String> getDomainsSpecificToGenome0() {
176 if ( _domains_specific_to_0 == null ) {
177 _domains_specific_to_0 = getDomainsSpecificToGenome( true );
179 return _domains_specific_to_0;
182 public Set<String> getDomainsSpecificToGenome1() {
183 if ( _domains_specific_to_1 == null ) {
184 _domains_specific_to_1 = getDomainsSpecificToGenome( false );
186 return _domains_specific_to_1;
189 public Set<BinaryDomainCombination> getSharedBinaryDomainCombinations() {
190 if ( _shared_binary_domain_combinations == null ) {
191 final Set<BinaryDomainCombination> shared = new HashSet<BinaryDomainCombination>();
192 final Set<BinaryDomainCombination> bc0 = getCombinableDomainsGenome0().toBinaryDomainCombinations();
193 final Set<BinaryDomainCombination> bc1 = getCombinableDomainsGenome1().toBinaryDomainCombinations();
194 for( final BinaryDomainCombination binary_domain_combination0 : bc0 ) {
195 if ( bc1.contains( binary_domain_combination0 ) ) {
196 shared.add( binary_domain_combination0 );
199 _shared_binary_domain_combinations = shared;
200 if ( isAllowDomainsToBeIgnored() && !getDomainIdsToIgnore().isEmpty() ) {
201 _shared_binary_domain_combinations = pruneBinaryCombinations( shared );
204 return _shared_binary_domain_combinations;
207 public Set<String> getSharedDomains() {
208 if ( _shared_domains == null ) {
209 final Set<String> shared = new HashSet<String>();
210 final Set<String> d0 = getCombinableDomainsGenome0().getAllDomainIds();
211 final Set<String> d1 = getCombinableDomainsGenome1().getAllDomainIds();
212 for( final String domain0 : d0 ) {
213 if ( d1.contains( domain0 ) ) {
214 shared.add( domain0 );
217 _shared_domains = shared;
218 if ( isAllowDomainsToBeIgnored() && !getDomainIdsToIgnore().isEmpty() ) {
219 _shared_domains = pruneDomains( shared );
222 return _shared_domains;
225 public void setAllowDomainsToBeIgnored( final boolean allow_domains_to_be_ignored ) {
226 forceRecalculation();
227 _allow_domains_to_be_ignored = allow_domains_to_be_ignored;
230 void setDomainIdsToIgnore( final Set<String> domain_ids_to_ignore ) {
231 forceRecalculation();
232 _domain_ids_to_ignore = domain_ids_to_ignore;
235 private void forceRecalculation() {
237 _shared_domains = null;
238 _domains_specific_to_0 = null;
239 _domains_specific_to_1 = null;
240 _all_binary_domain_combinations = null;
241 _shared_binary_domain_combinations = null;
242 _binary_domain_combinations_specific_to_0 = null;
243 _binary_domain_combinations_specific_to_1 = null;
246 private Set<BinaryDomainCombination> getBinaryDomainCombinationsSpecificToGenome( final boolean specific_to_genome_0 ) {
247 final Set<BinaryDomainCombination> specific = new HashSet<BinaryDomainCombination>();
248 final Set<BinaryDomainCombination> bc0 = getCombinableDomainsGenome0().toBinaryDomainCombinations();
249 final Set<BinaryDomainCombination> bc1 = getCombinableDomainsGenome1().toBinaryDomainCombinations();
250 if ( specific_to_genome_0 ) {
251 for( final BinaryDomainCombination binary_domain_combination0 : bc0 ) {
252 if ( !bc1.contains( binary_domain_combination0 ) ) {
253 specific.add( binary_domain_combination0 );
258 for( final BinaryDomainCombination binary_domain_combination1 : bc1 ) {
259 if ( !bc0.contains( binary_domain_combination1 ) ) {
260 specific.add( binary_domain_combination1 );
264 if ( isAllowDomainsToBeIgnored() && !getDomainIdsToIgnore().isEmpty() ) {
265 return pruneBinaryCombinations( specific );
270 private GenomeWideCombinableDomains getCombinableDomainsGenome0() {
271 return _combinable_domains_genome_0;
274 private GenomeWideCombinableDomains getCombinableDomainsGenome1() {
275 return _combinable_domains_genome_1;
278 private Set<String> getDomainIdsToIgnore() {
279 return _domain_ids_to_ignore;
282 private Set<String> getDomainsSpecificToGenome( final boolean specific_to_genome_0 ) {
283 final Set<String> specific = new HashSet<String>();
284 final Set<String> d0 = getCombinableDomainsGenome0().getAllDomainIds();
285 final Set<String> d1 = getCombinableDomainsGenome1().getAllDomainIds();
286 if ( specific_to_genome_0 ) {
287 for( final String domain0 : d0 ) {
288 if ( !d1.contains( domain0 ) ) {
289 specific.add( domain0 );
294 for( final String domain1 : d1 ) {
295 if ( !d0.contains( domain1 ) ) {
296 specific.add( domain1 );
300 if ( isAllowDomainsToBeIgnored() && !getDomainIdsToIgnore().isEmpty() ) {
301 return pruneDomains( specific );
306 private void init() {
307 deleteAllDomainIdsToIgnore();
308 setAllowDomainsToBeIgnored( false );
311 private boolean isAllowDomainsToBeIgnored() {
312 return _allow_domains_to_be_ignored;
315 private Set<BinaryDomainCombination> pruneBinaryCombinations( final Set<BinaryDomainCombination> all ) {
316 final Set<BinaryDomainCombination> pruned = new HashSet<BinaryDomainCombination>();
317 for( final BinaryDomainCombination bc : all ) {
318 if ( ( !getDomainIdsToIgnore().contains( bc.getId0() ) )
319 && ( !getDomainIdsToIgnore().contains( bc.getId1() ) ) ) {
326 private Set<String> pruneDomains( final Set<String> all ) {
327 final Set<String> pruned = new HashSet<String>();
328 for( final String d : all ) {
329 if ( !getDomainIdsToIgnore().contains( d ) ) {