2 package org.forester.surfacing;
4 import java.util.ArrayList;
5 import java.util.Collections;
6 import java.util.Comparator;
7 import java.util.HashMap;
8 import java.util.HashSet;
12 import java.util.SortedMap;
13 import java.util.SortedSet;
14 import java.util.TreeMap;
15 import java.util.TreeSet;
17 import org.forester.go.GoId;
18 import org.forester.protein.BinaryDomainCombination;
19 import org.forester.protein.BinaryDomainCombination.DomainCombinationType;
20 import org.forester.protein.Domain;
21 import org.forester.protein.Protein;
22 import org.forester.species.Species;
23 import org.forester.util.BasicDescriptiveStatistics;
24 import org.forester.util.DescriptiveStatistics;
25 import org.forester.util.ForesterUtil;
/**
 * Collection of {@link CombinableDomains} objects for one species, kept in a
 * sorted map keyed by domain id. The constructor is private; instances are
 * built by the static {@code createInstance} factory methods from a list of
 * proteins.
 */
27 public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDomains {
// Comparator over CombinableDomains that compares getNumberOfCombinableDomains()
// of both arguments; used by toStringBuilder() for the COMBINATIONS_COUNT sort order.
// NOTE(review): the values returned by each branch and any tie-breaking are on
// lines not visible in this excerpt -- the name suggests descending order; confirm.
29 private static final Comparator<CombinableDomains> DESCENDING_COMBINATIONS_COUNT_ORDER = new Comparator<CombinableDomains>() {
32 public int compare( final CombinableDomains d1,
33 final CombinableDomains d2 ) {
34 if ( d1.getNumberOfCombinableDomains() < d2
35 .getNumberOfCombinableDomains() ) {
39 .getNumberOfCombinableDomains() > d2
40 .getNumberOfCombinableDomains() ) {
// Comparator over CombinableDomains that compares getKeyDomainCount() of both
// arguments; used by toStringBuilder() for the KEY_DOMAIN_COUNT sort order.
// NOTE(review): the values returned by each branch and any tie-breaking are on
// lines not visible in this excerpt -- the name suggests descending order; confirm.
51 private static final Comparator<CombinableDomains> DESCENDING_KEY_DOMAIN_COUNT_ORDER = new Comparator<CombinableDomains>() {
54 public int compare( final CombinableDomains d1,
55 final CombinableDomains d2 ) {
56 if ( d1.getKeyDomainCount() < d2
57 .getKeyDomainCount() ) {
61 .getKeyDomainCount() > d2
62 .getKeyDomainCount() ) {
// Comparator over CombinableDomains that compares getKeyDomainProteinsCount() of
// both arguments; used by toStringBuilder() for the KEY_DOMAIN_PROTEINS_COUNT sort order.
// NOTE(review): the values returned by each branch and any tie-breaking are on
// lines not visible in this excerpt -- the name suggests descending order; confirm.
73 private static final Comparator<CombinableDomains> DESCENDING_KEY_DOMAIN_PROTEINS_COUNT_ORDER = new Comparator<CombinableDomains>() {
76 public int compare( final CombinableDomains d1,
77 final CombinableDomains d2 ) {
78 if ( d1.getKeyDomainProteinsCount() < d2
79 .getKeyDomainProteinsCount() ) {
83 .getKeyDomainProteinsCount() > d2
84 .getKeyDomainProteinsCount() ) {
95 final private SortedMap<String, CombinableDomains> _combinable_domains_map;
96 final private DomainCombinationType _dc_type;
97 final private Species _species;
// Private constructor: callers obtain instances through the static
// createInstance(...) factory methods. Starts with an empty TreeMap, so
// entries iterate in ascending key order.
// NOTE(review): the assignments of _species and _dc_type are not visible in
// this excerpt -- confirm they are set from the arguments.
99 private BasicGenomeWideCombinableDomains( final Species species, final DomainCombinationType dc_type ) {
100 _combinable_domains_map = new TreeMap<String, CombinableDomains>();
106 public boolean contains( final String key_id ) {
107 return _combinable_domains_map.containsKey( key_id );
111 public CombinableDomains get( final String key_id ) {
112 return _combinable_domains_map.get( key_id );
116 public SortedMap<String, CombinableDomains> getAllCombinableDomainsIds() {
117 return _combinable_domains_map;
/**
 * Walks every entry of this collection and iterates over the ids returned by
 * its getAllDomains().
 * NOTE(review): the body of the inner loop and the return statement are not
 * visible in this excerpt -- presumably each id is added to the sorted set,
 * which is then returned; confirm.
 */
121 public SortedSet<String> getAllDomainIds() {
122 final SortedSet<String> domains = new TreeSet<String>();
123 for( final String key : getAllCombinableDomainsIds().keySet() ) {
124 final CombinableDomains cb = getAllCombinableDomainsIds().get( key );
125 final List<String> ds = cb.getAllDomains();
126 for( final String d : ds ) {
// Accessor returning a DomainCombinationType.
// NOTE(review): the body is not visible in this excerpt -- presumably returns _dc_type; confirm.
134 public DomainCombinationType getDomainCombinationType() {
/**
 * Finds the entries whose number of combinable domains equals the maximum
 * reported by getPerGenomeDomainPromiscuityStatistics() (truncated to int).
 * NOTE(review): what is added to the result set for a matching entry, and the
 * return statement, are not visible in this excerpt -- confirm.
 */
139 public SortedSet<String> getMostPromiscuosDomain() {
140 final SortedSet<String> doms = new TreeSet<String>();
141 final int max = ( int ) getPerGenomeDomainPromiscuityStatistics().getMax();
142 for( final String key : getAllCombinableDomainsIds().keySet() ) {
143 final CombinableDomains cb = getAllCombinableDomainsIds().get( key );
144 if ( cb.getNumberOfCombinableDomains() == max ) {
/**
 * Creates a fresh BasicDescriptiveStatistics and adds one value per entry of
 * this collection: that entry's getNumberOfCombinableDomains().
 * NOTE(review): the return statement is not visible in this excerpt --
 * presumably returns the statistics object built here; confirm.
 */
152 public DescriptiveStatistics getPerGenomeDomainPromiscuityStatistics() {
153 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
154 for( final String key : getAllCombinableDomainsIds().keySet() ) {
155 final CombinableDomains cb = getAllCombinableDomainsIds().get( key );
156 stats.addValue( cb.getNumberOfCombinableDomains() );
162 public int getSize() {
163 return _combinable_domains_map.size();
// Accessor returning a Species.
// NOTE(review): the body is not visible in this excerpt -- presumably returns _species; confirm.
167 public Species getSpecies() {
172 public SortedSet<BinaryDomainCombination> toBinaryDomainCombinations() {
173 final SortedSet<BinaryDomainCombination> binary_combinations = new TreeSet<BinaryDomainCombination>();
174 for( final String key : getAllCombinableDomainsIds().keySet() ) {
175 final CombinableDomains cb = getAllCombinableDomainsIds().get( key );
176 for( final BinaryDomainCombination b : cb.toBinaryDomainCombinations() ) {
177 binary_combinations.add( b );
180 return binary_combinations;
184 public String toString() {
185 return toStringBuilder( GenomeWideCombinableDomainsSortOrder.ALPHABETICAL_KEY_ID ).toString();
/**
 * Builds a text report with one line per entry of this collection. Each line
 * holds, in this order: the key domain, the key domain count, the key domain
 * proteins count and the number of combinable domains (each passed through
 * ForesterUtil.pad with a width argument of 18, 8, 8 and 8), followed by the
 * combining domain ids and a line separator.
 *
 * Entries start in the iteration order of the sorted map (ascending key id)
 * and are re-sorted with the matching comparator when sort_order is
 * KEY_DOMAIN_COUNT, KEY_DOMAIN_PROTEINS_COUNT or COMBINATIONS_COUNT.
 *
 * NOTE(review): the return statement is not visible in this excerpt --
 * presumably returns the builder filled here; confirm.
 */
188 // Produces something like:
189 // 2-oxoacid_dh 5 5 2 Biotin_lipoyl [4], E3_binding [3]
191 public StringBuilder toStringBuilder( final GenomeWideCombinableDomainsSortOrder sort_order ) {
192 final StringBuilder sb = new StringBuilder();
193 final List<CombinableDomains> combinable_domains = new ArrayList<CombinableDomains>();
// Copy all entries into a list so they can be re-sorted independently of the map.
194 for( final String key : getAllCombinableDomainsIds().keySet() ) {
195 final CombinableDomains cb = getAllCombinableDomainsIds().get( key );
196 combinable_domains.add( cb );
// No sort is applied in the visible branches for any other sort_order value,
// which leaves the list in key order.
198 if ( sort_order == GenomeWideCombinableDomainsSortOrder.KEY_DOMAIN_COUNT ) {
199 Collections.sort( combinable_domains, BasicGenomeWideCombinableDomains.DESCENDING_KEY_DOMAIN_COUNT_ORDER );
201 else if ( sort_order == GenomeWideCombinableDomainsSortOrder.KEY_DOMAIN_PROTEINS_COUNT ) {
202 Collections.sort( combinable_domains,
203 BasicGenomeWideCombinableDomains.DESCENDING_KEY_DOMAIN_PROTEINS_COUNT_ORDER );
205 else if ( sort_order == GenomeWideCombinableDomainsSortOrder.COMBINATIONS_COUNT ) {
206 Collections.sort( combinable_domains, BasicGenomeWideCombinableDomains.DESCENDING_COMBINATIONS_COUNT_ORDER );
// Emit one report line per entry.
208 for( final CombinableDomains cb : combinable_domains ) {
209 sb.append( ForesterUtil.pad( new StringBuffer( cb.getKeyDomain().toString() ), 18, ' ', false ) );
210 sb.append( ForesterUtil.pad( new StringBuffer( "" + cb.getKeyDomainCount() ), 8, ' ', false ) );
211 sb.append( ForesterUtil.pad( new StringBuffer( "" + cb.getKeyDomainProteinsCount() ), 8, ' ', false ) );
212 sb.append( ForesterUtil.pad( new StringBuffer( "" + cb.getNumberOfCombinableDomains() ), 8, ' ', false ) );
213 sb.append( cb.getCombiningDomainIdsAsStringBuilder() );
214 sb.append( ForesterUtil.getLineSeparator() );
219 private void add( final String key, final CombinableDomains cdc ) {
220 _combinable_domains_map.put( key, cdc );
/**
 * Convenience factory that delegates to another createInstance overload,
 * passing DomainCombinationType.BASIC among its arguments.
 * NOTE(review): several arguments of the delegating call are not visible in
 * this excerpt -- confirm which values are passed for the remaining parameters.
 */
223 public static BasicGenomeWideCombinableDomains createInstance( final List<Protein> protein_list,
224 final boolean ignore_combination_with_same_domain,
225 final Species species ) {
226 return createInstance( protein_list,
227 ignore_combination_with_same_domain,
230 DomainCombinationType.BASIC,
235 public static BasicGenomeWideCombinableDomains createInstance( final List<Protein> protein_list,
236 final boolean ignore_combination_with_same_domain,
237 final Species species,
238 final DomainCombinationType dc_type ) {
239 return createInstance( protein_list, ignore_combination_with_same_domain, species, null, dc_type, null, null );
/**
 * Builds the collection for one species from a list of proteins: for every
 * domain of every protein, an entry keyed by that domain's id is looked up or
 * created, the protein is registered on it, and combinable domains found in
 * the same protein are added to it.
 *
 * NOTE(review): a number of statements of this method are not visible in this
 * excerpt; the comments below describe the visible statements only.
 *
 * @param protein_list proteins to process; each must report a species equal to species
 * @param ignore_combination_with_same_domain tested before the inner domain loop
 *        (its effect is on lines not visible here)
 * @param species species of the resulting collection
 * @param domain_id_to_go_ids_map not referenced in the visible lines
 * @param dc_type selects the CombinableDomains implementation created for new key domains
 * @param protein_length_stats_by_dc if non-null, receives protein lengths, keyed by the
 *        string form of each binary domain combination
 * @param domain_number_stats_by_dc if non-null, receives per-protein domain numbers, keyed
 *        by the string form of each binary domain combination
 * @throws IllegalArgumentException if a protein's species does not equal species
 */
242 public static BasicGenomeWideCombinableDomains createInstance( final List<Protein> protein_list,
243 final boolean ignore_combination_with_same_domain,
244 final Species species,
245 final Map<String, List<GoId>> domain_id_to_go_ids_map,
246 final DomainCombinationType dc_type,
247 final Map<String, DescriptiveStatistics> protein_length_stats_by_dc,
248 final Map<String, DescriptiveStatistics> domain_number_stats_by_dc ) {
249 final BasicGenomeWideCombinableDomains instance = new BasicGenomeWideCombinableDomains( species, dc_type );
// Occurrences of each domain id, accumulated over all proteins.
250 final Map<String, Integer> domain_counts = new HashMap<String, Integer>();
251 for( final Protein protein : protein_list ) {
252 if ( !protein.getSpecies().equals( species ) ) {
253 throw new IllegalArgumentException( "species (" + protein.getSpecies()
254 + ") does not match species of combinable domains collection (" + species + ")" );
// Fresh bookkeeping sets for each protein.
256 final Set<String> saw_i = new HashSet<String>();
257 final Set<String> saw_c = new HashSet<String>();
258 for( int i = 0; i < protein.getProteinDomains().size(); ++i ) {
259 final Domain pd_i = protein.getProteinDomain( i );
260 final String id_i = pd_i.getDomainId();
261 final int current_start = pd_i.getFrom();
// Counted for every domain occurrence, before the saw_i check below.
262 BasicGenomeWideCombinableDomains.countDomains( domain_counts, saw_c, id_i );
// Ids already recorded in saw_i for this protein are skipped
// (the statement recording the id is not visible here).
263 if ( !saw_i.contains( id_i ) ) {
264 if ( dc_type == DomainCombinationType.BASIC ) {
267 CombinableDomains domain_combination = null;
// Reuse the entry already stored for this key domain, if there is one.
268 if ( instance.contains( id_i ) ) {
269 domain_combination = instance.get( id_i );
// Otherwise create an entry whose implementation depends on dc_type.
272 if ( dc_type == DomainCombinationType.DIRECTED_ADJACTANT ) {
273 domain_combination = new AdjactantDirectedCombinableDomains( pd_i.getDomainId(), species );
275 else if ( dc_type == DomainCombinationType.DIRECTED ) {
276 domain_combination = new DirectedCombinableDomains( pd_i.getDomainId(), species );
279 domain_combination = new BasicCombinableDomains( pd_i.getDomainId(), species );
281 instance.add( id_i, domain_combination );
283 domain_combination.addKeyDomainProtein( protein.getProteinId().getId() );//^^^^^^^^^^^^^^
284 final Set<String> saw_j = new HashSet<String>();
285 if ( ignore_combination_with_same_domain ) {
// Candidate domain with the smallest start position found so far.
288 Domain closest = null;
289 for( int j = 0; j < protein.getNumberOfProteinDomains(); ++j ) {
// For non-BASIC types: tests for domains j that do not start after the key domain
// (the action taken is on a line not visible here -- presumably they are skipped).
290 if ( ( dc_type != DomainCombinationType.BASIC )
291 && ( current_start >= protein.getProteinDomain( j ).getFrom() ) ) {
295 final String id = protein.getProteinDomain( j ).getDomainId();
296 if ( !saw_j.contains( id ) ) {
298 if ( dc_type != DomainCombinationType.DIRECTED_ADJACTANT ) {
300 .addCombinableDomain( protein.getProteinDomain( j ).getDomainId() );
// Track the candidate with the smallest start position.
303 if ( closest == null ) {
304 closest = protein.getProteinDomain( j );
307 if ( protein.getProteinDomain( j ).getFrom() < closest.getFrom() ) {
308 closest = protein.getProteinDomain( j );
// For DIRECTED_ADJACTANT only the tracked closest domain is added.
315 if ( ( dc_type == DomainCombinationType.DIRECTED_ADJACTANT ) && ( closest != null ) ) {
316 domain_combination.addCombinableDomain( closest.getDomainId() );
// Optional statistics: protein length per binary domain combination.
318 if ( protein_length_stats_by_dc != null ) {
319 final List<BinaryDomainCombination> dcs = domain_combination.toBinaryDomainCombinations();
320 for( final BinaryDomainCombination dc : dcs ) {
321 final String dc_str = dc.toString();
322 if ( !protein_length_stats_by_dc.containsKey( dc_str ) ) {
323 protein_length_stats_by_dc.put( dc_str, new BasicDescriptiveStatistics() );
325 protein_length_stats_by_dc.get( dc_str ).addValue( protein.getLength() );
// Optional statistics: number of domains of the protein per binary domain combination.
328 if ( domain_number_stats_by_dc != null ) {
329 final List<BinaryDomainCombination> dcs = domain_combination.toBinaryDomainCombinations();
330 for( final BinaryDomainCombination dc : dcs ) {
331 final String dc_str = dc.toString();
332 if ( !domain_number_stats_by_dc.containsKey( dc_str ) ) {
333 domain_number_stats_by_dc.put( dc_str, new BasicDescriptiveStatistics() );
335 domain_number_stats_by_dc.get( dc_str ).addValue( protein.getNumberOfProteinDomains() );
// Store the accumulated occurrence count on the entry of each counted domain id.
341 for( final String key_id : domain_counts.keySet() ) {
342 instance.get( key_id ).setKeyDomainCount( domain_counts.get( key_id ) );
/**
 * Updates the occurrence count kept for id_i in domain_counts: one visible
 * statement stores the existing count plus one when the id is already
 * present, another stores 1.
 * NOTE(review): the lines between the two put calls and the end of the method
 * are not visible in this excerpt -- presumably the second put is the else
 * branch for a first occurrence; confirm. saw_c is not referenced in the
 * visible lines.
 */
347 private static void countDomains( final Map<String, Integer> domain_counts,
348 final Set<String> saw_c,
349 final String id_i ) {
350 if ( domain_counts.containsKey( id_i ) ) {
351 domain_counts.put( id_i, 1 + domain_counts.get( ( id_i ) ) );
354 domain_counts.put( id_i, 1 );