1d115b22171e9c1ced4389e77c21a68436dc902b
[jalview.git] / forester / java / src / org / forester / surfacing / BasicGenomeWideCombinableDomains.java
1
2 package org.forester.surfacing;
3
4 import java.util.ArrayList;
5 import java.util.Collections;
6 import java.util.Comparator;
7 import java.util.HashMap;
8 import java.util.HashSet;
9 import java.util.List;
10 import java.util.Map;
11 import java.util.Set;
12 import java.util.SortedMap;
13 import java.util.SortedSet;
14 import java.util.TreeMap;
15 import java.util.TreeSet;
16
17 import org.forester.go.GoId;
18 import org.forester.protein.BinaryDomainCombination;
19 import org.forester.protein.BinaryDomainCombination.DomainCombinationType;
20 import org.forester.protein.Domain;
21 import org.forester.protein.Protein;
22 import org.forester.species.Species;
23 import org.forester.util.BasicDescriptiveStatistics;
24 import org.forester.util.DescriptiveStatistics;
25 import org.forester.util.ForesterUtil;
26
27 public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDomains {
28
29     private static final Comparator<CombinableDomains> DESCENDING_COMBINATIONS_COUNT_ORDER        = new Comparator<CombinableDomains>() {
30
31                                                                                                       @Override
32                                                                                                       public int compare( final CombinableDomains d1,
33                                                                                                                           final CombinableDomains d2 ) {
34                                                                                                           if ( d1.getNumberOfCombinableDomains() < d2
35                                                                                                                   .getNumberOfCombinableDomains() ) {
36                                                                                                               return 1;
37                                                                                                           }
38                                                                                                           else if ( d1
39                                                                                                                   .getNumberOfCombinableDomains() > d2
40                                                                                                                   .getNumberOfCombinableDomains() ) {
41                                                                                                               return -1;
42                                                                                                           }
43                                                                                                           else {
44                                                                                                               return d1
45                                                                                                                       .getKeyDomain()
46                                                                                                                       .compareTo( d2
47                                                                                                                               .getKeyDomain() );
48                                                                                                           }
49                                                                                                       }
50                                                                                                   };
51     private static final Comparator<CombinableDomains> DESCENDING_KEY_DOMAIN_COUNT_ORDER          = new Comparator<CombinableDomains>() {
52
53                                                                                                       @Override
54                                                                                                       public int compare( final CombinableDomains d1,
55                                                                                                                           final CombinableDomains d2 ) {
56                                                                                                           if ( d1.getKeyDomainCount() < d2
57                                                                                                                   .getKeyDomainCount() ) {
58                                                                                                               return 1;
59                                                                                                           }
60                                                                                                           else if ( d1
61                                                                                                                   .getKeyDomainCount() > d2
62                                                                                                                   .getKeyDomainCount() ) {
63                                                                                                               return -1;
64                                                                                                           }
65                                                                                                           else {
66                                                                                                               return d1
67                                                                                                                       .getKeyDomain()
68                                                                                                                       .compareTo( d2
69                                                                                                                               .getKeyDomain() );
70                                                                                                           }
71                                                                                                       }
72                                                                                                   };
73     private static final Comparator<CombinableDomains> DESCENDING_KEY_DOMAIN_PROTEINS_COUNT_ORDER = new Comparator<CombinableDomains>() {
74
75                                                                                                       @Override
76                                                                                                       public int compare( final CombinableDomains d1,
77                                                                                                                           final CombinableDomains d2 ) {
78                                                                                                           if ( d1.getKeyDomainProteinsCount() < d2
79                                                                                                                   .getKeyDomainProteinsCount() ) {
80                                                                                                               return 1;
81                                                                                                           }
82                                                                                                           else if ( d1
83                                                                                                                   .getKeyDomainProteinsCount() > d2
84                                                                                                                   .getKeyDomainProteinsCount() ) {
85                                                                                                               return -1;
86                                                                                                           }
87                                                                                                           else {
88                                                                                                               return d1
89                                                                                                                       .getKeyDomain()
90                                                                                                                       .compareTo( d2
91                                                                                                                               .getKeyDomain() );
92                                                                                                           }
93                                                                                                       }
94                                                                                                   };
95     final private SortedMap<String, CombinableDomains> _combinable_domains_map;
96     final private DomainCombinationType                _dc_type;
97     final private Species                              _species;
98
99     private BasicGenomeWideCombinableDomains( final Species species, final DomainCombinationType dc_type ) {
100         _combinable_domains_map = new TreeMap<String, CombinableDomains>();
101         _species = species;
102         _dc_type = dc_type;
103     }
104
105     @Override
106     public boolean contains( final String key_id ) {
107         return _combinable_domains_map.containsKey( key_id );
108     }
109
110     @Override
111     public CombinableDomains get( final String key_id ) {
112         return _combinable_domains_map.get( key_id );
113     }
114
115     @Override
116     public SortedMap<String, CombinableDomains> getAllCombinableDomainsIds() {
117         return _combinable_domains_map;
118     }
119
120     @Override
121     public SortedSet<String> getAllDomainIds() {
122         final SortedSet<String> domains = new TreeSet<String>();
123         for( final String key : getAllCombinableDomainsIds().keySet() ) {
124             final CombinableDomains cb = getAllCombinableDomainsIds().get( key );
125             final List<String> ds = cb.getAllDomains();
126             for( final String d : ds ) {
127                 domains.add( d );
128             }
129         }
130         return domains;
131     }
132
133     @Override
134     public DomainCombinationType getDomainCombinationType() {
135         return _dc_type;
136     }
137
138     @Override
139     public SortedSet<String> getMostPromiscuosDomain() {
140         final SortedSet<String> doms = new TreeSet<String>();
141         final int max = ( int ) getPerGenomeDomainPromiscuityStatistics().getMax();
142         for( final String key : getAllCombinableDomainsIds().keySet() ) {
143             final CombinableDomains cb = getAllCombinableDomainsIds().get( key );
144             if ( cb.getNumberOfCombinableDomains() == max ) {
145                 doms.add( key );
146             }
147         }
148         return doms;
149     }
150
151     @Override
152     public DescriptiveStatistics getPerGenomeDomainPromiscuityStatistics() {
153         final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
154         for( final String key : getAllCombinableDomainsIds().keySet() ) {
155             final CombinableDomains cb = getAllCombinableDomainsIds().get( key );
156             stats.addValue( cb.getNumberOfCombinableDomains() );
157         }
158         return stats;
159     }
160
161     @Override
162     public int getSize() {
163         return _combinable_domains_map.size();
164     }
165
166     @Override
167     public Species getSpecies() {
168         return _species;
169     }
170
171     @Override
172     public SortedSet<BinaryDomainCombination> toBinaryDomainCombinations() {
173         final SortedSet<BinaryDomainCombination> binary_combinations = new TreeSet<BinaryDomainCombination>();
174         for( final String key : getAllCombinableDomainsIds().keySet() ) {
175             final CombinableDomains cb = getAllCombinableDomainsIds().get( key );
176             for( final BinaryDomainCombination b : cb.toBinaryDomainCombinations() ) {
177                 binary_combinations.add( b );
178             }
179         }
180         return binary_combinations;
181     }
182
183     @Override
184     public String toString() {
185         return toStringBuilder( GenomeWideCombinableDomainsSortOrder.ALPHABETICAL_KEY_ID ).toString();
186     }
187
188     // Produces something like: 
189     // 2-oxoacid_dh      5       5       2      Biotin_lipoyl [4], E3_binding [3]
190     @Override
191     public StringBuilder toStringBuilder( final GenomeWideCombinableDomainsSortOrder sort_order ) {
192         final StringBuilder sb = new StringBuilder();
193         final List<CombinableDomains> combinable_domains = new ArrayList<CombinableDomains>();
194         for( final String key : getAllCombinableDomainsIds().keySet() ) {
195             final CombinableDomains cb = getAllCombinableDomainsIds().get( key );
196             combinable_domains.add( cb );
197         }
198         if ( sort_order == GenomeWideCombinableDomainsSortOrder.KEY_DOMAIN_COUNT ) {
199             Collections.sort( combinable_domains, BasicGenomeWideCombinableDomains.DESCENDING_KEY_DOMAIN_COUNT_ORDER );
200         }
201         else if ( sort_order == GenomeWideCombinableDomainsSortOrder.KEY_DOMAIN_PROTEINS_COUNT ) {
202             Collections.sort( combinable_domains,
203                               BasicGenomeWideCombinableDomains.DESCENDING_KEY_DOMAIN_PROTEINS_COUNT_ORDER );
204         }
205         else if ( sort_order == GenomeWideCombinableDomainsSortOrder.COMBINATIONS_COUNT ) {
206             Collections.sort( combinable_domains, BasicGenomeWideCombinableDomains.DESCENDING_COMBINATIONS_COUNT_ORDER );
207         }
208         for( final CombinableDomains cb : combinable_domains ) {
209             sb.append( ForesterUtil.pad( new StringBuffer( cb.getKeyDomain().toString() ), 18, ' ', false ) );
210             sb.append( ForesterUtil.pad( new StringBuffer( "" + cb.getKeyDomainCount() ), 8, ' ', false ) );
211             sb.append( ForesterUtil.pad( new StringBuffer( "" + cb.getKeyDomainProteinsCount() ), 8, ' ', false ) );
212             sb.append( ForesterUtil.pad( new StringBuffer( "" + cb.getNumberOfCombinableDomains() ), 8, ' ', false ) );
213             sb.append( cb.getCombiningDomainIdsAsStringBuilder() );
214             sb.append( ForesterUtil.getLineSeparator() );
215         }
216         return sb;
217     }
218
219     private void add( final String key, final CombinableDomains cdc ) {
220         _combinable_domains_map.put( key, cdc );
221     }
222
223     public static BasicGenomeWideCombinableDomains createInstance( final List<Protein> protein_list,
224                                                                    final boolean ignore_combination_with_same_domain,
225                                                                    final Species species ) {
226         return createInstance( protein_list,
227                                ignore_combination_with_same_domain,
228                                species,
229                                null,
230                                DomainCombinationType.BASIC,
231                                null,
232                                null );
233     }
234
235     public static BasicGenomeWideCombinableDomains createInstance( final List<Protein> protein_list,
236                                                                    final boolean ignore_combination_with_same_domain,
237                                                                    final Species species,
238                                                                    final DomainCombinationType dc_type ) {
239         return createInstance( protein_list, ignore_combination_with_same_domain, species, null, dc_type, null, null );
240     }
241
242     public static BasicGenomeWideCombinableDomains createInstance( final List<Protein> protein_list,
243                                                                    final boolean ignore_combination_with_same_domain,
244                                                                    final Species species,
245                                                                    final Map<String, List<GoId>> domain_id_to_go_ids_map,
246                                                                    final DomainCombinationType dc_type,
247                                                                    final Map<String, DescriptiveStatistics> protein_length_stats_by_dc,
248                                                                    final Map<String, DescriptiveStatistics> domain_number_stats_by_dc ) {
249         final BasicGenomeWideCombinableDomains instance = new BasicGenomeWideCombinableDomains( species, dc_type );
250         final Map<String, Integer> domain_counts = new HashMap<String, Integer>();
251         for( final Protein protein : protein_list ) {
252             if ( !protein.getSpecies().equals( species ) ) {
253                 throw new IllegalArgumentException( "species (" + protein.getSpecies()
254                         + ") does not match species of combinable domains collection (" + species + ")" );
255             }
256             final Set<String> saw_i = new HashSet<String>();
257             final Set<String> saw_c = new HashSet<String>();
258             for( int i = 0; i < protein.getProteinDomains().size(); ++i ) {
259                 final Domain pd_i = protein.getProteinDomain( i );
260                 final String id_i = pd_i.getDomainId();
261                 final int current_start = pd_i.getFrom();
262                 BasicGenomeWideCombinableDomains.countDomains( domain_counts, saw_c, id_i );
263                 if ( !saw_i.contains( id_i ) ) {
264                     if ( dc_type == DomainCombinationType.BASIC ) {
265                         saw_i.add( id_i );
266                     }
267                     CombinableDomains domain_combination = null;
268                     if ( instance.contains( id_i ) ) {
269                         domain_combination = instance.get( id_i );
270                     }
271                     else {
272                         if ( dc_type == DomainCombinationType.DIRECTED_ADJACTANT ) {
273                             domain_combination = new AdjactantDirectedCombinableDomains( pd_i.getDomainId(), species );
274                         }
275                         else if ( dc_type == DomainCombinationType.DIRECTED ) {
276                             domain_combination = new DirectedCombinableDomains( pd_i.getDomainId(), species );
277                         }
278                         else {
279                             domain_combination = new BasicCombinableDomains( pd_i.getDomainId(), species );
280                         }
281                         instance.add( id_i, domain_combination );
282                     }
283                     domain_combination.addKeyDomainProtein( protein.getProteinId().getId() );//^^^^^^^^^^^^^^
284                     final Set<String> saw_j = new HashSet<String>();
285                     if ( ignore_combination_with_same_domain ) {
286                         saw_j.add( id_i );
287                     }
288                     Domain closest = null;
289                     for( int j = 0; j < protein.getNumberOfProteinDomains(); ++j ) {
290                         if ( ( dc_type != DomainCombinationType.BASIC )
291                                 && ( current_start >= protein.getProteinDomain( j ).getFrom() ) ) {
292                             continue;
293                         }
294                         if ( i != j ) {
295                             final String id = protein.getProteinDomain( j ).getDomainId();
296                             if ( !saw_j.contains( id ) ) {
297                                 saw_j.add( id );
298                                 if ( dc_type != DomainCombinationType.DIRECTED_ADJACTANT ) {
299                                     domain_combination
300                                             .addCombinableDomain( protein.getProteinDomain( j ).getDomainId() );
301                                 }
302                                 else {
303                                     if ( closest == null ) {
304                                         closest = protein.getProteinDomain( j );
305                                     }
306                                     else {
307                                         if ( protein.getProteinDomain( j ).getFrom() < closest.getFrom() ) {
308                                             closest = protein.getProteinDomain( j );
309                                         }
310                                     }
311                                 }
312                             }
313                         }
314                     }
315                     if ( ( dc_type == DomainCombinationType.DIRECTED_ADJACTANT ) && ( closest != null ) ) {
316                         domain_combination.addCombinableDomain( closest.getDomainId() );
317                     }
318                     if ( protein_length_stats_by_dc != null ) {
319                         final List<BinaryDomainCombination> dcs = domain_combination.toBinaryDomainCombinations();
320                         for( final BinaryDomainCombination dc : dcs ) {
321                             final String dc_str = dc.toString();
322                             if ( !protein_length_stats_by_dc.containsKey( dc_str ) ) {
323                                 protein_length_stats_by_dc.put( dc_str, new BasicDescriptiveStatistics() );
324                             }
325                             protein_length_stats_by_dc.get( dc_str ).addValue( protein.getLength() );
326                         }
327                     }
328                     if ( domain_number_stats_by_dc != null ) {
329                         final List<BinaryDomainCombination> dcs = domain_combination.toBinaryDomainCombinations();
330                         for( final BinaryDomainCombination dc : dcs ) {
331                             final String dc_str = dc.toString();
332                             if ( !domain_number_stats_by_dc.containsKey( dc_str ) ) {
333                                 domain_number_stats_by_dc.put( dc_str, new BasicDescriptiveStatistics() );
334                             }
335                             domain_number_stats_by_dc.get( dc_str ).addValue( protein.getNumberOfProteinDomains() );
336                         }
337                     }
338                 }
339             }
340         }
341         for( final String key_id : domain_counts.keySet() ) {
342             instance.get( key_id ).setKeyDomainCount( domain_counts.get( key_id ) );
343         }
344         return instance;
345     }
346
347     private static void countDomains( final Map<String, Integer> domain_counts,
348                                       final Set<String> saw_c,
349                                       final String id_i ) {
350         if ( domain_counts.containsKey( id_i ) ) {
351             domain_counts.put( id_i, 1 + domain_counts.get( ( id_i ) ) );
352         }
353         else {
354             domain_counts.put( id_i, 1 );
355         }
356         saw_c.add( id_i );
357     }
358 }