in progress
[jalview.git] / forester / java / src / org / forester / surfacing / BasicGenomeWideCombinableDomains.java
1
2 package org.forester.surfacing;
3
4 import java.util.ArrayList;
5 import java.util.Collections;
6 import java.util.Comparator;
7 import java.util.HashMap;
8 import java.util.HashSet;
9 import java.util.List;
10 import java.util.Map;
11 import java.util.Set;
12 import java.util.SortedMap;
13 import java.util.SortedSet;
14 import java.util.TreeMap;
15 import java.util.TreeSet;
16
17 import org.forester.go.GoId;
18 import org.forester.protein.BinaryDomainCombination;
19 import org.forester.protein.BinaryDomainCombination.DomainCombinationType;
20 import org.forester.protein.Domain;
21 import org.forester.protein.Protein;
22 import org.forester.species.Species;
23 import org.forester.util.BasicDescriptiveStatistics;
24 import org.forester.util.DescriptiveStatistics;
25 import org.forester.util.ForesterUtil;
26
27 public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDomains {
28
29     private static final Comparator<CombinableDomains> DESCENDING_KEY_DOMAIN_COUNT_ORDER          = new Comparator<CombinableDomains>() {
30
31                                                                                                       @Override
32                                                                                                       public int compare( final CombinableDomains d1,
33                                                                                                                           final CombinableDomains d2 ) {
34                                                                                                           if ( d1.getKeyDomainCount() < d2
35                                                                                                                   .getKeyDomainCount() ) {
36                                                                                                               return 1;
37                                                                                                           }
38                                                                                                           else if ( d1
39                                                                                                                   .getKeyDomainCount() > d2
40                                                                                                                   .getKeyDomainCount() ) {
41                                                                                                               return -1;
42                                                                                                           }
43                                                                                                           else {
44                                                                                                               return d1
45                                                                                                                       .getKeyDomain()
46                                                                                                                       .compareTo( d2
47                                                                                                                               .getKeyDomain() );
48                                                                                                           }
49                                                                                                       }
50                                                                                                   };
51     private static final Comparator<CombinableDomains> DESCENDING_KEY_DOMAIN_PROTEINS_COUNT_ORDER = new Comparator<CombinableDomains>() {
52
53                                                                                                       @Override
54                                                                                                       public int compare( final CombinableDomains d1,
55                                                                                                                           final CombinableDomains d2 ) {
56                                                                                                           if ( d1.getKeyDomainProteinsCount() < d2
57                                                                                                                   .getKeyDomainProteinsCount() ) {
58                                                                                                               return 1;
59                                                                                                           }
60                                                                                                           else if ( d1
61                                                                                                                   .getKeyDomainProteinsCount() > d2
62                                                                                                                   .getKeyDomainProteinsCount() ) {
63                                                                                                               return -1;
64                                                                                                           }
65                                                                                                           else {
66                                                                                                               return d1
67                                                                                                                       .getKeyDomain()
68                                                                                                                       .compareTo( d2
69                                                                                                                               .getKeyDomain() );
70                                                                                                           }
71                                                                                                       }
72                                                                                                   };
73     private static final Comparator<CombinableDomains> DESCENDING_COMBINATIONS_COUNT_ORDER        = new Comparator<CombinableDomains>() {
74
75                                                                                                       @Override
76                                                                                                       public int compare( final CombinableDomains d1,
77                                                                                                                           final CombinableDomains d2 ) {
78                                                                                                           if ( d1.getNumberOfCombinableDomains() < d2
79                                                                                                                   .getNumberOfCombinableDomains() ) {
80                                                                                                               return 1;
81                                                                                                           }
82                                                                                                           else if ( d1
83                                                                                                                   .getNumberOfCombinableDomains() > d2
84                                                                                                                   .getNumberOfCombinableDomains() ) {
85                                                                                                               return -1;
86                                                                                                           }
87                                                                                                           else {
88                                                                                                               return d1
89                                                                                                                       .getKeyDomain()
90                                                                                                                       .compareTo( d2
91                                                                                                                               .getKeyDomain() );
92                                                                                                           }
93                                                                                                       }
94                                                                                                   };
95     final private SortedMap<String, CombinableDomains> _combinable_domains_map;
96     final private Species                              _species;
97     final private DomainCombinationType                _dc_type;
98
99     private BasicGenomeWideCombinableDomains( final Species species, final DomainCombinationType dc_type ) {
100         _combinable_domains_map = new TreeMap<String, CombinableDomains>();
101         _species = species;
102         _dc_type = dc_type;
103     }
104
105     private void add( final String key, final CombinableDomains cdc ) {
106         _combinable_domains_map.put( key, cdc );
107     }
108
109     @Override
110     public boolean contains( final String key_id ) {
111         return _combinable_domains_map.containsKey( key_id );
112     }
113
114     @Override
115     public CombinableDomains get( final String key_id ) {
116         return _combinable_domains_map.get( key_id );
117     }
118
119     @Override
120     public SortedMap<String, CombinableDomains> getAllCombinableDomainsIds() {
121         return _combinable_domains_map;
122     }
123
124     @Override
125     public SortedSet<String> getAllDomainIds() {
126         final SortedSet<String> domains = new TreeSet<String>();
127         for( final String key : getAllCombinableDomainsIds().keySet() ) {
128             final CombinableDomains cb = getAllCombinableDomainsIds().get( key );
129             final List<String> ds = cb.getAllDomains();
130             for( final String d : ds ) {
131                 domains.add( d );
132             }
133         }
134         return domains;
135     }
136
137     @Override
138     public DomainCombinationType getDomainCombinationType() {
139         return _dc_type;
140     }
141
142     @Override
143     public SortedSet<String> getMostPromiscuosDomain() {
144         final SortedSet<String> doms = new TreeSet<String>();
145         final int max = ( int ) getPerGenomeDomainPromiscuityStatistics().getMax();
146         for( final String key : getAllCombinableDomainsIds().keySet() ) {
147             final CombinableDomains cb = getAllCombinableDomainsIds().get( key );
148             if ( cb.getNumberOfCombinableDomains() == max ) {
149                 doms.add( key );
150             }
151         }
152         return doms;
153     }
154
155     @Override
156     public DescriptiveStatistics getPerGenomeDomainPromiscuityStatistics() {
157         final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
158         for( final String key : getAllCombinableDomainsIds().keySet() ) {
159             final CombinableDomains cb = getAllCombinableDomainsIds().get( key );
160             stats.addValue( cb.getNumberOfCombinableDomains() );
161         }
162         return stats;
163     }
164
165     @Override
166     public int getSize() {
167         return _combinable_domains_map.size();
168     }
169
170     @Override
171     public Species getSpecies() {
172         return _species;
173     }
174
175     @Override
176     public SortedSet<BinaryDomainCombination> toBinaryDomainCombinations() {
177         final SortedSet<BinaryDomainCombination> binary_combinations = new TreeSet<BinaryDomainCombination>();
178         for( final String key : getAllCombinableDomainsIds().keySet() ) {
179             final CombinableDomains cb = getAllCombinableDomainsIds().get( key );
180             for( final BinaryDomainCombination b : cb.toBinaryDomainCombinations() ) {
181                 binary_combinations.add( b );
182             }
183         }
184         return binary_combinations;
185     }
186
187     @Override
188     public String toString() {
189         return toStringBuilder( GenomeWideCombinableDomainsSortOrder.ALPHABETICAL_KEY_ID ).toString();
190     }
191
192     // Produces something like: 
193     // 2-oxoacid_dh      5       5       2      Biotin_lipoyl [4], E3_binding [3]
194     @Override
195     public StringBuilder toStringBuilder( final GenomeWideCombinableDomainsSortOrder sort_order ) {
196         final StringBuilder sb = new StringBuilder();
197         final List<CombinableDomains> combinable_domains = new ArrayList<CombinableDomains>();
198         for( final String key : getAllCombinableDomainsIds().keySet() ) {
199             final CombinableDomains cb = getAllCombinableDomainsIds().get( key );
200             combinable_domains.add( cb );
201         }
202         if ( sort_order == GenomeWideCombinableDomainsSortOrder.KEY_DOMAIN_COUNT ) {
203             Collections.sort( combinable_domains, BasicGenomeWideCombinableDomains.DESCENDING_KEY_DOMAIN_COUNT_ORDER );
204         }
205         else if ( sort_order == GenomeWideCombinableDomainsSortOrder.KEY_DOMAIN_PROTEINS_COUNT ) {
206             Collections.sort( combinable_domains,
207                               BasicGenomeWideCombinableDomains.DESCENDING_KEY_DOMAIN_PROTEINS_COUNT_ORDER );
208         }
209         else if ( sort_order == GenomeWideCombinableDomainsSortOrder.COMBINATIONS_COUNT ) {
210             Collections.sort( combinable_domains, BasicGenomeWideCombinableDomains.DESCENDING_COMBINATIONS_COUNT_ORDER );
211         }
212         for( final CombinableDomains cb : combinable_domains ) {
213             sb.append( ForesterUtil.pad( new StringBuffer( cb.getKeyDomain().toString() ), 18, ' ', false ) );
214             sb.append( ForesterUtil.pad( new StringBuffer( "" + cb.getKeyDomainCount() ), 8, ' ', false ) );
215             sb.append( ForesterUtil.pad( new StringBuffer( "" + cb.getKeyDomainProteinsCount() ), 8, ' ', false ) );
216             sb.append( ForesterUtil.pad( new StringBuffer( "" + cb.getNumberOfCombinableDomains() ), 8, ' ', false ) );
217             sb.append( cb.getCombiningDomainIdsAsStringBuilder() );
218             sb.append( ForesterUtil.getLineSeparator() );
219         }
220         return sb;
221     }
222
223     private static void countDomains( final Map<String, Integer> domain_counts,
224                                       final Set<String> saw_c,
225                                       final String id_i ) {
226         if ( domain_counts.containsKey( id_i ) ) {
227             domain_counts.put( id_i, 1 + domain_counts.get( ( id_i ) ) );
228         }
229         else {
230             domain_counts.put( id_i, 1 );
231         }
232         saw_c.add( id_i );
233     }
234
235     public static BasicGenomeWideCombinableDomains createInstance( final List<Protein> protein_list,
236                                                                    final boolean ignore_combination_with_same_domain,
237                                                                    final Species species ) {
238         return createInstance( protein_list,
239                                ignore_combination_with_same_domain,
240                                species,
241                                null,
242                                DomainCombinationType.BASIC,
243                                null,
244                                null );
245     }
246
247     public static BasicGenomeWideCombinableDomains createInstance( final List<Protein> protein_list,
248                                                                    final boolean ignore_combination_with_same_domain,
249                                                                    final Species species,
250                                                                    final DomainCombinationType dc_type ) {
251         return createInstance( protein_list, ignore_combination_with_same_domain, species, null, dc_type, null, null );
252     }
253
254     public static BasicGenomeWideCombinableDomains createInstance( final List<Protein> protein_list,
255                                                                    final boolean ignore_combination_with_same_domain,
256                                                                    final Species species,
257                                                                    final Map<String, List<GoId>> domain_id_to_go_ids_map,
258                                                                    final DomainCombinationType dc_type,
259                                                                    final Map<String, DescriptiveStatistics> protein_length_stats_by_dc,
260                                                                    final Map<String, DescriptiveStatistics> domain_number_stats_by_dc ) {
261         final BasicGenomeWideCombinableDomains instance = new BasicGenomeWideCombinableDomains( species, dc_type );
262         final Map<String, Integer> domain_counts = new HashMap<String, Integer>();
263         for( final Protein protein : protein_list ) {
264             if ( !protein.getSpecies().equals( species ) ) {
265                 throw new IllegalArgumentException( "species (" + protein.getSpecies()
266                         + ") does not match species of combinable domains collection (" + species + ")" );
267             }
268             final Set<String> saw_i = new HashSet<String>();
269             final Set<String> saw_c = new HashSet<String>();
270             for( int i = 0; i < protein.getProteinDomains().size(); ++i ) {
271                 final Domain pd_i = protein.getProteinDomain( i );
272                 final String id_i = pd_i.getDomainId();
273                 final int current_start = pd_i.getFrom();
274                 BasicGenomeWideCombinableDomains.countDomains( domain_counts, saw_c, id_i );
275                 if ( !saw_i.contains( id_i ) ) {
276                     if ( dc_type == DomainCombinationType.BASIC ) {
277                         saw_i.add( id_i );
278                     }
279                     CombinableDomains domain_combination = null;
280                     if ( instance.contains( id_i ) ) {
281                         domain_combination = instance.get( id_i );
282                     }
283                     else {
284                         if ( dc_type == DomainCombinationType.DIRECTED_ADJACTANT ) {
285                             domain_combination = new AdjactantDirectedCombinableDomains( pd_i.getDomainId(), species );
286                         }
287                         else if ( dc_type == DomainCombinationType.DIRECTED ) {
288                             domain_combination = new DirectedCombinableDomains( pd_i.getDomainId(), species );
289                         }
290                         else {
291                             domain_combination = new BasicCombinableDomains( pd_i.getDomainId(), species );
292                         }
293                         instance.add( id_i, domain_combination );
294                     }
295                     domain_combination.addKeyDomainProtein( protein.getProteinId().getId() );//^^^^^^^^^^^^^^
296                     final Set<String> saw_j = new HashSet<String>();
297                     if ( ignore_combination_with_same_domain ) {
298                         saw_j.add( id_i );
299                     }
300                     Domain closest = null;
301                     for( int j = 0; j < protein.getNumberOfProteinDomains(); ++j ) {
302                         if ( ( dc_type != DomainCombinationType.BASIC )
303                                 && ( current_start >= protein.getProteinDomain( j ).getFrom() ) ) {
304                             continue;
305                         }
306                         if ( i != j ) {
307                             final String id = protein.getProteinDomain( j ).getDomainId();
308                             if ( !saw_j.contains( id ) ) {
309                                 saw_j.add( id );
310                                 if ( dc_type != DomainCombinationType.DIRECTED_ADJACTANT ) {
311                                     domain_combination
312                                             .addCombinableDomain( protein.getProteinDomain( j ).getDomainId() );
313                                 }
314                                 else {
315                                     if ( closest == null ) {
316                                         closest = protein.getProteinDomain( j );
317                                     }
318                                     else {
319                                         if ( protein.getProteinDomain( j ).getFrom() < closest.getFrom() ) {
320                                             closest = protein.getProteinDomain( j );
321                                         }
322                                     }
323                                 }
324                             }
325                         }
326                     }
327                     if ( ( dc_type == DomainCombinationType.DIRECTED_ADJACTANT ) && ( closest != null ) ) {
328                         domain_combination.addCombinableDomain( closest.getDomainId() );
329                     }
330                     if ( protein_length_stats_by_dc != null ) {
331                         final List<BinaryDomainCombination> dcs = domain_combination.toBinaryDomainCombinations();
332                         for( final BinaryDomainCombination dc : dcs ) {
333                             final String dc_str = dc.toString();
334                             if ( !protein_length_stats_by_dc.containsKey( dc_str ) ) {
335                                 protein_length_stats_by_dc.put( dc_str, new BasicDescriptiveStatistics() );
336                             }
337                             protein_length_stats_by_dc.get( dc_str ).addValue( protein.getLength() );
338                         }
339                     }
340                     if ( domain_number_stats_by_dc != null ) {
341                         final List<BinaryDomainCombination> dcs = domain_combination.toBinaryDomainCombinations();
342                         for( final BinaryDomainCombination dc : dcs ) {
343                             final String dc_str = dc.toString();
344                             if ( !domain_number_stats_by_dc.containsKey( dc_str ) ) {
345                                 domain_number_stats_by_dc.put( dc_str, new BasicDescriptiveStatistics() );
346                             }
347                             domain_number_stats_by_dc.get( dc_str ).addValue( protein.getNumberOfProteinDomains() );
348                         }
349                     }
350                 }
351             }
352         }
353         for( final String key_id : domain_counts.keySet() ) {
354             instance.get( key_id ).setKeyDomainCount( domain_counts.get( key_id ) );
355         }
356         return instance;
357     }
358 }