a36aa12eca65fa24241e78e58ba34f97f8a16fd8
[jalview.git] / forester / java / src / org / forester / surfacing / DomainCountsDifferenceUtil.java
1 // $Id:
2 // $
3 //
4 // FORESTER -- software libraries and applications
5 // for evolutionary biology research and applications.
6 //
7 // Copyright (C) 2008-2009 Christian M. Zmasek
8 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // All rights reserved
10 // 
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
15 //
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
20 // 
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 //
25 // Contact: phylosoft @ gmail . com
26 // WWW: www.phylosoft.org/forester
27
28 package org.forester.surfacing;
29
30 import java.io.BufferedWriter;
31 import java.io.File;
32 import java.io.FileWriter;
33 import java.io.IOException;
34 import java.io.Writer;
35 import java.text.DecimalFormat;
36 import java.text.NumberFormat;
37 import java.util.ArrayList;
38 import java.util.HashMap;
39 import java.util.List;
40 import java.util.Map;
41 import java.util.Set;
42 import java.util.SortedMap;
43 import java.util.SortedSet;
44 import java.util.TreeMap;
45 import java.util.TreeSet;
46
47 import org.forester.go.GoId;
48 import org.forester.go.GoTerm;
49 import org.forester.util.BasicDescriptiveStatistics;
50 import org.forester.util.DescriptiveStatistics;
51 import org.forester.util.ForesterUtil;
52
53 /*
54  * Poorly designed static class which essentially has one method:
55  * calculateCopyNumberDifferences.
56  */
57 public final class DomainCountsDifferenceUtil {
58
    // Scientific-notation formatter (pattern "0.0E0") applied to the median key domain
    // confidence when a CombinableDomains entry is rendered as text or HTML.
    // NOTE(review): java.text.DecimalFormat is documented as not synchronized and this
    // instance is shared statically -- confirm this class is only used single-threaded.
    private final static NumberFormat          FORMATTER                                   = new DecimalFormat( "0.0E0" );
    // How the per-genome copy counts of the "high copy target" species are collapsed
    // into a single value (here: the minimum over those genomes).
    private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES = COPY_CALCULATION_MODE.MIN;
    // How the per-genome copy counts of the "high copy base" species are collapsed
    // into a single value (here: the minimum over those genomes).
    private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES   = COPY_CALCULATION_MODE.MIN;
    // How the per-genome copy counts of the "low copy" species are collapsed
    // into a single value (here: the maximum over those genomes).
    private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_LOW_COPY_SPECIES         = COPY_CALCULATION_MODE.MAX;
    // File name suffix; presumably used for the per-domain protein output files of the
    // plus-minus analysis -- its use is outside this part of the file, TODO confirm.
    private static final String                PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX         = ".prot";
64
65     //FIXME really needs to be tested! 
66     private static void addCounts( final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
67                                    final BinaryDomainCombination dc,
68                                    final GenomeWideCombinableDomains genome,
69                                    final Set<BinaryDomainCombination> bdc ) {
70         if ( !copy_counts.containsKey( dc ) ) {
71             copy_counts.put( dc, new ArrayList<Integer>() );
72         }
73         if ( bdc.contains( dc )
74                 && ( ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains().get( dc.getId1() ) != null ) ) {
75             final int count = ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains().get( dc
76                     .getId1() );
77             copy_counts.get( dc ).add( count );
78         }
79         else {
80             copy_counts.get( dc ).add( 0 );
81         }
82     }
83
84     private static void addCounts( final SortedMap<DomainId, List<Integer>> copy_counts,
85                                    final DomainId domain,
86                                    final GenomeWideCombinableDomains genome ) {
87         if ( !copy_counts.containsKey( domain ) ) {
88             copy_counts.put( domain, new ArrayList<Integer>() );
89         }
90         if ( genome.contains( domain ) ) {
91             copy_counts.get( domain ).add( genome.get( domain ).getKeyDomainProteinsCount() );
92         }
93         else {
94             copy_counts.get( domain ).add( 0 );
95         }
96     }
97
98     private static StringBuilder addGoInformation( final DomainId d,
99                                                    final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
100                                                    final Map<GoId, GoTerm> go_id_to_term_map ) {
101         final StringBuilder sb = new StringBuilder();
102         if ( ( domain_id_to_go_ids_map == null ) || domain_id_to_go_ids_map.isEmpty()
103                 || !domain_id_to_go_ids_map.containsKey( d ) ) {
104             return sb;
105         }
106         final List<GoId> go_ids = domain_id_to_go_ids_map.get( d );
107         for( int i = 0; i < go_ids.size(); ++i ) {
108             final GoId go_id = go_ids.get( i );
109             if ( go_id_to_term_map.containsKey( go_id ) ) {
110                 appendGoTerm( sb, go_id_to_term_map.get( go_id ) );
111                 sb.append( "<br>" );
112             }
113             else {
114                 sb.append( "go id \"" + go_id + "\" not found [" + d.getId() + "]" );
115             }
116         }
117         return sb;
118     }
119
120     private static void appendGoTerm( final StringBuilder sb, final GoTerm go_term ) {
121         final GoId go_id = go_term.getGoId();
122         sb.append( "<a href=\"" + SurfacingConstants.AMIGO_LINK + go_id + "\" target=\"amigo_window\">" + go_id
123                 + "</a>" );
124         sb.append( ":" );
125         sb.append( go_term.getName() );
126         sb.append( " [" );
127         sb.append( go_term.getGoNameSpace().toShortString() );
128         sb.append( "]" );
129     }
130
131     public static void calculateCopyNumberDifferences( final List<GenomeWideCombinableDomains> genomes,
132                                                        final SortedMap<Species, List<Protein>> protein_lists_per_species,
133                                                        final List<String> high_copy_base_species,
134                                                        final List<String> high_copy_target_species,
135                                                        final List<String> low_copy_species,
136                                                        final int min_diff,
137                                                        final Double factor,
138                                                        final File plain_output_dom,
139                                                        final File html_output_dom,
140                                                        final File html_output_dc,
141                                                        final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
142                                                        final Map<GoId, GoTerm> go_id_to_term_map,
143                                                        final File all_domains_go_ids_out_dom,
144                                                        final File passing_domains_go_ids_out_dom,
145                                                        final File proteins_file_base ) throws IOException {
146         if ( genomes.size() < 1 ) {
147             throw new IllegalArgumentException( "attempt to use empty list of genomes for domain difference calculation" );
148         }
149         if ( ( high_copy_base_species.size() < 1 ) || ( low_copy_species.size() < 1 ) ) {
150             throw new IllegalArgumentException( "attempt to use empty list of species for domain difference calculation" );
151         }
152         if ( high_copy_base_species.contains( high_copy_target_species )
153                 || low_copy_species.contains( high_copy_target_species ) ) {
154             throw new IllegalArgumentException( "species [" + high_copy_target_species
155                     + "] appears in other list as well" );
156         }
157         if ( min_diff < 0 ) {
158             throw new IllegalArgumentException( "attempt to use negative addition [" + min_diff + "]" );
159         }
160         if ( factor <= 0.0 ) {
161             throw new IllegalArgumentException( "attempt to use factor equal or smaller than 0.0 [" + factor + "]" );
162         }
163         SurfacingUtil.checkForOutputFileWriteability( plain_output_dom );
164         SurfacingUtil.checkForOutputFileWriteability( html_output_dom );
165         SurfacingUtil.checkForOutputFileWriteability( html_output_dc );
166         SurfacingUtil.checkForOutputFileWriteability( all_domains_go_ids_out_dom );
167         SurfacingUtil.checkForOutputFileWriteability( passing_domains_go_ids_out_dom );
168         final Writer plain_writer = new BufferedWriter( new FileWriter( plain_output_dom ) );
169         final Writer html_writer = new BufferedWriter( new FileWriter( html_output_dom ) );
170         final Writer html_writer_dc = new BufferedWriter( new FileWriter( html_output_dc ) );
171         final Writer all_gos_writer = new BufferedWriter( new FileWriter( all_domains_go_ids_out_dom ) );
172         final Writer passing_gos_writer = new BufferedWriter( new FileWriter( passing_domains_go_ids_out_dom ) );
173         final SortedMap<DomainId, Double> high_copy_base_values = new TreeMap<DomainId, Double>();
174         final SortedMap<DomainId, Double> high_copy_target_values = new TreeMap<DomainId, Double>();
175         final SortedMap<DomainId, Double> low_copy_values = new TreeMap<DomainId, Double>();
176         final SortedMap<DomainId, List<Integer>> high_copy_base_copy_counts = new TreeMap<DomainId, List<Integer>>();
177         final SortedMap<DomainId, List<Integer>> high_copy_target_copy_counts = new TreeMap<DomainId, List<Integer>>();
178         final SortedMap<DomainId, List<Integer>> low_copy_copy_counts = new TreeMap<DomainId, List<Integer>>();
179         final SortedSet<DomainId> all_domains = new TreeSet<DomainId>();
180         final SortedMap<BinaryDomainCombination, Double> high_copy_base_values_dc = new TreeMap<BinaryDomainCombination, Double>();
181         final SortedMap<BinaryDomainCombination, Double> high_copy_target_values_dc = new TreeMap<BinaryDomainCombination, Double>();
182         final SortedMap<BinaryDomainCombination, Double> low_copy_values_dc = new TreeMap<BinaryDomainCombination, Double>();
183         final SortedMap<BinaryDomainCombination, List<Integer>> high_copy_base_copy_counts_dc = new TreeMap<BinaryDomainCombination, List<Integer>>();
184         final SortedMap<BinaryDomainCombination, List<Integer>> high_copy_target_copy_counts_dc = new TreeMap<BinaryDomainCombination, List<Integer>>();
185         final SortedMap<BinaryDomainCombination, List<Integer>> low_copy_copy_counts_dc = new TreeMap<BinaryDomainCombination, List<Integer>>();
186         final SortedSet<BinaryDomainCombination> all_dcs = new TreeSet<BinaryDomainCombination>();
187         final Map<String, Set<BinaryDomainCombination>> bdcs_per_genome = new HashMap<String, Set<BinaryDomainCombination>>();
188         final SortedSet<GoId> go_ids_of_passing_domains = new TreeSet<GoId>();
189         final SortedSet<GoId> go_ids_all = new TreeSet<GoId>();
190         for( final GenomeWideCombinableDomains genome : genomes ) {
191             final SortedSet<DomainId> domains = genome.getAllDomainIds();
192             final SortedSet<BinaryDomainCombination> dcs = genome.toBinaryDomainCombinations();
193             final String species = genome.getSpecies().getSpeciesId();
194             bdcs_per_genome.put( species, genome.toBinaryDomainCombinations() );
195             for( final DomainId d : domains ) {
196                 all_domains.add( d );
197                 if ( domain_id_to_go_ids_map.containsKey( d ) ) {
198                     go_ids_all.addAll( domain_id_to_go_ids_map.get( d ) );
199                 }
200             }
201             for( final BinaryDomainCombination dc : dcs ) {
202                 all_dcs.add( dc );
203             }
204         }
205         for( final DomainId domain : all_domains ) {
206             for( final GenomeWideCombinableDomains genome : genomes ) {
207                 final String species = genome.getSpecies().getSpeciesId();
208                 if ( high_copy_base_species.contains( species ) ) {
209                     DomainCountsDifferenceUtil.addCounts( high_copy_base_copy_counts, domain, genome );
210                 }
211                 if ( high_copy_target_species.contains( species ) ) {
212                     DomainCountsDifferenceUtil.addCounts( high_copy_target_copy_counts, domain, genome );
213                 }
214                 if ( low_copy_species.contains( species ) ) {
215                     DomainCountsDifferenceUtil.addCounts( low_copy_copy_counts, domain, genome );
216                 }
217             }
218         }
219         for( final BinaryDomainCombination dc : all_dcs ) {
220             for( final GenomeWideCombinableDomains genome : genomes ) {
221                 final String species = genome.getSpecies().getSpeciesId();
222                 if ( high_copy_base_species.contains( species ) ) {
223                     DomainCountsDifferenceUtil.addCounts( high_copy_base_copy_counts_dc, dc, genome, bdcs_per_genome
224                             .get( species ) );
225                 }
226                 if ( high_copy_target_species.contains( species ) ) {
227                     DomainCountsDifferenceUtil.addCounts( high_copy_target_copy_counts_dc, dc, genome, bdcs_per_genome
228                             .get( species ) );
229                 }
230                 if ( low_copy_species.contains( species ) ) {
231                     DomainCountsDifferenceUtil.addCounts( low_copy_copy_counts_dc, dc, genome, bdcs_per_genome
232                             .get( species ) );
233                 }
234             }
235         }
236         for( final DomainId domain : all_domains ) {
237             calculateDomainCountsBasedValue( high_copy_target_values,
238                                              high_copy_target_copy_counts,
239                                              domain,
240                                              COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
241             calculateDomainCountsBasedValue( high_copy_base_values,
242                                              high_copy_base_copy_counts,
243                                              domain,
244                                              COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
245             calculateDomainCountsBasedValue( low_copy_values,
246                                              low_copy_copy_counts,
247                                              domain,
248                                              COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
249         }
250         for( final BinaryDomainCombination dc : all_dcs ) {
251             calculateDomainCountsBasedValue( high_copy_target_values_dc,
252                                              high_copy_target_copy_counts_dc,
253                                              dc,
254                                              COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
255             calculateDomainCountsBasedValue( high_copy_base_values_dc,
256                                              high_copy_base_copy_counts_dc,
257                                              dc,
258                                              COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
259             calculateDomainCountsBasedValue( low_copy_values_dc,
260                                              low_copy_copy_counts_dc,
261                                              dc,
262                                              COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
263         }
264         writeDomainValuesToFiles( genomes,
265                                   high_copy_base_species,
266                                   high_copy_target_species,
267                                   low_copy_species,
268                                   min_diff,
269                                   factor,
270                                   domain_id_to_go_ids_map,
271                                   go_id_to_term_map,
272                                   plain_writer,
273                                   html_writer,
274                                   proteins_file_base,
275                                   high_copy_base_values,
276                                   high_copy_target_values,
277                                   low_copy_values,
278                                   all_domains,
279                                   go_ids_of_passing_domains,
280                                   protein_lists_per_species );
281         writeDomainCombinationValuesToFiles( genomes,
282                                              high_copy_base_species,
283                                              high_copy_target_species,
284                                              low_copy_species,
285                                              min_diff,
286                                              factor,
287                                              html_writer_dc,
288                                              high_copy_base_values_dc,
289                                              high_copy_target_values_dc,
290                                              low_copy_values_dc,
291                                              all_dcs,
292                                              bdcs_per_genome );
293         writeGoIdsToFile( all_gos_writer, go_ids_all );
294         writeGoIdsToFile( passing_gos_writer, go_ids_of_passing_domains );
295     }
296
297     private static void calculateDomainCountsBasedValue( final SortedMap<BinaryDomainCombination, Double> copy_values,
298                                                          final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
299                                                          final BinaryDomainCombination bdc,
300                                                          final COPY_CALCULATION_MODE copy_calc_mode ) {
301         if ( copy_counts.containsKey( bdc ) ) {
302             switch ( copy_calc_mode ) {
303                 case MAX:
304                     DomainCountsDifferenceUtil.calculateMaxCount( copy_values, copy_counts, bdc );
305                     break;
306                 case MIN:
307                     DomainCountsDifferenceUtil.calculateMinCount( copy_values, copy_counts, bdc );
308                     break;
309                 case MEAN:
310                     DomainCountsDifferenceUtil.calculateMeanCount( copy_values, copy_counts, bdc );
311                     break;
312                 case MEDIAN:
313                     DomainCountsDifferenceUtil.calculateMedianCount( copy_values, copy_counts, bdc );
314                     break;
315                 default:
316                     throw new IllegalArgumentException();
317             }
318         }
319         else {
320             copy_values.put( bdc, Double.valueOf( 0.0 ) );
321         }
322     }
323
324     private static void calculateDomainCountsBasedValue( final SortedMap<DomainId, Double> copy_values,
325                                                          final SortedMap<DomainId, List<Integer>> copy_counts,
326                                                          final DomainId domain,
327                                                          final COPY_CALCULATION_MODE copy_calc_mode ) {
328         if ( copy_counts.containsKey( domain ) ) {
329             switch ( copy_calc_mode ) {
330                 case MAX:
331                     DomainCountsDifferenceUtil.calculateMaxCount( copy_values, copy_counts, domain );
332                     break;
333                 case MIN:
334                     DomainCountsDifferenceUtil.calculateMinCount( copy_values, copy_counts, domain );
335                     break;
336                 case MEAN:
337                     DomainCountsDifferenceUtil.calculateMeanCount( copy_values, copy_counts, domain );
338                     break;
339                 case MEDIAN:
340                     DomainCountsDifferenceUtil.calculateMedianCount( copy_values, copy_counts, domain );
341                     break;
342                 default:
343                     throw new IllegalArgumentException();
344             }
345         }
346         else {
347             copy_values.put( domain, Double.valueOf( 0.0 ) );
348         }
349     }
350
351     private static void calculateMaxCount( final SortedMap<BinaryDomainCombination, Double> results,
352                                            final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
353                                            final BinaryDomainCombination bdc ) {
354         final List<Integer> counts = copy_counts.get( bdc );
355         int max = 0;
356         for( final Integer count : counts ) {
357             if ( count > max ) {
358                 max = count;
359             }
360         }
361         results.put( bdc, ( double ) max );
362     }
363
364     private static void calculateMaxCount( final SortedMap<DomainId, Double> results,
365                                            final SortedMap<DomainId, List<Integer>> copy_counts,
366                                            final DomainId domain ) {
367         final List<Integer> counts = copy_counts.get( domain );
368         int max = 0;
369         for( final Integer count : counts ) {
370             if ( count > max ) {
371                 max = count;
372             }
373         }
374         results.put( domain, ( double ) max );
375     }
376
377     private static void calculateMeanCount( final SortedMap<BinaryDomainCombination, Double> results,
378                                             final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
379                                             final BinaryDomainCombination bdc ) {
380         final List<Integer> counts = copy_counts.get( bdc );
381         int sum = 0;
382         for( final Integer count : counts ) {
383             sum += count;
384         }
385         results.put( bdc, ( ( double ) sum ) / ( ( double ) counts.size() ) );
386     }
387
388     private static void calculateMeanCount( final SortedMap<DomainId, Double> results,
389                                             final SortedMap<DomainId, List<Integer>> copy_counts,
390                                             final DomainId domain ) {
391         final List<Integer> counts = copy_counts.get( domain );
392         int sum = 0;
393         for( final Integer count : counts ) {
394             sum += count;
395         }
396         results.put( domain, ( ( double ) sum ) / ( ( double ) counts.size() ) );
397     }
398
399     private static void calculateMedianCount( final SortedMap<BinaryDomainCombination, Double> results,
400                                               final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
401                                               final BinaryDomainCombination bdc ) {
402         final List<Integer> counts = copy_counts.get( bdc );
403         final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
404         for( final Integer count : counts ) {
405             stats.addValue( count );
406         }
407         results.put( bdc, stats.median() );
408     }
409
410     private static void calculateMedianCount( final SortedMap<DomainId, Double> results,
411                                               final SortedMap<DomainId, List<Integer>> copy_counts,
412                                               final DomainId domain ) {
413         final List<Integer> counts = copy_counts.get( domain );
414         final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
415         for( final Integer count : counts ) {
416             stats.addValue( count );
417         }
418         results.put( domain, stats.median() );
419     }
420
421     private static void calculateMinCount( final SortedMap<BinaryDomainCombination, Double> results,
422                                            final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
423                                            final BinaryDomainCombination bdc ) {
424         final List<Integer> counts = copy_counts.get( bdc );
425         int min = Integer.MAX_VALUE;
426         for( final Integer count : counts ) {
427             if ( count < min ) {
428                 min = count;
429             }
430         }
431         results.put( bdc, ( double ) min );
432     }
433
434     private static void calculateMinCount( final SortedMap<DomainId, Double> results,
435                                            final SortedMap<DomainId, List<Integer>> copy_counts,
436                                            final DomainId domain ) {
437         final List<Integer> counts = copy_counts.get( domain );
438         int min = Integer.MAX_VALUE;
439         for( final Integer count : counts ) {
440             if ( count < min ) {
441                 min = count;
442             }
443         }
444         results.put( domain, ( double ) min );
445     }
446
447     private static String combinableDomaindToString( final CombinableDomains cd ) {
448         final StringBuilder sb = new StringBuilder();
449         sb.append( cd.getKeyDomainProteinsCount() );
450         sb.append( "\t[" );
451         sb.append( FORMATTER.format( cd.getKeyDomainConfidenceDescriptiveStatistics().median() ) );
452         sb.append( "]" );
453         return sb.toString();
454     }
455
456     private static String combinableDomaindToStringHtml( final CombinableDomains cd ) {
457         final StringBuilder sb = new StringBuilder();
458         sb.append( "[" );
459         sb.append( cd.getKeyDomainCount() );
460         sb.append( ", <b>" );
461         sb.append( cd.getKeyDomainProteinsCount() );
462         sb.append( "</b>, " );
463         sb.append( cd.getNumberOfCombinableDomains() );
464         sb.append( "]</td><td>[" );
465         sb.append( FORMATTER.format( cd.getKeyDomainConfidenceDescriptiveStatistics().median() ) );
466         sb.append( "]</td><td>" );
467         sb.append( cd.getCombiningDomainIdsAsStringBuilder() );
468         return sb.toString();
469     }
470
471     private static void writeCopyNumberValues( final SortedMap<BinaryDomainCombination, Double> copy_means,
472                                                final BinaryDomainCombination bdc,
473                                                final GenomeWideCombinableDomains genome,
474                                                final Map<String, Set<BinaryDomainCombination>> bdcs_per_genome,
475                                                final String species,
476                                                final Writer html_writer,
477                                                final String color ) throws IOException {
478         html_writer.write( "<td> " );
479         if ( !ForesterUtil.isEmpty( color ) ) {
480             html_writer.write( "<font color=\"" + color + "\">" );
481         }
482         html_writer.write( "<b>" + species + ":</b> " );
483         if ( !ForesterUtil.isEmpty( color ) ) {
484             html_writer.write( "</font>" );
485         }
486         html_writer.write( "</td><td>" );
487         if ( bdcs_per_genome.get( species ).contains( bdc ) && ( copy_means.get( bdc ) > 0 ) ) {
488             final int count = ( ( BasicCombinableDomains ) genome.get( bdc.getId0() ) ).getCombiningDomains().get( bdc
489                     .getId1() );
490             html_writer.write( count + "" );
491         }
492         else {
493             html_writer.write( "0" );
494         }
495         html_writer.write( "</td>" );
496     }
497
498     private static void writeCopyNumberValues( final SortedMap<DomainId, Double> copy_means,
499                                                final DomainId domain,
500                                                final GenomeWideCombinableDomains genome,
501                                                final String species,
502                                                final Writer plain_writer,
503                                                final Writer html_writer,
504                                                final String color ) throws IOException {
505         plain_writer.write( "  " + species + "\t" );
506         html_writer.write( "<td> " );
507         if ( !ForesterUtil.isEmpty( color ) ) {
508             html_writer.write( "<font color=\"" + color + "\">" );
509         }
510         html_writer.write( "<b>" + species + ":</b> " );
511         if ( !ForesterUtil.isEmpty( color ) ) {
512             html_writer.write( "</font>" );
513         }
514         html_writer.write( "</td><td>" );
515         if ( genome.contains( domain ) && ( copy_means.get( domain ) > 0 ) ) {
516             plain_writer.write( DomainCountsDifferenceUtil.combinableDomaindToString( genome.get( domain ) ) );
517             html_writer.write( DomainCountsDifferenceUtil.combinableDomaindToStringHtml( genome.get( domain ) ) );
518         }
519         else {
520             plain_writer.write( "0" );
521             html_writer.write( "0" );
522         }
523         html_writer.write( "</td>" );
524         plain_writer.write( SurfacingConstants.NL );
525     }
526
527     private static void writeDomainCombinationValuesToFiles( final List<GenomeWideCombinableDomains> genomes,
528                                                              final List<String> high_copy_base_species,
529                                                              final List<String> high_copy_target_species,
530                                                              final List<String> low_copy_species,
531                                                              final int min_diff,
532                                                              final Double factor,
533                                                              final Writer html_writer,
534                                                              final SortedMap<BinaryDomainCombination, Double> high_copy_base_values,
535                                                              final SortedMap<BinaryDomainCombination, Double> high_copy_target_values,
536                                                              final SortedMap<BinaryDomainCombination, Double> low_copy_values,
537                                                              final SortedSet<BinaryDomainCombination> all_bdcs,
538                                                              final Map<String, Set<BinaryDomainCombination>> bdcs_per_genome )
539             throws IOException {
540         int counter = 0;
541         int total_absense_counter = 0;
542         int not_total_absense_counter = 0;
543         SurfacingUtil.addHtmlHead( html_writer, "Binary Domain Combination Copy Differences" );
544         html_writer.write( "<body><table>" );
545         for( final BinaryDomainCombination bdc : all_bdcs ) {
546             if ( ( high_copy_base_values.get( bdc ) > 0 ) && ( high_copy_target_values.get( bdc ) > 0 )
547                     && ( high_copy_base_values.get( bdc ) >= low_copy_values.get( bdc ) ) ) {
548                 if ( high_copy_target_values.get( bdc ) >= min_diff + ( factor * low_copy_values.get( bdc ) ) ) {
549                     if ( low_copy_values.get( bdc ) <= 0.0 ) {
550                         ++total_absense_counter;
551                     }
552                     else {
553                         ++not_total_absense_counter;
554                     }
555                     ++counter;
556                     html_writer.write( "<tr><td><a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + bdc.getId0()
557                             + "\">" + bdc.getId0() + "</a> = <a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK
558                             + bdc.getId1() + "\">" + bdc.getId1() + "</a>" );
559                     html_writer.write( "</td><td>" );
560                     html_writer.write( "<table>" );
561                     for( final GenomeWideCombinableDomains genome : genomes ) {
562                         final String species = genome.getSpecies().getSpeciesId();
563                         if ( high_copy_target_species.contains( species ) ) {
564                             html_writer.write( "<tr>" );
565                             writeCopyNumberValues( high_copy_target_values,
566                                                    bdc,
567                                                    genome,
568                                                    bdcs_per_genome,
569                                                    species,
570                                                    html_writer,
571                                                    "#0000FF" );
572                             html_writer.write( "</tr>" );
573                         }
574                         else if ( low_copy_species.contains( species ) ) {
575                             html_writer.write( "<tr>" );
576                             writeCopyNumberValues( low_copy_values,
577                                                    bdc,
578                                                    genome,
579                                                    bdcs_per_genome,
580                                                    species,
581                                                    html_writer,
582                                                    "#A0A0A0" );
583                             html_writer.write( "</tr>" );
584                         }
585                         else if ( high_copy_base_species.contains( species ) ) {
586                             html_writer.write( "<tr>" );
587                             writeCopyNumberValues( high_copy_base_values,
588                                                    bdc,
589                                                    genome,
590                                                    bdcs_per_genome,
591                                                    species,
592                                                    html_writer,
593                                                    "#404040" );
594                             html_writer.write( "</tr>" );
595                         }
596                     }
597                     html_writer.write( "</table>" );
598                     html_writer.write( "</td></tr>" );
599                     html_writer.write( SurfacingConstants.NL );
600                 }
601             }
602         }
603         html_writer.write( "</table>" );
604         html_writer.write( SurfacingConstants.NL );
605         html_writer.write( "<hr>" );
606         html_writer.write( SurfacingConstants.NL );
607         html_writer.write( "Rule 1: high-copy-base > 0 && high-copy-target > 0 && high-copy-base >= low-copy" );
608         html_writer.write( "<br>" );
609         html_writer.write( SurfacingConstants.NL );
610         html_writer.write( "Rule 2: high-copy-target >= minimal-difference + ( factor * low-copy )" );
611         html_writer.write( "<br>" );
612         html_writer.write( SurfacingConstants.NL );
613         html_writer.write( "Calculation mode for high copy target : " + COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
614         html_writer.write( SurfacingConstants.NL );
615         html_writer.write( "<br>" );
616         html_writer.write( "Calculation mode for high copy base : " + COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
617         html_writer.write( SurfacingConstants.NL );
618         html_writer.write( "<br>" );
619         html_writer.write( "Calculation mode for low copy : " + COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
620         html_writer.write( SurfacingConstants.NL );
621         html_writer.write( "<br>" );
622         html_writer.write( "Minimal difference : " + min_diff );
623         html_writer.write( SurfacingConstants.NL );
624         html_writer.write( "<br>" );
625         html_writer.write( "Factor : " + factor );
626         html_writer.write( SurfacingConstants.NL );
627         html_writer.write( "<br>" );
628         html_writer.write( "Lower copy binary domain combinations : " + counter );
629         html_writer.write( SurfacingConstants.NL );
630         html_writer.write( "<br>" );
631         html_writer.write( "Total absence : " + total_absense_counter );
632         html_writer.write( SurfacingConstants.NL );
633         html_writer.write( "<br>" );
634         html_writer.write( "Not total absence : " + not_total_absense_counter );
635         html_writer.write( SurfacingConstants.NL );
636         html_writer.write( "<br>" );
637         html_writer.write( "Total binary domain combinations : " + all_bdcs.size() );
638         html_writer.write( SurfacingConstants.NL );
639         html_writer.write( "<hr>" );
640         html_writer.write( SurfacingConstants.NL );
641         html_writer.write( "</body></html>" );
642         html_writer.write( SurfacingConstants.NL );
643         html_writer.close();
644     }
645
646     private static void writeDomainValuesToFiles( final List<GenomeWideCombinableDomains> genomes,
647                                                   final List<String> high_copy_base_species,
648                                                   final List<String> high_copy_target_species,
649                                                   final List<String> low_copy_species,
650                                                   final int min_diff,
651                                                   final Double factor,
652                                                   final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
653                                                   final Map<GoId, GoTerm> go_id_to_term_map,
654                                                   final Writer plain_writer,
655                                                   final Writer html_writer,
656                                                   final File proteins_file_base,
657                                                   final SortedMap<DomainId, Double> high_copy_base_values,
658                                                   final SortedMap<DomainId, Double> high_copy_target_values,
659                                                   final SortedMap<DomainId, Double> low_copy_values,
660                                                   final SortedSet<DomainId> all_domains,
661                                                   final SortedSet<GoId> go_ids_of_passing_domains,
662                                                   final SortedMap<Species, List<Protein>> protein_lists_per_species )
663             throws IOException {
664         int counter = 0;
665         int total_absense_counter = 0;
666         int not_total_absense_counter = 0;
667         SurfacingUtil.addHtmlHead( html_writer, "Domain Copy Differences" );
668         html_writer.write( "<body><table>" );
669         for( final DomainId domain_id : all_domains ) {
670             if ( ( high_copy_base_values.get( domain_id ) > 0 ) && ( high_copy_target_values.get( domain_id ) > 0 )
671                     && ( high_copy_base_values.get( domain_id ) >= low_copy_values.get( domain_id ) ) ) {
672                 if ( high_copy_target_values.get( domain_id ) >= min_diff
673                         + ( factor * low_copy_values.get( domain_id ) ) ) {
674                     if ( low_copy_values.get( domain_id ) <= 0.0 ) {
675                         ++total_absense_counter;
676                     }
677                     else {
678                         ++not_total_absense_counter;
679                     }
680                     ++counter;
681                     writeProteinsToFile( proteins_file_base, protein_lists_per_species, domain_id );
682                     if ( domain_id_to_go_ids_map.containsKey( domain_id ) ) {
683                         go_ids_of_passing_domains.addAll( domain_id_to_go_ids_map.get( domain_id ) );
684                     }
685                     plain_writer.write( domain_id.getId() );
686                     plain_writer.write( SurfacingConstants.NL );
687                     html_writer.write( "<tr><td><a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK
688                             + domain_id.getId() + "\">" + domain_id.getId() + "</a></td><td>" );
689                     html_writer.write( addGoInformation( domain_id, domain_id_to_go_ids_map, go_id_to_term_map )
690                             .toString() );
691                     html_writer.write( "</td><td>" );
692                     html_writer.write( "<table>" );
693                     for( final GenomeWideCombinableDomains genome : genomes ) {
694                         final String species = genome.getSpecies().getSpeciesId();
695                         if ( high_copy_target_species.contains( species ) ) {
696                             html_writer.write( "<tr>" );
697                             writeCopyNumberValues( high_copy_target_values,
698                                                    domain_id,
699                                                    genome,
700                                                    species,
701                                                    plain_writer,
702                                                    html_writer,
703                                                    "#0000FF" );
704                             html_writer.write( "</tr>" );
705                         }
706                         else if ( low_copy_species.contains( species ) ) {
707                             html_writer.write( "<tr>" );
708                             writeCopyNumberValues( low_copy_values,
709                                                    domain_id,
710                                                    genome,
711                                                    species,
712                                                    plain_writer,
713                                                    html_writer,
714                                                    "#A0A0A0" );
715                             html_writer.write( "</tr>" );
716                         }
717                         else if ( high_copy_base_species.contains( species ) ) {
718                             html_writer.write( "<tr>" );
719                             writeCopyNumberValues( high_copy_base_values,
720                                                    domain_id,
721                                                    genome,
722                                                    species,
723                                                    plain_writer,
724                                                    html_writer,
725                                                    "#404040" );
726                             html_writer.write( "</tr>" );
727                         }
728                     }
729                     html_writer.write( "</table>" );
730                     html_writer.write( "</td></tr>" );
731                     html_writer.write( SurfacingConstants.NL );
732                     plain_writer.write( SurfacingConstants.NL );
733                 }
734             }
735         }
736         html_writer.write( "</table>" );
737         html_writer.write( SurfacingConstants.NL );
738         html_writer.write( "<hr>" );
739         html_writer.write( SurfacingConstants.NL );
740         html_writer.write( "Rule 1: high-copy-base > 0 && high-copy-target > 0 && high-copy-base >= low-copy" );
741         html_writer.write( "<br>" );
742         html_writer.write( SurfacingConstants.NL );
743         html_writer.write( "Rule 2: high-copy-target >= minimal-difference + ( factor * low-copy )" );
744         html_writer.write( "<br>" );
745         html_writer.write( SurfacingConstants.NL );
746         html_writer.write( "Calculation mode for high copy target : " + COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
747         html_writer.write( SurfacingConstants.NL );
748         html_writer.write( "<br>" );
749         html_writer.write( "Calculation mode for high copy base : " + COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
750         html_writer.write( SurfacingConstants.NL );
751         html_writer.write( "<br>" );
752         html_writer.write( "Calculation mode for low copy : " + COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
753         html_writer.write( SurfacingConstants.NL );
754         html_writer.write( "<br>" );
755         html_writer.write( "Minimal difference : " + min_diff );
756         html_writer.write( SurfacingConstants.NL );
757         html_writer.write( "<br>" );
758         html_writer.write( "Factor : " + factor );
759         html_writer.write( SurfacingConstants.NL );
760         html_writer.write( "<br>" );
761         html_writer.write( "Lower copy domains : " + counter );
762         html_writer.write( SurfacingConstants.NL );
763         html_writer.write( "<br>" );
764         html_writer.write( "Total absence : " + total_absense_counter );
765         html_writer.write( SurfacingConstants.NL );
766         html_writer.write( "<br>" );
767         html_writer.write( "Not total absence : " + not_total_absense_counter );
768         html_writer.write( SurfacingConstants.NL );
769         html_writer.write( "<br>" );
770         html_writer.write( "Total domains : " + all_domains.size() );
771         html_writer.write( SurfacingConstants.NL );
772         html_writer.write( "<hr>" );
773         html_writer.write( SurfacingConstants.NL );
774         html_writer.write( "</body></html>" );
775         html_writer.write( SurfacingConstants.NL );
776         html_writer.close();
777         plain_writer.write( "# Rule 1: high-copy-base > 0 && high-copy-target > 0 && high-copy-base >= low-copy" );
778         plain_writer.write( SurfacingConstants.NL );
779         plain_writer.write( "# Rule 2: high-copy-target >= minimal-difference + ( factor * low-copy )" );
780         plain_writer.write( SurfacingConstants.NL );
781         plain_writer.write( "# Calculation mode for high copy target: " + COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
782         plain_writer.write( SurfacingConstants.NL );
783         plain_writer.write( "# Calculation mode for high copy base  : " + COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
784         plain_writer.write( SurfacingConstants.NL );
785         plain_writer.write( "# Calculation mode for low copy        : " + COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
786         plain_writer.write( SurfacingConstants.NL );
787         plain_writer.write( "# Minimal difference: " + min_diff );
788         plain_writer.write( SurfacingConstants.NL );
789         plain_writer.write( "# Factor            : " + factor );
790         plain_writer.write( SurfacingConstants.NL );
791         plain_writer.write( "# Lower copy domains: " + counter );
792         plain_writer.write( SurfacingConstants.NL );
793         plain_writer.write( "# Total absence     : " + total_absense_counter );
794         plain_writer.write( SurfacingConstants.NL );
795         plain_writer.write( "# Not total absence : " + not_total_absense_counter );
796         plain_writer.write( SurfacingConstants.NL );
797         plain_writer.write( "# Total domains     : " + all_domains.size() );
798         plain_writer.write( SurfacingConstants.NL );
799         plain_writer.close();
800     }
801
802     private static void writeGoIdsToFile( final Writer writer, final SortedSet<GoId> gos ) throws IOException {
803         for( final GoId go_id : gos ) {
804             writer.write( go_id.toString() );
805             writer.write( SurfacingConstants.NL );
806         }
807         writer.close();
808     }
809
810     private static void writeProteinsToFile( final File proteins_file_base,
811                                              final SortedMap<Species, List<Protein>> protein_lists_per_species,
812                                              final DomainId domain_id ) throws IOException {
813         final File my_proteins_file = new File( proteins_file_base.getParentFile() + ForesterUtil.FILE_SEPARATOR
814                 + domain_id + PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX );
815         SurfacingUtil.checkForOutputFileWriteability( my_proteins_file );
816         final Writer proteins_file_writer = new BufferedWriter( new FileWriter( my_proteins_file ) );
817         SurfacingUtil.extractProteinNames( protein_lists_per_species, domain_id, proteins_file_writer, "\t" );
818         proteins_file_writer.close();
819         System.out.println( "Wrote proteins list to \"" + my_proteins_file + "\"" );
820     }
821
822     public static enum COPY_CALCULATION_MODE {
823         MEAN, MEDIAN, MAX, MIN
824     }
825 }