in progress
[jalview.git] / forester / java / src / org / forester / surfacing / DomainCountsDifferenceUtil.java
1 // $Id:
2 // $
3 //
4 // FORESTER -- software libraries and applications
5 // for evolutionary biology research and applications.
6 //
7 // Copyright (C) 2008-2009 Christian M. Zmasek
8 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // All rights reserved
10 //
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
15 //
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
20 //
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 //
25 // Contact: phylosoft @ gmail . com
26 // WWW: www.phylosoft.org/forester
27
28 package org.forester.surfacing;
29
30 import java.io.BufferedWriter;
31 import java.io.File;
32 import java.io.FileWriter;
33 import java.io.IOException;
34 import java.io.Writer;
35 import java.text.DecimalFormat;
36 import java.text.NumberFormat;
37 import java.util.ArrayList;
38 import java.util.HashMap;
39 import java.util.List;
40 import java.util.Map;
41 import java.util.Set;
42 import java.util.SortedMap;
43 import java.util.SortedSet;
44 import java.util.TreeMap;
45 import java.util.TreeSet;
46
47 import org.forester.application.surfacing;
48 import org.forester.go.GoId;
49 import org.forester.go.GoTerm;
50 import org.forester.protein.BinaryDomainCombination;
51 import org.forester.protein.DomainId;
52 import org.forester.protein.Protein;
53 import org.forester.species.Species;
54 import org.forester.util.BasicDescriptiveStatistics;
55 import org.forester.util.DescriptiveStatistics;
56 import org.forester.util.ForesterUtil;
57
58 /*
59  * Poorly designed static class which essentially has one method:
60  * calculateCopyNumberDifferences.
61  */
62 public final class DomainCountsDifferenceUtil {
63
64     private final static NumberFormat          FORMATTER                                   = new DecimalFormat( "0.0E0" );
65     private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES = COPY_CALCULATION_MODE.MIN;
66     private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES   = COPY_CALCULATION_MODE.MIN;
67     private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_LOW_COPY_SPECIES         = COPY_CALCULATION_MODE.MAX;
68     private static final String                PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX         = ".prot";
69
70     //FIXME really needs to be tested! 
71     private static void addCounts( final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
72                                    final BinaryDomainCombination dc,
73                                    final GenomeWideCombinableDomains genome,
74                                    final Set<BinaryDomainCombination> bdc ) {
75         if ( !copy_counts.containsKey( dc ) ) {
76             copy_counts.put( dc, new ArrayList<Integer>() );
77         }
78         if ( bdc.contains( dc )
79                 && ( ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains().get( dc.getId1() ) != null ) ) {
80             final int count = ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains()
81                     .get( dc.getId1() );
82             copy_counts.get( dc ).add( count );
83         }
84         else {
85             copy_counts.get( dc ).add( 0 );
86         }
87     }
88
89     private static void addCounts( final SortedMap<DomainId, List<Integer>> copy_counts,
90                                    final DomainId domain,
91                                    final GenomeWideCombinableDomains genome ) {
92         if ( !copy_counts.containsKey( domain ) ) {
93             copy_counts.put( domain, new ArrayList<Integer>() );
94         }
95         if ( genome.contains( domain ) ) {
96             copy_counts.get( domain ).add( genome.get( domain ).getKeyDomainProteinsCount() );
97         }
98         else {
99             copy_counts.get( domain ).add( 0 );
100         }
101     }
102
103     private static StringBuilder addGoInformation( final DomainId d,
104                                                    final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
105                                                    final Map<GoId, GoTerm> go_id_to_term_map ) {
106         final StringBuilder sb = new StringBuilder();
107         if ( ( domain_id_to_go_ids_map == null ) || domain_id_to_go_ids_map.isEmpty()
108                 || !domain_id_to_go_ids_map.containsKey( d ) ) {
109             return sb;
110         }
111         final List<GoId> go_ids = domain_id_to_go_ids_map.get( d );
112         for( int i = 0; i < go_ids.size(); ++i ) {
113             final GoId go_id = go_ids.get( i );
114             if ( go_id_to_term_map.containsKey( go_id ) ) {
115                 appendGoTerm( sb, go_id_to_term_map.get( go_id ) );
116                 sb.append( "<br>" );
117             }
118             else {
119                 sb.append( "go id \"" + go_id + "\" not found [" + d.getId() + "]" );
120             }
121         }
122         return sb;
123     }
124
125     private static void appendGoTerm( final StringBuilder sb, final GoTerm go_term ) {
126         final GoId go_id = go_term.getGoId();
127         sb.append( "<a href=\"" + SurfacingConstants.AMIGO_LINK + go_id + "\" target=\"amigo_window\">" + go_id
128                 + "</a>" );
129         sb.append( ":" );
130         sb.append( go_term.getName() );
131         sb.append( " [" );
132         sb.append( go_term.getGoNameSpace().toShortString() );
133         sb.append( "]" );
134     }
135
136     public static void calculateCopyNumberDifferences( final List<GenomeWideCombinableDomains> genomes,
137                                                        final SortedMap<Species, List<Protein>> protein_lists_per_species,
138                                                        final List<String> high_copy_base_species,
139                                                        final List<String> high_copy_target_species,
140                                                        final List<String> low_copy_species,
141                                                        final int min_diff,
142                                                        final Double factor,
143                                                        final File plain_output_dom,
144                                                        final File html_output_dom,
145                                                        final File html_output_dc,
146                                                        final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
147                                                        final Map<GoId, GoTerm> go_id_to_term_map,
148                                                        final File all_domains_go_ids_out_dom,
149                                                        final File passing_domains_go_ids_out_dom,
150                                                        final File proteins_file_base ) throws IOException {
151         if ( genomes.size() < 1 ) {
152             throw new IllegalArgumentException( "attempt to use empty list of genomes for domain difference calculation" );
153         }
154         if ( ( high_copy_base_species.size() < 1 ) || ( low_copy_species.size() < 1 ) ) {
155             throw new IllegalArgumentException( "attempt to use empty list of species for domain difference calculation" );
156         }
157         if ( high_copy_base_species.contains( high_copy_target_species )
158                 || low_copy_species.contains( high_copy_target_species ) ) {
159             throw new IllegalArgumentException( "species [" + high_copy_target_species
160                     + "] appears in other list as well" );
161         }
162         if ( min_diff < 0 ) {
163             throw new IllegalArgumentException( "attempt to use negative addition [" + min_diff + "]" );
164         }
165         if ( factor <= 0.0 ) {
166             throw new IllegalArgumentException( "attempt to use factor equal or smaller than 0.0 [" + factor + "]" );
167         }
168         SurfacingUtil.checkForOutputFileWriteability( plain_output_dom );
169         SurfacingUtil.checkForOutputFileWriteability( html_output_dom );
170         SurfacingUtil.checkForOutputFileWriteability( html_output_dc );
171         SurfacingUtil.checkForOutputFileWriteability( all_domains_go_ids_out_dom );
172         SurfacingUtil.checkForOutputFileWriteability( passing_domains_go_ids_out_dom );
173         final Writer plain_writer = new BufferedWriter( new FileWriter( plain_output_dom ) );
174         final Writer html_writer = new BufferedWriter( new FileWriter( html_output_dom ) );
175         final Writer html_writer_dc = new BufferedWriter( new FileWriter( html_output_dc ) );
176         final Writer all_gos_writer = new BufferedWriter( new FileWriter( all_domains_go_ids_out_dom ) );
177         final Writer passing_gos_writer = new BufferedWriter( new FileWriter( passing_domains_go_ids_out_dom ) );
178         final SortedMap<DomainId, Double> high_copy_base_values = new TreeMap<DomainId, Double>();
179         final SortedMap<DomainId, Double> high_copy_target_values = new TreeMap<DomainId, Double>();
180         final SortedMap<DomainId, Double> low_copy_values = new TreeMap<DomainId, Double>();
181         final SortedMap<DomainId, List<Integer>> high_copy_base_copy_counts = new TreeMap<DomainId, List<Integer>>();
182         final SortedMap<DomainId, List<Integer>> high_copy_target_copy_counts = new TreeMap<DomainId, List<Integer>>();
183         final SortedMap<DomainId, List<Integer>> low_copy_copy_counts = new TreeMap<DomainId, List<Integer>>();
184         final SortedSet<DomainId> all_domains = new TreeSet<DomainId>();
185         final SortedMap<BinaryDomainCombination, Double> high_copy_base_values_dc = new TreeMap<BinaryDomainCombination, Double>();
186         final SortedMap<BinaryDomainCombination, Double> high_copy_target_values_dc = new TreeMap<BinaryDomainCombination, Double>();
187         final SortedMap<BinaryDomainCombination, Double> low_copy_values_dc = new TreeMap<BinaryDomainCombination, Double>();
188         final SortedMap<BinaryDomainCombination, List<Integer>> high_copy_base_copy_counts_dc = new TreeMap<BinaryDomainCombination, List<Integer>>();
189         final SortedMap<BinaryDomainCombination, List<Integer>> high_copy_target_copy_counts_dc = new TreeMap<BinaryDomainCombination, List<Integer>>();
190         final SortedMap<BinaryDomainCombination, List<Integer>> low_copy_copy_counts_dc = new TreeMap<BinaryDomainCombination, List<Integer>>();
191         final SortedSet<BinaryDomainCombination> all_dcs = new TreeSet<BinaryDomainCombination>();
192         final Map<String, Set<BinaryDomainCombination>> bdcs_per_genome = new HashMap<String, Set<BinaryDomainCombination>>();
193         final SortedSet<GoId> go_ids_of_passing_domains = new TreeSet<GoId>();
194         final SortedSet<GoId> go_ids_all = new TreeSet<GoId>();
195         for( final GenomeWideCombinableDomains genome : genomes ) {
196             final SortedSet<DomainId> domains = genome.getAllDomainIds();
197             final SortedSet<BinaryDomainCombination> dcs = genome.toBinaryDomainCombinations();
198             final String species = genome.getSpecies().getSpeciesId();
199             bdcs_per_genome.put( species, genome.toBinaryDomainCombinations() );
200             for( final DomainId d : domains ) {
201                 all_domains.add( d );
202                 if ( domain_id_to_go_ids_map.containsKey( d ) ) {
203                     go_ids_all.addAll( domain_id_to_go_ids_map.get( d ) );
204                 }
205             }
206             for( final BinaryDomainCombination dc : dcs ) {
207                 all_dcs.add( dc );
208             }
209         }
210         for( final DomainId domain : all_domains ) {
211             for( final GenomeWideCombinableDomains genome : genomes ) {
212                 final String species = genome.getSpecies().getSpeciesId();
213                 if ( high_copy_base_species.contains( species ) ) {
214                     DomainCountsDifferenceUtil.addCounts( high_copy_base_copy_counts, domain, genome );
215                 }
216                 if ( high_copy_target_species.contains( species ) ) {
217                     DomainCountsDifferenceUtil.addCounts( high_copy_target_copy_counts, domain, genome );
218                 }
219                 if ( low_copy_species.contains( species ) ) {
220                     DomainCountsDifferenceUtil.addCounts( low_copy_copy_counts, domain, genome );
221                 }
222             }
223         }
224         for( final BinaryDomainCombination dc : all_dcs ) {
225             for( final GenomeWideCombinableDomains genome : genomes ) {
226                 final String species = genome.getSpecies().getSpeciesId();
227                 if ( high_copy_base_species.contains( species ) ) {
228                     DomainCountsDifferenceUtil.addCounts( high_copy_base_copy_counts_dc,
229                                                           dc,
230                                                           genome,
231                                                           bdcs_per_genome.get( species ) );
232                 }
233                 if ( high_copy_target_species.contains( species ) ) {
234                     DomainCountsDifferenceUtil.addCounts( high_copy_target_copy_counts_dc,
235                                                           dc,
236                                                           genome,
237                                                           bdcs_per_genome.get( species ) );
238                 }
239                 if ( low_copy_species.contains( species ) ) {
240                     DomainCountsDifferenceUtil.addCounts( low_copy_copy_counts_dc,
241                                                           dc,
242                                                           genome,
243                                                           bdcs_per_genome.get( species ) );
244                 }
245             }
246         }
247         for( final DomainId domain : all_domains ) {
248             calculateDomainCountsBasedValue( high_copy_target_values,
249                                              high_copy_target_copy_counts,
250                                              domain,
251                                              COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
252             calculateDomainCountsBasedValue( high_copy_base_values,
253                                              high_copy_base_copy_counts,
254                                              domain,
255                                              COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
256             calculateDomainCountsBasedValue( low_copy_values,
257                                              low_copy_copy_counts,
258                                              domain,
259                                              COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
260         }
261         for( final BinaryDomainCombination dc : all_dcs ) {
262             calculateDomainCountsBasedValue( high_copy_target_values_dc,
263                                              high_copy_target_copy_counts_dc,
264                                              dc,
265                                              COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
266             calculateDomainCountsBasedValue( high_copy_base_values_dc,
267                                              high_copy_base_copy_counts_dc,
268                                              dc,
269                                              COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
270             calculateDomainCountsBasedValue( low_copy_values_dc,
271                                              low_copy_copy_counts_dc,
272                                              dc,
273                                              COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
274         }
275         writeDomainValuesToFiles( genomes,
276                                   high_copy_base_species,
277                                   high_copy_target_species,
278                                   low_copy_species,
279                                   min_diff,
280                                   factor,
281                                   domain_id_to_go_ids_map,
282                                   go_id_to_term_map,
283                                   plain_writer,
284                                   html_writer,
285                                   proteins_file_base,
286                                   high_copy_base_values,
287                                   high_copy_target_values,
288                                   low_copy_values,
289                                   all_domains,
290                                   go_ids_of_passing_domains,
291                                   protein_lists_per_species );
292         writeDomainCombinationValuesToFiles( genomes,
293                                              high_copy_base_species,
294                                              high_copy_target_species,
295                                              low_copy_species,
296                                              min_diff,
297                                              factor,
298                                              html_writer_dc,
299                                              high_copy_base_values_dc,
300                                              high_copy_target_values_dc,
301                                              low_copy_values_dc,
302                                              all_dcs,
303                                              bdcs_per_genome );
304         writeGoIdsToFile( all_gos_writer, go_ids_all );
305         writeGoIdsToFile( passing_gos_writer, go_ids_of_passing_domains );
306     }
307
308     private static void calculateDomainCountsBasedValue( final SortedMap<BinaryDomainCombination, Double> copy_values,
309                                                          final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
310                                                          final BinaryDomainCombination bdc,
311                                                          final COPY_CALCULATION_MODE copy_calc_mode ) {
312         if ( copy_counts.containsKey( bdc ) ) {
313             switch ( copy_calc_mode ) {
314                 case MAX:
315                     DomainCountsDifferenceUtil.calculateMaxCount( copy_values, copy_counts, bdc );
316                     break;
317                 case MIN:
318                     DomainCountsDifferenceUtil.calculateMinCount( copy_values, copy_counts, bdc );
319                     break;
320                 case MEAN:
321                     DomainCountsDifferenceUtil.calculateMeanCount( copy_values, copy_counts, bdc );
322                     break;
323                 case MEDIAN:
324                     DomainCountsDifferenceUtil.calculateMedianCount( copy_values, copy_counts, bdc );
325                     break;
326                 default:
327                     throw new IllegalArgumentException();
328             }
329         }
330         else {
331             copy_values.put( bdc, Double.valueOf( 0.0 ) );
332         }
333     }
334
335     private static void calculateDomainCountsBasedValue( final SortedMap<DomainId, Double> copy_values,
336                                                          final SortedMap<DomainId, List<Integer>> copy_counts,
337                                                          final DomainId domain,
338                                                          final COPY_CALCULATION_MODE copy_calc_mode ) {
339         if ( copy_counts.containsKey( domain ) ) {
340             switch ( copy_calc_mode ) {
341                 case MAX:
342                     DomainCountsDifferenceUtil.calculateMaxCount( copy_values, copy_counts, domain );
343                     break;
344                 case MIN:
345                     DomainCountsDifferenceUtil.calculateMinCount( copy_values, copy_counts, domain );
346                     break;
347                 case MEAN:
348                     DomainCountsDifferenceUtil.calculateMeanCount( copy_values, copy_counts, domain );
349                     break;
350                 case MEDIAN:
351                     DomainCountsDifferenceUtil.calculateMedianCount( copy_values, copy_counts, domain );
352                     break;
353                 default:
354                     throw new IllegalArgumentException();
355             }
356         }
357         else {
358             copy_values.put( domain, Double.valueOf( 0.0 ) );
359         }
360     }
361
362     private static void calculateMaxCount( final SortedMap<BinaryDomainCombination, Double> results,
363                                            final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
364                                            final BinaryDomainCombination bdc ) {
365         final List<Integer> counts = copy_counts.get( bdc );
366         int max = 0;
367         for( final Integer count : counts ) {
368             if ( count > max ) {
369                 max = count;
370             }
371         }
372         results.put( bdc, ( double ) max );
373     }
374
375     private static void calculateMaxCount( final SortedMap<DomainId, Double> results,
376                                            final SortedMap<DomainId, List<Integer>> copy_counts,
377                                            final DomainId domain ) {
378         final List<Integer> counts = copy_counts.get( domain );
379         int max = 0;
380         for( final Integer count : counts ) {
381             if ( count > max ) {
382                 max = count;
383             }
384         }
385         results.put( domain, ( double ) max );
386     }
387
388     private static void calculateMeanCount( final SortedMap<BinaryDomainCombination, Double> results,
389                                             final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
390                                             final BinaryDomainCombination bdc ) {
391         final List<Integer> counts = copy_counts.get( bdc );
392         int sum = 0;
393         for( final Integer count : counts ) {
394             sum += count;
395         }
396         results.put( bdc, ( ( double ) sum ) / ( ( double ) counts.size() ) );
397     }
398
399     private static void calculateMeanCount( final SortedMap<DomainId, Double> results,
400                                             final SortedMap<DomainId, List<Integer>> copy_counts,
401                                             final DomainId domain ) {
402         final List<Integer> counts = copy_counts.get( domain );
403         int sum = 0;
404         for( final Integer count : counts ) {
405             sum += count;
406         }
407         results.put( domain, ( ( double ) sum ) / ( ( double ) counts.size() ) );
408     }
409
410     private static void calculateMedianCount( final SortedMap<BinaryDomainCombination, Double> results,
411                                               final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
412                                               final BinaryDomainCombination bdc ) {
413         final List<Integer> counts = copy_counts.get( bdc );
414         final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
415         for( final Integer count : counts ) {
416             stats.addValue( count );
417         }
418         results.put( bdc, stats.median() );
419     }
420
421     private static void calculateMedianCount( final SortedMap<DomainId, Double> results,
422                                               final SortedMap<DomainId, List<Integer>> copy_counts,
423                                               final DomainId domain ) {
424         final List<Integer> counts = copy_counts.get( domain );
425         final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
426         for( final Integer count : counts ) {
427             stats.addValue( count );
428         }
429         results.put( domain, stats.median() );
430     }
431
432     private static void calculateMinCount( final SortedMap<BinaryDomainCombination, Double> results,
433                                            final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
434                                            final BinaryDomainCombination bdc ) {
435         final List<Integer> counts = copy_counts.get( bdc );
436         int min = Integer.MAX_VALUE;
437         for( final Integer count : counts ) {
438             if ( count < min ) {
439                 min = count;
440             }
441         }
442         results.put( bdc, ( double ) min );
443     }
444
445     private static void calculateMinCount( final SortedMap<DomainId, Double> results,
446                                            final SortedMap<DomainId, List<Integer>> copy_counts,
447                                            final DomainId domain ) {
448         final List<Integer> counts = copy_counts.get( domain );
449         int min = Integer.MAX_VALUE;
450         for( final Integer count : counts ) {
451             if ( count < min ) {
452                 min = count;
453             }
454         }
455         results.put( domain, ( double ) min );
456     }
457
458     private static String combinableDomaindToString( final CombinableDomains cd ) {
459         final StringBuilder sb = new StringBuilder();
460         sb.append( cd.getKeyDomainProteinsCount() );
461         sb.append( "\t[" );
462         sb.append( FORMATTER.format( cd.getKeyDomainConfidenceDescriptiveStatistics().median() ) );
463         sb.append( "]" );
464         return sb.toString();
465     }
466
467     private static String combinableDomaindToStringHtml( final CombinableDomains cd ) {
468         final StringBuilder sb = new StringBuilder();
469         sb.append( "[" );
470         sb.append( cd.getKeyDomainCount() );
471         sb.append( ", <b>" );
472         sb.append( cd.getKeyDomainProteinsCount() );
473         sb.append( "</b>, " );
474         sb.append( cd.getNumberOfCombinableDomains() );
475         sb.append( "]</td><td>[" );
476         sb.append( FORMATTER.format( cd.getKeyDomainConfidenceDescriptiveStatistics().median() ) );
477         sb.append( "]</td><td>" );
478         sb.append( cd.getCombiningDomainIdsAsStringBuilder() );
479         return sb.toString();
480     }
481
482     private static void writeCopyNumberValues( final SortedMap<BinaryDomainCombination, Double> copy_means,
483                                                final BinaryDomainCombination bdc,
484                                                final GenomeWideCombinableDomains genome,
485                                                final Map<String, Set<BinaryDomainCombination>> bdcs_per_genome,
486                                                final String species,
487                                                final Writer html_writer,
488                                                final String color ) throws IOException {
489         html_writer.write( "<td> " );
490         if ( !ForesterUtil.isEmpty( color ) ) {
491             html_writer.write( "<font color=\"" + color + "\">" );
492         }
493         html_writer.write( "<b>" + species + ":</b> " );
494         if ( !ForesterUtil.isEmpty( color ) ) {
495             html_writer.write( "</font>" );
496         }
497         html_writer.write( "</td><td>" );
498         if ( bdcs_per_genome.get( species ).contains( bdc ) && ( copy_means.get( bdc ) > 0 ) ) {
499             final int count = ( ( BasicCombinableDomains ) genome.get( bdc.getId0() ) ).getCombiningDomains()
500                     .get( bdc.getId1() );
501             html_writer.write( count + "" );
502         }
503         else {
504             html_writer.write( "0" );
505         }
506         html_writer.write( "</td>" );
507     }
508
509     private static void writeCopyNumberValues( final SortedMap<DomainId, Double> copy_means,
510                                                final DomainId domain,
511                                                final GenomeWideCombinableDomains genome,
512                                                final String species,
513                                                final Writer plain_writer,
514                                                final Writer html_writer,
515                                                final String color ) throws IOException {
516         plain_writer.write( "  " + species + "\t" );
517         html_writer.write( "<td> " );
518         if ( !ForesterUtil.isEmpty( color ) ) {
519             html_writer.write( "<font color=\"" + color + "\">" );
520         }
521         html_writer.write( "<b>" + species + ":</b> " );
522         if ( !ForesterUtil.isEmpty( color ) ) {
523             html_writer.write( "</font>" );
524         }
525         html_writer.write( "</td><td>" );
526         if ( genome.contains( domain ) && ( copy_means.get( domain ) > 0 ) ) {
527             plain_writer.write( DomainCountsDifferenceUtil.combinableDomaindToString( genome.get( domain ) ) );
528             html_writer.write( DomainCountsDifferenceUtil.combinableDomaindToStringHtml( genome.get( domain ) ) );
529         }
530         else {
531             plain_writer.write( "0" );
532             html_writer.write( "0" );
533         }
534         html_writer.write( "</td>" );
535         plain_writer.write( SurfacingConstants.NL );
536     }
537
538     private static void writeDomainCombinationValuesToFiles( final List<GenomeWideCombinableDomains> genomes,
539                                                              final List<String> high_copy_base_species,
540                                                              final List<String> high_copy_target_species,
541                                                              final List<String> low_copy_species,
542                                                              final int min_diff,
543                                                              final Double factor,
544                                                              final Writer html_writer,
545                                                              final SortedMap<BinaryDomainCombination, Double> high_copy_base_values,
546                                                              final SortedMap<BinaryDomainCombination, Double> high_copy_target_values,
547                                                              final SortedMap<BinaryDomainCombination, Double> low_copy_values,
548                                                              final SortedSet<BinaryDomainCombination> all_bdcs,
549                                                              final Map<String, Set<BinaryDomainCombination>> bdcs_per_genome )
550             throws IOException {
551         int counter = 0;
552         int total_absense_counter = 0;
553         int not_total_absense_counter = 0;
554         SurfacingUtil.addHtmlHead( html_writer, "Binary Domain Combination Copy Differences" );
555         html_writer.write( "<body><table>" );
556         for( final BinaryDomainCombination bdc : all_bdcs ) {
557             if ( ( high_copy_base_values.get( bdc ) > 0 ) && ( high_copy_target_values.get( bdc ) > 0 )
558                     && ( high_copy_base_values.get( bdc ) >= low_copy_values.get( bdc ) ) ) {
559                 if ( high_copy_target_values.get( bdc ) >= ( min_diff + ( factor * low_copy_values.get( bdc ) ) ) ) {
560                     if ( low_copy_values.get( bdc ) <= 0.0 ) {
561                         ++total_absense_counter;
562                     }
563                     else {
564                         ++not_total_absense_counter;
565                     }
566                     ++counter;
567                     html_writer.write( "<tr><td><a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + bdc.getId0()
568                             + "\">" + bdc.getId0() + "</a> = <a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK
569                             + bdc.getId1() + "\">" + bdc.getId1() + "</a>" );
570                     html_writer.write( "</td><td>" );
571                     html_writer.write( "<table>" );
572                     for( final GenomeWideCombinableDomains genome : genomes ) {
573                         final String species = genome.getSpecies().getSpeciesId();
574                         if ( high_copy_target_species.contains( species ) ) {
575                             html_writer.write( "<tr>" );
576                             writeCopyNumberValues( high_copy_target_values,
577                                                    bdc,
578                                                    genome,
579                                                    bdcs_per_genome,
580                                                    species,
581                                                    html_writer,
582                                                    "#0000FF" );
583                             html_writer.write( "</tr>" );
584                         }
585                         else if ( low_copy_species.contains( species ) ) {
586                             html_writer.write( "<tr>" );
587                             writeCopyNumberValues( low_copy_values,
588                                                    bdc,
589                                                    genome,
590                                                    bdcs_per_genome,
591                                                    species,
592                                                    html_writer,
593                                                    "#A0A0A0" );
594                             html_writer.write( "</tr>" );
595                         }
596                         else if ( high_copy_base_species.contains( species ) ) {
597                             html_writer.write( "<tr>" );
598                             writeCopyNumberValues( high_copy_base_values,
599                                                    bdc,
600                                                    genome,
601                                                    bdcs_per_genome,
602                                                    species,
603                                                    html_writer,
604                                                    "#404040" );
605                             html_writer.write( "</tr>" );
606                         }
607                     }
608                     html_writer.write( "</table>" );
609                     html_writer.write( "</td></tr>" );
610                     html_writer.write( SurfacingConstants.NL );
611                 }
612             }
613         }
614         html_writer.write( "</table>" );
615         html_writer.write( SurfacingConstants.NL );
616         html_writer.write( "<hr>" );
617         html_writer.write( SurfacingConstants.NL );
618         html_writer.write( "Rule 1: high-copy-base > 0 && high-copy-target > 0 && high-copy-base >= low-copy" );
619         html_writer.write( "<br>" );
620         html_writer.write( SurfacingConstants.NL );
621         html_writer.write( "Rule 2: high-copy-target >= minimal-difference + ( factor * low-copy )" );
622         html_writer.write( "<br>" );
623         html_writer.write( SurfacingConstants.NL );
624         html_writer.write( "Calculation mode for high copy target : " + COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
625         html_writer.write( SurfacingConstants.NL );
626         html_writer.write( "<br>" );
627         html_writer.write( "Calculation mode for high copy base : " + COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
628         html_writer.write( SurfacingConstants.NL );
629         html_writer.write( "<br>" );
630         html_writer.write( "Calculation mode for low copy : " + COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
631         html_writer.write( SurfacingConstants.NL );
632         html_writer.write( "<br>" );
633         html_writer.write( "Minimal difference : " + min_diff );
634         html_writer.write( SurfacingConstants.NL );
635         html_writer.write( "<br>" );
636         html_writer.write( "Factor : " + factor );
637         html_writer.write( SurfacingConstants.NL );
638         html_writer.write( "<br>" );
639         html_writer.write( "Lower copy binary domain combinations : " + counter );
640         html_writer.write( SurfacingConstants.NL );
641         html_writer.write( "<br>" );
642         html_writer.write( "Total absence : " + total_absense_counter );
643         html_writer.write( SurfacingConstants.NL );
644         html_writer.write( "<br>" );
645         html_writer.write( "Not total absence : " + not_total_absense_counter );
646         html_writer.write( SurfacingConstants.NL );
647         html_writer.write( "<br>" );
648         html_writer.write( "Total binary domain combinations : " + all_bdcs.size() );
649         html_writer.write( SurfacingConstants.NL );
650         html_writer.write( "<hr>" );
651         html_writer.write( SurfacingConstants.NL );
652         html_writer.write( "</body></html>" );
653         html_writer.write( SurfacingConstants.NL );
654         html_writer.close();
655     }
656
657     private static void writeDomainValuesToFiles( final List<GenomeWideCombinableDomains> genomes,
658                                                   final List<String> high_copy_base_species,
659                                                   final List<String> high_copy_target_species,
660                                                   final List<String> low_copy_species,
661                                                   final int min_diff,
662                                                   final Double factor,
663                                                   final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
664                                                   final Map<GoId, GoTerm> go_id_to_term_map,
665                                                   final Writer plain_writer,
666                                                   final Writer html_writer,
667                                                   final File proteins_file_base,
668                                                   final SortedMap<DomainId, Double> high_copy_base_values,
669                                                   final SortedMap<DomainId, Double> high_copy_target_values,
670                                                   final SortedMap<DomainId, Double> low_copy_values,
671                                                   final SortedSet<DomainId> all_domains,
672                                                   final SortedSet<GoId> go_ids_of_passing_domains,
673                                                   final SortedMap<Species, List<Protein>> protein_lists_per_species )
674             throws IOException {
675         int counter = 0;
676         int total_absense_counter = 0;
677         int not_total_absense_counter = 0;
678         SurfacingUtil.addHtmlHead( html_writer, "Domain Copy Differences" );
679         html_writer.write( "<body><table>" );
680         for( final DomainId domain_id : all_domains ) {
681             if ( ( high_copy_base_values.get( domain_id ) > 0 ) && ( high_copy_target_values.get( domain_id ) > 0 )
682                     && ( high_copy_base_values.get( domain_id ) >= low_copy_values.get( domain_id ) ) ) {
683                 if ( high_copy_target_values.get( domain_id ) >= ( min_diff + ( factor * low_copy_values
684                         .get( domain_id ) ) ) ) {
685                     if ( low_copy_values.get( domain_id ) <= 0.0 ) {
686                         ++total_absense_counter;
687                     }
688                     else {
689                         ++not_total_absense_counter;
690                     }
691                     ++counter;
692                     writeProteinsToFile( proteins_file_base, protein_lists_per_species, domain_id );
693                     if ( domain_id_to_go_ids_map.containsKey( domain_id ) ) {
694                         go_ids_of_passing_domains.addAll( domain_id_to_go_ids_map.get( domain_id ) );
695                     }
696                     plain_writer.write( domain_id.getId() );
697                     plain_writer.write( SurfacingConstants.NL );
698                     html_writer.write( "<tr><td><a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK
699                             + domain_id.getId() + "\">" + domain_id.getId() + "</a></td><td>" );
700                     html_writer.write( addGoInformation( domain_id, domain_id_to_go_ids_map, go_id_to_term_map )
701                             .toString() );
702                     html_writer.write( "</td><td>" );
703                     html_writer.write( "<table>" );
704                     for( final GenomeWideCombinableDomains genome : genomes ) {
705                         final String species = genome.getSpecies().getSpeciesId();
706                         if ( high_copy_target_species.contains( species ) ) {
707                             html_writer.write( "<tr>" );
708                             writeCopyNumberValues( high_copy_target_values,
709                                                    domain_id,
710                                                    genome,
711                                                    species,
712                                                    plain_writer,
713                                                    html_writer,
714                                                    "#0000FF" );
715                             html_writer.write( "</tr>" );
716                         }
717                         else if ( low_copy_species.contains( species ) ) {
718                             html_writer.write( "<tr>" );
719                             writeCopyNumberValues( low_copy_values,
720                                                    domain_id,
721                                                    genome,
722                                                    species,
723                                                    plain_writer,
724                                                    html_writer,
725                                                    "#A0A0A0" );
726                             html_writer.write( "</tr>" );
727                         }
728                         else if ( high_copy_base_species.contains( species ) ) {
729                             html_writer.write( "<tr>" );
730                             writeCopyNumberValues( high_copy_base_values,
731                                                    domain_id,
732                                                    genome,
733                                                    species,
734                                                    plain_writer,
735                                                    html_writer,
736                                                    "#404040" );
737                             html_writer.write( "</tr>" );
738                         }
739                     }
740                     html_writer.write( "</table>" );
741                     html_writer.write( "</td></tr>" );
742                     html_writer.write( SurfacingConstants.NL );
743                     plain_writer.write( SurfacingConstants.NL );
744                 }
745             }
746         }
747         html_writer.write( "</table>" );
748         html_writer.write( SurfacingConstants.NL );
749         html_writer.write( "<hr>" );
750         html_writer.write( SurfacingConstants.NL );
751         html_writer.write( "Rule 1: high-copy-base > 0 && high-copy-target > 0 && high-copy-base >= low-copy" );
752         html_writer.write( "<br>" );
753         html_writer.write( SurfacingConstants.NL );
754         html_writer.write( "Rule 2: high-copy-target >= minimal-difference + ( factor * low-copy )" );
755         html_writer.write( "<br>" );
756         html_writer.write( SurfacingConstants.NL );
757         html_writer.write( "Calculation mode for high copy target : " + COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
758         html_writer.write( SurfacingConstants.NL );
759         html_writer.write( "<br>" );
760         html_writer.write( "Calculation mode for high copy base : " + COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
761         html_writer.write( SurfacingConstants.NL );
762         html_writer.write( "<br>" );
763         html_writer.write( "Calculation mode for low copy : " + COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
764         html_writer.write( SurfacingConstants.NL );
765         html_writer.write( "<br>" );
766         html_writer.write( "Minimal difference : " + min_diff );
767         html_writer.write( SurfacingConstants.NL );
768         html_writer.write( "<br>" );
769         html_writer.write( "Factor : " + factor );
770         html_writer.write( SurfacingConstants.NL );
771         html_writer.write( "<br>" );
772         html_writer.write( "Lower copy domains : " + counter );
773         html_writer.write( SurfacingConstants.NL );
774         html_writer.write( "<br>" );
775         html_writer.write( "Total absence : " + total_absense_counter );
776         html_writer.write( SurfacingConstants.NL );
777         html_writer.write( "<br>" );
778         html_writer.write( "Not total absence : " + not_total_absense_counter );
779         html_writer.write( SurfacingConstants.NL );
780         html_writer.write( "<br>" );
781         html_writer.write( "Total domains : " + all_domains.size() );
782         html_writer.write( SurfacingConstants.NL );
783         html_writer.write( "<hr>" );
784         html_writer.write( SurfacingConstants.NL );
785         html_writer.write( "</body></html>" );
786         html_writer.write( SurfacingConstants.NL );
787         html_writer.close();
788         plain_writer.write( "# Rule 1: high-copy-base > 0 && high-copy-target > 0 && high-copy-base >= low-copy" );
789         plain_writer.write( SurfacingConstants.NL );
790         plain_writer.write( "# Rule 2: high-copy-target >= minimal-difference + ( factor * low-copy )" );
791         plain_writer.write( SurfacingConstants.NL );
792         plain_writer.write( "# Calculation mode for high copy target: " + COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
793         plain_writer.write( SurfacingConstants.NL );
794         plain_writer.write( "# Calculation mode for high copy base  : " + COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
795         plain_writer.write( SurfacingConstants.NL );
796         plain_writer.write( "# Calculation mode for low copy        : " + COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
797         plain_writer.write( SurfacingConstants.NL );
798         plain_writer.write( "# Minimal difference: " + min_diff );
799         plain_writer.write( SurfacingConstants.NL );
800         plain_writer.write( "# Factor            : " + factor );
801         plain_writer.write( SurfacingConstants.NL );
802         plain_writer.write( "# Lower copy domains: " + counter );
803         plain_writer.write( SurfacingConstants.NL );
804         plain_writer.write( "# Total absence     : " + total_absense_counter );
805         plain_writer.write( SurfacingConstants.NL );
806         plain_writer.write( "# Not total absence : " + not_total_absense_counter );
807         plain_writer.write( SurfacingConstants.NL );
808         plain_writer.write( "# Total domains     : " + all_domains.size() );
809         plain_writer.write( SurfacingConstants.NL );
810         plain_writer.close();
811     }
812
813     private static void writeGoIdsToFile( final Writer writer, final SortedSet<GoId> gos ) throws IOException {
814         for( final GoId go_id : gos ) {
815             writer.write( go_id.toString() );
816             writer.write( SurfacingConstants.NL );
817         }
818         writer.close();
819     }
820
821     private static void writeProteinsToFile( final File proteins_file_base,
822                                              final SortedMap<Species, List<Protein>> protein_lists_per_species,
823                                              final DomainId domain_id ) throws IOException {
824         final File my_proteins_file = new File( proteins_file_base.getParentFile() + ForesterUtil.FILE_SEPARATOR
825                 + domain_id + PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX );
826         SurfacingUtil.checkForOutputFileWriteability( my_proteins_file );
827         final Writer proteins_file_writer = new BufferedWriter( new FileWriter( my_proteins_file ) );
828         SurfacingUtil.extractProteinNames( protein_lists_per_species,
829                                            domain_id,
830                                            proteins_file_writer,
831                                            "\t",
832                                            surfacing.LIMIT_SPEC_FOR_PROT_EX,
833                                            -1 );
834         proteins_file_writer.close();
835         System.out.println( "Wrote proteins list to \"" + my_proteins_file + "\"" );
836     }
837
838     public static enum COPY_CALCULATION_MODE {
839         MEAN, MEDIAN, MAX, MIN
840     }
841 }