janiela -> xfam
[jalview.git] / forester / java / src / org / forester / surfacing / DomainCountsDifferenceUtil.java
1 // $Id:
2 // $
3 //
4 // FORESTER -- software libraries and applications
5 // for evolutionary biology research and applications.
6 //
7 // Copyright (C) 2008-2009 Christian M. Zmasek
8 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // All rights reserved
10 //
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
15 //
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
20 //
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 //
25 // Contact: phylosoft @ gmail . com
26 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
27
28 package org.forester.surfacing;
29
30 import java.io.BufferedWriter;
31 import java.io.File;
32 import java.io.FileWriter;
33 import java.io.IOException;
34 import java.io.Writer;
35 import java.util.ArrayList;
36 import java.util.HashMap;
37 import java.util.List;
38 import java.util.Map;
39 import java.util.Set;
40 import java.util.SortedMap;
41 import java.util.SortedSet;
42 import java.util.TreeMap;
43 import java.util.TreeSet;
44
45 import org.forester.application.surfacing;
46 import org.forester.go.GoId;
47 import org.forester.go.GoTerm;
48 import org.forester.protein.BinaryDomainCombination;
49 import org.forester.protein.Protein;
50 import org.forester.species.Species;
51 import org.forester.util.BasicDescriptiveStatistics;
52 import org.forester.util.DescriptiveStatistics;
53 import org.forester.util.ForesterUtil;
54
/*
 * Poorly designed static class which essentially has one method:
 * calculateCopyNumberDifferences.
 */
59 public final class DomainCountsDifferenceUtil {
60
    // Mode used to reduce the per-genome counts of the high-copy base species to one value.
    private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES   = COPY_CALCULATION_MODE.MIN;
    // Mode used to reduce the per-genome counts of the high-copy target species to one value.
    private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES = COPY_CALCULATION_MODE.MIN;
    // Mode used to reduce the per-genome counts of the low-copy species to one value.
    private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_LOW_COPY_SPECIES         = COPY_CALCULATION_MODE.MAX;
    // NOTE(review): presumably a file-name suffix for per-domain protein output files;
    // its use is not visible in this section -- confirm.
    private static final String                PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX         = ".prot";
65
66     public static void calculateCopyNumberDifferences( final List<GenomeWideCombinableDomains> genomes,
67                                                        final SortedMap<Species, List<Protein>> protein_lists_per_species,
68                                                        final List<String> high_copy_base_species,
69                                                        final List<String> high_copy_target_species,
70                                                        final List<String> low_copy_species,
71                                                        final int min_diff,
72                                                        final Double factor,
73                                                        final File plain_output_dom,
74                                                        final File html_output_dom,
75                                                        final File html_output_dc,
76                                                        final Map<String, List<GoId>> domain_id_to_go_ids_map,
77                                                        final Map<GoId, GoTerm> go_id_to_term_map,
78                                                        final File all_domains_go_ids_out_dom,
79                                                        final File passing_domains_go_ids_out_dom,
80                                                        final File proteins_file_base ) throws IOException {
81         if ( genomes.size() < 1 ) {
82             throw new IllegalArgumentException( "attempt to use empty list of genomes for domain difference calculation" );
83         }
84         if ( ( high_copy_base_species.size() < 1 ) || ( low_copy_species.size() < 1 ) ) {
85             throw new IllegalArgumentException( "attempt to use empty list of species for domain difference calculation" );
86         }
87         if ( high_copy_base_species.contains( high_copy_target_species )
88                 || low_copy_species.contains( high_copy_target_species ) ) {
89             throw new IllegalArgumentException( "species [" + high_copy_target_species
90                     + "] appears in other list as well" );
91         }
92         if ( min_diff < 0 ) {
93             throw new IllegalArgumentException( "attempt to use negative addition [" + min_diff + "]" );
94         }
95         if ( factor <= 0.0 ) {
96             throw new IllegalArgumentException( "attempt to use factor equal or smaller than 0.0 [" + factor + "]" );
97         }
98         SurfacingUtil.checkForOutputFileWriteability( plain_output_dom );
99         SurfacingUtil.checkForOutputFileWriteability( html_output_dom );
100         SurfacingUtil.checkForOutputFileWriteability( html_output_dc );
101         SurfacingUtil.checkForOutputFileWriteability( all_domains_go_ids_out_dom );
102         SurfacingUtil.checkForOutputFileWriteability( passing_domains_go_ids_out_dom );
103         final Writer plain_writer = new BufferedWriter( new FileWriter( plain_output_dom ) );
104         final Writer html_writer = new BufferedWriter( new FileWriter( html_output_dom ) );
105         final Writer html_writer_dc = new BufferedWriter( new FileWriter( html_output_dc ) );
106         final Writer all_gos_writer = new BufferedWriter( new FileWriter( all_domains_go_ids_out_dom ) );
107         final Writer passing_gos_writer = new BufferedWriter( new FileWriter( passing_domains_go_ids_out_dom ) );
108         final SortedMap<String, Double> high_copy_base_values = new TreeMap<String, Double>();
109         final SortedMap<String, Double> high_copy_target_values = new TreeMap<String, Double>();
110         final SortedMap<String, Double> low_copy_values = new TreeMap<String, Double>();
111         final SortedMap<String, List<Integer>> high_copy_base_copy_counts = new TreeMap<String, List<Integer>>();
112         final SortedMap<String, List<Integer>> high_copy_target_copy_counts = new TreeMap<String, List<Integer>>();
113         final SortedMap<String, List<Integer>> low_copy_copy_counts = new TreeMap<String, List<Integer>>();
114         final SortedSet<String> all_domains = new TreeSet<String>();
115         final SortedMap<BinaryDomainCombination, Double> high_copy_base_values_dc = new TreeMap<BinaryDomainCombination, Double>();
116         final SortedMap<BinaryDomainCombination, Double> high_copy_target_values_dc = new TreeMap<BinaryDomainCombination, Double>();
117         final SortedMap<BinaryDomainCombination, Double> low_copy_values_dc = new TreeMap<BinaryDomainCombination, Double>();
118         final SortedMap<BinaryDomainCombination, List<Integer>> high_copy_base_copy_counts_dc = new TreeMap<BinaryDomainCombination, List<Integer>>();
119         final SortedMap<BinaryDomainCombination, List<Integer>> high_copy_target_copy_counts_dc = new TreeMap<BinaryDomainCombination, List<Integer>>();
120         final SortedMap<BinaryDomainCombination, List<Integer>> low_copy_copy_counts_dc = new TreeMap<BinaryDomainCombination, List<Integer>>();
121         final SortedSet<BinaryDomainCombination> all_dcs = new TreeSet<BinaryDomainCombination>();
122         final Map<String, Set<BinaryDomainCombination>> bdcs_per_genome = new HashMap<String, Set<BinaryDomainCombination>>();
123         final SortedSet<GoId> go_ids_of_passing_domains = new TreeSet<GoId>();
124         final SortedSet<GoId> go_ids_all = new TreeSet<GoId>();
125         for( final GenomeWideCombinableDomains genome : genomes ) {
126             final SortedSet<String> domains = genome.getAllDomainIds();
127             final SortedSet<BinaryDomainCombination> dcs = genome.toBinaryDomainCombinations();
128             final String species = genome.getSpecies().getSpeciesId();
129             bdcs_per_genome.put( species, genome.toBinaryDomainCombinations() );
130             for( final String d : domains ) {
131                 all_domains.add( d );
132                 if ( domain_id_to_go_ids_map.containsKey( d ) ) {
133                     go_ids_all.addAll( domain_id_to_go_ids_map.get( d ) );
134                 }
135             }
136             for( final BinaryDomainCombination dc : dcs ) {
137                 all_dcs.add( dc );
138             }
139         }
140         for( final String domain : all_domains ) {
141             for( final GenomeWideCombinableDomains genome : genomes ) {
142                 final String species = genome.getSpecies().getSpeciesId();
143                 if ( high_copy_base_species.contains( species ) ) {
144                     DomainCountsDifferenceUtil.addCounts( high_copy_base_copy_counts, domain, genome );
145                 }
146                 if ( high_copy_target_species.contains( species ) ) {
147                     DomainCountsDifferenceUtil.addCounts( high_copy_target_copy_counts, domain, genome );
148                 }
149                 if ( low_copy_species.contains( species ) ) {
150                     DomainCountsDifferenceUtil.addCounts( low_copy_copy_counts, domain, genome );
151                 }
152             }
153         }
154         for( final BinaryDomainCombination dc : all_dcs ) {
155             for( final GenomeWideCombinableDomains genome : genomes ) {
156                 final String species = genome.getSpecies().getSpeciesId();
157                 if ( high_copy_base_species.contains( species ) ) {
158                     DomainCountsDifferenceUtil.addCounts( high_copy_base_copy_counts_dc,
159                                                           dc,
160                                                           genome,
161                                                           bdcs_per_genome.get( species ) );
162                 }
163                 if ( high_copy_target_species.contains( species ) ) {
164                     DomainCountsDifferenceUtil.addCounts( high_copy_target_copy_counts_dc,
165                                                           dc,
166                                                           genome,
167                                                           bdcs_per_genome.get( species ) );
168                 }
169                 if ( low_copy_species.contains( species ) ) {
170                     DomainCountsDifferenceUtil.addCounts( low_copy_copy_counts_dc,
171                                                           dc,
172                                                           genome,
173                                                           bdcs_per_genome.get( species ) );
174                 }
175             }
176         }
177         for( final String domain : all_domains ) {
178             calculateDomainCountsBasedValue( high_copy_target_values,
179                                              high_copy_target_copy_counts,
180                                              domain,
181                                              COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
182             calculateDomainCountsBasedValue( high_copy_base_values,
183                                              high_copy_base_copy_counts,
184                                              domain,
185                                              COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
186             calculateDomainCountsBasedValue( low_copy_values,
187                                              low_copy_copy_counts,
188                                              domain,
189                                              COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
190         }
191         for( final BinaryDomainCombination dc : all_dcs ) {
192             calculateDomainCountsBasedValue( high_copy_target_values_dc,
193                                              high_copy_target_copy_counts_dc,
194                                              dc,
195                                              COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
196             calculateDomainCountsBasedValue( high_copy_base_values_dc,
197                                              high_copy_base_copy_counts_dc,
198                                              dc,
199                                              COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
200             calculateDomainCountsBasedValue( low_copy_values_dc,
201                                              low_copy_copy_counts_dc,
202                                              dc,
203                                              COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
204         }
205         writeDomainValuesToFiles( genomes,
206                                   high_copy_base_species,
207                                   high_copy_target_species,
208                                   low_copy_species,
209                                   min_diff,
210                                   factor,
211                                   domain_id_to_go_ids_map,
212                                   go_id_to_term_map,
213                                   plain_writer,
214                                   html_writer,
215                                   proteins_file_base,
216                                   high_copy_base_values,
217                                   high_copy_target_values,
218                                   low_copy_values,
219                                   all_domains,
220                                   go_ids_of_passing_domains,
221                                   protein_lists_per_species );
222         writeDomainCombinationValuesToFiles( genomes,
223                                              high_copy_base_species,
224                                              high_copy_target_species,
225                                              low_copy_species,
226                                              min_diff,
227                                              factor,
228                                              html_writer_dc,
229                                              high_copy_base_values_dc,
230                                              high_copy_target_values_dc,
231                                              low_copy_values_dc,
232                                              all_dcs,
233                                              bdcs_per_genome );
234         writeGoIdsToFile( all_gos_writer, go_ids_all );
235         writeGoIdsToFile( passing_gos_writer, go_ids_of_passing_domains );
236     }
237
238     //FIXME really needs to be tested! 
239     private static void addCounts( final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
240                                    final BinaryDomainCombination dc,
241                                    final GenomeWideCombinableDomains genome,
242                                    final Set<BinaryDomainCombination> bdc ) {
243         if ( !copy_counts.containsKey( dc ) ) {
244             copy_counts.put( dc, new ArrayList<Integer>() );
245         }
246         if ( bdc.contains( dc )
247                 && ( ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains().get( dc.getId1() ) != null ) ) {
248             final int count = ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains()
249                     .get( dc.getId1() );
250             copy_counts.get( dc ).add( count );
251         }
252         else {
253             copy_counts.get( dc ).add( 0 );
254         }
255     }
256
257     private static void addCounts( final SortedMap<String, List<Integer>> copy_counts,
258                                    final String domain,
259                                    final GenomeWideCombinableDomains genome ) {
260         if ( !copy_counts.containsKey( domain ) ) {
261             copy_counts.put( domain, new ArrayList<Integer>() );
262         }
263         if ( genome.contains( domain ) ) {
264             copy_counts.get( domain ).add( genome.get( domain ).getKeyDomainProteinsCount() );
265         }
266         else {
267             copy_counts.get( domain ).add( 0 );
268         }
269     }
270
271     private static StringBuilder addGoInformation( final String d,
272                                                    final Map<String, List<GoId>> domain_id_to_go_ids_map,
273                                                    final Map<GoId, GoTerm> go_id_to_term_map ) {
274         final StringBuilder sb = new StringBuilder();
275         if ( ( domain_id_to_go_ids_map == null ) || domain_id_to_go_ids_map.isEmpty()
276                 || !domain_id_to_go_ids_map.containsKey( d ) ) {
277             return sb;
278         }
279         final List<GoId> go_ids = domain_id_to_go_ids_map.get( d );
280         for( int i = 0; i < go_ids.size(); ++i ) {
281             final GoId go_id = go_ids.get( i );
282             if ( go_id_to_term_map.containsKey( go_id ) ) {
283                 appendGoTerm( sb, go_id_to_term_map.get( go_id ) );
284                 sb.append( "<br>" );
285             }
286             else {
287                 sb.append( "go id \"" + go_id + "\" not found [" + d + "]" );
288             }
289         }
290         return sb;
291     }
292
293     private static void appendGoTerm( final StringBuilder sb, final GoTerm go_term ) {
294         final GoId go_id = go_term.getGoId();
295         sb.append( "<a href=\"" + SurfacingConstants.AMIGO_LINK + go_id + "\" target=\"amigo_window\">" + go_id
296                 + "</a>" );
297         sb.append( ":" );
298         sb.append( go_term.getName() );
299         sb.append( " [" );
300         sb.append( go_term.getGoNameSpace().toShortString() );
301         sb.append( "]" );
302     }
303
304     private static void calculateDomainCountsBasedValue( final SortedMap<BinaryDomainCombination, Double> copy_values,
305                                                          final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
306                                                          final BinaryDomainCombination bdc,
307                                                          final COPY_CALCULATION_MODE copy_calc_mode ) {
308         if ( copy_counts.containsKey( bdc ) ) {
309             switch ( copy_calc_mode ) {
310                 case MAX:
311                     DomainCountsDifferenceUtil.calculateMaxCount( copy_values, copy_counts, bdc );
312                     break;
313                 case MIN:
314                     DomainCountsDifferenceUtil.calculateMinCount( copy_values, copy_counts, bdc );
315                     break;
316                 case MEAN:
317                     DomainCountsDifferenceUtil.calculateMeanCount( copy_values, copy_counts, bdc );
318                     break;
319                 case MEDIAN:
320                     DomainCountsDifferenceUtil.calculateMedianCount( copy_values, copy_counts, bdc );
321                     break;
322                 default:
323                     throw new IllegalArgumentException();
324             }
325         }
326         else {
327             copy_values.put( bdc, Double.valueOf( 0.0 ) );
328         }
329     }
330
331     private static void calculateDomainCountsBasedValue( final SortedMap<String, Double> copy_values,
332                                                          final SortedMap<String, List<Integer>> copy_counts,
333                                                          final String domain,
334                                                          final COPY_CALCULATION_MODE copy_calc_mode ) {
335         if ( copy_counts.containsKey( domain ) ) {
336             switch ( copy_calc_mode ) {
337                 case MAX:
338                     DomainCountsDifferenceUtil.calculateMaxCount( copy_values, copy_counts, domain );
339                     break;
340                 case MIN:
341                     DomainCountsDifferenceUtil.calculateMinCount( copy_values, copy_counts, domain );
342                     break;
343                 case MEAN:
344                     DomainCountsDifferenceUtil.calculateMeanCount( copy_values, copy_counts, domain );
345                     break;
346                 case MEDIAN:
347                     DomainCountsDifferenceUtil.calculateMedianCount( copy_values, copy_counts, domain );
348                     break;
349                 default:
350                     throw new IllegalArgumentException();
351             }
352         }
353         else {
354             copy_values.put( domain, Double.valueOf( 0.0 ) );
355         }
356     }
357
358     private static void calculateMaxCount( final SortedMap<BinaryDomainCombination, Double> results,
359                                            final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
360                                            final BinaryDomainCombination bdc ) {
361         final List<Integer> counts = copy_counts.get( bdc );
362         int max = 0;
363         for( final Integer count : counts ) {
364             if ( count > max ) {
365                 max = count;
366             }
367         }
368         results.put( bdc, ( double ) max );
369     }
370
371     private static void calculateMaxCount( final SortedMap<String, Double> results,
372                                            final SortedMap<String, List<Integer>> copy_counts,
373                                            final String domain ) {
374         final List<Integer> counts = copy_counts.get( domain );
375         int max = 0;
376         for( final Integer count : counts ) {
377             if ( count > max ) {
378                 max = count;
379             }
380         }
381         results.put( domain, ( double ) max );
382     }
383
384     private static void calculateMeanCount( final SortedMap<BinaryDomainCombination, Double> results,
385                                             final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
386                                             final BinaryDomainCombination bdc ) {
387         final List<Integer> counts = copy_counts.get( bdc );
388         int sum = 0;
389         for( final Integer count : counts ) {
390             sum += count;
391         }
392         results.put( bdc, ( ( double ) sum ) / ( ( double ) counts.size() ) );
393     }
394
395     private static void calculateMeanCount( final SortedMap<String, Double> results,
396                                             final SortedMap<String, List<Integer>> copy_counts,
397                                             final String domain ) {
398         final List<Integer> counts = copy_counts.get( domain );
399         int sum = 0;
400         for( final Integer count : counts ) {
401             sum += count;
402         }
403         results.put( domain, ( ( double ) sum ) / ( ( double ) counts.size() ) );
404     }
405
406     private static void calculateMedianCount( final SortedMap<BinaryDomainCombination, Double> results,
407                                               final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
408                                               final BinaryDomainCombination bdc ) {
409         final List<Integer> counts = copy_counts.get( bdc );
410         final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
411         for( final Integer count : counts ) {
412             stats.addValue( count );
413         }
414         results.put( bdc, stats.median() );
415     }
416
417     private static void calculateMedianCount( final SortedMap<String, Double> results,
418                                               final SortedMap<String, List<Integer>> copy_counts,
419                                               final String domain ) {
420         final List<Integer> counts = copy_counts.get( domain );
421         final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
422         for( final Integer count : counts ) {
423             stats.addValue( count );
424         }
425         results.put( domain, stats.median() );
426     }
427
428     private static void calculateMinCount( final SortedMap<BinaryDomainCombination, Double> results,
429                                            final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
430                                            final BinaryDomainCombination bdc ) {
431         final List<Integer> counts = copy_counts.get( bdc );
432         int min = Integer.MAX_VALUE;
433         for( final Integer count : counts ) {
434             if ( count < min ) {
435                 min = count;
436             }
437         }
438         results.put( bdc, ( double ) min );
439     }
440
441     private static void calculateMinCount( final SortedMap<String, Double> results,
442                                            final SortedMap<String, List<Integer>> copy_counts,
443                                            final String domain ) {
444         final List<Integer> counts = copy_counts.get( domain );
445         int min = Integer.MAX_VALUE;
446         for( final Integer count : counts ) {
447             if ( count < min ) {
448                 min = count;
449             }
450         }
451         results.put( domain, ( double ) min );
452     }
453
454     private static String combinableDomaindToString( final CombinableDomains cd ) {
455         final StringBuilder sb = new StringBuilder();
456         sb.append( cd.getKeyDomainProteinsCount() );
457         return sb.toString();
458     }
459
460     private static String combinableDomaindToStringHtml( final CombinableDomains cd ) {
461         final StringBuilder sb = new StringBuilder();
462         sb.append( "[" );
463         sb.append( cd.getKeyDomainCount() );
464         sb.append( ", <b>" );
465         sb.append( cd.getKeyDomainProteinsCount() );
466         sb.append( "</b>, " );
467         sb.append( cd.getNumberOfCombinableDomains() );
468         sb.append( "]</td><td>" );
469         sb.append( cd.getCombiningDomainIdsAsStringBuilder() );
470         return sb.toString();
471     }
472
473     private static void writeCopyNumberValues( final SortedMap<BinaryDomainCombination, Double> copy_means,
474                                                final BinaryDomainCombination bdc,
475                                                final GenomeWideCombinableDomains genome,
476                                                final Map<String, Set<BinaryDomainCombination>> bdcs_per_genome,
477                                                final String species,
478                                                final Writer html_writer,
479                                                final String color ) throws IOException {
480         html_writer.write( "<td> " );
481         if ( !ForesterUtil.isEmpty( color ) ) {
482             html_writer.write( "<font color=\"" + color + "\">" );
483         }
484         html_writer.write( "<b>" + species + ":</b> " );
485         if ( !ForesterUtil.isEmpty( color ) ) {
486             html_writer.write( "</font>" );
487         }
488         html_writer.write( "</td><td>" );
489         if ( bdcs_per_genome.get( species ).contains( bdc ) && ( copy_means.get( bdc ) > 0 ) ) {
490             final int count = ( ( BasicCombinableDomains ) genome.get( bdc.getId0() ) ).getCombiningDomains()
491                     .get( bdc.getId1() );
492             html_writer.write( count + "" );
493         }
494         else {
495             html_writer.write( "0" );
496         }
497         html_writer.write( "</td>" );
498     }
499
500     private static void writeCopyNumberValues( final SortedMap<String, Double> copy_means,
501                                                final String domain,
502                                                final GenomeWideCombinableDomains genome,
503                                                final String species,
504                                                final Writer plain_writer,
505                                                final Writer html_writer,
506                                                final String color ) throws IOException {
507         plain_writer.write( "  " + species + "\t" );
508         html_writer.write( "<td> " );
509         if ( !ForesterUtil.isEmpty( color ) ) {
510             html_writer.write( "<font color=\"" + color + "\">" );
511         }
512         html_writer.write( "<b>" + species + ":</b> " );
513         if ( !ForesterUtil.isEmpty( color ) ) {
514             html_writer.write( "</font>" );
515         }
516         html_writer.write( "</td><td>" );
517         if ( genome.contains( domain ) && ( copy_means.get( domain ) > 0 ) ) {
518             plain_writer.write( DomainCountsDifferenceUtil.combinableDomaindToString( genome.get( domain ) ) );
519             html_writer.write( DomainCountsDifferenceUtil.combinableDomaindToStringHtml( genome.get( domain ) ) );
520         }
521         else {
522             plain_writer.write( "0" );
523             html_writer.write( "0" );
524         }
525         html_writer.write( "</td>" );
526         plain_writer.write( SurfacingConstants.NL );
527     }
528
529     private static void writeDomainCombinationValuesToFiles( final List<GenomeWideCombinableDomains> genomes,
530                                                              final List<String> high_copy_base_species,
531                                                              final List<String> high_copy_target_species,
532                                                              final List<String> low_copy_species,
533                                                              final int min_diff,
534                                                              final Double factor,
535                                                              final Writer html_writer,
536                                                              final SortedMap<BinaryDomainCombination, Double> high_copy_base_values,
537                                                              final SortedMap<BinaryDomainCombination, Double> high_copy_target_values,
538                                                              final SortedMap<BinaryDomainCombination, Double> low_copy_values,
539                                                              final SortedSet<BinaryDomainCombination> all_bdcs,
540                                                              final Map<String, Set<BinaryDomainCombination>> bdcs_per_genome )
541             throws IOException {
542         int counter = 0;
543         int total_absense_counter = 0;
544         int not_total_absense_counter = 0;
545         SurfacingUtil.writeHtmlHead( html_writer, "Binary Domain Combination Copy Differences" );
546         html_writer.write( "<body><table>" );
547         for( final BinaryDomainCombination bdc : all_bdcs ) {
548             if ( ( high_copy_base_values.get( bdc ) > 0 ) && ( high_copy_target_values.get( bdc ) > 0 )
549                     && ( high_copy_base_values.get( bdc ) >= low_copy_values.get( bdc ) ) ) {
550                 if ( high_copy_target_values.get( bdc ) >= ( min_diff + ( factor * low_copy_values.get( bdc ) ) ) ) {
551                     if ( low_copy_values.get( bdc ) <= 0.0 ) {
552                         ++total_absense_counter;
553                     }
554                     else {
555                         ++not_total_absense_counter;
556                     }
557                     ++counter;
558                     html_writer.write( "<tr><td><a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + bdc.getId0()
559                             + "\">" + bdc.getId0() + "</a> = <a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK
560                             + bdc.getId1() + "\">" + bdc.getId1() + "</a>" );
561                     html_writer.write( "</td><td>" );
562                     html_writer.write( "<table>" );
563                     for( final GenomeWideCombinableDomains genome : genomes ) {
564                         final String species = genome.getSpecies().getSpeciesId();
565                         if ( high_copy_target_species.contains( species ) ) {
566                             html_writer.write( "<tr>" );
567                             writeCopyNumberValues( high_copy_target_values,
568                                                    bdc,
569                                                    genome,
570                                                    bdcs_per_genome,
571                                                    species,
572                                                    html_writer,
573                                                    "#0000FF" );
574                             html_writer.write( "</tr>" );
575                         }
576                         else if ( low_copy_species.contains( species ) ) {
577                             html_writer.write( "<tr>" );
578                             writeCopyNumberValues( low_copy_values,
579                                                    bdc,
580                                                    genome,
581                                                    bdcs_per_genome,
582                                                    species,
583                                                    html_writer,
584                                                    "#A0A0A0" );
585                             html_writer.write( "</tr>" );
586                         }
587                         else if ( high_copy_base_species.contains( species ) ) {
588                             html_writer.write( "<tr>" );
589                             writeCopyNumberValues( high_copy_base_values,
590                                                    bdc,
591                                                    genome,
592                                                    bdcs_per_genome,
593                                                    species,
594                                                    html_writer,
595                                                    "#404040" );
596                             html_writer.write( "</tr>" );
597                         }
598                     }
599                     html_writer.write( "</table>" );
600                     html_writer.write( "</td></tr>" );
601                     html_writer.write( SurfacingConstants.NL );
602                 }
603             }
604         }
605         html_writer.write( "</table>" );
606         html_writer.write( SurfacingConstants.NL );
607         html_writer.write( "<hr>" );
608         html_writer.write( SurfacingConstants.NL );
609         html_writer.write( "Rule 1: high-copy-base > 0 && high-copy-target > 0 && high-copy-base >= low-copy" );
610         html_writer.write( "<br>" );
611         html_writer.write( SurfacingConstants.NL );
612         html_writer.write( "Rule 2: high-copy-target >= minimal-difference + ( factor * low-copy )" );
613         html_writer.write( "<br>" );
614         html_writer.write( SurfacingConstants.NL );
615         html_writer.write( "Calculation mode for high copy target : " + COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
616         html_writer.write( SurfacingConstants.NL );
617         html_writer.write( "<br>" );
618         html_writer.write( "Calculation mode for high copy base : " + COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
619         html_writer.write( SurfacingConstants.NL );
620         html_writer.write( "<br>" );
621         html_writer.write( "Calculation mode for low copy : " + COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
622         html_writer.write( SurfacingConstants.NL );
623         html_writer.write( "<br>" );
624         html_writer.write( "Minimal difference : " + min_diff );
625         html_writer.write( SurfacingConstants.NL );
626         html_writer.write( "<br>" );
627         html_writer.write( "Factor : " + factor );
628         html_writer.write( SurfacingConstants.NL );
629         html_writer.write( "<br>" );
630         html_writer.write( "Lower copy binary domain combinations : " + counter );
631         html_writer.write( SurfacingConstants.NL );
632         html_writer.write( "<br>" );
633         html_writer.write( "Total absence : " + total_absense_counter );
634         html_writer.write( SurfacingConstants.NL );
635         html_writer.write( "<br>" );
636         html_writer.write( "Not total absence : " + not_total_absense_counter );
637         html_writer.write( SurfacingConstants.NL );
638         html_writer.write( "<br>" );
639         html_writer.write( "Total binary domain combinations : " + all_bdcs.size() );
640         html_writer.write( SurfacingConstants.NL );
641         html_writer.write( "<hr>" );
642         html_writer.write( SurfacingConstants.NL );
643         html_writer.write( "</body></html>" );
644         html_writer.write( SurfacingConstants.NL );
645         html_writer.close();
646     }
647
648     private static void writeDomainValuesToFiles( final List<GenomeWideCombinableDomains> genomes,
649                                                   final List<String> high_copy_base_species,
650                                                   final List<String> high_copy_target_species,
651                                                   final List<String> low_copy_species,
652                                                   final int min_diff,
653                                                   final Double factor,
654                                                   final Map<String, List<GoId>> domain_id_to_go_ids_map,
655                                                   final Map<GoId, GoTerm> go_id_to_term_map,
656                                                   final Writer plain_writer,
657                                                   final Writer html_writer,
658                                                   final File proteins_file_base,
659                                                   final SortedMap<String, Double> high_copy_base_values,
660                                                   final SortedMap<String, Double> high_copy_target_values,
661                                                   final SortedMap<String, Double> low_copy_values,
662                                                   final SortedSet<String> all_domains,
663                                                   final SortedSet<GoId> go_ids_of_passing_domains,
664                                                   final SortedMap<Species, List<Protein>> protein_lists_per_species )
665             throws IOException {
666         int counter = 0;
667         int total_absense_counter = 0;
668         int not_total_absense_counter = 0;
669         SurfacingUtil.writeHtmlHead( html_writer, "Domain Copy Differences" );
670         html_writer.write( "<body><table>" );
671         for( final String domain_id : all_domains ) {
672             if ( ( high_copy_base_values.get( domain_id ) > 0 ) && ( high_copy_target_values.get( domain_id ) > 0 )
673                     && ( high_copy_base_values.get( domain_id ) >= low_copy_values.get( domain_id ) ) ) {
674                 if ( high_copy_target_values.get( domain_id ) >= ( min_diff + ( factor * low_copy_values
675                         .get( domain_id ) ) ) ) {
676                     if ( low_copy_values.get( domain_id ) <= 0.0 ) {
677                         ++total_absense_counter;
678                     }
679                     else {
680                         ++not_total_absense_counter;
681                     }
682                     ++counter;
683                     writeProteinsToFile( proteins_file_base, protein_lists_per_species, domain_id );
684                     if ( domain_id_to_go_ids_map.containsKey( domain_id ) ) {
685                         go_ids_of_passing_domains.addAll( domain_id_to_go_ids_map.get( domain_id ) );
686                     }
687                     plain_writer.write( domain_id );
688                     plain_writer.write( SurfacingConstants.NL );
689                     html_writer.write( "<tr><td><a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain_id
690                             + "\">" + domain_id + "</a></td><td>" );
691                     html_writer.write( addGoInformation( domain_id, domain_id_to_go_ids_map, go_id_to_term_map )
692                             .toString() );
693                     html_writer.write( "</td><td>" );
694                     html_writer.write( "<table>" );
695                     for( final GenomeWideCombinableDomains genome : genomes ) {
696                         final String species = genome.getSpecies().getSpeciesId();
697                         if ( high_copy_target_species.contains( species ) ) {
698                             html_writer.write( "<tr>" );
699                             writeCopyNumberValues( high_copy_target_values,
700                                                    domain_id,
701                                                    genome,
702                                                    species,
703                                                    plain_writer,
704                                                    html_writer,
705                                                    "#0000FF" );
706                             html_writer.write( "</tr>" );
707                         }
708                         else if ( low_copy_species.contains( species ) ) {
709                             html_writer.write( "<tr>" );
710                             writeCopyNumberValues( low_copy_values,
711                                                    domain_id,
712                                                    genome,
713                                                    species,
714                                                    plain_writer,
715                                                    html_writer,
716                                                    "#A0A0A0" );
717                             html_writer.write( "</tr>" );
718                         }
719                         else if ( high_copy_base_species.contains( species ) ) {
720                             html_writer.write( "<tr>" );
721                             writeCopyNumberValues( high_copy_base_values,
722                                                    domain_id,
723                                                    genome,
724                                                    species,
725                                                    plain_writer,
726                                                    html_writer,
727                                                    "#404040" );
728                             html_writer.write( "</tr>" );
729                         }
730                     }
731                     html_writer.write( "</table>" );
732                     html_writer.write( "</td></tr>" );
733                     html_writer.write( SurfacingConstants.NL );
734                     plain_writer.write( SurfacingConstants.NL );
735                 }
736             }
737         }
738         html_writer.write( "</table>" );
739         html_writer.write( SurfacingConstants.NL );
740         html_writer.write( "<hr>" );
741         html_writer.write( SurfacingConstants.NL );
742         html_writer.write( "Rule 1: high-copy-base > 0 && high-copy-target > 0 && high-copy-base >= low-copy" );
743         html_writer.write( "<br>" );
744         html_writer.write( SurfacingConstants.NL );
745         html_writer.write( "Rule 2: high-copy-target >= minimal-difference + ( factor * low-copy )" );
746         html_writer.write( "<br>" );
747         html_writer.write( SurfacingConstants.NL );
748         html_writer.write( "Calculation mode for high copy target : " + COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
749         html_writer.write( SurfacingConstants.NL );
750         html_writer.write( "<br>" );
751         html_writer.write( "Calculation mode for high copy base : " + COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
752         html_writer.write( SurfacingConstants.NL );
753         html_writer.write( "<br>" );
754         html_writer.write( "Calculation mode for low copy : " + COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
755         html_writer.write( SurfacingConstants.NL );
756         html_writer.write( "<br>" );
757         html_writer.write( "Minimal difference : " + min_diff );
758         html_writer.write( SurfacingConstants.NL );
759         html_writer.write( "<br>" );
760         html_writer.write( "Factor : " + factor );
761         html_writer.write( SurfacingConstants.NL );
762         html_writer.write( "<br>" );
763         html_writer.write( "Lower copy domains : " + counter );
764         html_writer.write( SurfacingConstants.NL );
765         html_writer.write( "<br>" );
766         html_writer.write( "Total absence : " + total_absense_counter );
767         html_writer.write( SurfacingConstants.NL );
768         html_writer.write( "<br>" );
769         html_writer.write( "Not total absence : " + not_total_absense_counter );
770         html_writer.write( SurfacingConstants.NL );
771         html_writer.write( "<br>" );
772         html_writer.write( "Total domains : " + all_domains.size() );
773         html_writer.write( SurfacingConstants.NL );
774         html_writer.write( "<hr>" );
775         html_writer.write( SurfacingConstants.NL );
776         html_writer.write( "</body></html>" );
777         html_writer.write( SurfacingConstants.NL );
778         html_writer.close();
779         plain_writer.write( "# Rule 1: high-copy-base > 0 && high-copy-target > 0 && high-copy-base >= low-copy" );
780         plain_writer.write( SurfacingConstants.NL );
781         plain_writer.write( "# Rule 2: high-copy-target >= minimal-difference + ( factor * low-copy )" );
782         plain_writer.write( SurfacingConstants.NL );
783         plain_writer.write( "# Calculation mode for high copy target: " + COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
784         plain_writer.write( SurfacingConstants.NL );
785         plain_writer.write( "# Calculation mode for high copy base  : " + COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
786         plain_writer.write( SurfacingConstants.NL );
787         plain_writer.write( "# Calculation mode for low copy        : " + COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
788         plain_writer.write( SurfacingConstants.NL );
789         plain_writer.write( "# Minimal difference: " + min_diff );
790         plain_writer.write( SurfacingConstants.NL );
791         plain_writer.write( "# Factor            : " + factor );
792         plain_writer.write( SurfacingConstants.NL );
793         plain_writer.write( "# Lower copy domains: " + counter );
794         plain_writer.write( SurfacingConstants.NL );
795         plain_writer.write( "# Total absence     : " + total_absense_counter );
796         plain_writer.write( SurfacingConstants.NL );
797         plain_writer.write( "# Not total absence : " + not_total_absense_counter );
798         plain_writer.write( SurfacingConstants.NL );
799         plain_writer.write( "# Total domains     : " + all_domains.size() );
800         plain_writer.write( SurfacingConstants.NL );
801         plain_writer.close();
802     }
803
804     private static void writeGoIdsToFile( final Writer writer, final SortedSet<GoId> gos ) throws IOException {
805         for( final GoId go_id : gos ) {
806             writer.write( go_id.toString() );
807             writer.write( SurfacingConstants.NL );
808         }
809         writer.close();
810     }
811
812     private static void writeProteinsToFile( final File proteins_file_base,
813                                              final SortedMap<Species, List<Protein>> protein_lists_per_species,
814                                              final String domain_id ) throws IOException {
815         final File my_proteins_file = new File( proteins_file_base.getParentFile() + ForesterUtil.FILE_SEPARATOR
816                 + domain_id + PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX );
817         SurfacingUtil.checkForOutputFileWriteability( my_proteins_file );
818         final Writer proteins_file_writer = new BufferedWriter( new FileWriter( my_proteins_file ) );
819         SurfacingUtil.extractProteinNames( protein_lists_per_species,
820                                            domain_id,
821                                            proteins_file_writer,
822                                            "\t",
823                                            surfacing.LIMIT_SPEC_FOR_PROT_EX,
824                                            -1 );
825         proteins_file_writer.close();
826         System.out.println( "Wrote proteins list to \"" + my_proteins_file + "\"" );
827     }
828
829     public static enum COPY_CALCULATION_MODE {
830         MAX, MEAN, MEDIAN, MIN
831     }
832 }