4 // FORESTER -- software libraries and applications
5 // for evolutionary biology research and applications.
7 // Copyright (C) 2008-2009 Christian M. Zmasek
8 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
25 // Contact: phylosoft @ gmail . com
26 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
28 package org.forester.surfacing;
30 import java.io.BufferedWriter;
32 import java.io.FileWriter;
33 import java.io.IOException;
34 import java.io.Writer;
35 import java.text.DecimalFormat;
36 import java.text.NumberFormat;
37 import java.util.ArrayList;
38 import java.util.HashMap;
39 import java.util.List;
42 import java.util.SortedMap;
43 import java.util.SortedSet;
44 import java.util.TreeMap;
45 import java.util.TreeSet;
47 import org.forester.application.surfacing;
48 import org.forester.go.GoId;
49 import org.forester.go.GoTerm;
50 import org.forester.protein.BinaryDomainCombination;
51 import org.forester.protein.Protein;
52 import org.forester.species.Species;
53 import org.forester.util.BasicDescriptiveStatistics;
54 import org.forester.util.DescriptiveStatistics;
55 import org.forester.util.ForesterUtil;
/**
 * Poorly designed static class which essentially has one method:
 * calculateCopyNumberDifferences.
 */
61 public final class DomainCountsDifferenceUtil {
// Number format (pattern "0.0E0") applied in this class to the median of a key domain's
// confidence statistics before it is written out.
63 private final static NumberFormat FORMATTER = new DecimalFormat( "0.0E0" );
// How the list of per-species copy counts of each species group is collapsed into a single
// value (see calculateDomainCountsBasedValue): MIN for both high-copy groups, MAX for the
// low-copy group. These settings are also echoed in the summary footers of the output files.
64 private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES = COPY_CALCULATION_MODE.MIN;
65 private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES = COPY_CALCULATION_MODE.MIN;
66 private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_LOW_COPY_SPECIES = COPY_CALCULATION_MODE.MAX;
// File-name suffix appended to a domain id when writeProteinsToFile builds its output file.
67 private static final String PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX = ".prot";
69 //FIXME really needs to be tested!
70 private static void addCounts( final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
71 final BinaryDomainCombination dc,
72 final GenomeWideCombinableDomains genome,
73 final Set<BinaryDomainCombination> bdc ) {
74 if ( !copy_counts.containsKey( dc ) ) {
75 copy_counts.put( dc, new ArrayList<Integer>() );
77 if ( bdc.contains( dc )
78 && ( ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains().get( dc.getId1() ) != null ) ) {
79 final int count = ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains()
81 copy_counts.get( dc ).add( count );
84 copy_counts.get( dc ).add( 0 );
88 private static void addCounts( final SortedMap<String, List<Integer>> copy_counts,
90 final GenomeWideCombinableDomains genome ) {
91 if ( !copy_counts.containsKey( domain ) ) {
92 copy_counts.put( domain, new ArrayList<Integer>() );
94 if ( genome.contains( domain ) ) {
95 copy_counts.get( domain ).add( genome.get( domain ).getKeyDomainProteinsCount() );
98 copy_counts.get( domain ).add( 0 );
// Builds the GO annotation text for domain id d.
// Looks up the GO ids of d in domain_id_to_go_ids_map; for each id that has an entry in
// go_id_to_term_map the term is rendered via appendGoTerm, otherwise a
// 'go id "..." not found [d]' message is appended.
// NOTE(review): several lines of this method are missing from this copy (the body of the guard
// below, the else line, closing braces, the final return) - restore them from the upstream file.
102 private static StringBuilder addGoInformation( final String d,
103 final Map<String, List<GoId>> domain_id_to_go_ids_map,
104 final Map<GoId, GoTerm> go_id_to_term_map ) {
105 final StringBuilder sb = new StringBuilder();
// Guard: a null or empty map, or a domain without GO ids, is handled before any lookup.
// Presumably the (missing) body returns the still-empty builder - confirm.
106 if ( ( domain_id_to_go_ids_map == null ) || domain_id_to_go_ids_map.isEmpty()
107 || !domain_id_to_go_ids_map.containsKey( d ) ) {
110 final List<GoId> go_ids = domain_id_to_go_ids_map.get( d );
111 for( int i = 0; i < go_ids.size(); ++i ) {
112 final GoId go_id = go_ids.get( i );
113 if ( go_id_to_term_map.containsKey( go_id ) ) {
114 appendGoTerm( sb, go_id_to_term_map.get( go_id ) );
// Reached for GO ids without a term (the separating else line is not visible in this copy).
118 sb.append( "go id \"" + go_id + "\" not found [" + d + "]" );
// Appends an HTML rendering of one GO term to sb: a hyperlink built from
// SurfacingConstants.AMIGO_LINK plus the GO id (opened in the "amigo_window" target), then the
// term name and the short string of its GO name space.
// NOTE(review): the continuation of the first append and the short literal appends between the
// visible statements are missing from this copy - restore them from the upstream file.
124 private static void appendGoTerm( final StringBuilder sb, final GoTerm go_term ) {
125 final GoId go_id = go_term.getGoId();
126 sb.append( "<a href=\"" + SurfacingConstants.AMIGO_LINK + go_id + "\" target=\"amigo_window\">" + go_id
129 sb.append( go_term.getName() );
131 sb.append( go_term.getGoNameSpace().toShortString() );
// Public entry point of this class.
// Compares domain copy counts and binary-domain-combination counts between three groups of
// species (high-copy base, high-copy target, low-copy) and writes the results to the given files.
// Visible steps: validate the arguments; check that the output files are writable; open one
// buffered writer per output file; collect all domain ids and GO ids over all genomes; gather
// per-group copy counts via addCounts; collapse each list to one value per group via
// calculateDomainCountsBasedValue using the COPY_CALC_MODE_* constants; delegate the output to
// writeDomainValuesToFiles, writeDomainCombinationValuesToFiles and writeGoIdsToFile.
// Throws IllegalArgumentException for an empty genome list, an empty high-copy-base or low-copy
// species list, a negative min_diff, or a factor <= 0.0. IOException is declared.
// NOTE(review): this copy is missing lines - among them the declarations of the min_diff and
// factor parameters, closing braces, the bodies of some loops and several call arguments.
// Restore them from the upstream file before compiling.
135 public static void calculateCopyNumberDifferences( final List<GenomeWideCombinableDomains> genomes,
136 final SortedMap<Species, List<Protein>> protein_lists_per_species,
137 final List<String> high_copy_base_species,
138 final List<String> high_copy_target_species,
139 final List<String> low_copy_species,
142 final File plain_output_dom,
143 final File html_output_dom,
144 final File html_output_dc,
145 final Map<String, List<GoId>> domain_id_to_go_ids_map,
146 final Map<GoId, GoTerm> go_id_to_term_map,
147 final File all_domains_go_ids_out_dom,
148 final File passing_domains_go_ids_out_dom,
149 final File proteins_file_base ) throws IOException {
150 if ( genomes.size() < 1 ) {
151 throw new IllegalArgumentException( "attempt to use empty list of genomes for domain difference calculation" );
// Note that high_copy_target_species is not required to be non-empty here.
153 if ( ( high_copy_base_species.size() < 1 ) || ( low_copy_species.size() < 1 ) ) {
154 throw new IllegalArgumentException( "attempt to use empty list of species for domain difference calculation" );
// NOTE(review): high_copy_target_species is a List<String>, so List.contains compares the whole
// list against each String element and is always false - this check never fires. Presumably an
// "any element in common" test was intended; confirm and fix.
156 if ( high_copy_base_species.contains( high_copy_target_species )
157 || low_copy_species.contains( high_copy_target_species ) ) {
158 throw new IllegalArgumentException( "species [" + high_copy_target_species
159 + "] appears in other list as well" );
161 if ( min_diff < 0 ) {
162 throw new IllegalArgumentException( "attempt to use negative addition [" + min_diff + "]" );
164 if ( factor <= 0.0 ) {
165 throw new IllegalArgumentException( "attempt to use factor equal or smaller than 0.0 [" + factor + "]" );
167 SurfacingUtil.checkForOutputFileWriteability( plain_output_dom );
168 SurfacingUtil.checkForOutputFileWriteability( html_output_dom );
169 SurfacingUtil.checkForOutputFileWriteability( html_output_dc );
170 SurfacingUtil.checkForOutputFileWriteability( all_domains_go_ids_out_dom );
171 SurfacingUtil.checkForOutputFileWriteability( passing_domains_go_ids_out_dom );
// NOTE(review): no try/finally is visible around these writers, so an exception thrown further
// down would leave them open - confirm against the complete file.
172 final Writer plain_writer = new BufferedWriter( new FileWriter( plain_output_dom ) );
173 final Writer html_writer = new BufferedWriter( new FileWriter( html_output_dom ) );
174 final Writer html_writer_dc = new BufferedWriter( new FileWriter( html_output_dc ) );
175 final Writer all_gos_writer = new BufferedWriter( new FileWriter( all_domains_go_ids_out_dom ) );
176 final Writer passing_gos_writer = new BufferedWriter( new FileWriter( passing_domains_go_ids_out_dom ) );
// Per-group collapsed values and raw count lists, keyed by domain id.
177 final SortedMap<String, Double> high_copy_base_values = new TreeMap<String, Double>();
178 final SortedMap<String, Double> high_copy_target_values = new TreeMap<String, Double>();
179 final SortedMap<String, Double> low_copy_values = new TreeMap<String, Double>();
180 final SortedMap<String, List<Integer>> high_copy_base_copy_counts = new TreeMap<String, List<Integer>>();
181 final SortedMap<String, List<Integer>> high_copy_target_copy_counts = new TreeMap<String, List<Integer>>();
182 final SortedMap<String, List<Integer>> low_copy_copy_counts = new TreeMap<String, List<Integer>>();
183 final SortedSet<String> all_domains = new TreeSet<String>();
// The same structures, keyed by binary domain combination.
184 final SortedMap<BinaryDomainCombination, Double> high_copy_base_values_dc = new TreeMap<BinaryDomainCombination, Double>();
185 final SortedMap<BinaryDomainCombination, Double> high_copy_target_values_dc = new TreeMap<BinaryDomainCombination, Double>();
186 final SortedMap<BinaryDomainCombination, Double> low_copy_values_dc = new TreeMap<BinaryDomainCombination, Double>();
187 final SortedMap<BinaryDomainCombination, List<Integer>> high_copy_base_copy_counts_dc = new TreeMap<BinaryDomainCombination, List<Integer>>();
188 final SortedMap<BinaryDomainCombination, List<Integer>> high_copy_target_copy_counts_dc = new TreeMap<BinaryDomainCombination, List<Integer>>();
189 final SortedMap<BinaryDomainCombination, List<Integer>> low_copy_copy_counts_dc = new TreeMap<BinaryDomainCombination, List<Integer>>();
190 final SortedSet<BinaryDomainCombination> all_dcs = new TreeSet<BinaryDomainCombination>();
191 final Map<String, Set<BinaryDomainCombination>> bdcs_per_genome = new HashMap<String, Set<BinaryDomainCombination>>();
192 final SortedSet<GoId> go_ids_of_passing_domains = new TreeSet<GoId>();
193 final SortedSet<GoId> go_ids_all = new TreeSet<GoId>();
// Pass 1: collect every domain id and GO id seen in any genome, and remember each genome's
// combinations under its species id.
194 for( final GenomeWideCombinableDomains genome : genomes ) {
195 final SortedSet<String> domains = genome.getAllDomainIds();
196 final SortedSet<BinaryDomainCombination> dcs = genome.toBinaryDomainCombinations();
197 final String species = genome.getSpecies().getSpeciesId();
// NOTE(review): toBinaryDomainCombinations() is called a second time here although the result
// is already held in dcs.
198 bdcs_per_genome.put( species, genome.toBinaryDomainCombinations() );
199 for( final String d : domains ) {
200 all_domains.add( d );
// NOTE(review): domain_id_to_go_ids_map is dereferenced without a null check here, whereas
// addGoInformation tolerates a null map.
201 if ( domain_id_to_go_ids_map.containsKey( d ) ) {
202 go_ids_all.addAll( domain_id_to_go_ids_map.get( d ) );
// The body of this loop is missing from this copy; presumably it fills all_dcs - confirm.
205 for( final BinaryDomainCombination dc : dcs ) {
// Pass 2: one count per (domain, genome) is appended to the list of every group the genome's
// species belongs to.
209 for( final String domain : all_domains ) {
210 for( final GenomeWideCombinableDomains genome : genomes ) {
211 final String species = genome.getSpecies().getSpeciesId();
212 if ( high_copy_base_species.contains( species ) ) {
213 DomainCountsDifferenceUtil.addCounts( high_copy_base_copy_counts, domain, genome );
215 if ( high_copy_target_species.contains( species ) ) {
216 DomainCountsDifferenceUtil.addCounts( high_copy_target_copy_counts, domain, genome );
218 if ( low_copy_species.contains( species ) ) {
219 DomainCountsDifferenceUtil.addCounts( low_copy_copy_counts, domain, genome );
// Pass 3: the same for binary domain combinations (middle call arguments missing in this copy).
223 for( final BinaryDomainCombination dc : all_dcs ) {
224 for( final GenomeWideCombinableDomains genome : genomes ) {
225 final String species = genome.getSpecies().getSpeciesId();
226 if ( high_copy_base_species.contains( species ) ) {
227 DomainCountsDifferenceUtil.addCounts( high_copy_base_copy_counts_dc,
230 bdcs_per_genome.get( species ) );
232 if ( high_copy_target_species.contains( species ) ) {
233 DomainCountsDifferenceUtil.addCounts( high_copy_target_copy_counts_dc,
236 bdcs_per_genome.get( species ) );
238 if ( low_copy_species.contains( species ) ) {
239 DomainCountsDifferenceUtil.addCounts( low_copy_copy_counts_dc,
242 bdcs_per_genome.get( species ) );
// Pass 4: collapse each list of counts into one value per group, using the configured mode.
246 for( final String domain : all_domains ) {
247 calculateDomainCountsBasedValue( high_copy_target_values,
248 high_copy_target_copy_counts,
250 COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
251 calculateDomainCountsBasedValue( high_copy_base_values,
252 high_copy_base_copy_counts,
254 COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
255 calculateDomainCountsBasedValue( low_copy_values,
256 low_copy_copy_counts,
258 COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
260 for( final BinaryDomainCombination dc : all_dcs ) {
261 calculateDomainCountsBasedValue( high_copy_target_values_dc,
262 high_copy_target_copy_counts_dc,
264 COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
265 calculateDomainCountsBasedValue( high_copy_base_values_dc,
266 high_copy_base_copy_counts_dc,
268 COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
269 calculateDomainCountsBasedValue( low_copy_values_dc,
270 low_copy_copy_counts_dc,
272 COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
// Output: domain report, combination report, then the two GO id lists (several arguments of
// the first two calls are missing in this copy).
274 writeDomainValuesToFiles( genomes,
275 high_copy_base_species,
276 high_copy_target_species,
280 domain_id_to_go_ids_map,
285 high_copy_base_values,
286 high_copy_target_values,
289 go_ids_of_passing_domains,
290 protein_lists_per_species );
291 writeDomainCombinationValuesToFiles( genomes,
292 high_copy_base_species,
293 high_copy_target_species,
298 high_copy_base_values_dc,
299 high_copy_target_values_dc,
303 writeGoIdsToFile( all_gos_writer, go_ids_all );
304 writeGoIdsToFile( passing_gos_writer, go_ids_of_passing_domains );
307 private static void calculateDomainCountsBasedValue( final SortedMap<BinaryDomainCombination, Double> copy_values,
308 final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
309 final BinaryDomainCombination bdc,
310 final COPY_CALCULATION_MODE copy_calc_mode ) {
311 if ( copy_counts.containsKey( bdc ) ) {
312 switch ( copy_calc_mode ) {
314 DomainCountsDifferenceUtil.calculateMaxCount( copy_values, copy_counts, bdc );
317 DomainCountsDifferenceUtil.calculateMinCount( copy_values, copy_counts, bdc );
320 DomainCountsDifferenceUtil.calculateMeanCount( copy_values, copy_counts, bdc );
323 DomainCountsDifferenceUtil.calculateMedianCount( copy_values, copy_counts, bdc );
326 throw new IllegalArgumentException();
330 copy_values.put( bdc, Double.valueOf( 0.0 ) );
334 private static void calculateDomainCountsBasedValue( final SortedMap<String, Double> copy_values,
335 final SortedMap<String, List<Integer>> copy_counts,
337 final COPY_CALCULATION_MODE copy_calc_mode ) {
338 if ( copy_counts.containsKey( domain ) ) {
339 switch ( copy_calc_mode ) {
341 DomainCountsDifferenceUtil.calculateMaxCount( copy_values, copy_counts, domain );
344 DomainCountsDifferenceUtil.calculateMinCount( copy_values, copy_counts, domain );
347 DomainCountsDifferenceUtil.calculateMeanCount( copy_values, copy_counts, domain );
350 DomainCountsDifferenceUtil.calculateMedianCount( copy_values, copy_counts, domain );
353 throw new IllegalArgumentException();
357 copy_values.put( domain, Double.valueOf( 0.0 ) );
361 private static void calculateMaxCount( final SortedMap<BinaryDomainCombination, Double> results,
362 final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
363 final BinaryDomainCombination bdc ) {
364 final List<Integer> counts = copy_counts.get( bdc );
366 for( final Integer count : counts ) {
371 results.put( bdc, ( double ) max );
374 private static void calculateMaxCount( final SortedMap<String, Double> results,
375 final SortedMap<String, List<Integer>> copy_counts,
376 final String domain ) {
377 final List<Integer> counts = copy_counts.get( domain );
379 for( final Integer count : counts ) {
384 results.put( domain, ( double ) max );
387 private static void calculateMeanCount( final SortedMap<BinaryDomainCombination, Double> results,
388 final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
389 final BinaryDomainCombination bdc ) {
390 final List<Integer> counts = copy_counts.get( bdc );
392 for( final Integer count : counts ) {
395 results.put( bdc, ( ( double ) sum ) / ( ( double ) counts.size() ) );
398 private static void calculateMeanCount( final SortedMap<String, Double> results,
399 final SortedMap<String, List<Integer>> copy_counts,
400 final String domain ) {
401 final List<Integer> counts = copy_counts.get( domain );
403 for( final Integer count : counts ) {
406 results.put( domain, ( ( double ) sum ) / ( ( double ) counts.size() ) );
409 private static void calculateMedianCount( final SortedMap<BinaryDomainCombination, Double> results,
410 final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
411 final BinaryDomainCombination bdc ) {
412 final List<Integer> counts = copy_counts.get( bdc );
413 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
414 for( final Integer count : counts ) {
415 stats.addValue( count );
417 results.put( bdc, stats.median() );
420 private static void calculateMedianCount( final SortedMap<String, Double> results,
421 final SortedMap<String, List<Integer>> copy_counts,
422 final String domain ) {
423 final List<Integer> counts = copy_counts.get( domain );
424 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
425 for( final Integer count : counts ) {
426 stats.addValue( count );
428 results.put( domain, stats.median() );
431 private static void calculateMinCount( final SortedMap<BinaryDomainCombination, Double> results,
432 final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
433 final BinaryDomainCombination bdc ) {
434 final List<Integer> counts = copy_counts.get( bdc );
435 int min = Integer.MAX_VALUE;
436 for( final Integer count : counts ) {
441 results.put( bdc, ( double ) min );
444 private static void calculateMinCount( final SortedMap<String, Double> results,
445 final SortedMap<String, List<Integer>> copy_counts,
446 final String domain ) {
447 final List<Integer> counts = copy_counts.get( domain );
448 int min = Integer.MAX_VALUE;
449 for( final Integer count : counts ) {
454 results.put( domain, ( double ) min );
// Plain-text rendering of one CombinableDomains entry: the key-domain protein count followed by
// the median of the key-domain confidence statistics, formatted with FORMATTER.
// NOTE(review): the short literal appends that separate the two values are missing from this
// copy - restore them from the upstream file.
457 private static String combinableDomaindToString( final CombinableDomains cd ) {
458 final StringBuilder sb = new StringBuilder();
459 sb.append( cd.getKeyDomainProteinsCount() );
461 sb.append( FORMATTER.format( cd.getKeyDomainConfidenceDescriptiveStatistics().median() ) );
463 return sb.toString();
// HTML rendering of one CombinableDomains entry, spread over table cells: the key-domain count,
// the key-domain protein count (in bold) and the number of combinable domains; then the median
// of the key-domain confidence statistics formatted with FORMATTER; then the combining domain
// ids. The enclosing <td> ... </td> tags are written by the caller (writeCopyNumberValues).
// NOTE(review): one short append before the first value is missing from this copy (the first
// cell is closed with "]" but no opening literal is visible).
466 private static String combinableDomaindToStringHtml( final CombinableDomains cd ) {
467 final StringBuilder sb = new StringBuilder();
469 sb.append( cd.getKeyDomainCount() );
470 sb.append( ", <b>" );
471 sb.append( cd.getKeyDomainProteinsCount() );
472 sb.append( "</b>, " );
473 sb.append( cd.getNumberOfCombinableDomains() );
474 sb.append( "]</td><td>[" );
475 sb.append( FORMATTER.format( cd.getKeyDomainConfidenceDescriptiveStatistics().median() ) );
476 sb.append( "]</td><td>" );
477 sb.append( cd.getCombiningDomainIdsAsStringBuilder() );
478 return sb.toString();
481 private static void writeCopyNumberValues( final SortedMap<BinaryDomainCombination, Double> copy_means,
482 final BinaryDomainCombination bdc,
483 final GenomeWideCombinableDomains genome,
484 final Map<String, Set<BinaryDomainCombination>> bdcs_per_genome,
485 final String species,
486 final Writer html_writer,
487 final String color ) throws IOException {
488 html_writer.write( "<td> " );
489 if ( !ForesterUtil.isEmpty( color ) ) {
490 html_writer.write( "<font color=\"" + color + "\">" );
492 html_writer.write( "<b>" + species + ":</b> " );
493 if ( !ForesterUtil.isEmpty( color ) ) {
494 html_writer.write( "</font>" );
496 html_writer.write( "</td><td>" );
497 if ( bdcs_per_genome.get( species ).contains( bdc ) && ( copy_means.get( bdc ) > 0 ) ) {
498 final int count = ( ( BasicCombinableDomains ) genome.get( bdc.getId0() ) ).getCombiningDomains()
499 .get( bdc.getId1() );
500 html_writer.write( count + "" );
503 html_writer.write( "0" );
505 html_writer.write( "</td>" );
508 private static void writeCopyNumberValues( final SortedMap<String, Double> copy_means,
510 final GenomeWideCombinableDomains genome,
511 final String species,
512 final Writer plain_writer,
513 final Writer html_writer,
514 final String color ) throws IOException {
515 plain_writer.write( " " + species + "\t" );
516 html_writer.write( "<td> " );
517 if ( !ForesterUtil.isEmpty( color ) ) {
518 html_writer.write( "<font color=\"" + color + "\">" );
520 html_writer.write( "<b>" + species + ":</b> " );
521 if ( !ForesterUtil.isEmpty( color ) ) {
522 html_writer.write( "</font>" );
524 html_writer.write( "</td><td>" );
525 if ( genome.contains( domain ) && ( copy_means.get( domain ) > 0 ) ) {
526 plain_writer.write( DomainCountsDifferenceUtil.combinableDomaindToString( genome.get( domain ) ) );
527 html_writer.write( DomainCountsDifferenceUtil.combinableDomaindToStringHtml( genome.get( domain ) ) );
530 plain_writer.write( "0" );
531 html_writer.write( "0" );
533 html_writer.write( "</td>" );
534 plain_writer.write( SurfacingConstants.NL );
// Writes the HTML report for binary domain combinations.
// For every combination in all_bdcs that satisfies Rule 1 and Rule 2 (spelled out in the footer
// written below) a table row is emitted: the two domain ids as links built from
// SurfacingConstants.PFAM_FAMILY_ID_LINK, followed by a nested table with one row per genome
// whose species belongs to one of the three groups. A summary footer (rules, calculation modes,
// min_diff, factor and the counters) is written after the table.
// NOTE(review): this copy is missing lines - among them the min_diff and factor parameters, the
// throws clause, the declaration and increment of "counter", else lines, closing braces and
// most arguments of the writeCopyNumberValues calls. Restore them from the upstream file.
537 private static void writeDomainCombinationValuesToFiles( final List<GenomeWideCombinableDomains> genomes,
538 final List<String> high_copy_base_species,
539 final List<String> high_copy_target_species,
540 final List<String> low_copy_species,
543 final Writer html_writer,
544 final SortedMap<BinaryDomainCombination, Double> high_copy_base_values,
545 final SortedMap<BinaryDomainCombination, Double> high_copy_target_values,
546 final SortedMap<BinaryDomainCombination, Double> low_copy_values,
547 final SortedSet<BinaryDomainCombination> all_bdcs,
548 final Map<String, Set<BinaryDomainCombination>> bdcs_per_genome )
551 int total_absense_counter = 0;
552 int not_total_absense_counter = 0;
553 SurfacingUtil.addHtmlHead( html_writer, "Binary Domain Combination Copy Differences" );
554 html_writer.write( "<body><table>" );
555 for( final BinaryDomainCombination bdc : all_bdcs ) {
// Rule 1: both high-copy values are positive and the base value is at least the low-copy value.
// NOTE(review): the map values are unboxed here; a combination missing from any of the three
// maps would raise a NullPointerException.
556 if ( ( high_copy_base_values.get( bdc ) > 0 ) && ( high_copy_target_values.get( bdc ) > 0 )
557 && ( high_copy_base_values.get( bdc ) >= low_copy_values.get( bdc ) ) ) {
// Rule 2: the target value reaches min_diff + factor * low-copy value.
558 if ( high_copy_target_values.get( bdc ) >= ( min_diff + ( factor * low_copy_values.get( bdc ) ) ) ) {
// A low-copy value of 0 or less counts as "total absence"; presumably the second increment
// sits in an else branch whose line is not visible in this copy - confirm.
559 if ( low_copy_values.get( bdc ) <= 0.0 ) {
560 ++total_absense_counter;
563 ++not_total_absense_counter;
566 html_writer.write( "<tr><td><a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + bdc.getId0()
567 + "\">" + bdc.getId0() + "</a> = <a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK
568 + bdc.getId1() + "\">" + bdc.getId1() + "</a>" );
569 html_writer.write( "</td><td>" );
570 html_writer.write( "<table>" );
// One nested row per genome; the species' group decides which value map is passed on. The
// groups are tested in the order target, low-copy, base.
571 for( final GenomeWideCombinableDomains genome : genomes ) {
572 final String species = genome.getSpecies().getSpeciesId();
573 if ( high_copy_target_species.contains( species ) ) {
574 html_writer.write( "<tr>" );
575 writeCopyNumberValues( high_copy_target_values,
582 html_writer.write( "</tr>" );
584 else if ( low_copy_species.contains( species ) ) {
585 html_writer.write( "<tr>" );
586 writeCopyNumberValues( low_copy_values,
593 html_writer.write( "</tr>" );
595 else if ( high_copy_base_species.contains( species ) ) {
596 html_writer.write( "<tr>" );
597 writeCopyNumberValues( high_copy_base_values,
604 html_writer.write( "</tr>" );
607 html_writer.write( "</table>" );
608 html_writer.write( "</td></tr>" );
609 html_writer.write( SurfacingConstants.NL );
// Footer: documents the rules and parameters that produced the table above.
613 html_writer.write( "</table>" );
614 html_writer.write( SurfacingConstants.NL );
615 html_writer.write( "<hr>" );
616 html_writer.write( SurfacingConstants.NL );
617 html_writer.write( "Rule 1: high-copy-base > 0 && high-copy-target > 0 && high-copy-base >= low-copy" );
618 html_writer.write( "<br>" );
619 html_writer.write( SurfacingConstants.NL );
620 html_writer.write( "Rule 2: high-copy-target >= minimal-difference + ( factor * low-copy )" );
621 html_writer.write( "<br>" );
622 html_writer.write( SurfacingConstants.NL );
623 html_writer.write( "Calculation mode for high copy target : " + COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
624 html_writer.write( SurfacingConstants.NL );
625 html_writer.write( "<br>" );
626 html_writer.write( "Calculation mode for high copy base : " + COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
627 html_writer.write( SurfacingConstants.NL );
628 html_writer.write( "<br>" );
629 html_writer.write( "Calculation mode for low copy : " + COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
630 html_writer.write( SurfacingConstants.NL );
631 html_writer.write( "<br>" );
632 html_writer.write( "Minimal difference : " + min_diff );
633 html_writer.write( SurfacingConstants.NL );
634 html_writer.write( "<br>" );
635 html_writer.write( "Factor : " + factor );
636 html_writer.write( SurfacingConstants.NL );
637 html_writer.write( "<br>" );
638 html_writer.write( "Lower copy binary domain combinations : " + counter );
639 html_writer.write( SurfacingConstants.NL );
640 html_writer.write( "<br>" );
641 html_writer.write( "Total absence : " + total_absense_counter );
642 html_writer.write( SurfacingConstants.NL );
643 html_writer.write( "<br>" );
644 html_writer.write( "Not total absence : " + not_total_absense_counter );
645 html_writer.write( SurfacingConstants.NL );
646 html_writer.write( "<br>" );
647 html_writer.write( "Total binary domain combinations : " + all_bdcs.size() );
648 html_writer.write( SurfacingConstants.NL );
649 html_writer.write( "<hr>" );
650 html_writer.write( SurfacingConstants.NL );
651 html_writer.write( "</body></html>" );
652 html_writer.write( SurfacingConstants.NL );
// Writes the plain-text and HTML reports for single domains.
// For every domain id in all_domains that satisfies Rule 1 and Rule 2 (spelled out in the
// footers written below): the proteins file is written via writeProteinsToFile, the domain's GO
// ids are added to go_ids_of_passing_domains, the id is written to the plain output, and an HTML
// row is emitted with the domain link, its GO information and a nested table with one row per
// genome whose species belongs to one of the three groups. Both outputs end with a summary of
// the rules, calculation modes, min_diff, factor and counters; plain_writer is closed at the end.
// NOTE(review): this copy is missing lines - among them the min_diff and factor parameters, the
// throws clause, the declaration and increment of "counter", else lines, closing braces, most
// arguments of the writeCopyNumberValues calls and the line between the HTML and plain footers.
// Restore them from the upstream file.
656 private static void writeDomainValuesToFiles( final List<GenomeWideCombinableDomains> genomes,
657 final List<String> high_copy_base_species,
658 final List<String> high_copy_target_species,
659 final List<String> low_copy_species,
662 final Map<String, List<GoId>> domain_id_to_go_ids_map,
663 final Map<GoId, GoTerm> go_id_to_term_map,
664 final Writer plain_writer,
665 final Writer html_writer,
666 final File proteins_file_base,
667 final SortedMap<String, Double> high_copy_base_values,
668 final SortedMap<String, Double> high_copy_target_values,
669 final SortedMap<String, Double> low_copy_values,
670 final SortedSet<String> all_domains,
671 final SortedSet<GoId> go_ids_of_passing_domains,
672 final SortedMap<Species, List<Protein>> protein_lists_per_species )
675 int total_absense_counter = 0;
676 int not_total_absense_counter = 0;
677 SurfacingUtil.addHtmlHead( html_writer, "Domain Copy Differences" );
678 html_writer.write( "<body><table>" );
679 for( final String domain_id : all_domains ) {
// Rule 1: both high-copy values are positive and the base value is at least the low-copy value.
// NOTE(review): the map values are unboxed here; a domain missing from any of the three maps
// would raise a NullPointerException.
680 if ( ( high_copy_base_values.get( domain_id ) > 0 ) && ( high_copy_target_values.get( domain_id ) > 0 )
681 && ( high_copy_base_values.get( domain_id ) >= low_copy_values.get( domain_id ) ) ) {
// Rule 2: the target value reaches min_diff + factor * low-copy value.
682 if ( high_copy_target_values.get( domain_id ) >= ( min_diff + ( factor * low_copy_values
683 .get( domain_id ) ) ) ) {
// A low-copy value of 0 or less counts as "total absence"; presumably the second increment
// sits in an else branch whose line is not visible in this copy - confirm.
684 if ( low_copy_values.get( domain_id ) <= 0.0 ) {
685 ++total_absense_counter;
688 ++not_total_absense_counter;
691 writeProteinsToFile( proteins_file_base, protein_lists_per_species, domain_id );
// NOTE(review): domain_id_to_go_ids_map is dereferenced without a null check here, whereas
// addGoInformation tolerates a null map.
692 if ( domain_id_to_go_ids_map.containsKey( domain_id ) ) {
693 go_ids_of_passing_domains.addAll( domain_id_to_go_ids_map.get( domain_id ) );
695 plain_writer.write( domain_id );
696 plain_writer.write( SurfacingConstants.NL );
697 html_writer.write( "<tr><td><a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain_id
698 + "\">" + domain_id + "</a></td><td>" );
// The end of this statement is missing in this copy.
699 html_writer.write( addGoInformation( domain_id, domain_id_to_go_ids_map, go_id_to_term_map )
701 html_writer.write( "</td><td>" );
702 html_writer.write( "<table>" );
// One nested row per genome; the species' group decides which value map is passed on. The
// groups are tested in the order target, low-copy, base.
703 for( final GenomeWideCombinableDomains genome : genomes ) {
704 final String species = genome.getSpecies().getSpeciesId();
705 if ( high_copy_target_species.contains( species ) ) {
706 html_writer.write( "<tr>" );
707 writeCopyNumberValues( high_copy_target_values,
714 html_writer.write( "</tr>" );
716 else if ( low_copy_species.contains( species ) ) {
717 html_writer.write( "<tr>" );
718 writeCopyNumberValues( low_copy_values,
725 html_writer.write( "</tr>" );
727 else if ( high_copy_base_species.contains( species ) ) {
728 html_writer.write( "<tr>" );
729 writeCopyNumberValues( high_copy_base_values,
736 html_writer.write( "</tr>" );
739 html_writer.write( "</table>" );
740 html_writer.write( "</td></tr>" );
741 html_writer.write( SurfacingConstants.NL );
742 plain_writer.write( SurfacingConstants.NL );
// HTML footer: documents the rules and parameters that produced the table above.
746 html_writer.write( "</table>" );
747 html_writer.write( SurfacingConstants.NL );
748 html_writer.write( "<hr>" );
749 html_writer.write( SurfacingConstants.NL );
750 html_writer.write( "Rule 1: high-copy-base > 0 && high-copy-target > 0 && high-copy-base >= low-copy" );
751 html_writer.write( "<br>" );
752 html_writer.write( SurfacingConstants.NL );
753 html_writer.write( "Rule 2: high-copy-target >= minimal-difference + ( factor * low-copy )" );
754 html_writer.write( "<br>" );
755 html_writer.write( SurfacingConstants.NL );
756 html_writer.write( "Calculation mode for high copy target : " + COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
757 html_writer.write( SurfacingConstants.NL );
758 html_writer.write( "<br>" );
759 html_writer.write( "Calculation mode for high copy base : " + COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
760 html_writer.write( SurfacingConstants.NL );
761 html_writer.write( "<br>" );
762 html_writer.write( "Calculation mode for low copy : " + COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
763 html_writer.write( SurfacingConstants.NL );
764 html_writer.write( "<br>" );
765 html_writer.write( "Minimal difference : " + min_diff );
766 html_writer.write( SurfacingConstants.NL );
767 html_writer.write( "<br>" );
768 html_writer.write( "Factor : " + factor );
769 html_writer.write( SurfacingConstants.NL );
770 html_writer.write( "<br>" );
771 html_writer.write( "Lower copy domains : " + counter );
772 html_writer.write( SurfacingConstants.NL );
773 html_writer.write( "<br>" );
774 html_writer.write( "Total absence : " + total_absense_counter );
775 html_writer.write( SurfacingConstants.NL );
776 html_writer.write( "<br>" );
777 html_writer.write( "Not total absence : " + not_total_absense_counter );
778 html_writer.write( SurfacingConstants.NL );
779 html_writer.write( "<br>" );
780 html_writer.write( "Total domains : " + all_domains.size() );
781 html_writer.write( SurfacingConstants.NL );
782 html_writer.write( "<hr>" );
783 html_writer.write( SurfacingConstants.NL );
784 html_writer.write( "</body></html>" );
785 html_writer.write( SurfacingConstants.NL );
// Plain-text footer: the same summary, written as '#'-prefixed lines.
787 plain_writer.write( "# Rule 1: high-copy-base > 0 && high-copy-target > 0 && high-copy-base >= low-copy" );
788 plain_writer.write( SurfacingConstants.NL );
789 plain_writer.write( "# Rule 2: high-copy-target >= minimal-difference + ( factor * low-copy )" );
790 plain_writer.write( SurfacingConstants.NL );
791 plain_writer.write( "# Calculation mode for high copy target: " + COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
792 plain_writer.write( SurfacingConstants.NL );
793 plain_writer.write( "# Calculation mode for high copy base : " + COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
794 plain_writer.write( SurfacingConstants.NL );
795 plain_writer.write( "# Calculation mode for low copy : " + COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
796 plain_writer.write( SurfacingConstants.NL );
797 plain_writer.write( "# Minimal difference: " + min_diff );
798 plain_writer.write( SurfacingConstants.NL );
799 plain_writer.write( "# Factor : " + factor );
800 plain_writer.write( SurfacingConstants.NL );
801 plain_writer.write( "# Lower copy domains: " + counter );
802 plain_writer.write( SurfacingConstants.NL );
803 plain_writer.write( "# Total absence : " + total_absense_counter );
804 plain_writer.write( SurfacingConstants.NL );
805 plain_writer.write( "# Not total absence : " + not_total_absense_counter );
806 plain_writer.write( SurfacingConstants.NL );
807 plain_writer.write( "# Total domains : " + all_domains.size() );
808 plain_writer.write( SurfacingConstants.NL );
// The plain writer is closed here rather than by the caller that opened it.
809 plain_writer.close();
// Writes every GO id of the given sorted set to writer, one id per line (toString() followed by
// SurfacingConstants.NL).
// NOTE(review): the lines after the loop body are missing from this copy; one of them may
// close the writer - confirm against the upstream file, since the caller does not visibly
// close the writers it passes in.
812 private static void writeGoIdsToFile( final Writer writer, final SortedSet<GoId> gos ) throws IOException {
813 for( final GoId go_id : gos ) {
814 writer.write( go_id.toString() );
815 writer.write( SurfacingConstants.NL );
// Writes the protein list for one domain to its own file.
// The file is placed in the parent directory of proteins_file_base and named
// domain_id + PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX. It is checked for writability, filled via
// SurfacingUtil.extractProteinNames, closed, and its path is reported on standard output.
// NOTE(review): several arguments of the extractProteinNames call are missing from this copy.
820 private static void writeProteinsToFile( final File proteins_file_base,
821 final SortedMap<Species, List<Protein>> protein_lists_per_species,
822 final String domain_id ) throws IOException {
// NOTE(review): File.getParentFile() returns null when the path has no parent, in which case
// this concatenation yields a path starting with the text "null" - confirm callers always pass
// a path with a parent directory.
823 final File my_proteins_file = new File( proteins_file_base.getParentFile() + ForesterUtil.FILE_SEPARATOR
824 + domain_id + PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX );
825 SurfacingUtil.checkForOutputFileWriteability( my_proteins_file );
// NOTE(review): no try/finally is visible, so the writer stays open if extractProteinNames throws.
826 final Writer proteins_file_writer = new BufferedWriter( new FileWriter( my_proteins_file ) );
827 SurfacingUtil.extractProteinNames( protein_lists_per_species,
829 proteins_file_writer,
831 surfacing.LIMIT_SPEC_FOR_PROT_EX,
833 proteins_file_writer.close();
834 System.out.println( "Wrote proteins list to \"" + my_proteins_file + "\"" );
// The ways in which the list of per-species copy counts of a species group can be collapsed
// into one value; calculateDomainCountsBasedValue dispatches on these constants.
// NOTE(review): the closing braces of this enum and of the enclosing class are missing from
// this copy.
837 public static enum COPY_CALCULATION_MODE {
838 MEAN, MEDIAN, MAX, MIN