4 // FORESTER -- software libraries and applications
5 // for evolutionary biology research and applications.
7 // Copyright (C) 2008-2009 Christian M. Zmasek
8 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
25 // Contact: phylosoft @ gmail . com
26 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
28 package org.forester.surfacing;
30 import java.io.BufferedWriter;
32 import java.io.FileWriter;
33 import java.io.IOException;
34 import java.io.Writer;
35 import java.util.ArrayList;
36 import java.util.HashMap;
37 import java.util.List;
40 import java.util.SortedMap;
41 import java.util.SortedSet;
42 import java.util.TreeMap;
43 import java.util.TreeSet;
45 import org.forester.application.surfacing;
46 import org.forester.go.GoId;
47 import org.forester.go.GoTerm;
48 import org.forester.protein.BinaryDomainCombination;
49 import org.forester.protein.Protein;
50 import org.forester.species.Species;
51 import org.forester.util.BasicDescriptiveStatistics;
52 import org.forester.util.DescriptiveStatistics;
53 import org.forester.util.ForesterUtil;
56 * Poorly designed static class which essentially has one method:
57 * calculateCopyNumberDifferences.
59 public final class DomainCountsDifferenceUtil {
// Calculation modes handed to calculateDomainCountsBasedValue when the copy
// counts of the high-copy-base, high-copy-target and low-copy species groups
// are collapsed into a single value per domain / binary domain combination.
61 private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES = COPY_CALCULATION_MODE.MIN;
62 private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES = COPY_CALCULATION_MODE.MIN;
63 private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_LOW_COPY_SPECIES = COPY_CALCULATION_MODE.MAX;
// Suffix appended to the domain id when writeProteinsToFile builds the name
// of the per-domain protein list file.
64 private static final String PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX = ".prot";
/**
 * The only public method visible in this class: compares domain (and binary
 * domain combination) copy numbers between three groups of species and
 * writes the results to files.
 * <p>
 * Visible steps: argument validation; writability checks and opening of five
 * output writers; collection of all domain ids, binary domain combinations
 * and GO ids over the genomes; collection of per-group copy counts via
 * addCounts; reduction of those counts to one value per key via
 * calculateDomainCountsBasedValue (using the COPY_CALC_MODE_* constants);
 * and finally delegation to writeDomainValuesToFiles,
 * writeDomainCombinationValuesToFiles and writeGoIdsToFile.
 * <p>
 * NOTE(review): min_diff and factor are used below, but their parameter
 * declarations (and several closing braces / call arguments) are not visible
 * in this excerpt.
 *
 * @throws IllegalArgumentException if genomes, high_copy_base_species or
 *         low_copy_species is empty, or if factor is not greater than 0.0;
 *         presumably also for a negative min_diff (only the message of that
 *         check is visible here)
 * @throws IOException declared by the signature; FileWriters are opened on
 *         the output files
 */
66 public static void calculateCopyNumberDifferences( final List<GenomeWideCombinableDomains> genomes,
67 final SortedMap<Species, List<Protein>> protein_lists_per_species,
68 final List<String> high_copy_base_species,
69 final List<String> high_copy_target_species,
70 final List<String> low_copy_species,
73 final File plain_output_dom,
74 final File html_output_dom,
75 final File html_output_dc,
76 final Map<String, List<GoId>> domain_id_to_go_ids_map,
77 final Map<GoId, GoTerm> go_id_to_term_map,
78 final File all_domains_go_ids_out_dom,
79 final File passing_domains_go_ids_out_dom,
80 final File proteins_file_base ) throws IOException {
81 if ( genomes.size() < 1 ) {
82 throw new IllegalArgumentException( "attempt to use empty list of genomes for domain difference calculation" );
84 if ( ( high_copy_base_species.size() < 1 ) || ( low_copy_species.size() < 1 ) ) {
85 throw new IllegalArgumentException( "attempt to use empty list of species for domain difference calculation" );
// NOTE(review): high_copy_target_species is itself a List<String>; contains()
// is given the whole list here rather than individual species ids, so this
// overlap check looks like it can never match -- confirm the intent.
87 if ( high_copy_base_species.contains( high_copy_target_species )
88 || low_copy_species.contains( high_copy_target_species ) ) {
89 throw new IllegalArgumentException( "species [" + high_copy_target_species
90 + "] appears in other list as well" );
93 throw new IllegalArgumentException( "attempt to use negative addition [" + min_diff + "]" );
95 if ( factor <= 0.0 ) {
96 throw new IllegalArgumentException( "attempt to use factor equal or smaller than 0.0 [" + factor + "]" );
98 SurfacingUtil.checkForOutputFileWriteability( plain_output_dom );
99 SurfacingUtil.checkForOutputFileWriteability( html_output_dom );
100 SurfacingUtil.checkForOutputFileWriteability( html_output_dc );
101 SurfacingUtil.checkForOutputFileWriteability( all_domains_go_ids_out_dom );
102 SurfacingUtil.checkForOutputFileWriteability( passing_domains_go_ids_out_dom );
// NOTE(review): no close() call for these writers is visible in this method;
// only plain_writer.close() is visible in writeDomainValuesToFiles -- confirm
// that the others are closed (and that nothing leaks on an exception).
103 final Writer plain_writer = new BufferedWriter( new FileWriter( plain_output_dom ) );
104 final Writer html_writer = new BufferedWriter( new FileWriter( html_output_dom ) );
105 final Writer html_writer_dc = new BufferedWriter( new FileWriter( html_output_dc ) );
106 final Writer all_gos_writer = new BufferedWriter( new FileWriter( all_domains_go_ids_out_dom ) );
107 final Writer passing_gos_writer = new BufferedWriter( new FileWriter( passing_domains_go_ids_out_dom ) );
// Per-domain reduced values and raw copy counts, one map per species group.
108 final SortedMap<String, Double> high_copy_base_values = new TreeMap<String, Double>();
109 final SortedMap<String, Double> high_copy_target_values = new TreeMap<String, Double>();
110 final SortedMap<String, Double> low_copy_values = new TreeMap<String, Double>();
111 final SortedMap<String, List<Integer>> high_copy_base_copy_counts = new TreeMap<String, List<Integer>>();
112 final SortedMap<String, List<Integer>> high_copy_target_copy_counts = new TreeMap<String, List<Integer>>();
113 final SortedMap<String, List<Integer>> low_copy_copy_counts = new TreeMap<String, List<Integer>>();
114 final SortedSet<String> all_domains = new TreeSet<String>();
// The same bookkeeping, keyed by binary domain combination.
115 final SortedMap<BinaryDomainCombination, Double> high_copy_base_values_dc = new TreeMap<BinaryDomainCombination, Double>();
116 final SortedMap<BinaryDomainCombination, Double> high_copy_target_values_dc = new TreeMap<BinaryDomainCombination, Double>();
117 final SortedMap<BinaryDomainCombination, Double> low_copy_values_dc = new TreeMap<BinaryDomainCombination, Double>();
118 final SortedMap<BinaryDomainCombination, List<Integer>> high_copy_base_copy_counts_dc = new TreeMap<BinaryDomainCombination, List<Integer>>();
119 final SortedMap<BinaryDomainCombination, List<Integer>> high_copy_target_copy_counts_dc = new TreeMap<BinaryDomainCombination, List<Integer>>();
120 final SortedMap<BinaryDomainCombination, List<Integer>> low_copy_copy_counts_dc = new TreeMap<BinaryDomainCombination, List<Integer>>();
121 final SortedSet<BinaryDomainCombination> all_dcs = new TreeSet<BinaryDomainCombination>();
122 final Map<String, Set<BinaryDomainCombination>> bdcs_per_genome = new HashMap<String, Set<BinaryDomainCombination>>();
123 final SortedSet<GoId> go_ids_of_passing_domains = new TreeSet<GoId>();
124 final SortedSet<GoId> go_ids_all = new TreeSet<GoId>();
// First pass over the genomes: gather every domain id, the GO ids mapped to
// those domains, and the binary domain combinations of each species.
125 for( final GenomeWideCombinableDomains genome : genomes ) {
126 final SortedSet<String> domains = genome.getAllDomainIds();
127 final SortedSet<BinaryDomainCombination> dcs = genome.toBinaryDomainCombinations();
128 final String species = genome.getSpecies().getSpeciesId();
// NOTE(review): toBinaryDomainCombinations() is invoked a second time here
// instead of reusing dcs.
129 bdcs_per_genome.put( species, genome.toBinaryDomainCombinations() );
130 for( final String d : domains ) {
131 all_domains.add( d );
132 if ( domain_id_to_go_ids_map.containsKey( d ) ) {
133 go_ids_all.addAll( domain_id_to_go_ids_map.get( d ) );
// NOTE(review): the body of this loop is not visible in this excerpt;
// all_dcs is iterated further down, so it is presumably filled here -- confirm.
136 for( final BinaryDomainCombination dc : dcs ) {
// For every domain, record one copy count per genome in the map of each
// species group that lists the genome's species.
140 for( final String domain : all_domains ) {
141 for( final GenomeWideCombinableDomains genome : genomes ) {
142 final String species = genome.getSpecies().getSpeciesId();
143 if ( high_copy_base_species.contains( species ) ) {
144 DomainCountsDifferenceUtil.addCounts( high_copy_base_copy_counts, domain, genome );
146 if ( high_copy_target_species.contains( species ) ) {
147 DomainCountsDifferenceUtil.addCounts( high_copy_target_copy_counts, domain, genome );
149 if ( low_copy_species.contains( species ) ) {
150 DomainCountsDifferenceUtil.addCounts( low_copy_copy_counts, domain, genome );
// The same collection step for binary domain combinations.
154 for( final BinaryDomainCombination dc : all_dcs ) {
155 for( final GenomeWideCombinableDomains genome : genomes ) {
156 final String species = genome.getSpecies().getSpeciesId();
157 if ( high_copy_base_species.contains( species ) ) {
158 DomainCountsDifferenceUtil.addCounts( high_copy_base_copy_counts_dc,
161 bdcs_per_genome.get( species ) );
163 if ( high_copy_target_species.contains( species ) ) {
164 DomainCountsDifferenceUtil.addCounts( high_copy_target_copy_counts_dc,
167 bdcs_per_genome.get( species ) );
169 if ( low_copy_species.contains( species ) ) {
170 DomainCountsDifferenceUtil.addCounts( low_copy_copy_counts_dc,
173 bdcs_per_genome.get( species ) );
// Collapse each list of counts into a single value per domain, using the
// calculation mode configured for the respective species group.
177 for( final String domain : all_domains ) {
178 calculateDomainCountsBasedValue( high_copy_target_values,
179 high_copy_target_copy_counts,
181 COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
182 calculateDomainCountsBasedValue( high_copy_base_values,
183 high_copy_base_copy_counts,
185 COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
186 calculateDomainCountsBasedValue( low_copy_values,
187 low_copy_copy_counts,
189 COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
// The same reduction for binary domain combinations.
191 for( final BinaryDomainCombination dc : all_dcs ) {
192 calculateDomainCountsBasedValue( high_copy_target_values_dc,
193 high_copy_target_copy_counts_dc,
195 COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
196 calculateDomainCountsBasedValue( high_copy_base_values_dc,
197 high_copy_base_copy_counts_dc,
199 COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
200 calculateDomainCountsBasedValue( low_copy_values_dc,
201 low_copy_copy_counts_dc,
203 COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
// Write the reports; several arguments of these two calls are not visible in
// this excerpt.
205 writeDomainValuesToFiles( genomes,
206 high_copy_base_species,
207 high_copy_target_species,
211 domain_id_to_go_ids_map,
216 high_copy_base_values,
217 high_copy_target_values,
220 go_ids_of_passing_domains,
221 protein_lists_per_species );
222 writeDomainCombinationValuesToFiles( genomes,
223 high_copy_base_species,
224 high_copy_target_species,
229 high_copy_base_values_dc,
230 high_copy_target_values_dc,
234 writeGoIdsToFile( all_gos_writer, go_ids_all );
235 writeGoIdsToFile( passing_gos_writer, go_ids_of_passing_domains );
238 //FIXME really needs to be tested!
239 private static void addCounts( final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
240 final BinaryDomainCombination dc,
241 final GenomeWideCombinableDomains genome,
242 final Set<BinaryDomainCombination> bdc ) {
243 if ( !copy_counts.containsKey( dc ) ) {
244 copy_counts.put( dc, new ArrayList<Integer>() );
246 if ( bdc.contains( dc )
247 && ( ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains().get( dc.getId1() ) != null ) ) {
248 final int count = ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains()
250 copy_counts.get( dc ).add( count );
253 copy_counts.get( dc ).add( 0 );
257 private static void addCounts( final SortedMap<String, List<Integer>> copy_counts,
259 final GenomeWideCombinableDomains genome ) {
260 if ( !copy_counts.containsKey( domain ) ) {
261 copy_counts.put( domain, new ArrayList<Integer>() );
263 if ( genome.contains( domain ) ) {
264 copy_counts.get( domain ).add( genome.get( domain ).getKeyDomainProteinsCount() );
267 copy_counts.get( domain ).add( 0 );
/**
 * Builds the GO annotation text shown for domain d in the HTML report.
 * <p>
 * When domain_id_to_go_ids_map is null, empty or has no entry for d, the
 * early-exit branch is taken (its body is not visible in this excerpt).
 * Otherwise each GO id mapped to d is looked up in go_id_to_term_map: known
 * ids are rendered through appendGoTerm, unknown ids produce a
 * 'go id "..." not found [d]' message.
 * <p>
 * NOTE(review): the return statements, the else branch and one statement
 * after the appendGoTerm call are not visible in this excerpt.
 */
271 private static StringBuilder addGoInformation( final String d,
272 final Map<String, List<GoId>> domain_id_to_go_ids_map,
273 final Map<GoId, GoTerm> go_id_to_term_map ) {
274 final StringBuilder sb = new StringBuilder();
275 if ( ( domain_id_to_go_ids_map == null ) || domain_id_to_go_ids_map.isEmpty()
276 || !domain_id_to_go_ids_map.containsKey( d ) ) {
279 final List<GoId> go_ids = domain_id_to_go_ids_map.get( d );
280 for( int i = 0; i < go_ids.size(); ++i ) {
281 final GoId go_id = go_ids.get( i );
282 if ( go_id_to_term_map.containsKey( go_id ) ) {
283 appendGoTerm( sb, go_id_to_term_map.get( go_id ) );
287 sb.append( "go id \"" + go_id + "\" not found [" + d + "]" );
/**
 * Appends an HTML rendering of go_term to sb: a link built from
 * SurfacingConstants.AMIGO_LINK and the GO id (opened in the "amigo_window"
 * target), the term name and the short form of the term's GO namespace.
 * <p>
 * NOTE(review): the end of the link expression and the separators appended
 * between these parts are not visible in this excerpt.
 */
293 private static void appendGoTerm( final StringBuilder sb, final GoTerm go_term ) {
294 final GoId go_id = go_term.getGoId();
295 sb.append( "<a href=\"" + SurfacingConstants.AMIGO_LINK + go_id + "\" target=\"amigo_window\">" + go_id
298 sb.append( go_term.getName() );
300 sb.append( go_term.getGoNameSpace().toShortString() );
304 private static void calculateDomainCountsBasedValue( final SortedMap<BinaryDomainCombination, Double> copy_values,
305 final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
306 final BinaryDomainCombination bdc,
307 final COPY_CALCULATION_MODE copy_calc_mode ) {
308 if ( copy_counts.containsKey( bdc ) ) {
309 switch ( copy_calc_mode ) {
311 DomainCountsDifferenceUtil.calculateMaxCount( copy_values, copy_counts, bdc );
314 DomainCountsDifferenceUtil.calculateMinCount( copy_values, copy_counts, bdc );
317 DomainCountsDifferenceUtil.calculateMeanCount( copy_values, copy_counts, bdc );
320 DomainCountsDifferenceUtil.calculateMedianCount( copy_values, copy_counts, bdc );
323 throw new IllegalArgumentException();
327 copy_values.put( bdc, Double.valueOf( 0.0 ) );
331 private static void calculateDomainCountsBasedValue( final SortedMap<String, Double> copy_values,
332 final SortedMap<String, List<Integer>> copy_counts,
334 final COPY_CALCULATION_MODE copy_calc_mode ) {
335 if ( copy_counts.containsKey( domain ) ) {
336 switch ( copy_calc_mode ) {
338 DomainCountsDifferenceUtil.calculateMaxCount( copy_values, copy_counts, domain );
341 DomainCountsDifferenceUtil.calculateMinCount( copy_values, copy_counts, domain );
344 DomainCountsDifferenceUtil.calculateMeanCount( copy_values, copy_counts, domain );
347 DomainCountsDifferenceUtil.calculateMedianCount( copy_values, copy_counts, domain );
350 throw new IllegalArgumentException();
354 copy_values.put( domain, Double.valueOf( 0.0 ) );
358 private static void calculateMaxCount( final SortedMap<BinaryDomainCombination, Double> results,
359 final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
360 final BinaryDomainCombination bdc ) {
361 final List<Integer> counts = copy_counts.get( bdc );
363 for( final Integer count : counts ) {
368 results.put( bdc, ( double ) max );
371 private static void calculateMaxCount( final SortedMap<String, Double> results,
372 final SortedMap<String, List<Integer>> copy_counts,
373 final String domain ) {
374 final List<Integer> counts = copy_counts.get( domain );
376 for( final Integer count : counts ) {
381 results.put( domain, ( double ) max );
384 private static void calculateMeanCount( final SortedMap<BinaryDomainCombination, Double> results,
385 final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
386 final BinaryDomainCombination bdc ) {
387 final List<Integer> counts = copy_counts.get( bdc );
389 for( final Integer count : counts ) {
392 results.put( bdc, ( ( double ) sum ) / ( ( double ) counts.size() ) );
395 private static void calculateMeanCount( final SortedMap<String, Double> results,
396 final SortedMap<String, List<Integer>> copy_counts,
397 final String domain ) {
398 final List<Integer> counts = copy_counts.get( domain );
400 for( final Integer count : counts ) {
403 results.put( domain, ( ( double ) sum ) / ( ( double ) counts.size() ) );
406 private static void calculateMedianCount( final SortedMap<BinaryDomainCombination, Double> results,
407 final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
408 final BinaryDomainCombination bdc ) {
409 final List<Integer> counts = copy_counts.get( bdc );
410 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
411 for( final Integer count : counts ) {
412 stats.addValue( count );
414 results.put( bdc, stats.median() );
417 private static void calculateMedianCount( final SortedMap<String, Double> results,
418 final SortedMap<String, List<Integer>> copy_counts,
419 final String domain ) {
420 final List<Integer> counts = copy_counts.get( domain );
421 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
422 for( final Integer count : counts ) {
423 stats.addValue( count );
425 results.put( domain, stats.median() );
428 private static void calculateMinCount( final SortedMap<BinaryDomainCombination, Double> results,
429 final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
430 final BinaryDomainCombination bdc ) {
431 final List<Integer> counts = copy_counts.get( bdc );
432 int min = Integer.MAX_VALUE;
433 for( final Integer count : counts ) {
438 results.put( bdc, ( double ) min );
441 private static void calculateMinCount( final SortedMap<String, Double> results,
442 final SortedMap<String, List<Integer>> copy_counts,
443 final String domain ) {
444 final List<Integer> counts = copy_counts.get( domain );
445 int min = Integer.MAX_VALUE;
446 for( final Integer count : counts ) {
451 results.put( domain, ( double ) min );
454 private static String combinableDomaindToString( final CombinableDomains cd ) {
455 final StringBuilder sb = new StringBuilder();
456 sb.append( cd.getKeyDomainProteinsCount() );
457 return sb.toString();
/**
 * HTML rendering of a domain's copy numbers for one genome: the key domain
 * count, the key-domain proteins count (in bold) and the number of combinable
 * domains, followed by a new table cell holding the ids of the combining
 * domains.
 * <p>
 * NOTE(review): the first append (presumably the opening of the bracket that
 * "]</td><td>" closes) is not visible in this excerpt.
 */
460 private static String combinableDomaindToStringHtml( final CombinableDomains cd ) {
461 final StringBuilder sb = new StringBuilder();
463 sb.append( cd.getKeyDomainCount() );
464 sb.append( ", <b>" );
465 sb.append( cd.getKeyDomainProteinsCount() );
466 sb.append( "</b>, " );
467 sb.append( cd.getNumberOfCombinableDomains() );
468 sb.append( "]</td><td>" );
469 sb.append( cd.getCombiningDomainIdsAsStringBuilder() );
470 return sb.toString();
473 private static void writeCopyNumberValues( final SortedMap<BinaryDomainCombination, Double> copy_means,
474 final BinaryDomainCombination bdc,
475 final GenomeWideCombinableDomains genome,
476 final Map<String, Set<BinaryDomainCombination>> bdcs_per_genome,
477 final String species,
478 final Writer html_writer,
479 final String color ) throws IOException {
480 html_writer.write( "<td> " );
481 if ( !ForesterUtil.isEmpty( color ) ) {
482 html_writer.write( "<font color=\"" + color + "\">" );
484 html_writer.write( "<b>" + species + ":</b> " );
485 if ( !ForesterUtil.isEmpty( color ) ) {
486 html_writer.write( "</font>" );
488 html_writer.write( "</td><td>" );
489 if ( bdcs_per_genome.get( species ).contains( bdc ) && ( copy_means.get( bdc ) > 0 ) ) {
490 final int count = ( ( BasicCombinableDomains ) genome.get( bdc.getId0() ) ).getCombiningDomains()
491 .get( bdc.getId1() );
492 html_writer.write( count + "" );
495 html_writer.write( "0" );
497 html_writer.write( "</td>" );
500 private static void writeCopyNumberValues( final SortedMap<String, Double> copy_means,
502 final GenomeWideCombinableDomains genome,
503 final String species,
504 final Writer plain_writer,
505 final Writer html_writer,
506 final String color ) throws IOException {
507 plain_writer.write( " " + species + "\t" );
508 html_writer.write( "<td> " );
509 if ( !ForesterUtil.isEmpty( color ) ) {
510 html_writer.write( "<font color=\"" + color + "\">" );
512 html_writer.write( "<b>" + species + ":</b> " );
513 if ( !ForesterUtil.isEmpty( color ) ) {
514 html_writer.write( "</font>" );
516 html_writer.write( "</td><td>" );
517 if ( genome.contains( domain ) && ( copy_means.get( domain ) > 0 ) ) {
518 plain_writer.write( DomainCountsDifferenceUtil.combinableDomaindToString( genome.get( domain ) ) );
519 html_writer.write( DomainCountsDifferenceUtil.combinableDomaindToStringHtml( genome.get( domain ) ) );
522 plain_writer.write( "0" );
523 html_writer.write( "0" );
525 html_writer.write( "</td>" );
526 plain_writer.write( SurfacingConstants.NL );
/**
 * Writes the HTML report of binary domain combination copy differences to
 * html_writer.
 * <p>
 * A combination is reported when it passes both rules that are also printed
 * at the end of the report: rule 1 (high-copy-base > 0, high-copy-target > 0
 * and high-copy-base >= low-copy) and rule 2 (high-copy-target >= min_diff +
 * factor * low-copy). For each reported combination a nested table lists, per
 * genome, the copy number written by writeCopyNumberValues. The report ends
 * with the calculation modes, the thresholds and the summary counters.
 * <p>
 * NOTE(review): min_diff, factor and counter are used below, but their
 * declarations (and any update of counter), the end of the signature and the
 * remaining arguments of the writeCopyNumberValues calls are not visible in
 * this excerpt.
 */
529 private static void writeDomainCombinationValuesToFiles( final List<GenomeWideCombinableDomains> genomes,
530 final List<String> high_copy_base_species,
531 final List<String> high_copy_target_species,
532 final List<String> low_copy_species,
535 final Writer html_writer,
536 final SortedMap<BinaryDomainCombination, Double> high_copy_base_values,
537 final SortedMap<BinaryDomainCombination, Double> high_copy_target_values,
538 final SortedMap<BinaryDomainCombination, Double> low_copy_values,
539 final SortedSet<BinaryDomainCombination> all_bdcs,
540 final Map<String, Set<BinaryDomainCombination>> bdcs_per_genome )
543 int total_absense_counter = 0;
544 int not_total_absense_counter = 0;
545 SurfacingUtil.writeHtmlHead( html_writer, "Binary Domain Combination Copy Differences" );
546 html_writer.write( "<body><table>" );
547 for( final BinaryDomainCombination bdc : all_bdcs ) {
// Rule 1.
548 if ( ( high_copy_base_values.get( bdc ) > 0 ) && ( high_copy_target_values.get( bdc ) > 0 )
549 && ( high_copy_base_values.get( bdc ) >= low_copy_values.get( bdc ) ) ) {
// Rule 2.
550 if ( high_copy_target_values.get( bdc ) >= ( min_diff + ( factor * low_copy_values.get( bdc ) ) ) ) {
// A low-copy value of 0 (or less) is counted as total absence.
551 if ( low_copy_values.get( bdc ) <= 0.0 ) {
552 ++total_absense_counter;
555 ++not_total_absense_counter;
// One table row per passing combination: both domain ids linked via
// PFAM_FAMILY_ID_LINK, then a nested table with one row per genome.
558 html_writer.write( "<tr><td><a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + bdc.getId0()
559 + "\">" + bdc.getId0() + "</a> = <a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK
560 + bdc.getId1() + "\">" + bdc.getId1() + "</a>" );
561 html_writer.write( "</td><td>" );
562 html_writer.write( "<table>" );
563 for( final GenomeWideCombinableDomains genome : genomes ) {
564 final String species = genome.getSpecies().getSpeciesId();
// The else-if chain gives precedence to the target list, then the low-copy
// list, then the base list when a species is in more than one of them.
565 if ( high_copy_target_species.contains( species ) ) {
566 html_writer.write( "<tr>" );
567 writeCopyNumberValues( high_copy_target_values,
574 html_writer.write( "</tr>" );
576 else if ( low_copy_species.contains( species ) ) {
577 html_writer.write( "<tr>" );
578 writeCopyNumberValues( low_copy_values,
585 html_writer.write( "</tr>" );
587 else if ( high_copy_base_species.contains( species ) ) {
588 html_writer.write( "<tr>" );
589 writeCopyNumberValues( high_copy_base_values,
596 html_writer.write( "</tr>" );
599 html_writer.write( "</table>" );
600 html_writer.write( "</td></tr>" );
601 html_writer.write( SurfacingConstants.NL );
// Footer: the rules applied, the calculation modes, the thresholds and the counters.
605 html_writer.write( "</table>" );
606 html_writer.write( SurfacingConstants.NL );
607 html_writer.write( "<hr>" );
608 html_writer.write( SurfacingConstants.NL );
609 html_writer.write( "Rule 1: high-copy-base > 0 && high-copy-target > 0 && high-copy-base >= low-copy" );
610 html_writer.write( "<br>" );
611 html_writer.write( SurfacingConstants.NL );
612 html_writer.write( "Rule 2: high-copy-target >= minimal-difference + ( factor * low-copy )" );
613 html_writer.write( "<br>" );
614 html_writer.write( SurfacingConstants.NL );
615 html_writer.write( "Calculation mode for high copy target : " + COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
616 html_writer.write( SurfacingConstants.NL );
617 html_writer.write( "<br>" );
618 html_writer.write( "Calculation mode for high copy base : " + COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
619 html_writer.write( SurfacingConstants.NL );
620 html_writer.write( "<br>" );
621 html_writer.write( "Calculation mode for low copy : " + COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
622 html_writer.write( SurfacingConstants.NL );
623 html_writer.write( "<br>" );
624 html_writer.write( "Minimal difference : " + min_diff );
625 html_writer.write( SurfacingConstants.NL );
626 html_writer.write( "<br>" );
627 html_writer.write( "Factor : " + factor );
628 html_writer.write( SurfacingConstants.NL );
629 html_writer.write( "<br>" );
630 html_writer.write( "Lower copy binary domain combinations : " + counter );
631 html_writer.write( SurfacingConstants.NL );
632 html_writer.write( "<br>" );
633 html_writer.write( "Total absence : " + total_absense_counter );
634 html_writer.write( SurfacingConstants.NL );
635 html_writer.write( "<br>" );
636 html_writer.write( "Not total absence : " + not_total_absense_counter );
637 html_writer.write( SurfacingConstants.NL );
638 html_writer.write( "<br>" );
639 html_writer.write( "Total binary domain combinations : " + all_bdcs.size() );
640 html_writer.write( SurfacingConstants.NL );
641 html_writer.write( "<hr>" );
642 html_writer.write( SurfacingConstants.NL );
643 html_writer.write( "</body></html>" );
644 html_writer.write( SurfacingConstants.NL );
/**
 * Writes the plain-text and HTML reports of domain copy differences.
 * <p>
 * A domain is reported when it passes both rules that are also printed at
 * the end of the reports: rule 1 (high-copy-base > 0, high-copy-target > 0
 * and high-copy-base >= low-copy) and rule 2 (high-copy-target >= min_diff +
 * factor * low-copy). For each reported domain the method writes its protein
 * list via writeProteinsToFile, adds its GO ids to go_ids_of_passing_domains,
 * and writes the domain id, its GO information and the per-genome copy
 * numbers. Both reports end with the calculation modes, the thresholds and
 * the summary counters; plain_writer is closed at the end.
 * <p>
 * NOTE(review): min_diff, factor and counter are used below, but their
 * declarations (and any update of counter), the end of the signature and the
 * remaining arguments of the writeCopyNumberValues calls are not visible in
 * this excerpt. No close() of html_writer is visible here either.
 */
648 private static void writeDomainValuesToFiles( final List<GenomeWideCombinableDomains> genomes,
649 final List<String> high_copy_base_species,
650 final List<String> high_copy_target_species,
651 final List<String> low_copy_species,
654 final Map<String, List<GoId>> domain_id_to_go_ids_map,
655 final Map<GoId, GoTerm> go_id_to_term_map,
656 final Writer plain_writer,
657 final Writer html_writer,
658 final File proteins_file_base,
659 final SortedMap<String, Double> high_copy_base_values,
660 final SortedMap<String, Double> high_copy_target_values,
661 final SortedMap<String, Double> low_copy_values,
662 final SortedSet<String> all_domains,
663 final SortedSet<GoId> go_ids_of_passing_domains,
664 final SortedMap<Species, List<Protein>> protein_lists_per_species )
667 int total_absense_counter = 0;
668 int not_total_absense_counter = 0;
669 SurfacingUtil.writeHtmlHead( html_writer, "Domain Copy Differences" );
670 html_writer.write( "<body><table>" );
671 for( final String domain_id : all_domains ) {
// Rule 1.
672 if ( ( high_copy_base_values.get( domain_id ) > 0 ) && ( high_copy_target_values.get( domain_id ) > 0 )
673 && ( high_copy_base_values.get( domain_id ) >= low_copy_values.get( domain_id ) ) ) {
// Rule 2.
674 if ( high_copy_target_values.get( domain_id ) >= ( min_diff + ( factor * low_copy_values
675 .get( domain_id ) ) ) ) {
// A low-copy value of 0 (or less) is counted as total absence.
676 if ( low_copy_values.get( domain_id ) <= 0.0 ) {
677 ++total_absense_counter;
680 ++not_total_absense_counter;
// Side outputs for a passing domain: its protein list file and its GO ids.
683 writeProteinsToFile( proteins_file_base, protein_lists_per_species, domain_id );
684 if ( domain_id_to_go_ids_map.containsKey( domain_id ) ) {
685 go_ids_of_passing_domains.addAll( domain_id_to_go_ids_map.get( domain_id ) );
687 plain_writer.write( domain_id );
688 plain_writer.write( SurfacingConstants.NL );
689 html_writer.write( "<tr><td><a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain_id
690 + "\">" + domain_id + "</a></td><td>" );
691 html_writer.write( addGoInformation( domain_id, domain_id_to_go_ids_map, go_id_to_term_map )
693 html_writer.write( "</td><td>" );
694 html_writer.write( "<table>" );
695 for( final GenomeWideCombinableDomains genome : genomes ) {
696 final String species = genome.getSpecies().getSpeciesId();
// The else-if chain gives precedence to the target list, then the low-copy
// list, then the base list when a species is in more than one of them.
697 if ( high_copy_target_species.contains( species ) ) {
698 html_writer.write( "<tr>" );
699 writeCopyNumberValues( high_copy_target_values,
706 html_writer.write( "</tr>" );
708 else if ( low_copy_species.contains( species ) ) {
709 html_writer.write( "<tr>" );
710 writeCopyNumberValues( low_copy_values,
717 html_writer.write( "</tr>" );
719 else if ( high_copy_base_species.contains( species ) ) {
720 html_writer.write( "<tr>" );
721 writeCopyNumberValues( high_copy_base_values,
728 html_writer.write( "</tr>" );
731 html_writer.write( "</table>" );
732 html_writer.write( "</td></tr>" );
733 html_writer.write( SurfacingConstants.NL );
734 plain_writer.write( SurfacingConstants.NL );
// HTML footer: the rules applied, the calculation modes, the thresholds and the counters.
738 html_writer.write( "</table>" );
739 html_writer.write( SurfacingConstants.NL );
740 html_writer.write( "<hr>" );
741 html_writer.write( SurfacingConstants.NL );
742 html_writer.write( "Rule 1: high-copy-base > 0 && high-copy-target > 0 && high-copy-base >= low-copy" );
743 html_writer.write( "<br>" );
744 html_writer.write( SurfacingConstants.NL );
745 html_writer.write( "Rule 2: high-copy-target >= minimal-difference + ( factor * low-copy )" );
746 html_writer.write( "<br>" );
747 html_writer.write( SurfacingConstants.NL );
748 html_writer.write( "Calculation mode for high copy target : " + COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
749 html_writer.write( SurfacingConstants.NL );
750 html_writer.write( "<br>" );
751 html_writer.write( "Calculation mode for high copy base : " + COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
752 html_writer.write( SurfacingConstants.NL );
753 html_writer.write( "<br>" );
754 html_writer.write( "Calculation mode for low copy : " + COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
755 html_writer.write( SurfacingConstants.NL );
756 html_writer.write( "<br>" );
757 html_writer.write( "Minimal difference : " + min_diff );
758 html_writer.write( SurfacingConstants.NL );
759 html_writer.write( "<br>" );
760 html_writer.write( "Factor : " + factor );
761 html_writer.write( SurfacingConstants.NL );
762 html_writer.write( "<br>" );
763 html_writer.write( "Lower copy domains : " + counter );
764 html_writer.write( SurfacingConstants.NL );
765 html_writer.write( "<br>" );
766 html_writer.write( "Total absence : " + total_absense_counter );
767 html_writer.write( SurfacingConstants.NL );
768 html_writer.write( "<br>" );
769 html_writer.write( "Not total absence : " + not_total_absense_counter );
770 html_writer.write( SurfacingConstants.NL );
771 html_writer.write( "<br>" );
772 html_writer.write( "Total domains : " + all_domains.size() );
773 html_writer.write( SurfacingConstants.NL );
774 html_writer.write( "<hr>" );
775 html_writer.write( SurfacingConstants.NL );
776 html_writer.write( "</body></html>" );
777 html_writer.write( SurfacingConstants.NL );
// Plain-text footer: the same summary, as '#'-prefixed lines.
779 plain_writer.write( "# Rule 1: high-copy-base > 0 && high-copy-target > 0 && high-copy-base >= low-copy" );
780 plain_writer.write( SurfacingConstants.NL );
781 plain_writer.write( "# Rule 2: high-copy-target >= minimal-difference + ( factor * low-copy )" );
782 plain_writer.write( SurfacingConstants.NL );
783 plain_writer.write( "# Calculation mode for high copy target: " + COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
784 plain_writer.write( SurfacingConstants.NL );
785 plain_writer.write( "# Calculation mode for high copy base : " + COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
786 plain_writer.write( SurfacingConstants.NL );
787 plain_writer.write( "# Calculation mode for low copy : " + COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
788 plain_writer.write( SurfacingConstants.NL );
789 plain_writer.write( "# Minimal difference: " + min_diff );
790 plain_writer.write( SurfacingConstants.NL );
791 plain_writer.write( "# Factor : " + factor );
792 plain_writer.write( SurfacingConstants.NL );
793 plain_writer.write( "# Lower copy domains: " + counter );
794 plain_writer.write( SurfacingConstants.NL );
795 plain_writer.write( "# Total absence : " + total_absense_counter );
796 plain_writer.write( SurfacingConstants.NL );
797 plain_writer.write( "# Not total absence : " + not_total_absense_counter );
798 plain_writer.write( SurfacingConstants.NL );
799 plain_writer.write( "# Total domains : " + all_domains.size() );
800 plain_writer.write( SurfacingConstants.NL );
801 plain_writer.close();
/**
 * Writes every GO id in gos to writer, one per line (each followed by
 * SurfacingConstants.NL), in the iteration order of the sorted set.
 * <p>
 * NOTE(review): whatever follows the loop (presumably closing or flushing
 * the writer) is not visible in this excerpt -- confirm.
 */
804 private static void writeGoIdsToFile( final Writer writer, final SortedSet<GoId> gos ) throws IOException {
805 for( final GoId go_id : gos ) {
806 writer.write( go_id.toString() );
807 writer.write( SurfacingConstants.NL );
/**
 * Writes the protein list for domain_id to its own file and reports the file
 * name on standard output.
 * <p>
 * The file is placed in the parent directory of proteins_file_base and named
 * after the domain id plus PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX. The content
 * is produced by SurfacingUtil.extractProteinNames; the writer is closed
 * afterwards.
 * <p>
 * NOTE(review): several arguments of the extractProteinNames call are not
 * visible in this excerpt. Also, File.getParentFile() returns null for a
 * path without a parent, which would be concatenated as the text "null"
 * here -- confirm that callers always pass a path with a parent. The writer
 * is not closed if extractProteinNames throws.
 */
812 private static void writeProteinsToFile( final File proteins_file_base,
813 final SortedMap<Species, List<Protein>> protein_lists_per_species,
814 final String domain_id ) throws IOException {
815 final File my_proteins_file = new File( proteins_file_base.getParentFile() + ForesterUtil.FILE_SEPARATOR
816 + domain_id + PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX );
817 SurfacingUtil.checkForOutputFileWriteability( my_proteins_file );
818 final Writer proteins_file_writer = new BufferedWriter( new FileWriter( my_proteins_file ) );
819 SurfacingUtil.extractProteinNames( protein_lists_per_species,
821 proteins_file_writer,
823 surfacing.LIMIT_SPEC_FOR_PROT_EX,
825 proteins_file_writer.close();
826 System.out.println( "Wrote proteins list to \"" + my_proteins_file + "\"" );
/**
 * Ways of collapsing a list of per-species copy counts into a single value;
 * selected through the COPY_CALC_MODE_* constants and dispatched on in
 * calculateDomainCountsBasedValue.
 */
829 public static enum COPY_CALCULATION_MODE {
830 MAX, MEAN, MEDIAN, MIN