4 // FORESTER -- software libraries and applications
5 // for evolutionary biology research and applications.
7 // Copyright (C) 2008-2009 Christian M. Zmasek
8 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
25 // Contact: phylosoft @ gmail . com
26 // WWW: www.phylosoft.org/forester
28 package org.forester.surfacing;
30 import java.io.BufferedWriter;
32 import java.io.FileWriter;
33 import java.io.IOException;
34 import java.io.Writer;
35 import java.text.DecimalFormat;
36 import java.text.NumberFormat;
37 import java.util.ArrayList;
38 import java.util.HashMap;
39 import java.util.List;
42 import java.util.SortedMap;
43 import java.util.SortedSet;
44 import java.util.TreeMap;
45 import java.util.TreeSet;
47 import org.forester.go.GoId;
48 import org.forester.go.GoTerm;
49 import org.forester.util.BasicDescriptiveStatistics;
50 import org.forester.util.DescriptiveStatistics;
51 import org.forester.util.ForesterUtil;
54 * Poorly designed static class which essentially has one public method:
55 * calculateCopyNumberDifferences.
57 public final class DomainCountsDifferenceUtil {
// Scientific notation with one integer digit and one fraction digit; used by
// combinableDomaindToString/combinableDomaindToStringHtml for the median of
// the key-domain confidence statistics.
59 private final static NumberFormat FORMATTER = new DecimalFormat( "0.0E0" );
// How the list of per-species counts of each species group is reduced to a
// single value (see calculateDomainCountsBasedValue): MIN for both high-copy
// groups, MAX for the low-copy group. The chosen modes are also printed in the
// summary section of the output files.
60 private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES = COPY_CALCULATION_MODE.MIN;
61 private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES = COPY_CALCULATION_MODE.MIN;
62 private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_LOW_COPY_SPECIES = COPY_CALCULATION_MODE.MAX;
// File name suffix of the per-domain protein list files created by writeProteinsToFile.
63 private static final String PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX = ".prot";
/**
 * Appends one copy count for the binary domain combination dc, taken from
 * genome, to the list stored under dc in copy_counts. The list is created
 * first when copy_counts has no entry for dc yet.
 *
 * NOTE(review): not every line of this method is visible in this listing
 * (closing braces, the tail of the count expression and presumably an
 * "else") -- confirm against the complete file.
 *
 * @param copy_counts per-combination lists of counts; modified in place
 * @param dc the binary domain combination whose count is recorded
 * @param genome the genome whose combining-domain counts are read
 * @param bdc set that must contain dc before genome is consulted
 */
65 //FIXME really needs to be tested!
66 private static void addCounts( final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
67 final BinaryDomainCombination dc,
68 final GenomeWideCombinableDomains genome,
69 final Set<BinaryDomainCombination> bdc ) {
// Make sure there is a list to append to.
70 if ( !copy_counts.containsKey( dc ) ) {
71 copy_counts.put( dc, new ArrayList<Integer>() );
// A count is only read when dc is a member of bdc and the combining-domains
// lookup for (id0, id1) is non-null. The cast requires genome.get() to return
// a BasicCombinableDomains here.
73 if ( bdc.contains( dc )
74 && ( ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains().get( dc.getId1() ) != null ) ) {
75 final int count = ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains()
77 copy_counts.get( dc ).add( count );
// Presumably the else branch (the "else" line is not visible here): record 0.
80 copy_counts.get( dc ).add( 0 );
/**
 * Appends one count for domain, taken from genome, to the list stored under
 * domain in copy_counts. The list is created first when copy_counts has no
 * entry for domain yet.
 *
 * NOTE(review): closing braces and presumably an "else" are not visible in
 * this listing -- confirm against the complete file.
 *
 * @param copy_counts per-domain lists of counts; modified in place
 * @param domain the domain whose count is recorded
 * @param genome the genome that is queried for domain
 */
84 private static void addCounts( final SortedMap<DomainId, List<Integer>> copy_counts,
85 final DomainId domain,
86 final GenomeWideCombinableDomains genome ) {
// Make sure there is a list to append to.
87 if ( !copy_counts.containsKey( domain ) ) {
88 copy_counts.put( domain, new ArrayList<Integer>() );
// The recorded value is getKeyDomainProteinsCount() of the genome's entry for domain.
90 if ( genome.contains( domain ) ) {
91 copy_counts.get( domain ).add( genome.get( domain ).getKeyDomainProteinsCount() );
// Presumably the else branch (the "else" line is not visible here): record 0.
94 copy_counts.get( domain ).add( 0 );
/**
 * Builds the GO annotation text for domain d: for every GO id mapped to d,
 * either the formatted GO term (via appendGoTerm) or a "not found" message
 * is appended to a fresh StringBuilder.
 *
 * NOTE(review): the return statements and one line of the loop body are not
 * visible in this listing; presumably the (possibly empty) builder is
 * returned in all cases -- confirm against the complete file.
 *
 * @param d the domain to describe
 * @param domain_id_to_go_ids_map GO ids per domain; may be null or empty
 * @param go_id_to_term_map GO terms by GO id; dereferenced without a null check
 * @return presumably the builder holding the assembled text
 */
98 private static StringBuilder addGoInformation( final DomainId d,
99 final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
100 final Map<GoId, GoTerm> go_id_to_term_map ) {
101 final StringBuilder sb = new StringBuilder();
// Guard: nothing to add when there is no usable mapping for d.
102 if ( ( domain_id_to_go_ids_map == null ) || domain_id_to_go_ids_map.isEmpty()
103 || !domain_id_to_go_ids_map.containsKey( d ) ) {
106 final List<GoId> go_ids = domain_id_to_go_ids_map.get( d );
107 for( int i = 0; i < go_ids.size(); ++i ) {
108 final GoId go_id = go_ids.get( i );
// Known GO id: append its formatted term.
109 if ( go_id_to_term_map.containsKey( go_id ) ) {
110 appendGoTerm( sb, go_id_to_term_map.get( go_id ) );
// Presumably the else branch: unknown GO id, so a diagnostic text is appended instead.
114 sb.append( "go id \"" + go_id + "\" not found [" + d.getId() + "]" );
/**
 * Appends an HTML description of go_term to sb: a hyperlink built from
 * SurfacingConstants.AMIGO_LINK and the GO id (opened in the browser target
 * "amigo_window"), followed by the term name and the short form of its GO
 * name space.
 *
 * NOTE(review): the separators appended between these parts are on lines not
 * visible in this listing -- confirm against the complete file.
 *
 * @param sb the builder that receives the text
 * @param go_term the GO term to describe
 */
120 private static void appendGoTerm( final StringBuilder sb, final GoTerm go_term ) {
121 final GoId go_id = go_term.getGoId();
// Link whose href is AMIGO_LINK followed by the GO id; the GO id is also the start of the link text.
122 sb.append( "<a href=\"" + SurfacingConstants.AMIGO_LINK + go_id + "\" target=\"amigo_window\">" + go_id
125 sb.append( go_term.getName() );
127 sb.append( go_term.getGoNameSpace().toShortString() );
/**
 * Compares domain and binary-domain-combination copy numbers between three
 * groups of species (high-copy base, high-copy target and low-copy) and
 * writes the results to the given output files.
 * <p>
 * Steps visible here: argument validation; a writability check and a writer
 * for each of the five output files; collection of all domain ids and GO ids
 * found in the genomes; gathering of one count per matching genome for every
 * domain and every binary domain combination, per species group; reduction of
 * each count list to a single value with the COPY_CALC_MODE_FOR_* constants;
 * and finally the calls to writeDomainValuesToFiles,
 * writeDomainCombinationValuesToFiles and writeGoIdsToFile.
 * <p>
 * NOTE(review): this listing does not show every line of the method (for
 * example the declarations of min_diff and factor, closing braces, the body
 * of the loop over dcs and several call arguments) -- confirm details against
 * the complete file.
 *
 * @param genomes the genomes to compare; must not be empty
 * @param protein_lists_per_species passed on to writeDomainValuesToFiles
 * @param high_copy_base_species species ids of the high-copy base group; must not be empty
 * @param high_copy_target_species species ids of the high-copy target group
 * @param low_copy_species species ids of the low-copy group; must not be empty
 * @param min_diff (declaration not visible here) rejected when negative
 * @param factor (declaration not visible here) rejected when not greater than 0.0
 * @param plain_output_dom output file behind plain_writer
 * @param html_output_dom output file behind html_writer
 * @param html_output_dc output file behind html_writer_dc
 * @param domain_id_to_go_ids_map GO ids per domain; dereferenced without a null check below
 * @param go_id_to_term_map GO terms by GO id; not referenced on the lines visible here
 * @param all_domains_go_ids_out_dom output file for the GO ids of all domains
 * @param passing_domains_go_ids_out_dom output file for the GO ids of the passing domains
 * @param proteins_file_base not referenced on the lines visible here
 * @throws IllegalArgumentException if genomes, high_copy_base_species or
 *         low_copy_species is empty, if min_diff is negative, or if factor
 *         is not greater than 0.0
 * @throws IOException declared for the file output performed here and in the helpers
 */
131 public static void calculateCopyNumberDifferences( final List<GenomeWideCombinableDomains> genomes,
132 final SortedMap<Species, List<Protein>> protein_lists_per_species,
133 final List<String> high_copy_base_species,
134 final List<String> high_copy_target_species,
135 final List<String> low_copy_species,
138 final File plain_output_dom,
139 final File html_output_dom,
140 final File html_output_dc,
141 final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
142 final Map<GoId, GoTerm> go_id_to_term_map,
143 final File all_domains_go_ids_out_dom,
144 final File passing_domains_go_ids_out_dom,
145 final File proteins_file_base ) throws IOException {
// ---- Argument validation ----
146 if ( genomes.size() < 1 ) {
147 throw new IllegalArgumentException( "attempt to use empty list of genomes for domain difference calculation" );
// Only the base and low-copy lists are checked for emptiness; the target list is not.
149 if ( ( high_copy_base_species.size() < 1 ) || ( low_copy_species.size() < 1 ) ) {
150 throw new IllegalArgumentException( "attempt to use empty list of species for domain difference calculation" );
// NOTE(review): high_copy_target_species is itself a List<String>, so
// List.contains() is asked whether the whole list is an element of a list of
// Strings, which can never be true. An element-wise overlap test (for example
// !Collections.disjoint( a, b )) was probably intended -- confirm and fix.
152 if ( high_copy_base_species.contains( high_copy_target_species )
153 || low_copy_species.contains( high_copy_target_species ) ) {
154 throw new IllegalArgumentException( "species [" + high_copy_target_species
155 + "] appears in other list as well" );
157 if ( min_diff < 0 ) {
158 throw new IllegalArgumentException( "attempt to use negative addition [" + min_diff + "]" );
160 if ( factor <= 0.0 ) {
161 throw new IllegalArgumentException( "attempt to use factor equal or smaller than 0.0 [" + factor + "]" );
// ---- Output files: check all of them before opening any writer ----
163 SurfacingUtil.checkForOutputFileWriteability( plain_output_dom );
164 SurfacingUtil.checkForOutputFileWriteability( html_output_dom );
165 SurfacingUtil.checkForOutputFileWriteability( html_output_dc );
166 SurfacingUtil.checkForOutputFileWriteability( all_domains_go_ids_out_dom );
167 SurfacingUtil.checkForOutputFileWriteability( passing_domains_go_ids_out_dom );
// NOTE(review): no close() call for these writers is visible in this method,
// and nothing visible closes them when an exception is thrown further down --
// consider try/finally.
168 final Writer plain_writer = new BufferedWriter( new FileWriter( plain_output_dom ) );
169 final Writer html_writer = new BufferedWriter( new FileWriter( html_output_dom ) );
170 final Writer html_writer_dc = new BufferedWriter( new FileWriter( html_output_dc ) );
171 final Writer all_gos_writer = new BufferedWriter( new FileWriter( all_domains_go_ids_out_dom ) );
172 final Writer passing_gos_writer = new BufferedWriter( new FileWriter( passing_domains_go_ids_out_dom ) );
// ---- Working data for single domains: reduced values, raw count lists, id set ----
173 final SortedMap<DomainId, Double> high_copy_base_values = new TreeMap<DomainId, Double>();
174 final SortedMap<DomainId, Double> high_copy_target_values = new TreeMap<DomainId, Double>();
175 final SortedMap<DomainId, Double> low_copy_values = new TreeMap<DomainId, Double>();
176 final SortedMap<DomainId, List<Integer>> high_copy_base_copy_counts = new TreeMap<DomainId, List<Integer>>();
177 final SortedMap<DomainId, List<Integer>> high_copy_target_copy_counts = new TreeMap<DomainId, List<Integer>>();
178 final SortedMap<DomainId, List<Integer>> low_copy_copy_counts = new TreeMap<DomainId, List<Integer>>();
179 final SortedSet<DomainId> all_domains = new TreeSet<DomainId>();
// ---- The same structures for binary domain combinations ("_dc") ----
180 final SortedMap<BinaryDomainCombination, Double> high_copy_base_values_dc = new TreeMap<BinaryDomainCombination, Double>();
181 final SortedMap<BinaryDomainCombination, Double> high_copy_target_values_dc = new TreeMap<BinaryDomainCombination, Double>();
182 final SortedMap<BinaryDomainCombination, Double> low_copy_values_dc = new TreeMap<BinaryDomainCombination, Double>();
183 final SortedMap<BinaryDomainCombination, List<Integer>> high_copy_base_copy_counts_dc = new TreeMap<BinaryDomainCombination, List<Integer>>();
184 final SortedMap<BinaryDomainCombination, List<Integer>> high_copy_target_copy_counts_dc = new TreeMap<BinaryDomainCombination, List<Integer>>();
185 final SortedMap<BinaryDomainCombination, List<Integer>> low_copy_copy_counts_dc = new TreeMap<BinaryDomainCombination, List<Integer>>();
186 final SortedSet<BinaryDomainCombination> all_dcs = new TreeSet<BinaryDomainCombination>();
// Binary domain combinations of each genome, keyed by species id.
187 final Map<String, Set<BinaryDomainCombination>> bdcs_per_genome = new HashMap<String, Set<BinaryDomainCombination>>();
188 final SortedSet<GoId> go_ids_of_passing_domains = new TreeSet<GoId>();
189 final SortedSet<GoId> go_ids_all = new TreeSet<GoId>();
// ---- Pass 1: collect domain ids, GO ids and combinations over all genomes ----
190 for( final GenomeWideCombinableDomains genome : genomes ) {
191 final SortedSet<DomainId> domains = genome.getAllDomainIds();
// NOTE(review): toBinaryDomainCombinations() is called twice per genome (here
// and two lines below); the local dcs could be stored in the map instead.
192 final SortedSet<BinaryDomainCombination> dcs = genome.toBinaryDomainCombinations();
193 final String species = genome.getSpecies().getSpeciesId();
194 bdcs_per_genome.put( species, genome.toBinaryDomainCombinations() );
195 for( final DomainId d : domains ) {
196 all_domains.add( d );
// NOTE(review): unlike addGoInformation, this dereferences
// domain_id_to_go_ids_map without a null check -- confirm callers never pass null.
197 if ( domain_id_to_go_ids_map.containsKey( d ) ) {
198 go_ids_all.addAll( domain_id_to_go_ids_map.get( d ) );
// The body of this loop is not visible here; presumably it adds dc to all_dcs.
201 for( final BinaryDomainCombination dc : dcs ) {
// ---- Pass 2: per domain, one count per genome, appended to the list of
// every group that names the genome's species (the checks are independent ifs) ----
205 for( final DomainId domain : all_domains ) {
206 for( final GenomeWideCombinableDomains genome : genomes ) {
207 final String species = genome.getSpecies().getSpeciesId();
208 if ( high_copy_base_species.contains( species ) ) {
209 DomainCountsDifferenceUtil.addCounts( high_copy_base_copy_counts, domain, genome );
211 if ( high_copy_target_species.contains( species ) ) {
212 DomainCountsDifferenceUtil.addCounts( high_copy_target_copy_counts, domain, genome );
214 if ( low_copy_species.contains( species ) ) {
215 DomainCountsDifferenceUtil.addCounts( low_copy_copy_counts, domain, genome );
// ---- Pass 3: the same for binary domain combinations (the middle call
// arguments are not visible in this listing) ----
219 for( final BinaryDomainCombination dc : all_dcs ) {
220 for( final GenomeWideCombinableDomains genome : genomes ) {
221 final String species = genome.getSpecies().getSpeciesId();
222 if ( high_copy_base_species.contains( species ) ) {
223 DomainCountsDifferenceUtil.addCounts( high_copy_base_copy_counts_dc,
226 bdcs_per_genome.get( species ) );
228 if ( high_copy_target_species.contains( species ) ) {
229 DomainCountsDifferenceUtil.addCounts( high_copy_target_copy_counts_dc,
232 bdcs_per_genome.get( species ) );
234 if ( low_copy_species.contains( species ) ) {
235 DomainCountsDifferenceUtil.addCounts( low_copy_copy_counts_dc,
238 bdcs_per_genome.get( species ) );
// ---- Reduce each count list to one value per domain and species group ----
242 for( final DomainId domain : all_domains ) {
243 calculateDomainCountsBasedValue( high_copy_target_values,
244 high_copy_target_copy_counts,
246 COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
247 calculateDomainCountsBasedValue( high_copy_base_values,
248 high_copy_base_copy_counts,
250 COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
251 calculateDomainCountsBasedValue( low_copy_values,
252 low_copy_copy_counts,
254 COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
// ---- ... and per binary domain combination ----
256 for( final BinaryDomainCombination dc : all_dcs ) {
257 calculateDomainCountsBasedValue( high_copy_target_values_dc,
258 high_copy_target_copy_counts_dc,
260 COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
261 calculateDomainCountsBasedValue( high_copy_base_values_dc,
262 high_copy_base_copy_counts_dc,
264 COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
265 calculateDomainCountsBasedValue( low_copy_values_dc,
266 low_copy_copy_counts_dc,
268 COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
// ---- Output: domain report (also fills go_ids_of_passing_domains) ----
270 writeDomainValuesToFiles( genomes,
271 high_copy_base_species,
272 high_copy_target_species,
276 domain_id_to_go_ids_map,
281 high_copy_base_values,
282 high_copy_target_values,
285 go_ids_of_passing_domains,
286 protein_lists_per_species );
// ---- Output: binary domain combination report ----
287 writeDomainCombinationValuesToFiles( genomes,
288 high_copy_base_species,
289 high_copy_target_species,
294 high_copy_base_values_dc,
295 high_copy_target_values_dc,
// ---- Output: GO id lists; must run after writeDomainValuesToFiles, which
// populates go_ids_of_passing_domains ----
299 writeGoIdsToFile( all_gos_writer, go_ids_all );
300 writeGoIdsToFile( passing_gos_writer, go_ids_of_passing_domains );
/**
 * Reduces the list of counts stored for bdc in copy_counts to a single value
 * and stores it in copy_values, using the reduction selected by
 * copy_calc_mode (one of calculateMaxCount, calculateMinCount,
 * calculateMeanCount or calculateMedianCount).
 *
 * NOTE(review): the case labels, break statements and the "else" are not
 * visible in this listing; presumably each call sits under the like-named
 * enum constant, the exception is the default case, and 0.0 is stored when
 * copy_counts has no entry for bdc -- confirm against the complete file.
 *
 * @param copy_values receives the reduced value for bdc
 * @param copy_counts per-combination lists of counts
 * @param bdc the binary domain combination to process
 * @param copy_calc_mode the reduction to apply
 * @throws IllegalArgumentException presumably for a mode without a case of its own
 */
303 private static void calculateDomainCountsBasedValue( final SortedMap<BinaryDomainCombination, Double> copy_values,
304 final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
305 final BinaryDomainCombination bdc,
306 final COPY_CALCULATION_MODE copy_calc_mode ) {
307 if ( copy_counts.containsKey( bdc ) ) {
308 switch ( copy_calc_mode ) {
310 DomainCountsDifferenceUtil.calculateMaxCount( copy_values, copy_counts, bdc );
313 DomainCountsDifferenceUtil.calculateMinCount( copy_values, copy_counts, bdc );
316 DomainCountsDifferenceUtil.calculateMeanCount( copy_values, copy_counts, bdc );
319 DomainCountsDifferenceUtil.calculateMedianCount( copy_values, copy_counts, bdc );
322 throw new IllegalArgumentException();
// Presumably the else branch of the containsKey check: no counts means value 0.0.
326 copy_values.put( bdc, Double.valueOf( 0.0 ) );
/**
 * Reduces the list of counts stored for domain in copy_counts to a single
 * value and stores it in copy_values, using the reduction selected by
 * copy_calc_mode (one of calculateMaxCount, calculateMinCount,
 * calculateMeanCount or calculateMedianCount).
 *
 * NOTE(review): the case labels, break statements and the "else" are not
 * visible in this listing; presumably each call sits under the like-named
 * enum constant, the exception is the default case, and 0.0 is stored when
 * copy_counts has no entry for domain -- confirm against the complete file.
 *
 * @param copy_values receives the reduced value for domain
 * @param copy_counts per-domain lists of counts
 * @param domain the domain to process
 * @param copy_calc_mode the reduction to apply
 * @throws IllegalArgumentException presumably for a mode without a case of its own
 */
330 private static void calculateDomainCountsBasedValue( final SortedMap<DomainId, Double> copy_values,
331 final SortedMap<DomainId, List<Integer>> copy_counts,
332 final DomainId domain,
333 final COPY_CALCULATION_MODE copy_calc_mode ) {
334 if ( copy_counts.containsKey( domain ) ) {
335 switch ( copy_calc_mode ) {
337 DomainCountsDifferenceUtil.calculateMaxCount( copy_values, copy_counts, domain );
340 DomainCountsDifferenceUtil.calculateMinCount( copy_values, copy_counts, domain );
343 DomainCountsDifferenceUtil.calculateMeanCount( copy_values, copy_counts, domain );
346 DomainCountsDifferenceUtil.calculateMedianCount( copy_values, copy_counts, domain );
349 throw new IllegalArgumentException();
// Presumably the else branch of the containsKey check: no counts means value 0.0.
353 copy_values.put( domain, Double.valueOf( 0.0 ) );
/**
 * Iterates over the counts recorded for bdc and stores the resulting value
 * of max, converted to double, in results.
 *
 * NOTE(review): the declaration of max and the loop body are not visible in
 * this listing; presumably max ends up as the largest count -- confirm
 * against the complete file.
 *
 * @param results receives the value for bdc
 * @param copy_counts per-combination lists of counts; must contain bdc,
 *        otherwise iterating the null list fails
 * @param bdc the binary domain combination to process
 */
357 private static void calculateMaxCount( final SortedMap<BinaryDomainCombination, Double> results,
358 final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
359 final BinaryDomainCombination bdc ) {
360 final List<Integer> counts = copy_counts.get( bdc );
362 for( final Integer count : counts ) {
367 results.put( bdc, ( double ) max );
/**
 * Iterates over the counts recorded for domain and stores the resulting
 * value of max, converted to double, in results.
 *
 * NOTE(review): the declaration of max and the loop body are not visible in
 * this listing; presumably max ends up as the largest count -- confirm
 * against the complete file.
 *
 * @param results receives the value for domain
 * @param copy_counts per-domain lists of counts; must contain domain,
 *        otherwise iterating the null list fails
 * @param domain the domain to process
 */
370 private static void calculateMaxCount( final SortedMap<DomainId, Double> results,
371 final SortedMap<DomainId, List<Integer>> copy_counts,
372 final DomainId domain ) {
373 final List<Integer> counts = copy_counts.get( domain );
375 for( final Integer count : counts ) {
380 results.put( domain, ( double ) max );
/**
 * Stores in results, for bdc, the value of sum divided by the number of
 * counts recorded for bdc, using floating-point division.
 *
 * NOTE(review): the declaration and accumulation of sum are not visible in
 * this listing; presumably sum is the total of the counts, making the result
 * the arithmetic mean -- confirm against the complete file. An empty count
 * list divides by 0.0 and therefore does not yield a finite number.
 *
 * @param results receives the value for bdc
 * @param copy_counts per-combination lists of counts; must contain bdc
 * @param bdc the binary domain combination to process
 */
383 private static void calculateMeanCount( final SortedMap<BinaryDomainCombination, Double> results,
384 final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
385 final BinaryDomainCombination bdc ) {
386 final List<Integer> counts = copy_counts.get( bdc );
388 for( final Integer count : counts ) {
// Both operands are cast to double so that the division is not truncated.
391 results.put( bdc, ( ( double ) sum ) / ( ( double ) counts.size() ) );
/**
 * Stores in results, for domain, the value of sum divided by the number of
 * counts recorded for domain, using floating-point division.
 *
 * NOTE(review): the declaration and accumulation of sum are not visible in
 * this listing; presumably sum is the total of the counts, making the result
 * the arithmetic mean -- confirm against the complete file. An empty count
 * list divides by 0.0 and therefore does not yield a finite number.
 *
 * @param results receives the value for domain
 * @param copy_counts per-domain lists of counts; must contain domain
 * @param domain the domain to process
 */
394 private static void calculateMeanCount( final SortedMap<DomainId, Double> results,
395 final SortedMap<DomainId, List<Integer>> copy_counts,
396 final DomainId domain ) {
397 final List<Integer> counts = copy_counts.get( domain );
399 for( final Integer count : counts ) {
// Both operands are cast to double so that the division is not truncated.
402 results.put( domain, ( ( double ) sum ) / ( ( double ) counts.size() ) );
/**
 * Feeds every count recorded for bdc into a BasicDescriptiveStatistics
 * instance and stores the median it reports in results.
 *
 * @param results receives the median for bdc
 * @param copy_counts per-combination lists of counts; must contain bdc,
 *        otherwise iterating the null list fails
 * @param bdc the binary domain combination to process
 */
405 private static void calculateMedianCount( final SortedMap<BinaryDomainCombination, Double> results,
406 final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
407 final BinaryDomainCombination bdc ) {
408 final List<Integer> counts = copy_counts.get( bdc );
409 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
410 for( final Integer count : counts ) {
411 stats.addValue( count );
// NOTE(review): how median() behaves for an empty data set depends on
// BasicDescriptiveStatistics, which is not visible here.
413 results.put( bdc, stats.median() );
/**
 * Feeds every count recorded for domain into a BasicDescriptiveStatistics
 * instance and stores the median it reports in results.
 *
 * @param results receives the median for domain
 * @param copy_counts per-domain lists of counts; must contain domain,
 *        otherwise iterating the null list fails
 * @param domain the domain to process
 */
416 private static void calculateMedianCount( final SortedMap<DomainId, Double> results,
417 final SortedMap<DomainId, List<Integer>> copy_counts,
418 final DomainId domain ) {
419 final List<Integer> counts = copy_counts.get( domain );
420 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
421 for( final Integer count : counts ) {
422 stats.addValue( count );
// NOTE(review): how median() behaves for an empty data set depends on
// BasicDescriptiveStatistics, which is not visible here.
424 results.put( domain, stats.median() );
/**
 * Iterates over the counts recorded for bdc and stores the resulting value
 * of min, converted to double, in results.
 *
 * NOTE(review): the loop body is not visible in this listing; presumably it
 * lowers min to every smaller count, so that min ends up as the smallest
 * count -- confirm against the complete file. For an empty count list the
 * loop does not run and Integer.MAX_VALUE is stored.
 *
 * @param results receives the value for bdc
 * @param copy_counts per-combination lists of counts; must contain bdc
 * @param bdc the binary domain combination to process
 */
427 private static void calculateMinCount( final SortedMap<BinaryDomainCombination, Double> results,
428 final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
429 final BinaryDomainCombination bdc ) {
430 final List<Integer> counts = copy_counts.get( bdc );
// Start above every possible count.
431 int min = Integer.MAX_VALUE;
432 for( final Integer count : counts ) {
437 results.put( bdc, ( double ) min );
/**
 * Iterates over the counts recorded for domain and stores the resulting
 * value of min, converted to double, in results.
 *
 * NOTE(review): the loop body is not visible in this listing; presumably it
 * lowers min to every smaller count, so that min ends up as the smallest
 * count -- confirm against the complete file. For an empty count list the
 * loop does not run and Integer.MAX_VALUE is stored.
 *
 * @param results receives the value for domain
 * @param copy_counts per-domain lists of counts; must contain domain
 * @param domain the domain to process
 */
440 private static void calculateMinCount( final SortedMap<DomainId, Double> results,
441 final SortedMap<DomainId, List<Integer>> copy_counts,
442 final DomainId domain ) {
443 final List<Integer> counts = copy_counts.get( domain );
// Start above every possible count.
444 int min = Integer.MAX_VALUE;
445 for( final Integer count : counts ) {
450 results.put( domain, ( double ) min );
/**
 * Renders cd as plain text: its key-domain protein count followed by the
 * median of its key-domain confidence statistics, formatted with FORMATTER.
 *
 * NOTE(review): the separators appended around the median are on lines not
 * visible in this listing -- confirm against the complete file.
 *
 * @param cd the combinable domains entry to render
 * @return the assembled text
 */
453 private static String combinableDomaindToString( final CombinableDomains cd ) {
454 final StringBuilder sb = new StringBuilder();
455 sb.append( cd.getKeyDomainProteinsCount() );
457 sb.append( FORMATTER.format( cd.getKeyDomainConfidenceDescriptiveStatistics().median() ) );
459 return sb.toString();
/**
 * Renders cd as an HTML fragment spread over several table cells: the
 * key-domain count, the key-domain protein count in bold and the number of
 * combinable domains; then the median of the key-domain confidence
 * statistics (formatted with FORMATTER) in brackets in a new cell; then the
 * combining domain ids in a further cell.
 *
 * NOTE(review): the first append (presumably an opening bracket) is on a
 * line not visible in this listing -- confirm against the complete file.
 * The fragment opens and closes cells but neither opens its first cell nor
 * closes its last one; the caller supplies those tags.
 *
 * @param cd the combinable domains entry to render
 * @return the assembled HTML fragment
 */
462 private static String combinableDomaindToStringHtml( final CombinableDomains cd ) {
463 final StringBuilder sb = new StringBuilder();
465 sb.append( cd.getKeyDomainCount() );
466 sb.append( ", <b>" );
467 sb.append( cd.getKeyDomainProteinsCount() );
468 sb.append( "</b>, " );
469 sb.append( cd.getNumberOfCombinableDomains() );
// End of the counts cell, start of the bracketed median cell.
470 sb.append( "]</td><td>[" );
471 sb.append( FORMATTER.format( cd.getKeyDomainConfidenceDescriptiveStatistics().median() ) );
// End of the median cell, start of the cell listing the combining domains.
472 sb.append( "]</td><td>" );
473 sb.append( cd.getCombiningDomainIdsAsStringBuilder() );
474 return sb.toString();
/**
 * Writes two HTML table cells for one species and one binary domain
 * combination: a cell with the species id in bold (wrapped in a font tag of
 * the given color when color is not empty) and a cell with the copy count.
 *
 * NOTE(review): closing braces and presumably an "else" are not visible in
 * this listing -- confirm against the complete file.
 *
 * @param copy_means reduced values per combination; must contain bdc whenever
 *        bdcs_per_genome lists bdc for species, because the value is unboxed
 * @param bdc the binary domain combination to report
 * @param genome the genome the count is read from
 * @param bdcs_per_genome binary domain combinations per species id; must
 *        contain an entry for species
 * @param species the species id to print and to look up
 * @param html_writer receives the HTML
 * @param color font color for the species label; may be empty
 * @throws IOException if writing fails
 */
477 private static void writeCopyNumberValues( final SortedMap<BinaryDomainCombination, Double> copy_means,
478 final BinaryDomainCombination bdc,
479 final GenomeWideCombinableDomains genome,
480 final Map<String, Set<BinaryDomainCombination>> bdcs_per_genome,
481 final String species,
482 final Writer html_writer,
483 final String color ) throws IOException {
// First cell: species label, optionally colored.
484 html_writer.write( "<td> " );
485 if ( !ForesterUtil.isEmpty( color ) ) {
486 html_writer.write( "<font color=\"" + color + "\">" );
488 html_writer.write( "<b>" + species + ":</b> " );
489 if ( !ForesterUtil.isEmpty( color ) ) {
490 html_writer.write( "</font>" );
// Second cell: the count read from the genome when the species has this
// combination and the reduced value is positive.
492 html_writer.write( "</td><td>" );
493 if ( bdcs_per_genome.get( species ).contains( bdc ) && ( copy_means.get( bdc ) > 0 ) ) {
// The cast requires genome.get() to return a BasicCombinableDomains here.
494 final int count = ( ( BasicCombinableDomains ) genome.get( bdc.getId0() ) ).getCombiningDomains()
495 .get( bdc.getId1() );
496 html_writer.write( count + "" );
// Presumably the else branch (the "else" line is not visible here).
499 html_writer.write( "0" );
501 html_writer.write( "</td>" );
/**
 * Writes the values of one species for one domain to both outputs. The
 * plain output gets the species id, a tab, the value and a line separator.
 * The HTML output gets a cell with the species id in bold (wrapped in a
 * font tag of the given color when color is not empty) followed by the
 * value cell(s).
 *
 * NOTE(review): closing braces and presumably an "else" are not visible in
 * this listing -- confirm against the complete file.
 *
 * @param copy_means reduced values per domain; must contain domain whenever
 *        genome contains it, because the value is unboxed
 * @param domain the domain to report
 * @param genome the genome the values are read from
 * @param species the species id to print
 * @param plain_writer receives the plain-text line
 * @param html_writer receives the HTML
 * @param color font color for the species label; may be empty
 * @throws IOException if writing fails
 */
504 private static void writeCopyNumberValues( final SortedMap<DomainId, Double> copy_means,
505 final DomainId domain,
506 final GenomeWideCombinableDomains genome,
507 final String species,
508 final Writer plain_writer,
509 final Writer html_writer,
510 final String color ) throws IOException {
511 plain_writer.write( " " + species + "\t" );
// First HTML cell: species label, optionally colored.
512 html_writer.write( "<td> " );
513 if ( !ForesterUtil.isEmpty( color ) ) {
514 html_writer.write( "<font color=\"" + color + "\">" );
516 html_writer.write( "<b>" + species + ":</b> " );
517 if ( !ForesterUtil.isEmpty( color ) ) {
518 html_writer.write( "</font>" );
// Value part: details of the genome's entry for domain when it is present
// and the reduced value is positive.
520 html_writer.write( "</td><td>" );
521 if ( genome.contains( domain ) && ( copy_means.get( domain ) > 0 ) ) {
522 plain_writer.write( DomainCountsDifferenceUtil.combinableDomaindToString( genome.get( domain ) ) );
523 html_writer.write( DomainCountsDifferenceUtil.combinableDomaindToStringHtml( genome.get( domain ) ) );
// Presumably the else branch (the "else" line is not visible here).
526 plain_writer.write( "0" );
527 html_writer.write( "0" );
529 html_writer.write( "</td>" );
530 plain_writer.write( SurfacingConstants.NL );
/**
 * Writes the HTML report for binary domain combinations: one table row for
 * every combination that satisfies both rules below, followed by a summary
 * of the rules, the calculation modes, the parameters and the counters.
 * <p>
 * Rule 1: high-copy-base &gt; 0 and high-copy-target &gt; 0 and
 * high-copy-base &gt;= low-copy.<br>
 * Rule 2: high-copy-target &gt;= min_diff + ( factor * low-copy ).
 * <p>
 * NOTE(review): several lines are not visible in this listing (the
 * declarations of min_diff, factor and counter, the throws clause, the
 * arguments of the writeCopyNumberValues calls, closing braces and anything
 * that follows the last visible write) -- confirm against the complete file.
 * Every element of all_bdcs needs an entry in all three value maps, because
 * the looked-up values are unboxed for the comparisons.
 *
 * @param genomes the genomes to report, one nested table row each when the
 *        species belongs to one of the three groups
 * @param high_copy_base_species species ids of the high-copy base group
 * @param high_copy_target_species species ids of the high-copy target group
 * @param low_copy_species species ids of the low-copy group
 * @param html_writer receives the report
 * @param high_copy_base_values reduced value per combination, base group
 * @param high_copy_target_values reduced value per combination, target group
 * @param low_copy_values reduced value per combination, low-copy group
 * @param all_bdcs all combinations to test
 * @param bdcs_per_genome combinations per species id (not referenced on the
 *        lines visible here; presumably passed to writeCopyNumberValues)
 */
533 private static void writeDomainCombinationValuesToFiles( final List<GenomeWideCombinableDomains> genomes,
534 final List<String> high_copy_base_species,
535 final List<String> high_copy_target_species,
536 final List<String> low_copy_species,
539 final Writer html_writer,
540 final SortedMap<BinaryDomainCombination, Double> high_copy_base_values,
541 final SortedMap<BinaryDomainCombination, Double> high_copy_target_values,
542 final SortedMap<BinaryDomainCombination, Double> low_copy_values,
543 final SortedSet<BinaryDomainCombination> all_bdcs,
544 final Map<String, Set<BinaryDomainCombination>> bdcs_per_genome )
// Passing combinations whose low-copy value is <= 0.0 / is positive.
547 int total_absense_counter = 0;
548 int not_total_absense_counter = 0;
549 SurfacingUtil.addHtmlHead( html_writer, "Binary Domain Combination Copy Differences" );
550 html_writer.write( "<body><table>" );
551 for( final BinaryDomainCombination bdc : all_bdcs ) {
// Rule 1.
552 if ( ( high_copy_base_values.get( bdc ) > 0 ) && ( high_copy_target_values.get( bdc ) > 0 )
553 && ( high_copy_base_values.get( bdc ) >= low_copy_values.get( bdc ) ) ) {
// Rule 2.
554 if ( high_copy_target_values.get( bdc ) >= min_diff + ( factor * low_copy_values.get( bdc ) ) ) {
555 if ( low_copy_values.get( bdc ) <= 0.0 ) {
556 ++total_absense_counter;
// Presumably the else branch (the "else" line is not visible here).
559 ++not_total_absense_counter;
// Row header: both domain ids, each linked via PFAM_FAMILY_ID_LINK.
562 html_writer.write( "<tr><td><a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + bdc.getId0()
563 + "\">" + bdc.getId0() + "</a> = <a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK
564 + bdc.getId1() + "\">" + bdc.getId1() + "</a>" );
565 html_writer.write( "</td><td>" );
// Nested table: one row per genome. The else-if chain makes a species that is
// named in several groups appear once, with target taking precedence over
// low-copy and low-copy over base.
566 html_writer.write( "<table>" );
567 for( final GenomeWideCombinableDomains genome : genomes ) {
568 final String species = genome.getSpecies().getSpeciesId();
569 if ( high_copy_target_species.contains( species ) ) {
570 html_writer.write( "<tr>" );
571 writeCopyNumberValues( high_copy_target_values,
578 html_writer.write( "</tr>" );
580 else if ( low_copy_species.contains( species ) ) {
581 html_writer.write( "<tr>" );
582 writeCopyNumberValues( low_copy_values,
589 html_writer.write( "</tr>" );
591 else if ( high_copy_base_species.contains( species ) ) {
592 html_writer.write( "<tr>" );
593 writeCopyNumberValues( high_copy_base_values,
600 html_writer.write( "</tr>" );
603 html_writer.write( "</table>" );
604 html_writer.write( "</td></tr>" );
605 html_writer.write( SurfacingConstants.NL );
// ---- Summary section ----
609 html_writer.write( "</table>" );
610 html_writer.write( SurfacingConstants.NL );
611 html_writer.write( "<hr>" );
612 html_writer.write( SurfacingConstants.NL );
613 html_writer.write( "Rule 1: high-copy-base > 0 && high-copy-target > 0 && high-copy-base >= low-copy" );
614 html_writer.write( "<br>" );
615 html_writer.write( SurfacingConstants.NL );
616 html_writer.write( "Rule 2: high-copy-target >= minimal-difference + ( factor * low-copy )" );
617 html_writer.write( "<br>" );
618 html_writer.write( SurfacingConstants.NL );
619 html_writer.write( "Calculation mode for high copy target : " + COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
620 html_writer.write( SurfacingConstants.NL );
621 html_writer.write( "<br>" );
622 html_writer.write( "Calculation mode for high copy base : " + COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
623 html_writer.write( SurfacingConstants.NL );
624 html_writer.write( "<br>" );
625 html_writer.write( "Calculation mode for low copy : " + COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
626 html_writer.write( SurfacingConstants.NL );
627 html_writer.write( "<br>" );
628 html_writer.write( "Minimal difference : " + min_diff );
629 html_writer.write( SurfacingConstants.NL );
630 html_writer.write( "<br>" );
631 html_writer.write( "Factor : " + factor );
632 html_writer.write( SurfacingConstants.NL );
633 html_writer.write( "<br>" );
// counter is declared and updated on lines not visible here; presumably it
// counts the combinations that passed both rules.
634 html_writer.write( "Lower copy binary domain combinations : " + counter );
635 html_writer.write( SurfacingConstants.NL );
636 html_writer.write( "<br>" );
637 html_writer.write( "Total absence : " + total_absense_counter );
638 html_writer.write( SurfacingConstants.NL );
639 html_writer.write( "<br>" );
640 html_writer.write( "Not total absence : " + not_total_absense_counter );
641 html_writer.write( SurfacingConstants.NL );
642 html_writer.write( "<br>" );
643 html_writer.write( "Total binary domain combinations : " + all_bdcs.size() );
644 html_writer.write( SurfacingConstants.NL );
645 html_writer.write( "<hr>" );
646 html_writer.write( SurfacingConstants.NL );
647 html_writer.write( "</body></html>" );
648 html_writer.write( SurfacingConstants.NL );
/**
 * Writes the plain-text and the HTML report for single domains. For every
 * domain that satisfies both rules below it writes the domain's protein list
 * file (writeProteinsToFile), adds the domain's GO ids to
 * go_ids_of_passing_domains, and emits the domain with its GO information
 * and per-species values. Both reports end with a summary of the rules, the
 * calculation modes, the parameters and the counters; plain_writer is closed
 * at the end.
 * <p>
 * Rule 1: high-copy-base &gt; 0 and high-copy-target &gt; 0 and
 * high-copy-base &gt;= low-copy.<br>
 * Rule 2: high-copy-target &gt;= min_diff + ( factor * low-copy ).
 * <p>
 * NOTE(review): several lines are not visible in this listing (the
 * declarations of min_diff, factor and counter, the throws clause, the
 * arguments of the writeCopyNumberValues calls, closing braces and the line
 * between the HTML and the plain summary) -- confirm against the complete
 * file. Every element of all_domains needs an entry in all three value
 * maps, because the looked-up values are unboxed for the comparisons.
 *
 * @param genomes the genomes to report, one nested table row each when the
 *        species belongs to one of the three groups
 * @param high_copy_base_species species ids of the high-copy base group
 * @param high_copy_target_species species ids of the high-copy target group
 * @param low_copy_species species ids of the low-copy group
 * @param domain_id_to_go_ids_map GO ids per domain; dereferenced without a null check
 * @param go_id_to_term_map GO terms by GO id, passed to addGoInformation
 * @param plain_writer receives the plain-text report; closed by this method
 * @param html_writer receives the HTML report
 * @param proteins_file_base passed to writeProteinsToFile
 * @param high_copy_base_values reduced value per domain, base group
 * @param high_copy_target_values reduced value per domain, target group
 * @param low_copy_values reduced value per domain, low-copy group
 * @param all_domains all domains to test
 * @param go_ids_of_passing_domains output: receives the GO ids of passing domains
 * @param protein_lists_per_species passed to writeProteinsToFile
 */
652 private static void writeDomainValuesToFiles( final List<GenomeWideCombinableDomains> genomes,
653 final List<String> high_copy_base_species,
654 final List<String> high_copy_target_species,
655 final List<String> low_copy_species,
658 final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
659 final Map<GoId, GoTerm> go_id_to_term_map,
660 final Writer plain_writer,
661 final Writer html_writer,
662 final File proteins_file_base,
663 final SortedMap<DomainId, Double> high_copy_base_values,
664 final SortedMap<DomainId, Double> high_copy_target_values,
665 final SortedMap<DomainId, Double> low_copy_values,
666 final SortedSet<DomainId> all_domains,
667 final SortedSet<GoId> go_ids_of_passing_domains,
668 final SortedMap<Species, List<Protein>> protein_lists_per_species )
// Passing domains whose low-copy value is <= 0.0 / is positive.
671 int total_absense_counter = 0;
672 int not_total_absense_counter = 0;
673 SurfacingUtil.addHtmlHead( html_writer, "Domain Copy Differences" );
674 html_writer.write( "<body><table>" );
675 for( final DomainId domain_id : all_domains ) {
// Rule 1.
676 if ( ( high_copy_base_values.get( domain_id ) > 0 ) && ( high_copy_target_values.get( domain_id ) > 0 )
677 && ( high_copy_base_values.get( domain_id ) >= low_copy_values.get( domain_id ) ) ) {
// Rule 2.
678 if ( high_copy_target_values.get( domain_id ) >= min_diff
679 + ( factor * low_copy_values.get( domain_id ) ) ) {
680 if ( low_copy_values.get( domain_id ) <= 0.0 ) {
681 ++total_absense_counter;
// Presumably the else branch (the "else" line is not visible here).
684 ++not_total_absense_counter;
// Side outputs for a passing domain: its protein list file and its GO ids.
687 writeProteinsToFile( proteins_file_base, protein_lists_per_species, domain_id );
688 if ( domain_id_to_go_ids_map.containsKey( domain_id ) ) {
689 go_ids_of_passing_domains.addAll( domain_id_to_go_ids_map.get( domain_id ) );
// Plain report: the domain id on its own line; per-species lines follow below.
691 plain_writer.write( domain_id.getId() );
692 plain_writer.write( SurfacingConstants.NL );
// HTML report: domain id linked via PFAM_FAMILY_ID_LINK, then the GO information.
693 html_writer.write( "<tr><td><a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK
694 + domain_id.getId() + "\">" + domain_id.getId() + "</a></td><td>" );
695 html_writer.write( addGoInformation( domain_id, domain_id_to_go_ids_map, go_id_to_term_map )
697 html_writer.write( "</td><td>" );
// Nested table: one row per genome. The else-if chain makes a species that is
// named in several groups appear once, with target taking precedence over
// low-copy and low-copy over base.
698 html_writer.write( "<table>" );
699 for( final GenomeWideCombinableDomains genome : genomes ) {
700 final String species = genome.getSpecies().getSpeciesId();
701 if ( high_copy_target_species.contains( species ) ) {
702 html_writer.write( "<tr>" );
703 writeCopyNumberValues( high_copy_target_values,
710 html_writer.write( "</tr>" );
712 else if ( low_copy_species.contains( species ) ) {
713 html_writer.write( "<tr>" );
714 writeCopyNumberValues( low_copy_values,
721 html_writer.write( "</tr>" );
723 else if ( high_copy_base_species.contains( species ) ) {
724 html_writer.write( "<tr>" );
725 writeCopyNumberValues( high_copy_base_values,
732 html_writer.write( "</tr>" );
735 html_writer.write( "</table>" );
736 html_writer.write( "</td></tr>" );
737 html_writer.write( SurfacingConstants.NL );
738 plain_writer.write( SurfacingConstants.NL );
// ---- HTML summary section ----
742 html_writer.write( "</table>" );
743 html_writer.write( SurfacingConstants.NL );
744 html_writer.write( "<hr>" );
745 html_writer.write( SurfacingConstants.NL );
746 html_writer.write( "Rule 1: high-copy-base > 0 && high-copy-target > 0 && high-copy-base >= low-copy" );
747 html_writer.write( "<br>" );
748 html_writer.write( SurfacingConstants.NL );
749 html_writer.write( "Rule 2: high-copy-target >= minimal-difference + ( factor * low-copy )" );
750 html_writer.write( "<br>" );
751 html_writer.write( SurfacingConstants.NL );
752 html_writer.write( "Calculation mode for high copy target : " + COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
753 html_writer.write( SurfacingConstants.NL );
754 html_writer.write( "<br>" );
755 html_writer.write( "Calculation mode for high copy base : " + COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
756 html_writer.write( SurfacingConstants.NL );
757 html_writer.write( "<br>" );
758 html_writer.write( "Calculation mode for low copy : " + COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
759 html_writer.write( SurfacingConstants.NL );
760 html_writer.write( "<br>" );
761 html_writer.write( "Minimal difference : " + min_diff );
762 html_writer.write( SurfacingConstants.NL );
763 html_writer.write( "<br>" );
764 html_writer.write( "Factor : " + factor );
765 html_writer.write( SurfacingConstants.NL );
766 html_writer.write( "<br>" );
// counter is declared and updated on lines not visible here; presumably it
// counts the domains that passed both rules.
767 html_writer.write( "Lower copy domains : " + counter );
768 html_writer.write( SurfacingConstants.NL );
769 html_writer.write( "<br>" );
770 html_writer.write( "Total absence : " + total_absense_counter );
771 html_writer.write( SurfacingConstants.NL );
772 html_writer.write( "<br>" );
773 html_writer.write( "Not total absence : " + not_total_absense_counter );
774 html_writer.write( SurfacingConstants.NL );
775 html_writer.write( "<br>" );
776 html_writer.write( "Total domains : " + all_domains.size() );
777 html_writer.write( SurfacingConstants.NL );
778 html_writer.write( "<hr>" );
779 html_writer.write( SurfacingConstants.NL );
780 html_writer.write( "</body></html>" );
781 html_writer.write( SurfacingConstants.NL );
// ---- Plain-text summary: the same facts as "#"-prefixed lines ----
783 plain_writer.write( "# Rule 1: high-copy-base > 0 && high-copy-target > 0 && high-copy-base >= low-copy" );
784 plain_writer.write( SurfacingConstants.NL );
785 plain_writer.write( "# Rule 2: high-copy-target >= minimal-difference + ( factor * low-copy )" );
786 plain_writer.write( SurfacingConstants.NL );
787 plain_writer.write( "# Calculation mode for high copy target: " + COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
788 plain_writer.write( SurfacingConstants.NL );
789 plain_writer.write( "# Calculation mode for high copy base : " + COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
790 plain_writer.write( SurfacingConstants.NL );
791 plain_writer.write( "# Calculation mode for low copy : " + COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
792 plain_writer.write( SurfacingConstants.NL );
793 plain_writer.write( "# Minimal difference: " + min_diff );
794 plain_writer.write( SurfacingConstants.NL );
795 plain_writer.write( "# Factor : " + factor );
796 plain_writer.write( SurfacingConstants.NL );
797 plain_writer.write( "# Lower copy domains: " + counter );
798 plain_writer.write( SurfacingConstants.NL );
799 plain_writer.write( "# Total absence : " + total_absense_counter );
800 plain_writer.write( SurfacingConstants.NL );
801 plain_writer.write( "# Not total absence : " + not_total_absense_counter );
802 plain_writer.write( SurfacingConstants.NL );
803 plain_writer.write( "# Total domains : " + all_domains.size() );
804 plain_writer.write( SurfacingConstants.NL );
// NOTE(review): this close() is skipped when any write above throws.
805 plain_writer.close();
/**
 * Writes every GO id of gos to writer, one per line, in the iteration order
 * of the sorted set.
 *
 * NOTE(review): the lines after the loop are not visible in this listing;
 * presumably the writer is closed there, as no caller closes it -- confirm
 * against the complete file.
 *
 * @param writer receives the GO ids
 * @param gos the GO ids to write
 * @throws IOException if writing fails
 */
808 private static void writeGoIdsToFile( final Writer writer, final SortedSet<GoId> gos ) throws IOException {
809 for( final GoId go_id : gos ) {
810 writer.write( go_id.toString() );
811 writer.write( SurfacingConstants.NL );
816 private static void writeProteinsToFile( final File proteins_file_base,
817 final SortedMap<Species, List<Protein>> protein_lists_per_species,
818 final DomainId domain_id ) throws IOException {
819 final File my_proteins_file = new File( proteins_file_base.getParentFile() + ForesterUtil.FILE_SEPARATOR
820 + domain_id + PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX );
821 SurfacingUtil.checkForOutputFileWriteability( my_proteins_file );
822 final Writer proteins_file_writer = new BufferedWriter( new FileWriter( my_proteins_file ) );
823 SurfacingUtil.extractProteinNames( protein_lists_per_species, domain_id, proteins_file_writer, "\t" );
824 proteins_file_writer.close();
825 System.out.println( "Wrote proteins list to \"" + my_proteins_file + "\"" );
/**
 * The ways in which a list of per-species copy counts can be reduced to a
 * single value; calculateDomainCountsBasedValue dispatches on this to
 * calculateMeanCount, calculateMedianCount, calculateMaxCount or
 * calculateMinCount.
 */
828 public static enum COPY_CALCULATION_MODE {
829 MEAN, MEDIAN, MAX, MIN