4 // FORESTER -- software libraries and applications
5 // for evolutionary biology research and applications.
7 // Copyright (C) 2008-2009 Christian M. Zmasek
8 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
25 // Contact: phylosoft @ gmail . com
26 // WWW: www.phylosoft.org/forester
28 package org.forester.surfacing;
30 import java.io.BufferedWriter;
32 import java.io.FileWriter;
33 import java.io.IOException;
34 import java.io.Writer;
35 import java.text.DecimalFormat;
36 import java.text.NumberFormat;
37 import java.util.ArrayList;
38 import java.util.HashMap;
39 import java.util.List;
42 import java.util.SortedMap;
43 import java.util.SortedSet;
44 import java.util.TreeMap;
45 import java.util.TreeSet;
47 import org.forester.go.GoId;
48 import org.forester.go.GoTerm;
49 import org.forester.util.BasicDescriptiveStatistics;
50 import org.forester.util.DescriptiveStatistics;
51 import org.forester.util.ForesterUtil;
54 * Poorly designed static class which essentially has one method:
55 * calculateCopyNumberDifferences.
57 public final class DomainCountsDifferenceUtil {
59 private final static NumberFormat FORMATTER = new DecimalFormat( "0.0E0" );
60 private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES = COPY_CALCULATION_MODE.MIN;
61 private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES = COPY_CALCULATION_MODE.MIN;
62 private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_LOW_COPY_SPECIES = COPY_CALCULATION_MODE.MAX;
63 private static final String PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX = ".prot";
65 //FIXME really needs to be tested!
66 private static void addCounts( final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
67 final BinaryDomainCombination dc,
68 final GenomeWideCombinableDomains genome,
69 final Set<BinaryDomainCombination> bdc ) {
70 if ( !copy_counts.containsKey( dc ) ) {
71 copy_counts.put( dc, new ArrayList<Integer>() );
73 if ( bdc.contains( dc )
74 && ( ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains().get( dc.getId1() ) != null ) ) {
75 final int count = ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains().get( dc
77 copy_counts.get( dc ).add( count );
80 copy_counts.get( dc ).add( 0 );
84 private static void addCounts( final SortedMap<DomainId, List<Integer>> copy_counts,
85 final DomainId domain,
86 final GenomeWideCombinableDomains genome ) {
87 if ( !copy_counts.containsKey( domain ) ) {
88 copy_counts.put( domain, new ArrayList<Integer>() );
90 if ( genome.contains( domain ) ) {
91 copy_counts.get( domain ).add( genome.get( domain ).getKeyDomainProteinsCount() );
94 copy_counts.get( domain ).add( 0 );
98 private static StringBuilder addGoInformation( final DomainId d,
99 final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
100 final Map<GoId, GoTerm> go_id_to_term_map ) {
101 final StringBuilder sb = new StringBuilder();
102 if ( ( domain_id_to_go_ids_map == null ) || domain_id_to_go_ids_map.isEmpty()
103 || !domain_id_to_go_ids_map.containsKey( d ) ) {
106 final List<GoId> go_ids = domain_id_to_go_ids_map.get( d );
107 for( int i = 0; i < go_ids.size(); ++i ) {
108 final GoId go_id = go_ids.get( i );
109 if ( go_id_to_term_map.containsKey( go_id ) ) {
110 appendGoTerm( sb, go_id_to_term_map.get( go_id ) );
114 sb.append( "go id \"" + go_id + "\" not found [" + d.getId() + "]" );
120 private static void appendGoTerm( final StringBuilder sb, final GoTerm go_term ) {
121 final GoId go_id = go_term.getGoId();
122 sb.append( "<a href=\"" + SurfacingConstants.AMIGO_LINK + go_id + "\" target=\"amigo_window\">" + go_id
125 sb.append( go_term.getName() );
127 sb.append( go_term.getGoNameSpace().toShortString() );
131 public static void calculateCopyNumberDifferences( final List<GenomeWideCombinableDomains> genomes,
132 final SortedMap<Species, List<Protein>> protein_lists_per_species,
133 final List<String> high_copy_base_species,
134 final List<String> high_copy_target_species,
135 final List<String> low_copy_species,
138 final File plain_output_dom,
139 final File html_output_dom,
140 final File html_output_dc,
141 final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
142 final Map<GoId, GoTerm> go_id_to_term_map,
143 final File all_domains_go_ids_out_dom,
144 final File passing_domains_go_ids_out_dom,
145 final File proteins_file_base ) throws IOException {
146 if ( genomes.size() < 1 ) {
147 throw new IllegalArgumentException( "attempt to use empty list of genomes for domain difference calculation" );
149 if ( ( high_copy_base_species.size() < 1 ) || ( low_copy_species.size() < 1 ) ) {
150 throw new IllegalArgumentException( "attempt to use empty list of species for domain difference calculation" );
152 if ( high_copy_base_species.contains( high_copy_target_species )
153 || low_copy_species.contains( high_copy_target_species ) ) {
154 throw new IllegalArgumentException( "species [" + high_copy_target_species
155 + "] appears in other list as well" );
157 if ( min_diff < 0 ) {
158 throw new IllegalArgumentException( "attempt to use negative addition [" + min_diff + "]" );
160 if ( factor <= 0.0 ) {
161 throw new IllegalArgumentException( "attempt to use factor equal or smaller than 0.0 [" + factor + "]" );
163 SurfacingUtil.checkForOutputFileWriteability( plain_output_dom );
164 SurfacingUtil.checkForOutputFileWriteability( html_output_dom );
165 SurfacingUtil.checkForOutputFileWriteability( html_output_dc );
166 SurfacingUtil.checkForOutputFileWriteability( all_domains_go_ids_out_dom );
167 SurfacingUtil.checkForOutputFileWriteability( passing_domains_go_ids_out_dom );
168 final Writer plain_writer = new BufferedWriter( new FileWriter( plain_output_dom ) );
169 final Writer html_writer = new BufferedWriter( new FileWriter( html_output_dom ) );
170 final Writer html_writer_dc = new BufferedWriter( new FileWriter( html_output_dc ) );
171 final Writer all_gos_writer = new BufferedWriter( new FileWriter( all_domains_go_ids_out_dom ) );
172 final Writer passing_gos_writer = new BufferedWriter( new FileWriter( passing_domains_go_ids_out_dom ) );
173 final SortedMap<DomainId, Double> high_copy_base_values = new TreeMap<DomainId, Double>();
174 final SortedMap<DomainId, Double> high_copy_target_values = new TreeMap<DomainId, Double>();
175 final SortedMap<DomainId, Double> low_copy_values = new TreeMap<DomainId, Double>();
176 final SortedMap<DomainId, List<Integer>> high_copy_base_copy_counts = new TreeMap<DomainId, List<Integer>>();
177 final SortedMap<DomainId, List<Integer>> high_copy_target_copy_counts = new TreeMap<DomainId, List<Integer>>();
178 final SortedMap<DomainId, List<Integer>> low_copy_copy_counts = new TreeMap<DomainId, List<Integer>>();
179 final SortedSet<DomainId> all_domains = new TreeSet<DomainId>();
180 final SortedMap<BinaryDomainCombination, Double> high_copy_base_values_dc = new TreeMap<BinaryDomainCombination, Double>();
181 final SortedMap<BinaryDomainCombination, Double> high_copy_target_values_dc = new TreeMap<BinaryDomainCombination, Double>();
182 final SortedMap<BinaryDomainCombination, Double> low_copy_values_dc = new TreeMap<BinaryDomainCombination, Double>();
183 final SortedMap<BinaryDomainCombination, List<Integer>> high_copy_base_copy_counts_dc = new TreeMap<BinaryDomainCombination, List<Integer>>();
184 final SortedMap<BinaryDomainCombination, List<Integer>> high_copy_target_copy_counts_dc = new TreeMap<BinaryDomainCombination, List<Integer>>();
185 final SortedMap<BinaryDomainCombination, List<Integer>> low_copy_copy_counts_dc = new TreeMap<BinaryDomainCombination, List<Integer>>();
186 final SortedSet<BinaryDomainCombination> all_dcs = new TreeSet<BinaryDomainCombination>();
187 final Map<String, Set<BinaryDomainCombination>> bdcs_per_genome = new HashMap<String, Set<BinaryDomainCombination>>();
188 final SortedSet<GoId> go_ids_of_passing_domains = new TreeSet<GoId>();
189 final SortedSet<GoId> go_ids_all = new TreeSet<GoId>();
190 for( final GenomeWideCombinableDomains genome : genomes ) {
191 final SortedSet<DomainId> domains = genome.getAllDomainIds();
192 final SortedSet<BinaryDomainCombination> dcs = genome.toBinaryDomainCombinations();
193 final String species = genome.getSpecies().getSpeciesId();
194 bdcs_per_genome.put( species, genome.toBinaryDomainCombinations() );
195 for( final DomainId d : domains ) {
196 all_domains.add( d );
197 if ( domain_id_to_go_ids_map.containsKey( d ) ) {
198 go_ids_all.addAll( domain_id_to_go_ids_map.get( d ) );
201 for( final BinaryDomainCombination dc : dcs ) {
205 for( final DomainId domain : all_domains ) {
206 for( final GenomeWideCombinableDomains genome : genomes ) {
207 final String species = genome.getSpecies().getSpeciesId();
208 if ( high_copy_base_species.contains( species ) ) {
209 DomainCountsDifferenceUtil.addCounts( high_copy_base_copy_counts, domain, genome );
211 if ( high_copy_target_species.contains( species ) ) {
212 DomainCountsDifferenceUtil.addCounts( high_copy_target_copy_counts, domain, genome );
214 if ( low_copy_species.contains( species ) ) {
215 DomainCountsDifferenceUtil.addCounts( low_copy_copy_counts, domain, genome );
219 for( final BinaryDomainCombination dc : all_dcs ) {
220 for( final GenomeWideCombinableDomains genome : genomes ) {
221 final String species = genome.getSpecies().getSpeciesId();
222 if ( high_copy_base_species.contains( species ) ) {
223 DomainCountsDifferenceUtil.addCounts( high_copy_base_copy_counts_dc, dc, genome, bdcs_per_genome
226 if ( high_copy_target_species.contains( species ) ) {
227 DomainCountsDifferenceUtil.addCounts( high_copy_target_copy_counts_dc, dc, genome, bdcs_per_genome
230 if ( low_copy_species.contains( species ) ) {
231 DomainCountsDifferenceUtil.addCounts( low_copy_copy_counts_dc, dc, genome, bdcs_per_genome
236 for( final DomainId domain : all_domains ) {
237 calculateDomainCountsBasedValue( high_copy_target_values,
238 high_copy_target_copy_counts,
240 COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
241 calculateDomainCountsBasedValue( high_copy_base_values,
242 high_copy_base_copy_counts,
244 COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
245 calculateDomainCountsBasedValue( low_copy_values,
246 low_copy_copy_counts,
248 COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
250 for( final BinaryDomainCombination dc : all_dcs ) {
251 calculateDomainCountsBasedValue( high_copy_target_values_dc,
252 high_copy_target_copy_counts_dc,
254 COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
255 calculateDomainCountsBasedValue( high_copy_base_values_dc,
256 high_copy_base_copy_counts_dc,
258 COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
259 calculateDomainCountsBasedValue( low_copy_values_dc,
260 low_copy_copy_counts_dc,
262 COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
264 writeDomainValuesToFiles( genomes,
265 high_copy_base_species,
266 high_copy_target_species,
270 domain_id_to_go_ids_map,
275 high_copy_base_values,
276 high_copy_target_values,
279 go_ids_of_passing_domains,
280 protein_lists_per_species );
281 writeDomainCombinationValuesToFiles( genomes,
282 high_copy_base_species,
283 high_copy_target_species,
288 high_copy_base_values_dc,
289 high_copy_target_values_dc,
293 writeGoIdsToFile( all_gos_writer, go_ids_all );
294 writeGoIdsToFile( passing_gos_writer, go_ids_of_passing_domains );
297 private static void calculateDomainCountsBasedValue( final SortedMap<BinaryDomainCombination, Double> copy_values,
298 final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
299 final BinaryDomainCombination bdc,
300 final COPY_CALCULATION_MODE copy_calc_mode ) {
301 if ( copy_counts.containsKey( bdc ) ) {
302 switch ( copy_calc_mode ) {
304 DomainCountsDifferenceUtil.calculateMaxCount( copy_values, copy_counts, bdc );
307 DomainCountsDifferenceUtil.calculateMinCount( copy_values, copy_counts, bdc );
310 DomainCountsDifferenceUtil.calculateMeanCount( copy_values, copy_counts, bdc );
313 DomainCountsDifferenceUtil.calculateMedianCount( copy_values, copy_counts, bdc );
316 throw new IllegalArgumentException();
320 copy_values.put( bdc, Double.valueOf( 0.0 ) );
324 private static void calculateDomainCountsBasedValue( final SortedMap<DomainId, Double> copy_values,
325 final SortedMap<DomainId, List<Integer>> copy_counts,
326 final DomainId domain,
327 final COPY_CALCULATION_MODE copy_calc_mode ) {
328 if ( copy_counts.containsKey( domain ) ) {
329 switch ( copy_calc_mode ) {
331 DomainCountsDifferenceUtil.calculateMaxCount( copy_values, copy_counts, domain );
334 DomainCountsDifferenceUtil.calculateMinCount( copy_values, copy_counts, domain );
337 DomainCountsDifferenceUtil.calculateMeanCount( copy_values, copy_counts, domain );
340 DomainCountsDifferenceUtil.calculateMedianCount( copy_values, copy_counts, domain );
343 throw new IllegalArgumentException();
347 copy_values.put( domain, Double.valueOf( 0.0 ) );
351 private static void calculateMaxCount( final SortedMap<BinaryDomainCombination, Double> results,
352 final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
353 final BinaryDomainCombination bdc ) {
354 final List<Integer> counts = copy_counts.get( bdc );
356 for( final Integer count : counts ) {
361 results.put( bdc, ( double ) max );
364 private static void calculateMaxCount( final SortedMap<DomainId, Double> results,
365 final SortedMap<DomainId, List<Integer>> copy_counts,
366 final DomainId domain ) {
367 final List<Integer> counts = copy_counts.get( domain );
369 for( final Integer count : counts ) {
374 results.put( domain, ( double ) max );
377 private static void calculateMeanCount( final SortedMap<BinaryDomainCombination, Double> results,
378 final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
379 final BinaryDomainCombination bdc ) {
380 final List<Integer> counts = copy_counts.get( bdc );
382 for( final Integer count : counts ) {
385 results.put( bdc, ( ( double ) sum ) / ( ( double ) counts.size() ) );
388 private static void calculateMeanCount( final SortedMap<DomainId, Double> results,
389 final SortedMap<DomainId, List<Integer>> copy_counts,
390 final DomainId domain ) {
391 final List<Integer> counts = copy_counts.get( domain );
393 for( final Integer count : counts ) {
396 results.put( domain, ( ( double ) sum ) / ( ( double ) counts.size() ) );
399 private static void calculateMedianCount( final SortedMap<BinaryDomainCombination, Double> results,
400 final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
401 final BinaryDomainCombination bdc ) {
402 final List<Integer> counts = copy_counts.get( bdc );
403 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
404 for( final Integer count : counts ) {
405 stats.addValue( count );
407 results.put( bdc, stats.median() );
410 private static void calculateMedianCount( final SortedMap<DomainId, Double> results,
411 final SortedMap<DomainId, List<Integer>> copy_counts,
412 final DomainId domain ) {
413 final List<Integer> counts = copy_counts.get( domain );
414 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
415 for( final Integer count : counts ) {
416 stats.addValue( count );
418 results.put( domain, stats.median() );
421 private static void calculateMinCount( final SortedMap<BinaryDomainCombination, Double> results,
422 final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
423 final BinaryDomainCombination bdc ) {
424 final List<Integer> counts = copy_counts.get( bdc );
425 int min = Integer.MAX_VALUE;
426 for( final Integer count : counts ) {
431 results.put( bdc, ( double ) min );
434 private static void calculateMinCount( final SortedMap<DomainId, Double> results,
435 final SortedMap<DomainId, List<Integer>> copy_counts,
436 final DomainId domain ) {
437 final List<Integer> counts = copy_counts.get( domain );
438 int min = Integer.MAX_VALUE;
439 for( final Integer count : counts ) {
444 results.put( domain, ( double ) min );
447 private static String combinableDomaindToString( final CombinableDomains cd ) {
448 final StringBuilder sb = new StringBuilder();
449 sb.append( cd.getKeyDomainProteinsCount() );
451 sb.append( FORMATTER.format( cd.getKeyDomainConfidenceDescriptiveStatistics().median() ) );
453 return sb.toString();
456 private static String combinableDomaindToStringHtml( final CombinableDomains cd ) {
457 final StringBuilder sb = new StringBuilder();
459 sb.append( cd.getKeyDomainCount() );
460 sb.append( ", <b>" );
461 sb.append( cd.getKeyDomainProteinsCount() );
462 sb.append( "</b>, " );
463 sb.append( cd.getNumberOfCombinableDomains() );
464 sb.append( "]</td><td>[" );
465 sb.append( FORMATTER.format( cd.getKeyDomainConfidenceDescriptiveStatistics().median() ) );
466 sb.append( "]</td><td>" );
467 sb.append( cd.getCombiningDomainIdsAsStringBuilder() );
468 return sb.toString();
471 private static void writeCopyNumberValues( final SortedMap<BinaryDomainCombination, Double> copy_means,
472 final BinaryDomainCombination bdc,
473 final GenomeWideCombinableDomains genome,
474 final Map<String, Set<BinaryDomainCombination>> bdcs_per_genome,
475 final String species,
476 final Writer html_writer,
477 final String color ) throws IOException {
478 html_writer.write( "<td> " );
479 if ( !ForesterUtil.isEmpty( color ) ) {
480 html_writer.write( "<font color=\"" + color + "\">" );
482 html_writer.write( "<b>" + species + ":</b> " );
483 if ( !ForesterUtil.isEmpty( color ) ) {
484 html_writer.write( "</font>" );
486 html_writer.write( "</td><td>" );
487 if ( bdcs_per_genome.get( species ).contains( bdc ) && ( copy_means.get( bdc ) > 0 ) ) {
488 final int count = ( ( BasicCombinableDomains ) genome.get( bdc.getId0() ) ).getCombiningDomains().get( bdc
490 html_writer.write( count + "" );
493 html_writer.write( "0" );
495 html_writer.write( "</td>" );
498 private static void writeCopyNumberValues( final SortedMap<DomainId, Double> copy_means,
499 final DomainId domain,
500 final GenomeWideCombinableDomains genome,
501 final String species,
502 final Writer plain_writer,
503 final Writer html_writer,
504 final String color ) throws IOException {
505 plain_writer.write( " " + species + "\t" );
506 html_writer.write( "<td> " );
507 if ( !ForesterUtil.isEmpty( color ) ) {
508 html_writer.write( "<font color=\"" + color + "\">" );
510 html_writer.write( "<b>" + species + ":</b> " );
511 if ( !ForesterUtil.isEmpty( color ) ) {
512 html_writer.write( "</font>" );
514 html_writer.write( "</td><td>" );
515 if ( genome.contains( domain ) && ( copy_means.get( domain ) > 0 ) ) {
516 plain_writer.write( DomainCountsDifferenceUtil.combinableDomaindToString( genome.get( domain ) ) );
517 html_writer.write( DomainCountsDifferenceUtil.combinableDomaindToStringHtml( genome.get( domain ) ) );
520 plain_writer.write( "0" );
521 html_writer.write( "0" );
523 html_writer.write( "</td>" );
524 plain_writer.write( SurfacingConstants.NL );
527 private static void writeDomainCombinationValuesToFiles( final List<GenomeWideCombinableDomains> genomes,
528 final List<String> high_copy_base_species,
529 final List<String> high_copy_target_species,
530 final List<String> low_copy_species,
533 final Writer html_writer,
534 final SortedMap<BinaryDomainCombination, Double> high_copy_base_values,
535 final SortedMap<BinaryDomainCombination, Double> high_copy_target_values,
536 final SortedMap<BinaryDomainCombination, Double> low_copy_values,
537 final SortedSet<BinaryDomainCombination> all_bdcs,
538 final Map<String, Set<BinaryDomainCombination>> bdcs_per_genome )
541 int total_absense_counter = 0;
542 int not_total_absense_counter = 0;
543 SurfacingUtil.addHtmlHead( html_writer, "Binary Domain Combination Copy Differences" );
544 html_writer.write( "<body><table>" );
545 for( final BinaryDomainCombination bdc : all_bdcs ) {
546 if ( ( high_copy_base_values.get( bdc ) > 0 ) && ( high_copy_target_values.get( bdc ) > 0 )
547 && ( high_copy_base_values.get( bdc ) >= low_copy_values.get( bdc ) ) ) {
548 if ( high_copy_target_values.get( bdc ) >= min_diff + ( factor * low_copy_values.get( bdc ) ) ) {
549 if ( low_copy_values.get( bdc ) <= 0.0 ) {
550 ++total_absense_counter;
553 ++not_total_absense_counter;
556 html_writer.write( "<tr><td><a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + bdc.getId0()
557 + "\">" + bdc.getId0() + "</a> = <a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK
558 + bdc.getId1() + "\">" + bdc.getId1() + "</a>" );
559 html_writer.write( "</td><td>" );
560 html_writer.write( "<table>" );
561 for( final GenomeWideCombinableDomains genome : genomes ) {
562 final String species = genome.getSpecies().getSpeciesId();
563 if ( high_copy_target_species.contains( species ) ) {
564 html_writer.write( "<tr>" );
565 writeCopyNumberValues( high_copy_target_values,
572 html_writer.write( "</tr>" );
574 else if ( low_copy_species.contains( species ) ) {
575 html_writer.write( "<tr>" );
576 writeCopyNumberValues( low_copy_values,
583 html_writer.write( "</tr>" );
585 else if ( high_copy_base_species.contains( species ) ) {
586 html_writer.write( "<tr>" );
587 writeCopyNumberValues( high_copy_base_values,
594 html_writer.write( "</tr>" );
597 html_writer.write( "</table>" );
598 html_writer.write( "</td></tr>" );
599 html_writer.write( SurfacingConstants.NL );
603 html_writer.write( "</table>" );
604 html_writer.write( SurfacingConstants.NL );
605 html_writer.write( "<hr>" );
606 html_writer.write( SurfacingConstants.NL );
607 html_writer.write( "Rule 1: high-copy-base > 0 && high-copy-target > 0 && high-copy-base >= low-copy" );
608 html_writer.write( "<br>" );
609 html_writer.write( SurfacingConstants.NL );
610 html_writer.write( "Rule 2: high-copy-target >= minimal-difference + ( factor * low-copy )" );
611 html_writer.write( "<br>" );
612 html_writer.write( SurfacingConstants.NL );
613 html_writer.write( "Calculation mode for high copy target : " + COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
614 html_writer.write( SurfacingConstants.NL );
615 html_writer.write( "<br>" );
616 html_writer.write( "Calculation mode for high copy base : " + COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
617 html_writer.write( SurfacingConstants.NL );
618 html_writer.write( "<br>" );
619 html_writer.write( "Calculation mode for low copy : " + COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
620 html_writer.write( SurfacingConstants.NL );
621 html_writer.write( "<br>" );
622 html_writer.write( "Minimal difference : " + min_diff );
623 html_writer.write( SurfacingConstants.NL );
624 html_writer.write( "<br>" );
625 html_writer.write( "Factor : " + factor );
626 html_writer.write( SurfacingConstants.NL );
627 html_writer.write( "<br>" );
628 html_writer.write( "Lower copy binary domain combinations : " + counter );
629 html_writer.write( SurfacingConstants.NL );
630 html_writer.write( "<br>" );
631 html_writer.write( "Total absence : " + total_absense_counter );
632 html_writer.write( SurfacingConstants.NL );
633 html_writer.write( "<br>" );
634 html_writer.write( "Not total absence : " + not_total_absense_counter );
635 html_writer.write( SurfacingConstants.NL );
636 html_writer.write( "<br>" );
637 html_writer.write( "Total binary domain combinations : " + all_bdcs.size() );
638 html_writer.write( SurfacingConstants.NL );
639 html_writer.write( "<hr>" );
640 html_writer.write( SurfacingConstants.NL );
641 html_writer.write( "</body></html>" );
642 html_writer.write( SurfacingConstants.NL );
646 private static void writeDomainValuesToFiles( final List<GenomeWideCombinableDomains> genomes,
647 final List<String> high_copy_base_species,
648 final List<String> high_copy_target_species,
649 final List<String> low_copy_species,
652 final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
653 final Map<GoId, GoTerm> go_id_to_term_map,
654 final Writer plain_writer,
655 final Writer html_writer,
656 final File proteins_file_base,
657 final SortedMap<DomainId, Double> high_copy_base_values,
658 final SortedMap<DomainId, Double> high_copy_target_values,
659 final SortedMap<DomainId, Double> low_copy_values,
660 final SortedSet<DomainId> all_domains,
661 final SortedSet<GoId> go_ids_of_passing_domains,
662 final SortedMap<Species, List<Protein>> protein_lists_per_species )
665 int total_absense_counter = 0;
666 int not_total_absense_counter = 0;
667 SurfacingUtil.addHtmlHead( html_writer, "Domain Copy Differences" );
668 html_writer.write( "<body><table>" );
669 for( final DomainId domain_id : all_domains ) {
670 if ( ( high_copy_base_values.get( domain_id ) > 0 ) && ( high_copy_target_values.get( domain_id ) > 0 )
671 && ( high_copy_base_values.get( domain_id ) >= low_copy_values.get( domain_id ) ) ) {
672 if ( high_copy_target_values.get( domain_id ) >= min_diff
673 + ( factor * low_copy_values.get( domain_id ) ) ) {
674 if ( low_copy_values.get( domain_id ) <= 0.0 ) {
675 ++total_absense_counter;
678 ++not_total_absense_counter;
681 writeProteinsToFile( proteins_file_base, protein_lists_per_species, domain_id );
682 if ( domain_id_to_go_ids_map.containsKey( domain_id ) ) {
683 go_ids_of_passing_domains.addAll( domain_id_to_go_ids_map.get( domain_id ) );
685 plain_writer.write( domain_id.getId() );
686 plain_writer.write( SurfacingConstants.NL );
687 html_writer.write( "<tr><td><a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK
688 + domain_id.getId() + "\">" + domain_id.getId() + "</a></td><td>" );
689 html_writer.write( addGoInformation( domain_id, domain_id_to_go_ids_map, go_id_to_term_map )
691 html_writer.write( "</td><td>" );
692 html_writer.write( "<table>" );
693 for( final GenomeWideCombinableDomains genome : genomes ) {
694 final String species = genome.getSpecies().getSpeciesId();
695 if ( high_copy_target_species.contains( species ) ) {
696 html_writer.write( "<tr>" );
697 writeCopyNumberValues( high_copy_target_values,
704 html_writer.write( "</tr>" );
706 else if ( low_copy_species.contains( species ) ) {
707 html_writer.write( "<tr>" );
708 writeCopyNumberValues( low_copy_values,
715 html_writer.write( "</tr>" );
717 else if ( high_copy_base_species.contains( species ) ) {
718 html_writer.write( "<tr>" );
719 writeCopyNumberValues( high_copy_base_values,
726 html_writer.write( "</tr>" );
729 html_writer.write( "</table>" );
730 html_writer.write( "</td></tr>" );
731 html_writer.write( SurfacingConstants.NL );
732 plain_writer.write( SurfacingConstants.NL );
736 html_writer.write( "</table>" );
737 html_writer.write( SurfacingConstants.NL );
738 html_writer.write( "<hr>" );
739 html_writer.write( SurfacingConstants.NL );
740 html_writer.write( "Rule 1: high-copy-base > 0 && high-copy-target > 0 && high-copy-base >= low-copy" );
741 html_writer.write( "<br>" );
742 html_writer.write( SurfacingConstants.NL );
743 html_writer.write( "Rule 2: high-copy-target >= minimal-difference + ( factor * low-copy )" );
744 html_writer.write( "<br>" );
745 html_writer.write( SurfacingConstants.NL );
746 html_writer.write( "Calculation mode for high copy target : " + COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
747 html_writer.write( SurfacingConstants.NL );
748 html_writer.write( "<br>" );
749 html_writer.write( "Calculation mode for high copy base : " + COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
750 html_writer.write( SurfacingConstants.NL );
751 html_writer.write( "<br>" );
752 html_writer.write( "Calculation mode for low copy : " + COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
753 html_writer.write( SurfacingConstants.NL );
754 html_writer.write( "<br>" );
755 html_writer.write( "Minimal difference : " + min_diff );
756 html_writer.write( SurfacingConstants.NL );
757 html_writer.write( "<br>" );
758 html_writer.write( "Factor : " + factor );
759 html_writer.write( SurfacingConstants.NL );
760 html_writer.write( "<br>" );
761 html_writer.write( "Lower copy domains : " + counter );
762 html_writer.write( SurfacingConstants.NL );
763 html_writer.write( "<br>" );
764 html_writer.write( "Total absence : " + total_absense_counter );
765 html_writer.write( SurfacingConstants.NL );
766 html_writer.write( "<br>" );
767 html_writer.write( "Not total absence : " + not_total_absense_counter );
768 html_writer.write( SurfacingConstants.NL );
769 html_writer.write( "<br>" );
770 html_writer.write( "Total domains : " + all_domains.size() );
771 html_writer.write( SurfacingConstants.NL );
772 html_writer.write( "<hr>" );
773 html_writer.write( SurfacingConstants.NL );
774 html_writer.write( "</body></html>" );
775 html_writer.write( SurfacingConstants.NL );
777 plain_writer.write( "# Rule 1: high-copy-base > 0 && high-copy-target > 0 && high-copy-base >= low-copy" );
778 plain_writer.write( SurfacingConstants.NL );
779 plain_writer.write( "# Rule 2: high-copy-target >= minimal-difference + ( factor * low-copy )" );
780 plain_writer.write( SurfacingConstants.NL );
781 plain_writer.write( "# Calculation mode for high copy target: " + COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
782 plain_writer.write( SurfacingConstants.NL );
783 plain_writer.write( "# Calculation mode for high copy base : " + COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
784 plain_writer.write( SurfacingConstants.NL );
785 plain_writer.write( "# Calculation mode for low copy : " + COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
786 plain_writer.write( SurfacingConstants.NL );
787 plain_writer.write( "# Minimal difference: " + min_diff );
788 plain_writer.write( SurfacingConstants.NL );
789 plain_writer.write( "# Factor : " + factor );
790 plain_writer.write( SurfacingConstants.NL );
791 plain_writer.write( "# Lower copy domains: " + counter );
792 plain_writer.write( SurfacingConstants.NL );
793 plain_writer.write( "# Total absence : " + total_absense_counter );
794 plain_writer.write( SurfacingConstants.NL );
795 plain_writer.write( "# Not total absence : " + not_total_absense_counter );
796 plain_writer.write( SurfacingConstants.NL );
797 plain_writer.write( "# Total domains : " + all_domains.size() );
798 plain_writer.write( SurfacingConstants.NL );
799 plain_writer.close();
802 private static void writeGoIdsToFile( final Writer writer, final SortedSet<GoId> gos ) throws IOException {
803 for( final GoId go_id : gos ) {
804 writer.write( go_id.toString() );
805 writer.write( SurfacingConstants.NL );
810 private static void writeProteinsToFile( final File proteins_file_base,
811 final SortedMap<Species, List<Protein>> protein_lists_per_species,
812 final DomainId domain_id ) throws IOException {
813 final File my_proteins_file = new File( proteins_file_base.getParentFile() + ForesterUtil.FILE_SEPARATOR
814 + domain_id + PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX );
815 SurfacingUtil.checkForOutputFileWriteability( my_proteins_file );
816 final Writer proteins_file_writer = new BufferedWriter( new FileWriter( my_proteins_file ) );
817 SurfacingUtil.extractProteinNames( protein_lists_per_species, domain_id, proteins_file_writer, "\t" );
818 proteins_file_writer.close();
819 System.out.println( "Wrote proteins list to \"" + my_proteins_file + "\"" );
822 public static enum COPY_CALCULATION_MODE {
823 MEAN, MEDIAN, MAX, MIN