4 // FORESTER -- software libraries and applications
5 // for evolutionary biology research and applications.
7 // Copyright (C) 2008-2009 Christian M. Zmasek
8 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
25 // Contact: phylosoft @ gmail . com
26 // WWW: www.phylosoft.org/forester
28 package org.forester.surfacing;
30 import java.io.BufferedWriter;
32 import java.io.FileWriter;
33 import java.io.IOException;
34 import java.io.Writer;
35 import java.text.DecimalFormat;
36 import java.text.NumberFormat;
37 import java.util.ArrayList;
38 import java.util.HashMap;
39 import java.util.List;
42 import java.util.SortedMap;
43 import java.util.SortedSet;
44 import java.util.TreeMap;
45 import java.util.TreeSet;
47 import org.forester.application.surfacing;
48 import org.forester.go.GoId;
49 import org.forester.go.GoTerm;
50 import org.forester.protein.BinaryDomainCombination;
51 import org.forester.protein.DomainId;
52 import org.forester.protein.Protein;
53 import org.forester.species.Species;
54 import org.forester.util.BasicDescriptiveStatistics;
55 import org.forester.util.DescriptiveStatistics;
56 import org.forester.util.ForesterUtil;
59 * Poorly designed static class which essentially has one method:
60 * calculateCopyNumberDifferences.
62 public final class DomainCountsDifferenceUtil {
// Formats numbers in scientific notation with one integer digit and one fraction digit;
// used in this class for the median of the key-domain confidence statistics.
64 private final static NumberFormat FORMATTER = new DecimalFormat( "0.0E0" );
// How the per-species counts of each species group are collapsed into a single value:
// MIN for both high-copy groups, MAX for the low-copy group.
65 private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES = COPY_CALCULATION_MODE.MIN;
66 private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES = COPY_CALCULATION_MODE.MIN;
67 private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_LOW_COPY_SPECIES = COPY_CALCULATION_MODE.MAX;
// Suffix appended to the domain id when naming the per-domain protein list file.
68 private static final String PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX = ".prot";
70 //FIXME really needs to be tested!
/**
 * Appends one count for the binary domain combination dc to the list stored
 * under dc in copy_counts, creating that list first if dc has no entry yet.
 *
 * A count looked up from the genome is appended when bdc contains dc and the
 * genome's combining-domains map for dc.getId0() has a non-null value for
 * dc.getId1(). A literal 0 is appended as well, presumably as the fallback
 * when that condition fails -- confirm.
 *
 * NOTE(review): genome.get( dc.getId0() ) is cast to BasicCombinableDomains
 * and dereferenced without a null check; this assumes every id0 of a
 * combination in bdc is present in genome -- confirm.
 *
 * @param copy_counts per-combination lists of counts, modified in place
 * @param dc the binary domain combination to record a count for
 * @param genome the genome the count is read from
 * @param bdc the set of binary domain combinations checked for membership of dc
 */
71 private static void addCounts( final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
72 final BinaryDomainCombination dc,
73 final GenomeWideCombinableDomains genome,
74 final Set<BinaryDomainCombination> bdc ) {
// Lazily create the list so that later get( dc ) calls never return null.
75 if ( !copy_counts.containsKey( dc ) ) {
76 copy_counts.put( dc, new ArrayList<Integer>() );
78 if ( bdc.contains( dc )
79 && ( ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains().get( dc.getId1() ) != null ) ) {
80 final int count = ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains()
82 copy_counts.get( dc ).add( count );
85 copy_counts.get( dc ).add( 0 );
/**
 * Appends one count for domain to the list stored under domain in
 * copy_counts, creating that list first if domain has no entry yet.
 *
 * When genome contains the domain, the appended value is the
 * key-domain protein count reported by genome.get( domain ). A literal 0 is
 * appended as well, presumably when the genome lacks the domain -- confirm.
 *
 * @param copy_counts per-domain lists of counts, modified in place
 * @param domain the domain to record a count for
 * @param genome the genome the count is read from
 */
89 private static void addCounts( final SortedMap<DomainId, List<Integer>> copy_counts,
90 final DomainId domain,
91 final GenomeWideCombinableDomains genome ) {
// Lazily create the list so that later get( domain ) calls never return null.
92 if ( !copy_counts.containsKey( domain ) ) {
93 copy_counts.put( domain, new ArrayList<Integer>() );
95 if ( genome.contains( domain ) ) {
96 copy_counts.get( domain ).add( genome.get( domain ).getKeyDomainProteinsCount() );
99 copy_counts.get( domain ).add( 0 );
/**
 * Builds the GO annotation text for domain d.
 *
 * The map domain_id_to_go_ids_map is first tested for being null, empty, or
 * lacking d (presumably leading to an early return of the empty builder --
 * confirm). Otherwise each GO id listed for d is processed in list order:
 * ids found in go_id_to_term_map are rendered through appendGoTerm, and a
 * 'go id "..." not found [domain]' message is appended as well, presumably
 * for ids missing from go_id_to_term_map -- confirm.
 *
 * NOTE(review): go_id_to_term_map itself is dereferenced without a null
 * check.
 *
 * @param d the domain whose GO ids are looked up
 * @param domain_id_to_go_ids_map domain id to GO id list; may be null
 * @param go_id_to_term_map GO id to GO term
 * @return the builder holding the generated text
 */
103 private static StringBuilder addGoInformation( final DomainId d,
104 final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
105 final Map<GoId, GoTerm> go_id_to_term_map ) {
106 final StringBuilder sb = new StringBuilder();
107 if ( ( domain_id_to_go_ids_map == null ) || domain_id_to_go_ids_map.isEmpty()
108 || !domain_id_to_go_ids_map.containsKey( d ) ) {
111 final List<GoId> go_ids = domain_id_to_go_ids_map.get( d );
112 for( int i = 0; i < go_ids.size(); ++i ) {
113 final GoId go_id = go_ids.get( i );
114 if ( go_id_to_term_map.containsKey( go_id ) ) {
115 appendGoTerm( sb, go_id_to_term_map.get( go_id ) );
119 sb.append( "go id \"" + go_id + "\" not found [" + d.getId() + "]" );
/**
 * Appends an HTML rendering of go_term to sb: a hyperlink whose target URL is
 * SurfacingConstants.AMIGO_LINK followed by the term's GO id (opened in the
 * window named "amigo_window"), the term name, and the short string form of
 * the term's GO name space.
 *
 * @param sb the builder appended to
 * @param go_term the GO term to render
 */
125 private static void appendGoTerm( final StringBuilder sb, final GoTerm go_term ) {
126 final GoId go_id = go_term.getGoId();
127 sb.append( "<a href=\"" + SurfacingConstants.AMIGO_LINK + go_id + "\" target=\"amigo_window\">" + go_id
130 sb.append( go_term.getName() );
132 sb.append( go_term.getGoNameSpace().toShortString() );
/**
 * Compares domain and binary-domain-combination copy numbers between three
 * groups of species (high-copy base, high-copy target, low-copy) and writes
 * the results to the given output files.
 *
 * Steps visible in this method:
 * 1. Argument validation (IllegalArgumentException on failure) and
 *    writeability checks on the five output files.
 * 2. One buffered writer is opened per output file.
 * 3. All domain ids of all genomes are collected, together with the GO ids
 *    mapped to them; binary domain combinations are recorded per species id.
 * 4. For every domain and every combination in all_dcs, one count per genome
 *    is appended to the list of each group whose species list contains the
 *    genome's species id.
 * 5. Each group's counts are collapsed to a single value using that group's
 *    COPY_CALC_MODE_* constant.
 * 6. Domain results, combination results and the two GO id lists are written.
 *
 * The values min_diff and factor are referenced in the validation below.
 *
 * NOTE(review): the writers are opened without a visible try/finally, so an
 * exception thrown before they are closed would presumably leak the file
 * handles -- confirm where each writer is closed.
 *
 * NOTE(review): domain_id_to_go_ids_map is dereferenced in the collection
 * loop without a null check, although addGoInformation treats a null map as
 * legal -- confirm callers never pass null here.
 *
 * @param genomes the genomes to analyze; must not be empty
 * @param protein_lists_per_species proteins per species, passed on to writeDomainValuesToFiles
 * @param high_copy_base_species species ids of the high-copy base group; must not be empty
 * @param high_copy_target_species species ids of the high-copy target group
 * @param low_copy_species species ids of the low-copy group; must not be empty
 * @param plain_output_dom plain text output file for domains
 * @param html_output_dom HTML output file for domains
 * @param html_output_dc HTML output file for binary domain combinations
 * @param domain_id_to_go_ids_map domain id to GO ids
 * @param go_id_to_term_map GO id to GO term
 * @param all_domains_go_ids_out_dom output file for the GO ids of all domains
 * @param passing_domains_go_ids_out_dom output file for the GO ids of passing domains
 * @param proteins_file_base file used as the base for protein list output files
 * @throws IOException if one of the output files cannot be opened or written
 * @throws IllegalArgumentException if an argument fails the validation below
 */
136 public static void calculateCopyNumberDifferences( final List<GenomeWideCombinableDomains> genomes,
137 final SortedMap<Species, List<Protein>> protein_lists_per_species,
138 final List<String> high_copy_base_species,
139 final List<String> high_copy_target_species,
140 final List<String> low_copy_species,
143 final File plain_output_dom,
144 final File html_output_dom,
145 final File html_output_dc,
146 final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
147 final Map<GoId, GoTerm> go_id_to_term_map,
148 final File all_domains_go_ids_out_dom,
149 final File passing_domains_go_ids_out_dom,
150 final File proteins_file_base ) throws IOException {
151 if ( genomes.size() < 1 ) {
152 throw new IllegalArgumentException( "attempt to use empty list of genomes for domain difference calculation" );
// Only the base and low-copy lists are required to be non-empty here.
154 if ( ( high_copy_base_species.size() < 1 ) || ( low_copy_species.size() < 1 ) ) {
155 throw new IllegalArgumentException( "attempt to use empty list of species for domain difference calculation" );
// NOTE(review): contains() receives the whole high_copy_target_species list rather than
// its elements; on a List<String> this looks like it can never be true, so overlapping
// species would go undetected -- confirm and consider an element-wise check.
157 if ( high_copy_base_species.contains( high_copy_target_species )
158 || low_copy_species.contains( high_copy_target_species ) ) {
159 throw new IllegalArgumentException( "species [" + high_copy_target_species
160 + "] appears in other list as well" );
162 if ( min_diff < 0 ) {
163 throw new IllegalArgumentException( "attempt to use negative addition [" + min_diff + "]" );
165 if ( factor <= 0.0 ) {
166 throw new IllegalArgumentException( "attempt to use factor equal or smaller than 0.0 [" + factor + "]" );
// Check all outputs before opening any of them.
168 SurfacingUtil.checkForOutputFileWriteability( plain_output_dom );
169 SurfacingUtil.checkForOutputFileWriteability( html_output_dom );
170 SurfacingUtil.checkForOutputFileWriteability( html_output_dc );
171 SurfacingUtil.checkForOutputFileWriteability( all_domains_go_ids_out_dom );
172 SurfacingUtil.checkForOutputFileWriteability( passing_domains_go_ids_out_dom );
173 final Writer plain_writer = new BufferedWriter( new FileWriter( plain_output_dom ) );
174 final Writer html_writer = new BufferedWriter( new FileWriter( html_output_dom ) );
175 final Writer html_writer_dc = new BufferedWriter( new FileWriter( html_output_dc ) );
176 final Writer all_gos_writer = new BufferedWriter( new FileWriter( all_domains_go_ids_out_dom ) );
177 final Writer passing_gos_writer = new BufferedWriter( new FileWriter( passing_domains_go_ids_out_dom ) );
// Per-domain state: one collapsed value and one raw count list per species group.
178 final SortedMap<DomainId, Double> high_copy_base_values = new TreeMap<DomainId, Double>();
179 final SortedMap<DomainId, Double> high_copy_target_values = new TreeMap<DomainId, Double>();
180 final SortedMap<DomainId, Double> low_copy_values = new TreeMap<DomainId, Double>();
181 final SortedMap<DomainId, List<Integer>> high_copy_base_copy_counts = new TreeMap<DomainId, List<Integer>>();
182 final SortedMap<DomainId, List<Integer>> high_copy_target_copy_counts = new TreeMap<DomainId, List<Integer>>();
183 final SortedMap<DomainId, List<Integer>> low_copy_copy_counts = new TreeMap<DomainId, List<Integer>>();
184 final SortedSet<DomainId> all_domains = new TreeSet<DomainId>();
// The same state again, keyed by binary domain combination.
185 final SortedMap<BinaryDomainCombination, Double> high_copy_base_values_dc = new TreeMap<BinaryDomainCombination, Double>();
186 final SortedMap<BinaryDomainCombination, Double> high_copy_target_values_dc = new TreeMap<BinaryDomainCombination, Double>();
187 final SortedMap<BinaryDomainCombination, Double> low_copy_values_dc = new TreeMap<BinaryDomainCombination, Double>();
188 final SortedMap<BinaryDomainCombination, List<Integer>> high_copy_base_copy_counts_dc = new TreeMap<BinaryDomainCombination, List<Integer>>();
189 final SortedMap<BinaryDomainCombination, List<Integer>> high_copy_target_copy_counts_dc = new TreeMap<BinaryDomainCombination, List<Integer>>();
190 final SortedMap<BinaryDomainCombination, List<Integer>> low_copy_copy_counts_dc = new TreeMap<BinaryDomainCombination, List<Integer>>();
191 final SortedSet<BinaryDomainCombination> all_dcs = new TreeSet<BinaryDomainCombination>();
192 final Map<String, Set<BinaryDomainCombination>> bdcs_per_genome = new HashMap<String, Set<BinaryDomainCombination>>();
193 final SortedSet<GoId> go_ids_of_passing_domains = new TreeSet<GoId>();
194 final SortedSet<GoId> go_ids_all = new TreeSet<GoId>();
// Pass 1: collect the domains, their GO ids, and the combinations of every genome.
195 for( final GenomeWideCombinableDomains genome : genomes ) {
196 final SortedSet<DomainId> domains = genome.getAllDomainIds();
197 final SortedSet<BinaryDomainCombination> dcs = genome.toBinaryDomainCombinations();
198 final String species = genome.getSpecies().getSpeciesId();
// NOTE(review): toBinaryDomainCombinations() is invoked a second time here although its
// result is already held in dcs -- reusing dcs looks possible; confirm.
199 bdcs_per_genome.put( species, genome.toBinaryDomainCombinations() );
200 for( final DomainId d : domains ) {
201 all_domains.add( d );
202 if ( domain_id_to_go_ids_map.containsKey( d ) ) {
203 go_ids_all.addAll( domain_id_to_go_ids_map.get( d ) );
206 for( final BinaryDomainCombination dc : dcs ) {
// Pass 2: per domain, append one count per genome to each group the genome's species is in.
// The three membership tests are independent ifs, so a species listed in several groups
// contributes to each of them.
210 for( final DomainId domain : all_domains ) {
211 for( final GenomeWideCombinableDomains genome : genomes ) {
212 final String species = genome.getSpecies().getSpeciesId();
213 if ( high_copy_base_species.contains( species ) ) {
214 DomainCountsDifferenceUtil.addCounts( high_copy_base_copy_counts, domain, genome );
216 if ( high_copy_target_species.contains( species ) ) {
217 DomainCountsDifferenceUtil.addCounts( high_copy_target_copy_counts, domain, genome );
219 if ( low_copy_species.contains( species ) ) {
220 DomainCountsDifferenceUtil.addCounts( low_copy_copy_counts, domain, genome );
// Pass 3: the same bookkeeping for binary domain combinations.
224 for( final BinaryDomainCombination dc : all_dcs ) {
225 for( final GenomeWideCombinableDomains genome : genomes ) {
226 final String species = genome.getSpecies().getSpeciesId();
227 if ( high_copy_base_species.contains( species ) ) {
228 DomainCountsDifferenceUtil.addCounts( high_copy_base_copy_counts_dc,
231 bdcs_per_genome.get( species ) );
233 if ( high_copy_target_species.contains( species ) ) {
234 DomainCountsDifferenceUtil.addCounts( high_copy_target_copy_counts_dc,
237 bdcs_per_genome.get( species ) );
239 if ( low_copy_species.contains( species ) ) {
240 DomainCountsDifferenceUtil.addCounts( low_copy_copy_counts_dc,
243 bdcs_per_genome.get( species ) );
// Pass 4: collapse each group's count list into a single value per domain.
247 for( final DomainId domain : all_domains ) {
248 calculateDomainCountsBasedValue( high_copy_target_values,
249 high_copy_target_copy_counts,
251 COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
252 calculateDomainCountsBasedValue( high_copy_base_values,
253 high_copy_base_copy_counts,
255 COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
256 calculateDomainCountsBasedValue( low_copy_values,
257 low_copy_copy_counts,
259 COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
// Pass 5: the same collapse per binary domain combination.
261 for( final BinaryDomainCombination dc : all_dcs ) {
262 calculateDomainCountsBasedValue( high_copy_target_values_dc,
263 high_copy_target_copy_counts_dc,
265 COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
266 calculateDomainCountsBasedValue( high_copy_base_values_dc,
267 high_copy_base_copy_counts_dc,
269 COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
270 calculateDomainCountsBasedValue( low_copy_values_dc,
271 low_copy_copy_counts_dc,
273 COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
// Output: domain report, combination report, then the two GO id lists.
275 writeDomainValuesToFiles( genomes,
276 high_copy_base_species,
277 high_copy_target_species,
281 domain_id_to_go_ids_map,
286 high_copy_base_values,
287 high_copy_target_values,
290 go_ids_of_passing_domains,
291 protein_lists_per_species );
292 writeDomainCombinationValuesToFiles( genomes,
293 high_copy_base_species,
294 high_copy_target_species,
299 high_copy_base_values_dc,
300 high_copy_target_values_dc,
304 writeGoIdsToFile( all_gos_writer, go_ids_all );
305 writeGoIdsToFile( passing_gos_writer, go_ids_of_passing_domains );
/**
 * Collapses the count list of bdc into a single value stored in copy_values.
 *
 * When copy_counts has an entry for bdc, the work is dispatched on
 * copy_calc_mode to calculateMaxCount, calculateMinCount, calculateMeanCount
 * or calculateMedianCount. An IllegalArgumentException is thrown from within
 * the switch, presumably for an unhandled mode -- confirm. A value of 0.0 is
 * stored for bdc as well, presumably when copy_counts has no entry -- confirm.
 *
 * @param copy_values receives the collapsed value for bdc
 * @param copy_counts raw counts per binary domain combination
 * @param bdc the binary domain combination to process
 * @param copy_calc_mode how the counts are collapsed
 */
308 private static void calculateDomainCountsBasedValue( final SortedMap<BinaryDomainCombination, Double> copy_values,
309 final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
310 final BinaryDomainCombination bdc,
311 final COPY_CALCULATION_MODE copy_calc_mode ) {
312 if ( copy_counts.containsKey( bdc ) ) {
313 switch ( copy_calc_mode ) {
315 DomainCountsDifferenceUtil.calculateMaxCount( copy_values, copy_counts, bdc );
318 DomainCountsDifferenceUtil.calculateMinCount( copy_values, copy_counts, bdc );
321 DomainCountsDifferenceUtil.calculateMeanCount( copy_values, copy_counts, bdc );
324 DomainCountsDifferenceUtil.calculateMedianCount( copy_values, copy_counts, bdc );
327 throw new IllegalArgumentException();
331 copy_values.put( bdc, Double.valueOf( 0.0 ) );
/**
 * Collapses the count list of domain into a single value stored in
 * copy_values.
 *
 * When copy_counts has an entry for domain, the work is dispatched on
 * copy_calc_mode to calculateMaxCount, calculateMinCount, calculateMeanCount
 * or calculateMedianCount. An IllegalArgumentException is thrown from within
 * the switch, presumably for an unhandled mode -- confirm. A value of 0.0 is
 * stored for domain as well, presumably when copy_counts has no entry --
 * confirm.
 *
 * @param copy_values receives the collapsed value for domain
 * @param copy_counts raw counts per domain
 * @param domain the domain to process
 * @param copy_calc_mode how the counts are collapsed
 */
335 private static void calculateDomainCountsBasedValue( final SortedMap<DomainId, Double> copy_values,
336 final SortedMap<DomainId, List<Integer>> copy_counts,
337 final DomainId domain,
338 final COPY_CALCULATION_MODE copy_calc_mode ) {
339 if ( copy_counts.containsKey( domain ) ) {
340 switch ( copy_calc_mode ) {
342 DomainCountsDifferenceUtil.calculateMaxCount( copy_values, copy_counts, domain );
345 DomainCountsDifferenceUtil.calculateMinCount( copy_values, copy_counts, domain );
348 DomainCountsDifferenceUtil.calculateMeanCount( copy_values, copy_counts, domain );
351 DomainCountsDifferenceUtil.calculateMedianCount( copy_values, copy_counts, domain );
354 throw new IllegalArgumentException();
358 copy_values.put( domain, Double.valueOf( 0.0 ) );
/**
 * Iterates over the counts recorded for bdc and stores the resulting max
 * value, cast to double, under bdc in results.
 *
 * The list is iterated without a null check, so copy_counts is expected to
 * contain bdc.
 *
 * @param results receives the value for bdc
 * @param copy_counts raw counts per binary domain combination
 * @param bdc the binary domain combination to process
 */
362 private static void calculateMaxCount( final SortedMap<BinaryDomainCombination, Double> results,
363 final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
364 final BinaryDomainCombination bdc ) {
365 final List<Integer> counts = copy_counts.get( bdc );
367 for( final Integer count : counts ) {
372 results.put( bdc, ( double ) max );
/**
 * Iterates over the counts recorded for domain and stores the resulting max
 * value, cast to double, under domain in results.
 *
 * The list is iterated without a null check, so copy_counts is expected to
 * contain domain.
 *
 * @param results receives the value for domain
 * @param copy_counts raw counts per domain
 * @param domain the domain to process
 */
375 private static void calculateMaxCount( final SortedMap<DomainId, Double> results,
376 final SortedMap<DomainId, List<Integer>> copy_counts,
377 final DomainId domain ) {
378 final List<Integer> counts = copy_counts.get( domain );
380 for( final Integer count : counts ) {
385 results.put( domain, ( double ) max );
/**
 * Stores under bdc in results the value of sum divided by the number of
 * counts recorded for bdc, computed in double arithmetic.
 *
 * NOTE(review): with an empty list the divisor is 0.0; double division does
 * not throw, so a non-finite value would be stored -- confirm callers never
 * supply an empty list.
 *
 * @param results receives the value for bdc
 * @param copy_counts raw counts per binary domain combination
 * @param bdc the binary domain combination to process
 */
388 private static void calculateMeanCount( final SortedMap<BinaryDomainCombination, Double> results,
389 final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
390 final BinaryDomainCombination bdc ) {
391 final List<Integer> counts = copy_counts.get( bdc );
393 for( final Integer count : counts ) {
// Both operands are cast to double so the division is not an integer division.
396 results.put( bdc, ( ( double ) sum ) / ( ( double ) counts.size() ) );
/**
 * Stores under domain in results the value of sum divided by the number of
 * counts recorded for domain, computed in double arithmetic.
 *
 * NOTE(review): with an empty list the divisor is 0.0; double division does
 * not throw, so a non-finite value would be stored -- confirm callers never
 * supply an empty list.
 *
 * @param results receives the value for domain
 * @param copy_counts raw counts per domain
 * @param domain the domain to process
 */
399 private static void calculateMeanCount( final SortedMap<DomainId, Double> results,
400 final SortedMap<DomainId, List<Integer>> copy_counts,
401 final DomainId domain ) {
402 final List<Integer> counts = copy_counts.get( domain );
404 for( final Integer count : counts ) {
// Both operands are cast to double so the division is not an integer division.
407 results.put( domain, ( ( double ) sum ) / ( ( double ) counts.size() ) );
/**
 * Feeds every count recorded for bdc into a BasicDescriptiveStatistics
 * instance and stores the median it reports under bdc in results.
 *
 * The list is iterated without a null check, so copy_counts is expected to
 * contain bdc.
 *
 * @param results receives the median for bdc
 * @param copy_counts raw counts per binary domain combination
 * @param bdc the binary domain combination to process
 */
410 private static void calculateMedianCount( final SortedMap<BinaryDomainCombination, Double> results,
411 final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
412 final BinaryDomainCombination bdc ) {
413 final List<Integer> counts = copy_counts.get( bdc );
414 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
415 for( final Integer count : counts ) {
416 stats.addValue( count );
418 results.put( bdc, stats.median() );
/**
 * Feeds every count recorded for domain into a BasicDescriptiveStatistics
 * instance and stores the median it reports under domain in results.
 *
 * The list is iterated without a null check, so copy_counts is expected to
 * contain domain.
 *
 * @param results receives the median for domain
 * @param copy_counts raw counts per domain
 * @param domain the domain to process
 */
421 private static void calculateMedianCount( final SortedMap<DomainId, Double> results,
422 final SortedMap<DomainId, List<Integer>> copy_counts,
423 final DomainId domain ) {
424 final List<Integer> counts = copy_counts.get( domain );
425 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
426 for( final Integer count : counts ) {
427 stats.addValue( count );
429 results.put( domain, stats.median() );
/**
 * Iterates over the counts recorded for bdc and stores the resulting min
 * value, cast to double, under bdc in results.
 *
 * NOTE(review): min starts at Integer.MAX_VALUE, so an empty list would
 * presumably store that sentinel as the result -- confirm callers never
 * supply an empty list.
 *
 * @param results receives the value for bdc
 * @param copy_counts raw counts per binary domain combination
 * @param bdc the binary domain combination to process
 */
432 private static void calculateMinCount( final SortedMap<BinaryDomainCombination, Double> results,
433 final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
434 final BinaryDomainCombination bdc ) {
435 final List<Integer> counts = copy_counts.get( bdc );
436 int min = Integer.MAX_VALUE;
437 for( final Integer count : counts ) {
442 results.put( bdc, ( double ) min );
/**
 * Iterates over the counts recorded for domain and stores the resulting min
 * value, cast to double, under domain in results.
 *
 * NOTE(review): min starts at Integer.MAX_VALUE, so an empty list would
 * presumably store that sentinel as the result -- confirm callers never
 * supply an empty list.
 *
 * @param results receives the value for domain
 * @param copy_counts raw counts per domain
 * @param domain the domain to process
 */
445 private static void calculateMinCount( final SortedMap<DomainId, Double> results,
446 final SortedMap<DomainId, List<Integer>> copy_counts,
447 final DomainId domain ) {
448 final List<Integer> counts = copy_counts.get( domain );
449 int min = Integer.MAX_VALUE;
450 for( final Integer count : counts ) {
455 results.put( domain, ( double ) min );
/**
 * Builds the plain text summary of cd: its key-domain protein count and the
 * median of its key-domain confidence statistics, the latter formatted with
 * FORMATTER.
 *
 * @param cd the combinable domains entry to summarize
 * @return the summary text
 */
458 private static String combinableDomaindToString( final CombinableDomains cd ) {
459 final StringBuilder sb = new StringBuilder();
460 sb.append( cd.getKeyDomainProteinsCount() );
462 sb.append( FORMATTER.format( cd.getKeyDomainConfidenceDescriptiveStatistics().median() ) );
464 return sb.toString();
/**
 * Builds the HTML summary of cd, spread over several table cells: the
 * key-domain count, the key-domain protein count (in bold) and the number of
 * combinable domains; then the FORMATTER-formatted median of the key-domain
 * confidence statistics; then the combining domain ids.
 *
 * The returned fragment closes and reopens td elements, so it is meant to be
 * written inside an already open table cell.
 *
 * @param cd the combinable domains entry to summarize
 * @return the HTML fragment
 */
467 private static String combinableDomaindToStringHtml( final CombinableDomains cd ) {
468 final StringBuilder sb = new StringBuilder();
470 sb.append( cd.getKeyDomainCount() );
471 sb.append( ", <b>" );
472 sb.append( cd.getKeyDomainProteinsCount() );
473 sb.append( "</b>, " );
474 sb.append( cd.getNumberOfCombinableDomains() );
475 sb.append( "]</td><td>[" );
476 sb.append( FORMATTER.format( cd.getKeyDomainConfidenceDescriptiveStatistics().median() ) );
477 sb.append( "]</td><td>" );
478 sb.append( cd.getCombiningDomainIdsAsStringBuilder() );
479 return sb.toString();
/**
 * Writes two HTML table cells for one species and one binary domain
 * combination: the species id in bold (wrapped in a font element when color
 * is non-empty), followed by a count cell.
 *
 * The count read from the genome is written when the species' combination
 * set contains bdc and the collapsed value stored for bdc in copy_means is
 * greater than 0. A literal "0" is written as well, presumably otherwise --
 * confirm.
 *
 * NOTE(review): bdcs_per_genome.get( species ) and copy_means.get( bdc ) are
 * dereferenced without null checks -- confirm both are always populated.
 *
 * @param copy_means collapsed value per binary domain combination
 * @param bdc the binary domain combination being reported
 * @param genome the genome the count is read from
 * @param bdcs_per_genome binary domain combinations per species id
 * @param species the species id to print
 * @param html_writer destination of the HTML output
 * @param color font color for the species id; may be empty
 * @throws IOException if writing fails
 */
482 private static void writeCopyNumberValues( final SortedMap<BinaryDomainCombination, Double> copy_means,
483 final BinaryDomainCombination bdc,
484 final GenomeWideCombinableDomains genome,
485 final Map<String, Set<BinaryDomainCombination>> bdcs_per_genome,
486 final String species,
487 final Writer html_writer,
488 final String color ) throws IOException {
489 html_writer.write( "<td> " );
490 if ( !ForesterUtil.isEmpty( color ) ) {
491 html_writer.write( "<font color=\"" + color + "\">" );
493 html_writer.write( "<b>" + species + ":</b> " );
494 if ( !ForesterUtil.isEmpty( color ) ) {
495 html_writer.write( "</font>" );
497 html_writer.write( "</td><td>" );
498 if ( bdcs_per_genome.get( species ).contains( bdc ) && ( copy_means.get( bdc ) > 0 ) ) {
499 final int count = ( ( BasicCombinableDomains ) genome.get( bdc.getId0() ) ).getCombiningDomains()
500 .get( bdc.getId1() );
501 html_writer.write( count + "" );
504 html_writer.write( "0" );
506 html_writer.write( "</td>" );
/**
 * Writes the entry for one species and one domain to both outputs.
 *
 * Plain output: the species id followed by a tab, then the value, then a
 * line separator. HTML output: a cell with the species id in bold (wrapped in
 * a font element when color is non-empty) followed by a value cell.
 *
 * The genome's summaries (combinableDomaindToString and
 * combinableDomaindToStringHtml) are written when the genome contains the
 * domain and the collapsed value stored for it in copy_means is greater
 * than 0. A literal "0" is written to both outputs as well, presumably
 * otherwise -- confirm.
 *
 * @param copy_means collapsed value per domain
 * @param domain the domain being reported
 * @param genome the genome the summary is read from
 * @param species the species id to print
 * @param plain_writer destination of the plain text output
 * @param html_writer destination of the HTML output
 * @param color font color for the species id; may be empty
 * @throws IOException if writing fails
 */
509 private static void writeCopyNumberValues( final SortedMap<DomainId, Double> copy_means,
510 final DomainId domain,
511 final GenomeWideCombinableDomains genome,
512 final String species,
513 final Writer plain_writer,
514 final Writer html_writer,
515 final String color ) throws IOException {
516 plain_writer.write( " " + species + "\t" );
517 html_writer.write( "<td> " );
518 if ( !ForesterUtil.isEmpty( color ) ) {
519 html_writer.write( "<font color=\"" + color + "\">" );
521 html_writer.write( "<b>" + species + ":</b> " );
522 if ( !ForesterUtil.isEmpty( color ) ) {
523 html_writer.write( "</font>" );
525 html_writer.write( "</td><td>" );
526 if ( genome.contains( domain ) && ( copy_means.get( domain ) > 0 ) ) {
527 plain_writer.write( DomainCountsDifferenceUtil.combinableDomaindToString( genome.get( domain ) ) );
528 html_writer.write( DomainCountsDifferenceUtil.combinableDomaindToStringHtml( genome.get( domain ) ) );
531 plain_writer.write( "0" );
532 html_writer.write( "0" );
534 html_writer.write( "</td>" );
535 plain_writer.write( SurfacingConstants.NL );
/**
 * Writes the HTML report of binary domain combinations that pass the two
 * copy-number rules, followed by a summary footer.
 *
 * Rule 1: high-copy-base > 0 and high-copy-target > 0 and
 * high-copy-base >= low-copy.
 * Rule 2: high-copy-target >= min_diff + ( factor * low-copy ).
 *
 * For each passing combination one table row is written: links for both
 * domain ids (SurfacingConstants.PFAM_FAMILY_ID_LINK followed by the id) and
 * a nested table with one row per genome whose species id is found in one of
 * the three species lists. The lists are tested in the order target, low,
 * base with else-if, so a species listed more than once is reported once.
 *
 * The footer repeats the rules, the calculation modes, min_diff, factor and
 * the counters. The values min_diff, factor and counter are referenced
 * below.
 *
 * NOTE(review): the value maps are dereferenced with auto-unboxing and no
 * null checks; this assumes every member of all_bdcs has an entry in all
 * three maps -- confirm.
 *
 * @param genomes the genomes to report on
 * @param high_copy_base_species species ids of the high-copy base group
 * @param high_copy_target_species species ids of the high-copy target group
 * @param low_copy_species species ids of the low-copy group
 * @param html_writer destination of the HTML output
 * @param high_copy_base_values collapsed value per combination, base group
 * @param high_copy_target_values collapsed value per combination, target group
 * @param low_copy_values collapsed value per combination, low-copy group
 * @param all_bdcs all binary domain combinations to evaluate
 * @param bdcs_per_genome binary domain combinations per species id
 */
538 private static void writeDomainCombinationValuesToFiles( final List<GenomeWideCombinableDomains> genomes,
539 final List<String> high_copy_base_species,
540 final List<String> high_copy_target_species,
541 final List<String> low_copy_species,
544 final Writer html_writer,
545 final SortedMap<BinaryDomainCombination, Double> high_copy_base_values,
546 final SortedMap<BinaryDomainCombination, Double> high_copy_target_values,
547 final SortedMap<BinaryDomainCombination, Double> low_copy_values,
548 final SortedSet<BinaryDomainCombination> all_bdcs,
549 final Map<String, Set<BinaryDomainCombination>> bdcs_per_genome )
552 int total_absense_counter = 0;
553 int not_total_absense_counter = 0;
554 SurfacingUtil.addHtmlHead( html_writer, "Binary Domain Combination Copy Differences" );
555 html_writer.write( "<body><table>" );
556 for( final BinaryDomainCombination bdc : all_bdcs ) {
// Rule 1.
557 if ( ( high_copy_base_values.get( bdc ) > 0 ) && ( high_copy_target_values.get( bdc ) > 0 )
558 && ( high_copy_base_values.get( bdc ) >= low_copy_values.get( bdc ) ) ) {
// Rule 2.
559 if ( high_copy_target_values.get( bdc ) >= min_diff + ( factor * low_copy_values.get( bdc ) ) ) {
// Track whether the low-copy group lacks the combination entirely.
560 if ( low_copy_values.get( bdc ) <= 0.0 ) {
561 ++total_absense_counter;
564 ++not_total_absense_counter;
567 html_writer.write( "<tr><td><a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + bdc.getId0()
568 + "\">" + bdc.getId0() + "</a> = <a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK
569 + bdc.getId1() + "\">" + bdc.getId1() + "</a>" );
570 html_writer.write( "</td><td>" );
571 html_writer.write( "<table>" );
572 for( final GenomeWideCombinableDomains genome : genomes ) {
573 final String species = genome.getSpecies().getSpeciesId();
574 if ( high_copy_target_species.contains( species ) ) {
575 html_writer.write( "<tr>" );
576 writeCopyNumberValues( high_copy_target_values,
583 html_writer.write( "</tr>" );
585 else if ( low_copy_species.contains( species ) ) {
586 html_writer.write( "<tr>" );
587 writeCopyNumberValues( low_copy_values,
594 html_writer.write( "</tr>" );
596 else if ( high_copy_base_species.contains( species ) ) {
597 html_writer.write( "<tr>" );
598 writeCopyNumberValues( high_copy_base_values,
605 html_writer.write( "</tr>" );
608 html_writer.write( "</table>" );
609 html_writer.write( "</td></tr>" );
610 html_writer.write( SurfacingConstants.NL );
// Footer: the rules, the parameters and the counters.
614 html_writer.write( "</table>" );
615 html_writer.write( SurfacingConstants.NL );
616 html_writer.write( "<hr>" );
617 html_writer.write( SurfacingConstants.NL );
618 html_writer.write( "Rule 1: high-copy-base > 0 && high-copy-target > 0 && high-copy-base >= low-copy" );
619 html_writer.write( "<br>" );
620 html_writer.write( SurfacingConstants.NL );
621 html_writer.write( "Rule 2: high-copy-target >= minimal-difference + ( factor * low-copy )" );
622 html_writer.write( "<br>" );
623 html_writer.write( SurfacingConstants.NL );
624 html_writer.write( "Calculation mode for high copy target : " + COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
625 html_writer.write( SurfacingConstants.NL );
626 html_writer.write( "<br>" );
627 html_writer.write( "Calculation mode for high copy base : " + COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
628 html_writer.write( SurfacingConstants.NL );
629 html_writer.write( "<br>" );
630 html_writer.write( "Calculation mode for low copy : " + COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
631 html_writer.write( SurfacingConstants.NL );
632 html_writer.write( "<br>" );
633 html_writer.write( "Minimal difference : " + min_diff );
634 html_writer.write( SurfacingConstants.NL );
635 html_writer.write( "<br>" );
636 html_writer.write( "Factor : " + factor );
637 html_writer.write( SurfacingConstants.NL );
638 html_writer.write( "<br>" );
639 html_writer.write( "Lower copy binary domain combinations : " + counter );
640 html_writer.write( SurfacingConstants.NL );
641 html_writer.write( "<br>" );
642 html_writer.write( "Total absence : " + total_absense_counter );
643 html_writer.write( SurfacingConstants.NL );
644 html_writer.write( "<br>" );
645 html_writer.write( "Not total absence : " + not_total_absense_counter );
646 html_writer.write( SurfacingConstants.NL );
647 html_writer.write( "<br>" );
648 html_writer.write( "Total binary domain combinations : " + all_bdcs.size() );
649 html_writer.write( SurfacingConstants.NL );
650 html_writer.write( "<hr>" );
651 html_writer.write( SurfacingConstants.NL );
652 html_writer.write( "</body></html>" );
653 html_writer.write( SurfacingConstants.NL );
/**
 * Writes the plain text and HTML reports of domains that pass the two
 * copy-number rules, followed by a summary footer in each output.
 *
 * Rule 1: high-copy-base > 0 and high-copy-target > 0 and
 * high-copy-base >= low-copy.
 * Rule 2: high-copy-target >= min_diff + ( factor * low-copy ).
 *
 * For each passing domain this method writes its protein list file through
 * writeProteinsToFile, adds its GO ids to go_ids_of_passing_domains, writes
 * the domain id to the plain output, and writes an HTML row holding a link
 * (SurfacingConstants.PFAM_FAMILY_ID_LINK followed by the id), the GO
 * information, and a nested table with one row per genome whose species id
 * is found in one of the three species lists. The lists are tested in the
 * order target, low, base with else-if, so a species listed more than once
 * is reported once.
 *
 * The plain writer is closed at the end of this method. The values min_diff,
 * factor and counter are referenced below.
 *
 * NOTE(review): the value maps are dereferenced with auto-unboxing and no
 * null checks; this assumes every member of all_domains has an entry in all
 * three maps -- confirm.
 *
 * @param genomes the genomes to report on
 * @param high_copy_base_species species ids of the high-copy base group
 * @param high_copy_target_species species ids of the high-copy target group
 * @param low_copy_species species ids of the low-copy group
 * @param domain_id_to_go_ids_map domain id to GO ids
 * @param go_id_to_term_map GO id to GO term
 * @param plain_writer destination of the plain text output; closed here
 * @param html_writer destination of the HTML output
 * @param proteins_file_base file used as the base for protein list output files
 * @param high_copy_base_values collapsed value per domain, base group
 * @param high_copy_target_values collapsed value per domain, target group
 * @param low_copy_values collapsed value per domain, low-copy group
 * @param all_domains all domains to evaluate
 * @param go_ids_of_passing_domains receives the GO ids of passing domains
 * @param protein_lists_per_species proteins per species, passed on to writeProteinsToFile
 */
657 private static void writeDomainValuesToFiles( final List<GenomeWideCombinableDomains> genomes,
658 final List<String> high_copy_base_species,
659 final List<String> high_copy_target_species,
660 final List<String> low_copy_species,
663 final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
664 final Map<GoId, GoTerm> go_id_to_term_map,
665 final Writer plain_writer,
666 final Writer html_writer,
667 final File proteins_file_base,
668 final SortedMap<DomainId, Double> high_copy_base_values,
669 final SortedMap<DomainId, Double> high_copy_target_values,
670 final SortedMap<DomainId, Double> low_copy_values,
671 final SortedSet<DomainId> all_domains,
672 final SortedSet<GoId> go_ids_of_passing_domains,
673 final SortedMap<Species, List<Protein>> protein_lists_per_species )
676 int total_absense_counter = 0;
677 int not_total_absense_counter = 0;
678 SurfacingUtil.addHtmlHead( html_writer, "Domain Copy Differences" );
679 html_writer.write( "<body><table>" );
680 for( final DomainId domain_id : all_domains ) {
// Rule 1.
681 if ( ( high_copy_base_values.get( domain_id ) > 0 ) && ( high_copy_target_values.get( domain_id ) > 0 )
682 && ( high_copy_base_values.get( domain_id ) >= low_copy_values.get( domain_id ) ) ) {
// Rule 2.
683 if ( high_copy_target_values.get( domain_id ) >= min_diff
684 + ( factor * low_copy_values.get( domain_id ) ) ) {
// Track whether the low-copy group lacks the domain entirely.
685 if ( low_copy_values.get( domain_id ) <= 0.0 ) {
686 ++total_absense_counter;
689 ++not_total_absense_counter;
692 writeProteinsToFile( proteins_file_base, protein_lists_per_species, domain_id );
693 if ( domain_id_to_go_ids_map.containsKey( domain_id ) ) {
694 go_ids_of_passing_domains.addAll( domain_id_to_go_ids_map.get( domain_id ) );
696 plain_writer.write( domain_id.getId() );
697 plain_writer.write( SurfacingConstants.NL );
698 html_writer.write( "<tr><td><a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK
699 + domain_id.getId() + "\">" + domain_id.getId() + "</a></td><td>" );
700 html_writer.write( addGoInformation( domain_id, domain_id_to_go_ids_map, go_id_to_term_map )
702 html_writer.write( "</td><td>" );
703 html_writer.write( "<table>" );
704 for( final GenomeWideCombinableDomains genome : genomes ) {
705 final String species = genome.getSpecies().getSpeciesId();
706 if ( high_copy_target_species.contains( species ) ) {
707 html_writer.write( "<tr>" );
708 writeCopyNumberValues( high_copy_target_values,
715 html_writer.write( "</tr>" );
717 else if ( low_copy_species.contains( species ) ) {
718 html_writer.write( "<tr>" );
719 writeCopyNumberValues( low_copy_values,
726 html_writer.write( "</tr>" );
728 else if ( high_copy_base_species.contains( species ) ) {
729 html_writer.write( "<tr>" );
730 writeCopyNumberValues( high_copy_base_values,
737 html_writer.write( "</tr>" );
740 html_writer.write( "</table>" );
741 html_writer.write( "</td></tr>" );
742 html_writer.write( SurfacingConstants.NL );
743 plain_writer.write( SurfacingConstants.NL );
// HTML footer: the rules, the parameters and the counters.
747 html_writer.write( "</table>" );
748 html_writer.write( SurfacingConstants.NL );
749 html_writer.write( "<hr>" );
750 html_writer.write( SurfacingConstants.NL );
751 html_writer.write( "Rule 1: high-copy-base > 0 && high-copy-target > 0 && high-copy-base >= low-copy" );
752 html_writer.write( "<br>" );
753 html_writer.write( SurfacingConstants.NL );
754 html_writer.write( "Rule 2: high-copy-target >= minimal-difference + ( factor * low-copy )" );
755 html_writer.write( "<br>" );
756 html_writer.write( SurfacingConstants.NL );
757 html_writer.write( "Calculation mode for high copy target : " + COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
758 html_writer.write( SurfacingConstants.NL );
759 html_writer.write( "<br>" );
760 html_writer.write( "Calculation mode for high copy base : " + COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
761 html_writer.write( SurfacingConstants.NL );
762 html_writer.write( "<br>" );
763 html_writer.write( "Calculation mode for low copy : " + COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
764 html_writer.write( SurfacingConstants.NL );
765 html_writer.write( "<br>" );
766 html_writer.write( "Minimal difference : " + min_diff );
767 html_writer.write( SurfacingConstants.NL );
768 html_writer.write( "<br>" );
769 html_writer.write( "Factor : " + factor );
770 html_writer.write( SurfacingConstants.NL );
771 html_writer.write( "<br>" );
772 html_writer.write( "Lower copy domains : " + counter );
773 html_writer.write( SurfacingConstants.NL );
774 html_writer.write( "<br>" );
775 html_writer.write( "Total absence : " + total_absense_counter );
776 html_writer.write( SurfacingConstants.NL );
777 html_writer.write( "<br>" );
778 html_writer.write( "Not total absence : " + not_total_absense_counter );
779 html_writer.write( SurfacingConstants.NL );
780 html_writer.write( "<br>" );
781 html_writer.write( "Total domains : " + all_domains.size() );
782 html_writer.write( SurfacingConstants.NL );
783 html_writer.write( "<hr>" );
784 html_writer.write( SurfacingConstants.NL );
785 html_writer.write( "</body></html>" );
786 html_writer.write( SurfacingConstants.NL );
// Plain text footer: the same summary, each line prefixed with '#'.
788 plain_writer.write( "# Rule 1: high-copy-base > 0 && high-copy-target > 0 && high-copy-base >= low-copy" );
789 plain_writer.write( SurfacingConstants.NL );
790 plain_writer.write( "# Rule 2: high-copy-target >= minimal-difference + ( factor * low-copy )" );
791 plain_writer.write( SurfacingConstants.NL );
792 plain_writer.write( "# Calculation mode for high copy target: " + COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
793 plain_writer.write( SurfacingConstants.NL );
794 plain_writer.write( "# Calculation mode for high copy base : " + COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
795 plain_writer.write( SurfacingConstants.NL );
796 plain_writer.write( "# Calculation mode for low copy : " + COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
797 plain_writer.write( SurfacingConstants.NL );
798 plain_writer.write( "# Minimal difference: " + min_diff );
799 plain_writer.write( SurfacingConstants.NL );
800 plain_writer.write( "# Factor : " + factor );
801 plain_writer.write( SurfacingConstants.NL );
802 plain_writer.write( "# Lower copy domains: " + counter );
803 plain_writer.write( SurfacingConstants.NL );
804 plain_writer.write( "# Total absence : " + total_absense_counter );
805 plain_writer.write( SurfacingConstants.NL );
806 plain_writer.write( "# Not total absence : " + not_total_absense_counter );
807 plain_writer.write( SurfacingConstants.NL );
808 plain_writer.write( "# Total domains : " + all_domains.size() );
809 plain_writer.write( SurfacingConstants.NL );
810 plain_writer.close();
/**
 * Writes every GO id in gos to writer, one per line, in the iteration order
 * of the sorted set; each id is followed by SurfacingConstants.NL.
 *
 * @param writer destination of the output
 * @param gos the GO ids to write
 * @throws IOException if writing fails
 */
813 private static void writeGoIdsToFile( final Writer writer, final SortedSet<GoId> gos ) throws IOException {
814 for( final GoId go_id : gos ) {
815 writer.write( go_id.toString() );
816 writer.write( SurfacingConstants.NL );
/**
 * Writes the protein list for domain_id to its own file and reports the file
 * name on standard output.
 *
 * The file is placed in the parent directory of proteins_file_base and named
 * after the domain id followed by PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX. Its
 * writeability is checked, the content is produced by
 * SurfacingUtil.extractProteinNames, and the writer is closed afterwards.
 *
 * NOTE(review): getParentFile() returns null for a path without a parent, in
 * which case the string concatenation would yield a path starting with
 * "null" -- confirm callers always pass a file that has a parent.
 *
 * NOTE(review): the writer is closed on the normal path only; an exception
 * from extractProteinNames would presumably leave it open -- confirm.
 *
 * @param proteins_file_base file whose parent directory receives the output
 * @param protein_lists_per_species proteins per species, passed on to extractProteinNames
 * @param domain_id the domain whose proteins are written
 * @throws IOException if the file cannot be opened or written
 */
821 private static void writeProteinsToFile( final File proteins_file_base,
822 final SortedMap<Species, List<Protein>> protein_lists_per_species,
823 final DomainId domain_id ) throws IOException {
824 final File my_proteins_file = new File( proteins_file_base.getParentFile() + ForesterUtil.FILE_SEPARATOR
825 + domain_id + PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX );
826 SurfacingUtil.checkForOutputFileWriteability( my_proteins_file );
827 final Writer proteins_file_writer = new BufferedWriter( new FileWriter( my_proteins_file ) );
828 SurfacingUtil.extractProteinNames( protein_lists_per_species,
830 proteins_file_writer,
832 surfacing.LIMIT_SPEC_FOR_PROT_EX );
833 proteins_file_writer.close();
834 System.out.println( "Wrote proteins list to \"" + my_proteins_file + "\"" );
/**
 * Selects how the list of per-species copy counts of one species group is
 * collapsed into a single value: arithmetic mean, median, maximum or minimum.
 * calculateDomainCountsBasedValue switches on this value.
 */
837 public static enum COPY_CALCULATION_MODE {
838 MEAN, MEDIAN, MAX, MIN