// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
-//
+//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
-//
+//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org/forester
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
package org.forester.surfacing;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;
-import java.text.DecimalFormat;
-import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.TreeMap;
import java.util.TreeSet;
+import org.forester.application.surfacing;
import org.forester.go.GoId;
import org.forester.go.GoTerm;
+import org.forester.protein.BinaryDomainCombination;
+import org.forester.protein.Protein;
+import org.forester.species.Species;
import org.forester.util.BasicDescriptiveStatistics;
import org.forester.util.DescriptiveStatistics;
import org.forester.util.ForesterUtil;
*/
public final class DomainCountsDifferenceUtil {
- private final static NumberFormat FORMATTER = new DecimalFormat( "0.0E0" );
private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES = COPY_CALCULATION_MODE.MIN;
private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES = COPY_CALCULATION_MODE.MIN;
private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_LOW_COPY_SPECIES = COPY_CALCULATION_MODE.MAX;
}
if ( bdc.contains( dc )
&& ( ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains().get( dc.getId1() ) != null ) ) {
- final int count = ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains().get( dc
- .getId1() );
+ final int count = ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains()
+ .get( dc.getId1() );
copy_counts.get( dc ).add( count );
}
else {
}
}
- private static void addCounts( final SortedMap<DomainId, List<Integer>> copy_counts,
- final DomainId domain,
+ private static void addCounts( final SortedMap<String, List<Integer>> copy_counts,
+ final String domain,
final GenomeWideCombinableDomains genome ) {
if ( !copy_counts.containsKey( domain ) ) {
copy_counts.put( domain, new ArrayList<Integer>() );
}
}
- private static StringBuilder addGoInformation( final DomainId d,
- final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
+ private static StringBuilder addGoInformation( final String d,
+ final Map<String, List<GoId>> domain_id_to_go_ids_map,
final Map<GoId, GoTerm> go_id_to_term_map ) {
final StringBuilder sb = new StringBuilder();
if ( ( domain_id_to_go_ids_map == null ) || domain_id_to_go_ids_map.isEmpty()
sb.append( "<br>" );
}
else {
- sb.append( "go id \"" + go_id + "\" not found [" + d.getId() + "]" );
+ sb.append( "go id \"" + go_id + "\" not found [" + d + "]" );
}
}
return sb;
final File plain_output_dom,
final File html_output_dom,
final File html_output_dc,
- final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
+ final Map<String, List<GoId>> domain_id_to_go_ids_map,
final Map<GoId, GoTerm> go_id_to_term_map,
final File all_domains_go_ids_out_dom,
final File passing_domains_go_ids_out_dom,
final Writer html_writer_dc = new BufferedWriter( new FileWriter( html_output_dc ) );
final Writer all_gos_writer = new BufferedWriter( new FileWriter( all_domains_go_ids_out_dom ) );
final Writer passing_gos_writer = new BufferedWriter( new FileWriter( passing_domains_go_ids_out_dom ) );
- final SortedMap<DomainId, Double> high_copy_base_values = new TreeMap<DomainId, Double>();
- final SortedMap<DomainId, Double> high_copy_target_values = new TreeMap<DomainId, Double>();
- final SortedMap<DomainId, Double> low_copy_values = new TreeMap<DomainId, Double>();
- final SortedMap<DomainId, List<Integer>> high_copy_base_copy_counts = new TreeMap<DomainId, List<Integer>>();
- final SortedMap<DomainId, List<Integer>> high_copy_target_copy_counts = new TreeMap<DomainId, List<Integer>>();
- final SortedMap<DomainId, List<Integer>> low_copy_copy_counts = new TreeMap<DomainId, List<Integer>>();
- final SortedSet<DomainId> all_domains = new TreeSet<DomainId>();
+ final SortedMap<String, Double> high_copy_base_values = new TreeMap<String, Double>();
+ final SortedMap<String, Double> high_copy_target_values = new TreeMap<String, Double>();
+ final SortedMap<String, Double> low_copy_values = new TreeMap<String, Double>();
+ final SortedMap<String, List<Integer>> high_copy_base_copy_counts = new TreeMap<String, List<Integer>>();
+ final SortedMap<String, List<Integer>> high_copy_target_copy_counts = new TreeMap<String, List<Integer>>();
+ final SortedMap<String, List<Integer>> low_copy_copy_counts = new TreeMap<String, List<Integer>>();
+ final SortedSet<String> all_domains = new TreeSet<String>();
final SortedMap<BinaryDomainCombination, Double> high_copy_base_values_dc = new TreeMap<BinaryDomainCombination, Double>();
final SortedMap<BinaryDomainCombination, Double> high_copy_target_values_dc = new TreeMap<BinaryDomainCombination, Double>();
final SortedMap<BinaryDomainCombination, Double> low_copy_values_dc = new TreeMap<BinaryDomainCombination, Double>();
final SortedSet<GoId> go_ids_of_passing_domains = new TreeSet<GoId>();
final SortedSet<GoId> go_ids_all = new TreeSet<GoId>();
for( final GenomeWideCombinableDomains genome : genomes ) {
- final SortedSet<DomainId> domains = genome.getAllDomainIds();
+ final SortedSet<String> domains = genome.getAllDomainIds();
final SortedSet<BinaryDomainCombination> dcs = genome.toBinaryDomainCombinations();
final String species = genome.getSpecies().getSpeciesId();
bdcs_per_genome.put( species, genome.toBinaryDomainCombinations() );
- for( final DomainId d : domains ) {
+ for( final String d : domains ) {
all_domains.add( d );
if ( domain_id_to_go_ids_map.containsKey( d ) ) {
go_ids_all.addAll( domain_id_to_go_ids_map.get( d ) );
all_dcs.add( dc );
}
}
- for( final DomainId domain : all_domains ) {
+ for( final String domain : all_domains ) {
for( final GenomeWideCombinableDomains genome : genomes ) {
final String species = genome.getSpecies().getSpeciesId();
if ( high_copy_base_species.contains( species ) ) {
for( final GenomeWideCombinableDomains genome : genomes ) {
final String species = genome.getSpecies().getSpeciesId();
if ( high_copy_base_species.contains( species ) ) {
- DomainCountsDifferenceUtil.addCounts( high_copy_base_copy_counts_dc, dc, genome, bdcs_per_genome
- .get( species ) );
+ DomainCountsDifferenceUtil.addCounts( high_copy_base_copy_counts_dc,
+ dc,
+ genome,
+ bdcs_per_genome.get( species ) );
}
if ( high_copy_target_species.contains( species ) ) {
- DomainCountsDifferenceUtil.addCounts( high_copy_target_copy_counts_dc, dc, genome, bdcs_per_genome
- .get( species ) );
+ DomainCountsDifferenceUtil.addCounts( high_copy_target_copy_counts_dc,
+ dc,
+ genome,
+ bdcs_per_genome.get( species ) );
}
if ( low_copy_species.contains( species ) ) {
- DomainCountsDifferenceUtil.addCounts( low_copy_copy_counts_dc, dc, genome, bdcs_per_genome
- .get( species ) );
+ DomainCountsDifferenceUtil.addCounts( low_copy_copy_counts_dc,
+ dc,
+ genome,
+ bdcs_per_genome.get( species ) );
}
}
}
- for( final DomainId domain : all_domains ) {
+ for( final String domain : all_domains ) {
calculateDomainCountsBasedValue( high_copy_target_values,
high_copy_target_copy_counts,
domain,
}
}
- private static void calculateDomainCountsBasedValue( final SortedMap<DomainId, Double> copy_values,
- final SortedMap<DomainId, List<Integer>> copy_counts,
- final DomainId domain,
+ private static void calculateDomainCountsBasedValue( final SortedMap<String, Double> copy_values,
+ final SortedMap<String, List<Integer>> copy_counts,
+ final String domain,
final COPY_CALCULATION_MODE copy_calc_mode ) {
if ( copy_counts.containsKey( domain ) ) {
switch ( copy_calc_mode ) {
results.put( bdc, ( double ) max );
}
- private static void calculateMaxCount( final SortedMap<DomainId, Double> results,
- final SortedMap<DomainId, List<Integer>> copy_counts,
- final DomainId domain ) {
+ private static void calculateMaxCount( final SortedMap<String, Double> results,
+ final SortedMap<String, List<Integer>> copy_counts,
+ final String domain ) {
final List<Integer> counts = copy_counts.get( domain );
int max = 0;
for( final Integer count : counts ) {
results.put( bdc, ( ( double ) sum ) / ( ( double ) counts.size() ) );
}
- private static void calculateMeanCount( final SortedMap<DomainId, Double> results,
- final SortedMap<DomainId, List<Integer>> copy_counts,
- final DomainId domain ) {
+ private static void calculateMeanCount( final SortedMap<String, Double> results,
+ final SortedMap<String, List<Integer>> copy_counts,
+ final String domain ) {
final List<Integer> counts = copy_counts.get( domain );
int sum = 0;
for( final Integer count : counts ) {
results.put( bdc, stats.median() );
}
- private static void calculateMedianCount( final SortedMap<DomainId, Double> results,
- final SortedMap<DomainId, List<Integer>> copy_counts,
- final DomainId domain ) {
+ private static void calculateMedianCount( final SortedMap<String, Double> results,
+ final SortedMap<String, List<Integer>> copy_counts,
+ final String domain ) {
final List<Integer> counts = copy_counts.get( domain );
final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
for( final Integer count : counts ) {
results.put( bdc, ( double ) min );
}
- private static void calculateMinCount( final SortedMap<DomainId, Double> results,
- final SortedMap<DomainId, List<Integer>> copy_counts,
- final DomainId domain ) {
+ private static void calculateMinCount( final SortedMap<String, Double> results,
+ final SortedMap<String, List<Integer>> copy_counts,
+ final String domain ) {
final List<Integer> counts = copy_counts.get( domain );
int min = Integer.MAX_VALUE;
for( final Integer count : counts ) {
/**
 * Builds the plain-text representation of one {@code CombinableDomains} entry.
 *
 * The text always starts with the value of {@code cd.getKeyDomainProteinsCount()}.
 * The lines marked as removed below used to append a tab followed by the
 * FORMATTER-formatted median of the key-domain confidence statistics in square
 * brackets; with those lines gone the returned text is the protein count only.
 *
 * @param cd the combinable-domains entry to render
 * @return the text accumulated in the local StringBuilder
 */
private static String combinableDomaindToString( final CombinableDomains cd ) {
final StringBuilder sb = new StringBuilder();
sb.append( cd.getKeyDomainProteinsCount() );
// Removed: tab-separated "[median confidence]" suffix (the only use of FORMATTER in this method).
- sb.append( "\t[" );
- sb.append( FORMATTER.format( cd.getKeyDomainConfidenceDescriptiveStatistics().median() ) );
- sb.append( "]" );
return sb.toString();
}
sb.append( cd.getKeyDomainProteinsCount() );
sb.append( "</b>, " );
sb.append( cd.getNumberOfCombinableDomains() );
- sb.append( "]</td><td>[" );
- sb.append( FORMATTER.format( cd.getKeyDomainConfidenceDescriptiveStatistics().median() ) );
sb.append( "]</td><td>" );
sb.append( cd.getCombiningDomainIdsAsStringBuilder() );
return sb.toString();
}
html_writer.write( "</td><td>" );
if ( bdcs_per_genome.get( species ).contains( bdc ) && ( copy_means.get( bdc ) > 0 ) ) {
- final int count = ( ( BasicCombinableDomains ) genome.get( bdc.getId0() ) ).getCombiningDomains().get( bdc
- .getId1() );
+ final int count = ( ( BasicCombinableDomains ) genome.get( bdc.getId0() ) ).getCombiningDomains()
+ .get( bdc.getId1() );
html_writer.write( count + "" );
}
else {
html_writer.write( "</td>" );
}
- private static void writeCopyNumberValues( final SortedMap<DomainId, Double> copy_means,
- final DomainId domain,
+ private static void writeCopyNumberValues( final SortedMap<String, Double> copy_means,
+ final String domain,
final GenomeWideCombinableDomains genome,
final String species,
final Writer plain_writer,
for( final BinaryDomainCombination bdc : all_bdcs ) {
if ( ( high_copy_base_values.get( bdc ) > 0 ) && ( high_copy_target_values.get( bdc ) > 0 )
&& ( high_copy_base_values.get( bdc ) >= low_copy_values.get( bdc ) ) ) {
- if ( high_copy_target_values.get( bdc ) >= min_diff + ( factor * low_copy_values.get( bdc ) ) ) {
+ if ( high_copy_target_values.get( bdc ) >= ( min_diff + ( factor * low_copy_values.get( bdc ) ) ) ) {
if ( low_copy_values.get( bdc ) <= 0.0 ) {
++total_absense_counter;
}
final List<String> low_copy_species,
final int min_diff,
final Double factor,
- final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
+ final Map<String, List<GoId>> domain_id_to_go_ids_map,
final Map<GoId, GoTerm> go_id_to_term_map,
final Writer plain_writer,
final Writer html_writer,
final File proteins_file_base,
- final SortedMap<DomainId, Double> high_copy_base_values,
- final SortedMap<DomainId, Double> high_copy_target_values,
- final SortedMap<DomainId, Double> low_copy_values,
- final SortedSet<DomainId> all_domains,
+ final SortedMap<String, Double> high_copy_base_values,
+ final SortedMap<String, Double> high_copy_target_values,
+ final SortedMap<String, Double> low_copy_values,
+ final SortedSet<String> all_domains,
final SortedSet<GoId> go_ids_of_passing_domains,
final SortedMap<Species, List<Protein>> protein_lists_per_species )
throws IOException {
int not_total_absense_counter = 0;
SurfacingUtil.addHtmlHead( html_writer, "Domain Copy Differences" );
html_writer.write( "<body><table>" );
- for( final DomainId domain_id : all_domains ) {
+ for( final String domain_id : all_domains ) {
if ( ( high_copy_base_values.get( domain_id ) > 0 ) && ( high_copy_target_values.get( domain_id ) > 0 )
&& ( high_copy_base_values.get( domain_id ) >= low_copy_values.get( domain_id ) ) ) {
- if ( high_copy_target_values.get( domain_id ) >= min_diff
- + ( factor * low_copy_values.get( domain_id ) ) ) {
+ if ( high_copy_target_values.get( domain_id ) >= ( min_diff + ( factor * low_copy_values
+ .get( domain_id ) ) ) ) {
if ( low_copy_values.get( domain_id ) <= 0.0 ) {
++total_absense_counter;
}
if ( domain_id_to_go_ids_map.containsKey( domain_id ) ) {
go_ids_of_passing_domains.addAll( domain_id_to_go_ids_map.get( domain_id ) );
}
- plain_writer.write( domain_id.getId() );
+ plain_writer.write( domain_id );
plain_writer.write( SurfacingConstants.NL );
- html_writer.write( "<tr><td><a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK
- + domain_id.getId() + "\">" + domain_id.getId() + "</a></td><td>" );
+ html_writer.write( "<tr><td><a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain_id
+ + "\">" + domain_id + "</a></td><td>" );
html_writer.write( addGoInformation( domain_id, domain_id_to_go_ids_map, go_id_to_term_map )
.toString() );
html_writer.write( "</td><td>" );
/**
 * Writes the protein names associated with one domain to a per-domain file.
 *
 * The output file is placed in the parent directory of {@code proteins_file_base}
 * and is named {@code domain_id + PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX}. The actual
 * extraction is delegated to {@code SurfacingUtil.extractProteinNames}; a
 * confirmation line is printed to standard output once the writer is closed.
 *
 * The domain identifier parameter changes type here from {@code DomainId} to
 * {@code String}.
 *
 * @param proteins_file_base file whose parent directory receives the output
 * @param protein_lists_per_species proteins per species, passed through to the extractor
 * @param domain_id identifier of the domain; also used as the file-name stem
 * @throws IOException declared by the method; raised by the file/writer operations it performs
 */
private static void writeProteinsToFile( final File proteins_file_base,
final SortedMap<Species, List<Protein>> protein_lists_per_species,
- final DomainId domain_id ) throws IOException {
+ final String domain_id ) throws IOException {
// NOTE(review): getParentFile() returns null when the base path has no parent component;
// string concatenation would then yield a path starting with "null" -- confirm callers
// always pass a path with a parent.
final File my_proteins_file = new File( proteins_file_base.getParentFile() + ForesterUtil.FILE_SEPARATOR
+ domain_id + PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX );
// Checked before the writer is opened; presumably rejects unwritable targets -- see SurfacingUtil.
SurfacingUtil.checkForOutputFileWriteability( my_proteins_file );
final Writer proteins_file_writer = new BufferedWriter( new FileWriter( my_proteins_file ) );
// The call gains two trailing arguments (surfacing.LIMIT_SPEC_FOR_PROT_EX and -1); their
// meaning is defined by SurfacingUtil.extractProteinNames and is not visible here.
// "\t" is presumably the field separator -- TODO confirm against the callee.
- SurfacingUtil.extractProteinNames( protein_lists_per_species, domain_id, proteins_file_writer, "\t" );
+ SurfacingUtil.extractProteinNames( protein_lists_per_species,
+ domain_id,
+ proteins_file_writer,
+ "\t",
+ surfacing.LIMIT_SPEC_FOR_PROT_EX,
+ -1 );
// NOTE(review): close() is reached only on the normal path; if the extraction above throws,
// the writer is left open. Consider try/finally (or try-with-resources) in a follow-up.
proteins_file_writer.close();
System.out.println( "Wrote proteins list to \"" + my_proteins_file + "\"" );
}