X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fsurfacing%2FDomainCountsDifferenceUtil.java;h=a9643a89b9ef7a59fede3add2a4b36e1e7291c13;hb=df8e9950662eaab9427f6873dcd0072f0d28f690;hp=c3691bffd4104fce2cd0ae4be7e4683a137d1c5d;hpb=13fe062e973e7b025c08910326cf9b3206ee8cdb;p=jalview.git diff --git a/forester/java/src/org/forester/surfacing/DomainCountsDifferenceUtil.java b/forester/java/src/org/forester/surfacing/DomainCountsDifferenceUtil.java index c3691bf..a9643a8 100644 --- a/forester/java/src/org/forester/surfacing/DomainCountsDifferenceUtil.java +++ b/forester/java/src/org/forester/surfacing/DomainCountsDifferenceUtil.java @@ -23,7 +23,7 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/forester +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.surfacing; @@ -32,8 +32,6 @@ import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.Writer; -import java.text.DecimalFormat; -import java.text.NumberFormat; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -48,7 +46,6 @@ import org.forester.application.surfacing; import org.forester.go.GoId; import org.forester.go.GoTerm; import org.forester.protein.BinaryDomainCombination; -import org.forester.protein.DomainId; import org.forester.protein.Protein; import org.forester.species.Species; import org.forester.util.BasicDescriptiveStatistics; @@ -61,78 +58,11 @@ import org.forester.util.ForesterUtil; */ public final class DomainCountsDifferenceUtil { - private final static NumberFormat FORMATTER = new DecimalFormat( "0.0E0" ); - private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES = COPY_CALCULATION_MODE.MIN; private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES = COPY_CALCULATION_MODE.MIN; + private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES = COPY_CALCULATION_MODE.MIN; private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_LOW_COPY_SPECIES = COPY_CALCULATION_MODE.MAX; private static final String PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX = ".prot"; - //FIXME really needs to be tested! - private static void addCounts( final SortedMap> copy_counts, - final BinaryDomainCombination dc, - final GenomeWideCombinableDomains genome, - final Set bdc ) { - if ( !copy_counts.containsKey( dc ) ) { - copy_counts.put( dc, new ArrayList() ); - } - if ( bdc.contains( dc ) - && ( ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains().get( dc.getId1() ) != null ) ) { - final int count = ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains() - .get( dc.getId1() ); - copy_counts.get( dc ).add( count ); - } - else { - copy_counts.get( dc ).add( 0 ); - } - } - - private static void addCounts( final SortedMap> copy_counts, - final DomainId domain, - final GenomeWideCombinableDomains genome ) { - if ( !copy_counts.containsKey( domain ) ) { - copy_counts.put( domain, new ArrayList() ); - } - if ( genome.contains( domain ) ) { - copy_counts.get( domain ).add( genome.get( domain ).getKeyDomainProteinsCount() ); - } - else { - copy_counts.get( domain ).add( 0 ); - } - } - - private static StringBuilder addGoInformation( final DomainId d, - final Map> domain_id_to_go_ids_map, - final Map go_id_to_term_map ) { - final StringBuilder sb = new StringBuilder(); - if ( ( domain_id_to_go_ids_map == null ) || domain_id_to_go_ids_map.isEmpty() - || !domain_id_to_go_ids_map.containsKey( d ) ) { - return sb; - } - final List go_ids = domain_id_to_go_ids_map.get( d ); - for( int i = 0; i < go_ids.size(); ++i ) { - final GoId go_id = go_ids.get( i ); - if ( go_id_to_term_map.containsKey( go_id ) ) { - appendGoTerm( sb, go_id_to_term_map.get( go_id ) ); - sb.append( "
" ); - } - else { - sb.append( "go id \"" + go_id + "\" not found [" + d.getId() + "]" ); - } - } - return sb; - } - - private static void appendGoTerm( final StringBuilder sb, final GoTerm go_term ) { - final GoId go_id = go_term.getGoId(); - sb.append( "" + go_id - + "" ); - sb.append( ":" ); - sb.append( go_term.getName() ); - sb.append( " [" ); - sb.append( go_term.getGoNameSpace().toShortString() ); - sb.append( "]" ); - } - public static void calculateCopyNumberDifferences( final List genomes, final SortedMap> protein_lists_per_species, final List high_copy_base_species, @@ -143,7 +73,7 @@ public final class DomainCountsDifferenceUtil { final File plain_output_dom, final File html_output_dom, final File html_output_dc, - final Map> domain_id_to_go_ids_map, + final Map> domain_id_to_go_ids_map, final Map go_id_to_term_map, final File all_domains_go_ids_out_dom, final File passing_domains_go_ids_out_dom, @@ -157,7 +87,7 @@ public final class DomainCountsDifferenceUtil { if ( high_copy_base_species.contains( high_copy_target_species ) || low_copy_species.contains( high_copy_target_species ) ) { throw new IllegalArgumentException( "species [" + high_copy_target_species - + "] appears in other list as well" ); + + "] appears in other list as well" ); } if ( min_diff < 0 ) { throw new IllegalArgumentException( "attempt to use negative addition [" + min_diff + "]" ); @@ -175,13 +105,13 @@ public final class DomainCountsDifferenceUtil { final Writer html_writer_dc = new BufferedWriter( new FileWriter( html_output_dc ) ); final Writer all_gos_writer = new BufferedWriter( new FileWriter( all_domains_go_ids_out_dom ) ); final Writer passing_gos_writer = new BufferedWriter( new FileWriter( passing_domains_go_ids_out_dom ) ); - final SortedMap high_copy_base_values = new TreeMap(); - final SortedMap high_copy_target_values = new TreeMap(); - final SortedMap low_copy_values = new TreeMap(); - final SortedMap> high_copy_base_copy_counts = new TreeMap>(); - final SortedMap> high_copy_target_copy_counts = new TreeMap>(); - final SortedMap> low_copy_copy_counts = new TreeMap>(); - final SortedSet all_domains = new TreeSet(); + final SortedMap high_copy_base_values = new TreeMap(); + final SortedMap high_copy_target_values = new TreeMap(); + final SortedMap low_copy_values = new TreeMap(); + final SortedMap> high_copy_base_copy_counts = new TreeMap>(); + final SortedMap> high_copy_target_copy_counts = new TreeMap>(); + final SortedMap> low_copy_copy_counts = new TreeMap>(); + final SortedSet all_domains = new TreeSet(); final SortedMap high_copy_base_values_dc = new TreeMap(); final SortedMap high_copy_target_values_dc = new TreeMap(); final SortedMap low_copy_values_dc = new TreeMap(); @@ -193,11 +123,11 @@ public final class DomainCountsDifferenceUtil { final SortedSet go_ids_of_passing_domains = new TreeSet(); final SortedSet go_ids_all = new TreeSet(); for( final GenomeWideCombinableDomains genome : genomes ) { - final SortedSet domains = genome.getAllDomainIds(); + final SortedSet domains = genome.getAllDomainIds(); final SortedSet dcs = genome.toBinaryDomainCombinations(); final String species = genome.getSpecies().getSpeciesId(); bdcs_per_genome.put( species, genome.toBinaryDomainCombinations() ); - for( final DomainId d : domains ) { + for( final String d : domains ) { all_domains.add( d ); if ( domain_id_to_go_ids_map.containsKey( d ) ) { go_ids_all.addAll( domain_id_to_go_ids_map.get( d ) ); @@ -207,7 +137,7 @@ public final class DomainCountsDifferenceUtil { all_dcs.add( dc ); } } - for( final DomainId domain : all_domains ) { + for( final String domain : all_domains ) { for( final GenomeWideCombinableDomains genome : genomes ) { final String species = genome.getSpecies().getSpeciesId(); if ( high_copy_base_species.contains( species ) ) { @@ -244,7 +174,7 @@ public final class DomainCountsDifferenceUtil { } } } - for( final DomainId domain : all_domains ) { + for( final String domain : all_domains ) { calculateDomainCountsBasedValue( high_copy_target_values, high_copy_target_copy_counts, domain, @@ -305,6 +235,72 @@ public final class DomainCountsDifferenceUtil { writeGoIdsToFile( passing_gos_writer, go_ids_of_passing_domains ); } + //FIXME really needs to be tested! + private static void addCounts( final SortedMap> copy_counts, + final BinaryDomainCombination dc, + final GenomeWideCombinableDomains genome, + final Set bdc ) { + if ( !copy_counts.containsKey( dc ) ) { + copy_counts.put( dc, new ArrayList() ); + } + if ( bdc.contains( dc ) + && ( ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains().get( dc.getId1() ) != null ) ) { + final int count = ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains() + .get( dc.getId1() ); + copy_counts.get( dc ).add( count ); + } + else { + copy_counts.get( dc ).add( 0 ); + } + } + + private static void addCounts( final SortedMap> copy_counts, + final String domain, + final GenomeWideCombinableDomains genome ) { + if ( !copy_counts.containsKey( domain ) ) { + copy_counts.put( domain, new ArrayList() ); + } + if ( genome.contains( domain ) ) { + copy_counts.get( domain ).add( genome.get( domain ).getKeyDomainProteinsCount() ); + } + else { + copy_counts.get( domain ).add( 0 ); + } + } + + private static StringBuilder addGoInformation( final String d, + final Map> domain_id_to_go_ids_map, + final Map go_id_to_term_map ) { + final StringBuilder sb = new StringBuilder(); + if ( ( domain_id_to_go_ids_map == null ) || domain_id_to_go_ids_map.isEmpty() + || !domain_id_to_go_ids_map.containsKey( d ) ) { + return sb; + } + final List go_ids = domain_id_to_go_ids_map.get( d ); + for( int i = 0; i < go_ids.size(); ++i ) { + final GoId go_id = go_ids.get( i ); + if ( go_id_to_term_map.containsKey( go_id ) ) { + appendGoTerm( sb, go_id_to_term_map.get( go_id ) ); + sb.append( "
" ); + } + else { + sb.append( "go id \"" + go_id + "\" not found [" + d + "]" ); + } + } + return sb; + } + + private static void appendGoTerm( final StringBuilder sb, final GoTerm go_term ) { + final GoId go_id = go_term.getGoId(); + sb.append( "" + go_id + + "" ); + sb.append( ":" ); + sb.append( go_term.getName() ); + sb.append( " [" ); + sb.append( go_term.getGoNameSpace().toShortString() ); + sb.append( "]" ); + } + private static void calculateDomainCountsBasedValue( final SortedMap copy_values, final SortedMap> copy_counts, final BinaryDomainCombination bdc, @@ -332,9 +328,9 @@ public final class DomainCountsDifferenceUtil { } } - private static void calculateDomainCountsBasedValue( final SortedMap copy_values, - final SortedMap> copy_counts, - final DomainId domain, + private static void calculateDomainCountsBasedValue( final SortedMap copy_values, + final SortedMap> copy_counts, + final String domain, final COPY_CALCULATION_MODE copy_calc_mode ) { if ( copy_counts.containsKey( domain ) ) { switch ( copy_calc_mode ) { @@ -372,9 +368,9 @@ public final class DomainCountsDifferenceUtil { results.put( bdc, ( double ) max ); } - private static void calculateMaxCount( final SortedMap results, - final SortedMap> copy_counts, - final DomainId domain ) { + private static void calculateMaxCount( final SortedMap results, + final SortedMap> copy_counts, + final String domain ) { final List counts = copy_counts.get( domain ); int max = 0; for( final Integer count : counts ) { @@ -396,9 +392,9 @@ public final class DomainCountsDifferenceUtil { results.put( bdc, ( ( double ) sum ) / ( ( double ) counts.size() ) ); } - private static void calculateMeanCount( final SortedMap results, - final SortedMap> copy_counts, - final DomainId domain ) { + private static void calculateMeanCount( final SortedMap results, + final SortedMap> copy_counts, + final String domain ) { final List counts = copy_counts.get( domain ); int sum = 0; for( final Integer count : counts ) { @@ -418,9 +414,9 @@ public final class DomainCountsDifferenceUtil { results.put( bdc, stats.median() ); } - private static void calculateMedianCount( final SortedMap results, - final SortedMap> copy_counts, - final DomainId domain ) { + private static void calculateMedianCount( final SortedMap results, + final SortedMap> copy_counts, + final String domain ) { final List counts = copy_counts.get( domain ); final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); for( final Integer count : counts ) { @@ -442,9 +438,9 @@ public final class DomainCountsDifferenceUtil { results.put( bdc, ( double ) min ); } - private static void calculateMinCount( final SortedMap results, - final SortedMap> copy_counts, - final DomainId domain ) { + private static void calculateMinCount( final SortedMap results, + final SortedMap> copy_counts, + final String domain ) { final List counts = copy_counts.get( domain ); int min = Integer.MAX_VALUE; for( final Integer count : counts ) { @@ -458,9 +454,6 @@ public final class DomainCountsDifferenceUtil { private static String combinableDomaindToString( final CombinableDomains cd ) { final StringBuilder sb = new StringBuilder(); sb.append( cd.getKeyDomainProteinsCount() ); - sb.append( "\t[" ); - sb.append( FORMATTER.format( cd.getKeyDomainConfidenceDescriptiveStatistics().median() ) ); - sb.append( "]" ); return sb.toString(); } @@ -472,8 +465,6 @@ public final class DomainCountsDifferenceUtil { sb.append( cd.getKeyDomainProteinsCount() ); sb.append( ", " ); sb.append( cd.getNumberOfCombinableDomains() ); - sb.append( "][" ); - sb.append( FORMATTER.format( cd.getKeyDomainConfidenceDescriptiveStatistics().median() ) ); sb.append( "]" ); sb.append( cd.getCombiningDomainIdsAsStringBuilder() ); return sb.toString(); @@ -506,8 +497,8 @@ public final class DomainCountsDifferenceUtil { html_writer.write( "" ); } - private static void writeCopyNumberValues( final SortedMap copy_means, - final DomainId domain, + private static void writeCopyNumberValues( final SortedMap copy_means, + final String domain, final GenomeWideCombinableDomains genome, final String species, final Writer plain_writer, @@ -547,16 +538,16 @@ public final class DomainCountsDifferenceUtil { final SortedMap low_copy_values, final SortedSet all_bdcs, final Map> bdcs_per_genome ) - throws IOException { + throws IOException { int counter = 0; int total_absense_counter = 0; int not_total_absense_counter = 0; - SurfacingUtil.addHtmlHead( html_writer, "Binary Domain Combination Copy Differences" ); + SurfacingUtil.writeHtmlHead( html_writer, "Binary Domain Combination Copy Differences" ); html_writer.write( "" ); for( final BinaryDomainCombination bdc : all_bdcs ) { if ( ( high_copy_base_values.get( bdc ) > 0 ) && ( high_copy_target_values.get( bdc ) > 0 ) && ( high_copy_base_values.get( bdc ) >= low_copy_values.get( bdc ) ) ) { - if ( high_copy_target_values.get( bdc ) >= min_diff + ( factor * low_copy_values.get( bdc ) ) ) { + if ( high_copy_target_values.get( bdc ) >= ( min_diff + ( factor * low_copy_values.get( bdc ) ) ) ) { if ( low_copy_values.get( bdc ) <= 0.0 ) { ++total_absense_counter; } @@ -565,8 +556,8 @@ public final class DomainCountsDifferenceUtil { } ++counter; html_writer.write( "
" + bdc.getId0() + " = " + bdc.getId1() + "" ); + + "\">" + bdc.getId0() + " = " + bdc.getId1() + "" ); html_writer.write( "" ); html_writer.write( "" ); for( final GenomeWideCombinableDomains genome : genomes ) { @@ -579,7 +570,7 @@ public final class DomainCountsDifferenceUtil { bdcs_per_genome, species, html_writer, - "#0000FF" ); + "#0000FF" ); html_writer.write( "" ); } else if ( low_copy_species.contains( species ) ) { @@ -590,7 +581,7 @@ public final class DomainCountsDifferenceUtil { bdcs_per_genome, species, html_writer, - "#A0A0A0" ); + "#A0A0A0" ); html_writer.write( "" ); } else if ( high_copy_base_species.contains( species ) ) { @@ -601,7 +592,7 @@ public final class DomainCountsDifferenceUtil { bdcs_per_genome, species, html_writer, - "#404040" ); + "#404040" ); html_writer.write( "" ); } } @@ -660,28 +651,28 @@ public final class DomainCountsDifferenceUtil { final List low_copy_species, final int min_diff, final Double factor, - final Map> domain_id_to_go_ids_map, + final Map> domain_id_to_go_ids_map, final Map go_id_to_term_map, final Writer plain_writer, final Writer html_writer, final File proteins_file_base, - final SortedMap high_copy_base_values, - final SortedMap high_copy_target_values, - final SortedMap low_copy_values, - final SortedSet all_domains, + final SortedMap high_copy_base_values, + final SortedMap high_copy_target_values, + final SortedMap low_copy_values, + final SortedSet all_domains, final SortedSet go_ids_of_passing_domains, final SortedMap> protein_lists_per_species ) - throws IOException { + throws IOException { int counter = 0; int total_absense_counter = 0; int not_total_absense_counter = 0; - SurfacingUtil.addHtmlHead( html_writer, "Domain Copy Differences" ); + SurfacingUtil.writeHtmlHead( html_writer, "Domain Copy Differences" ); html_writer.write( "
" ); - for( final DomainId domain_id : all_domains ) { + for( final String domain_id : all_domains ) { if ( ( high_copy_base_values.get( domain_id ) > 0 ) && ( high_copy_target_values.get( domain_id ) > 0 ) && ( high_copy_base_values.get( domain_id ) >= low_copy_values.get( domain_id ) ) ) { - if ( high_copy_target_values.get( domain_id ) >= min_diff - + ( factor * low_copy_values.get( domain_id ) ) ) { + if ( high_copy_target_values.get( domain_id ) >= ( min_diff + ( factor * low_copy_values + .get( domain_id ) ) ) ) { if ( low_copy_values.get( domain_id ) <= 0.0 ) { ++total_absense_counter; } @@ -693,12 +684,12 @@ public final class DomainCountsDifferenceUtil { if ( domain_id_to_go_ids_map.containsKey( domain_id ) ) { go_ids_of_passing_domains.addAll( domain_id_to_go_ids_map.get( domain_id ) ); } - plain_writer.write( domain_id.getId() ); + plain_writer.write( domain_id ); plain_writer.write( SurfacingConstants.NL ); - html_writer.write( "
" + domain_id.getId() + "" ); + html_writer.write( "
" + domain_id + "" ); html_writer.write( addGoInformation( domain_id, domain_id_to_go_ids_map, go_id_to_term_map ) - .toString() ); + .toString() ); html_writer.write( "" ); html_writer.write( "" ); for( final GenomeWideCombinableDomains genome : genomes ) { @@ -711,7 +702,7 @@ public final class DomainCountsDifferenceUtil { species, plain_writer, html_writer, - "#0000FF" ); + "#0000FF" ); html_writer.write( "" ); } else if ( low_copy_species.contains( species ) ) { @@ -722,7 +713,7 @@ public final class DomainCountsDifferenceUtil { species, plain_writer, html_writer, - "#A0A0A0" ); + "#A0A0A0" ); html_writer.write( "" ); } else if ( high_copy_base_species.contains( species ) ) { @@ -733,7 +724,7 @@ public final class DomainCountsDifferenceUtil { species, plain_writer, html_writer, - "#404040" ); + "#404040" ); html_writer.write( "" ); } } @@ -820,9 +811,9 @@ public final class DomainCountsDifferenceUtil { private static void writeProteinsToFile( final File proteins_file_base, final SortedMap> protein_lists_per_species, - final DomainId domain_id ) throws IOException { + final String domain_id ) throws IOException { final File my_proteins_file = new File( proteins_file_base.getParentFile() + ForesterUtil.FILE_SEPARATOR - + domain_id + PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX ); + + domain_id + PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX ); SurfacingUtil.checkForOutputFileWriteability( my_proteins_file ); final Writer proteins_file_writer = new BufferedWriter( new FileWriter( my_proteins_file ) ); SurfacingUtil.extractProteinNames( protein_lists_per_species, @@ -836,6 +827,6 @@ public final class DomainCountsDifferenceUtil { } public static enum COPY_CALCULATION_MODE { - MEAN, MEDIAN, MAX, MIN + MAX, MEAN, MEDIAN, MIN } }