From f47d5c382bc4e329ff6977ebcc46b75b57ffc901 Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Tue, 9 Jul 2013 22:43:56 +0000 Subject: [PATCH] inprogress --- .../src/org/forester/application/surfacing.java | 27 +- .../surfacing/BasicDomainSimilarityCalculator.java | 10 +- .../BasicGenomeWideCombinableDomains.java | 7 - .../org/forester/surfacing/DomainSimilarity.java | 2 +- .../surfacing/PairwiseGenomeComparator.java | 4 +- .../surfacing/PrintableDomainSimilarity.java | 343 ++++---------------- ...rintableSpeciesSpecificDomainSimilariyData.java | 33 +- .../src/org/forester/surfacing/SurfacingUtil.java | 215 ++++-------- 8 files changed, 146 insertions(+), 495 deletions(-) diff --git a/forester/java/src/org/forester/application/surfacing.java b/forester/java/src/org/forester/application/surfacing.java index 10a615f..0c548af 100644 --- a/forester/java/src/org/forester/application/surfacing.java +++ b/forester/java/src/org/forester/application/surfacing.java @@ -225,8 +225,8 @@ public class surfacing { final static private String INPUT_GENOMES_FILE_OPTION = "genomes"; final static private String INPUT_SPECIES_TREE_OPTION = "species_tree"; final static private String SEQ_EXTRACT_OPTION = "prot_extract"; - final static private String PRG_VERSION = "2.280"; - final static private String PRG_DATE = "130701"; + final static private String PRG_VERSION = "2.290"; + final static private String PRG_DATE = "130709"; final static private String E_MAIL = "czmasek@burnham.org"; final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester/surfacing"; final static private boolean IGNORE_DUFS_DEFAULT = true; @@ -600,9 +600,6 @@ public class surfacing { allowed_options.add( surfacing.PAIRWISE_DOMAIN_COMPARISONS_OPTION ); allowed_options.add( surfacing.IGNORE_DOMAINS_WITHOUT_COMBINATIONS_IN_ALL_SPECIES_OPTION ); allowed_options.add( surfacing.CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS ); - //allowed_options.add( JACKNIFE_OPTION ); - // allowed_options.add( JACKNIFE_RANDOM_SEED_OPTION ); - // allowed_options.add( JACKNIFE_RATIO_OPTION ); allowed_options.add( INPUT_SPECIES_TREE_OPTION ); allowed_options.add( FILTER_POSITIVE_OPTION ); allowed_options.add( FILTER_NEGATIVE_OPTION ); @@ -1844,22 +1841,6 @@ public class surfacing { } System.out.println( "Time for processing : " + parser.getTime() + "ms" ); log( "", log_writer ); - html_desc.append( "" + input_file_properties[ i ][ 0 ] + ":doms analyzed: " - + parser.getDomainsStored() + "; doms ignored: [ind score cutoffs: " - + parser.getDomainsIgnoredDueToIndividualScoreCutoff() + "] [E-value cutoff: " - + parser.getDomainsIgnoredDueToEval() + "] [DUF: " + parser.getDomainsIgnoredDueToDuf() - + "] [virus like ids: " + parser.getDomainsIgnoredDueToVirusLikeIds() + "] [negative dom filter: " - + parser.getDomainsIgnoredDueToNegativeDomainFilter() + "] [overlap: " - + parser.getDomainsIgnoredDueToOverlap() + "]" ); - if ( negative_filter_file != null ) { - html_desc.append( "; proteins ignored due to negative filter: " - + parser.getProteinsIgnoredDueToFilter() ); - } - if ( positive_filter_file != null ) { - html_desc.append( "; proteins ignored due to positive filter: " - + parser.getProteinsIgnoredDueToFilter() ); - } - html_desc.append( "" + nl ); try { int count = 0; for( final Protein protein : protein_list ) { @@ -2097,10 +2078,10 @@ public class surfacing { number_of_genomes == 2, species_order, domain_similarity_print_option, - domain_similarity_sort_field, scoring, true, - tax_code_to_id_map ); + tax_code_to_id_map, + false ); ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote main output (includes domain similarities) to: \"" + ( out_dir == null ? my_outfile : out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile ) + "\"" ); } diff --git a/forester/java/src/org/forester/surfacing/BasicDomainSimilarityCalculator.java b/forester/java/src/org/forester/surfacing/BasicDomainSimilarityCalculator.java index 846aa7a..0e8406a 100644 --- a/forester/java/src/org/forester/surfacing/BasicDomainSimilarityCalculator.java +++ b/forester/java/src/org/forester/surfacing/BasicDomainSimilarityCalculator.java @@ -138,7 +138,6 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat 0, 0, species_data, - getSort(), isSortBySpeciesCountFirst(), isTreatAsBinaryComparison() ); } @@ -198,7 +197,6 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat max_difference_in_counts, max_difference, species_data, - getSort(), isSortBySpeciesCountFirst(), isTreatAsBinaryComparison() ); } @@ -213,17 +211,12 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat max_difference_in_counts, max_difference, species_data, - getSort(), isSortBySpeciesCountFirst(), isTreatAsBinaryComparison() ); } return similarity; } - private DomainSimilarity.DomainSimilaritySortField getSort() { - return _sort; - } - private boolean isSortBySpeciesCountFirst() { return _sort_by_species_count_first; } @@ -235,8 +228,7 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat private static SpeciesSpecificDomainSimilariyData createSpeciesSpecificDomainSimilariyData( final CombinableDomains cd ) { final SpeciesSpecificDomainSimilariyData sd = new PrintableSpeciesSpecificDomainSimilariyData( cd.getKeyDomainProteinsCount(), cd.getKeyDomainCount(), - cd.getNumberOfCombinableDomains(), - cd.getKeyDomainConfidenceDescriptiveStatistics() ); + cd.getNumberOfCombinableDomains() ); for( final String domain : cd.getCombinableDomains() ) { sd.addProteinsExhibitingCombinationCount( domain, cd.getNumberOfProteinsExhibitingCombination( domain ) ); } diff --git a/forester/java/src/org/forester/surfacing/BasicGenomeWideCombinableDomains.java b/forester/java/src/org/forester/surfacing/BasicGenomeWideCombinableDomains.java index c6d205f..6e782a3 100644 --- a/forester/java/src/org/forester/surfacing/BasicGenomeWideCombinableDomains.java +++ b/forester/java/src/org/forester/surfacing/BasicGenomeWideCombinableDomains.java @@ -315,13 +315,6 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom else { domain_combination = new BasicCombinableDomains( pd_i.getDomainId(), species ); } - // ^^ if ( ( domain_id_to_go_ids_map != null ) - // ^^ && domain_id_to_go_ids_map.containsKey( pd_i.getDomainId() ) ) { - // ^^ final List go_ids = domain_id_to_go_ids_map.get( pd_i.getDomainId() ); - // ^^ for( final GoId go_id : go_ids ) { - // ^^ domain_combination.getKeyDomain().addGoId( go_id ); - // ^^ } - // ^^ } instance.add( id_i, domain_combination ); } final Set saw_j = new HashSet(); diff --git a/forester/java/src/org/forester/surfacing/DomainSimilarity.java b/forester/java/src/org/forester/surfacing/DomainSimilarity.java index 0787439..be7273e 100644 --- a/forester/java/src/org/forester/surfacing/DomainSimilarity.java +++ b/forester/java/src/org/forester/surfacing/DomainSimilarity.java @@ -47,7 +47,7 @@ public interface DomainSimilarity extends Comparable { MIN, MAX, SD, MEAN, ABS_MAX_COUNTS_DIFFERENCE, MAX_COUNTS_DIFFERENCE, MAX_DIFFERENCE, SPECIES_COUNT, DOMAIN_ID, } - public SortedSet getCombinableDomainIds( final Species species_of_combinable_domain );; + public SortedSet getCombinableDomainIds( final Species species_of_combinable_domain ); public String getDomainId(); diff --git a/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java b/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java index eae12fc..e9ee8b3 100644 --- a/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java +++ b/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java @@ -217,10 +217,10 @@ public class PairwiseGenomeComparator { true, null, domain_similarity_print_option, - domain_similarity_sort_field, scoring, false, - tax_code_to_id_map ); + tax_code_to_id_map, + false ); } catch ( final IOException e ) { ForesterUtil.fatalError( command_line_prg_name, "Failed to write similarites to: \"" diff --git a/forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java b/forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java index 891fba5..c785ff7 100644 --- a/forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java +++ b/forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java @@ -30,6 +30,7 @@ import java.util.List; import java.util.Map; import java.util.SortedMap; import java.util.SortedSet; +import java.util.TreeMap; import java.util.TreeSet; import org.forester.species.Species; @@ -39,11 +40,9 @@ import org.forester.util.ForesterUtil; public class PrintableDomainSimilarity implements DomainSimilarity { final public static String SPECIES_SEPARATOR = " "; - final private static char TAB = '\t'; - final private static int BEFORE = -1; final private static int EQUAL = 0; - final private static int AFTER = 1; final private static String NO_SPECIES = " "; + private static final boolean PRINT_MORE_INFO = false; final private double _min; final private double _max; final private double _mean; @@ -53,18 +52,10 @@ public class PrintableDomainSimilarity implements DomainSimilarity { private final int _max_difference; final private CombinableDomains _combinable_domains; final private SortedMap _species_data; - final private DomainSimilaritySortField _sort_field; private List _species_order; - private final boolean _sort_by_species_count_first; private DomainSimilarityCalculator.Detailedness _detailedness; private final boolean _treat_as_binary_comparison; - /** - * If go_id_to_term_map not null, detailed GO information is written, - * only GO ids otherwise. - * - * - */ public PrintableDomainSimilarity( final CombinableDomains combinable_domains, final double min, final double max, @@ -75,15 +66,11 @@ public class PrintableDomainSimilarity implements DomainSimilarity { final int max_difference_in_counts, final int max_difference, final SortedMap species_data, - final DomainSimilaritySortField sort_field, final boolean sort_by_species_count_first, final boolean treat_as_binary_comparison ) { if ( combinable_domains == null ) { throw new IllegalArgumentException( "attempt to use null combinable domains" ); } - if ( sort_field == null ) { - throw new IllegalArgumentException( "attempt to use null sorting" ); - } if ( species_data == null ) { throw new IllegalArgumentException( "attempt to use null species data" ); } @@ -112,8 +99,6 @@ public class PrintableDomainSimilarity implements DomainSimilarity { _max_difference_in_counts = max_difference_in_counts; _max_difference = max_difference; _species_data = species_data; - _sort_field = sort_field; - _sort_by_species_count_first = sort_by_species_count_first; _treat_as_binary_comparison = treat_as_binary_comparison; final int s = species_data.size(); if ( ( ( s * s ) - s ) != ( getN() * 2 ) ) { @@ -134,21 +119,8 @@ public class PrintableDomainSimilarity implements DomainSimilarity { final Species species, final boolean html, final Map tax_code_to_id_map ) { - if ( getDetaildness() != DomainSimilarityCalculator.Detailedness.BASIC ) { - sb.append( "[" ); - } if ( html ) { - sb.append( "" ); - final String tax_code = species.getSpeciesId(); - if ( !ForesterUtil.isEmpty( tax_code ) - && ( ( tax_code_to_id_map != null ) && tax_code_to_id_map.containsKey( tax_code ) ) ) { - sb.append( "" + tax_code + "" ); - } - else { - sb.append( tax_code ); - } - sb.append( "" ); + addTaxWithLink( sb, species.getSpeciesId(), tax_code_to_id_map ); } else { sb.append( species.getSpeciesId() ); @@ -156,48 +128,38 @@ public class PrintableDomainSimilarity implements DomainSimilarity { if ( getDetaildness() != DomainSimilarityCalculator.Detailedness.BASIC ) { sb.append( ":" ); sb.append( getSpeciesData().get( species ).toStringBuffer( getDetaildness(), html ) ); - sb.append( "]" ); } if ( html ) { sb.append( "
" ); } - sb.append( PrintableDomainSimilarity.SPECIES_SEPARATOR ); - } - - private void boldEndIfSortedBy( final DomainSimilaritySortField sort_field, final StringBuffer sb ) { - if ( getSortField() == sort_field ) { - sb.append( "" ); + else { + sb.append( PrintableDomainSimilarity.SPECIES_SEPARATOR ); } } - private void boldStartIfSortedBy( final DomainSimilaritySortField sort_field, final StringBuffer sb ) { - if ( getSortField() == sort_field ) { - sb.append( "" ); + private void addTaxWithLink( final StringBuffer sb, + final String tax_code, + final Map tax_code_to_id_map ) { + sb.append( "" ); + if ( !ForesterUtil.isEmpty( tax_code ) + && ( ( tax_code_to_id_map != null ) && tax_code_to_id_map.containsKey( tax_code ) ) ) { + sb.append( "" + tax_code + "" ); + } + else { + sb.append( tax_code ); } + sb.append( "" ); } private int compareByDomainId( final DomainSimilarity other ) { - return getDomainId().compareTo( other.getDomainId() ); - } - - private int compareBySpeciesCount( final DomainSimilarity domain_similarity ) { - final int s_this = getSpeciesData().size(); - final int s_other = domain_similarity.getSpeciesData().size(); - if ( s_this < s_other ) { - return PrintableDomainSimilarity.BEFORE; - } - else if ( s_this > s_other ) { - return PrintableDomainSimilarity.AFTER; - } - else { - return PrintableDomainSimilarity.EQUAL; - } + return getDomainId().compareToIgnoreCase( other.getDomainId() ); } @Override public int compareTo( final DomainSimilarity domain_similarity ) { if ( this == domain_similarity ) { - return PrintableDomainSimilarity.EQUAL; + return EQUAL; } else if ( domain_similarity == null ) { throw new IllegalArgumentException( "attempt to compare " + this.getClass() + " to null" ); @@ -206,138 +168,7 @@ public class PrintableDomainSimilarity implements DomainSimilarity { throw new IllegalArgumentException( "attempt to compare " + this.getClass() + " to " + domain_similarity.getClass() ); } - switch ( getSortField() ) { - case MIN: - if ( isSortBySpeciesCountFirst() ) { - final int i = compareBySpeciesCount( domain_similarity ); - if ( i != PrintableDomainSimilarity.EQUAL ) { - return i; - } - } - if ( getMinimalSimilarityScore() < domain_similarity.getMinimalSimilarityScore() ) { - return PrintableDomainSimilarity.BEFORE; - } - else if ( getMinimalSimilarityScore() > domain_similarity.getMinimalSimilarityScore() ) { - return PrintableDomainSimilarity.AFTER; - } - else { - return compareByDomainId( domain_similarity ); - } - case MAX: - if ( isSortBySpeciesCountFirst() ) { - final int i = compareBySpeciesCount( domain_similarity ); - if ( i != PrintableDomainSimilarity.EQUAL ) { - return i; - } - } - if ( getMaximalSimilarityScore() < domain_similarity.getMaximalSimilarityScore() ) { - return PrintableDomainSimilarity.BEFORE; - } - else if ( getMaximalSimilarityScore() > domain_similarity.getMaximalSimilarityScore() ) { - return PrintableDomainSimilarity.AFTER; - } - else { - return compareByDomainId( domain_similarity ); - } - case MEAN: - if ( isSortBySpeciesCountFirst() ) { - final int i = compareBySpeciesCount( domain_similarity ); - if ( i != PrintableDomainSimilarity.EQUAL ) { - return i; - } - } - if ( getMeanSimilarityScore() < domain_similarity.getMeanSimilarityScore() ) { - return PrintableDomainSimilarity.BEFORE; - } - else if ( getMeanSimilarityScore() > domain_similarity.getMeanSimilarityScore() ) { - return PrintableDomainSimilarity.AFTER; - } - else { - return compareByDomainId( domain_similarity ); - } - case SD: - if ( isSortBySpeciesCountFirst() ) { - final int i = compareBySpeciesCount( domain_similarity ); - if ( i != PrintableDomainSimilarity.EQUAL ) { - return i; - } - } - if ( getStandardDeviationOfSimilarityScore() < domain_similarity - .getStandardDeviationOfSimilarityScore() ) { - return PrintableDomainSimilarity.BEFORE; - } - else if ( getStandardDeviationOfSimilarityScore() > domain_similarity - .getStandardDeviationOfSimilarityScore() ) { - return PrintableDomainSimilarity.AFTER; - } - else { - return compareByDomainId( domain_similarity ); - } - case MAX_DIFFERENCE: - if ( isSortBySpeciesCountFirst() ) { - final int i = compareBySpeciesCount( domain_similarity ); - if ( i != PrintableDomainSimilarity.EQUAL ) { - return i; - } - } - if ( getMaximalDifference() > domain_similarity.getMaximalDifference() ) { - return PrintableDomainSimilarity.BEFORE; - } - else if ( getMaximalDifference() < domain_similarity.getMaximalDifference() ) { - return PrintableDomainSimilarity.AFTER; - } - else { - return compareByDomainId( domain_similarity ); - } - case ABS_MAX_COUNTS_DIFFERENCE: - if ( isSortBySpeciesCountFirst() ) { - final int i = compareBySpeciesCount( domain_similarity ); - if ( i != PrintableDomainSimilarity.EQUAL ) { - return i; - } - } - if ( Math.abs( getMaximalDifferenceInCounts() ) > Math.abs( domain_similarity - .getMaximalDifferenceInCounts() ) ) { - return PrintableDomainSimilarity.BEFORE; - } - else if ( Math.abs( getMaximalDifferenceInCounts() ) < Math.abs( domain_similarity - .getMaximalDifferenceInCounts() ) ) { - return PrintableDomainSimilarity.AFTER; - } - else { - return compareByDomainId( domain_similarity ); - } - case MAX_COUNTS_DIFFERENCE: - if ( getSpeciesData().size() != 2 ) { - throw new RuntimeException( "attempt to sort by maximal difference with species not equal to two" ); - } - if ( isSortBySpeciesCountFirst() ) { - final int i = compareBySpeciesCount( domain_similarity ); - if ( i != PrintableDomainSimilarity.EQUAL ) { - return i; - } - } - if ( getMaximalDifferenceInCounts() > domain_similarity.getMaximalDifferenceInCounts() ) { - return PrintableDomainSimilarity.BEFORE; - } - else if ( getMaximalDifferenceInCounts() < domain_similarity.getMaximalDifferenceInCounts() ) { - return PrintableDomainSimilarity.AFTER; - } - else { - return compareByDomainId( domain_similarity ); - } - case SPECIES_COUNT: - final int i = compareBySpeciesCount( domain_similarity ); - if ( i != PrintableDomainSimilarity.EQUAL ) { - return i; - } - else { - return compareByDomainId( domain_similarity ); - } - case DOMAIN_ID: - return compareByDomainId( domain_similarity ); - } - throw new AssertionError( "Unknown sort method: " + getSortField() ); + return compareByDomainId( domain_similarity ); } @Override @@ -395,10 +226,6 @@ public class PrintableDomainSimilarity implements DomainSimilarity { return _n; } - private DomainSimilaritySortField getSortField() { - return _sort_field; - } - @Override public SortedSet getSpecies() { final SortedSet species = new TreeSet(); @@ -426,6 +253,29 @@ public class PrintableDomainSimilarity implements DomainSimilarity { return sb; } + private StringBuffer getDomainDataInAlphabeticalOrder() { + final SortedMap> m = new TreeMap>(); + final StringBuffer sb = new StringBuffer(); + for( final Species species : getSpeciesData().keySet() ) { + for( final String combable_dom : getCombinableDomainIds( species ) ) { + if ( !m.containsKey( combable_dom ) ) { + m.put( combable_dom, new TreeSet() ); + } + m.get( combable_dom ).add( species.getSpeciesId() ); + } + } + for( final Map.Entry> e : m.entrySet() ) { + sb.append( "" + e.getKey() + "" ); + sb.append( ": " ); + for( final String s : e.getValue() ) { + sb.append( s ); + sb.append( " " ); + } + sb.append( "
" ); + } + return sb; + } + private StringBuffer getSpeciesDataInCustomOrder( final boolean html, final Map tax_code_to_id_map ) { final StringBuffer sb = new StringBuffer(); for( final Species order_species : getSpeciesCustomOrder() ) { @@ -449,10 +299,6 @@ public class PrintableDomainSimilarity implements DomainSimilarity { _detailedness = DomainSimilarityCalculator.Detailedness.PUNCTILIOUS; } - private boolean isSortBySpeciesCountFirst() { - return _sort_by_species_count_first; - } - private boolean isTreatAsBinaryComparison() { return _treat_as_binary_comparison; } @@ -485,10 +331,10 @@ public class PrintableDomainSimilarity implements DomainSimilarity { final StringBuffer sb = new StringBuffer(); sb.append( "" ); sb.append( "" ); - boldStartIfSortedBy( DomainSimilaritySortField.DOMAIN_ID, sb ); + sb.append( "" ); sb.append( "" + getDomainId() + "" ); - boldEndIfSortedBy( DomainSimilaritySortField.DOMAIN_ID, sb ); + sb.append( "" ); sb.append( "" ); sb.append( "" ); sb.append( "" ); @@ -496,70 +342,52 @@ public class PrintableDomainSimilarity implements DomainSimilarity { + "\" target=\"gs_window\">gs" ); sb.append( "" ); sb.append( "" ); - boldStartIfSortedBy( DomainSimilaritySortField.MEAN, sb ); sb.append( ForesterUtil.round( getMeanSimilarityScore(), 3 ) ); - boldEndIfSortedBy( DomainSimilaritySortField.MEAN, sb ); sb.append( "" ); - if ( !isTreatAsBinaryComparison() ) { - sb.append( "" ); - sb.append( "(" ); - boldStartIfSortedBy( DomainSimilaritySortField.SD, sb ); - sb.append( ForesterUtil.round( getStandardDeviationOfSimilarityScore(), 3 ) ); - boldEndIfSortedBy( DomainSimilaritySortField.SD, sb ); - sb.append( ")" ); - sb.append( "" ); - sb.append( "" ); - sb.append( "[" ); - boldStartIfSortedBy( DomainSimilaritySortField.MIN, sb ); - sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) ); - boldEndIfSortedBy( DomainSimilaritySortField.MIN, sb ); - sb.append( "-" ); - boldStartIfSortedBy( DomainSimilaritySortField.MAX, sb ); - sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) ); - boldEndIfSortedBy( DomainSimilaritySortField.MAX, sb ); - sb.append( "]" ); - sb.append( "" ); + if ( PRINT_MORE_INFO ) { + if ( !isTreatAsBinaryComparison() ) { + sb.append( "" ); + sb.append( "(" ); + sb.append( ForesterUtil.round( getStandardDeviationOfSimilarityScore(), 3 ) ); + sb.append( ")" ); + sb.append( "" ); + sb.append( "" ); + sb.append( "[" ); + sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) ); + sb.append( "-" ); + sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) ); + sb.append( "]" ); + sb.append( "" ); + } } sb.append( "" ); - boldStartIfSortedBy( DomainSimilaritySortField.MAX_DIFFERENCE, sb ); sb.append( getMaximalDifference() ); - boldEndIfSortedBy( DomainSimilaritySortField.MAX_DIFFERENCE, sb ); sb.append( "" ); sb.append( "" ); if ( isTreatAsBinaryComparison() ) { - boldStartIfSortedBy( DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE, sb ); - boldStartIfSortedBy( DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE, sb ); sb.append( getMaximalDifferenceInCounts() ); - boldEndIfSortedBy( DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE, sb ); - boldStartIfSortedBy( DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE, sb ); } else { - boldStartIfSortedBy( DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE, sb ); - boldStartIfSortedBy( DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE, sb ); sb.append( Math.abs( getMaximalDifferenceInCounts() ) ); - boldEndIfSortedBy( DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE, sb ); - boldStartIfSortedBy( DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE, sb ); } sb.append( "" ); if ( !isTreatAsBinaryComparison() ) { sb.append( "" ); - if ( ( getSortField() == DomainSimilaritySortField.SPECIES_COUNT ) || isSortBySpeciesCountFirst() ) { - sb.append( "" ); - } + sb.append( "" ); sb.append( getSpeciesData().size() ); - if ( ( getSortField() == DomainSimilaritySortField.SPECIES_COUNT ) || isSortBySpeciesCountFirst() ) { - sb.append( "" ); - } + sb.append( "" ); sb.append( "" ); } if ( ( getSpeciesCustomOrder() == null ) || getSpeciesCustomOrder().isEmpty() ) { sb.append( "" ); sb.append( getSpeciesDataInAlphabeticalOrder( true, tax_code_to_id_map ) ); + sb.append( getDomainDataInAlphabeticalOrder() ); sb.append( "" ); } else { sb.append( "" ); sb.append( getSpeciesDataInCustomOrder( true, tax_code_to_id_map ) ); + sb.append( getDomainDataInAlphabeticalOrder() ); sb.append( "" ); } sb.append( "" ); @@ -569,49 +397,6 @@ public class PrintableDomainSimilarity implements DomainSimilarity { private StringBuffer toStringBufferSimpleTabDelimited() { final StringBuffer sb = new StringBuffer(); sb.append( getDomainId() ); - switch ( getSortField() ) { - case MIN: - sb.append( TAB ); - sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) ); - break; - case MAX: - sb.append( TAB ); - sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) ); - break; - case MEAN: - sb.append( TAB ); - sb.append( ForesterUtil.round( getMeanSimilarityScore(), 3 ) ); - break; - case SD: - sb.append( TAB ); - sb.append( ForesterUtil.round( getStandardDeviationOfSimilarityScore(), 3 ) ); - break; - case MAX_DIFFERENCE: - sb.append( TAB ); - sb.append( getMaximalDifference() ); - case ABS_MAX_COUNTS_DIFFERENCE: - case MAX_COUNTS_DIFFERENCE: - sb.append( TAB ); - if ( isTreatAsBinaryComparison() ) { - sb.append( getMaximalDifferenceInCounts() ); - } - else { - sb.append( Math.abs( getMaximalDifferenceInCounts() ) ); - } - break; - case SPECIES_COUNT: - sb.append( TAB ); - sb.append( getSpeciesData().size() ); - break; - case DOMAIN_ID: - break; - default: - throw new AssertionError( "Unknown sort method: " + getSortField() ); - } - // ^^ if ( getGoAnnotationOutput() != DomainSimilarityCalculator.GoAnnotationOutput.NONE ) { - // ^^ sb.append( TAB ); - // ^^ addGoInformation( sb, true, false ); - // ^^ } return sb; } diff --git a/forester/java/src/org/forester/surfacing/PrintableSpeciesSpecificDomainSimilariyData.java b/forester/java/src/org/forester/surfacing/PrintableSpeciesSpecificDomainSimilariyData.java index d361573..74e168d 100644 --- a/forester/java/src/org/forester/surfacing/PrintableSpeciesSpecificDomainSimilariyData.java +++ b/forester/java/src/org/forester/surfacing/PrintableSpeciesSpecificDomainSimilariyData.java @@ -27,31 +27,23 @@ package org.forester.surfacing; -import java.text.DecimalFormat; -import java.text.NumberFormat; import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; -import org.forester.util.DescriptiveStatistics; - class PrintableSpeciesSpecificDomainSimilariyData implements SpeciesSpecificDomainSimilariyData { - private final static NumberFormat FORMATTER = new DecimalFormat( "0.0E0" ); - final SortedMap _combinable_domain_id_to_count_map; - final private int _key_domain_proteins_count; - final private int _key_domain_domains_count; - final private int _combinable_domains_count; - final private DescriptiveStatistics _key_domain_confidence_descriptive_statistics; + final SortedMap _combinable_domain_id_to_count_map; + final private int _key_domain_proteins_count; + final private int _key_domain_domains_count; + final private int _combinable_domains_count; public PrintableSpeciesSpecificDomainSimilariyData( final int key_domain_proteins_count, final int key_domain_domains_count, - final int combinable_domains, - final DescriptiveStatistics key_domain_confidence_descriptive_statistics ) { + final int combinable_domains ) { _key_domain_proteins_count = key_domain_proteins_count; _key_domain_domains_count = key_domain_domains_count; _combinable_domains_count = combinable_domains; - _key_domain_confidence_descriptive_statistics = key_domain_confidence_descriptive_statistics; _combinable_domain_id_to_count_map = new TreeMap(); } @@ -72,10 +64,6 @@ class PrintableSpeciesSpecificDomainSimilariyData implements SpeciesSpecificDoma return _combinable_domains_count; } - private DescriptiveStatistics getKeyDomainConfidenceDescriptiveStatistics() { - return _key_domain_confidence_descriptive_statistics; - } - private int getKeyDomainDomainsCount() { return _key_domain_domains_count; } @@ -108,14 +96,6 @@ class PrintableSpeciesSpecificDomainSimilariyData implements SpeciesSpecificDoma sb.append( getKeyDomainProteinsCount() ); sb.append( ", " ); sb.append( getCombinableDomainsCount() ); - sb.append( ", " ); - if ( html ) { - sb.append( "" ); - } - sb.append( FORMATTER.format( getKeyDomainConfidenceDescriptiveStatistics().arithmeticMean() ) ); - if ( html ) { - sb.append( "" ); - } if ( !getCombinableDomainIdToCountsMap().isEmpty() ) { sb.append( ":" ); } @@ -136,9 +116,6 @@ class PrintableSpeciesSpecificDomainSimilariyData implements SpeciesSpecificDoma sb.append( ":" ); sb.append( getCombinableDomainIdToCountsMap().get( domain_id ) ); } - if ( i < ( ids.size() - 1 ) ) { - sb.append( "," ); - } } return sb; } diff --git a/forester/java/src/org/forester/surfacing/SurfacingUtil.java b/forester/java/src/org/forester/surfacing/SurfacingUtil.java index 3191312..9dfc40f 100644 --- a/forester/java/src/org/forester/surfacing/SurfacingUtil.java +++ b/forester/java/src/org/forester/surfacing/SurfacingUtil.java @@ -451,10 +451,6 @@ public final class SurfacingUtil { out.write( species + "\t" ); } out.write( ForesterUtil.LINE_SEPARATOR ); - // DescriptiveStatistics stats_for_domain = domain_lengths - // .calculateMeanBasedStatistics(); - //AsciiHistogram histo = new AsciiHistogram( stats_for_domain ); - //System.out.println( histo.toStringBuffer( 40, '=', 60, 4 ).toString() ); } } out.write( ForesterUtil.LINE_SEPARATOR ); @@ -488,16 +484,6 @@ public final class SurfacingUtil { } } out.close(); - // final List histogram_datas = new ArrayList(); - // for( int i = 0; i < number_of_genomes; ++i ) { - // final Species species = new BasicSpecies( input_file_properties[ i ][ 0 ] ); - // histogram_datas - // .add( new HistogramData( species.toString(), domain_lengths_table - // .calculateMeanBasedStatisticsForSpecies( species ) - // .getDataAsDoubleArray(), 5, 600, null, 60 ) ); - // } - // final HistogramsFrame hf = new HistogramsFrame( histogram_datas ); - // hf.setVisible( true ); System.gc(); } @@ -1668,101 +1654,28 @@ public final class SurfacingUtil { final boolean treat_as_binary, final List species_order, final PrintableDomainSimilarity.PRINT_OPTION print_option, - final DomainSimilarity.DomainSimilaritySortField sort_field, final DomainSimilarity.DomainSimilarityScoring scoring, final boolean verbose, - final Map tax_code_to_id_map ) + final Map tax_code_to_id_map, + final boolean print_some_stats ) throws IOException { - final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); - String histogram_title = null; - switch ( sort_field ) { - case ABS_MAX_COUNTS_DIFFERENCE: - if ( treat_as_binary ) { - histogram_title = "absolute counts difference:"; - } - else { - histogram_title = "absolute (maximal) counts difference:"; - } - break; - case MAX_COUNTS_DIFFERENCE: - if ( treat_as_binary ) { - histogram_title = "counts difference:"; - } - else { - histogram_title = "(maximal) counts difference:"; - } - break; - case DOMAIN_ID: - histogram_title = "score mean:"; - break; - case MIN: - histogram_title = "score minimum:"; - break; - case MAX: - histogram_title = "score maximum:"; - break; - case MAX_DIFFERENCE: - if ( treat_as_binary ) { - histogram_title = "difference:"; - } - else { - histogram_title = "(maximal) difference:"; + DescriptiveStatistics stats = null; + AsciiHistogram histo = null; + if ( print_some_stats ) { + stats = new BasicDescriptiveStatistics(); + final String histogram_title = "score mean distribution:"; + for( final DomainSimilarity similarity : similarities ) { + stats.addValue( similarity.getMeanSimilarityScore() ); + } + try { + if ( stats.getMin() < stats.getMax() ) { + histo = new AsciiHistogram( stats, histogram_title ); } - break; - case MEAN: - histogram_title = "score mean:"; - break; - case SD: - histogram_title = "score standard deviation:"; - break; - case SPECIES_COUNT: - histogram_title = "species number:"; - break; - default: - throw new AssertionError( "Unknown sort field: " + sort_field ); - } - for( final DomainSimilarity similarity : similarities ) { - switch ( sort_field ) { - case ABS_MAX_COUNTS_DIFFERENCE: - stats.addValue( Math.abs( similarity.getMaximalDifferenceInCounts() ) ); - break; - case MAX_COUNTS_DIFFERENCE: - stats.addValue( similarity.getMaximalDifferenceInCounts() ); - break; - case DOMAIN_ID: - stats.addValue( similarity.getMeanSimilarityScore() ); - break; - case MIN: - stats.addValue( similarity.getMinimalSimilarityScore() ); - break; - case MAX: - stats.addValue( similarity.getMaximalSimilarityScore() ); - break; - case MAX_DIFFERENCE: - stats.addValue( similarity.getMaximalDifference() ); - break; - case MEAN: - stats.addValue( similarity.getMeanSimilarityScore() ); - break; - case SD: - stats.addValue( similarity.getStandardDeviationOfSimilarityScore() ); - break; - case SPECIES_COUNT: - stats.addValue( similarity.getSpecies().size() ); - break; - default: - throw new AssertionError( "Unknown sort field: " + sort_field ); } - } - AsciiHistogram histo = null; - try { - if ( stats.getMin() < stats.getMax() ) { - histo = new AsciiHistogram( stats, histogram_title ); + catch ( final Exception e ) { + histo = null; } } - catch ( Exception e ) { - histo = null; - } if ( ( single_writer != null ) && ( ( split_writers == null ) || split_writers.isEmpty() ) ) { split_writers = new HashMap(); split_writers.put( '_', single_writer ); @@ -1776,70 +1689,38 @@ public final class SurfacingUtil { w.write( "" ); w.write( SurfacingConstants.NL ); if ( key != '_' ) { - addHtmlHead( w, "DCs (" + html_title + ") " + key.toString().toUpperCase() ); + addHtmlHead( w, "DC analysis (" + html_title + ") " + key.toString().toUpperCase() ); } else { - addHtmlHead( w, "DCs (" + html_title + ")" ); + addHtmlHead( w, "DC analysis (" + html_title + ")" ); } w.write( SurfacingConstants.NL ); w.write( "" ); w.write( SurfacingConstants.NL ); w.write( html_desc.toString() ); w.write( SurfacingConstants.NL ); - w.write( "
" ); - w.write( "
" ); - w.write( SurfacingConstants.NL ); - w.write( "
" );
-                    w.write( SurfacingConstants.NL );
-                    if ( histo != null ) {
-                        w.write( histo.toStringBuffer( 20, '|', 40, 5 ).toString() );
-                        w.write( SurfacingConstants.NL );
-                    }
-                    w.write( "
" ); - w.write( SurfacingConstants.NL ); - w.write( "" ); - w.write( SurfacingConstants.NL ); - w.write( "" ); - w.write( SurfacingConstants.NL ); - w.write( "" ); - w.write( SurfacingConstants.NL ); - w.write( "" ); - w.write( SurfacingConstants.NL ); - w.write( "" ); - w.write( SurfacingConstants.NL ); - if ( stats.getN() > 1 ) { - w.write( "" ); + if ( print_some_stats ) { + printSomeStats( stats, histo, w ); } - else { - w.write( "" ); - } - w.write( SurfacingConstants.NL ); - w.write( "" ); - w.write( SurfacingConstants.NL ); - w.write( "
N: " + stats.getN() + "
Min: " + stats.getMin() + "
Max: " + stats.getMax() + "
Mean: " + stats.arithmeticMean() + "
SD: " + stats.sampleStandardDeviation() + "
SD: n/a
Median: " + stats.median() + "
" ); - w.write( SurfacingConstants.NL ); - w.write( "
" ); - w.write( SurfacingConstants.NL ); w.write( "
" ); w.write( SurfacingConstants.NL ); w.write( "
" ); w.write( SurfacingConstants.NL ); w.write( "" ); w.write( SurfacingConstants.NL ); + w.write( "" ); + w.write( SurfacingConstants.NL ); } break; } - for( final Writer w : split_writers.values() ) { - w.write( SurfacingConstants.NL ); - } // for( final DomainSimilarity similarity : similarities ) { if ( ( species_order != null ) && !species_order.isEmpty() ) { ( ( PrintableDomainSimilarity ) similarity ).setSpeciesOrder( species_order ); } if ( single_writer != null ) { - single_writer.write( "" + similarity.getDomainId() - + "
" ); + single_writer.write( "" ); single_writer.write( SurfacingConstants.NL ); } else { @@ -1848,13 +1729,19 @@ public final class SurfacingUtil { if ( local_writer == null ) { local_writer = split_writers.get( '0' ); } - local_writer.write( "" + similarity.getDomainId() - + "
" ); + local_writer.write( "" ); local_writer.write( SurfacingConstants.NL ); } } - // w.write( "
" ); - // w.write( SurfacingConstants.NL ); + for( final Writer w : split_writers.values() ) { + w.write( "
Domains:
" + + similarity.getDomainId() + "
" + + similarity.getDomainId() + "
" ); + w.write( SurfacingConstants.NL ); + w.write( "
" ); + w.write( SurfacingConstants.NL ); + w.write( "" ); + w.write( SurfacingConstants.NL ); + } // for( final DomainSimilarity similarity : similarities ) { if ( ( species_order != null ) && !species_order.isEmpty() ) { @@ -1895,6 +1782,42 @@ public final class SurfacingUtil { return stats; } + private static void printSomeStats( final DescriptiveStatistics stats, final AsciiHistogram histo, final Writer w ) + throws IOException { + w.write( "
" ); + w.write( "
" ); + w.write( SurfacingConstants.NL ); + w.write( "
" );
+        w.write( SurfacingConstants.NL );
+        if ( histo != null ) {
+            w.write( histo.toStringBuffer( 20, '|', 40, 5 ).toString() );
+            w.write( SurfacingConstants.NL );
+        }
+        w.write( "
" ); + w.write( SurfacingConstants.NL ); + w.write( "
" ); + w.write( SurfacingConstants.NL ); + w.write( "" ); + w.write( SurfacingConstants.NL ); + w.write( "" ); + w.write( SurfacingConstants.NL ); + w.write( "" ); + w.write( SurfacingConstants.NL ); + w.write( "" ); + w.write( SurfacingConstants.NL ); + if ( stats.getN() > 1 ) { + w.write( "" ); + } + else { + w.write( "" ); + } + w.write( SurfacingConstants.NL ); + w.write( "
N: " + stats.getN() + "
Min: " + stats.getMin() + "
Max: " + stats.getMax() + "
Mean: " + stats.arithmeticMean() + "
SD: " + stats.sampleStandardDeviation() + "
SD: n/a
" ); + w.write( SurfacingConstants.NL ); + w.write( "
" ); + w.write( SurfacingConstants.NL ); + } + public static void writeMatrixToFile( final CharacterStateMatrix matrix, final String filename, final Format format ) { -- 1.7.10.2