From 249882688f35000b8cedfff3b4da1845d749d18e Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Tue, 9 Jul 2013 17:54:03 +0000 Subject: [PATCH] inprogress --- .../src/org/forester/application/surfacing.java | 19 ++-- .../surfacing/PairwiseGenomeComparator.java | 6 +- .../surfacing/PrintableDomainSimilarity.java | 117 +------------------- .../src/org/forester/surfacing/SurfacingUtil.java | 76 ++++++------- 4 files changed, 41 insertions(+), 177 deletions(-) diff --git a/forester/java/src/org/forester/application/surfacing.java b/forester/java/src/org/forester/application/surfacing.java index fc98702..10a615f 100644 --- a/forester/java/src/org/forester/application/surfacing.java +++ b/forester/java/src/org/forester/application/surfacing.java @@ -228,7 +228,7 @@ public class surfacing { final static private String PRG_VERSION = "2.280"; final static private String PRG_DATE = "130701"; final static private String E_MAIL = "czmasek@burnham.org"; - final static private String WWW = "www.phylosoft.org/forester/applications/surfacing"; + final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester/surfacing"; final static private boolean IGNORE_DUFS_DEFAULT = true; final static private boolean IGNORE_COMBINATION_WITH_SAME_DEFAULLT = false; final static private double MAX_E_VALUE_DEFAULT = -1; @@ -1844,14 +1844,13 @@ public class surfacing { } System.out.println( "Time for processing : " + parser.getTime() + "ms" ); log( "", log_writer ); - html_desc.append( "" + input_file_properties[ i ][ 0 ] + " [species: " - + input_file_properties[ i ][ 1 ] + "]" + ":domains analyzed: " - + parser.getDomainsStored() + "; domains ignored: [ind score cutoffs: " + html_desc.append( "" + input_file_properties[ i ][ 0 ] + ":doms analyzed: " + + parser.getDomainsStored() + "; doms ignored: [ind score cutoffs: " + parser.getDomainsIgnoredDueToIndividualScoreCutoff() + "] [E-value cutoff: " + parser.getDomainsIgnoredDueToEval() + "] [DUF: " + parser.getDomainsIgnoredDueToDuf() - + "] [virus like ids: " + parser.getDomainsIgnoredDueToVirusLikeIds() - + "] [negative domain filter: " + parser.getDomainsIgnoredDueToNegativeDomainFilter() - + "] [overlap: " + parser.getDomainsIgnoredDueToOverlap() + "]" ); + + "] [virus like ids: " + parser.getDomainsIgnoredDueToVirusLikeIds() + "] [negative dom filter: " + + parser.getDomainsIgnoredDueToNegativeDomainFilter() + "] [overlap: " + + parser.getDomainsIgnoredDueToOverlap() + "]" ); if ( negative_filter_file != null ) { html_desc.append( "; proteins ignored due to negative filter: " + parser.getProteinsIgnoredDueToFilter() ); @@ -2057,11 +2056,7 @@ public class surfacing { gwcd_list, ignore_domains_without_combs_in_all_spec, ignore_species_specific_domains ); - SurfacingUtil.decoratePrintableDomainSimilarities( similarities, - detailedness, - go_annotation_output, - go_id_to_term_map, - go_namespace_limit ); + SurfacingUtil.decoratePrintableDomainSimilarities( similarities, detailedness ); final Map tax_code_to_id_map = SurfacingUtil.createTaxCodeToIdMap( intrees[ 0 ] ); try { String my_outfile = output_file.toString(); diff --git a/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java b/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java index 0fd6c6d..eae12fc 100644 --- a/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java +++ b/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java @@ -152,11 +152,7 @@ public class PairwiseGenomeComparator { genome_pair, ignore_domains_without_combs_in_all_spec, ignore_domains_specific_to_one_species ); - SurfacingUtil.decoratePrintableDomainSimilarities( similarities, - detailedness, - go_annotation_output, - go_id_to_term_map, - go_namespace_limit ); + SurfacingUtil.decoratePrintableDomainSimilarities( similarities, detailedness ); final DescriptiveStatistics stats = SurfacingUtil .calculateDescriptiveStatisticsForMeanValues( similarities ); final String species_j = species[ j ].getSpeciesId(); diff --git a/forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java b/forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java index 2cf1a91..891fba5 100644 --- a/forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java +++ b/forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java @@ -32,13 +32,8 @@ import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeSet; -import org.forester.go.GoId; -import org.forester.go.GoNameSpace; -import org.forester.go.GoTerm; -import org.forester.go.GoXRef; import org.forester.species.Species; import org.forester.surfacing.DomainSimilarityCalculator.Detailedness; -import org.forester.surfacing.DomainSimilarityCalculator.GoAnnotationOutput; import org.forester.util.ForesterUtil; public class PrintableDomainSimilarity implements DomainSimilarity { @@ -56,15 +51,12 @@ public class PrintableDomainSimilarity implements DomainSimilarity { final private int _n; private final int _max_difference_in_counts; private final int _max_difference; - private DomainSimilarityCalculator.GoAnnotationOutput _go_annotation_output; final private CombinableDomains _combinable_domains; final private SortedMap _species_data; final private DomainSimilaritySortField _sort_field; private List _species_order; private final boolean _sort_by_species_count_first; private DomainSimilarityCalculator.Detailedness _detailedness; - private Map _go_id_to_term_map; - private GoNameSpace _go_namespace_limit; private final boolean _treat_as_binary_comparison; /** @@ -138,51 +130,6 @@ public class PrintableDomainSimilarity implements DomainSimilarity { } } - // private void addGoInformation( final StringBuffer sb, final boolean for_table, final boolean html ) { - // if ( !for_table ) { - // sb.append( "<" ); - // } - // switch ( getGoAnnotationOutput() ) { - // case ALL: { - // final int go_ids = getCombinableDomains().getKeyDomain().getNumberOfGoIds(); - // boolean first = true; - // for( int i = 0; i < go_ids; ++i ) { - // final GoId go_id = getCombinableDomains().getKeyDomain().getGoId( i ); - // if ( getGoIdToTermMap() != null ) { - // if ( getGoIdToTermMap().containsKey( go_id ) ) { - // first = appendGoTerm( sb, getGoIdToTermMap().get( go_id ), first, html ); - // } - // else { - // sb.append( "go id \"" + go_id + "\" not found [" - // + getCombinableDomains().getKeyDomain().getId() + "]" ); - // } - // } - // else { - // if ( !first ) { - // sb.append( ", " ); - // } - // if ( html ) { - // sb.append( "" + go_id + "" ); - // } - // else { - // sb.append( go_id ); - // } - // first = false; - // } - // } - // break; - // } - // case NONE: { - // break; - // } - // default: - // throw new RuntimeException( "unknown " + getGoAnnotationOutput() ); - // } - // if ( !for_table ) { - // sb.append( ">: " ); - // } - // } private void addSpeciesSpecificDomainData( final StringBuffer sb, final Species species, final boolean html, @@ -217,36 +164,6 @@ public class PrintableDomainSimilarity implements DomainSimilarity { sb.append( PrintableDomainSimilarity.SPECIES_SEPARATOR ); } - private boolean appendGoTerm( final StringBuffer sb, final GoTerm go_term, final boolean first, final boolean html ) { - if ( ( getGoNamespaceLimit() == null ) || getGoNamespaceLimit().equals( go_term.getGoNameSpace() ) ) { - if ( !first ) { - sb.append( ", " ); - } - final GoId go_id = go_term.getGoId(); - if ( html ) { - sb.append( "" + go_id - + "" ); - } - else { - sb.append( go_id ); - } - sb.append( ":" ); - sb.append( go_term.getName() ); - if ( !html ) { - if ( getGoNamespaceLimit() == null ) { - sb.append( ":" ); - sb.append( go_term.getGoNameSpace().toString() ); - } - for( final GoXRef xref : go_term.getGoXRefs() ) { - sb.append( ":" ); - sb.append( xref.toString() ); - } - } - return false; - } - return true; - } - private void boldEndIfSortedBy( final DomainSimilaritySortField sort_field, final StringBuffer sb ) { if ( getSortField() == sort_field ) { sb.append( "" ); @@ -448,18 +365,6 @@ public class PrintableDomainSimilarity implements DomainSimilarity { return getCombinableDomains().getKeyDomain(); } - private DomainSimilarityCalculator.GoAnnotationOutput getGoAnnotationOutput() { - return _go_annotation_output; - } - - private Map getGoIdToTermMap() { - return _go_id_to_term_map; - } - - public GoNameSpace getGoNamespaceLimit() { - return _go_namespace_limit; - } - @Override public int getMaximalDifference() { return _max_difference; @@ -542,8 +447,6 @@ public class PrintableDomainSimilarity implements DomainSimilarity { private void init() { _detailedness = DomainSimilarityCalculator.Detailedness.PUNCTILIOUS; - _go_annotation_output = null; - _go_id_to_term_map = null; } private boolean isSortBySpeciesCountFirst() { @@ -558,18 +461,6 @@ public class PrintableDomainSimilarity implements DomainSimilarity { _detailedness = detailedness; } - public void setGoAnnotationOutput( final GoAnnotationOutput go_annotation_output ) { - _go_annotation_output = go_annotation_output; - } - - public void setGoIdToTermMap( final Map go_id_to_term_map ) { - _go_id_to_term_map = go_id_to_term_map; - } - - public void setGoNamespaceLimit( final GoNameSpace go_namespace_limit ) { - _go_namespace_limit = go_namespace_limit; - } - public void setSpeciesOrder( final List species_order ) { if ( !species_order.containsAll( getSpeciesData().keySet() ) ) { throw new IllegalArgumentException( "list to order species must contain all species of multiple combinable domains similarity" ); @@ -598,6 +489,7 @@ public class PrintableDomainSimilarity implements DomainSimilarity { sb.append( "" + getDomainId() + "" ); boldEndIfSortedBy( DomainSimilaritySortField.DOMAIN_ID, sb ); + sb.append( "" ); sb.append( "" ); sb.append( "" ); sb.append( "" ); } - // ^^ if ( getGoAnnotationOutput() != DomainSimilarityCalculator.GoAnnotationOutput.NONE ) { - // ^^ sb.append( "" ); - // ^^ addGoInformation( sb, true, true ); - // ^^ sb.append( "" ); - // ^^ } if ( ( getSpeciesCustomOrder() == null ) || getSpeciesCustomOrder().isEmpty() ) { sb.append( "" ); sb.append( getSpeciesDataInAlphabeticalOrder( true, tax_code_to_id_map ) ); diff --git a/forester/java/src/org/forester/surfacing/SurfacingUtil.java b/forester/java/src/org/forester/surfacing/SurfacingUtil.java index cb9df26..3191312 100644 --- a/forester/java/src/org/forester/surfacing/SurfacingUtil.java +++ b/forester/java/src/org/forester/surfacing/SurfacingUtil.java @@ -82,7 +82,6 @@ import org.forester.protein.Domain; import org.forester.protein.Protein; import org.forester.species.Species; import org.forester.surfacing.DomainSimilarityCalculator.Detailedness; -import org.forester.surfacing.DomainSimilarityCalculator.GoAnnotationOutput; import org.forester.surfacing.GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder; import org.forester.util.AsciiHistogram; import org.forester.util.BasicDescriptiveStatistics; @@ -285,20 +284,11 @@ public final class SurfacingUtil { } public static void decoratePrintableDomainSimilarities( final SortedSet domain_similarities, - final Detailedness detailedness, - final GoAnnotationOutput go_annotation_output, - final Map go_id_to_term_map, - final GoNameSpace go_namespace_limit ) { - if ( ( go_namespace_limit != null ) && ( ( go_id_to_term_map == null ) || go_id_to_term_map.isEmpty() ) ) { - throw new IllegalArgumentException( "attempt to use a GO namespace limit without a GO id to term map" ); - } + final Detailedness detailedness ) { for( final DomainSimilarity domain_similarity : domain_similarities ) { if ( domain_similarity instanceof PrintableDomainSimilarity ) { final PrintableDomainSimilarity printable_domain_similarity = ( PrintableDomainSimilarity ) domain_similarity; printable_domain_similarity.setDetailedness( detailedness ); - printable_domain_similarity.setGoAnnotationOutput( go_annotation_output ); - printable_domain_similarity.setGoIdToTermMap( go_id_to_term_map ); - printable_domain_similarity.setGoNamespaceLimit( go_namespace_limit ); } } } @@ -1765,32 +1755,14 @@ public final class SurfacingUtil { } } AsciiHistogram histo = null; - if ( stats.getMin() < stats.getMin() ) { - histo = new AsciiHistogram( stats, histogram_title ); - } - if ( verbose ) { - if ( histo != null ) { - System.out.println( histo.toStringBuffer( 20, '|', 40, 5 ) ); - } - System.out.println(); - System.out.println( "N : " + stats.getN() ); - System.out.println( "Min : " + stats.getMin() ); - System.out.println( "Max : " + stats.getMax() ); - System.out.println( "Mean : " + stats.arithmeticMean() ); - if ( stats.getN() > 1 ) { - System.out.println( "SD : " + stats.sampleStandardDeviation() ); - } - else { - System.out.println( "SD : n/a" ); - } - System.out.println( "Median : " + stats.median() ); - if ( stats.getN() > 1 ) { - System.out.println( "Pearsonian skewness : " + stats.pearsonianSkewness() ); - } - else { - System.out.println( "Pearsonian skewness : n/a" ); + try { + if ( stats.getMin() < stats.getMax() ) { + histo = new AsciiHistogram( stats, histogram_title ); } } + catch ( Exception e ) { + histo = null; + } if ( ( single_writer != null ) && ( ( split_writers == null ) || split_writers.isEmpty() ) ) { split_writers = new HashMap(); split_writers.put( '_', single_writer ); @@ -1844,13 +1816,6 @@ public final class SurfacingUtil { w.write( SurfacingConstants.NL ); w.write( "Median: " + stats.median() + "" ); w.write( SurfacingConstants.NL ); - if ( stats.getN() > 1 ) { - w.write( "Pearsonian skewness: " + stats.pearsonianSkewness() + "" ); - } - else { - w.write( "Pearsonian skewness: n/a" ); - } - w.write( SurfacingConstants.NL ); w.write( "" ); w.write( SurfacingConstants.NL ); w.write( "
" ); @@ -1867,6 +1832,30 @@ public final class SurfacingUtil { for( final Writer w : split_writers.values() ) { w.write( SurfacingConstants.NL ); } + // + for( final DomainSimilarity similarity : similarities ) { + if ( ( species_order != null ) && !species_order.isEmpty() ) { + ( ( PrintableDomainSimilarity ) similarity ).setSpeciesOrder( species_order ); + } + if ( single_writer != null ) { + single_writer.write( "
" + similarity.getDomainId() + + "
" ); + single_writer.write( SurfacingConstants.NL ); + } + else { + Writer local_writer = split_writers.get( ( similarity.getDomainId().charAt( 0 ) + "" ).toLowerCase() + .charAt( 0 ) ); + if ( local_writer == null ) { + local_writer = split_writers.get( '0' ); + } + local_writer.write( "" + similarity.getDomainId() + + "
" ); + local_writer.write( SurfacingConstants.NL ); + } + } + // w.write( "
" ); + // w.write( SurfacingConstants.NL ); + // for( final DomainSimilarity similarity : similarities ) { if ( ( species_order != null ) && !species_order.isEmpty() ) { ( ( PrintableDomainSimilarity ) similarity ).setSpeciesOrder( species_order ); @@ -1884,9 +1873,6 @@ public final class SurfacingUtil { local_writer.write( similarity.toStringBuffer( print_option, tax_code_to_id_map ).toString() ); local_writer.write( SurfacingConstants.NL ); } - // for( final Writer w : split_writers.values() ) { - //w.write( SurfacingConstants.NL ); - // } } switch ( print_option ) { case HTML: -- 1.7.10.2