From eb02413234a507a55865bffd9f3677602d6ee8d0 Mon Sep 17 00:00:00 2001 From: cmzmasek Date: Thu, 8 Dec 2011 18:42:04 +0000 Subject: [PATCH] experimental changes to surfacing methods --- .../src/org/forester/application/surfacing.java | 49 ++++++- .../forester/application/surfacing_hmmpfam.java | 2 + .../surfacing/PairwiseGenomeComparator.java | 1 + .../src/org/forester/surfacing/SurfacingUtil.java | 142 ++++++++++++-------- 4 files changed, 132 insertions(+), 62 deletions(-) diff --git a/forester/java/src/org/forester/application/surfacing.java b/forester/java/src/org/forester/application/surfacing.java index 0e6ad5f..8c8d4a0 100644 --- a/forester/java/src/org/forester/application/surfacing.java +++ b/forester/java/src/org/forester/application/surfacing.java @@ -33,6 +33,7 @@ import java.io.IOException; import java.io.Writer; import java.util.ArrayList; import java.util.Date; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -233,7 +234,7 @@ public class surfacing { final static private String SEQ_EXTRACT_OPTION = "prot_extract"; final static private char SEPARATOR_FOR_INPUT_VALUES = '#'; final static private String PRG_VERSION = "2.210"; - final static private String PRG_DATE = "2011.11.30"; + final static private String PRG_DATE = "2011.12.08"; final static private String E_MAIL = "czmasek@burnham.org"; final static private String WWW = "www.phylosoft.org/forester/applications/surfacing"; final static private boolean IGNORE_DUFS_DEFAULT = true; @@ -2073,11 +2074,19 @@ public class surfacing { DescriptiveStatistics pw_stats = null; try { String my_outfile = output_file.toString(); - if ( !my_outfile.endsWith( ".html" ) ) { + Map split_writers = null; + Writer writer = null; + if ( similarities.size() > 1000 ) { + if ( my_outfile.endsWith( ".html" ) ) { + my_outfile = my_outfile.substring( 0, my_outfile.length() - 5 ); + } + split_writers = new HashMap(); + createSplitWriters( out_dir, my_outfile, split_writers ); + } + else if ( !my_outfile.endsWith( ".html" ) ) { my_outfile += ".html"; + writer = new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile ) ); } - final Writer writer = new BufferedWriter( new FileWriter( out_dir == null ? my_outfile : out_dir - + ForesterUtil.FILE_SEPARATOR + my_outfile ) ); List species_order = null; if ( species_matrix ) { species_order = new ArrayList(); @@ -2097,6 +2106,7 @@ public class surfacing { .writeDomainSimilaritiesToFile( html_desc, new StringBuilder( number_of_genomes + " genomes" ), writer, + split_writers, similarities, number_of_genomes == 2, species_order, @@ -2363,6 +2373,37 @@ public class surfacing { System.out.println(); } + private static void createSplitWriters( File out_dir, String my_outfile, Map split_writers ) + throws IOException { + split_writers.put( 'a', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_A.html" ) ) ); + split_writers.put( 'b', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_B.html" ) ) ); + split_writers.put( 'c', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_C.html" ) ) ); + split_writers.put( 'd', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_D.html" ) ) ); + split_writers.put( 'e', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_E.html" ) ) ); + split_writers.put( 'f', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_F.html" ) ) ); + split_writers.put( 'g', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_G.html" ) ) ); + split_writers.put( 'h', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_H.html" ) ) ); + split_writers.put( 'i', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_I.html" ) ) ); + split_writers.put( 'j', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_J.html" ) ) ); + split_writers.put( 'k', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_K.html" ) ) ); + split_writers.put( 'l', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_L.html" ) ) ); + split_writers.put( 'm', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_M.html" ) ) ); + split_writers.put( 'n', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_N.html" ) ) ); + split_writers.put( 'o', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_O.html" ) ) ); + split_writers.put( 'p', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_P.html" ) ) ); + split_writers.put( 'q', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_Q.html" ) ) ); + split_writers.put( 'r', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_R.html" ) ) ); + split_writers.put( 's', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_S.html" ) ) ); + split_writers.put( 't', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_T.html" ) ) ); + split_writers.put( 'u', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_U.html" ) ) ); + split_writers.put( 'v', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_V.html" ) ) ); + split_writers.put( 'w', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_W.html" ) ) ); + split_writers.put( 'x', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_X.html" ) ) ); + split_writers.put( 'y', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_Y.html" ) ) ); + split_writers.put( 'z', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_Z.html" ) ) ); + split_writers.put( '0', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_09.html" ) ) ); + } + private static void printOutPercentageOfMultidomainProteins( final SortedMap all_genomes_domains_per_potein_histo, final Writer log_writer ) { int sum = 0; diff --git a/forester/java/src/org/forester/application/surfacing_hmmpfam.java b/forester/java/src/org/forester/application/surfacing_hmmpfam.java index 4104a34..bd97c50 100644 --- a/forester/java/src/org/forester/application/surfacing_hmmpfam.java +++ b/forester/java/src/org/forester/application/surfacing_hmmpfam.java @@ -1973,7 +1973,9 @@ public class surfacing_hmmpfam { pw_stats = SurfacingUtil .writeDomainSimilaritiesToFile( html_desc, new StringBuilder( number_of_genomes + " genomes" ), + writer, + null, similarities, number_of_genomes == 2, species_order, diff --git a/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java b/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java index d9ad2aa..99d336f 100644 --- a/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java +++ b/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java @@ -214,6 +214,7 @@ public class PairwiseGenomeComparator { new StringBuilder( species_i + "-" + species_j ), writer, + null, similarities, true, null, diff --git a/forester/java/src/org/forester/surfacing/SurfacingUtil.java b/forester/java/src/org/forester/surfacing/SurfacingUtil.java index d3e8c0a..e6e8736 100644 --- a/forester/java/src/org/forester/surfacing/SurfacingUtil.java +++ b/forester/java/src/org/forester/surfacing/SurfacingUtil.java @@ -1995,7 +1995,8 @@ public final class SurfacingUtil { public static DescriptiveStatistics writeDomainSimilaritiesToFile( final StringBuilder html_desc, final StringBuilder html_title, - final Writer w, + final Writer single_writer, + Map split_writers, final SortedSet similarities, final boolean treat_as_binary, final List species_order, @@ -2138,77 +2139,100 @@ public final class SurfacingUtil { System.out.println( "Pearsonian skewness : n/a" ); } } + + if ( single_writer != null && ( split_writers == null || split_writers.isEmpty() ) ) { + split_writers = new HashMap(); + split_writers.put( '_', single_writer ); + + } + switch ( print_option ) { case SIMPLE_TAB_DELIMITED: break; case HTML: - w.write( "" ); - w.write( SurfacingConstants.NL ); - addHtmlHead( w, "SURFACING :: " + html_title ); - w.write( SurfacingConstants.NL ); - w.write( "" ); - w.write( SurfacingConstants.NL ); - w.write( html_desc.toString() ); - w.write( SurfacingConstants.NL ); - w.write( "
" ); - w.write( "
" ); - w.write( SurfacingConstants.NL ); - w.write( "
" );
-                w.write( SurfacingConstants.NL );
-                if ( histo != null ) {
-                    w.write( histo.toStringBuffer( 20, '|', 40, 5 ).toString() );
+                for (Writer w : split_writers.values()) {
+                    w.write( "" );
+                    w.write( SurfacingConstants.NL );
+                    addHtmlHead( w, "SURFACING :: " + html_title );
+                    w.write( SurfacingConstants.NL );
+                    w.write( "" );
+                    w.write( SurfacingConstants.NL );
+                    w.write( html_desc.toString() );
+                    w.write( SurfacingConstants.NL );
+                    w.write( "
" ); + w.write( "
" ); + w.write( SurfacingConstants.NL ); + w.write( "
" );
+                    w.write( SurfacingConstants.NL );
+                    if ( histo != null ) {
+                        w.write( histo.toStringBuffer( 20, '|', 40, 5 ).toString() );
+                        w.write( SurfacingConstants.NL );
+                    }
+                    w.write( "
" ); + w.write( SurfacingConstants.NL ); + w.write( "" ); + w.write( SurfacingConstants.NL ); + w.write( "" ); + w.write( SurfacingConstants.NL ); + w.write( "" ); + w.write( SurfacingConstants.NL ); + w.write( "" ); + w.write( SurfacingConstants.NL ); + w.write( "" ); + w.write( SurfacingConstants.NL ); + if ( stats.getN() > 1 ) { + w.write( "" ); + } + else { + w.write( "" ); + } + w.write( SurfacingConstants.NL ); + w.write( "" ); + w.write( SurfacingConstants.NL ); + if ( stats.getN() > 1 ) { + w.write( "" ); + } + else { + w.write( "" ); + } + w.write( SurfacingConstants.NL ); + w.write( "
N: " + stats.getN() + "
Min: " + stats.getMin() + "
Max: " + stats.getMax() + "
Mean: " + stats.arithmeticMean() + "
SD: " + stats.sampleStandardDeviation() + "
SD: n/a
Median: " + stats.median() + "
Pearsonian skewness: " + stats.pearsonianSkewness() + "
Pearsonian skewness: n/a
" ); + w.write( SurfacingConstants.NL ); + w.write( "
" ); + w.write( SurfacingConstants.NL ); + w.write( "
" ); + w.write( SurfacingConstants.NL ); + w.write( "
" ); + w.write( SurfacingConstants.NL ); + w.write( "" ); w.write( SurfacingConstants.NL ); } - w.write( "" ); - w.write( SurfacingConstants.NL ); - w.write( "
" ); - w.write( SurfacingConstants.NL ); - w.write( "" ); - w.write( SurfacingConstants.NL ); - w.write( "" ); - w.write( SurfacingConstants.NL ); - w.write( "" ); - w.write( SurfacingConstants.NL ); - w.write( "" ); - w.write( SurfacingConstants.NL ); - if ( stats.getN() > 1 ) { - w.write( "" ); - } - else { - w.write( "" ); - } - w.write( SurfacingConstants.NL ); - w.write( "" ); - w.write( SurfacingConstants.NL ); - if ( stats.getN() > 1 ) { - w.write( "" ); - } - else { - w.write( "" ); - } - w.write( SurfacingConstants.NL ); - w.write( "
N: " + stats.getN() + "
Min: " + stats.getMin() + "
Max: " + stats.getMax() + "
Mean: " + stats.arithmeticMean() + "
SD: " + stats.sampleStandardDeviation() + "
SD: n/a
Median: " + stats.median() + "
Pearsonian skewness: " + stats.pearsonianSkewness() + "
Pearsonian skewness: n/a
" ); - w.write( SurfacingConstants.NL ); - w.write( "
" ); - w.write( SurfacingConstants.NL ); - w.write( "
" ); - w.write( SurfacingConstants.NL ); - w.write( "
" ); - w.write( SurfacingConstants.NL ); - w.write( "" ); - w.write( SurfacingConstants.NL ); break; } - w.write( SurfacingConstants.NL ); + for (Writer w : split_writers.values()) { + w.write( SurfacingConstants.NL ); + } for( final DomainSimilarity similarity : similarities ) { if ( ( species_order != null ) && !species_order.isEmpty() ) { ( ( PrintableDomainSimilarity ) similarity ).setSpeciesOrder( species_order ); } - w.write( similarity.toStringBuffer( print_option ).toString() ); - w.write( SurfacingConstants.NL ); + if ( single_writer != null ) { + single_writer.write( similarity.toStringBuffer( print_option ).toString() ); + } + else { + Writer local_writer = split_writers.get( ( similarity.getDomainId().getId().charAt( 0 ) + "" ).toLowerCase().charAt( 0 ) ); + if ( local_writer == null ) { + local_writer = split_writers.get( '0' ); + } + local_writer.write( similarity.toStringBuffer( print_option ).toString() ); + } + for (Writer w : split_writers.values()) { + w.write( SurfacingConstants.NL ); + } } switch ( print_option ) { case HTML: + for (Writer w : split_writers.values()) { w.write( SurfacingConstants.NL ); w.write( "
" ); w.write( SurfacingConstants.NL ); @@ -2218,10 +2242,12 @@ public final class SurfacingUtil { w.write( SurfacingConstants.NL ); w.write( "" ); w.write( SurfacingConstants.NL ); + } break; } - w.flush(); + for (Writer w : split_writers.values()) { w.close(); + } return stats; } -- 1.7.10.2