experimental changes to surfacing methods
author cmzmasek <cmzmasek@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Thu, 8 Dec 2011 18:42:04 +0000 (18:42 +0000)
committer cmzmasek <cmzmasek@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Thu, 8 Dec 2011 18:42:04 +0000 (18:42 +0000)
forester/java/src/org/forester/application/surfacing.java
forester/java/src/org/forester/application/surfacing_hmmpfam.java
forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java
forester/java/src/org/forester/surfacing/SurfacingUtil.java

index 0e6ad5f..8c8d4a0 100644 (file)
@@ -33,6 +33,7 @@ import java.io.IOException;
 import java.io.Writer;
 import java.util.ArrayList;
 import java.util.Date;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
@@ -233,7 +234,7 @@ public class surfacing {
     final static private String                               SEQ_EXTRACT_OPTION                                                     = "prot_extract";
     final static private char                                 SEPARATOR_FOR_INPUT_VALUES                                             = '#';
     final static private String                               PRG_VERSION                                                            = "2.210";
-    final static private String                               PRG_DATE                                                               = "2011.11.30";
+    final static private String                               PRG_DATE                                                               = "2011.12.08";
     final static private String                               E_MAIL                                                                 = "czmasek@burnham.org";
     final static private String                               WWW                                                                    = "www.phylosoft.org/forester/applications/surfacing";
     final static private boolean                              IGNORE_DUFS_DEFAULT                                                    = true;
@@ -2073,11 +2074,19 @@ public class surfacing {
         DescriptiveStatistics pw_stats = null;
         try {
             String my_outfile = output_file.toString();
-            if ( !my_outfile.endsWith( ".html" ) ) {
+            Map<Character, Writer> split_writers = null;
+            Writer writer = null;
+            if ( similarities.size() > 1000  ) {
+                if ( my_outfile.endsWith( ".html" ) ) {
+                    my_outfile = my_outfile.substring( 0, my_outfile.length() - 5 );
+                }
+                split_writers = new HashMap<Character, Writer>();
+                createSplitWriters( out_dir, my_outfile, split_writers );
+            }
+            else if ( !my_outfile.endsWith( ".html" ) ) {
                 my_outfile += ".html";
+                writer = new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile ) );
             }
-            final Writer writer = new BufferedWriter( new FileWriter( out_dir == null ? my_outfile : out_dir
-                    + ForesterUtil.FILE_SEPARATOR + my_outfile ) );
             List<Species> species_order = null;
             if ( species_matrix ) {
                 species_order = new ArrayList<Species>();
@@ -2097,6 +2106,7 @@ public class surfacing {
                     .writeDomainSimilaritiesToFile( html_desc,
                                                     new StringBuilder( number_of_genomes + " genomes" ),
                                                     writer,
+                                                    split_writers,
                                                     similarities,
                                                     number_of_genomes == 2,
                                                     species_order,
@@ -2363,6 +2373,37 @@ public class surfacing {
         System.out.println();
     }
 
+    /**
+     * Opens the per-initial output files used when the domain similarities are split
+     * across several HTML files, and registers one writer per file in
+     * {@code split_writers}.
+     * <p>
+     * The keys 'a' to 'z' map to {@code my_outfile + "_A.html"} through
+     * {@code my_outfile + "_Z.html"}; the key '0' maps to {@code my_outfile + "_09.html"}.
+     * The writers are not closed here; the caller remains responsible for closing them.
+     *
+     * @param out_dir directory in which the files are created; when null the file names
+     *            are used as given, instead of being prefixed with the text "null"
+     * @param my_outfile base name of the output files, without the ".html" suffix
+     * @param split_writers map that receives the newly opened writers
+     * @throws IOException if one of the files cannot be opened for writing
+     */
+    private static void createSplitWriters( File out_dir, String my_outfile, Map<Character, Writer> split_writers )
+            throws IOException {
+        // The single-file code path used to fall back to the bare file name when no output
+        // directory was given; do the same here rather than concatenating a null reference.
+        final String dir_prefix = ( out_dir == null ) ? "" : out_dir + ForesterUtil.FILE_SEPARATOR;
+        final String base = dir_prefix + my_outfile + "_";
+        for( char initial = 'a'; initial <= 'z'; ++initial ) {
+            // Keys are lower case, file name suffixes are upper case.
+            final String file_name = base + Character.toUpperCase( initial ) + ".html";
+            split_writers.put( initial, new BufferedWriter( new FileWriter( file_name ) ) );
+        }
+        // Shared file behind the key '0'.
+        final String other_file_name = base + "09.html";
+        split_writers.put( '0', new BufferedWriter( new FileWriter( other_file_name ) ) );
+    }
+
     private static void printOutPercentageOfMultidomainProteins( final SortedMap<Integer, Integer> all_genomes_domains_per_potein_histo,
                                                                  final Writer log_writer ) {
         int sum = 0;
index 4104a34..bd97c50 100644 (file)
@@ -1973,7 +1973,9 @@ public class surfacing_hmmpfam {
             pw_stats = SurfacingUtil
                     .writeDomainSimilaritiesToFile( html_desc,
                                                     new StringBuilder( number_of_genomes + " genomes" ),
+                                                    
                                                     writer,
+                                                    null,
                                                     similarities,
                                                     number_of_genomes == 2,
                                                     species_order,
index d9ad2aa..99d336f 100644 (file)
@@ -214,6 +214,7 @@ public class PairwiseGenomeComparator {
                                                                                 new StringBuilder( species_i + "-"
                                                                                         + species_j ),
                                                                                 writer,
+                                                                                null,
                                                                                 similarities,
                                                                                 true,
                                                                                 null,
index d3e8c0a..e6e8736 100644 (file)
@@ -1995,7 +1995,8 @@ public final class SurfacingUtil {
 
     public static DescriptiveStatistics writeDomainSimilaritiesToFile( final StringBuilder html_desc,
                                                                        final StringBuilder html_title,
-                                                                       final Writer w,
+                                                                       final Writer single_writer,
+                                                                       Map<Character, Writer> split_writers,
                                                                        final SortedSet<DomainSimilarity> similarities,
                                                                        final boolean treat_as_binary,
                                                                        final List<Species> species_order,
@@ -2138,77 +2139,100 @@ public final class SurfacingUtil {
                 System.out.println( "Pearsonian skewness : n/a" );
             }
         }
+
+        if ( single_writer != null && ( split_writers == null || split_writers.isEmpty() ) ) {
+            split_writers = new HashMap<Character, Writer>();
+            split_writers.put( '_', single_writer );
+        
+        }
+
         switch ( print_option ) {
             case SIMPLE_TAB_DELIMITED:
                 break;
             case HTML:
-                w.write( "<html>" );
-                w.write( SurfacingConstants.NL );
-                addHtmlHead( w, "SURFACING :: " + html_title );
-                w.write( SurfacingConstants.NL );
-                w.write( "<body>" );
-                w.write( SurfacingConstants.NL );
-                w.write( html_desc.toString() );
-                w.write( SurfacingConstants.NL );
-                w.write( "<hr>" );
-                w.write( "<br>" );
-                w.write( SurfacingConstants.NL );
-                w.write( "<tt><pre>" );
-                w.write( SurfacingConstants.NL );
-                if ( histo != null ) {
-                    w.write( histo.toStringBuffer( 20, '|', 40, 5 ).toString() );
+                for (Writer w : split_writers.values()) {
+                    w.write( "<html>" );
+                    w.write( SurfacingConstants.NL );
+                    addHtmlHead( w, "SURFACING :: " + html_title );
+                    w.write( SurfacingConstants.NL );
+                    w.write( "<body>" );
+                    w.write( SurfacingConstants.NL );
+                    w.write( html_desc.toString() );
+                    w.write( SurfacingConstants.NL );
+                    w.write( "<hr>" );
+                    w.write( "<br>" );
+                    w.write( SurfacingConstants.NL );
+                    w.write( "<tt><pre>" );
+                    w.write( SurfacingConstants.NL );
+                    if ( histo != null ) {
+                        w.write( histo.toStringBuffer( 20, '|', 40, 5 ).toString() );
+                        w.write( SurfacingConstants.NL );
+                    }
+                    w.write( "</pre></tt>" );
+                    w.write( SurfacingConstants.NL );
+                    w.write( "<table>" );
+                    w.write( SurfacingConstants.NL );
+                    w.write( "<tr><td>N: </td><td>" + stats.getN() + "</td></tr>" );
+                    w.write( SurfacingConstants.NL );
+                    w.write( "<tr><td>Min: </td><td>" + stats.getMin() + "</td></tr>" );
+                    w.write( SurfacingConstants.NL );
+                    w.write( "<tr><td>Max: </td><td>" + stats.getMax() + "</td></tr>" );
+                    w.write( SurfacingConstants.NL );
+                    w.write( "<tr><td>Mean: </td><td>" + stats.arithmeticMean() + "</td></tr>" );
+                    w.write( SurfacingConstants.NL );
+                    if ( stats.getN() > 1 ) {
+                        w.write( "<tr><td>SD: </td><td>" + stats.sampleStandardDeviation() + "</td></tr>" );
+                    }
+                    else {
+                        w.write( "<tr><td>SD: </td><td>n/a</td></tr>" );
+                    }
+                    w.write( SurfacingConstants.NL );
+                    w.write( "<tr><td>Median: </td><td>" + stats.median() + "</td></tr>" );
+                    w.write( SurfacingConstants.NL );
+                    if ( stats.getN() > 1 ) {
+                        w.write( "<tr><td>Pearsonian skewness: </td><td>" + stats.pearsonianSkewness() + "</td></tr>" );
+                    }
+                    else {
+                        w.write( "<tr><td>Pearsonian skewness: </td><td>n/a</td></tr>" );
+                    }
+                    w.write( SurfacingConstants.NL );
+                    w.write( "</table>" );
+                    w.write( SurfacingConstants.NL );
+                    w.write( "<br>" );
+                    w.write( SurfacingConstants.NL );
+                    w.write( "<hr>" );
+                    w.write( SurfacingConstants.NL );
+                    w.write( "<br>" );
+                    w.write( SurfacingConstants.NL );
+                    w.write( "<table>" );
                     w.write( SurfacingConstants.NL );
                 }
-                w.write( "</pre></tt>" );
-                w.write( SurfacingConstants.NL );
-                w.write( "<table>" );
-                w.write( SurfacingConstants.NL );
-                w.write( "<tr><td>N: </td><td>" + stats.getN() + "</td></tr>" );
-                w.write( SurfacingConstants.NL );
-                w.write( "<tr><td>Min: </td><td>" + stats.getMin() + "</td></tr>" );
-                w.write( SurfacingConstants.NL );
-                w.write( "<tr><td>Max: </td><td>" + stats.getMax() + "</td></tr>" );
-                w.write( SurfacingConstants.NL );
-                w.write( "<tr><td>Mean: </td><td>" + stats.arithmeticMean() + "</td></tr>" );
-                w.write( SurfacingConstants.NL );
-                if ( stats.getN() > 1 ) {
-                    w.write( "<tr><td>SD: </td><td>" + stats.sampleStandardDeviation() + "</td></tr>" );
-                }
-                else {
-                    w.write( "<tr><td>SD: </td><td>n/a</td></tr>" );
-                }
-                w.write( SurfacingConstants.NL );
-                w.write( "<tr><td>Median: </td><td>" + stats.median() + "</td></tr>" );
-                w.write( SurfacingConstants.NL );
-                if ( stats.getN() > 1 ) {
-                    w.write( "<tr><td>Pearsonian skewness: </td><td>" + stats.pearsonianSkewness() + "</td></tr>" );
-                }
-                else {
-                    w.write( "<tr><td>Pearsonian skewness: </td><td>n/a</td></tr>" );
-                }
-                w.write( SurfacingConstants.NL );
-                w.write( "</table>" );
-                w.write( SurfacingConstants.NL );
-                w.write( "<br>" );
-                w.write( SurfacingConstants.NL );
-                w.write( "<hr>" );
-                w.write( SurfacingConstants.NL );
-                w.write( "<br>" );
-                w.write( SurfacingConstants.NL );
-                w.write( "<table>" );
-                w.write( SurfacingConstants.NL );
                 break;
         }
-        w.write( SurfacingConstants.NL );
+        for (Writer w : split_writers.values()) {
+            w.write( SurfacingConstants.NL );
+        }
         for( final DomainSimilarity similarity : similarities ) {
             if ( ( species_order != null ) && !species_order.isEmpty() ) {
                 ( ( PrintableDomainSimilarity ) similarity ).setSpeciesOrder( species_order );
             }
-            w.write( similarity.toStringBuffer( print_option ).toString() );
-            w.write( SurfacingConstants.NL );
+            if ( single_writer != null ) {
+                single_writer.write( similarity.toStringBuffer( print_option ).toString() );
+            }
+            else {
+                Writer local_writer = split_writers.get( ( similarity.getDomainId().getId().charAt( 0 ) + "" ).toLowerCase().charAt( 0 ) );
+                if ( local_writer == null ) {
+                    local_writer = split_writers.get( '0' );
+                }    
+                local_writer.write( similarity.toStringBuffer( print_option ).toString() );
+            }
+            for (Writer w : split_writers.values()) {
+                w.write( SurfacingConstants.NL );
+            }
         }
         switch ( print_option ) {
             case HTML:
+                for (Writer w : split_writers.values()) {
                 w.write( SurfacingConstants.NL );
                 w.write( "</table>" );
                 w.write( SurfacingConstants.NL );
@@ -2218,10 +2242,12 @@ public final class SurfacingUtil {
                 w.write( SurfacingConstants.NL );
                 w.write( "</html>" );
                 w.write( SurfacingConstants.NL );
+                }
                 break;
         }
-        w.flush();
+        for (Writer w : split_writers.values()) {
         w.close();
+        }
         return stats;
     }