import java.io.Writer;
import java.util.ArrayList;
import java.util.Date;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
final static private String SEQ_EXTRACT_OPTION = "prot_extract";
final static private char SEPARATOR_FOR_INPUT_VALUES = '#';
final static private String PRG_VERSION = "2.210";
- final static private String PRG_DATE = "2011.11.30";
+ final static private String PRG_DATE = "2011.12.08";
final static private String E_MAIL = "czmasek@burnham.org";
final static private String WWW = "www.phylosoft.org/forester/applications/surfacing";
final static private boolean IGNORE_DUFS_DEFAULT = true;
DescriptiveStatistics pw_stats = null;
try {
String my_outfile = output_file.toString();
- if ( !my_outfile.endsWith( ".html" ) ) {
+ Map<Character, Writer> split_writers = null;
+ Writer writer = null;
+ if ( similarities.size() > 1000 ) {
+ if ( my_outfile.endsWith( ".html" ) ) {
+ my_outfile = my_outfile.substring( 0, my_outfile.length() - 5 );
+ }
+ split_writers = new HashMap<Character, Writer>();
+ createSplitWriters( out_dir, my_outfile, split_writers );
+ }
+ else if ( !my_outfile.endsWith( ".html" ) ) {
my_outfile += ".html";
+ writer = new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile ) );
}
- final Writer writer = new BufferedWriter( new FileWriter( out_dir == null ? my_outfile : out_dir
- + ForesterUtil.FILE_SEPARATOR + my_outfile ) );
List<Species> species_order = null;
if ( species_matrix ) {
species_order = new ArrayList<Species>();
.writeDomainSimilaritiesToFile( html_desc,
new StringBuilder( number_of_genomes + " genomes" ),
writer,
+ split_writers,
similarities,
number_of_genomes == 2,
species_order,
System.out.println();
}
+ private static void createSplitWriters( File out_dir, String my_outfile, Map<Character, Writer> split_writers )
+ throws IOException {
+ split_writers.put( 'a', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_A.html" ) ) );
+ split_writers.put( 'b', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_B.html" ) ) );
+ split_writers.put( 'c', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_C.html" ) ) );
+ split_writers.put( 'd', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_D.html" ) ) );
+ split_writers.put( 'e', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_E.html" ) ) );
+ split_writers.put( 'f', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_F.html" ) ) );
+ split_writers.put( 'g', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_G.html" ) ) );
+ split_writers.put( 'h', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_H.html" ) ) );
+ split_writers.put( 'i', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_I.html" ) ) );
+ split_writers.put( 'j', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_J.html" ) ) );
+ split_writers.put( 'k', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_K.html" ) ) );
+ split_writers.put( 'l', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_L.html" ) ) );
+ split_writers.put( 'm', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_M.html" ) ) );
+ split_writers.put( 'n', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_N.html" ) ) );
+ split_writers.put( 'o', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_O.html" ) ) );
+ split_writers.put( 'p', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_P.html" ) ) );
+ split_writers.put( 'q', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_Q.html" ) ) );
+ split_writers.put( 'r', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_R.html" ) ) );
+ split_writers.put( 's', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_S.html" ) ) );
+ split_writers.put( 't', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_T.html" ) ) );
+ split_writers.put( 'u', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_U.html" ) ) );
+ split_writers.put( 'v', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_V.html" ) ) );
+ split_writers.put( 'w', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_W.html" ) ) );
+ split_writers.put( 'x', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_X.html" ) ) );
+ split_writers.put( 'y', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_Y.html" ) ) );
+ split_writers.put( 'z', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_Z.html" ) ) );
+ split_writers.put( '0', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_09.html" ) ) );
+ }
+
private static void printOutPercentageOfMultidomainProteins( final SortedMap<Integer, Integer> all_genomes_domains_per_potein_histo,
final Writer log_writer ) {
int sum = 0;
public static DescriptiveStatistics writeDomainSimilaritiesToFile( final StringBuilder html_desc,
final StringBuilder html_title,
- final Writer w,
+ final Writer single_writer,
+ Map<Character, Writer> split_writers,
final SortedSet<DomainSimilarity> similarities,
final boolean treat_as_binary,
final List<Species> species_order,
System.out.println( "Pearsonian skewness : n/a" );
}
}
+
+ if ( single_writer != null && ( split_writers == null || split_writers.isEmpty() ) ) {
+ split_writers = new HashMap<Character, Writer>();
+ split_writers.put( '_', single_writer );
+
+ }
+
switch ( print_option ) {
case SIMPLE_TAB_DELIMITED:
break;
case HTML:
- w.write( "<html>" );
- w.write( SurfacingConstants.NL );
- addHtmlHead( w, "SURFACING :: " + html_title );
- w.write( SurfacingConstants.NL );
- w.write( "<body>" );
- w.write( SurfacingConstants.NL );
- w.write( html_desc.toString() );
- w.write( SurfacingConstants.NL );
- w.write( "<hr>" );
- w.write( "<br>" );
- w.write( SurfacingConstants.NL );
- w.write( "<tt><pre>" );
- w.write( SurfacingConstants.NL );
- if ( histo != null ) {
- w.write( histo.toStringBuffer( 20, '|', 40, 5 ).toString() );
+ for (Writer w : split_writers.values()) {
+ w.write( "<html>" );
+ w.write( SurfacingConstants.NL );
+ addHtmlHead( w, "SURFACING :: " + html_title );
+ w.write( SurfacingConstants.NL );
+ w.write( "<body>" );
+ w.write( SurfacingConstants.NL );
+ w.write( html_desc.toString() );
+ w.write( SurfacingConstants.NL );
+ w.write( "<hr>" );
+ w.write( "<br>" );
+ w.write( SurfacingConstants.NL );
+ w.write( "<tt><pre>" );
+ w.write( SurfacingConstants.NL );
+ if ( histo != null ) {
+ w.write( histo.toStringBuffer( 20, '|', 40, 5 ).toString() );
+ w.write( SurfacingConstants.NL );
+ }
+ w.write( "</pre></tt>" );
+ w.write( SurfacingConstants.NL );
+ w.write( "<table>" );
+ w.write( SurfacingConstants.NL );
+ w.write( "<tr><td>N: </td><td>" + stats.getN() + "</td></tr>" );
+ w.write( SurfacingConstants.NL );
+ w.write( "<tr><td>Min: </td><td>" + stats.getMin() + "</td></tr>" );
+ w.write( SurfacingConstants.NL );
+ w.write( "<tr><td>Max: </td><td>" + stats.getMax() + "</td></tr>" );
+ w.write( SurfacingConstants.NL );
+ w.write( "<tr><td>Mean: </td><td>" + stats.arithmeticMean() + "</td></tr>" );
+ w.write( SurfacingConstants.NL );
+ if ( stats.getN() > 1 ) {
+ w.write( "<tr><td>SD: </td><td>" + stats.sampleStandardDeviation() + "</td></tr>" );
+ }
+ else {
+ w.write( "<tr><td>SD: </td><td>n/a</td></tr>" );
+ }
+ w.write( SurfacingConstants.NL );
+ w.write( "<tr><td>Median: </td><td>" + stats.median() + "</td></tr>" );
+ w.write( SurfacingConstants.NL );
+ if ( stats.getN() > 1 ) {
+ w.write( "<tr><td>Pearsonian skewness: </td><td>" + stats.pearsonianSkewness() + "</td></tr>" );
+ }
+ else {
+ w.write( "<tr><td>Pearsonian skewness: </td><td>n/a</td></tr>" );
+ }
+ w.write( SurfacingConstants.NL );
+ w.write( "</table>" );
+ w.write( SurfacingConstants.NL );
+ w.write( "<br>" );
+ w.write( SurfacingConstants.NL );
+ w.write( "<hr>" );
+ w.write( SurfacingConstants.NL );
+ w.write( "<br>" );
+ w.write( SurfacingConstants.NL );
+ w.write( "<table>" );
w.write( SurfacingConstants.NL );
}
- w.write( "</pre></tt>" );
- w.write( SurfacingConstants.NL );
- w.write( "<table>" );
- w.write( SurfacingConstants.NL );
- w.write( "<tr><td>N: </td><td>" + stats.getN() + "</td></tr>" );
- w.write( SurfacingConstants.NL );
- w.write( "<tr><td>Min: </td><td>" + stats.getMin() + "</td></tr>" );
- w.write( SurfacingConstants.NL );
- w.write( "<tr><td>Max: </td><td>" + stats.getMax() + "</td></tr>" );
- w.write( SurfacingConstants.NL );
- w.write( "<tr><td>Mean: </td><td>" + stats.arithmeticMean() + "</td></tr>" );
- w.write( SurfacingConstants.NL );
- if ( stats.getN() > 1 ) {
- w.write( "<tr><td>SD: </td><td>" + stats.sampleStandardDeviation() + "</td></tr>" );
- }
- else {
- w.write( "<tr><td>SD: </td><td>n/a</td></tr>" );
- }
- w.write( SurfacingConstants.NL );
- w.write( "<tr><td>Median: </td><td>" + stats.median() + "</td></tr>" );
- w.write( SurfacingConstants.NL );
- if ( stats.getN() > 1 ) {
- w.write( "<tr><td>Pearsonian skewness: </td><td>" + stats.pearsonianSkewness() + "</td></tr>" );
- }
- else {
- w.write( "<tr><td>Pearsonian skewness: </td><td>n/a</td></tr>" );
- }
- w.write( SurfacingConstants.NL );
- w.write( "</table>" );
- w.write( SurfacingConstants.NL );
- w.write( "<br>" );
- w.write( SurfacingConstants.NL );
- w.write( "<hr>" );
- w.write( SurfacingConstants.NL );
- w.write( "<br>" );
- w.write( SurfacingConstants.NL );
- w.write( "<table>" );
- w.write( SurfacingConstants.NL );
break;
}
- w.write( SurfacingConstants.NL );
+ for (Writer w : split_writers.values()) {
+ w.write( SurfacingConstants.NL );
+ }
for( final DomainSimilarity similarity : similarities ) {
if ( ( species_order != null ) && !species_order.isEmpty() ) {
( ( PrintableDomainSimilarity ) similarity ).setSpeciesOrder( species_order );
}
- w.write( similarity.toStringBuffer( print_option ).toString() );
- w.write( SurfacingConstants.NL );
+ if ( single_writer != null ) {
+ single_writer.write( similarity.toStringBuffer( print_option ).toString() );
+ }
+ else {
+ Writer local_writer = split_writers.get( ( similarity.getDomainId().getId().charAt( 0 ) + "" ).toLowerCase().charAt( 0 ) );
+ if ( local_writer == null ) {
+ local_writer = split_writers.get( '0' );
+ }
+ local_writer.write( similarity.toStringBuffer( print_option ).toString() );
+ }
+ for (Writer w : split_writers.values()) {
+ w.write( SurfacingConstants.NL );
+ }
}
switch ( print_option ) {
case HTML:
+ for (Writer w : split_writers.values()) {
w.write( SurfacingConstants.NL );
w.write( "</table>" );
w.write( SurfacingConstants.NL );
w.write( SurfacingConstants.NL );
w.write( "</html>" );
w.write( SurfacingConstants.NL );
+ }
break;
}
- w.flush();
+ for (Writer w : split_writers.values()) {
w.close();
+ }
return stats;
}