From 1ec82d057c07d5b936fb42fa129b809d65aeb5e5 Mon Sep 17 00:00:00 2001 From: cmzmasek Date: Mon, 27 Mar 2017 15:11:11 -0700 Subject: [PATCH] in progress... --- .../src/org/forester/application/surfacing.java | 34 +++++---- .../surfacing/MinimalDomainomeCalculator.java | 78 +++++++++++++------- 2 files changed, 72 insertions(+), 40 deletions(-) diff --git a/forester/java/src/org/forester/application/surfacing.java b/forester/java/src/org/forester/application/surfacing.java index 88be3d2..c7319e0 100644 --- a/forester/java/src/org/forester/application/surfacing.java +++ b/forester/java/src/org/forester/application/surfacing.java @@ -217,8 +217,8 @@ public class surfacing { final static private String INPUT_GENOMES_FILE_OPTION = "genomes"; final static private String INPUT_SPECIES_TREE_OPTION = "species_tree"; final static private String SEQ_EXTRACT_OPTION = "prot_extract"; - final static private String PRG_VERSION = "2.500"; - final static private String PRG_DATE = "170323"; + final static private String PRG_VERSION = "2.501"; + final static private String PRG_DATE = "170327"; final static private String E_MAIL = "czmasek@burnham.org"; final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester/surfacing"; final static private boolean IGNORE_DUFS_DEFAULT = true; @@ -272,6 +272,7 @@ public class surfacing { public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_MAPPED.txt"; public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique_MAPPED.txt"; private static final boolean CALC_SIMILARITY_SCORES = false; + private static final String SEPARATOR_FOR_DA = "--"; @SuppressWarnings( "unchecked") public static void main( final String args[] ) { @@ -1776,23 +1777,25 @@ public class surfacing { .programMessage( PRG_NAME, "Wrote domain promiscuities to: " + per_genome_domain_promiscuity_statistics_file ); try { - MinimalDomainomeCalculator.calcOme( false, - intrees[ 0 ], - protein_lists_per_species, - "---", - -1, - out_dir.toString() + "/" + output_file ); + MinimalDomainomeCalculator.calc( false, + intrees[ 0 ], + protein_lists_per_species, + SEPARATOR_FOR_DA, + -1, + out_dir.toString() + "/" + output_file, + true ); } catch ( IOException e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() ); } try { - MinimalDomainomeCalculator.calcOme( true, - intrees[ 0 ], - protein_lists_per_species, - "---", - -1, - out_dir.toString() + "/" + output_file ); + MinimalDomainomeCalculator.calc( true, + intrees[ 0 ], + protein_lists_per_species, + SEPARATOR_FOR_DA, + -1, + out_dir.toString() + "/" + output_file, + true ); } catch ( IOException e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() ); @@ -2207,6 +2210,9 @@ public class surfacing { "Free memory : " + free_memory + "MB, total memory: " + total_memory + "MB" ); ForesterUtil.programMessage( PRG_NAME, "If this application is useful to you, please cite:" ); ForesterUtil.programMessage( PRG_NAME, surfacing.WWW ); + ForesterUtil + .programMessage( PRG_NAME, + "[next step for phylogenomic analysis pipeline (example, in \"DAS\" dir): % mse.rb .prot . FL_seqs DA_seqs ../../genome_locations.txt]" ); ForesterUtil.programMessage( PRG_NAME, "OK" ); System.out.println(); } diff --git a/forester/java/src/org/forester/surfacing/MinimalDomainomeCalculator.java b/forester/java/src/org/forester/surfacing/MinimalDomainomeCalculator.java index 9abb02a..ab82419 100644 --- a/forester/java/src/org/forester/surfacing/MinimalDomainomeCalculator.java +++ b/forester/java/src/org/forester/surfacing/MinimalDomainomeCalculator.java @@ -30,12 +30,13 @@ import org.forester.util.ForesterUtil; public final class MinimalDomainomeCalculator { - static final public void calcOme( final boolean use_domain_architectures, - final Phylogeny tre, - final SortedMap> protein_lists_per_species, - final String separator, - final double ie_cutoff, - final String outfile_base ) + public final static void calc( final boolean use_domain_architectures, + final Phylogeny tre, + final SortedMap> protein_lists_per_species, + final String separator, + final double ie_cutoff, + final String outfile_base, + final boolean write_protein_files ) throws IOException { final SortedMap> species_to_features_map = new TreeMap>(); if ( protein_lists_per_species == null || tre == null ) { @@ -192,25 +193,46 @@ public final class MinimalDomainomeCalculator { out_table.close(); ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote minimal DAome data to : " + outfile ); ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote minimal DAome data to (as table): " + outfile_table ); - for( String f : all_features ) { + if ( write_protein_files ) { + final String protdirname; final String a; + final String b; if ( use_domain_architectures ) { - a = "DA_"; + a = "_DA"; + b = "domain architectures (DAs)"; + protdirname = "_DAS"; } else { - a = "domain_"; + a = "_domain"; + b = "domains"; + protdirname = "_DOMAINS"; } - final File prot_dir = new File( outfile_base + "_prot" ); - prot_dir.mkdir(); - final File outt = new File( outfile_base + "_prot/" + a + f + surfacing.SEQ_EXTRACT_SUFFIX ); - final Writer proteins_file_writer = new BufferedWriter( new FileWriter( outt ) ); - extractProteinFeatures( use_domain_architectures, - protein_lists_per_species, - f, - proteins_file_writer, - ie_cutoff, - separator ); - proteins_file_writer.close(); + final File prot_dir = new File( outfile_base + protdirname ); + final boolean success = prot_dir.mkdir(); + if ( !success ) { + throw new IOException( "failed to create dir " + prot_dir ); + } + int total = 0; + final String dir = outfile_base + protdirname + "/"; + for( final String feat : all_features ) { + final File extract_outfile = new File( dir + feat + a + surfacing.SEQ_EXTRACT_SUFFIX ); + SurfacingUtil.checkForOutputFileWriteability( extract_outfile ); + final Writer proteins_file_writer = new BufferedWriter( new FileWriter( extract_outfile ) ); + final int counter = extractProteinFeatures( use_domain_architectures, + protein_lists_per_species, + feat, + proteins_file_writer, + ie_cutoff, + separator ); + if ( counter < 1 ) { + ForesterUtil.printWarningMessage( "surfacing", feat + " not present (in " + b + " extraction)" ); + } + total += counter; + proteins_file_writer.close(); + } + ForesterUtil.programMessage( "surfacing", + "Wrote " + total + " individual " + b + " from a total of " + + all_features.size() + " into: " + dir ); } } @@ -226,13 +248,14 @@ public final class MinimalDomainomeCalculator { return my_first; } - public static void extractProteinFeatures( final boolean use_domain_architectures, - final SortedMap> protein_lists_per_species, - final String domain_id, - final Writer out, - final double ie_cutoff, - final String domain_separator ) + private final static int extractProteinFeatures( final boolean use_domain_architectures, + final SortedMap> protein_lists_per_species, + final String domain_id, + final Writer out, + final double ie_cutoff, + final String domain_separator ) throws IOException { + int counter = 0; final String separator_for_output = "\t"; for( final Species species : protein_lists_per_species.keySet() ) { final List proteins_per_species = protein_lists_per_species.get( species ); @@ -261,6 +284,7 @@ public final class MinimalDomainomeCalculator { out.write( from + "-" + to ); out.write( "/" ); out.write( SurfacingConstants.NL ); + ++counter; } } else { @@ -315,11 +339,13 @@ public final class MinimalDomainomeCalculator { out.write( protein.getAccession() ); } out.write( SurfacingConstants.NL ); + ++counter; } } } } out.flush(); + return counter; } public static void main( final String[] args ) { -- 1.7.10.2