in progress...
author cmzmasek <chris.zma@outlook.com>
Mon, 27 Mar 2017 22:11:11 +0000 (15:11 -0700)
committer cmzmasek <chris.zma@outlook.com>
Mon, 27 Mar 2017 22:11:11 +0000 (15:11 -0700)
forester/java/src/org/forester/application/surfacing.java
forester/java/src/org/forester/surfacing/MinimalDomainomeCalculator.java

index 88be3d2..c7319e0 100644 (file)
@@ -217,8 +217,8 @@ public class surfacing {
     final static private String                                     INPUT_GENOMES_FILE_OPTION                                                     = "genomes";
     final static private String                                     INPUT_SPECIES_TREE_OPTION                                                     = "species_tree";
     final static private String                                     SEQ_EXTRACT_OPTION                                                            = "prot_extract";
-    final static private String                                     PRG_VERSION                                                                   = "2.500";
-    final static private String                                     PRG_DATE                                                                      = "170323";
+    final static private String                                     PRG_VERSION                                                                   = "2.501";
+    final static private String                                     PRG_DATE                                                                      = "170327";
     final static private String                                     E_MAIL                                                                        = "czmasek@burnham.org";
     final static private String                                     WWW                                                                           = "https://sites.google.com/site/cmzmasek/home/software/forester/surfacing";
     final static private boolean                                    IGNORE_DUFS_DEFAULT                                                           = true;
@@ -272,6 +272,7 @@ public class surfacing {
     public static final String                                      INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_SUFFIX        = "_indep_dc_gains_fitch_lists_for_go_mapping_MAPPED.txt";
     public static final String                                      INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique_MAPPED.txt";
     private static final boolean                                    CALC_SIMILARITY_SCORES                                                        = false;
+    private static final String                                     SEPARATOR_FOR_DA                                                              = "--";
 
     @SuppressWarnings( "unchecked")
     public static void main( final String args[] ) {
@@ -1776,23 +1777,25 @@ public class surfacing {
                 .programMessage( PRG_NAME,
                                  "Wrote domain promiscuities to: " + per_genome_domain_promiscuity_statistics_file );
         try {
-            MinimalDomainomeCalculator.calcOme( false,
-                                                intrees[ 0 ],
-                                                protein_lists_per_species,
-                                                "---",
-                                                -1,
-                                                out_dir.toString() + "/" + output_file );
+            MinimalDomainomeCalculator.calc( false,
+                                             intrees[ 0 ],
+                                             protein_lists_per_species,
+                                             SEPARATOR_FOR_DA,
+                                             -1,
+                                             out_dir.toString() + "/" + output_file,
+                                             true );
         }
         catch ( IOException e ) {
             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() );
         }
         try {
-            MinimalDomainomeCalculator.calcOme( true,
-                                                intrees[ 0 ],
-                                                protein_lists_per_species,
-                                                "---",
-                                                -1,
-                                                out_dir.toString() + "/" + output_file );
+            MinimalDomainomeCalculator.calc( true,
+                                             intrees[ 0 ],
+                                             protein_lists_per_species,
+                                             SEPARATOR_FOR_DA,
+                                             -1,
+                                             out_dir.toString() + "/" + output_file,
+                                             true );
         }
         catch ( IOException e ) {
             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() );
@@ -2207,6 +2210,9 @@ public class surfacing {
                                  "Free memory       : " + free_memory + "MB, total memory: " + total_memory + "MB" );
         ForesterUtil.programMessage( PRG_NAME, "If this application is useful to you, please cite:" );
         ForesterUtil.programMessage( PRG_NAME, surfacing.WWW );
+        ForesterUtil
+                .programMessage( PRG_NAME,
+                                 "[next step for phylogenomic analysis pipeline (example, in \"DAS\" dir): % mse.rb .prot . FL_seqs DA_seqs ../../genome_locations.txt]" );
         ForesterUtil.programMessage( PRG_NAME, "OK" );
         System.out.println();
     }
index 9abb02a..ab82419 100644 (file)
@@ -30,12 +30,13 @@ import org.forester.util.ForesterUtil;
 
 public final class MinimalDomainomeCalculator {
 
-    static final public void calcOme( final boolean use_domain_architectures,
-                                      final Phylogeny tre,
-                                      final SortedMap<Species, List<Protein>> protein_lists_per_species,
-                                      final String separator,
-                                      final double ie_cutoff,
-                                      final String outfile_base )
+    public final static void calc( final boolean use_domain_architectures,
+                                   final Phylogeny tre,
+                                   final SortedMap<Species, List<Protein>> protein_lists_per_species,
+                                   final String separator,
+                                   final double ie_cutoff,
+                                   final String outfile_base,
+                                   final boolean write_protein_files )
             throws IOException {
         final SortedMap<String, SortedSet<String>> species_to_features_map = new TreeMap<String, SortedSet<String>>();
         if ( protein_lists_per_species == null || tre == null ) {
@@ -192,25 +193,46 @@ public final class MinimalDomainomeCalculator {
         out_table.close();
         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote minimal DAome data to           : " + outfile );
         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote minimal DAome data to (as table): " + outfile_table );
-        for( String f : all_features ) {
+        if ( write_protein_files ) {
+            final String protdirname;
             final String a;
+            final String b;
             if ( use_domain_architectures ) {
-                a = "DA_";
+                a = "_DA";
+                b = "domain architectures (DAs)";
+                protdirname = "_DAS";
             }
             else {
-                a = "domain_";
+                a = "_domain";
+                b = "domains";
+                protdirname = "_DOMAINS";
             }
-            final File prot_dir = new File( outfile_base + "_prot" );
-            prot_dir.mkdir();
-            final File outt = new File( outfile_base + "_prot/" + a + f + surfacing.SEQ_EXTRACT_SUFFIX );
-            final Writer proteins_file_writer = new BufferedWriter( new FileWriter( outt ) );
-            extractProteinFeatures( use_domain_architectures,
-                                    protein_lists_per_species,
-                                    f,
-                                    proteins_file_writer,
-                                    ie_cutoff,
-                                    separator );
-            proteins_file_writer.close();
+            final File prot_dir = new File( outfile_base + protdirname );
+            final boolean success = prot_dir.mkdir();
+            if ( !success ) {
+                throw new IOException( "failed to create dir " + prot_dir );
+            }
+            int total = 0;
+            final String dir = outfile_base + protdirname + "/";
+            for( final String feat : all_features ) {
+                final File extract_outfile = new File( dir + feat + a + surfacing.SEQ_EXTRACT_SUFFIX );
+                SurfacingUtil.checkForOutputFileWriteability( extract_outfile );
+                final Writer proteins_file_writer = new BufferedWriter( new FileWriter( extract_outfile ) );
+                final int counter = extractProteinFeatures( use_domain_architectures,
+                                                            protein_lists_per_species,
+                                                            feat,
+                                                            proteins_file_writer,
+                                                            ie_cutoff,
+                                                            separator );
+                if ( counter < 1 ) {
+                    ForesterUtil.printWarningMessage( "surfacing", feat + " not present (in " + b + " extraction)" );
+                }
+                total += counter;
+                proteins_file_writer.close();
+            }
+            ForesterUtil.programMessage( "surfacing",
+                                         "Wrote " + total + " individual " + b + " from a total of "
+                                                 + all_features.size() + " into: " + dir );
         }
     }
 
@@ -226,13 +248,14 @@ public final class MinimalDomainomeCalculator {
         return my_first;
     }
 
-    public static void extractProteinFeatures( final boolean use_domain_architectures,
-                                               final SortedMap<Species, List<Protein>> protein_lists_per_species,
-                                               final String domain_id,
-                                               final Writer out,
-                                               final double ie_cutoff,
-                                               final String domain_separator )
+    private final static int extractProteinFeatures( final boolean use_domain_architectures,
+                                                     final SortedMap<Species, List<Protein>> protein_lists_per_species,
+                                                     final String domain_id,
+                                                     final Writer out,
+                                                     final double ie_cutoff,
+                                                     final String domain_separator )
             throws IOException {
+        int counter = 0;
         final String separator_for_output = "\t";
         for( final Species species : protein_lists_per_species.keySet() ) {
             final List<Protein> proteins_per_species = protein_lists_per_species.get( species );
@@ -261,6 +284,7 @@ public final class MinimalDomainomeCalculator {
                         out.write( from + "-" + to );
                         out.write( "/" );
                         out.write( SurfacingConstants.NL );
+                        ++counter;
                     }
                 }
                 else {
@@ -315,11 +339,13 @@ public final class MinimalDomainomeCalculator {
                             out.write( protein.getAccession() );
                         }
                         out.write( SurfacingConstants.NL );
+                        ++counter;
                     }
                 }
             }
         }
         out.flush();
+        return counter;
     }
 
     public static void main( final String[] args ) {