regex search
[jalview.git] / forester / java / src / org / forester / application / surfacing.java
index 5bd7d7b..141db17 100644 (file)
@@ -178,7 +178,7 @@ public class surfacing {
     final static private String                                     NO_ENGULFING_OVERLAP_OPTION                                                   = "no_eo";
     final static private String                                     IGNORE_COMBINATION_WITH_SAME_OPTION                                           = "ignore_self_comb";
     final static private String                                     PERFORM_DC_REGAIN_PROTEINS_STATS_OPTION                                       = "dc_regain_stats";
-    final static private String                                     DA_ANALYSIS_OPTION                                                            = "DA_analyis";
+    final static private String                                     DA_ANALYSIS_OPTION                                                            = "da_analyis";
     final static private String                                     USE_LAST_IN_FITCH_OPTION                                                      = "last";
     public final static String                                      PAIRWISE_DOMAIN_COMPARISONS_PREFIX                                            = "pwc_";
     final static private String                                     PAIRWISE_DOMAIN_COMPARISONS_OPTION                                            = "pwc";
@@ -215,8 +215,8 @@ public class surfacing {
     final static private String                                     INPUT_GENOMES_FILE_OPTION                                                     = "genomes";
     final static private String                                     INPUT_SPECIES_TREE_OPTION                                                     = "species_tree";
     final static private String                                     SEQ_EXTRACT_OPTION                                                            = "prot_extract";
-    final static private String                                     PRG_VERSION                                                                   = "2.403";
-    final static private String                                     PRG_DATE                                                                      = "131127";
+    final static private String                                     PRG_VERSION                                                                   = "2.404";
+    final static private String                                     PRG_DATE                                                                      = "140709";
     final static private String                                     E_MAIL                                                                        = "czmasek@burnham.org";
     final static private String                                     WWW                                                                           = "https://sites.google.com/site/cmzmasek/home/software/forester/surfacing";
     final static private boolean                                    IGNORE_DUFS_DEFAULT                                                           = true;
@@ -379,9 +379,9 @@ public class surfacing {
         if ( cla.isOptionSet( DOMAIN_COMBINITONS_OUTPUT_OPTION_FOR_GRAPH_ANALYSIS ) ) {
             output_binary_domain_combinationsfor_graph_analysis = true;
         }
-        final boolean output_binary_domain_combinationsfor_counts = false;
+        boolean output_binary_domain_combinationsfor_counts = false;
         if ( cla.isOptionSet( DOMAIN_COMBINITONS_COUNTS_OUTPUT_OPTION ) ) {
-            output_binary_domain_combinationsfor_graph_analysis = true;
+            output_binary_domain_combinationsfor_counts = true;
         }
         if ( cla.isOptionSet( surfacing.MAX_FS_E_VALUE_OPTION ) ) {
             try {
@@ -806,62 +806,56 @@ public class surfacing {
             }
             SurfacingUtil.checkForOutputFileWriteability( dcc_outfile );
         }
-        File pfam_to_go_file = null;
-        Map<String, List<GoId>> domain_id_to_go_ids_map = null;
-        int domain_id_to_go_ids_count = 0;
+        File pfam_to_go_file = new File( "pfam2go.txt" );
         if ( cla.isOptionSet( surfacing.PFAM_TO_GO_FILE_USE_OPTION ) ) {
             if ( !cla.isOptionValueSet( surfacing.PFAM_TO_GO_FILE_USE_OPTION ) ) {
                 ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for Pfam to GO mapping file: -"
                         + surfacing.PFAM_TO_GO_FILE_USE_OPTION + "=<file>" );
             }
             pfam_to_go_file = new File( cla.getOptionValue( surfacing.PFAM_TO_GO_FILE_USE_OPTION ) );
-            final String error = ForesterUtil.isReadableFile( pfam_to_go_file );
-            if ( !ForesterUtil.isEmpty( error ) ) {
-                ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read Pfam to GO mapping file: " + error );
-            }
-            try {
-                final PfamToGoParser parser = new PfamToGoParser( pfam_to_go_file );
-                final List<PfamToGoMapping> pfam_to_go_mappings = parser.parse();
-                domain_id_to_go_ids_map = SurfacingUtil.createDomainIdToGoIdMap( pfam_to_go_mappings );
-                if ( parser.getMappingCount() < domain_id_to_go_ids_map.size() ) {
-                    ForesterUtil.unexpectedFatalError( surfacing.PRG_NAME,
-                                                       "parser.getMappingCount() < domain_id_to_go_ids_map.size()" );
-                }
-                domain_id_to_go_ids_count = parser.getMappingCount();
-            }
-            catch ( final IOException e ) {
-                ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read from Pfam to GO mapping file: " + e );
+        }
+        final String error1 = ForesterUtil.isReadableFile( pfam_to_go_file );
+        if ( !ForesterUtil.isEmpty( error1 ) ) {
+            ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read Pfam to GO mapping file: " + error1 );
+        }
+        Map<String, List<GoId>> domain_id_to_go_ids_map = null;
+        int domain_id_to_go_ids_count = 0;
+        try {
+            final PfamToGoParser parser = new PfamToGoParser( pfam_to_go_file );
+            final List<PfamToGoMapping> pfam_to_go_mappings = parser.parse();
+            domain_id_to_go_ids_map = SurfacingUtil.createDomainIdToGoIdMap( pfam_to_go_mappings );
+            if ( parser.getMappingCount() < domain_id_to_go_ids_map.size() ) {
+                ForesterUtil.unexpectedFatalError( surfacing.PRG_NAME,
+                                                   "parser.getMappingCount() < domain_id_to_go_ids_map.size()" );
             }
+            domain_id_to_go_ids_count = parser.getMappingCount();
         }
-        File go_obo_file = null;
-        List<GoTerm> go_terms = null;
+        catch ( final IOException e ) {
+            ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read from Pfam to GO mapping file: " + e );
+        }
+        File go_obo_file = new File( "go.obo" );
         if ( cla.isOptionSet( surfacing.GO_OBO_FILE_USE_OPTION ) ) {
             if ( !cla.isOptionValueSet( surfacing.GO_OBO_FILE_USE_OPTION ) ) {
                 ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for GO OBO file: -"
                         + surfacing.GO_OBO_FILE_USE_OPTION + "=<file>" );
             }
-            if ( ( domain_id_to_go_ids_map == null ) || ( domain_id_to_go_ids_map.size() < 1 ) ) {
-                ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot use GO OBO file (-"
-                        + surfacing.GO_OBO_FILE_USE_OPTION + "=<file>) without Pfam to GO mapping file ("
-                        + surfacing.PFAM_TO_GO_FILE_USE_OPTION + "=<file>)" );
-            }
             go_obo_file = new File( cla.getOptionValue( surfacing.GO_OBO_FILE_USE_OPTION ) );
-            final String error = ForesterUtil.isReadableFile( go_obo_file );
-            if ( !ForesterUtil.isEmpty( error ) ) {
-                ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read GO OBO file: " + error );
-            }
-            try {
-                final OBOparser parser = new OBOparser( go_obo_file, OBOparser.ReturnType.BASIC_GO_TERM );
-                go_terms = parser.parse();
-                if ( parser.getGoTermCount() != go_terms.size() ) {
-                    ForesterUtil
-                            .unexpectedFatalError( surfacing.PRG_NAME, "parser.getGoTermCount() != go_terms.size()" );
-                }
-            }
-            catch ( final IOException e ) {
-                ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read from GO OBO file: " + e );
+        }
+        final String error2 = ForesterUtil.isReadableFile( go_obo_file );
+        if ( !ForesterUtil.isEmpty( error2 ) ) {
+            ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read GO OBO file: " + error2 );
+        }
+        List<GoTerm> go_terms = null;
+        try {
+            final OBOparser parser = new OBOparser( go_obo_file, OBOparser.ReturnType.BASIC_GO_TERM );
+            go_terms = parser.parse();
+            if ( parser.getGoTermCount() != go_terms.size() ) {
+                ForesterUtil.unexpectedFatalError( surfacing.PRG_NAME, "parser.getGoTermCount() != go_terms.size()" );
             }
         }
+        catch ( final IOException e ) {
+            ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read from GO OBO file: " + e );
+        }
         Map<GoId, GoTerm> go_id_to_term_map = null;
         if ( ( ( domain_id_to_go_ids_map != null ) && ( domain_id_to_go_ids_map.size() > 0 ) )
                 && ( ( go_terms != null ) && ( go_terms.size() > 0 ) ) ) {
@@ -1817,7 +1811,8 @@ public class surfacing {
         final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( domain_similarity_sort_field,
                                                                                      sort_by_species_count_first,
                                                                                      number_of_genomes == 2,
-                                                                                     CALC_SIMILARITY_SCORES );
+                                                                                     CALC_SIMILARITY_SCORES,
+                                                                                     true );
         switch ( scoring ) {
             case COMBINATIONS:
                 pw_calc = new CombinationsBasedPairwiseDomainSimilarityCalculator();
@@ -2061,7 +2056,8 @@ public class surfacing {
             SurfacingUtil.writeProteinListsForAllSpecies( out_dir,
                                                           protein_lists_per_species,
                                                           gwcd_list,
-                                                          output_list_of_all_proteins_per_domain_e_value_max );
+                                                          output_list_of_all_proteins_per_domain_e_value_max,
+                                                          positive_filter_file != null ? filter : null );
         }
         gwcd_list = null;
         if ( all_bin_domain_combinations_gained_fitch != null ) {
@@ -2194,16 +2190,12 @@ public class surfacing {
         System.out.println( DA_ANALYSIS_OPTION + ": to perform DA analysis" );
         System.out.println( PERFORM_DOMAIN_LENGTH_ANALYSIS_OPTION + ": to perform domain length analysis" );
         System.out.println();
-        System.out.println( "Example 1: java -Xms128m -Xmx512m -cp path/to/forester.jar"
-                + " org.forester.application.surfacing p2g=pfam2go_2012_02_07.txt -dufs -cos=Pfam_260_NC1"
-                + " -no_eo -mo=0 -genomes=eukaryotes.txt -out_dir=out -o=o "
-                + " -species_tree=tol.xml -obo=gene_ontology_2012_02_07.obo -pos_filter=f.txt -all_prot" );
         System.out.println();
-        System.out.println( "Example 2: java -Xms128m -Xmx512m -cp path/to/forester.jar"
-                + " org.forester.application.surfacing -detail=punctilious -o=TEST.html -pwc=TEST"
-                + " -cos=Pfam_ls_22_TC2 -p2g=pfam2go -obo=gene_ontology_edit.obo "
-                + "-dc_sort=dom -ignore_with_self -no_singles -ie=0.001 -mo=1 -no_eo -genomes=eukaryotes.txt "
-                + "-ds_output=detailed_html -scoring=domains -sort=alpha " );
+        System.out
+                .println( "Example 1: surfacing -p2g=pfam2go.txt -obo=go.obo -species_tree=tol_156.xml -no_eo -ie=0.01 -dufs -genomes=genomes_all.txt -pos_filter=tf_1.txt -out_dir=_tf1 -o=tf1" );
+        System.out.println();
+        System.out
+                .println( "Example 2: surfacing -p2g=pfam2go.txt -obo=go.obo -species_tree=tol_156.xml -last -ignore_viral_ids -no_eo -ie=0.1 -dufs -genomes=genomes_all.txt -pos_filter=tf_1.txt -all_prot -all_prot_e=0.1 -out_dir=_tf1_e01_ape01 -o=tf1_e01_ape01" );
         System.out.println();
     }
 }