in progress
[jalview.git] / forester / java / src / org / forester / surfacing / SurfacingUtil.java
index c4f98d6..e646ba4 100644 (file)
@@ -85,9 +85,9 @@ import org.forester.protein.BinaryDomainCombination;
 import org.forester.protein.Domain;
 import org.forester.protein.Protein;
 import org.forester.species.Species;
+import org.forester.surfacing.DomainSimilarity.PRINT_OPTION;
 import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
 import org.forester.surfacing.GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder;
-import org.forester.surfacing.PrintableDomainSimilarity.PRINT_OPTION;
 import org.forester.util.AsciiHistogram;
 import org.forester.util.BasicDescriptiveStatistics;
 import org.forester.util.BasicTable;
@@ -107,13 +107,12 @@ public final class SurfacingUtil {
                                                                                   @Override
                                                                                   public int compare( final Domain d1,
                                                                                                       final Domain d2 ) {
-                                                                                      if ( d1.getPerSequenceEvalue() < d2
-                                                                                              .getPerSequenceEvalue() ) {
+                                                                                      if ( d1.getPerDomainEvalue() < d2
+                                                                                              .getPerDomainEvalue() ) {
                                                                                           return -1;
                                                                                       }
-                                                                                      else if ( d1
-                                                                                              .getPerSequenceEvalue() > d2
-                                                                                              .getPerSequenceEvalue() ) {
+                                                                                      else if ( d1.getPerDomainEvalue() > d2
+                                                                                              .getPerDomainEvalue() ) {
                                                                                           return 1;
                                                                                       }
                                                                                       else {
@@ -143,9 +142,9 @@ public final class SurfacingUtil {
         }
     }
 
-    public static DescriptiveStatistics calculateDescriptiveStatisticsForMeanValues( final Set<PrintableDomainSimilarity> similarities ) {
+    public static DescriptiveStatistics calculateDescriptiveStatisticsForMeanValues( final Set<DomainSimilarity> similarities ) {
         final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
-        for( final PrintableDomainSimilarity similarity : similarities ) {
+        for( final DomainSimilarity similarity : similarities ) {
             stats.addValue( similarity.getMeanSimilarityScore() );
         }
         return stats;
@@ -158,7 +157,7 @@ public final class SurfacingUtil {
         }
     }
 
-    public static void checkWriteabilityForPairwiseComparisons( final PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option,
+    public static void checkWriteabilityForPairwiseComparisons( final DomainSimilarity.PRINT_OPTION domain_similarity_print_option,
                                                                 final String[][] input_file_properties,
                                                                 final String automated_pairwise_comparison_suffix,
                                                                 final File outdir ) {
@@ -199,14 +198,14 @@ public final class SurfacingUtil {
                         || ( !get_gains && ( matrix.getState( id, c ) == CharacterStateMatrix.GainLossStates.LOSS ) ) ) {
                     if ( dc_type == BinaryDomainCombination.DomainCombinationType.DIRECTED_ADJACTANT ) {
                         all_binary_domains_combination_gained.add( AdjactantDirectedBinaryDomainCombination
-                                .createInstance( matrix.getCharacter( c ) ) );
+                                .obtainInstance( matrix.getCharacter( c ) ) );
                     }
                     else if ( dc_type == BinaryDomainCombination.DomainCombinationType.DIRECTED ) {
                         all_binary_domains_combination_gained.add( DirectedBinaryDomainCombination
-                                .createInstance( matrix.getCharacter( c ) ) );
+                                .obtainInstance( matrix.getCharacter( c ) ) );
                     }
                     else {
-                        all_binary_domains_combination_gained.add( BasicBinaryDomainCombination.createInstance( matrix
+                        all_binary_domains_combination_gained.add( BasicBinaryDomainCombination.obtainInstance( matrix
                                 .getCharacter( c ) ) );
                     }
                 }
@@ -249,13 +248,15 @@ public final class SurfacingUtil {
     }
 
     public static StringBuilder createParametersAsString( final boolean ignore_dufs,
-                                                          final double e_value_max,
+                                                          final double ie_value_max,
+                                                          final double fs_e_value_max,
                                                           final int max_allowed_overlap,
                                                           final boolean no_engulfing_overlaps,
                                                           final File cutoff_scores_file,
                                                           final BinaryDomainCombination.DomainCombinationType dc_type ) {
         final StringBuilder parameters_sb = new StringBuilder();
-        parameters_sb.append( "E-value: " + e_value_max );
+        parameters_sb.append( "iE-value: " + ie_value_max );
+        parameters_sb.append( ", FS E-value: " + fs_e_value_max );
         if ( cutoff_scores_file != null ) {
             parameters_sb.append( ", Cutoff-scores-file: " + cutoff_scores_file );
         }
@@ -375,11 +376,11 @@ public final class SurfacingUtil {
         return m;
     }
 
-    public static void decoratePrintableDomainSimilarities( final SortedSet<PrintableDomainSimilarity> domain_similarities,
+    public static void decoratePrintableDomainSimilarities( final SortedSet<DomainSimilarity> domain_similarities,
                                                             final Detailedness detailedness ) {
-        for( final PrintableDomainSimilarity domain_similarity : domain_similarities ) {
-            if ( domain_similarity instanceof PrintableDomainSimilarity ) {
-                final PrintableDomainSimilarity printable_domain_similarity = domain_similarity;
+        for( final DomainSimilarity domain_similarity : domain_similarities ) {
+            if ( domain_similarity instanceof DomainSimilarity ) {
+                final DomainSimilarity printable_domain_similarity = domain_similarity;
                 printable_domain_similarity.setDetailedness( detailedness );
             }
         }
@@ -667,6 +668,7 @@ public final class SurfacingUtil {
      * 
      * @param all_binary_domains_combination_lost_fitch 
      * @param use_last_in_fitch_parsimony 
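+     * @param perform_dc_fich if false, the Fitch parsimony step on binary domain combinations is skipped; if true, it runs when at least one binary domain combination exists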
      * @param consider_directedness_and_adjacency_for_bin_combinations 
      * @param all_binary_domains_combination_gained if null ignored, otherwise this is to list all binary domain combinations
      * which were gained under unweighted (Fitch) parsimony.
@@ -691,7 +693,8 @@ public final class SurfacingUtil {
                                                  final Map<String, DescriptiveStatistics> domain_length_stats_by_domain,
                                                  final Map<String, Integer> tax_code_to_id_map,
                                                  final boolean write_to_nexus,
-                                                 final boolean use_last_in_fitch_parsimony ) {
+                                                 final boolean use_last_in_fitch_parsimony,
+                                                 final boolean perform_dc_fich ) {
         final String sep = ForesterUtil.LINE_SEPARATOR + "###################" + ForesterUtil.LINE_SEPARATOR;
         final String date_time = ForesterUtil.getCurrentDateTime();
         final SortedSet<String> all_pfams_encountered = new TreeSet<String>();
@@ -794,7 +797,7 @@ public final class SurfacingUtil {
             e.printStackTrace();
             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() );
         }
-        if ( domain_parsimony.calculateNumberOfBinaryDomainCombination() > 0 ) {
+        if ( perform_dc_fich && ( domain_parsimony.calculateNumberOfBinaryDomainCombination() > 0 ) ) {
             // FITCH DOMAIN COMBINATIONS
             // -------------------------
             local_phylogeny_l = phylogeny.copy();
@@ -1883,9 +1886,16 @@ public final class SurfacingUtil {
         catch ( final IOException e ) {
             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() );
         }
-        ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote binary domain combination for \""
-                + input_file_properties[ i ][ 0 ] + "\" (" + input_file_properties[ i ][ 1 ] + ", "
-                + input_file_properties[ i ][ 2 ] + ") to: \"" + dc_outfile_dot + "\"" );
+        if ( input_file_properties[ i ].length == 3 ) {
+            ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote binary domain combination for \""
+                    + input_file_properties[ i ][ 0 ] + "\" (" + input_file_properties[ i ][ 1 ] + ", "
+                    + input_file_properties[ i ][ 2 ] + ") to: \"" + dc_outfile_dot + "\"" );
+        }
+        else {
+            ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote binary domain combination for \""
+                    + input_file_properties[ i ][ 0 ] + "\" (" + input_file_properties[ i ][ 1 ] + ") to: \""
+                    + dc_outfile_dot + "\"" );
+        }
     }
 
     public static void writeBinaryStatesMatrixAsListToFile( final CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix,
@@ -1957,7 +1967,7 @@ public final class SurfacingUtil {
                                     .getState( id, c ) == CharacterStateMatrix.GainLossStates.UNCHANGED_PRESENT ) ) ) ) {
                         BinaryDomainCombination bdc = null;
                         try {
-                            bdc = BasicBinaryDomainCombination.createInstance( matrix.getCharacter( c ) );
+                            bdc = BasicBinaryDomainCombination.obtainInstance( matrix.getCharacter( c ) );
                         }
                         catch ( final Exception e ) {
                             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() );
@@ -2218,11 +2228,11 @@ public final class SurfacingUtil {
                                                       final Writer simple_tab_writer,
                                                       final Writer single_writer,
                                                       Map<Character, Writer> split_writers,
-                                                      final SortedSet<PrintableDomainSimilarity> similarities,
+                                                      final SortedSet<DomainSimilarity> similarities,
                                                       final boolean treat_as_binary,
                                                       final List<Species> species_order,
-                                                      final PrintableDomainSimilarity.PRINT_OPTION print_option,
-                                                      final PrintableDomainSimilarity.DomainSimilarityScoring scoring,
+                                                      final DomainSimilarity.PRINT_OPTION print_option,
+                                                      final DomainSimilarity.DomainSimilarityScoring scoring,
                                                       final boolean verbose,
                                                       final Map<String, Integer> tax_code_to_id_map,
                                                       final Phylogeny phy,
@@ -2262,7 +2272,7 @@ public final class SurfacingUtil {
                 break;
         }
         //
-        for( final PrintableDomainSimilarity similarity : similarities ) {
+        for( final DomainSimilarity similarity : similarities ) {
             if ( ( species_order != null ) && !species_order.isEmpty() ) {
                 ( similarity ).setSpeciesOrder( species_order );
             }
@@ -2347,7 +2357,7 @@ public final class SurfacingUtil {
             w.write( SurfacingConstants.NL );
         }
         //
-        for( final PrintableDomainSimilarity similarity : similarities ) {
+        for( final DomainSimilarity similarity : similarities ) {
             if ( ( species_order != null ) && !species_order.isEmpty() ) {
                 ( similarity ).setSpeciesOrder( species_order );
             }
@@ -2508,12 +2518,16 @@ public final class SurfacingUtil {
     public static void writeProteinListsForAllSpecies( final File output_dir,
                                                        final SortedMap<Species, List<Protein>> protein_lists_per_species,
                                                        final List<GenomeWideCombinableDomains> gwcd_list,
-                                                       final double domain_e_cutoff ) {
+                                                       final double domain_e_cutoff,
+                                                       final Set<String> pos_filter_doms ) {
         final SortedSet<String> all_domains = new TreeSet<String>();
         for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
             all_domains.addAll( gwcd.getAllDomainIds() );
         }
         for( final String domain : all_domains ) {
+            if ( !ForesterUtil.isEmpty( pos_filter_doms ) && !pos_filter_doms.contains( domain ) ) {
+                continue;
+            }
             final File out = new File( output_dir + ForesterUtil.FILE_SEPARATOR + domain + surfacing.SEQ_EXTRACT_SUFFIX );
             checkForOutputFileWriteability( out );
             try {