clean up
[jalview.git] / forester / java / src / org / forester / surfacing / SurfacingUtil.java
index f23f865..e646ba4 100644 (file)
@@ -85,9 +85,9 @@ import org.forester.protein.BinaryDomainCombination;
 import org.forester.protein.Domain;
 import org.forester.protein.Protein;
 import org.forester.species.Species;
+import org.forester.surfacing.DomainSimilarity.PRINT_OPTION;
 import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
 import org.forester.surfacing.GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder;
-import org.forester.surfacing.PrintableDomainSimilarity.PRINT_OPTION;
 import org.forester.util.AsciiHistogram;
 import org.forester.util.BasicDescriptiveStatistics;
 import org.forester.util.BasicTable;
@@ -101,18 +101,18 @@ public final class SurfacingUtil {
 
     public final static Pattern              PATTERN_SP_STYLE_TAXONOMY        = Pattern.compile( "^[A-Z0-9]{3,5}$" );
     private final static Map<String, String> _TAXCODE_HEXCOLORSTRING_MAP      = new HashMap<String, String>();
+    private final static Map<String, String> _TAXCODE_TAXGROUP_MAP            = new HashMap<String, String>();
     private static final Comparator<Domain>  ASCENDING_CONFIDENCE_VALUE_ORDER = new Comparator<Domain>() {
 
                                                                                   @Override
                                                                                   public int compare( final Domain d1,
                                                                                                       final Domain d2 ) {
-                                                                                      if ( d1.getPerSequenceEvalue() < d2
-                                                                                              .getPerSequenceEvalue() ) {
+                                                                                      if ( d1.getPerDomainEvalue() < d2
+                                                                                              .getPerDomainEvalue() ) {
                                                                                           return -1;
                                                                                       }
-                                                                                      else if ( d1
-                                                                                              .getPerSequenceEvalue() > d2
-                                                                                              .getPerSequenceEvalue() ) {
+                                                                                      else if ( d1.getPerDomainEvalue() > d2
+                                                                                              .getPerDomainEvalue() ) {
                                                                                           return 1;
                                                                                       }
                                                                                       else {
@@ -157,7 +157,7 @@ public final class SurfacingUtil {
         }
     }
 
-    public static void checkWriteabilityForPairwiseComparisons( final PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option,
+    public static void checkWriteabilityForPairwiseComparisons( final DomainSimilarity.PRINT_OPTION domain_similarity_print_option,
                                                                 final String[][] input_file_properties,
                                                                 final String automated_pairwise_comparison_suffix,
                                                                 final File outdir ) {
@@ -198,14 +198,14 @@ public final class SurfacingUtil {
                         || ( !get_gains && ( matrix.getState( id, c ) == CharacterStateMatrix.GainLossStates.LOSS ) ) ) {
                     if ( dc_type == BinaryDomainCombination.DomainCombinationType.DIRECTED_ADJACTANT ) {
                         all_binary_domains_combination_gained.add( AdjactantDirectedBinaryDomainCombination
-                                .createInstance( matrix.getCharacter( c ) ) );
+                                .obtainInstance( matrix.getCharacter( c ) ) );
                     }
                     else if ( dc_type == BinaryDomainCombination.DomainCombinationType.DIRECTED ) {
                         all_binary_domains_combination_gained.add( DirectedBinaryDomainCombination
-                                .createInstance( matrix.getCharacter( c ) ) );
+                                .obtainInstance( matrix.getCharacter( c ) ) );
                     }
                     else {
-                        all_binary_domains_combination_gained.add( BasicBinaryDomainCombination.createInstance( matrix
+                        all_binary_domains_combination_gained.add( BasicBinaryDomainCombination.obtainInstance( matrix
                                 .getCharacter( c ) ) );
                     }
                 }
@@ -248,13 +248,15 @@ public final class SurfacingUtil {
     }
 
     public static StringBuilder createParametersAsString( final boolean ignore_dufs,
-                                                          final double e_value_max,
+                                                          final double ie_value_max,
+                                                          final double fs_e_value_max,
                                                           final int max_allowed_overlap,
                                                           final boolean no_engulfing_overlaps,
                                                           final File cutoff_scores_file,
                                                           final BinaryDomainCombination.DomainCombinationType dc_type ) {
         final StringBuilder parameters_sb = new StringBuilder();
-        parameters_sb.append( "E-value: " + e_value_max );
+        parameters_sb.append( "iE-value: " + ie_value_max );
+        parameters_sb.append( ", FS E-value: " + fs_e_value_max );
         if ( cutoff_scores_file != null ) {
             parameters_sb.append( ", Cutoff-scores-file: " + cutoff_scores_file );
         }
@@ -377,8 +379,8 @@ public final class SurfacingUtil {
     public static void decoratePrintableDomainSimilarities( final SortedSet<DomainSimilarity> domain_similarities,
                                                             final Detailedness detailedness ) {
         for( final DomainSimilarity domain_similarity : domain_similarities ) {
-            if ( domain_similarity instanceof PrintableDomainSimilarity ) {
-                final PrintableDomainSimilarity printable_domain_similarity = ( PrintableDomainSimilarity ) domain_similarity;
+            if ( domain_similarity instanceof DomainSimilarity ) {
+                final DomainSimilarity printable_domain_similarity = domain_similarity;
                 printable_domain_similarity.setDetailedness( detailedness );
             }
         }
@@ -666,6 +668,7 @@ public final class SurfacingUtil {
      * 
      * @param all_binary_domains_combination_lost_fitch 
      * @param use_last_in_fitch_parsimony 
+     * @param perform_dc_fich 
      * @param consider_directedness_and_adjacency_for_bin_combinations 
      * @param all_binary_domains_combination_gained if null ignored, otherwise this is to list all binary domain combinations
      * which were gained under unweighted (Fitch) parsimony.
@@ -690,7 +693,8 @@ public final class SurfacingUtil {
                                                  final Map<String, DescriptiveStatistics> domain_length_stats_by_domain,
                                                  final Map<String, Integer> tax_code_to_id_map,
                                                  final boolean write_to_nexus,
-                                                 final boolean use_last_in_fitch_parsimony ) {
+                                                 final boolean use_last_in_fitch_parsimony,
+                                                 final boolean perform_dc_fich ) {
         final String sep = ForesterUtil.LINE_SEPARATOR + "###################" + ForesterUtil.LINE_SEPARATOR;
         final String date_time = ForesterUtil.getCurrentDateTime();
         final SortedSet<String> all_pfams_encountered = new TreeSet<String>();
@@ -793,7 +797,7 @@ public final class SurfacingUtil {
             e.printStackTrace();
             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() );
         }
-        if ( domain_parsimony.calculateNumberOfBinaryDomainCombination() > 0 ) {
+        if ( perform_dc_fich && ( domain_parsimony.calculateNumberOfBinaryDomainCombination() > 0 ) ) {
             // FITCH DOMAIN COMBINATIONS
             // -------------------------
             local_phylogeny_l = phylogeny.copy();
@@ -1342,8 +1346,51 @@ public final class SurfacingUtil {
             throws IllegalArgumentException {
         if ( !_TAXCODE_HEXCOLORSTRING_MAP.containsKey( tax_code ) ) {
             if ( ( phy != null ) && !phy.isEmpty() ) {
-                final List<PhylogenyNode> nodes = phy.getNodesViaTaxonomyCode( tax_code );
-                Color c = null;
+                //                final List<PhylogenyNode> nodes = phy.getNodesViaTaxonomyCode( tax_code );
+                //                Color c = null;
+                //                if ( ( nodes == null ) || nodes.isEmpty() ) {
+                //                    throw new IllegalArgumentException( "code " + tax_code + " is not found" );
+                //                }
+                //                if ( nodes.size() != 1 ) {
+                //                    throw new IllegalArgumentException( "code " + tax_code + " is not unique" );
+                //                }
+                //                PhylogenyNode n = nodes.get( 0 );
+                //                while ( n != null ) {
+                //                    if ( n.getNodeData().isHasTaxonomy()
+                //                            && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
+                //                        c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( n.getNodeData().getTaxonomy()
+                //                                .getScientificName(), tax_code );
+                //                    }
+                //                    if ( ( c == null ) && !ForesterUtil.isEmpty( n.getName() ) ) {
+                //                        c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( n.getName(), tax_code );
+                //                    }
+                //                    if ( c != null ) {
+                //                        break;
+                //                    }
+                //                    n = n.getParent();
+                //                }
+                final String group = obtainTaxonomyGroup( tax_code, phy );
+                final Color c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( group );
+                if ( c == null ) {
+                    throw new IllegalArgumentException( "no color found for taxonomy group \"" + group
+                            + "\" for code \"" + tax_code + "\"" );
+                }
+                final String hex = String.format( "#%02x%02x%02x", c.getRed(), c.getGreen(), c.getBlue() );
+                _TAXCODE_HEXCOLORSTRING_MAP.put( tax_code, hex );
+            }
+            else {
+                throw new IllegalArgumentException( "unable to obtain color for code " + tax_code
+                        + " (tree is null or empty and code is not in map)" );
+            }
+        }
+        return _TAXCODE_HEXCOLORSTRING_MAP.get( tax_code );
+    }
+
+    public static String obtainTaxonomyGroup( final String tax_code, final Phylogeny species_tree )
+            throws IllegalArgumentException {
+        if ( !_TAXCODE_TAXGROUP_MAP.containsKey( tax_code ) ) {
+            if ( ( species_tree != null ) && !species_tree.isEmpty() ) {
+                final List<PhylogenyNode> nodes = species_tree.getNodesViaTaxonomyCode( tax_code );
                 if ( ( nodes == null ) || nodes.isEmpty() ) {
                     throw new IllegalArgumentException( "code " + tax_code + " is not found" );
                 }
@@ -1351,32 +1398,32 @@ public final class SurfacingUtil {
                     throw new IllegalArgumentException( "code " + tax_code + " is not unique" );
                 }
                 PhylogenyNode n = nodes.get( 0 );
+                String group = null;
                 while ( n != null ) {
                     if ( n.getNodeData().isHasTaxonomy()
                             && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
-                        c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( n.getNodeData().getTaxonomy()
-                                .getScientificName(), tax_code );
+                        group = ForesterUtil.obtainNormalizedTaxonomyGroup( n.getNodeData().getTaxonomy()
+                                .getScientificName() );
                     }
-                    if ( ( c == null ) && !ForesterUtil.isEmpty( n.getName() ) ) {
-                        c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( n.getName(), tax_code );
+                    if ( ForesterUtil.isEmpty( group ) && !ForesterUtil.isEmpty( n.getName() ) ) {
+                        group = ForesterUtil.obtainNormalizedTaxonomyGroup( n.getName() );
                     }
-                    if ( c != null ) {
+                    if ( !ForesterUtil.isEmpty( group ) ) {
                         break;
                     }
                     n = n.getParent();
                 }
-                if ( c == null ) {
-                    throw new IllegalArgumentException( "no color found for taxonomy code \"" + tax_code + "\"" );
+                if ( ForesterUtil.isEmpty( group ) ) {
+                    throw new IllegalArgumentException( "no group found for taxonomy code \"" + tax_code + "\"" );
                 }
-                final String hex = String.format( "#%02x%02x%02x", c.getRed(), c.getGreen(), c.getBlue() );
-                _TAXCODE_HEXCOLORSTRING_MAP.put( tax_code, hex );
+                _TAXCODE_TAXGROUP_MAP.put( tax_code, group );
             }
             else {
-                throw new IllegalArgumentException( "unable to obtain color for code " + tax_code
+                throw new IllegalArgumentException( "unable to obtain group for code " + tax_code
                         + " (tree is null or empty and code is not in map)" );
             }
         }
-        return _TAXCODE_HEXCOLORSTRING_MAP.get( tax_code );
+        return _TAXCODE_TAXGROUP_MAP.get( tax_code );
     }
 
     public static void performDomainArchitectureAnalysis( final SortedMap<String, Set<String>> domain_architecutures,
@@ -1839,9 +1886,16 @@ public final class SurfacingUtil {
         catch ( final IOException e ) {
             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() );
         }
-        ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote binary domain combination for \""
-                + input_file_properties[ i ][ 0 ] + "\" (" + input_file_properties[ i ][ 1 ] + ", "
-                + input_file_properties[ i ][ 2 ] + ") to: \"" + dc_outfile_dot + "\"" );
+        if ( input_file_properties[ i ].length == 3 ) {
+            ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote binary domain combination for \""
+                    + input_file_properties[ i ][ 0 ] + "\" (" + input_file_properties[ i ][ 1 ] + ", "
+                    + input_file_properties[ i ][ 2 ] + ") to: \"" + dc_outfile_dot + "\"" );
+        }
+        else {
+            ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote binary domain combination for \""
+                    + input_file_properties[ i ][ 0 ] + "\" (" + input_file_properties[ i ][ 1 ] + ") to: \""
+                    + dc_outfile_dot + "\"" );
+        }
     }
 
     public static void writeBinaryStatesMatrixAsListToFile( final CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix,
@@ -1913,7 +1967,7 @@ public final class SurfacingUtil {
                                     .getState( id, c ) == CharacterStateMatrix.GainLossStates.UNCHANGED_PRESENT ) ) ) ) {
                         BinaryDomainCombination bdc = null;
                         try {
-                            bdc = BasicBinaryDomainCombination.createInstance( matrix.getCharacter( c ) );
+                            bdc = BasicBinaryDomainCombination.obtainInstance( matrix.getCharacter( c ) );
                         }
                         catch ( final Exception e ) {
                             ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() );
@@ -2177,11 +2231,12 @@ public final class SurfacingUtil {
                                                       final SortedSet<DomainSimilarity> similarities,
                                                       final boolean treat_as_binary,
                                                       final List<Species> species_order,
-                                                      final PrintableDomainSimilarity.PRINT_OPTION print_option,
+                                                      final DomainSimilarity.PRINT_OPTION print_option,
                                                       final DomainSimilarity.DomainSimilarityScoring scoring,
                                                       final boolean verbose,
                                                       final Map<String, Integer> tax_code_to_id_map,
-                                                      final Phylogeny phy ) throws IOException {
+                                                      final Phylogeny phy,
+                                                      final Set<String> pos_filter_doms ) throws IOException {
         if ( ( single_writer != null ) && ( ( split_writers == null ) || split_writers.isEmpty() ) ) {
             split_writers = new HashMap<Character, Writer>();
             split_writers.put( '_', single_writer );
@@ -2219,11 +2274,18 @@ public final class SurfacingUtil {
         //
         for( final DomainSimilarity similarity : similarities ) {
             if ( ( species_order != null ) && !species_order.isEmpty() ) {
-                ( ( PrintableDomainSimilarity ) similarity ).setSpeciesOrder( species_order );
+                ( similarity ).setSpeciesOrder( species_order );
             }
             if ( single_writer != null ) {
-                single_writer.write( "<tr><td><b><a href=\"#" + similarity.getDomainId() + "\">"
-                        + similarity.getDomainId() + "</a></b></td></tr>" );
+                if ( !ForesterUtil.isEmpty( pos_filter_doms ) && pos_filter_doms.contains( similarity.getDomainId() ) ) {
+                    single_writer.write( "<tr><td><b><a href=\"#" + similarity.getDomainId()
+                            + "\"><span style=\"color:#00ff00\">" + similarity.getDomainId()
+                            + "</span></a></b></td></tr>" );
+                }
+                else {
+                    single_writer.write( "<tr><td><b><a href=\"#" + similarity.getDomainId() + "\">"
+                            + similarity.getDomainId() + "</a></b></td></tr>" );
+                }
                 single_writer.write( SurfacingConstants.NL );
             }
             else {
@@ -2232,8 +2294,15 @@ public final class SurfacingUtil {
                 if ( local_writer == null ) {
                     local_writer = split_writers.get( '0' );
                 }
-                local_writer.write( "<tr><td><b><a href=\"#" + similarity.getDomainId() + "\">"
-                        + similarity.getDomainId() + "</a></b></td></tr>" );
+                if ( !ForesterUtil.isEmpty( pos_filter_doms ) && pos_filter_doms.contains( similarity.getDomainId() ) ) {
+                    local_writer.write( "<tr><td><b><a href=\"#" + similarity.getDomainId()
+                            + "\"><span style=\"color:#00ff00\">" + similarity.getDomainId()
+                            + "</span></a></b></td></tr>" );
+                }
+                else {
+                    local_writer.write( "<tr><td><b><a href=\"#" + similarity.getDomainId() + "\">"
+                            + similarity.getDomainId() + "</a></b></td></tr>" );
+                }
                 local_writer.write( SurfacingConstants.NL );
             }
         }
@@ -2290,7 +2359,7 @@ public final class SurfacingUtil {
         //
         for( final DomainSimilarity similarity : similarities ) {
             if ( ( species_order != null ) && !species_order.isEmpty() ) {
-                ( ( PrintableDomainSimilarity ) similarity ).setSpeciesOrder( species_order );
+                ( similarity ).setSpeciesOrder( species_order );
             }
             if ( simple_tab_writer != null ) {
                 simple_tab_writer.write( similarity.toStringBuffer( PRINT_OPTION.SIMPLE_TAB_DELIMITED,
@@ -2449,12 +2518,16 @@ public final class SurfacingUtil {
     public static void writeProteinListsForAllSpecies( final File output_dir,
                                                        final SortedMap<Species, List<Protein>> protein_lists_per_species,
                                                        final List<GenomeWideCombinableDomains> gwcd_list,
-                                                       final double domain_e_cutoff ) {
+                                                       final double domain_e_cutoff,
+                                                       final Set<String> pos_filter_doms ) {
         final SortedSet<String> all_domains = new TreeSet<String>();
         for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
             all_domains.addAll( gwcd.getAllDomainIds() );
         }
         for( final String domain : all_domains ) {
+            if ( !ForesterUtil.isEmpty( pos_filter_doms ) && !pos_filter_doms.contains( domain ) ) {
+                continue;
+            }
             final File out = new File( output_dir + ForesterUtil.FILE_SEPARATOR + domain + surfacing.SEQ_EXTRACT_SUFFIX );
             checkForOutputFileWriteability( out );
             try {