X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fsurfacing%2FSurfacingUtil.java;h=e646ba4b40603d395cb35d555a05043896180e58;hb=7e567e18a5ba32035a8db4ca041023c9e862d25b;hp=b605e66f377e10750528b6dadf4686c8aaa6739e;hpb=41ea5973f93687513d29e5b7cad1abff8f3adb4b;p=jalview.git diff --git a/forester/java/src/org/forester/surfacing/SurfacingUtil.java b/forester/java/src/org/forester/surfacing/SurfacingUtil.java index b605e66..e646ba4 100644 --- a/forester/java/src/org/forester/surfacing/SurfacingUtil.java +++ b/forester/java/src/org/forester/surfacing/SurfacingUtil.java @@ -85,9 +85,9 @@ import org.forester.protein.BinaryDomainCombination; import org.forester.protein.Domain; import org.forester.protein.Protein; import org.forester.species.Species; +import org.forester.surfacing.DomainSimilarity.PRINT_OPTION; import org.forester.surfacing.DomainSimilarityCalculator.Detailedness; import org.forester.surfacing.GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder; -import org.forester.surfacing.PrintableDomainSimilarity.PRINT_OPTION; import org.forester.util.AsciiHistogram; import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.BasicTable; @@ -101,18 +101,18 @@ public final class SurfacingUtil { public final static Pattern PATTERN_SP_STYLE_TAXONOMY = Pattern.compile( "^[A-Z0-9]{3,5}$" ); private final static Map _TAXCODE_HEXCOLORSTRING_MAP = new HashMap(); + private final static Map _TAXCODE_TAXGROUP_MAP = new HashMap(); private static final Comparator ASCENDING_CONFIDENCE_VALUE_ORDER = new Comparator() { @Override public int compare( final Domain d1, final Domain d2 ) { - if ( d1.getPerSequenceEvalue() < d2 - .getPerSequenceEvalue() ) { + if ( d1.getPerDomainEvalue() < d2 + .getPerDomainEvalue() ) { return -1; } - else if ( d1 - .getPerSequenceEvalue() > d2 - .getPerSequenceEvalue() ) { + else if ( d1.getPerDomainEvalue() > d2 + .getPerDomainEvalue() ) { return 1; } else { @@ -121,6 +121,7 @@ public final class SurfacingUtil { } }; private final static NumberFormat FORMATTER_3 = new DecimalFormat( "0.000" ); + private SurfacingUtil() { // Hidden constructor. } @@ -156,7 +157,7 @@ public final class SurfacingUtil { } } - public static void checkWriteabilityForPairwiseComparisons( final PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option, + public static void checkWriteabilityForPairwiseComparisons( final DomainSimilarity.PRINT_OPTION domain_similarity_print_option, final String[][] input_file_properties, final String automated_pairwise_comparison_suffix, final File outdir ) { @@ -197,14 +198,14 @@ public final class SurfacingUtil { || ( !get_gains && ( matrix.getState( id, c ) == CharacterStateMatrix.GainLossStates.LOSS ) ) ) { if ( dc_type == BinaryDomainCombination.DomainCombinationType.DIRECTED_ADJACTANT ) { all_binary_domains_combination_gained.add( AdjactantDirectedBinaryDomainCombination - .createInstance( matrix.getCharacter( c ) ) ); + .obtainInstance( matrix.getCharacter( c ) ) ); } else if ( dc_type == BinaryDomainCombination.DomainCombinationType.DIRECTED ) { all_binary_domains_combination_gained.add( DirectedBinaryDomainCombination - .createInstance( matrix.getCharacter( c ) ) ); + .obtainInstance( matrix.getCharacter( c ) ) ); } else { - all_binary_domains_combination_gained.add( BasicBinaryDomainCombination.createInstance( matrix + all_binary_domains_combination_gained.add( BasicBinaryDomainCombination.obtainInstance( matrix .getCharacter( c ) ) ); } } @@ -247,13 +248,15 @@ public final class SurfacingUtil { } public static StringBuilder createParametersAsString( final boolean ignore_dufs, - final double e_value_max, + final double ie_value_max, + final double fs_e_value_max, final int max_allowed_overlap, final boolean no_engulfing_overlaps, final File cutoff_scores_file, final BinaryDomainCombination.DomainCombinationType dc_type ) { final StringBuilder parameters_sb = new StringBuilder(); - parameters_sb.append( "E-value: " + e_value_max ); + parameters_sb.append( "iE-value: " + ie_value_max ); + parameters_sb.append( ", FS E-value: " + fs_e_value_max ); if ( cutoff_scores_file != null ) { parameters_sb.append( ", Cutoff-scores-file: " + cutoff_scores_file ); } @@ -376,8 +379,8 @@ public final class SurfacingUtil { public static void decoratePrintableDomainSimilarities( final SortedSet domain_similarities, final Detailedness detailedness ) { for( final DomainSimilarity domain_similarity : domain_similarities ) { - if ( domain_similarity instanceof PrintableDomainSimilarity ) { - final PrintableDomainSimilarity printable_domain_similarity = ( PrintableDomainSimilarity ) domain_similarity; + if ( domain_similarity instanceof DomainSimilarity ) { + final DomainSimilarity printable_domain_similarity = domain_similarity; printable_domain_similarity.setDetailedness( detailedness ); } } @@ -665,6 +668,7 @@ public final class SurfacingUtil { * * @param all_binary_domains_combination_lost_fitch * @param use_last_in_fitch_parsimony + * @param perform_dc_fich * @param consider_directedness_and_adjacency_for_bin_combinations * @param all_binary_domains_combination_gained if null ignored, otherwise this is to list all binary domain combinations * which were gained under unweighted (Fitch) parsimony. @@ -689,7 +693,8 @@ public final class SurfacingUtil { final Map domain_length_stats_by_domain, final Map tax_code_to_id_map, final boolean write_to_nexus, - final boolean use_last_in_fitch_parsimony ) { + final boolean use_last_in_fitch_parsimony, + final boolean perform_dc_fich ) { final String sep = ForesterUtil.LINE_SEPARATOR + "###################" + ForesterUtil.LINE_SEPARATOR; final String date_time = ForesterUtil.getCurrentDateTime(); final SortedSet all_pfams_encountered = new TreeSet(); @@ -792,7 +797,7 @@ public final class SurfacingUtil { e.printStackTrace(); ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() ); } - if ( domain_parsimony.calculateNumberOfBinaryDomainCombination() > 0 ) { + if ( perform_dc_fich && ( domain_parsimony.calculateNumberOfBinaryDomainCombination() > 0 ) ) { // FITCH DOMAIN COMBINATIONS // ------------------------- local_phylogeny_l = phylogeny.copy(); @@ -1341,8 +1346,51 @@ public final class SurfacingUtil { throws IllegalArgumentException { if ( !_TAXCODE_HEXCOLORSTRING_MAP.containsKey( tax_code ) ) { if ( ( phy != null ) && !phy.isEmpty() ) { - final List nodes = phy.getNodesViaTaxonomyCode( tax_code ); - Color c = null; + // final List nodes = phy.getNodesViaTaxonomyCode( tax_code ); + // Color c = null; + // if ( ( nodes == null ) || nodes.isEmpty() ) { + // throw new IllegalArgumentException( "code " + tax_code + " is not found" ); + // } + // if ( nodes.size() != 1 ) { + // throw new IllegalArgumentException( "code " + tax_code + " is not unique" ); + // } + // PhylogenyNode n = nodes.get( 0 ); + // while ( n != null ) { + // if ( n.getNodeData().isHasTaxonomy() + // && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) { + // c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( n.getNodeData().getTaxonomy() + // .getScientificName(), tax_code ); + // } + // if ( ( c == null ) && !ForesterUtil.isEmpty( n.getName() ) ) { + // c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( n.getName(), tax_code ); + // } + // if ( c != null ) { + // break; + // } + // n = n.getParent(); + // } + final String group = obtainTaxonomyGroup( tax_code, phy ); + final Color c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( group ); + if ( c == null ) { + throw new IllegalArgumentException( "no color found for taxonomy group \"" + group + + "\" for code \"" + tax_code + "\"" ); + } + final String hex = String.format( "#%02x%02x%02x", c.getRed(), c.getGreen(), c.getBlue() ); + _TAXCODE_HEXCOLORSTRING_MAP.put( tax_code, hex ); + } + else { + throw new IllegalArgumentException( "unable to obtain color for code " + tax_code + + " (tree is null or empty and code is not in map)" ); + } + } + return _TAXCODE_HEXCOLORSTRING_MAP.get( tax_code ); + } + + public static String obtainTaxonomyGroup( final String tax_code, final Phylogeny species_tree ) + throws IllegalArgumentException { + if ( !_TAXCODE_TAXGROUP_MAP.containsKey( tax_code ) ) { + if ( ( species_tree != null ) && !species_tree.isEmpty() ) { + final List nodes = species_tree.getNodesViaTaxonomyCode( tax_code ); if ( ( nodes == null ) || nodes.isEmpty() ) { throw new IllegalArgumentException( "code " + tax_code + " is not found" ); } @@ -1350,32 +1398,32 @@ public final class SurfacingUtil { throw new IllegalArgumentException( "code " + tax_code + " is not unique" ); } PhylogenyNode n = nodes.get( 0 ); + String group = null; while ( n != null ) { if ( n.getNodeData().isHasTaxonomy() && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) { - c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( n.getNodeData().getTaxonomy() - .getScientificName(), tax_code ); + group = ForesterUtil.obtainNormalizedTaxonomyGroup( n.getNodeData().getTaxonomy() + .getScientificName() ); } - if ( ( c == null ) && !ForesterUtil.isEmpty( n.getName() ) ) { - c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( n.getName(), tax_code ); + if ( ForesterUtil.isEmpty( group ) && !ForesterUtil.isEmpty( n.getName() ) ) { + group = ForesterUtil.obtainNormalizedTaxonomyGroup( n.getName() ); } - if ( c != null ) { + if ( !ForesterUtil.isEmpty( group ) ) { break; } n = n.getParent(); } - if ( c == null ) { - throw new IllegalArgumentException( "no color found for taxonomy code \"" + tax_code + "\"" ); + if ( ForesterUtil.isEmpty( group ) ) { + throw new IllegalArgumentException( "no group found for taxonomy code \"" + tax_code + "\"" ); } - final String hex = String.format( "#%02x%02x%02x", c.getRed(), c.getGreen(), c.getBlue() ); - _TAXCODE_HEXCOLORSTRING_MAP.put( tax_code, hex ); + _TAXCODE_TAXGROUP_MAP.put( tax_code, group ); } else { - throw new IllegalArgumentException( "unable to obtain color for code " + tax_code + throw new IllegalArgumentException( "unable to obtain group for code " + tax_code + " (tree is null or empty and code is not in map)" ); } } - return _TAXCODE_HEXCOLORSTRING_MAP.get( tax_code ); + return _TAXCODE_TAXGROUP_MAP.get( tax_code ); } public static void performDomainArchitectureAnalysis( final SortedMap> domain_architecutures, @@ -1838,9 +1886,16 @@ public final class SurfacingUtil { catch ( final IOException e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() ); } - ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote binary domain combination for \"" - + input_file_properties[ i ][ 0 ] + "\" (" + input_file_properties[ i ][ 1 ] + ", " - + input_file_properties[ i ][ 2 ] + ") to: \"" + dc_outfile_dot + "\"" ); + if ( input_file_properties[ i ].length == 3 ) { + ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote binary domain combination for \"" + + input_file_properties[ i ][ 0 ] + "\" (" + input_file_properties[ i ][ 1 ] + ", " + + input_file_properties[ i ][ 2 ] + ") to: \"" + dc_outfile_dot + "\"" ); + } + else { + ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote binary domain combination for \"" + + input_file_properties[ i ][ 0 ] + "\" (" + input_file_properties[ i ][ 1 ] + ") to: \"" + + dc_outfile_dot + "\"" ); + } } public static void writeBinaryStatesMatrixAsListToFile( final CharacterStateMatrix matrix, @@ -1912,7 +1967,7 @@ public final class SurfacingUtil { .getState( id, c ) == CharacterStateMatrix.GainLossStates.UNCHANGED_PRESENT ) ) ) ) { BinaryDomainCombination bdc = null; try { - bdc = BasicBinaryDomainCombination.createInstance( matrix.getCharacter( c ) ); + bdc = BasicBinaryDomainCombination.obtainInstance( matrix.getCharacter( c ) ); } catch ( final Exception e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() ); @@ -2176,11 +2231,12 @@ public final class SurfacingUtil { final SortedSet similarities, final boolean treat_as_binary, final List species_order, - final PrintableDomainSimilarity.PRINT_OPTION print_option, + final DomainSimilarity.PRINT_OPTION print_option, final DomainSimilarity.DomainSimilarityScoring scoring, final boolean verbose, final Map tax_code_to_id_map, - final Phylogeny phy ) throws IOException { + final Phylogeny phy, + final Set pos_filter_doms ) throws IOException { if ( ( single_writer != null ) && ( ( split_writers == null ) || split_writers.isEmpty() ) ) { split_writers = new HashMap(); split_writers.put( '_', single_writer ); @@ -2218,11 +2274,18 @@ public final class SurfacingUtil { // for( final DomainSimilarity similarity : similarities ) { if ( ( species_order != null ) && !species_order.isEmpty() ) { - ( ( PrintableDomainSimilarity ) similarity ).setSpeciesOrder( species_order ); + ( similarity ).setSpeciesOrder( species_order ); } if ( single_writer != null ) { - single_writer.write( "" - + similarity.getDomainId() + "" ); + if ( !ForesterUtil.isEmpty( pos_filter_doms ) && pos_filter_doms.contains( similarity.getDomainId() ) ) { + single_writer.write( "" + similarity.getDomainId() + + "" ); + } + else { + single_writer.write( "" + + similarity.getDomainId() + "" ); + } single_writer.write( SurfacingConstants.NL ); } else { @@ -2231,8 +2294,15 @@ public final class SurfacingUtil { if ( local_writer == null ) { local_writer = split_writers.get( '0' ); } - local_writer.write( "" - + similarity.getDomainId() + "" ); + if ( !ForesterUtil.isEmpty( pos_filter_doms ) && pos_filter_doms.contains( similarity.getDomainId() ) ) { + local_writer.write( "" + similarity.getDomainId() + + "" ); + } + else { + local_writer.write( "" + + similarity.getDomainId() + "" ); + } local_writer.write( SurfacingConstants.NL ); } } @@ -2256,7 +2326,10 @@ public final class SurfacingUtil { writeColorLabels( "Porifera (sponges)", TaxonomyColors.PORIFERA_COLOR, w ); writeColorLabels( "Choanoflagellida", TaxonomyColors.CHOANOFLAGELLIDA, w ); writeColorLabels( "Ichthyosporea & Filasterea", TaxonomyColors.ICHTHYOSPOREA_AND_FILASTEREA, w ); - writeColorLabels( "Fungi", TaxonomyColors.FUNGI_COLOR, w ); + writeColorLabels( "Dikarya (Ascomycota & Basidiomycota, so-called \"higher fungi\")", + TaxonomyColors.DIKARYA_COLOR, + w ); + writeColorLabels( "other Fungi", TaxonomyColors.OTHER_FUNGI_COLOR, w ); writeColorLabels( "Nucleariidae and Fonticula group", TaxonomyColors.NUCLEARIIDAE_AND_FONTICULA_GROUP_COLOR, w ); @@ -2286,7 +2359,7 @@ public final class SurfacingUtil { // for( final DomainSimilarity similarity : similarities ) { if ( ( species_order != null ) && !species_order.isEmpty() ) { - ( ( PrintableDomainSimilarity ) similarity ).setSpeciesOrder( species_order ); + ( similarity ).setSpeciesOrder( species_order ); } if ( simple_tab_writer != null ) { simple_tab_writer.write( similarity.toStringBuffer( PRINT_OPTION.SIMPLE_TAB_DELIMITED, @@ -2445,12 +2518,16 @@ public final class SurfacingUtil { public static void writeProteinListsForAllSpecies( final File output_dir, final SortedMap> protein_lists_per_species, final List gwcd_list, - final double domain_e_cutoff ) { + final double domain_e_cutoff, + final Set pos_filter_doms ) { final SortedSet all_domains = new TreeSet(); for( final GenomeWideCombinableDomains gwcd : gwcd_list ) { all_domains.addAll( gwcd.getAllDomainIds() ); } for( final String domain : all_domains ) { + if ( !ForesterUtil.isEmpty( pos_filter_doms ) && !pos_filter_doms.contains( domain ) ) { + continue; + } final File out = new File( output_dir + ForesterUtil.FILE_SEPARATOR + domain + surfacing.SEQ_EXTRACT_SUFFIX ); checkForOutputFileWriteability( out ); try {