" ); - w.write( title );

X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fsurfacing%2FSurfacingUtil.java;h=73544b7f96b42024ad4ae955e70d9b5b1d1496c1;hb=c914fef4ec7f50e4cf01375a30207992c84659a3;hp=ecb0839a6ed870bf1a4abe63a23af92cabc52a1e;hpb=94fab44b4568a8c49d9766c7f49eefac2d8f22ff;p=jalview.git diff --git a/forester/java/src/org/forester/surfacing/SurfacingUtil.java b/forester/java/src/org/forester/surfacing/SurfacingUtil.java index ecb0839..73544b7 100644 --- a/forester/java/src/org/forester/surfacing/SurfacingUtil.java +++ b/forester/java/src/org/forester/surfacing/SurfacingUtil.java @@ -26,6 +26,7 @@ package org.forester.surfacing; +import java.awt.Color; import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; @@ -66,6 +67,8 @@ import org.forester.go.GoNameSpace; import org.forester.go.GoTerm; import org.forester.go.PfamToGoMapping; import org.forester.io.parsers.nexus.NexusConstants; +import org.forester.io.parsers.phyloxml.PhyloXmlUtil; +import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; @@ -74,6 +77,7 @@ import org.forester.phylogeny.PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE; import org.forester.phylogeny.data.BinaryCharacters; import org.forester.phylogeny.data.Confidence; import org.forester.phylogeny.data.Taxonomy; +import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.protein.BasicDomain; import org.forester.protein.BasicProtein; @@ -88,32 +92,35 @@ import org.forester.util.AsciiHistogram; import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.BasicTable; import org.forester.util.BasicTableParser; +import org.forester.util.CommandLineArguments; import org.forester.util.DescriptiveStatistics; import org.forester.util.ForesterUtil; +import org.forester.util.TaxonomyColors; public final class SurfacingUtil { - private final static NumberFormat FORMATTER_3 = new DecimalFormat( "0.000" ); - private static final Comparator ASCENDING_CONFIDENCE_VALUE_ORDER = new Comparator() { - - @Override - public int compare( final Domain d1, - final Domain d2 ) { - if ( d1.getPerSequenceEvalue() < d2 - .getPerSequenceEvalue() ) { - return -1; - } - else if ( d1 - .getPerSequenceEvalue() > d2 - .getPerSequenceEvalue() ) { - return 1; - } - else { - return d1.compareTo( d2 ); - } - } - }; - public final static Pattern PATTERN_SP_STYLE_TAXONOMY = Pattern.compile( "^[A-Z0-9]{3,5}$" ); + public final static Pattern PATTERN_SP_STYLE_TAXONOMY = Pattern.compile( "^[A-Z0-9]{3,5}$" ); + private final static Map _TAXCODE_HEXCOLORSTRING_MAP = new HashMap(); + private static final Comparator ASCENDING_CONFIDENCE_VALUE_ORDER = new Comparator() { + + @Override + public int compare( final Domain d1, + final Domain d2 ) { + if ( d1.getPerSequenceEvalue() < d2 + .getPerSequenceEvalue() ) { + return -1; + } + else if ( d1 + .getPerSequenceEvalue() > d2 + .getPerSequenceEvalue() ) { + return 1; + } + else { + return d1.compareTo( d2 ); + } + } + }; + private final static NumberFormat FORMATTER_3 = new DecimalFormat( "0.000" ); private SurfacingUtil() { // Hidden constructor. @@ -135,54 +142,6 @@ public final class SurfacingUtil { } } - public static void addHtmlHead( final Writer w, final String title ) throws IOException { - w.write( SurfacingConstants.NL ); - w.write( "" ); - w.write( "" ); - w.write( title ); - w.write( "" ); - w.write( SurfacingConstants.NL ); - w.write( "" ); - w.write( SurfacingConstants.NL ); - w.write( "" ); - w.write( SurfacingConstants.NL ); - } - public static DescriptiveStatistics calculateDescriptiveStatisticsForMeanValues( final Set similarities ) { final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); for( final DomainSimilarity similarity : similarities ) { @@ -191,16 +150,6 @@ public final class SurfacingUtil { return stats; } - public static int calculateOverlap( final Domain domain, final List covered_positions ) { - int overlap_count = 0; - for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) { - if ( ( i < covered_positions.size() ) && ( covered_positions.get( i ) == true ) ) { - ++overlap_count; - } - } - return overlap_count; - } - public static void checkForOutputFileWriteability( final File outfile ) { final String error = ForesterUtil.isWritableFile( outfile ); if ( !ForesterUtil.isEmpty( error ) ) { @@ -208,6 +157,33 @@ public final class SurfacingUtil { } } + public static void checkWriteabilityForPairwiseComparisons( final PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option, + final String[][] input_file_properties, + final String automated_pairwise_comparison_suffix, + final File outdir ) { + for( int i = 0; i < input_file_properties.length; ++i ) { + for( int j = 0; j < i; ++j ) { + final String species_i = input_file_properties[ i ][ 1 ]; + final String species_j = input_file_properties[ j ][ 1 ]; + String pairwise_similarities_output_file_str = surfacing.PAIRWISE_DOMAIN_COMPARISONS_PREFIX + species_i + + "_" + species_j + automated_pairwise_comparison_suffix; + switch ( domain_similarity_print_option ) { + case HTML: + if ( !pairwise_similarities_output_file_str.endsWith( ".html" ) ) { + pairwise_similarities_output_file_str += ".html"; + } + break; + } + final String error = ForesterUtil + .isWritableFile( new File( outdir == null ? pairwise_similarities_output_file_str : outdir + + ForesterUtil.FILE_SEPARATOR + pairwise_similarities_output_file_str ) ); + if ( !ForesterUtil.isEmpty( error ) ) { + ForesterUtil.fatalError( surfacing.PRG_NAME, error ); + } + } + } + } + public static void collectChangedDomainCombinationsFromBinaryStatesMatrixAsListToFile( final CharacterStateMatrix matrix, final BinaryDomainCombination.DomainCombinationType dc_type, final List all_binary_domains_combination_gained, @@ -271,6 +247,101 @@ public final class SurfacingUtil { return phylogeny; } + public static StringBuilder createParametersAsString( final boolean ignore_dufs, + final double e_value_max, + final int max_allowed_overlap, + final boolean no_engulfing_overlaps, + final File cutoff_scores_file, + final BinaryDomainCombination.DomainCombinationType dc_type ) { + final StringBuilder parameters_sb = new StringBuilder(); + parameters_sb.append( "E-value: " + e_value_max ); + if ( cutoff_scores_file != null ) { + parameters_sb.append( ", Cutoff-scores-file: " + cutoff_scores_file ); + } + else { + parameters_sb.append( ", Cutoff-scores-file: not-set" ); + } + if ( max_allowed_overlap != surfacing.MAX_ALLOWED_OVERLAP_DEFAULT ) { + parameters_sb.append( ", Max-overlap: " + max_allowed_overlap ); + } + else { + parameters_sb.append( ", Max-overlap: not-set" ); + } + if ( no_engulfing_overlaps ) { + parameters_sb.append( ", Engulfing-overlaps: not-allowed" ); + } + else { + parameters_sb.append( ", Engulfing-overlaps: allowed" ); + } + if ( ignore_dufs ) { + parameters_sb.append( ", Ignore-dufs: true" ); + } + else { + parameters_sb.append( ", Ignore-dufs: false" ); + } + parameters_sb.append( ", DC type (if applicable): " + dc_type ); + return parameters_sb; + } + + public static void createSplitWriters( final File out_dir, + final String my_outfile, + final Map split_writers ) throws IOException { + split_writers.put( 'a', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + + "_domains_A.html" ) ) ); + split_writers.put( 'b', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + + "_domains_B.html" ) ) ); + split_writers.put( 'c', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + + "_domains_C.html" ) ) ); + split_writers.put( 'd', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + + "_domains_D.html" ) ) ); + split_writers.put( 'e', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + + "_domains_E.html" ) ) ); + split_writers.put( 'f', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + + "_domains_F.html" ) ) ); + split_writers.put( 'g', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + + "_domains_G.html" ) ) ); + split_writers.put( 'h', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + + "_domains_H.html" ) ) ); + split_writers.put( 'i', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + + "_domains_I.html" ) ) ); + split_writers.put( 'j', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + + "_domains_J.html" ) ) ); + split_writers.put( 'k', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + + "_domains_K.html" ) ) ); + split_writers.put( 'l', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + + "_domains_L.html" ) ) ); + split_writers.put( 'm', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + + "_domains_M.html" ) ) ); + split_writers.put( 'n', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + + "_domains_N.html" ) ) ); + split_writers.put( 'o', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + + "_domains_O.html" ) ) ); + split_writers.put( 'p', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + + "_domains_P.html" ) ) ); + split_writers.put( 'q', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + + "_domains_Q.html" ) ) ); + split_writers.put( 'r', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + + "_domains_R.html" ) ) ); + split_writers.put( 's', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + + "_domains_S.html" ) ) ); + split_writers.put( 't', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + + "_domains_T.html" ) ) ); + split_writers.put( 'u', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + + "_domains_U.html" ) ) ); + split_writers.put( 'v', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + + "_domains_V.html" ) ) ); + split_writers.put( 'w', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + + "_domains_W.html" ) ) ); + split_writers.put( 'x', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + + "_domains_X.html" ) ) ); + split_writers.put( 'y', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + + "_domains_Y.html" ) ) ); + split_writers.put( 'z', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + + "_domains_Z.html" ) ) ); + split_writers.put( '0', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + + "_domains_0.html" ) ) ); + } + public static Map createTaxCodeToIdMap( final Phylogeny phy ) { final Map m = new HashMap(); for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { @@ -508,6 +579,90 @@ public final class SurfacingUtil { } /** + * Warning: This side-effects 'all_bin_domain_combinations_encountered'! + * + * + * @param output_file + * @param all_bin_domain_combinations_changed + * @param sum_of_all_domains_encountered + * @param all_bin_domain_combinations_encountered + * @param is_gains_analysis + * @param protein_length_stats_by_dc + * @throws IOException + */ + public static void executeFitchGainsAnalysis( final File output_file, + final List all_bin_domain_combinations_changed, + final int sum_of_all_domains_encountered, + final SortedSet all_bin_domain_combinations_encountered, + final boolean is_gains_analysis ) throws IOException { + checkForOutputFileWriteability( output_file ); + final Writer out = ForesterUtil.createBufferedWriter( output_file ); + final SortedMap bdc_to_counts = ForesterUtil + .listToSortedCountsMap( all_bin_domain_combinations_changed ); + final SortedSet all_domains_in_combination_changed_more_than_once = new TreeSet(); + final SortedSet all_domains_in_combination_changed_only_once = new TreeSet(); + int above_one = 0; + int one = 0; + for( final Object bdc_object : bdc_to_counts.keySet() ) { + final BinaryDomainCombination bdc = ( BinaryDomainCombination ) bdc_object; + final int count = bdc_to_counts.get( bdc_object ); + if ( count < 1 ) { + ForesterUtil.unexpectedFatalError( surfacing.PRG_NAME, "count < 1 " ); + } + out.write( bdc + "\t" + count + ForesterUtil.LINE_SEPARATOR ); + if ( count > 1 ) { + all_domains_in_combination_changed_more_than_once.add( bdc.getId0() ); + all_domains_in_combination_changed_more_than_once.add( bdc.getId1() ); + above_one++; + } + else if ( count == 1 ) { + all_domains_in_combination_changed_only_once.add( bdc.getId0() ); + all_domains_in_combination_changed_only_once.add( bdc.getId1() ); + one++; + } + } + final int all = all_bin_domain_combinations_encountered.size(); + int never_lost = -1; + if ( !is_gains_analysis ) { + all_bin_domain_combinations_encountered.removeAll( all_bin_domain_combinations_changed ); + never_lost = all_bin_domain_combinations_encountered.size(); + for( final BinaryDomainCombination bdc : all_bin_domain_combinations_encountered ) { + out.write( bdc + "\t" + "0" + ForesterUtil.LINE_SEPARATOR ); + } + } + if ( is_gains_analysis ) { + out.write( "Sum of all distinct domain combinations appearing once : " + one + + ForesterUtil.LINE_SEPARATOR ); + out.write( "Sum of all distinct domain combinations appearing more than once : " + above_one + + ForesterUtil.LINE_SEPARATOR ); + out.write( "Sum of all distinct domains in combinations apppearing only once : " + + all_domains_in_combination_changed_only_once.size() + ForesterUtil.LINE_SEPARATOR ); + out.write( "Sum of all distinct domains in combinations apppearing more than once: " + + all_domains_in_combination_changed_more_than_once.size() + ForesterUtil.LINE_SEPARATOR ); + } + else { + out.write( "Sum of all distinct domain combinations never lost : " + never_lost + + ForesterUtil.LINE_SEPARATOR ); + out.write( "Sum of all distinct domain combinations lost once : " + one + + ForesterUtil.LINE_SEPARATOR ); + out.write( "Sum of all distinct domain combinations lost more than once : " + above_one + + ForesterUtil.LINE_SEPARATOR ); + out.write( "Sum of all distinct domains in combinations lost only once : " + + all_domains_in_combination_changed_only_once.size() + ForesterUtil.LINE_SEPARATOR ); + out.write( "Sum of all distinct domains in combinations lost more than once: " + + all_domains_in_combination_changed_more_than_once.size() + ForesterUtil.LINE_SEPARATOR ); + } + out.write( "All binary combinations : " + all + + ForesterUtil.LINE_SEPARATOR ); + out.write( "All domains : " + + sum_of_all_domains_encountered ); + out.close(); + ForesterUtil.programMessage( surfacing.PRG_NAME, + "Wrote fitch domain combination dynamics counts analysis to \"" + output_file + + "\"" ); + } + + /** * * @param all_binary_domains_combination_lost_fitch * @param use_last_in_fitch_parsimony @@ -852,6 +1007,60 @@ public final class SurfacingUtil { + "_MAPPED_indep_dc_gains_fitch_lca_taxonomies.txt", null, null, null, null ); } + public static void executePlusMinusAnalysis( final File output_file, + final List plus_minus_analysis_high_copy_base, + final List plus_minus_analysis_high_copy_target, + final List plus_minus_analysis_low_copy, + final List gwcd_list, + final SortedMap> protein_lists_per_species, + final Map> domain_id_to_go_ids_map, + final Map go_id_to_term_map, + final List

N:	" + stats.getN() + "
Min:	" + stats.getMin() + "
Max:	" + stats.getMax() + "
Mean:	" + stats.arithmeticMean() + "
SD:	" + stats.sampleStandardDeviation() + "
SD:	n/a