final static private String INPUT_GENOMES_FILE_OPTION = "genomes";
final static private String INPUT_SPECIES_TREE_OPTION = "species_tree";
final static private String SEQ_EXTRACT_OPTION = "prot_extract";
- final static private String PRG_VERSION = "2.300";
- final static private String PRG_DATE = "130711";
+ final static private String PRG_VERSION = "2.301";
+ final static private String PRG_DATE = "130712";
final static private String E_MAIL = "czmasek@burnham.org";
final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester/surfacing";
final static private boolean IGNORE_DUFS_DEFAULT = true;
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_MAPPED.txt";
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_MAPPED.txt";
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique_MAPPED.txt";
+ // Passed to the BasicDomainSimilarityCalculator constructor as calc_similarity_score;
+ // when false, that calculator collects no similarity score statistics.
+ private static final boolean CALC_SIMILARITY_SCORES = false;
private static void checkWriteabilityForPairwiseComparisons( final PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option,
final String[][] input_file_properties,
all_bin_domain_combinations_gained_fitch = new ArrayList<BinaryDomainCombination>();
all_bin_domain_combinations_lost_fitch = new ArrayList<BinaryDomainCombination>();
}
- final DomainLengthsTable domain_lengths_table = new DomainLengthsTable();
+ DomainLengthsTable domain_lengths_table = new DomainLengthsTable();
final File per_genome_domain_promiscuity_statistics_file = new File( out_dir + ForesterUtil.FILE_SEPARATOR
+ output_file + D_PROMISCUITY_FILE_SUFFIX );
BufferedWriter per_genome_domain_promiscuity_statistics_writer = null;
ForesterUtil.programMessage( PRG_NAME, "Wrote domain length data to: " + domain_lengths_analysis_outfile );
System.out.println();
}
+ domain_lengths_table = null;
final long analysis_start_time = new Date().getTime();
PairwiseDomainSimilarityCalculator pw_calc = null;
- // double[] values_for_all_scores_histogram = null;
final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( domain_similarity_sort_field,
sort_by_species_count_first,
- number_of_genomes == 2 );
+ number_of_genomes == 2,
+ CALC_SIMILARITY_SCORES );
switch ( scoring ) {
case COMBINATIONS:
pw_calc = new CombinationsBasedPairwiseDomainSimilarityCalculator();
+ new java.text.SimpleDateFormat( "yyyy.MM.dd HH:mm:ss" ).format( new java.util.Date() )
+ "</td></tr>" + nl );
html_desc.append( "</table>" + nl );
- final DescriptiveStatistics pw_stats = SurfacingUtil
- .writeDomainSimilaritiesToFile( html_desc,
- new StringBuilder( number_of_genomes + " genomes" ),
- writer,
- split_writers,
- similarities,
- number_of_genomes == 2,
- species_order,
- domain_similarity_print_option,
- scoring,
- true,
- tax_code_to_id_map,
- false );
+ SurfacingUtil.writeDomainSimilaritiesToFile( html_desc,
+ new StringBuilder( number_of_genomes + " genomes" ),
+ writer,
+ split_writers,
+ similarities,
+ number_of_genomes == 2,
+ species_order,
+ domain_similarity_print_option,
+ scoring,
+ true,
+ tax_code_to_id_map );
ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote main output (includes domain similarities) to: \""
+ ( out_dir == null ? my_outfile : out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile ) + "\"" );
}
surfacing.PRG_NAME,
out_dir,
write_pwc_files,
- tax_code_to_id_map );
+ tax_code_to_id_map,
+ CALC_SIMILARITY_SCORES );
String matrix_output_file = new String( output_file.toString() );
if ( matrix_output_file.indexOf( '.' ) > 1 ) {
matrix_output_file = matrix_output_file.substring( 0, matrix_output_file.indexOf( '.' ) );
package org.forester.surfacing;
+import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.SortedMap;
import org.forester.species.Species;
import org.forester.util.BasicDescriptiveStatistics;
import org.forester.util.DescriptiveStatistics;
+import org.forester.util.ForesterUtil;
public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculator {
final DomainSimilarity.DomainSimilaritySortField _sort;
private final boolean _sort_by_species_count_first;
private final boolean _treat_as_binary_comparison;
+ private final boolean _calc_similarity_score;
public BasicDomainSimilarityCalculator( final DomainSimilarity.DomainSimilaritySortField sort,
final boolean sort_by_species_count_first,
- final boolean treat_as_binary_comparison ) {
+ final boolean treat_as_binary_comparison,
+ final boolean calc_similarity_score ) {
_sort = sort;
_sort_by_species_count_first = sort_by_species_count_first;
_treat_as_binary_comparison = treat_as_binary_comparison;
+ _calc_similarity_score = calc_similarity_score;
+ }
+
+ /**
+ * @return the calc_similarity_score flag that was passed to the constructor
+ */
+ public boolean isCalcSimilarityScore() {
+ return _calc_similarity_score;
}
@Override
for( final GenomeWideCombinableDomains cdc : cdc_list ) {
keys.addAll( ( cdc ).getAllCombinableDomainsIds().keySet() );
}
+ final DecimalFormat pf = new java.text.DecimalFormat( "000000" );
+ int counter = 1;
+ System.out.println( keys.size() );
for( final String key : keys ) {
+ ForesterUtil.updateProgress( counter, pf );
+ counter++;
final List<CombinableDomains> same_id_cd_list = new ArrayList<CombinableDomains>( cdc_list.size() );
final List<Species> species_with_key_id_domain = new ArrayList<Species>();
for( final GenomeWideCombinableDomains cdc : cdc_list ) {
continue;
}
}
- // BIG CHANGE IN LOGIC: Tuesday July 08, 0;55
- // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- // OLD: if ( same_id_cd_list.size() > 1 ) {
if ( same_id_cd_list.size() > 0 ) {
if ( !ignore_domains_specific_to_one_genome || ( same_id_cd_list.size() > 1 ) ) {
final DomainSimilarity s = calculateSimilarity( pairwise_calculator, same_id_cd_list );
}
}
}
- // ~~~ NEW:
else {
throw new RuntimeException( "this should not have happened" );
}
- // ~~~ OLD:
- // else if ( same_id_cd_list.size() == 1 ) {
- // TODO need to go in file
- // System.out.println( "only in one species [" +
- // species_with_key_id_domain.get( 0 ) + "]: " + key_id );
- //}
- //else {
- // throw new RuntimeException( "this should not have happened" );
- // }
}
+ System.out.println();
return similarities;
}
private DomainSimilarity calculateSimilarity( final PairwiseDomainSimilarityCalculator pairwise_calculator,
final List<CombinableDomains> domains_list ) {
if ( domains_list.size() == 1 ) {
- // BIG CHANGE IN LOGIC: Tuesday July 08, 0;55
- // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- // ~~~OLD:
- //throw new IllegalArgumentException( "attempt to calculate multiple combinable domains similarity for less than two combinable domains" );
- // ~~~new:
final SortedMap<Species, SpeciesSpecificDcData> species_data = new TreeMap<Species, SpeciesSpecificDcData>();
species_data.put( domains_list.get( 0 ).getSpecies(),
createSpeciesSpecificDomainSimilariyData( domains_list.get( 0 ) ) );
- return new PrintableDomainSimilarity( domains_list.get( 0 ),
- 1.0,
- 1.0,
- 1.0,
- 1.0,
- 0.0,
- 0,
- 0,
- 0,
- species_data,
- isSortBySpeciesCountFirst(),
- isTreatAsBinaryComparison() );
+ if ( !isCalcSimilarityScore() ) {
+ return new PrintableDomainSimilarity( domains_list.get( 0 ),
+ 0,
+ 0,
+ species_data,
+ isSortBySpeciesCountFirst(),
+ isTreatAsBinaryComparison() );
+ }
+ else {
+ return new PrintableDomainSimilarity( domains_list.get( 0 ),
+ 1.0,
+ 1.0,
+ 1.0,
+ 1.0,
+ 0.0,
+ 0,
+ 0,
+ 0,
+ species_data,
+ isSortBySpeciesCountFirst(),
+ isTreatAsBinaryComparison() );
+ }
+ }
+ DescriptiveStatistics stat = null;
+ if ( isCalcSimilarityScore() ) {
+ stat = new BasicDescriptiveStatistics();
}
- final DescriptiveStatistics stat = new BasicDescriptiveStatistics();
final SortedMap<Species, SpeciesSpecificDcData> species_data = new TreeMap<Species, SpeciesSpecificDcData>();
species_data.put( domains_list.get( 0 ).getSpecies(),
createSpeciesSpecificDomainSimilariyData( domains_list.get( 0 ) ) );
if ( Math.abs( difference ) > Math.abs( max_difference ) ) {
max_difference = difference;
}
- stat.addValue( pairwise_similarity.getSimilarityScore() );
+ if ( isCalcSimilarityScore() ) {
+ stat.addValue( pairwise_similarity.getSimilarityScore() );
+ }
}
}
- if ( stat.getN() < 1 ) {
- throw new AssertionError( "empty descriptive statistics: this should not have happened" );
- }
- if ( ( stat.getN() != 1 ) && isTreatAsBinaryComparison() ) {
- throw new IllegalArgumentException( "attmpt to treat similarity with N not equal to one as binary comparison" );
+ if ( isCalcSimilarityScore() ) {
+ if ( stat.getN() < 1 ) {
+ throw new RuntimeException( "empty descriptive statistics: this should not have happened" );
+ }
+ if ( ( stat.getN() != 1 ) && isTreatAsBinaryComparison() ) {
+ throw new IllegalArgumentException( "attmpt to treat similarity with N not equal to one as binary comparison" );
+ }
}
- if ( ( /*stat.getN() != 1 ||*/!isTreatAsBinaryComparison() ) && ( max_difference_in_counts < 0 ) ) {
+ if ( !isTreatAsBinaryComparison() && ( max_difference_in_counts < 0 ) ) {
max_difference_in_counts = Math.abs( max_difference_in_counts );
if ( !is_domain_combination_based ) {
- max_difference = Math.abs( max_difference ); //=max_difference_in_counts for !is_domain_combination_based.
+ max_difference = Math.abs( max_difference );
}
}
DomainSimilarity similarity = null;
- if ( stat.getN() == 1 ) {
+ if ( !isCalcSimilarityScore() ) {
similarity = new PrintableDomainSimilarity( domains_list.get( 0 ),
- stat.getMin(),
- stat.getMax(),
- stat.arithmeticMean(),
- stat.median(),
- 0.0,
- stat.getN(),
max_difference_in_counts,
max_difference,
species_data,
isTreatAsBinaryComparison() );
}
else {
- similarity = new PrintableDomainSimilarity( domains_list.get( 0 ),
- stat.getMin(),
- stat.getMax(),
- stat.arithmeticMean(),
- stat.median(),
- stat.sampleStandardDeviation(),
- stat.getN(),
- max_difference_in_counts,
- max_difference,
- species_data,
- isSortBySpeciesCountFirst(),
- isTreatAsBinaryComparison() );
+ if ( stat.getN() == 1 ) {
+ similarity = new PrintableDomainSimilarity( domains_list.get( 0 ),
+ stat.getMin(),
+ stat.getMax(),
+ stat.arithmeticMean(),
+ stat.median(),
+ 0.0,
+ stat.getN(),
+ max_difference_in_counts,
+ max_difference,
+ species_data,
+ isSortBySpeciesCountFirst(),
+ isTreatAsBinaryComparison() );
+ }
+ else {
+ similarity = new PrintableDomainSimilarity( domains_list.get( 0 ),
+ stat.getMin(),
+ stat.getMax(),
+ stat.arithmeticMean(),
+ stat.median(),
+ stat.sampleStandardDeviation(),
+ stat.getN(),
+ max_difference_in_counts,
+ max_difference,
+ species_data,
+ isSortBySpeciesCountFirst(),
+ isTreatAsBinaryComparison() );
+ }
}
return similarity;
}
public class CountsBasedPairwiseDomainSimilarity implements PairwiseDomainSimilarity {
- private final double _score;
- private final int _copy_number_difference;
+ private final short _copy_number_difference;
+ private final short _counts_sum;
/**
* counts_difference: (counts for domain 1) minus (counts for domain 2).
* @param counts_difference value of domain_1 minus value of domain_2
* @param counts_sum
*/
- public CountsBasedPairwiseDomainSimilarity( final int counts_difference, final int counts_sum ) {
+ public CountsBasedPairwiseDomainSimilarity( final short counts_difference, final short counts_sum ) {
if ( counts_sum <= 0 ) {
throw new IllegalArgumentException( "attempt to use copy sum of less than or equal to 0: " + counts_sum );
}
- _copy_number_difference = counts_difference;
- final int abs_copy_number_difference = Math.abs( counts_difference );
- if ( abs_copy_number_difference > counts_sum ) {
+ if ( Math.abs( counts_difference ) > counts_sum ) {
throw new IllegalArgumentException( "attempt to use absolute copy number difference larger than copy number sum" );
}
- _score = 1.0 - ( ( double ) abs_copy_number_difference / counts_sum );
+ _copy_number_difference = counts_difference;
+ _counts_sum = counts_sum;
}
/**
@Override
public double getSimilarityScore() {
- return _score;
+ // Computed on every call from the stored values: 1 - |copy number difference| / counts sum.
+ return ( 1.0 - ( ( double ) Math.abs( _copy_number_difference ) / _counts_sum ) );
}
}
if ( !domains_1.getKeyDomain().equals( domains_2.getKeyDomain() ) ) {
throw new IllegalArgumentException( "attempt to calculate similarity between domain collection with different keys" );
}
- final int dc1 = domains_1.getKeyDomainCount();
- final int dc2 = domains_2.getKeyDomainCount();
- return new CountsBasedPairwiseDomainSimilarity( dc1 - dc2, dc1 + dc2 );
+ if ( ( domains_1.getKeyDomainCount() > Short.MAX_VALUE ) || ( domains_2.getKeyDomainCount() > Short.MAX_VALUE )
+ || ( ( domains_1.getKeyDomainCount() + domains_2.getKeyDomainCount() ) > Short.MAX_VALUE ) ) {
+ throw new IllegalArgumentException( "too large for short!" );
+ }
+ final short dc1 = ( short ) domains_1.getKeyDomainCount();
+ final short dc2 = ( short ) domains_2.getKeyDomainCount();
+ return new CountsBasedPairwiseDomainSimilarity( ( short ) ( dc1 - dc2 ), ( short ) ( dc1 + dc2 ) );
}
}
private List<DistanceMatrix> _shared_domains_based_distances;
private List<DistanceMatrix> _shared_binary_combinations_based_distances;
- //private List<HistogramData> _histogram_datas;
public PairwiseGenomeComparator() {
init();
}
return _domain_distance_scores_means;
}
- //public List<HistogramData> getHistogramDatas() {
- // return _histogram_datas;
- //}
public List<DistanceMatrix> getSharedBinaryCombinationsBasedDistances() {
return _shared_binary_combinations_based_distances;
}
}
private void init() {
- //_histogram_datas = new ArrayList<HistogramData>();
_domain_distance_scores_means = new ArrayList<DistanceMatrix>();
_shared_domains_based_distances = new ArrayList<DistanceMatrix>();
_shared_binary_combinations_based_distances = new ArrayList<DistanceMatrix>();
final String command_line_prg_name,
final File out_dir,
final boolean write_pairwise_comparisons,
- final Map<String, Integer> tax_code_to_id_map ) {
+ final Map<String, Integer> tax_code_to_id_map,
+ final boolean calc_similarity_scores ) {
init();
final BasicSymmetricalDistanceMatrix domain_distance_scores_means = new BasicSymmetricalDistanceMatrix( number_of_genomes );
final BasicSymmetricalDistanceMatrix shared_domains_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes );
}
final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( domain_similarity_sort_field,
sort_by_species_count_first,
- true );
+ true,
+ calc_similarity_scores );
final SortedSet<DomainSimilarity> similarities = calc
.calculateSimilarities( pw_calc,
genome_pair,
}
break;
}
- DescriptiveStatistics pw_stats = null;
if ( write_pairwise_comparisons ) {
try {
final Writer writer = new BufferedWriter( new FileWriter( out_dir == null ? pairwise_similarities_output_file_str
: out_dir + ForesterUtil.FILE_SEPARATOR + pairwise_similarities_output_file_str ) );
- pw_stats = SurfacingUtil.writeDomainSimilaritiesToFile( html_desc,
- new StringBuilder( species_i + "-"
- + species_j ),
- writer,
- null,
- similarities,
- true,
- null,
- domain_similarity_print_option,
- scoring,
- false,
- tax_code_to_id_map,
- false );
+ SurfacingUtil.writeDomainSimilaritiesToFile( html_desc,
+ new StringBuilder( species_i + "-" + species_j ),
+ writer,
+ null,
+ similarities,
+ true,
+ null,
+ domain_similarity_print_option,
+ scoring,
+ false,
+ tax_code_to_id_map );
}
catch ( final IOException e ) {
ForesterUtil.fatalError( command_line_prg_name, "Failed to write similarites to: \""
+ pairwise_similarities_output_file_str + "\" [" + e.getMessage() + "]" );
}
}
- if ( pw_stats != null ) {
- if ( pw_stats.getMin() >= pw_stats.getMax() ) {
- ForesterUtil
- .printWarningMessage( command_line_prg_name, "for [" + species_i + "-" + species_j
- + "] score minimum is [" + pw_stats.getMin() + "] while score maximum is ["
- + pw_stats.getMax()
- + "], possibly indicating that a genome is compared to itself" );
- }
- }
}
}
getDomainDistanceScoresMeans().add( domain_distance_scores_means );
final public static String SPECIES_SEPARATOR = " ";
final private static int EQUAL = 0;
final private static String NO_SPECIES = " ";
- private static final boolean PRINT_MORE_INFO = false;
final private double _min;
final private double _max;
final private double _mean;
}
}
+ /**
+ * Creates a domain similarity that carries no similarity score statistics:
+ * the minimal, maximal and mean score, the standard deviation and N are all set to -1.
+ * Only the difference values and the per-species data are taken from the arguments.
+ *
+ * @param combinable_domains the combinable domains this similarity is for, must not be null
+ * @param max_difference_in_counts stored as the maximal difference in counts
+ * @param max_difference stored as the maximal difference
+ * @param species_data per-species data, must be neither null nor empty
+ * @param sort_by_species_count_first not referenced in the body of this constructor
+ * @param treat_as_binary_comparison stored as the treat-as-binary-comparison flag
+ * @throws IllegalArgumentException if combinable_domains or species_data is null,
+ * if species_data is empty, or if species_data has more than two entries and
+ * getMaximalDifferenceInCounts() or getMaximalDifference() is negative
+ */
+ public PrintableDomainSimilarity( final CombinableDomains combinable_domains,
+ final int max_difference_in_counts,
+ final int max_difference,
+ final SortedMap<Species, SpeciesSpecificDcData> species_data,
+ final boolean sort_by_species_count_first,
+ final boolean treat_as_binary_comparison ) {
+ if ( combinable_domains == null ) {
+ throw new IllegalArgumentException( "attempt to use null combinable domains" );
+ }
+ if ( species_data == null ) {
+ throw new IllegalArgumentException( "attempt to use null species data" );
+ }
+ if ( species_data.size() < 1 ) {
+ throw new IllegalArgumentException( "attempt to use empty species data" );
+ }
+ init();
+ _combinable_domains = combinable_domains;
+ // -1 marks the score statistics as not available.
+ _min = -1;
+ _max = -1;
+ _mean = -1;
+ _sd = -1;
+ _n = -1;
+ _max_difference_in_counts = max_difference_in_counts;
+ _max_difference = max_difference;
+ _species_data = species_data;
+ _treat_as_binary_comparison = treat_as_binary_comparison;
+ final int s = species_data.size();
+ // Negative differences are only accepted for at most two species.
+ if ( s > 2 ) {
+ if ( getMaximalDifferenceInCounts() < 0 ) {
+ throw new IllegalArgumentException( "attempt to use negative max difference in counts with more than two species" );
+ }
+ if ( getMaximalDifference() < 0 ) {
+ throw new IllegalArgumentException( "attempt to use negative max difference with more than two species" );
+ }
+ }
+ }
+
private void addSpeciesSpecificDomainData( final StringBuffer sb,
final Species species,
final boolean html,
sb.append( "<a href=\"" + SurfacingConstants.GOOGLE_SCHOLAR_SEARCH + getDomainId()
+ "\" target=\"gs_window\">gs</a>" );
sb.append( "</td>" );
- sb.append( "<td>" );
- sb.append( ForesterUtil.round( getMeanSimilarityScore(), 3 ) );
- sb.append( "</td>" );
- if ( PRINT_MORE_INFO ) {
- if ( !isTreatAsBinaryComparison() ) {
- sb.append( "<td>" );
- sb.append( "(" );
- sb.append( ForesterUtil.round( getStandardDeviationOfSimilarityScore(), 3 ) );
- sb.append( ")" );
- sb.append( "</td>" );
- sb.append( "<td>" );
- sb.append( "[" );
- sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) );
- sb.append( "-" );
- sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) );
- sb.append( "]" );
- sb.append( "</td>" );
+ // Print the score cells only when scores are available. The score-less constructor
+ // stores -1 as the maximal score, so test for >= 0: a calculated maximum of exactly 0
+ // must still print its cells, as the code did before this change.
+ if ( getMaximalSimilarityScore() >= 0 ) {
+ sb.append( "<td>" );
+ sb.append( ForesterUtil.round( getMeanSimilarityScore(), 3 ) );
+ sb.append( "</td>" );
+ if ( SurfacingConstants.PRINT_MORE_DOM_SIMILARITY_INFO ) {
+ if ( !isTreatAsBinaryComparison() ) {
+ sb.append( "<td>" );
+ sb.append( "(" );
+ sb.append( ForesterUtil.round( getStandardDeviationOfSimilarityScore(), 3 ) );
+ sb.append( ")" );
+ sb.append( "</td>" );
+ sb.append( "<td>" );
+ sb.append( "[" );
+ sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) );
+ sb.append( "-" );
+ sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) );
+ sb.append( "]" );
+ sb.append( "</td>" );
+ }
}
}
sb.append( "<td>" );
if ( !domains_1.getKeyDomain().equals( domains_2.getKeyDomain() ) ) {
throw new IllegalArgumentException( "attempt to calculate similarity between domain collection with different keys" );
}
- final int pc1 = domains_1.getKeyDomainProteinsCount();
- final int pc2 = domains_2.getKeyDomainProteinsCount();
- return new CountsBasedPairwiseDomainSimilarity( pc1 - pc2, pc1 + pc2 );
+ // Both protein counts and their sum are narrowed to short below, so each of them
+ // must fit. The sum check has to use the protein count of domains_2 as well,
+ // not its domain count, or the narrowing of pc1 + pc2 is unguarded.
+ if ( ( domains_1.getKeyDomainProteinsCount() > Short.MAX_VALUE )
+ || ( domains_2.getKeyDomainProteinsCount() > Short.MAX_VALUE )
+ || ( ( domains_1.getKeyDomainProteinsCount() + domains_2.getKeyDomainProteinsCount() ) > Short.MAX_VALUE ) ) {
+ throw new IllegalArgumentException( "too large for short!" );
+ }
+ final short pc1 = ( short ) domains_1.getKeyDomainProteinsCount();
+ final short pc2 = ( short ) domains_2.getKeyDomainProteinsCount();
+ return new CountsBasedPairwiseDomainSimilarity( ( short ) ( pc1 - pc2 ), ( short ) ( pc1 + pc2 ) );
}
}
public class SurfacingConstants {
- public static final String AMIGO_LINK = "http://amigo.geneontology.org/cgi-bin/amigo/go.cgi?view=details&search_constraint=terms&query=";
- public static final String EOL_LINK = "http://www.eol.org/search?q=";
- public static final String GO_LINK = "http://amigo.geneontology.org/cgi-bin/amigo/go.cgi?view=details&search_constraint=terms&query=";
- public static final String GOOGLE_SCHOLAR_SEARCH = "http://scholar.google.com/scholar?q=";
- public static final String GOOGLE_WEB_SEARCH_LINK = "http://www.google.com/search?q=";
- public static final String NL = ForesterUtil.LINE_SEPARATOR;
- public static final String NONE = "[none]";
- public static final String PFAM_FAMILY_ID_LINK = "http://pfam.janelia.org/family/";
- public static final String UNIPROT_TAXONOMY_ID_LINK = "http://www.uniprot.org/taxonomy/";
- static final boolean SECONDARY_FEATURES_ARE_SCOP = true;
- static final String SECONDARY_FEATURES_SCOP_LINK = "http://scop.mrc-lmb.cam.ac.uk/scop/search.cgi?key=";
+ public static final String AMIGO_LINK = "http://amigo.geneontology.org/cgi-bin/amigo/go.cgi?view=details&search_constraint=terms&query=";
+ public static final String EOL_LINK = "http://www.eol.org/search?q=";
+ public static final String GO_LINK = "http://amigo.geneontology.org/cgi-bin/amigo/go.cgi?view=details&search_constraint=terms&query=";
+ public static final String GOOGLE_SCHOLAR_SEARCH = "http://scholar.google.com/scholar?q=";
+ public static final String GOOGLE_WEB_SEARCH_LINK = "http://www.google.com/search?q=";
+ public static final String NL = ForesterUtil.LINE_SEPARATOR;
+ public static final String NONE = "[none]";
+ public static final String PFAM_FAMILY_ID_LINK = "http://pfam.janelia.org/family/";
+ public static final String UNIPROT_TAXONOMY_ID_LINK = "http://www.uniprot.org/taxonomy/";
+ static final boolean SECONDARY_FEATURES_ARE_SCOP = true;
+ static final String SECONDARY_FEATURES_SCOP_LINK = "http://scop.mrc-lmb.cam.ac.uk/scop/search.cgi?key=";
+ // When true, the HTML output of PrintableDomainSimilarity additionally shows, for
+ // non-binary comparisons, the standard deviation and the minimal-maximal score range.
+ static final boolean PRINT_MORE_DOM_SIMILARITY_INFO = false;
}
}
}
- public static DescriptiveStatistics writeDomainSimilaritiesToFile( final StringBuilder html_desc,
- final StringBuilder html_title,
- final Writer single_writer,
- Map<Character, Writer> split_writers,
- final SortedSet<DomainSimilarity> similarities,
- final boolean treat_as_binary,
- final List<Species> species_order,
- final PrintableDomainSimilarity.PRINT_OPTION print_option,
- final DomainSimilarity.DomainSimilarityScoring scoring,
- final boolean verbose,
- final Map<String, Integer> tax_code_to_id_map,
- final boolean print_some_stats )
+ public static void writeDomainSimilaritiesToFile( final StringBuilder html_desc,
+ final StringBuilder html_title,
+ final Writer single_writer,
+ Map<Character, Writer> split_writers,
+ final SortedSet<DomainSimilarity> similarities,
+ final boolean treat_as_binary,
+ final List<Species> species_order,
+ final PrintableDomainSimilarity.PRINT_OPTION print_option,
+ final DomainSimilarity.DomainSimilarityScoring scoring,
+ final boolean verbose,
+ final Map<String, Integer> tax_code_to_id_map )
throws IOException {
- DescriptiveStatistics stats = null;
- AsciiHistogram histo = null;
- if ( print_some_stats ) {
- stats = new BasicDescriptiveStatistics();
- final String histogram_title = "score mean distribution:";
- for( final DomainSimilarity similarity : similarities ) {
- stats.addValue( similarity.getMeanSimilarityScore() );
- }
- try {
- if ( stats.getMin() < stats.getMax() ) {
- histo = new AsciiHistogram( stats, histogram_title );
- }
- }
- catch ( final Exception e ) {
- histo = null;
- }
- }
if ( ( single_writer != null ) && ( ( split_writers == null ) || split_writers.isEmpty() ) ) {
split_writers = new HashMap<Character, Writer>();
split_writers.put( '_', single_writer );
w.write( SurfacingConstants.NL );
w.write( html_desc.toString() );
w.write( SurfacingConstants.NL );
- if ( print_some_stats ) {
- printSomeStats( stats, histo, w );
- }
w.write( "<hr>" );
w.write( SurfacingConstants.NL );
w.write( "<br>" );
for( final Writer w : split_writers.values() ) {
w.close();
}
- return stats;
}
private static void printSomeStats( final DescriptiveStatistics stats, final AsciiHistogram histo, final Writer w )
new BasicSpecies( "nemve" ) ) );
final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
false,
- false );
+ false,
+ true );
final SortedSet<DomainSimilarity> sims = calc
.calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
cdc_list,
new BasicSpecies( "nemve" ) ) );
final DomainSimilarityCalculator calc2 = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
false,
- false );
+ false,
+ true );
final SortedSet<DomainSimilarity> sims2 = calc2
.calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
cdc_list2,
new BasicSpecies( "nemve" ) ) );
final DomainSimilarityCalculator calc3 = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
false,
- false );
+ false,
+ true );
final SortedSet<DomainSimilarity> sims3 = calc3
.calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
cdc_list3,
new BasicSpecies( "nemve" ) ) );
final DomainSimilarityCalculator calc4 = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
true,
- false );
+ false,
+ true );
final SortedSet<DomainSimilarity> sims4 = calc4
.calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
cdc_list4,
new BasicSpecies( "nemve" ) ) );
final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
false,
- false );
+ false,
+ true );
final SortedSet<DomainSimilarity> sims = calc
.calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
cdc_list,
new BasicSpecies( "nemve" ) ) );
final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
false,
- false );
+ false,
+ true );
final SortedSet<DomainSimilarity> sims = calc
.calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
cdc_list,