final static private String INPUT_GENOMES_FILE_OPTION = "genomes";
final static private String INPUT_SPECIES_TREE_OPTION = "species_tree";
final static private String SEQ_EXTRACT_OPTION = "prot_extract";
- final static private String PRG_VERSION = "2.280";
- final static private String PRG_DATE = "130701";
+ final static private String PRG_VERSION = "2.290";
+ final static private String PRG_DATE = "130709";
final static private String E_MAIL = "czmasek@burnham.org";
final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester/surfacing";
final static private boolean IGNORE_DUFS_DEFAULT = true;
allowed_options.add( surfacing.PAIRWISE_DOMAIN_COMPARISONS_OPTION );
allowed_options.add( surfacing.IGNORE_DOMAINS_WITHOUT_COMBINATIONS_IN_ALL_SPECIES_OPTION );
allowed_options.add( surfacing.CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS );
- //allowed_options.add( JACKNIFE_OPTION );
- // allowed_options.add( JACKNIFE_RANDOM_SEED_OPTION );
- // allowed_options.add( JACKNIFE_RATIO_OPTION );
allowed_options.add( INPUT_SPECIES_TREE_OPTION );
allowed_options.add( FILTER_POSITIVE_OPTION );
allowed_options.add( FILTER_NEGATIVE_OPTION );
}
System.out.println( "Time for processing : " + parser.getTime() + "ms" );
log( "", log_writer );
- html_desc.append( "<tr><td>" + input_file_properties[ i ][ 0 ] + ":</td><td>doms analyzed: "
- + parser.getDomainsStored() + "; doms ignored: [ind score cutoffs: "
- + parser.getDomainsIgnoredDueToIndividualScoreCutoff() + "] [E-value cutoff: "
- + parser.getDomainsIgnoredDueToEval() + "] [DUF: " + parser.getDomainsIgnoredDueToDuf()
- + "] [virus like ids: " + parser.getDomainsIgnoredDueToVirusLikeIds() + "] [negative dom filter: "
- + parser.getDomainsIgnoredDueToNegativeDomainFilter() + "] [overlap: "
- + parser.getDomainsIgnoredDueToOverlap() + "]" );
- if ( negative_filter_file != null ) {
- html_desc.append( "; proteins ignored due to negative filter: "
- + parser.getProteinsIgnoredDueToFilter() );
- }
- if ( positive_filter_file != null ) {
- html_desc.append( "; proteins ignored due to positive filter: "
- + parser.getProteinsIgnoredDueToFilter() );
- }
- html_desc.append( "</td></tr>" + nl );
try {
int count = 0;
for( final Protein protein : protein_list ) {
number_of_genomes == 2,
species_order,
domain_similarity_print_option,
- domain_similarity_sort_field,
scoring,
true,
- tax_code_to_id_map );
+ tax_code_to_id_map,
+ false );
ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote main output (includes domain similarities) to: \""
+ ( out_dir == null ? my_outfile : out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile ) + "\"" );
}
0,
0,
species_data,
- getSort(),
isSortBySpeciesCountFirst(),
isTreatAsBinaryComparison() );
}
max_difference_in_counts,
max_difference,
species_data,
- getSort(),
isSortBySpeciesCountFirst(),
isTreatAsBinaryComparison() );
}
max_difference_in_counts,
max_difference,
species_data,
- getSort(),
isSortBySpeciesCountFirst(),
isTreatAsBinaryComparison() );
}
return similarity;
}
- private DomainSimilarity.DomainSimilaritySortField getSort() {
- return _sort;
- }
-
private boolean isSortBySpeciesCountFirst() {
return _sort_by_species_count_first;
}
private static SpeciesSpecificDomainSimilariyData createSpeciesSpecificDomainSimilariyData( final CombinableDomains cd ) {
final SpeciesSpecificDomainSimilariyData sd = new PrintableSpeciesSpecificDomainSimilariyData( cd.getKeyDomainProteinsCount(),
cd.getKeyDomainCount(),
- cd.getNumberOfCombinableDomains(),
- cd.getKeyDomainConfidenceDescriptiveStatistics() );
+ cd.getNumberOfCombinableDomains() );
for( final String domain : cd.getCombinableDomains() ) {
sd.addProteinsExhibitingCombinationCount( domain, cd.getNumberOfProteinsExhibitingCombination( domain ) );
}
else {
domain_combination = new BasicCombinableDomains( pd_i.getDomainId(), species );
}
- // ^^ if ( ( domain_id_to_go_ids_map != null )
- // ^^ && domain_id_to_go_ids_map.containsKey( pd_i.getDomainId() ) ) {
- // ^^ final List<GoId> go_ids = domain_id_to_go_ids_map.get( pd_i.getDomainId() );
- // ^^ for( final GoId go_id : go_ids ) {
- // ^^ domain_combination.getKeyDomain().addGoId( go_id );
- // ^^ }
- // ^^ }
instance.add( id_i, domain_combination );
}
final Set<String> saw_j = new HashSet<String>();
MIN, MAX, SD, MEAN, ABS_MAX_COUNTS_DIFFERENCE, MAX_COUNTS_DIFFERENCE, MAX_DIFFERENCE, SPECIES_COUNT, DOMAIN_ID,
}
- public SortedSet<String> getCombinableDomainIds( final Species species_of_combinable_domain );;
+ public SortedSet<String> getCombinableDomainIds( final Species species_of_combinable_domain );
public String getDomainId();
true,
null,
domain_similarity_print_option,
- domain_similarity_sort_field,
scoring,
false,
- tax_code_to_id_map );
+ tax_code_to_id_map,
+ false );
}
catch ( final IOException e ) {
ForesterUtil.fatalError( command_line_prg_name, "Failed to write similarites to: \""
import java.util.Map;
import java.util.SortedMap;
import java.util.SortedSet;
+import java.util.TreeMap;
import java.util.TreeSet;
import org.forester.species.Species;
public class PrintableDomainSimilarity implements DomainSimilarity {
final public static String SPECIES_SEPARATOR = " ";
- final private static char TAB = '\t';
- final private static int BEFORE = -1;
final private static int EQUAL = 0;
- final private static int AFTER = 1;
final private static String NO_SPECIES = " ";
+ private static final boolean PRINT_MORE_INFO = false;
final private double _min;
final private double _max;
final private double _mean;
private final int _max_difference;
final private CombinableDomains _combinable_domains;
final private SortedMap<Species, SpeciesSpecificDomainSimilariyData> _species_data;
- final private DomainSimilaritySortField _sort_field;
private List<Species> _species_order;
- private final boolean _sort_by_species_count_first;
private DomainSimilarityCalculator.Detailedness _detailedness;
private final boolean _treat_as_binary_comparison;
- /**
- * If go_id_to_term_map not null, detailed GO information is written,
- * only GO ids otherwise.
- *
- *
- */
public PrintableDomainSimilarity( final CombinableDomains combinable_domains,
final double min,
final double max,
final int max_difference_in_counts,
final int max_difference,
final SortedMap<Species, SpeciesSpecificDomainSimilariyData> species_data,
- final DomainSimilaritySortField sort_field,
final boolean sort_by_species_count_first,
final boolean treat_as_binary_comparison ) {
if ( combinable_domains == null ) {
throw new IllegalArgumentException( "attempt to use null combinable domains" );
}
- if ( sort_field == null ) {
- throw new IllegalArgumentException( "attempt to use null sorting" );
- }
if ( species_data == null ) {
throw new IllegalArgumentException( "attempt to use null species data" );
}
_max_difference_in_counts = max_difference_in_counts;
_max_difference = max_difference;
_species_data = species_data;
- _sort_field = sort_field;
- _sort_by_species_count_first = sort_by_species_count_first;
_treat_as_binary_comparison = treat_as_binary_comparison;
final int s = species_data.size();
if ( ( ( s * s ) - s ) != ( getN() * 2 ) ) {
final Species species,
final boolean html,
final Map<String, Integer> tax_code_to_id_map ) {
- if ( getDetaildness() != DomainSimilarityCalculator.Detailedness.BASIC ) {
- sb.append( "[" );
- }
if ( html ) {
- sb.append( "<b>" );
- final String tax_code = species.getSpeciesId();
- if ( !ForesterUtil.isEmpty( tax_code )
- && ( ( tax_code_to_id_map != null ) && tax_code_to_id_map.containsKey( tax_code ) ) ) {
- sb.append( "<a href=\"" + SurfacingConstants.UNIPROT_TAXONOMY_ID_LINK
- + tax_code_to_id_map.get( tax_code ) + "\" target=\"taxonomy_window\">" + tax_code + "</a>" );
- }
- else {
- sb.append( tax_code );
- }
- sb.append( "</b>" );
+ addTaxWithLink( sb, species.getSpeciesId(), tax_code_to_id_map );
}
else {
sb.append( species.getSpeciesId() );
if ( getDetaildness() != DomainSimilarityCalculator.Detailedness.BASIC ) {
sb.append( ":" );
sb.append( getSpeciesData().get( species ).toStringBuffer( getDetaildness(), html ) );
- sb.append( "]" );
}
if ( html ) {
sb.append( "<br>" );
}
- sb.append( PrintableDomainSimilarity.SPECIES_SEPARATOR );
- }
-
- private void boldEndIfSortedBy( final DomainSimilaritySortField sort_field, final StringBuffer sb ) {
- if ( getSortField() == sort_field ) {
- sb.append( "</b>" );
+ else {
+ sb.append( PrintableDomainSimilarity.SPECIES_SEPARATOR );
}
}
- private void boldStartIfSortedBy( final DomainSimilaritySortField sort_field, final StringBuffer sb ) {
- if ( getSortField() == sort_field ) {
- sb.append( "<b>" );
+ private void addTaxWithLink( final StringBuffer sb, // Appends tax_code to sb, always wrapped in <b>...</b>.
+ final String tax_code,
+ final Map<String, Integer> tax_code_to_id_map ) {
+ sb.append( "<b>" );
+ if ( !ForesterUtil.isEmpty( tax_code ) // Link only when the code is non-empty and the map holds an id for it.
+ && ( ( tax_code_to_id_map != null ) && tax_code_to_id_map.containsKey( tax_code ) ) ) {
+ sb.append( "<a href=\"" + SurfacingConstants.UNIPROT_TAXONOMY_ID_LINK + tax_code_to_id_map.get( tax_code ) // Link target is the prefix constant followed by the mapped id.
+ + "\" target=\"taxonomy_window\">" + tax_code + "</a>" );
+ }
+ else { // No usable mapping: append the code as-is, without a link.
+ sb.append( tax_code );
}
+ sb.append( "</b>" );
}
private int compareByDomainId( final DomainSimilarity other ) {
- return getDomainId().compareTo( other.getDomainId() );
- }
-
- private int compareBySpeciesCount( final DomainSimilarity domain_similarity ) {
- final int s_this = getSpeciesData().size();
- final int s_other = domain_similarity.getSpeciesData().size();
- if ( s_this < s_other ) {
- return PrintableDomainSimilarity.BEFORE;
- }
- else if ( s_this > s_other ) {
- return PrintableDomainSimilarity.AFTER;
- }
- else {
- return PrintableDomainSimilarity.EQUAL;
- }
+ return getDomainId().compareToIgnoreCase( other.getDomainId() ); // Orders by domain id, ignoring case. NOTE(review): ids differing only in case now compare as 0; if these objects are kept in a sorted set/map, one of them would be dropped - confirm ids are unique ignoring case.
}
@Override
public int compareTo( final DomainSimilarity domain_similarity ) {
if ( this == domain_similarity ) {
- return PrintableDomainSimilarity.EQUAL;
+ return EQUAL;
}
else if ( domain_similarity == null ) {
throw new IllegalArgumentException( "attempt to compare " + this.getClass() + " to null" );
throw new IllegalArgumentException( "attempt to compare " + this.getClass() + " to "
+ domain_similarity.getClass() );
}
- switch ( getSortField() ) {
- case MIN:
- if ( isSortBySpeciesCountFirst() ) {
- final int i = compareBySpeciesCount( domain_similarity );
- if ( i != PrintableDomainSimilarity.EQUAL ) {
- return i;
- }
- }
- if ( getMinimalSimilarityScore() < domain_similarity.getMinimalSimilarityScore() ) {
- return PrintableDomainSimilarity.BEFORE;
- }
- else if ( getMinimalSimilarityScore() > domain_similarity.getMinimalSimilarityScore() ) {
- return PrintableDomainSimilarity.AFTER;
- }
- else {
- return compareByDomainId( domain_similarity );
- }
- case MAX:
- if ( isSortBySpeciesCountFirst() ) {
- final int i = compareBySpeciesCount( domain_similarity );
- if ( i != PrintableDomainSimilarity.EQUAL ) {
- return i;
- }
- }
- if ( getMaximalSimilarityScore() < domain_similarity.getMaximalSimilarityScore() ) {
- return PrintableDomainSimilarity.BEFORE;
- }
- else if ( getMaximalSimilarityScore() > domain_similarity.getMaximalSimilarityScore() ) {
- return PrintableDomainSimilarity.AFTER;
- }
- else {
- return compareByDomainId( domain_similarity );
- }
- case MEAN:
- if ( isSortBySpeciesCountFirst() ) {
- final int i = compareBySpeciesCount( domain_similarity );
- if ( i != PrintableDomainSimilarity.EQUAL ) {
- return i;
- }
- }
- if ( getMeanSimilarityScore() < domain_similarity.getMeanSimilarityScore() ) {
- return PrintableDomainSimilarity.BEFORE;
- }
- else if ( getMeanSimilarityScore() > domain_similarity.getMeanSimilarityScore() ) {
- return PrintableDomainSimilarity.AFTER;
- }
- else {
- return compareByDomainId( domain_similarity );
- }
- case SD:
- if ( isSortBySpeciesCountFirst() ) {
- final int i = compareBySpeciesCount( domain_similarity );
- if ( i != PrintableDomainSimilarity.EQUAL ) {
- return i;
- }
- }
- if ( getStandardDeviationOfSimilarityScore() < domain_similarity
- .getStandardDeviationOfSimilarityScore() ) {
- return PrintableDomainSimilarity.BEFORE;
- }
- else if ( getStandardDeviationOfSimilarityScore() > domain_similarity
- .getStandardDeviationOfSimilarityScore() ) {
- return PrintableDomainSimilarity.AFTER;
- }
- else {
- return compareByDomainId( domain_similarity );
- }
- case MAX_DIFFERENCE:
- if ( isSortBySpeciesCountFirst() ) {
- final int i = compareBySpeciesCount( domain_similarity );
- if ( i != PrintableDomainSimilarity.EQUAL ) {
- return i;
- }
- }
- if ( getMaximalDifference() > domain_similarity.getMaximalDifference() ) {
- return PrintableDomainSimilarity.BEFORE;
- }
- else if ( getMaximalDifference() < domain_similarity.getMaximalDifference() ) {
- return PrintableDomainSimilarity.AFTER;
- }
- else {
- return compareByDomainId( domain_similarity );
- }
- case ABS_MAX_COUNTS_DIFFERENCE:
- if ( isSortBySpeciesCountFirst() ) {
- final int i = compareBySpeciesCount( domain_similarity );
- if ( i != PrintableDomainSimilarity.EQUAL ) {
- return i;
- }
- }
- if ( Math.abs( getMaximalDifferenceInCounts() ) > Math.abs( domain_similarity
- .getMaximalDifferenceInCounts() ) ) {
- return PrintableDomainSimilarity.BEFORE;
- }
- else if ( Math.abs( getMaximalDifferenceInCounts() ) < Math.abs( domain_similarity
- .getMaximalDifferenceInCounts() ) ) {
- return PrintableDomainSimilarity.AFTER;
- }
- else {
- return compareByDomainId( domain_similarity );
- }
- case MAX_COUNTS_DIFFERENCE:
- if ( getSpeciesData().size() != 2 ) {
- throw new RuntimeException( "attempt to sort by maximal difference with species not equal to two" );
- }
- if ( isSortBySpeciesCountFirst() ) {
- final int i = compareBySpeciesCount( domain_similarity );
- if ( i != PrintableDomainSimilarity.EQUAL ) {
- return i;
- }
- }
- if ( getMaximalDifferenceInCounts() > domain_similarity.getMaximalDifferenceInCounts() ) {
- return PrintableDomainSimilarity.BEFORE;
- }
- else if ( getMaximalDifferenceInCounts() < domain_similarity.getMaximalDifferenceInCounts() ) {
- return PrintableDomainSimilarity.AFTER;
- }
- else {
- return compareByDomainId( domain_similarity );
- }
- case SPECIES_COUNT:
- final int i = compareBySpeciesCount( domain_similarity );
- if ( i != PrintableDomainSimilarity.EQUAL ) {
- return i;
- }
- else {
- return compareByDomainId( domain_similarity );
- }
- case DOMAIN_ID:
- return compareByDomainId( domain_similarity );
- }
- throw new AssertionError( "Unknown sort method: " + getSortField() );
+ return compareByDomainId( domain_similarity );
}
@Override
return _n;
}
- private DomainSimilaritySortField getSortField() {
- return _sort_field;
- }
-
@Override
public SortedSet<Species> getSpecies() {
final SortedSet<Species> species = new TreeSet<Species>();
return sb;
}
+ /**
+  * Builds an HTML fragment that lists every combinable domain id reported by
+  * getCombinableDomainIds() for any species in getSpeciesData(), one per line
+  * (terminated by a line break tag), each followed by the ids of the species it
+  * was reported for.
+  * <p>
+  * Domain ids and, within a domain, species ids are emitted in the natural
+  * String order of the TreeMap/TreeSet used to collect them. Each domain id
+  * is rendered as a link built from SurfacingConstants.PFAM_FAMILY_ID_LINK,
+  * using the same "pfam_window" target as the key-domain link of the HTML row.
+  *
+  * @return the HTML fragment; empty if no species has combinable domains
+  */
+ private StringBuffer getDomainDataInAlphabeticalOrder() {
+     final SortedMap<String, SortedSet<String>> species_by_domain = new TreeMap<String, SortedSet<String>>();
+     for( final Species species : getSpeciesData().keySet() ) {
+         for( final String combable_dom : getCombinableDomainIds( species ) ) {
+             // Single lookup; the set is created the first time a domain is seen.
+             SortedSet<String> species_ids = species_by_domain.get( combable_dom );
+             if ( species_ids == null ) {
+                 species_ids = new TreeSet<String>();
+                 species_by_domain.put( combable_dom, species_ids );
+             }
+             species_ids.add( species.getSpeciesId() );
+         }
+     }
+     final StringBuffer sb = new StringBuffer();
+     for( final Map.Entry<String, SortedSet<String>> e : species_by_domain.entrySet() ) {
+         // Open the link in the named window so the report page itself is not replaced.
+         sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + e.getKey()
+                 + "\" target=\"pfam_window\">" + e.getKey() + "</a>" );
+         sb.append( ": " );
+         for( final String s : e.getValue() ) {
+             sb.append( s );
+             sb.append( " " );
+         }
+         sb.append( "<br>" );
+     }
+     return sb;
+ }
+
private StringBuffer getSpeciesDataInCustomOrder( final boolean html, final Map<String, Integer> tax_code_to_id_map ) {
final StringBuffer sb = new StringBuffer();
for( final Species order_species : getSpeciesCustomOrder() ) {
_detailedness = DomainSimilarityCalculator.Detailedness.PUNCTILIOUS;
}
- private boolean isSortBySpeciesCountFirst() {
- return _sort_by_species_count_first;
- }
-
private boolean isTreatAsBinaryComparison() {
return _treat_as_binary_comparison;
}
final StringBuffer sb = new StringBuffer();
sb.append( "<tr>" );
sb.append( "<td>" );
- boldStartIfSortedBy( DomainSimilaritySortField.DOMAIN_ID, sb );
+ sb.append( "<b>" );
sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + getDomainId() + "\" target=\"pfam_window\">"
+ getDomainId() + "</a>" );
- boldEndIfSortedBy( DomainSimilaritySortField.DOMAIN_ID, sb );
+ sb.append( "</b>" );
sb.append( "<a name=\"" + getDomainId() + "\">" );
sb.append( "</td>" );
sb.append( "<td>" );
+ "\" target=\"gs_window\">gs</a>" );
sb.append( "</td>" );
sb.append( "<td>" );
- boldStartIfSortedBy( DomainSimilaritySortField.MEAN, sb );
sb.append( ForesterUtil.round( getMeanSimilarityScore(), 3 ) );
- boldEndIfSortedBy( DomainSimilaritySortField.MEAN, sb );
sb.append( "</td>" );
- if ( !isTreatAsBinaryComparison() ) {
- sb.append( "<td>" );
- sb.append( "(" );
- boldStartIfSortedBy( DomainSimilaritySortField.SD, sb );
- sb.append( ForesterUtil.round( getStandardDeviationOfSimilarityScore(), 3 ) );
- boldEndIfSortedBy( DomainSimilaritySortField.SD, sb );
- sb.append( ")" );
- sb.append( "</td>" );
- sb.append( "<td>" );
- sb.append( "[" );
- boldStartIfSortedBy( DomainSimilaritySortField.MIN, sb );
- sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) );
- boldEndIfSortedBy( DomainSimilaritySortField.MIN, sb );
- sb.append( "-" );
- boldStartIfSortedBy( DomainSimilaritySortField.MAX, sb );
- sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) );
- boldEndIfSortedBy( DomainSimilaritySortField.MAX, sb );
- sb.append( "]" );
- sb.append( "</td>" );
+ if ( PRINT_MORE_INFO ) {
+ if ( !isTreatAsBinaryComparison() ) {
+ sb.append( "<td>" );
+ sb.append( "(" );
+ sb.append( ForesterUtil.round( getStandardDeviationOfSimilarityScore(), 3 ) );
+ sb.append( ")" );
+ sb.append( "</td>" );
+ sb.append( "<td>" );
+ sb.append( "[" );
+ sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) );
+ sb.append( "-" );
+ sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) );
+ sb.append( "]" );
+ sb.append( "</td>" );
+ }
}
sb.append( "<td>" );
- boldStartIfSortedBy( DomainSimilaritySortField.MAX_DIFFERENCE, sb );
sb.append( getMaximalDifference() );
- boldEndIfSortedBy( DomainSimilaritySortField.MAX_DIFFERENCE, sb );
sb.append( "</td>" );
sb.append( "<td>" );
if ( isTreatAsBinaryComparison() ) {
- boldStartIfSortedBy( DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE, sb );
- boldStartIfSortedBy( DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE, sb );
sb.append( getMaximalDifferenceInCounts() );
- boldEndIfSortedBy( DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE, sb );
- boldStartIfSortedBy( DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE, sb );
}
else {
- boldStartIfSortedBy( DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE, sb );
- boldStartIfSortedBy( DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE, sb );
sb.append( Math.abs( getMaximalDifferenceInCounts() ) );
- boldEndIfSortedBy( DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE, sb );
- boldStartIfSortedBy( DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE, sb );
}
sb.append( "</td>" );
if ( !isTreatAsBinaryComparison() ) {
sb.append( "<td>" );
- if ( ( getSortField() == DomainSimilaritySortField.SPECIES_COUNT ) || isSortBySpeciesCountFirst() ) {
- sb.append( "<b>" );
- }
+ sb.append( "<b>" );
sb.append( getSpeciesData().size() );
- if ( ( getSortField() == DomainSimilaritySortField.SPECIES_COUNT ) || isSortBySpeciesCountFirst() ) {
- sb.append( "</b>" );
- }
+ sb.append( "</b>" );
sb.append( "</td>" );
}
if ( ( getSpeciesCustomOrder() == null ) || getSpeciesCustomOrder().isEmpty() ) {
sb.append( "<td>" );
sb.append( getSpeciesDataInAlphabeticalOrder( true, tax_code_to_id_map ) );
+ sb.append( getDomainDataInAlphabeticalOrder() );
sb.append( "</td>" );
}
else {
sb.append( "<td>" );
sb.append( getSpeciesDataInCustomOrder( true, tax_code_to_id_map ) );
+ sb.append( getDomainDataInAlphabeticalOrder() );
sb.append( "</td>" );
}
sb.append( "</tr>" );
private StringBuffer toStringBufferSimpleTabDelimited() {
final StringBuffer sb = new StringBuffer();
sb.append( getDomainId() );
- switch ( getSortField() ) {
- case MIN:
- sb.append( TAB );
- sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) );
- break;
- case MAX:
- sb.append( TAB );
- sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) );
- break;
- case MEAN:
- sb.append( TAB );
- sb.append( ForesterUtil.round( getMeanSimilarityScore(), 3 ) );
- break;
- case SD:
- sb.append( TAB );
- sb.append( ForesterUtil.round( getStandardDeviationOfSimilarityScore(), 3 ) );
- break;
- case MAX_DIFFERENCE:
- sb.append( TAB );
- sb.append( getMaximalDifference() );
- case ABS_MAX_COUNTS_DIFFERENCE:
- case MAX_COUNTS_DIFFERENCE:
- sb.append( TAB );
- if ( isTreatAsBinaryComparison() ) {
- sb.append( getMaximalDifferenceInCounts() );
- }
- else {
- sb.append( Math.abs( getMaximalDifferenceInCounts() ) );
- }
- break;
- case SPECIES_COUNT:
- sb.append( TAB );
- sb.append( getSpeciesData().size() );
- break;
- case DOMAIN_ID:
- break;
- default:
- throw new AssertionError( "Unknown sort method: " + getSortField() );
- }
- // ^^ if ( getGoAnnotationOutput() != DomainSimilarityCalculator.GoAnnotationOutput.NONE ) {
- // ^^ sb.append( TAB );
- // ^^ addGoInformation( sb, true, false );
- // ^^ }
return sb;
}
package org.forester.surfacing;
-import java.text.DecimalFormat;
-import java.text.NumberFormat;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
-import org.forester.util.DescriptiveStatistics;
-
class PrintableSpeciesSpecificDomainSimilariyData implements SpeciesSpecificDomainSimilariyData {
- private final static NumberFormat FORMATTER = new DecimalFormat( "0.0E0" );
- final SortedMap<String, Integer> _combinable_domain_id_to_count_map;
- final private int _key_domain_proteins_count;
- final private int _key_domain_domains_count;
- final private int _combinable_domains_count;
- final private DescriptiveStatistics _key_domain_confidence_descriptive_statistics;
+ final SortedMap<String, Integer> _combinable_domain_id_to_count_map;
+ final private int _key_domain_proteins_count;
+ final private int _key_domain_domains_count;
+ final private int _combinable_domains_count;
public PrintableSpeciesSpecificDomainSimilariyData( final int key_domain_proteins_count,
final int key_domain_domains_count,
- final int combinable_domains,
- final DescriptiveStatistics key_domain_confidence_descriptive_statistics ) {
+ final int combinable_domains ) {
_key_domain_proteins_count = key_domain_proteins_count;
_key_domain_domains_count = key_domain_domains_count;
_combinable_domains_count = combinable_domains;
- _key_domain_confidence_descriptive_statistics = key_domain_confidence_descriptive_statistics;
_combinable_domain_id_to_count_map = new TreeMap<String, Integer>();
}
return _combinable_domains_count;
}
- private DescriptiveStatistics getKeyDomainConfidenceDescriptiveStatistics() {
- return _key_domain_confidence_descriptive_statistics;
- }
-
private int getKeyDomainDomainsCount() {
return _key_domain_domains_count;
}
sb.append( getKeyDomainProteinsCount() );
sb.append( ", " );
sb.append( getCombinableDomainsCount() );
- sb.append( ", " );
- if ( html ) {
- sb.append( "<i>" );
- }
- sb.append( FORMATTER.format( getKeyDomainConfidenceDescriptiveStatistics().arithmeticMean() ) );
- if ( html ) {
- sb.append( "</i>" );
- }
if ( !getCombinableDomainIdToCountsMap().isEmpty() ) {
sb.append( ":" );
}
sb.append( ":" );
sb.append( getCombinableDomainIdToCountsMap().get( domain_id ) );
}
- if ( i < ( ids.size() - 1 ) ) {
- sb.append( "," );
- }
}
return sb;
}
out.write( species + "\t" );
}
out.write( ForesterUtil.LINE_SEPARATOR );
- // DescriptiveStatistics stats_for_domain = domain_lengths
- // .calculateMeanBasedStatistics();
- //AsciiHistogram histo = new AsciiHistogram( stats_for_domain );
- //System.out.println( histo.toStringBuffer( 40, '=', 60, 4 ).toString() );
}
}
out.write( ForesterUtil.LINE_SEPARATOR );
}
}
out.close();
- // final List<HistogramData> histogram_datas = new ArrayList<HistogramData>();
- // for( int i = 0; i < number_of_genomes; ++i ) {
- // final Species species = new BasicSpecies( input_file_properties[ i ][ 0 ] );
- // histogram_datas
- // .add( new HistogramData( species.toString(), domain_lengths_table
- // .calculateMeanBasedStatisticsForSpecies( species )
- // .getDataAsDoubleArray(), 5, 600, null, 60 ) );
- // }
- // final HistogramsFrame hf = new HistogramsFrame( histogram_datas );
- // hf.setVisible( true );
System.gc();
}
final boolean treat_as_binary,
final List<Species> species_order,
final PrintableDomainSimilarity.PRINT_OPTION print_option,
- final DomainSimilarity.DomainSimilaritySortField sort_field,
final DomainSimilarity.DomainSimilarityScoring scoring,
final boolean verbose,
- final Map<String, Integer> tax_code_to_id_map )
+ final Map<String, Integer> tax_code_to_id_map,
+ final boolean print_some_stats )
throws IOException {
- final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
- String histogram_title = null;
- switch ( sort_field ) {
- case ABS_MAX_COUNTS_DIFFERENCE:
- if ( treat_as_binary ) {
- histogram_title = "absolute counts difference:";
- }
- else {
- histogram_title = "absolute (maximal) counts difference:";
- }
- break;
- case MAX_COUNTS_DIFFERENCE:
- if ( treat_as_binary ) {
- histogram_title = "counts difference:";
- }
- else {
- histogram_title = "(maximal) counts difference:";
- }
- break;
- case DOMAIN_ID:
- histogram_title = "score mean:";
- break;
- case MIN:
- histogram_title = "score minimum:";
- break;
- case MAX:
- histogram_title = "score maximum:";
- break;
- case MAX_DIFFERENCE:
- if ( treat_as_binary ) {
- histogram_title = "difference:";
- }
- else {
- histogram_title = "(maximal) difference:";
+ DescriptiveStatistics stats = null;
+ AsciiHistogram histo = null;
+ if ( print_some_stats ) {
+ stats = new BasicDescriptiveStatistics();
+ final String histogram_title = "score mean distribution:";
+ for( final DomainSimilarity similarity : similarities ) {
+ stats.addValue( similarity.getMeanSimilarityScore() );
+ }
+ try {
+ if ( stats.getMin() < stats.getMax() ) {
+ histo = new AsciiHistogram( stats, histogram_title );
}
- break;
- case MEAN:
- histogram_title = "score mean:";
- break;
- case SD:
- histogram_title = "score standard deviation:";
- break;
- case SPECIES_COUNT:
- histogram_title = "species number:";
- break;
- default:
- throw new AssertionError( "Unknown sort field: " + sort_field );
- }
- for( final DomainSimilarity similarity : similarities ) {
- switch ( sort_field ) {
- case ABS_MAX_COUNTS_DIFFERENCE:
- stats.addValue( Math.abs( similarity.getMaximalDifferenceInCounts() ) );
- break;
- case MAX_COUNTS_DIFFERENCE:
- stats.addValue( similarity.getMaximalDifferenceInCounts() );
- break;
- case DOMAIN_ID:
- stats.addValue( similarity.getMeanSimilarityScore() );
- break;
- case MIN:
- stats.addValue( similarity.getMinimalSimilarityScore() );
- break;
- case MAX:
- stats.addValue( similarity.getMaximalSimilarityScore() );
- break;
- case MAX_DIFFERENCE:
- stats.addValue( similarity.getMaximalDifference() );
- break;
- case MEAN:
- stats.addValue( similarity.getMeanSimilarityScore() );
- break;
- case SD:
- stats.addValue( similarity.getStandardDeviationOfSimilarityScore() );
- break;
- case SPECIES_COUNT:
- stats.addValue( similarity.getSpecies().size() );
- break;
- default:
- throw new AssertionError( "Unknown sort field: " + sort_field );
}
- }
- AsciiHistogram histo = null;
- try {
- if ( stats.getMin() < stats.getMax() ) {
- histo = new AsciiHistogram( stats, histogram_title );
+ catch ( final Exception e ) {
+ histo = null;
}
}
- catch ( Exception e ) {
- histo = null;
- }
if ( ( single_writer != null ) && ( ( split_writers == null ) || split_writers.isEmpty() ) ) {
split_writers = new HashMap<Character, Writer>();
split_writers.put( '_', single_writer );
w.write( "<html>" );
w.write( SurfacingConstants.NL );
if ( key != '_' ) {
- addHtmlHead( w, "DCs (" + html_title + ") " + key.toString().toUpperCase() );
+ addHtmlHead( w, "DC analysis (" + html_title + ") " + key.toString().toUpperCase() );
}
else {
- addHtmlHead( w, "DCs (" + html_title + ")" );
+ addHtmlHead( w, "DC analysis (" + html_title + ")" );
}
w.write( SurfacingConstants.NL );
w.write( "<body>" );
w.write( SurfacingConstants.NL );
w.write( html_desc.toString() );
w.write( SurfacingConstants.NL );
- w.write( "<hr>" );
- w.write( "<br>" );
- w.write( SurfacingConstants.NL );
- w.write( "<tt><pre>" );
- w.write( SurfacingConstants.NL );
- if ( histo != null ) {
- w.write( histo.toStringBuffer( 20, '|', 40, 5 ).toString() );
- w.write( SurfacingConstants.NL );
- }
- w.write( "</pre></tt>" );
- w.write( SurfacingConstants.NL );
- w.write( "<table>" );
- w.write( SurfacingConstants.NL );
- w.write( "<tr><td>N: </td><td>" + stats.getN() + "</td></tr>" );
- w.write( SurfacingConstants.NL );
- w.write( "<tr><td>Min: </td><td>" + stats.getMin() + "</td></tr>" );
- w.write( SurfacingConstants.NL );
- w.write( "<tr><td>Max: </td><td>" + stats.getMax() + "</td></tr>" );
- w.write( SurfacingConstants.NL );
- w.write( "<tr><td>Mean: </td><td>" + stats.arithmeticMean() + "</td></tr>" );
- w.write( SurfacingConstants.NL );
- if ( stats.getN() > 1 ) {
- w.write( "<tr><td>SD: </td><td>" + stats.sampleStandardDeviation() + "</td></tr>" );
+ if ( print_some_stats ) {
+ printSomeStats( stats, histo, w );
}
- else {
- w.write( "<tr><td>SD: </td><td>n/a</td></tr>" );
- }
- w.write( SurfacingConstants.NL );
- w.write( "<tr><td>Median: </td><td>" + stats.median() + "</td></tr>" );
- w.write( SurfacingConstants.NL );
- w.write( "</table>" );
- w.write( SurfacingConstants.NL );
- w.write( "<br>" );
- w.write( SurfacingConstants.NL );
w.write( "<hr>" );
w.write( SurfacingConstants.NL );
w.write( "<br>" );
w.write( SurfacingConstants.NL );
w.write( "<table>" );
w.write( SurfacingConstants.NL );
+ w.write( "<tr><td><b>Domains:</b></td></tr>" );
+ w.write( SurfacingConstants.NL );
}
break;
}
- for( final Writer w : split_writers.values() ) {
- w.write( SurfacingConstants.NL );
- }
//
for( final DomainSimilarity similarity : similarities ) {
if ( ( species_order != null ) && !species_order.isEmpty() ) {
( ( PrintableDomainSimilarity ) similarity ).setSpeciesOrder( species_order );
}
if ( single_writer != null ) {
- single_writer.write( "<a href=\"#" + similarity.getDomainId() + "\">" + similarity.getDomainId()
- + "</a><br>" );
+ single_writer.write( "<tr><td><b><a href=\"#" + similarity.getDomainId() + "\">"
+ + similarity.getDomainId() + "</a></b></td></tr>" );
single_writer.write( SurfacingConstants.NL );
}
else {
if ( local_writer == null ) {
local_writer = split_writers.get( '0' );
}
- local_writer.write( "<a href=\"#" + similarity.getDomainId() + "\">" + similarity.getDomainId()
- + "</a><br>" );
+ local_writer.write( "<tr><td><b><a href=\"#" + similarity.getDomainId() + "\">"
+ + similarity.getDomainId() + "</a></b></td></tr>" );
local_writer.write( SurfacingConstants.NL );
}
}
- // w.write( "<hr>" );
- // w.write( SurfacingConstants.NL );
+ for( final Writer w : split_writers.values() ) {
+ w.write( "</table>" );
+ w.write( SurfacingConstants.NL );
+ w.write( "<hr>" );
+ w.write( SurfacingConstants.NL );
+ w.write( "<table>" );
+ w.write( SurfacingConstants.NL );
+ }
//
for( final DomainSimilarity similarity : similarities ) {
if ( ( species_order != null ) && !species_order.isEmpty() ) {
return stats;
}
+ private static void printSomeStats( final DescriptiveStatistics stats, final AsciiHistogram histo, final Writer w )
+ throws IOException {
+ w.write( "<hr>" );
+ w.write( "<br>" );
+ w.write( SurfacingConstants.NL );
+ w.write( "<tt><pre>" );
+ w.write( SurfacingConstants.NL );
+ if ( histo != null ) {
+ w.write( histo.toStringBuffer( 20, '|', 40, 5 ).toString() );
+ w.write( SurfacingConstants.NL );
+ }
+ w.write( "</pre></tt>" );
+ w.write( SurfacingConstants.NL );
+ w.write( "<table>" );
+ w.write( SurfacingConstants.NL );
+ w.write( "<tr><td>N: </td><td>" + stats.getN() + "</td></tr>" );
+ w.write( SurfacingConstants.NL );
+ w.write( "<tr><td>Min: </td><td>" + stats.getMin() + "</td></tr>" );
+ w.write( SurfacingConstants.NL );
+ w.write( "<tr><td>Max: </td><td>" + stats.getMax() + "</td></tr>" );
+ w.write( SurfacingConstants.NL );
+ w.write( "<tr><td>Mean: </td><td>" + stats.arithmeticMean() + "</td></tr>" );
+ w.write( SurfacingConstants.NL );
+ if ( stats.getN() > 1 ) {
+ w.write( "<tr><td>SD: </td><td>" + stats.sampleStandardDeviation() + "</td></tr>" );
+ }
+ else {
+ w.write( "<tr><td>SD: </td><td>n/a</td></tr>" );
+ }
+ w.write( SurfacingConstants.NL );
+ w.write( "</table>" );
+ w.write( SurfacingConstants.NL );
+ w.write( "<br>" );
+ w.write( SurfacingConstants.NL );
+ }
+
public static void writeMatrixToFile( final CharacterStateMatrix<?> matrix,
final String filename,
final Format format ) {