final static private String PRG_VERSION = "2.280";
final static private String PRG_DATE = "130701";
final static private String E_MAIL = "czmasek@burnham.org";
- final static private String WWW = "www.phylosoft.org/forester/applications/surfacing";
+ final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester/surfacing";
final static private boolean IGNORE_DUFS_DEFAULT = true;
final static private boolean IGNORE_COMBINATION_WITH_SAME_DEFAULLT = false;
final static private double MAX_E_VALUE_DEFAULT = -1;
}
System.out.println( "Time for processing : " + parser.getTime() + "ms" );
log( "", log_writer );
- html_desc.append( "<tr><td>" + input_file_properties[ i ][ 0 ] + " [species: "
- + input_file_properties[ i ][ 1 ] + "]" + ":</td><td>domains analyzed: "
- + parser.getDomainsStored() + "; domains ignored: [ind score cutoffs: "
+ html_desc.append( "<tr><td>" + input_file_properties[ i ][ 0 ] + ":</td><td>doms analyzed: "
+ + parser.getDomainsStored() + "; doms ignored: [ind score cutoffs: "
+ parser.getDomainsIgnoredDueToIndividualScoreCutoff() + "] [E-value cutoff: "
+ parser.getDomainsIgnoredDueToEval() + "] [DUF: " + parser.getDomainsIgnoredDueToDuf()
- + "] [virus like ids: " + parser.getDomainsIgnoredDueToVirusLikeIds()
- + "] [negative domain filter: " + parser.getDomainsIgnoredDueToNegativeDomainFilter()
- + "] [overlap: " + parser.getDomainsIgnoredDueToOverlap() + "]" );
+ + "] [virus like ids: " + parser.getDomainsIgnoredDueToVirusLikeIds() + "] [negative dom filter: "
+ + parser.getDomainsIgnoredDueToNegativeDomainFilter() + "] [overlap: "
+ + parser.getDomainsIgnoredDueToOverlap() + "]" );
if ( negative_filter_file != null ) {
html_desc.append( "; proteins ignored due to negative filter: "
+ parser.getProteinsIgnoredDueToFilter() );
gwcd_list,
ignore_domains_without_combs_in_all_spec,
ignore_species_specific_domains );
- SurfacingUtil.decoratePrintableDomainSimilarities( similarities,
- detailedness,
- go_annotation_output,
- go_id_to_term_map,
- go_namespace_limit );
+ SurfacingUtil.decoratePrintableDomainSimilarities( similarities, detailedness );
final Map<String, Integer> tax_code_to_id_map = SurfacingUtil.createTaxCodeToIdMap( intrees[ 0 ] );
try {
String my_outfile = output_file.toString();
genome_pair,
ignore_domains_without_combs_in_all_spec,
ignore_domains_specific_to_one_species );
- SurfacingUtil.decoratePrintableDomainSimilarities( similarities,
- detailedness,
- go_annotation_output,
- go_id_to_term_map,
- go_namespace_limit );
+ SurfacingUtil.decoratePrintableDomainSimilarities( similarities, detailedness );
final DescriptiveStatistics stats = SurfacingUtil
.calculateDescriptiveStatisticsForMeanValues( similarities );
final String species_j = species[ j ].getSpeciesId();
import java.util.SortedSet;
import java.util.TreeSet;
-import org.forester.go.GoId;
-import org.forester.go.GoNameSpace;
-import org.forester.go.GoTerm;
-import org.forester.go.GoXRef;
import org.forester.species.Species;
import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
-import org.forester.surfacing.DomainSimilarityCalculator.GoAnnotationOutput;
import org.forester.util.ForesterUtil;
public class PrintableDomainSimilarity implements DomainSimilarity {
final private int _n;
private final int _max_difference_in_counts;
private final int _max_difference;
- private DomainSimilarityCalculator.GoAnnotationOutput _go_annotation_output;
final private CombinableDomains _combinable_domains;
final private SortedMap<Species, SpeciesSpecificDomainSimilariyData> _species_data;
final private DomainSimilaritySortField _sort_field;
private List<Species> _species_order;
private final boolean _sort_by_species_count_first;
private DomainSimilarityCalculator.Detailedness _detailedness;
- private Map<GoId, GoTerm> _go_id_to_term_map;
- private GoNameSpace _go_namespace_limit;
private final boolean _treat_as_binary_comparison;
/**
}
}
- // private void addGoInformation( final StringBuffer sb, final boolean for_table, final boolean html ) {
- // if ( !for_table ) {
- // sb.append( "<" );
- // }
- // switch ( getGoAnnotationOutput() ) {
- // case ALL: {
- // final int go_ids = getCombinableDomains().getKeyDomain().getNumberOfGoIds();
- // boolean first = true;
- // for( int i = 0; i < go_ids; ++i ) {
- // final GoId go_id = getCombinableDomains().getKeyDomain().getGoId( i );
- // if ( getGoIdToTermMap() != null ) {
- // if ( getGoIdToTermMap().containsKey( go_id ) ) {
- // first = appendGoTerm( sb, getGoIdToTermMap().get( go_id ), first, html );
- // }
- // else {
- // sb.append( "go id \"" + go_id + "\" not found ["
- // + getCombinableDomains().getKeyDomain().getId() + "]" );
- // }
- // }
- // else {
- // if ( !first ) {
- // sb.append( ", " );
- // }
- // if ( html ) {
- // sb.append( "<a href=\"" + SurfacingConstants.AMIGO_LINK + go_id
- // + "\" target=\"amigo_window\">" + go_id + "</a>" );
- // }
- // else {
- // sb.append( go_id );
- // }
- // first = false;
- // }
- // }
- // break;
- // }
- // case NONE: {
- // break;
- // }
- // default:
- // throw new RuntimeException( "unknown " + getGoAnnotationOutput() );
- // }
- // if ( !for_table ) {
- // sb.append( ">: " );
- // }
- // }
private void addSpeciesSpecificDomainData( final StringBuffer sb,
final Species species,
final boolean html,
sb.append( PrintableDomainSimilarity.SPECIES_SEPARATOR );
}
- private boolean appendGoTerm( final StringBuffer sb, final GoTerm go_term, final boolean first, final boolean html ) {
- if ( ( getGoNamespaceLimit() == null ) || getGoNamespaceLimit().equals( go_term.getGoNameSpace() ) ) {
- if ( !first ) {
- sb.append( ", " );
- }
- final GoId go_id = go_term.getGoId();
- if ( html ) {
- sb.append( "<a href=\"" + SurfacingConstants.AMIGO_LINK + go_id + "\" target=\"amigo_window\">" + go_id
- + "</a>" );
- }
- else {
- sb.append( go_id );
- }
- sb.append( ":" );
- sb.append( go_term.getName() );
- if ( !html ) {
- if ( getGoNamespaceLimit() == null ) {
- sb.append( ":" );
- sb.append( go_term.getGoNameSpace().toString() );
- }
- for( final GoXRef xref : go_term.getGoXRefs() ) {
- sb.append( ":" );
- sb.append( xref.toString() );
- }
- }
- return false;
- }
- return true;
- }
-
private void boldEndIfSortedBy( final DomainSimilaritySortField sort_field, final StringBuffer sb ) {
if ( getSortField() == sort_field ) {
sb.append( "</b>" );
return getCombinableDomains().getKeyDomain();
}
- private DomainSimilarityCalculator.GoAnnotationOutput getGoAnnotationOutput() {
- return _go_annotation_output;
- }
-
- private Map<GoId, GoTerm> getGoIdToTermMap() {
- return _go_id_to_term_map;
- }
-
- public GoNameSpace getGoNamespaceLimit() {
- return _go_namespace_limit;
- }
-
@Override
public int getMaximalDifference() {
return _max_difference;
private void init() {
_detailedness = DomainSimilarityCalculator.Detailedness.PUNCTILIOUS;
- _go_annotation_output = null;
- _go_id_to_term_map = null;
}
private boolean isSortBySpeciesCountFirst() {
_detailedness = detailedness;
}
- public void setGoAnnotationOutput( final GoAnnotationOutput go_annotation_output ) {
- _go_annotation_output = go_annotation_output;
- }
-
- public void setGoIdToTermMap( final Map<GoId, GoTerm> go_id_to_term_map ) {
- _go_id_to_term_map = go_id_to_term_map;
- }
-
- public void setGoNamespaceLimit( final GoNameSpace go_namespace_limit ) {
- _go_namespace_limit = go_namespace_limit;
- }
-
public void setSpeciesOrder( final List<Species> species_order ) {
if ( !species_order.containsAll( getSpeciesData().keySet() ) ) {
throw new IllegalArgumentException( "list to order species must contain all species of multiple combinable domains similarity" );
sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + getDomainId() + "\" target=\"pfam_window\">"
+ getDomainId() + "</a>" );
boldEndIfSortedBy( DomainSimilaritySortField.DOMAIN_ID, sb );
+ // Emit the named anchor as an empty, closed element: it is only a target for in-page "#<domain id>" links
+ // and must not stay open across the end of the table cell.
+ sb.append( "<a name=\"" + getDomainId() + "\"></a>" );
sb.append( "</td>" );
sb.append( "<td>" );
sb.append( "<a href=\"" + SurfacingConstants.GOOGLE_SCHOLAR_SEARCH + getDomainId()
boldStartIfSortedBy( DomainSimilaritySortField.MIN, sb );
sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) );
boldEndIfSortedBy( DomainSimilaritySortField.MIN, sb );
- sb.append( "," );
+ sb.append( "-" );
boldStartIfSortedBy( DomainSimilaritySortField.MAX, sb );
sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) );
boldEndIfSortedBy( DomainSimilaritySortField.MAX, sb );
}
sb.append( "</td>" );
}
- // ^^ if ( getGoAnnotationOutput() != DomainSimilarityCalculator.GoAnnotationOutput.NONE ) {
- // ^^ sb.append( "<td>" );
- // ^^ addGoInformation( sb, true, true );
- // ^^ sb.append( "</td>" );
- // ^^ }
if ( ( getSpeciesCustomOrder() == null ) || getSpeciesCustomOrder().isEmpty() ) {
sb.append( "<td>" );
sb.append( getSpeciesDataInAlphabeticalOrder( true, tax_code_to_id_map ) );
import org.forester.protein.Protein;
import org.forester.species.Species;
import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
-import org.forester.surfacing.DomainSimilarityCalculator.GoAnnotationOutput;
import org.forester.surfacing.GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder;
import org.forester.util.AsciiHistogram;
import org.forester.util.BasicDescriptiveStatistics;
}
/**
 * Applies the given level of detail to every {@link PrintableDomainSimilarity} in the set.
 * Elements that are not instances of PrintableDomainSimilarity are left untouched.
 *
 * @param domain_similarities the similarities to decorate
 * @param detailedness the detailedness passed to setDetailedness() of each printable similarity
 */
public static void decoratePrintableDomainSimilarities( final SortedSet<DomainSimilarity> domain_similarities,
- final Detailedness detailedness,
- final GoAnnotationOutput go_annotation_output,
- final Map<GoId, GoTerm> go_id_to_term_map,
- final GoNameSpace go_namespace_limit ) {
- if ( ( go_namespace_limit != null ) && ( ( go_id_to_term_map == null ) || go_id_to_term_map.isEmpty() ) ) {
- throw new IllegalArgumentException( "attempt to use a GO namespace limit without a GO id to term map" );
- }
+ final Detailedness detailedness ) {
for( final DomainSimilarity domain_similarity : domain_similarities ) {
// Only the printable implementation carries a detailedness setting; other implementations are skipped.
if ( domain_similarity instanceof PrintableDomainSimilarity ) {
final PrintableDomainSimilarity printable_domain_similarity = ( PrintableDomainSimilarity ) domain_similarity;
printable_domain_similarity.setDetailedness( detailedness );
- printable_domain_similarity.setGoAnnotationOutput( go_annotation_output );
- printable_domain_similarity.setGoIdToTermMap( go_id_to_term_map );
- printable_domain_similarity.setGoNamespaceLimit( go_namespace_limit );
}
}
}
}
}
AsciiHistogram histo = null;
- if ( stats.getMin() < stats.getMin() ) {
- histo = new AsciiHistogram( stats, histogram_title );
- }
- if ( verbose ) {
- if ( histo != null ) {
- System.out.println( histo.toStringBuffer( 20, '|', 40, 5 ) );
- }
- System.out.println();
- System.out.println( "N : " + stats.getN() );
- System.out.println( "Min : " + stats.getMin() );
- System.out.println( "Max : " + stats.getMax() );
- System.out.println( "Mean : " + stats.arithmeticMean() );
- if ( stats.getN() > 1 ) {
- System.out.println( "SD : " + stats.sampleStandardDeviation() );
- }
- else {
- System.out.println( "SD : n/a" );
- }
- System.out.println( "Median : " + stats.median() );
- if ( stats.getN() > 1 ) {
- System.out.println( "Pearsonian skewness : " + stats.pearsonianSkewness() );
- }
- else {
- System.out.println( "Pearsonian skewness : n/a" );
+ try {
+ // A histogram is only built when the values actually span a range (min < max).
+ if ( stats.getMin() < stats.getMax() ) {
+ histo = new AsciiHistogram( stats, histogram_title );
}
}
+ catch ( final Exception ignored ) {
+ // NOTE(review): presumably a deliberate best effort - any failure while building the
+ // histogram is dropped and histo is left null; confirm no narrower exception type applies.
+ histo = null;
+ }
if ( ( single_writer != null ) && ( ( split_writers == null ) || split_writers.isEmpty() ) ) {
split_writers = new HashMap<Character, Writer>();
split_writers.put( '_', single_writer );
w.write( SurfacingConstants.NL );
w.write( "<tr><td>Median: </td><td>" + stats.median() + "</td></tr>" );
w.write( SurfacingConstants.NL );
- if ( stats.getN() > 1 ) {
- w.write( "<tr><td>Pearsonian skewness: </td><td>" + stats.pearsonianSkewness() + "</td></tr>" );
- }
- else {
- w.write( "<tr><td>Pearsonian skewness: </td><td>n/a</td></tr>" );
- }
- w.write( SurfacingConstants.NL );
w.write( "</table>" );
w.write( SurfacingConstants.NL );
w.write( "<br>" );
for( final Writer w : split_writers.values() ) {
w.write( SurfacingConstants.NL );
}
+ // Write an index of in-page links, one per domain id, ahead of the similarity entries themselves.
+ for( final DomainSimilarity similarity : similarities ) {
+ if ( ( species_order != null ) && !species_order.isEmpty() ) {
+ ( ( PrintableDomainSimilarity ) similarity ).setSpeciesOrder( species_order );
+ }
+ Writer index_writer = single_writer;
+ if ( index_writer == null ) {
+ // No single output: use the writer keyed by the lower-cased first character of the domain id,
+ // falling back to the writer registered under '0'.
+ index_writer = split_writers.get( ( similarity.getDomainId().charAt( 0 ) + "" ).toLowerCase().charAt( 0 ) );
+ if ( index_writer == null ) {
+ index_writer = split_writers.get( '0' );
+ }
+ }
+ index_writer.write( "<a href=\"#" + similarity.getDomainId() + "\">" + similarity.getDomainId() + "</a><br>" );
+ index_writer.write( SurfacingConstants.NL );
+ }
for( final DomainSimilarity similarity : similarities ) {
if ( ( species_order != null ) && !species_order.isEmpty() ) {
( ( PrintableDomainSimilarity ) similarity ).setSpeciesOrder( species_order );
local_writer.write( similarity.toStringBuffer( print_option, tax_code_to_id_map ).toString() );
local_writer.write( SurfacingConstants.NL );
}
- // for( final Writer w : split_writers.values() ) {
- //w.write( SurfacingConstants.NL );
- // }
}
switch ( print_option ) {
case HTML: