// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
-//
+//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
-//
+//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
package org.forester.go.etc;
import org.forester.go.GoUtils;
import org.forester.go.OBOparser;
import org.forester.go.PfamToGoMapping;
-import org.forester.surfacing.BasicSpecies;
-import org.forester.surfacing.DomainId;
-import org.forester.surfacing.Species;
+import org.forester.species.BasicSpecies;
+import org.forester.species.Species;
import org.forester.surfacing.SurfacingConstants;
import org.forester.surfacing.SurfacingUtil;
import org.forester.util.ForesterUtil;
final static private String PRG_NAME = "meta_ontologizer";
private static final boolean VERBOSE = true;
//table-a_41_dollo_all_gains_d-Topology-Elim-Bonferroni.txt:
- private final static Pattern PATTERN_ONTOLOGIZER_TABLE_OUTPUT = Pattern.compile( ".*table-(.+)_dollo_.*",
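+ //TODO change back to the "_dollo_"-anchored pattern (commented out below); the ".txt"-based pattern is a temporary replacement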
+ // private final static Pattern PATTERN_ONTOLOGIZER_TABLE_OUTPUT = Pattern.compile( ".*table-(.+)_dollo_.*",
+ // Pattern.CASE_INSENSITIVE ); //TODO this might need some work...
+ private final static Pattern PATTERN_ONTOLOGIZER_TABLE_OUTPUT = Pattern.compile( ".*table-(.+)\\.txt",
Pattern.CASE_INSENSITIVE ); //TODO this might need some work...
private static boolean hasResultsForSpecies( final Map<GoId, GoTerm> go_id_to_terms,
}
private static StringBuilder obtainDomainsForGoId( final List<PfamToGoMapping> pfam_to_go,
- final SortedSet<DomainId> domains_per_species,
+ final SortedSet<String> domains_per_species,
final Map<GoId, GoTerm> all_go_terms,
final GoId query_go_id,
- final Set<DomainId> found_domain_ids ) {
+ final Set<String> found_domain_ids ) {
final StringBuilder sb = new StringBuilder();
- D: for( final DomainId domain_id : domains_per_species ) {
+ D: for( final String domain_id : domains_per_species ) {
for( final PfamToGoMapping ptg : pfam_to_go ) {
if ( ptg.getKey().equals( domain_id ) ) {
final GoId go_id = ptg.getValue();
species = matcher.group( 1 );
if ( VERBOSE ) {
ForesterUtil
- .programMessage( PRG_NAME, "species for [" + ontologizer_outfile + "] is [" + species + "]" );
+ .programMessage( PRG_NAME, "species for [" + ontologizer_outfile + "] is [" + species + "]" );
}
}
else {
return species;
}
- private static SortedMap<Species, SortedSet<DomainId>> parseDomainGainLossFile( final File input )
- throws IOException {
+ private static SortedMap<Species, SortedSet<String>> parseDomainGainLossFile( final File input ) throws IOException {
final String error = ForesterUtil.isReadableFile( input );
if ( !ForesterUtil.isEmpty( error ) ) {
throw new IOException( error );
}
- final SortedMap<Species, SortedSet<DomainId>> speciesto_to_domain_id = new TreeMap<Species, SortedSet<DomainId>>();
+ final SortedMap<Species, SortedSet<String>> speciesto_to_domain_id = new TreeMap<Species, SortedSet<String>>();
final BufferedReader br = new BufferedReader( new FileReader( input ) );
String line;
int line_number = 0;
}
else if ( line.startsWith( "#" ) ) {
current_species = new BasicSpecies( line.substring( 1 ) );
- speciesto_to_domain_id.put( current_species, new TreeSet<DomainId>() );
+ speciesto_to_domain_id.put( current_species, new TreeSet<String>() );
+ if ( VERBOSE ) {
+ ForesterUtil.programMessage( PRG_NAME, "saw " + current_species );
+ }
}
else {
if ( current_species == null ) {
throw new IOException( "parsing problem [at line " + line_number + "] in [" + input + "]" );
}
- speciesto_to_domain_id.get( current_species ).add( new DomainId( line ) );
+ speciesto_to_domain_id.get( current_species ).add( new String( line ) );
}
}
}
final SortedMap<String, SortedSet<OntologizerResult>> species_to_results_map,
final String species,
final double p_adjusted_upper_limit,
- final SortedSet<DomainId> domains_per_species,
+ final SortedSet<String> domains_per_species,
final List<PfamToGoMapping> pfam_to_go,
- final Set<DomainId> domain_ids_with_go_annot ) throws IOException {
+ final Set<String> domain_ids_with_go_annot ) throws IOException {
final SortedSet<OntologizerResult> ontologizer_results = species_to_results_map.get( species );
for( final OntologizerResult ontologizer_result : ontologizer_results ) {
final GoTerm go_term = go_id_to_terms.get( ontologizer_result.getGoId() );
}
if ( ( p_adjusted_upper_limit < 0.0 ) || ( p_adjusted_upper_limit > 1.0 ) ) {
throw new IllegalArgumentException( "adjusted P values limit [" + p_adjusted_upper_limit
- + "] is out of range" );
+ + "] is out of range" );
}
- SortedMap<Species, SortedSet<DomainId>> speciesto_to_domain_id = null;
+ SortedMap<Species, SortedSet<String>> speciesto_to_domain_id = null;
if ( domain_gain_loss_file != null ) {
if ( !domain_gain_loss_file.exists() ) {
throw new IllegalArgumentException( "[" + domain_gain_loss_file + "] does not exist" );
speciesto_to_domain_id = parseDomainGainLossFile( domain_gain_loss_file );
if ( VERBOSE ) {
ForesterUtil.programMessage( PRG_NAME, "parsed gain/loss domains for " + speciesto_to_domain_id.size()
- + " species from [" + domain_gain_loss_file + "]" );
+ + " species from [" + domain_gain_loss_file + "]" );
}
}
final String[] children = ontologizer_outdir.list();
}
if ( VERBOSE ) {
ForesterUtil.programMessage( PRG_NAME, "need to analyze " + ontologizer_outfiles.size()
- + " Ontologizer outfiles from [" + ontologizer_outdir + "]" );
+ + " Ontologizer outfiles from [" + ontologizer_outdir + "]" );
}
final OBOparser parser = new OBOparser( obo_file, OBOparser.ReturnType.BASIC_GO_TERM );
final List<GoTerm> go_terms = parser.parse();
for( final File ontologizer_outfile : ontologizer_outfiles ) {
final String species = obtainSpecies( ontologizer_outfile );
final List<OntologizerResult> ontologizer_results = OntologizerResult.parse( new File( ontologizer_outdir
- + ForesterUtil.FILE_SEPARATOR + ontologizer_outfile ) );
+ + ForesterUtil.FILE_SEPARATOR + ontologizer_outfile ) );
final SortedSet<OntologizerResult> filtered_ontologizer_results = new TreeSet<OntologizerResult>();
for( final OntologizerResult ontologizer_result : ontologizer_results ) {
if ( ontologizer_result.getPAdjusted() <= p_adjusted_upper_limit ) {
writeHtmlHeader( b_html_writer,
GoNameSpace.GoNamespaceType.BIOLOGICAL_PROCESS.toString() + " | Pmax = "
+ p_adjusted_upper_limit + " | " + comment,
- ontologizer_outdir.getAbsolutePath(),
- domain_gain_loss_file_full_path_str );
+ ontologizer_outdir.getAbsolutePath(),
+ domain_gain_loss_file_full_path_str );
writeHtmlHeader( c_html_writer,
GoNameSpace.GoNamespaceType.CELLULAR_COMPONENT.toString() + " | Pmax = "
+ p_adjusted_upper_limit + " | " + comment,
- ontologizer_outdir.getAbsolutePath(),
- domain_gain_loss_file_full_path_str );
+ ontologizer_outdir.getAbsolutePath(),
+ domain_gain_loss_file_full_path_str );
writeHtmlHeader( m_html_writer,
GoNameSpace.GoNamespaceType.MOLECULAR_FUNCTION.toString() + " | Pmax = "
+ p_adjusted_upper_limit + " | " + comment,
- ontologizer_outdir.getAbsolutePath(),
- domain_gain_loss_file_full_path_str );
+ ontologizer_outdir.getAbsolutePath(),
+ domain_gain_loss_file_full_path_str );
for( final String species : species_to_results_map.keySet() ) {
if ( hasResultsForSpecies( go_id_to_terms,
species_to_results_map,
GoNameSpace.GoNamespaceType.MOLECULAR_FUNCTION ) ) {
writeHtmlSpecies( m_html_writer, species );
}
- SortedSet<DomainId> domains_per_species = null;
+ SortedSet<String> domains_per_species = null;
if ( ( speciesto_to_domain_id != null ) && ( speciesto_to_domain_id.size() > 0 ) ) {
domains_per_species = speciesto_to_domain_id.get( new BasicSpecies( species ) );
}
- final Set<DomainId> domain_ids_with_go_annot = new HashSet<DomainId>();
+ final Set<String> domain_ids_with_go_annot = new HashSet<String>();
processOneSpecies( go_id_to_terms,
b_html_writer,
b_tab_writer,
m_tab_writer.close();
if ( VERBOSE ) {
ForesterUtil.programMessage( PRG_NAME, "successfully wrote biological process summary to [" + b_file_html
- + "]" );
+ + "]" );
ForesterUtil.programMessage( PRG_NAME, "successfully wrote biological process summary to [" + b_file_txt
- + "]" );
+ + "]" );
ForesterUtil.programMessage( PRG_NAME, "successfully wrote molecular function summary to [" + m_file_html
- + "]" );
+ + "]" );
ForesterUtil.programMessage( PRG_NAME, "successfully wrote molecular function summary to [" + m_file_txt
- + "]" );
+ + "]" );
ForesterUtil.programMessage( PRG_NAME, "successfully wrote cellular component summary to [" + c_file_html
- + "]" );
+ + "]" );
ForesterUtil.programMessage( PRG_NAME, "successfully wrote cellular component summary to [" + c_file_txt
- + "]" );
+ + "]" );
}
}
private static void writeHtmlDomains( final Writer writer,
- final SortedSet<DomainId> domains,
- final Set<DomainId> domain_ids_with_go_annot ) throws IOException {
+ final SortedSet<String> domains,
+ final Set<String> domain_ids_with_go_annot ) throws IOException {
writer.write( "<tr>" );
writer.write( "<td colspan=\"10\">" );
if ( domains != null ) {
- for( final DomainId domain : domains ) {
+ for( final String domain : domains ) {
if ( !domain_ids_with_go_annot.contains( domain ) ) {
writer.write( "[<a class=\"new_type\" href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain
- + "\">" + domain + "</a>] " );
+ + "\">" + domain + "</a>] " );
}
}
}
w.write( ForesterUtil.LINE_SEPARATOR );
w.write( "a.new_type:link { font-size: 7pt; color : #505050; text-decoration : none; }" );
w.write( ForesterUtil.LINE_SEPARATOR );
- w
- .write( "a.new_type:hover { font-size: 7pt; color : #000000; background-color : #FFFF00; text-decoration : none; }" );
+ w.write( "a.new_type:hover { font-size: 7pt; color : #000000; background-color : #FFFF00; text-decoration : none; }" );
w.write( ForesterUtil.LINE_SEPARATOR );
- w
- .write( "a.new_type:hover { font-size: 7pt; color : #000000; background-color : #FFFF00; text-decoration : none; }" );
+ w.write( "a.new_type:hover { font-size: 7pt; color : #000000; background-color : #FFFF00; text-decoration : none; }" );
w.write( ForesterUtil.LINE_SEPARATOR );
w.write( "td { text-align: left; vertical-align: top; font-family: Verdana, Arial, Helvetica; font-size: 8pt}" );
w.write( ForesterUtil.LINE_SEPARATOR );
- w
- .write( "th { text-align: left; vertical-align: top; font-family: Verdana, Arial, Helvetica; font-size: 10pt; font-weight: bold }" );
+ w.write( "th { text-align: left; vertical-align: top; font-family: Verdana, Arial, Helvetica; font-size: 10pt; font-weight: bold }" );
w.write( ForesterUtil.LINE_SEPARATOR );
w.write( "h1 { color : #000000; font-family: Verdana, Arial, Helvetica; font-size: 18pt; font-weight: bold }" );
w.write( ForesterUtil.LINE_SEPARATOR );
w.write( "h2 { color : #000000; font-family: Verdana, Arial, Helvetica; font-size: 16pt; font-weight: bold }" );
- w
- .write( "h3 { margin-top: 12px; margin-bottom: 0px; color : #000000; font-family: Verdana, Arial, Helvetica; font-size: 12pt; font-weight: bold }" );
+ w.write( "h3 { margin-top: 12px; margin-bottom: 0px; color : #000000; font-family: Verdana, Arial, Helvetica; font-size: 12pt; font-weight: bold }" );
w.write( ForesterUtil.LINE_SEPARATOR );
w.write( "</style>" );
w.write( ForesterUtil.LINE_SEPARATOR );
writer.write( "<tr>" );
writer.write( "<td><h3>" );
writer.write( species );
- SurfacingUtil.writeTaxonomyLinks( writer, species );
+ SurfacingUtil.writeTaxonomyLinks( writer, species, null );
writer.write( "</h3></td>" );
writer.write( "</tr>" );
writer.write( ForesterUtil.LINE_SEPARATOR );
final double p_adjusted_upper_limit,
final String species,
final Map<GoId, GoTerm> go_id_to_terms,
- final SortedSet<DomainId> domains_per_species,
+ final SortedSet<String> domains_per_species,
final List<PfamToGoMapping> pfam_to_go,
- final Set<DomainId> domain_ids_with_go_annot ) throws IOException {
+ final Set<String> domain_ids_with_go_annot ) throws IOException {
final Color p_adj_color = ForesterUtil.calcColor( ontologizer_result.getPAdjusted(),
0,
p_adjusted_upper_limit,
writer.write( "</font>" );
writer.write( "</td><td>" );
writer.write( "<a href=\"" + SurfacingConstants.GO_LINK + ontologizer_result.getGoId().getId()
- + "\" target=\"amigo_window\">" + ontologizer_result.getGoId().getId() + "</a>" );
+ + "\" target=\"amigo_window\">" + ontologizer_result.getGoId().getId() + "</a>" );
writer.write( "</td><td>" );
writer.write( "<font color=\"#" + ForesterUtil.colorToHex( p_adj_color ) + "\">" );
writer.write( FORMATER.format( ontologizer_result.getPAdjusted() ) );
writer.write( String.valueOf( ontologizer_result.getStudyTerm() ) );
writer.write( "</td><td>" );
if ( domains_per_species != null ) {
- final StringBuilder sb = obtainDomainsForGoId( pfam_to_go, domains_per_species, go_id_to_terms, go_term
- .getGoId(), domain_ids_with_go_annot );
+ final StringBuilder sb = obtainDomainsForGoId( pfam_to_go,
+ domains_per_species,
+ go_id_to_terms,
+ go_term.getGoId(),
+ domain_ids_with_go_annot );
writer.write( sb.toString() );
}
else {