// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
import org.forester.go.GoUtils;
import org.forester.go.OBOparser;
import org.forester.go.PfamToGoMapping;
import org.forester.go.GoUtils;
import org.forester.go.OBOparser;
import org.forester.go.PfamToGoMapping;
-import org.forester.surfacing.BasicSpecies;
-import org.forester.surfacing.DomainId;
-import org.forester.surfacing.Species;
+import org.forester.species.BasicSpecies;
+import org.forester.species.Species;
import org.forester.surfacing.SurfacingConstants;
import org.forester.surfacing.SurfacingUtil;
import org.forester.util.ForesterUtil;
import org.forester.surfacing.SurfacingConstants;
import org.forester.surfacing.SurfacingUtil;
import org.forester.util.ForesterUtil;
final static private String PRG_NAME = "meta_ontologizer";
private static final boolean VERBOSE = true;
//table-a_41_dollo_all_gains_d-Topology-Elim-Bonferroni.txt:
final static private String PRG_NAME = "meta_ontologizer";
private static final boolean VERBOSE = true;
//table-a_41_dollo_all_gains_d-Topology-Elim-Bonferroni.txt:
- private final static Pattern PATTERN_ONTOLOGIZER_TABLE_OUTPUT = Pattern.compile( ".*table-(.+)_dollo_.*",
+ //TODO change back
+ // private final static Pattern PATTERN_ONTOLOGIZER_TABLE_OUTPUT = Pattern.compile( ".*table-(.+)_dollo_.*",
+ // Pattern.CASE_INSENSITIVE ); //TODO this might need some work...
+ private final static Pattern PATTERN_ONTOLOGIZER_TABLE_OUTPUT = Pattern.compile( ".*table-(.+)\\.txt",
Pattern.CASE_INSENSITIVE ); //TODO this might need some work...
private static boolean hasResultsForSpecies( final Map<GoId, GoTerm> go_id_to_terms,
Pattern.CASE_INSENSITIVE ); //TODO this might need some work...
private static boolean hasResultsForSpecies( final Map<GoId, GoTerm> go_id_to_terms,
for( final PfamToGoMapping ptg : pfam_to_go ) {
if ( ptg.getKey().equals( domain_id ) ) {
final GoId go_id = ptg.getValue();
for( final PfamToGoMapping ptg : pfam_to_go ) {
if ( ptg.getKey().equals( domain_id ) ) {
final GoId go_id = ptg.getValue();
final String error = ForesterUtil.isReadableFile( input );
if ( !ForesterUtil.isEmpty( error ) ) {
throw new IOException( error );
}
final String error = ForesterUtil.isReadableFile( input );
if ( !ForesterUtil.isEmpty( error ) ) {
throw new IOException( error );
}
- final SortedMap<Species, SortedSet<DomainId>> speciesto_to_domain_id = new TreeMap<Species, SortedSet<DomainId>>();
+ final SortedMap<Species, SortedSet<String>> speciesto_to_domain_id = new TreeMap<Species, SortedSet<String>>();
- speciesto_to_domain_id.put( current_species, new TreeSet<DomainId>() );
+ speciesto_to_domain_id.put( current_species, new TreeSet<String>() );
+ if ( VERBOSE ) {
+ ForesterUtil.programMessage( PRG_NAME, "saw " + current_species );
+ }
}
else {
if ( current_species == null ) {
throw new IOException( "parsing problem [at line " + line_number + "] in [" + input + "]" );
}
}
else {
if ( current_species == null ) {
throw new IOException( "parsing problem [at line " + line_number + "] in [" + input + "]" );
}
final SortedMap<String, SortedSet<OntologizerResult>> species_to_results_map,
final String species,
final double p_adjusted_upper_limit,
final SortedMap<String, SortedSet<OntologizerResult>> species_to_results_map,
final String species,
final double p_adjusted_upper_limit,
final SortedSet<OntologizerResult> ontologizer_results = species_to_results_map.get( species );
for( final OntologizerResult ontologizer_result : ontologizer_results ) {
final GoTerm go_term = go_id_to_terms.get( ontologizer_result.getGoId() );
final SortedSet<OntologizerResult> ontologizer_results = species_to_results_map.get( species );
for( final OntologizerResult ontologizer_result : ontologizer_results ) {
final GoTerm go_term = go_id_to_terms.get( ontologizer_result.getGoId() );
}
if ( ( p_adjusted_upper_limit < 0.0 ) || ( p_adjusted_upper_limit > 1.0 ) ) {
throw new IllegalArgumentException( "adjusted P values limit [" + p_adjusted_upper_limit
}
if ( ( p_adjusted_upper_limit < 0.0 ) || ( p_adjusted_upper_limit > 1.0 ) ) {
throw new IllegalArgumentException( "adjusted P values limit [" + p_adjusted_upper_limit
if ( domain_gain_loss_file != null ) {
if ( !domain_gain_loss_file.exists() ) {
throw new IllegalArgumentException( "[" + domain_gain_loss_file + "] does not exist" );
if ( domain_gain_loss_file != null ) {
if ( !domain_gain_loss_file.exists() ) {
throw new IllegalArgumentException( "[" + domain_gain_loss_file + "] does not exist" );
speciesto_to_domain_id = parseDomainGainLossFile( domain_gain_loss_file );
if ( VERBOSE ) {
ForesterUtil.programMessage( PRG_NAME, "parsed gain/loss domains for " + speciesto_to_domain_id.size()
speciesto_to_domain_id = parseDomainGainLossFile( domain_gain_loss_file );
if ( VERBOSE ) {
ForesterUtil.programMessage( PRG_NAME, "parsed gain/loss domains for " + speciesto_to_domain_id.size()
}
final OBOparser parser = new OBOparser( obo_file, OBOparser.ReturnType.BASIC_GO_TERM );
final List<GoTerm> go_terms = parser.parse();
}
final OBOparser parser = new OBOparser( obo_file, OBOparser.ReturnType.BASIC_GO_TERM );
final List<GoTerm> go_terms = parser.parse();
for( final File ontologizer_outfile : ontologizer_outfiles ) {
final String species = obtainSpecies( ontologizer_outfile );
final List<OntologizerResult> ontologizer_results = OntologizerResult.parse( new File( ontologizer_outdir
for( final File ontologizer_outfile : ontologizer_outfiles ) {
final String species = obtainSpecies( ontologizer_outfile );
final List<OntologizerResult> ontologizer_results = OntologizerResult.parse( new File( ontologizer_outdir
final SortedSet<OntologizerResult> filtered_ontologizer_results = new TreeSet<OntologizerResult>();
for( final OntologizerResult ontologizer_result : ontologizer_results ) {
if ( ontologizer_result.getPAdjusted() <= p_adjusted_upper_limit ) {
final SortedSet<OntologizerResult> filtered_ontologizer_results = new TreeSet<OntologizerResult>();
for( final OntologizerResult ontologizer_result : ontologizer_results ) {
if ( ontologizer_result.getPAdjusted() <= p_adjusted_upper_limit ) {
writeHtmlHeader( b_html_writer,
GoNameSpace.GoNamespaceType.BIOLOGICAL_PROCESS.toString() + " | Pmax = "
+ p_adjusted_upper_limit + " | " + comment,
writeHtmlHeader( b_html_writer,
GoNameSpace.GoNamespaceType.BIOLOGICAL_PROCESS.toString() + " | Pmax = "
+ p_adjusted_upper_limit + " | " + comment,
- ontologizer_outdir.getAbsolutePath(),
- domain_gain_loss_file_full_path_str );
+ ontologizer_outdir.getAbsolutePath(),
+ domain_gain_loss_file_full_path_str );
writeHtmlHeader( c_html_writer,
GoNameSpace.GoNamespaceType.CELLULAR_COMPONENT.toString() + " | Pmax = "
+ p_adjusted_upper_limit + " | " + comment,
writeHtmlHeader( c_html_writer,
GoNameSpace.GoNamespaceType.CELLULAR_COMPONENT.toString() + " | Pmax = "
+ p_adjusted_upper_limit + " | " + comment,
- ontologizer_outdir.getAbsolutePath(),
- domain_gain_loss_file_full_path_str );
+ ontologizer_outdir.getAbsolutePath(),
+ domain_gain_loss_file_full_path_str );
writeHtmlHeader( m_html_writer,
GoNameSpace.GoNamespaceType.MOLECULAR_FUNCTION.toString() + " | Pmax = "
+ p_adjusted_upper_limit + " | " + comment,
writeHtmlHeader( m_html_writer,
GoNameSpace.GoNamespaceType.MOLECULAR_FUNCTION.toString() + " | Pmax = "
+ p_adjusted_upper_limit + " | " + comment,
- ontologizer_outdir.getAbsolutePath(),
- domain_gain_loss_file_full_path_str );
+ ontologizer_outdir.getAbsolutePath(),
+ domain_gain_loss_file_full_path_str );
for( final String species : species_to_results_map.keySet() ) {
if ( hasResultsForSpecies( go_id_to_terms,
species_to_results_map,
for( final String species : species_to_results_map.keySet() ) {
if ( hasResultsForSpecies( go_id_to_terms,
species_to_results_map,
GoNameSpace.GoNamespaceType.MOLECULAR_FUNCTION ) ) {
writeHtmlSpecies( m_html_writer, species );
}
GoNameSpace.GoNamespaceType.MOLECULAR_FUNCTION ) ) {
writeHtmlSpecies( m_html_writer, species );
}
if ( ( speciesto_to_domain_id != null ) && ( speciesto_to_domain_id.size() > 0 ) ) {
domains_per_species = speciesto_to_domain_id.get( new BasicSpecies( species ) );
}
if ( ( speciesto_to_domain_id != null ) && ( speciesto_to_domain_id.size() > 0 ) ) {
domains_per_species = speciesto_to_domain_id.get( new BasicSpecies( species ) );
}
- final SortedSet<DomainId> domains,
- final Set<DomainId> domain_ids_with_go_annot ) throws IOException {
+ final SortedSet<String> domains,
+ final Set<String> domain_ids_with_go_annot ) throws IOException {
if ( !domain_ids_with_go_annot.contains( domain ) ) {
writer.write( "[<a class=\"new_type\" href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain
if ( !domain_ids_with_go_annot.contains( domain ) ) {
writer.write( "[<a class=\"new_type\" href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain
w.write( ForesterUtil.LINE_SEPARATOR );
w.write( "a.new_type:link { font-size: 7pt; color : #505050; text-decoration : none; }" );
w.write( ForesterUtil.LINE_SEPARATOR );
w.write( ForesterUtil.LINE_SEPARATOR );
w.write( "a.new_type:link { font-size: 7pt; color : #505050; text-decoration : none; }" );
w.write( ForesterUtil.LINE_SEPARATOR );
w.write( ForesterUtil.LINE_SEPARATOR );
w.write( "td { text-align: left; vertical-align: top; font-family: Verdana, Arial, Helvetica; font-size: 8pt}" );
w.write( ForesterUtil.LINE_SEPARATOR );
w.write( ForesterUtil.LINE_SEPARATOR );
w.write( "td { text-align: left; vertical-align: top; font-family: Verdana, Arial, Helvetica; font-size: 8pt}" );
w.write( ForesterUtil.LINE_SEPARATOR );
- w
- .write( "th { text-align: left; vertical-align: top; font-family: Verdana, Arial, Helvetica; font-size: 10pt; font-weight: bold }" );
+ w.write( "th { text-align: left; vertical-align: top; font-family: Verdana, Arial, Helvetica; font-size: 10pt; font-weight: bold }" );
w.write( ForesterUtil.LINE_SEPARATOR );
w.write( "h1 { color : #000000; font-family: Verdana, Arial, Helvetica; font-size: 18pt; font-weight: bold }" );
w.write( ForesterUtil.LINE_SEPARATOR );
w.write( "h2 { color : #000000; font-family: Verdana, Arial, Helvetica; font-size: 16pt; font-weight: bold }" );
w.write( ForesterUtil.LINE_SEPARATOR );
w.write( "h1 { color : #000000; font-family: Verdana, Arial, Helvetica; font-size: 18pt; font-weight: bold }" );
w.write( ForesterUtil.LINE_SEPARATOR );
w.write( "h2 { color : #000000; font-family: Verdana, Arial, Helvetica; font-size: 16pt; font-weight: bold }" );
- w
- .write( "h3 { margin-top: 12px; margin-bottom: 0px; color : #000000; font-family: Verdana, Arial, Helvetica; font-size: 12pt; font-weight: bold }" );
+ w.write( "h3 { margin-top: 12px; margin-bottom: 0px; color : #000000; font-family: Verdana, Arial, Helvetica; font-size: 12pt; font-weight: bold }" );
final double p_adjusted_upper_limit,
final String species,
final Map<GoId, GoTerm> go_id_to_terms,
final double p_adjusted_upper_limit,
final String species,
final Map<GoId, GoTerm> go_id_to_terms,
writer.write( "</font>" );
writer.write( "</td><td>" );
writer.write( "<a href=\"" + SurfacingConstants.GO_LINK + ontologizer_result.getGoId().getId()
writer.write( "</font>" );
writer.write( "</td><td>" );
writer.write( "<a href=\"" + SurfacingConstants.GO_LINK + ontologizer_result.getGoId().getId()
writer.write( "</td><td>" );
writer.write( "<font color=\"#" + ForesterUtil.colorToHex( p_adj_color ) + "\">" );
writer.write( FORMATER.format( ontologizer_result.getPAdjusted() ) );
writer.write( "</td><td>" );
writer.write( "<font color=\"#" + ForesterUtil.colorToHex( p_adj_color ) + "\">" );
writer.write( FORMATER.format( ontologizer_result.getPAdjusted() ) );
writer.write( String.valueOf( ontologizer_result.getStudyTerm() ) );
writer.write( "</td><td>" );
if ( domains_per_species != null ) {
writer.write( String.valueOf( ontologizer_result.getStudyTerm() ) );
writer.write( "</td><td>" );
if ( domains_per_species != null ) {
- final StringBuilder sb = obtainDomainsForGoId( pfam_to_go, domains_per_species, go_id_to_terms, go_term
- .getGoId(), domain_ids_with_go_annot );
+ final StringBuilder sb = obtainDomainsForGoId( pfam_to_go,
+ domains_per_species,
+ go_id_to_terms,
+ go_term.getGoId(),
+ domain_ids_with_go_annot );