X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fgo%2Fetc%2FMetaOntologizer.java;h=36b2801253acf75c6e1dbcfc9063ae4e6790bca5;hb=0b49b8e750b34d28a5989facdd8a7959870de996;hp=970f939b845a9978fe95ef2512383dd7ad79530f;hpb=48f7a89be9d34f1930a1f863e608235cc27184c5;p=jalview.git diff --git a/forester/java/src/org/forester/go/etc/MetaOntologizer.java b/forester/java/src/org/forester/go/etc/MetaOntologizer.java index 970f939..36b2801 100644 --- a/forester/java/src/org/forester/go/etc/MetaOntologizer.java +++ b/forester/java/src/org/forester/go/etc/MetaOntologizer.java @@ -5,7 +5,7 @@ // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved -// +// // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either @@ -15,13 +15,13 @@ // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. -// +// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.go.etc; @@ -51,9 +51,8 @@ import org.forester.go.GoTerm; import org.forester.go.GoUtils; import org.forester.go.OBOparser; import org.forester.go.PfamToGoMapping; -import org.forester.surfacing.BasicSpecies; -import org.forester.surfacing.DomainId; -import org.forester.surfacing.Species; +import org.forester.species.BasicSpecies; +import org.forester.species.Species; import org.forester.surfacing.SurfacingConstants; import org.forester.surfacing.SurfacingUtil; import org.forester.util.ForesterUtil; @@ -66,7 +65,10 @@ public class MetaOntologizer { final static private String PRG_NAME = "meta_ontologizer"; private static final boolean VERBOSE = true; //table-a_41_dollo_all_gains_d-Topology-Elim-Bonferroni.txt: - private final static Pattern PATTERN_ONTOLOGIZER_TABLE_OUTPUT = Pattern.compile( ".*table-(.+)_dollo_.*", + //TODO change back + // private final static Pattern PATTERN_ONTOLOGIZER_TABLE_OUTPUT = Pattern.compile( ".*table-(.+)_dollo_.*", + // Pattern.CASE_INSENSITIVE ); //TODO this might need some work... + private final static Pattern PATTERN_ONTOLOGIZER_TABLE_OUTPUT = Pattern.compile( ".*table-(.+)\\.txt", Pattern.CASE_INSENSITIVE ); //TODO this might need some work... private static boolean hasResultsForSpecies( final Map go_id_to_terms, @@ -82,12 +84,12 @@ public class MetaOntologizer { } private static StringBuilder obtainDomainsForGoId( final List pfam_to_go, - final SortedSet domains_per_species, + final SortedSet domains_per_species, final Map all_go_terms, final GoId query_go_id, - final Set found_domain_ids ) { + final Set found_domain_ids ) { final StringBuilder sb = new StringBuilder(); - D: for( final DomainId domain_id : domains_per_species ) { + D: for( final String domain_id : domains_per_species ) { for( final PfamToGoMapping ptg : pfam_to_go ) { if ( ptg.getKey().equals( domain_id ) ) { final GoId go_id = ptg.getValue(); @@ -115,7 +117,7 @@ public class MetaOntologizer { species = matcher.group( 1 ); if ( VERBOSE ) { ForesterUtil - .programMessage( PRG_NAME, "species for [" + ontologizer_outfile + "] is [" + species + "]" ); + .programMessage( PRG_NAME, "species for [" + ontologizer_outfile + "] is [" + species + "]" ); } } else { @@ -125,13 +127,12 @@ public class MetaOntologizer { return species; } - private static SortedMap> parseDomainGainLossFile( final File input ) - throws IOException { + private static SortedMap> parseDomainGainLossFile( final File input ) throws IOException { final String error = ForesterUtil.isReadableFile( input ); if ( !ForesterUtil.isEmpty( error ) ) { throw new IOException( error ); } - final SortedMap> speciesto_to_domain_id = new TreeMap>(); + final SortedMap> speciesto_to_domain_id = new TreeMap>(); final BufferedReader br = new BufferedReader( new FileReader( input ) ); String line; int line_number = 0; @@ -145,13 +146,16 @@ public class MetaOntologizer { } else if ( line.startsWith( "#" ) ) { current_species = new BasicSpecies( line.substring( 1 ) ); - speciesto_to_domain_id.put( current_species, new TreeSet() ); + speciesto_to_domain_id.put( current_species, new TreeSet() ); + if ( VERBOSE ) { + ForesterUtil.programMessage( PRG_NAME, "saw " + current_species ); + } } else { if ( current_species == null ) { throw new IOException( "parsing problem [at line " + line_number + "] in [" + input + "]" ); } - speciesto_to_domain_id.get( current_species ).add( new DomainId( line ) ); + speciesto_to_domain_id.get( current_species ).add( new String( line ) ); } } } @@ -172,9 +176,9 @@ public class MetaOntologizer { final SortedMap> species_to_results_map, final String species, final double p_adjusted_upper_limit, - final SortedSet domains_per_species, + final SortedSet domains_per_species, final List pfam_to_go, - final Set domain_ids_with_go_annot ) throws IOException { + final Set domain_ids_with_go_annot ) throws IOException { final SortedSet ontologizer_results = species_to_results_map.get( species ); for( final OntologizerResult ontologizer_result : ontologizer_results ) { final GoTerm go_term = go_id_to_terms.get( ontologizer_result.getGoId() ); @@ -222,9 +226,9 @@ public class MetaOntologizer { } if ( ( p_adjusted_upper_limit < 0.0 ) || ( p_adjusted_upper_limit > 1.0 ) ) { throw new IllegalArgumentException( "adjusted P values limit [" + p_adjusted_upper_limit - + "] is out of range" ); + + "] is out of range" ); } - SortedMap> speciesto_to_domain_id = null; + SortedMap> speciesto_to_domain_id = null; if ( domain_gain_loss_file != null ) { if ( !domain_gain_loss_file.exists() ) { throw new IllegalArgumentException( "[" + domain_gain_loss_file + "] does not exist" ); @@ -232,7 +236,7 @@ public class MetaOntologizer { speciesto_to_domain_id = parseDomainGainLossFile( domain_gain_loss_file ); if ( VERBOSE ) { ForesterUtil.programMessage( PRG_NAME, "parsed gain/loss domains for " + speciesto_to_domain_id.size() - + " species from [" + domain_gain_loss_file + "]" ); + + " species from [" + domain_gain_loss_file + "]" ); } } final String[] children = ontologizer_outdir.list(); @@ -249,7 +253,7 @@ public class MetaOntologizer { } if ( VERBOSE ) { ForesterUtil.programMessage( PRG_NAME, "need to analyze " + ontologizer_outfiles.size() - + " Ontologizer outfiles from [" + ontologizer_outdir + "]" ); + + " Ontologizer outfiles from [" + ontologizer_outdir + "]" ); } final OBOparser parser = new OBOparser( obo_file, OBOparser.ReturnType.BASIC_GO_TERM ); final List go_terms = parser.parse(); @@ -277,7 +281,7 @@ public class MetaOntologizer { for( final File ontologizer_outfile : ontologizer_outfiles ) { final String species = obtainSpecies( ontologizer_outfile ); final List ontologizer_results = OntologizerResult.parse( new File( ontologizer_outdir - + ForesterUtil.FILE_SEPARATOR + ontologizer_outfile ) ); + + ForesterUtil.FILE_SEPARATOR + ontologizer_outfile ) ); final SortedSet filtered_ontologizer_results = new TreeSet(); for( final OntologizerResult ontologizer_result : ontologizer_results ) { if ( ontologizer_result.getPAdjusted() <= p_adjusted_upper_limit ) { @@ -296,18 +300,18 @@ public class MetaOntologizer { writeHtmlHeader( b_html_writer, GoNameSpace.GoNamespaceType.BIOLOGICAL_PROCESS.toString() + " | Pmax = " + p_adjusted_upper_limit + " | " + comment, - ontologizer_outdir.getAbsolutePath(), - domain_gain_loss_file_full_path_str ); + ontologizer_outdir.getAbsolutePath(), + domain_gain_loss_file_full_path_str ); writeHtmlHeader( c_html_writer, GoNameSpace.GoNamespaceType.CELLULAR_COMPONENT.toString() + " | Pmax = " + p_adjusted_upper_limit + " | " + comment, - ontologizer_outdir.getAbsolutePath(), - domain_gain_loss_file_full_path_str ); + ontologizer_outdir.getAbsolutePath(), + domain_gain_loss_file_full_path_str ); writeHtmlHeader( m_html_writer, GoNameSpace.GoNamespaceType.MOLECULAR_FUNCTION.toString() + " | Pmax = " + p_adjusted_upper_limit + " | " + comment, - ontologizer_outdir.getAbsolutePath(), - domain_gain_loss_file_full_path_str ); + ontologizer_outdir.getAbsolutePath(), + domain_gain_loss_file_full_path_str ); for( final String species : species_to_results_map.keySet() ) { if ( hasResultsForSpecies( go_id_to_terms, species_to_results_map, @@ -327,11 +331,11 @@ public class MetaOntologizer { GoNameSpace.GoNamespaceType.MOLECULAR_FUNCTION ) ) { writeHtmlSpecies( m_html_writer, species ); } - SortedSet domains_per_species = null; + SortedSet domains_per_species = null; if ( ( speciesto_to_domain_id != null ) && ( speciesto_to_domain_id.size() > 0 ) ) { domains_per_species = speciesto_to_domain_id.get( new BasicSpecies( species ) ); } - final Set domain_ids_with_go_annot = new HashSet(); + final Set domain_ids_with_go_annot = new HashSet(); processOneSpecies( go_id_to_terms, b_html_writer, b_tab_writer, @@ -377,30 +381,30 @@ public class MetaOntologizer { m_tab_writer.close(); if ( VERBOSE ) { ForesterUtil.programMessage( PRG_NAME, "successfully wrote biological process summary to [" + b_file_html - + "]" ); + + "]" ); ForesterUtil.programMessage( PRG_NAME, "successfully wrote biological process summary to [" + b_file_txt - + "]" ); + + "]" ); ForesterUtil.programMessage( PRG_NAME, "successfully wrote molecular function summary to [" + m_file_html - + "]" ); + + "]" ); ForesterUtil.programMessage( PRG_NAME, "successfully wrote molecular function summary to [" + m_file_txt - + "]" ); + + "]" ); ForesterUtil.programMessage( PRG_NAME, "successfully wrote cellular component summary to [" + c_file_html - + "]" ); + + "]" ); ForesterUtil.programMessage( PRG_NAME, "successfully wrote cellular component summary to [" + c_file_txt - + "]" ); + + "]" ); } } private static void writeHtmlDomains( final Writer writer, - final SortedSet domains, - final Set domain_ids_with_go_annot ) throws IOException { + final SortedSet domains, + final Set domain_ids_with_go_annot ) throws IOException { writer.write( "" ); writer.write( "" ); if ( domains != null ) { - for( final DomainId domain : domains ) { + for( final String domain : domains ) { if ( !domain_ids_with_go_annot.contains( domain ) ) { writer.write( "[" + domain + "] " ); + + "\">" + domain + "] " ); } } } @@ -438,22 +442,18 @@ public class MetaOntologizer { w.write( ForesterUtil.LINE_SEPARATOR ); w.write( "a.new_type:link { font-size: 7pt; color : #505050; text-decoration : none; }" ); w.write( ForesterUtil.LINE_SEPARATOR ); - w - .write( "a.new_type:hover { font-size: 7pt; color : #000000; background-color : #FFFF00; text-decoration : none; }" ); + w.write( "a.new_type:hover { font-size: 7pt; color : #000000; background-color : #FFFF00; text-decoration : none; }" ); w.write( ForesterUtil.LINE_SEPARATOR ); - w - .write( "a.new_type:hover { font-size: 7pt; color : #000000; background-color : #FFFF00; text-decoration : none; }" ); + w.write( "a.new_type:hover { font-size: 7pt; color : #000000; background-color : #FFFF00; text-decoration : none; }" ); w.write( ForesterUtil.LINE_SEPARATOR ); w.write( "td { text-align: left; vertical-align: top; font-family: Verdana, Arial, Helvetica; font-size: 8pt}" ); w.write( ForesterUtil.LINE_SEPARATOR ); - w - .write( "th { text-align: left; vertical-align: top; font-family: Verdana, Arial, Helvetica; font-size: 10pt; font-weight: bold }" ); + w.write( "th { text-align: left; vertical-align: top; font-family: Verdana, Arial, Helvetica; font-size: 10pt; font-weight: bold }" ); w.write( ForesterUtil.LINE_SEPARATOR ); w.write( "h1 { color : #000000; font-family: Verdana, Arial, Helvetica; font-size: 18pt; font-weight: bold }" ); w.write( ForesterUtil.LINE_SEPARATOR ); w.write( "h2 { color : #000000; font-family: Verdana, Arial, Helvetica; font-size: 16pt; font-weight: bold }" ); - w - .write( "h3 { margin-top: 12px; margin-bottom: 0px; color : #000000; font-family: Verdana, Arial, Helvetica; font-size: 12pt; font-weight: bold }" ); + w.write( "h3 { margin-top: 12px; margin-bottom: 0px; color : #000000; font-family: Verdana, Arial, Helvetica; font-size: 12pt; font-weight: bold }" ); w.write( ForesterUtil.LINE_SEPARATOR ); w.write( "" ); w.write( ForesterUtil.LINE_SEPARATOR ); @@ -518,7 +518,7 @@ public class MetaOntologizer { writer.write( "" ); writer.write( "

" ); writer.write( species ); - SurfacingUtil.writeTaxonomyLinks( writer, species ); + SurfacingUtil.writeTaxonomyLinks( writer, species, null ); writer.write( "

" ); writer.write( "" ); writer.write( ForesterUtil.LINE_SEPARATOR ); @@ -553,9 +553,9 @@ public class MetaOntologizer { final double p_adjusted_upper_limit, final String species, final Map go_id_to_terms, - final SortedSet domains_per_species, + final SortedSet domains_per_species, final List pfam_to_go, - final Set domain_ids_with_go_annot ) throws IOException { + final Set domain_ids_with_go_annot ) throws IOException { final Color p_adj_color = ForesterUtil.calcColor( ontologizer_result.getPAdjusted(), 0, p_adjusted_upper_limit, @@ -573,7 +573,7 @@ public class MetaOntologizer { writer.write( "" ); writer.write( "" ); writer.write( "" + ontologizer_result.getGoId().getId() + "" ); + + "\" target=\"amigo_window\">" + ontologizer_result.getGoId().getId() + "" ); writer.write( "" ); writer.write( "" ); writer.write( FORMATER.format( ontologizer_result.getPAdjusted() ) ); @@ -592,8 +592,11 @@ public class MetaOntologizer { writer.write( String.valueOf( ontologizer_result.getStudyTerm() ) ); writer.write( "" ); if ( domains_per_species != null ) { - final StringBuilder sb = obtainDomainsForGoId( pfam_to_go, domains_per_species, go_id_to_terms, go_term - .getGoId(), domain_ids_with_go_annot ); + final StringBuilder sb = obtainDomainsForGoId( pfam_to_go, + domains_per_species, + go_id_to_terms, + go_term.getGoId(), + domain_ids_with_go_annot ); writer.write( sb.toString() ); } else {