X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fgo%2Fetc%2FMetaOntologizer.java;h=36b2801253acf75c6e1dbcfc9063ae4e6790bca5;hb=0b49b8e750b34d28a5989facdd8a7959870de996;hp=10ac1fdbe6a40297d2e27c6203d61083789700ed;hpb=eee996a6476a1e3d84c07f8f690dcde3ff4b2ef5;p=jalview.git diff --git a/forester/java/src/org/forester/go/etc/MetaOntologizer.java b/forester/java/src/org/forester/go/etc/MetaOntologizer.java index 10ac1fd..36b2801 100644 --- a/forester/java/src/org/forester/go/etc/MetaOntologizer.java +++ b/forester/java/src/org/forester/go/etc/MetaOntologizer.java @@ -21,7 +21,7 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.go.etc; @@ -51,9 +51,8 @@ import org.forester.go.GoTerm; import org.forester.go.GoUtils; import org.forester.go.OBOparser; import org.forester.go.PfamToGoMapping; -import org.forester.surfacing.BasicSpecies; -import org.forester.surfacing.DomainId; -import org.forester.surfacing.Species; +import org.forester.species.BasicSpecies; +import org.forester.species.Species; import org.forester.surfacing.SurfacingConstants; import org.forester.surfacing.SurfacingUtil; import org.forester.util.ForesterUtil; @@ -66,7 +65,10 @@ public class MetaOntologizer { final static private String PRG_NAME = "meta_ontologizer"; private static final boolean VERBOSE = true; //table-a_41_dollo_all_gains_d-Topology-Elim-Bonferroni.txt: - private final static Pattern PATTERN_ONTOLOGIZER_TABLE_OUTPUT = Pattern.compile( ".*table-(.+)_dollo_.*", + //TODO change back + // private final static Pattern PATTERN_ONTOLOGIZER_TABLE_OUTPUT = Pattern.compile( ".*table-(.+)_dollo_.*", + // Pattern.CASE_INSENSITIVE ); //TODO this might need some work... + private final static Pattern PATTERN_ONTOLOGIZER_TABLE_OUTPUT = Pattern.compile( ".*table-(.+)\\.txt", Pattern.CASE_INSENSITIVE ); //TODO this might need some work... private static boolean hasResultsForSpecies( final Map go_id_to_terms, @@ -82,12 +84,12 @@ public class MetaOntologizer { } private static StringBuilder obtainDomainsForGoId( final List pfam_to_go, - final SortedSet domains_per_species, + final SortedSet domains_per_species, final Map all_go_terms, final GoId query_go_id, - final Set found_domain_ids ) { + final Set found_domain_ids ) { final StringBuilder sb = new StringBuilder(); - D: for( final DomainId domain_id : domains_per_species ) { + D: for( final String domain_id : domains_per_species ) { for( final PfamToGoMapping ptg : pfam_to_go ) { if ( ptg.getKey().equals( domain_id ) ) { final GoId go_id = ptg.getValue(); @@ -115,7 +117,7 @@ public class MetaOntologizer { species = matcher.group( 1 ); if ( VERBOSE ) { ForesterUtil - .programMessage( PRG_NAME, "species for [" + ontologizer_outfile + "] is [" + species + "]" ); + .programMessage( PRG_NAME, "species for [" + ontologizer_outfile + "] is [" + species + "]" ); } } else { @@ -125,13 +127,12 @@ public class MetaOntologizer { return species; } - private static SortedMap> parseDomainGainLossFile( final File input ) - throws IOException { + private static SortedMap> parseDomainGainLossFile( final File input ) throws IOException { final String error = ForesterUtil.isReadableFile( input ); if ( !ForesterUtil.isEmpty( error ) ) { throw new IOException( error ); } - final SortedMap> speciesto_to_domain_id = new TreeMap>(); + final SortedMap> speciesto_to_domain_id = new TreeMap>(); final BufferedReader br = new BufferedReader( new FileReader( input ) ); String line; int line_number = 0; @@ -145,13 +146,16 @@ public class MetaOntologizer { } else if ( line.startsWith( "#" ) ) { current_species = new BasicSpecies( line.substring( 1 ) ); - speciesto_to_domain_id.put( current_species, new TreeSet() ); + speciesto_to_domain_id.put( current_species, new TreeSet() ); + if ( VERBOSE ) { + ForesterUtil.programMessage( PRG_NAME, "saw " + current_species ); + } } else { if ( current_species == null ) { throw new IOException( "parsing problem [at line " + line_number + "] in [" + input + "]" ); } - speciesto_to_domain_id.get( current_species ).add( new DomainId( line ) ); + speciesto_to_domain_id.get( current_species ).add( new String( line ) ); } } } @@ -172,9 +176,9 @@ public class MetaOntologizer { final SortedMap> species_to_results_map, final String species, final double p_adjusted_upper_limit, - final SortedSet domains_per_species, + final SortedSet domains_per_species, final List pfam_to_go, - final Set domain_ids_with_go_annot ) throws IOException { + final Set domain_ids_with_go_annot ) throws IOException { final SortedSet ontologizer_results = species_to_results_map.get( species ); for( final OntologizerResult ontologizer_result : ontologizer_results ) { final GoTerm go_term = go_id_to_terms.get( ontologizer_result.getGoId() ); @@ -222,9 +226,9 @@ public class MetaOntologizer { } if ( ( p_adjusted_upper_limit < 0.0 ) || ( p_adjusted_upper_limit > 1.0 ) ) { throw new IllegalArgumentException( "adjusted P values limit [" + p_adjusted_upper_limit - + "] is out of range" ); + + "] is out of range" ); } - SortedMap> speciesto_to_domain_id = null; + SortedMap> speciesto_to_domain_id = null; if ( domain_gain_loss_file != null ) { if ( !domain_gain_loss_file.exists() ) { throw new IllegalArgumentException( "[" + domain_gain_loss_file + "] does not exist" ); @@ -232,7 +236,7 @@ public class MetaOntologizer { speciesto_to_domain_id = parseDomainGainLossFile( domain_gain_loss_file ); if ( VERBOSE ) { ForesterUtil.programMessage( PRG_NAME, "parsed gain/loss domains for " + speciesto_to_domain_id.size() - + " species from [" + domain_gain_loss_file + "]" ); + + " species from [" + domain_gain_loss_file + "]" ); } } final String[] children = ontologizer_outdir.list(); @@ -249,7 +253,7 @@ public class MetaOntologizer { } if ( VERBOSE ) { ForesterUtil.programMessage( PRG_NAME, "need to analyze " + ontologizer_outfiles.size() - + " Ontologizer outfiles from [" + ontologizer_outdir + "]" ); + + " Ontologizer outfiles from [" + ontologizer_outdir + "]" ); } final OBOparser parser = new OBOparser( obo_file, OBOparser.ReturnType.BASIC_GO_TERM ); final List go_terms = parser.parse(); @@ -277,7 +281,7 @@ public class MetaOntologizer { for( final File ontologizer_outfile : ontologizer_outfiles ) { final String species = obtainSpecies( ontologizer_outfile ); final List ontologizer_results = OntologizerResult.parse( new File( ontologizer_outdir - + ForesterUtil.FILE_SEPARATOR + ontologizer_outfile ) ); + + ForesterUtil.FILE_SEPARATOR + ontologizer_outfile ) ); final SortedSet filtered_ontologizer_results = new TreeSet(); for( final OntologizerResult ontologizer_result : ontologizer_results ) { if ( ontologizer_result.getPAdjusted() <= p_adjusted_upper_limit ) { @@ -296,18 +300,18 @@ public class MetaOntologizer { writeHtmlHeader( b_html_writer, GoNameSpace.GoNamespaceType.BIOLOGICAL_PROCESS.toString() + " | Pmax = " + p_adjusted_upper_limit + " | " + comment, - ontologizer_outdir.getAbsolutePath(), - domain_gain_loss_file_full_path_str ); + ontologizer_outdir.getAbsolutePath(), + domain_gain_loss_file_full_path_str ); writeHtmlHeader( c_html_writer, GoNameSpace.GoNamespaceType.CELLULAR_COMPONENT.toString() + " | Pmax = " + p_adjusted_upper_limit + " | " + comment, - ontologizer_outdir.getAbsolutePath(), - domain_gain_loss_file_full_path_str ); + ontologizer_outdir.getAbsolutePath(), + domain_gain_loss_file_full_path_str ); writeHtmlHeader( m_html_writer, GoNameSpace.GoNamespaceType.MOLECULAR_FUNCTION.toString() + " | Pmax = " + p_adjusted_upper_limit + " | " + comment, - ontologizer_outdir.getAbsolutePath(), - domain_gain_loss_file_full_path_str ); + ontologizer_outdir.getAbsolutePath(), + domain_gain_loss_file_full_path_str ); for( final String species : species_to_results_map.keySet() ) { if ( hasResultsForSpecies( go_id_to_terms, species_to_results_map, @@ -327,11 +331,11 @@ public class MetaOntologizer { GoNameSpace.GoNamespaceType.MOLECULAR_FUNCTION ) ) { writeHtmlSpecies( m_html_writer, species ); } - SortedSet domains_per_species = null; + SortedSet domains_per_species = null; if ( ( speciesto_to_domain_id != null ) && ( speciesto_to_domain_id.size() > 0 ) ) { domains_per_species = speciesto_to_domain_id.get( new BasicSpecies( species ) ); } - final Set domain_ids_with_go_annot = new HashSet(); + final Set domain_ids_with_go_annot = new HashSet(); processOneSpecies( go_id_to_terms, b_html_writer, b_tab_writer, @@ -377,30 +381,30 @@ public class MetaOntologizer { m_tab_writer.close(); if ( VERBOSE ) { ForesterUtil.programMessage( PRG_NAME, "successfully wrote biological process summary to [" + b_file_html - + "]" ); + + "]" ); ForesterUtil.programMessage( PRG_NAME, "successfully wrote biological process summary to [" + b_file_txt - + "]" ); + + "]" ); ForesterUtil.programMessage( PRG_NAME, "successfully wrote molecular function summary to [" + m_file_html - + "]" ); + + "]" ); ForesterUtil.programMessage( PRG_NAME, "successfully wrote molecular function summary to [" + m_file_txt - + "]" ); + + "]" ); ForesterUtil.programMessage( PRG_NAME, "successfully wrote cellular component summary to [" + c_file_html - + "]" ); + + "]" ); ForesterUtil.programMessage( PRG_NAME, "successfully wrote cellular component summary to [" + c_file_txt - + "]" ); + + "]" ); } } private static void writeHtmlDomains( final Writer writer, - final SortedSet domains, - final Set domain_ids_with_go_annot ) throws IOException { + final SortedSet domains, + final Set domain_ids_with_go_annot ) throws IOException { writer.write( "" ); writer.write( "" ); if ( domains != null ) { - for( final DomainId domain : domains ) { + for( final String domain : domains ) { if ( !domain_ids_with_go_annot.contains( domain ) ) { writer.write( "[" + domain + "] " ); + + "\">" + domain + "] " ); } } } @@ -514,7 +518,7 @@ public class MetaOntologizer { writer.write( "" ); writer.write( "

" ); writer.write( species ); - SurfacingUtil.writeTaxonomyLinks( writer, species ); + SurfacingUtil.writeTaxonomyLinks( writer, species, null ); writer.write( "

" ); writer.write( "" ); writer.write( ForesterUtil.LINE_SEPARATOR ); @@ -549,9 +553,9 @@ public class MetaOntologizer { final double p_adjusted_upper_limit, final String species, final Map go_id_to_terms, - final SortedSet domains_per_species, + final SortedSet domains_per_species, final List pfam_to_go, - final Set domain_ids_with_go_annot ) throws IOException { + final Set domain_ids_with_go_annot ) throws IOException { final Color p_adj_color = ForesterUtil.calcColor( ontologizer_result.getPAdjusted(), 0, p_adjusted_upper_limit, @@ -569,7 +573,7 @@ public class MetaOntologizer { writer.write( "" ); writer.write( "" ); writer.write( "" + ontologizer_result.getGoId().getId() + "" ); + + "\" target=\"amigo_window\">" + ontologizer_result.getGoId().getId() + "" ); writer.write( "" ); writer.write( "" ); writer.write( FORMATER.format( ontologizer_result.getPAdjusted() ) );