in progress
[jalview.git] / forester / java / src / org / forester / go / etc / MetaOntologizer.java
index 11afb54..36b2801 100644 (file)
@@ -51,7 +51,6 @@ import org.forester.go.GoTerm;
 import org.forester.go.GoUtils;
 import org.forester.go.OBOparser;
 import org.forester.go.PfamToGoMapping;
-import org.forester.protein.DomainId;
 import org.forester.species.BasicSpecies;
 import org.forester.species.Species;
 import org.forester.surfacing.SurfacingConstants;
@@ -85,12 +84,12 @@ public class MetaOntologizer {
     }
 
     private static StringBuilder obtainDomainsForGoId( final List<PfamToGoMapping> pfam_to_go,
-                                                       final SortedSet<DomainId> domains_per_species,
+                                                       final SortedSet<String> domains_per_species,
                                                        final Map<GoId, GoTerm> all_go_terms,
                                                        final GoId query_go_id,
-                                                       final Set<DomainId> found_domain_ids ) {
+                                                       final Set<String> found_domain_ids ) {
         final StringBuilder sb = new StringBuilder();
-        D: for( final DomainId domain_id : domains_per_species ) {
+        D: for( final String domain_id : domains_per_species ) {
             for( final PfamToGoMapping ptg : pfam_to_go ) {
                 if ( ptg.getKey().equals( domain_id ) ) {
                     final GoId go_id = ptg.getValue();
@@ -118,7 +117,7 @@ public class MetaOntologizer {
             species = matcher.group( 1 );
             if ( VERBOSE ) {
                 ForesterUtil
-                        .programMessage( PRG_NAME, "species for [" + ontologizer_outfile + "] is [" + species + "]" );
+                .programMessage( PRG_NAME, "species for [" + ontologizer_outfile + "] is [" + species + "]" );
             }
         }
         else {
@@ -128,13 +127,12 @@ public class MetaOntologizer {
         return species;
     }
 
-    private static SortedMap<Species, SortedSet<DomainId>> parseDomainGainLossFile( final File input )
-            throws IOException {
+    private static SortedMap<Species, SortedSet<String>> parseDomainGainLossFile( final File input ) throws IOException {
         final String error = ForesterUtil.isReadableFile( input );
         if ( !ForesterUtil.isEmpty( error ) ) {
             throw new IOException( error );
         }
-        final SortedMap<Species, SortedSet<DomainId>> speciesto_to_domain_id = new TreeMap<Species, SortedSet<DomainId>>();
+        final SortedMap<Species, SortedSet<String>> speciesto_to_domain_id = new TreeMap<Species, SortedSet<String>>();
         final BufferedReader br = new BufferedReader( new FileReader( input ) );
         String line;
         int line_number = 0;
@@ -148,7 +146,7 @@ public class MetaOntologizer {
                 }
                 else if ( line.startsWith( "#" ) ) {
                     current_species = new BasicSpecies( line.substring( 1 ) );
-                    speciesto_to_domain_id.put( current_species, new TreeSet<DomainId>() );
+                    speciesto_to_domain_id.put( current_species, new TreeSet<String>() );
                     if ( VERBOSE ) {
                         ForesterUtil.programMessage( PRG_NAME, "saw " + current_species );
                     }
@@ -157,7 +155,7 @@ public class MetaOntologizer {
                     if ( current_species == null ) {
                         throw new IOException( "parsing problem [at line " + line_number + "] in [" + input + "]" );
                     }
-                    speciesto_to_domain_id.get( current_species ).add( new DomainId( line ) );
+                    speciesto_to_domain_id.get( current_species ).add( new String( line ) );
                 }
             }
         }
@@ -178,9 +176,9 @@ public class MetaOntologizer {
                                            final SortedMap<String, SortedSet<OntologizerResult>> species_to_results_map,
                                            final String species,
                                            final double p_adjusted_upper_limit,
-                                           final SortedSet<DomainId> domains_per_species,
+                                           final SortedSet<String> domains_per_species,
                                            final List<PfamToGoMapping> pfam_to_go,
-                                           final Set<DomainId> domain_ids_with_go_annot ) throws IOException {
+                                           final Set<String> domain_ids_with_go_annot ) throws IOException {
         final SortedSet<OntologizerResult> ontologizer_results = species_to_results_map.get( species );
         for( final OntologizerResult ontologizer_result : ontologizer_results ) {
             final GoTerm go_term = go_id_to_terms.get( ontologizer_result.getGoId() );
@@ -228,9 +226,9 @@ public class MetaOntologizer {
         }
         if ( ( p_adjusted_upper_limit < 0.0 ) || ( p_adjusted_upper_limit > 1.0 ) ) {
             throw new IllegalArgumentException( "adjusted P values limit [" + p_adjusted_upper_limit
-                    + "] is out of range" );
+                                                + "] is out of range" );
         }
-        SortedMap<Species, SortedSet<DomainId>> speciesto_to_domain_id = null;
+        SortedMap<Species, SortedSet<String>> speciesto_to_domain_id = null;
         if ( domain_gain_loss_file != null ) {
             if ( !domain_gain_loss_file.exists() ) {
                 throw new IllegalArgumentException( "[" + domain_gain_loss_file + "] does not exist" );
@@ -238,7 +236,7 @@ public class MetaOntologizer {
             speciesto_to_domain_id = parseDomainGainLossFile( domain_gain_loss_file );
             if ( VERBOSE ) {
                 ForesterUtil.programMessage( PRG_NAME, "parsed gain/loss domains for " + speciesto_to_domain_id.size()
-                        + " species from [" + domain_gain_loss_file + "]" );
+                                             + " species from [" + domain_gain_loss_file + "]" );
             }
         }
         final String[] children = ontologizer_outdir.list();
@@ -255,7 +253,7 @@ public class MetaOntologizer {
         }
         if ( VERBOSE ) {
             ForesterUtil.programMessage( PRG_NAME, "need to analyze " + ontologizer_outfiles.size()
-                    + " Ontologizer outfiles from [" + ontologizer_outdir + "]" );
+                                         + " Ontologizer outfiles from [" + ontologizer_outdir + "]" );
         }
         final OBOparser parser = new OBOparser( obo_file, OBOparser.ReturnType.BASIC_GO_TERM );
         final List<GoTerm> go_terms = parser.parse();
@@ -283,7 +281,7 @@ public class MetaOntologizer {
         for( final File ontologizer_outfile : ontologizer_outfiles ) {
             final String species = obtainSpecies( ontologizer_outfile );
             final List<OntologizerResult> ontologizer_results = OntologizerResult.parse( new File( ontologizer_outdir
-                    + ForesterUtil.FILE_SEPARATOR + ontologizer_outfile ) );
+                                                                                                   + ForesterUtil.FILE_SEPARATOR + ontologizer_outfile ) );
             final SortedSet<OntologizerResult> filtered_ontologizer_results = new TreeSet<OntologizerResult>();
             for( final OntologizerResult ontologizer_result : ontologizer_results ) {
                 if ( ontologizer_result.getPAdjusted() <= p_adjusted_upper_limit ) {
@@ -302,18 +300,18 @@ public class MetaOntologizer {
         writeHtmlHeader( b_html_writer,
                          GoNameSpace.GoNamespaceType.BIOLOGICAL_PROCESS.toString() + " | Pmax = "
                                  + p_adjusted_upper_limit + " | " + comment,
-                         ontologizer_outdir.getAbsolutePath(),
-                         domain_gain_loss_file_full_path_str );
+                                 ontologizer_outdir.getAbsolutePath(),
+                                 domain_gain_loss_file_full_path_str );
         writeHtmlHeader( c_html_writer,
                          GoNameSpace.GoNamespaceType.CELLULAR_COMPONENT.toString() + " | Pmax = "
                                  + p_adjusted_upper_limit + " | " + comment,
-                         ontologizer_outdir.getAbsolutePath(),
-                         domain_gain_loss_file_full_path_str );
+                                 ontologizer_outdir.getAbsolutePath(),
+                                 domain_gain_loss_file_full_path_str );
         writeHtmlHeader( m_html_writer,
                          GoNameSpace.GoNamespaceType.MOLECULAR_FUNCTION.toString() + " | Pmax = "
                                  + p_adjusted_upper_limit + " | " + comment,
-                         ontologizer_outdir.getAbsolutePath(),
-                         domain_gain_loss_file_full_path_str );
+                                 ontologizer_outdir.getAbsolutePath(),
+                                 domain_gain_loss_file_full_path_str );
         for( final String species : species_to_results_map.keySet() ) {
             if ( hasResultsForSpecies( go_id_to_terms,
                                        species_to_results_map,
@@ -333,11 +331,11 @@ public class MetaOntologizer {
                                        GoNameSpace.GoNamespaceType.MOLECULAR_FUNCTION ) ) {
                 writeHtmlSpecies( m_html_writer, species );
             }
-            SortedSet<DomainId> domains_per_species = null;
+            SortedSet<String> domains_per_species = null;
             if ( ( speciesto_to_domain_id != null ) && ( speciesto_to_domain_id.size() > 0 ) ) {
                 domains_per_species = speciesto_to_domain_id.get( new BasicSpecies( species ) );
             }
-            final Set<DomainId> domain_ids_with_go_annot = new HashSet<DomainId>();
+            final Set<String> domain_ids_with_go_annot = new HashSet<String>();
             processOneSpecies( go_id_to_terms,
                                b_html_writer,
                                b_tab_writer,
@@ -383,30 +381,30 @@ public class MetaOntologizer {
         m_tab_writer.close();
         if ( VERBOSE ) {
             ForesterUtil.programMessage( PRG_NAME, "successfully wrote biological process summary to [" + b_file_html
-                    + "]" );
+                                         + "]" );
             ForesterUtil.programMessage( PRG_NAME, "successfully wrote biological process summary to [" + b_file_txt
-                    + "]" );
+                                         + "]" );
             ForesterUtil.programMessage( PRG_NAME, "successfully wrote molecular function summary to [" + m_file_html
-                    + "]" );
+                                         + "]" );
             ForesterUtil.programMessage( PRG_NAME, "successfully wrote molecular function summary to [" + m_file_txt
-                    + "]" );
+                                         + "]" );
             ForesterUtil.programMessage( PRG_NAME, "successfully wrote cellular component summary to [" + c_file_html
-                    + "]" );
+                                         + "]" );
             ForesterUtil.programMessage( PRG_NAME, "successfully wrote cellular component summary to [" + c_file_txt
-                    + "]" );
+                                         + "]" );
         }
     }
 
     private static void writeHtmlDomains( final Writer writer,
-                                          final SortedSet<DomainId> domains,
-                                          final Set<DomainId> domain_ids_with_go_annot ) throws IOException {
+                                          final SortedSet<String> domains,
+                                          final Set<String> domain_ids_with_go_annot ) throws IOException {
         writer.write( "<tr>" );
         writer.write( "<td colspan=\"10\">" );
         if ( domains != null ) {
-            for( final DomainId domain : domains ) {
+            for( final String domain : domains ) {
                 if ( !domain_ids_with_go_annot.contains( domain ) ) {
                     writer.write( "[<a class=\"new_type\" href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain
-                            + "\">" + domain + "</a>] " );
+                                  + "\">" + domain + "</a>] " );
                 }
             }
         }
@@ -520,7 +518,7 @@ public class MetaOntologizer {
         writer.write( "<tr>" );
         writer.write( "<td><h3>" );
         writer.write( species );
-        SurfacingUtil.writeTaxonomyLinks( writer, species );
+        SurfacingUtil.writeTaxonomyLinks( writer, species, null );
         writer.write( "</h3></td>" );
         writer.write( "</tr>" );
         writer.write( ForesterUtil.LINE_SEPARATOR );
@@ -555,9 +553,9 @@ public class MetaOntologizer {
                                                  final double p_adjusted_upper_limit,
                                                  final String species,
                                                  final Map<GoId, GoTerm> go_id_to_terms,
-                                                 final SortedSet<DomainId> domains_per_species,
+                                                 final SortedSet<String> domains_per_species,
                                                  final List<PfamToGoMapping> pfam_to_go,
-                                                 final Set<DomainId> domain_ids_with_go_annot ) throws IOException {
+                                                 final Set<String> domain_ids_with_go_annot ) throws IOException {
         final Color p_adj_color = ForesterUtil.calcColor( ontologizer_result.getPAdjusted(),
                                                           0,
                                                           p_adjusted_upper_limit,
@@ -575,7 +573,7 @@ public class MetaOntologizer {
         writer.write( "</font>" );
         writer.write( "</td><td>" );
         writer.write( "<a href=\"" + SurfacingConstants.GO_LINK + ontologizer_result.getGoId().getId()
-                + "\" target=\"amigo_window\">" + ontologizer_result.getGoId().getId() + "</a>" );
+                      + "\" target=\"amigo_window\">" + ontologizer_result.getGoId().getId() + "</a>" );
         writer.write( "</td><td>" );
         writer.write( "<font color=\"#" + ForesterUtil.colorToHex( p_adj_color ) + "\">" );
         writer.write( FORMATER.format( ontologizer_result.getPAdjusted() ) );