in progress
[jalview.git] / forester / java / src / org / forester / go / etc / MetaOntologizer.java
index 970f939..98bf943 100644 (file)
@@ -5,7 +5,7 @@
 // Copyright (C) 2008-2009 Christian M. Zmasek
 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
 // All rights reserved
-// 
+//
 // This library is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
 // License as published by the Free Software Foundation; either
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 // Lesser General Public License for more details.
-// 
+//
 // You should have received a copy of the GNU Lesser General Public
 // License along with this library; if not, write to the Free Software
 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 //
 // Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
 
 package org.forester.go.etc;
 
@@ -51,9 +51,8 @@ import org.forester.go.GoTerm;
 import org.forester.go.GoUtils;
 import org.forester.go.OBOparser;
 import org.forester.go.PfamToGoMapping;
-import org.forester.surfacing.BasicSpecies;
-import org.forester.surfacing.DomainId;
-import org.forester.surfacing.Species;
+import org.forester.species.BasicSpecies;
+import org.forester.species.Species;
 import org.forester.surfacing.SurfacingConstants;
 import org.forester.surfacing.SurfacingUtil;
 import org.forester.util.ForesterUtil;
@@ -66,7 +65,10 @@ public class MetaOntologizer {
     final static private String       PRG_NAME                         = "meta_ontologizer";
     private static final boolean      VERBOSE                          = true;
     //table-a_41_dollo_all_gains_d-Topology-Elim-Bonferroni.txt:
-    private final static Pattern      PATTERN_ONTOLOGIZER_TABLE_OUTPUT = Pattern.compile( ".*table-(.+)_dollo_.*",
+    //TODO change back to the original ".*table-(.+)_dollo_.*" pattern (kept commented out below)
+    // private final static Pattern      PATTERN_ONTOLOGIZER_TABLE_OUTPUT = Pattern.compile( ".*table-(.+)_dollo_.*",
+    //                                                                                      Pattern.CASE_INSENSITIVE ); //TODO this might need some work...
+    private final static Pattern      PATTERN_ONTOLOGIZER_TABLE_OUTPUT = Pattern.compile( ".*table-(.+)\\.txt",
                                                                                           Pattern.CASE_INSENSITIVE ); //TODO this might need some work...
 
     private static boolean hasResultsForSpecies( final Map<GoId, GoTerm> go_id_to_terms,
@@ -82,12 +84,12 @@ public class MetaOntologizer {
     }
 
     private static StringBuilder obtainDomainsForGoId( final List<PfamToGoMapping> pfam_to_go,
-                                                       final SortedSet<DomainId> domains_per_species,
+                                                       final SortedSet<String> domains_per_species,
                                                        final Map<GoId, GoTerm> all_go_terms,
                                                        final GoId query_go_id,
-                                                       final Set<DomainId> found_domain_ids ) {
+                                                       final Set<String> found_domain_ids ) {
         final StringBuilder sb = new StringBuilder();
-        D: for( final DomainId domain_id : domains_per_species ) {
+        D: for( final String domain_id : domains_per_species ) {
             for( final PfamToGoMapping ptg : pfam_to_go ) {
                 if ( ptg.getKey().equals( domain_id ) ) {
                     final GoId go_id = ptg.getValue();
@@ -125,13 +127,12 @@ public class MetaOntologizer {
         return species;
     }
 
-    private static SortedMap<Species, SortedSet<DomainId>> parseDomainGainLossFile( final File input )
-            throws IOException {
+    private static SortedMap<Species, SortedSet<String>> parseDomainGainLossFile( final File input ) throws IOException {
         final String error = ForesterUtil.isReadableFile( input );
         if ( !ForesterUtil.isEmpty( error ) ) {
             throw new IOException( error );
         }
-        final SortedMap<Species, SortedSet<DomainId>> speciesto_to_domain_id = new TreeMap<Species, SortedSet<DomainId>>();
+        final SortedMap<Species, SortedSet<String>> speciesto_to_domain_id = new TreeMap<Species, SortedSet<String>>();
         final BufferedReader br = new BufferedReader( new FileReader( input ) );
         String line;
         int line_number = 0;
@@ -145,13 +146,16 @@ public class MetaOntologizer {
                 }
                 else if ( line.startsWith( "#" ) ) {
                     current_species = new BasicSpecies( line.substring( 1 ) );
-                    speciesto_to_domain_id.put( current_species, new TreeSet<DomainId>() );
+                    speciesto_to_domain_id.put( current_species, new TreeSet<String>() );
+                    if ( VERBOSE ) {
+                        ForesterUtil.programMessage( PRG_NAME, "saw " + current_species );
+                    }
                 }
                 else {
                     if ( current_species == null ) {
                         throw new IOException( "parsing problem [at line " + line_number + "] in [" + input + "]" );
                     }
-                    speciesto_to_domain_id.get( current_species ).add( new DomainId( line ) );
+                    speciesto_to_domain_id.get( current_species ).add( new String( line ) );
                 }
             }
         }
@@ -172,9 +176,9 @@ public class MetaOntologizer {
                                            final SortedMap<String, SortedSet<OntologizerResult>> species_to_results_map,
                                            final String species,
                                            final double p_adjusted_upper_limit,
-                                           final SortedSet<DomainId> domains_per_species,
+                                           final SortedSet<String> domains_per_species,
                                            final List<PfamToGoMapping> pfam_to_go,
-                                           final Set<DomainId> domain_ids_with_go_annot ) throws IOException {
+                                           final Set<String> domain_ids_with_go_annot ) throws IOException {
         final SortedSet<OntologizerResult> ontologizer_results = species_to_results_map.get( species );
         for( final OntologizerResult ontologizer_result : ontologizer_results ) {
             final GoTerm go_term = go_id_to_terms.get( ontologizer_result.getGoId() );
@@ -224,7 +228,7 @@ public class MetaOntologizer {
             throw new IllegalArgumentException( "adjusted P values limit [" + p_adjusted_upper_limit
                     + "] is out of range" );
         }
-        SortedMap<Species, SortedSet<DomainId>> speciesto_to_domain_id = null;
+        SortedMap<Species, SortedSet<String>> speciesto_to_domain_id = null;
         if ( domain_gain_loss_file != null ) {
             if ( !domain_gain_loss_file.exists() ) {
                 throw new IllegalArgumentException( "[" + domain_gain_loss_file + "] does not exist" );
@@ -327,11 +331,11 @@ public class MetaOntologizer {
                                        GoNameSpace.GoNamespaceType.MOLECULAR_FUNCTION ) ) {
                 writeHtmlSpecies( m_html_writer, species );
             }
-            SortedSet<DomainId> domains_per_species = null;
+            SortedSet<String> domains_per_species = null;
             if ( ( speciesto_to_domain_id != null ) && ( speciesto_to_domain_id.size() > 0 ) ) {
                 domains_per_species = speciesto_to_domain_id.get( new BasicSpecies( species ) );
             }
-            final Set<DomainId> domain_ids_with_go_annot = new HashSet<DomainId>();
+            final Set<String> domain_ids_with_go_annot = new HashSet<String>();
             processOneSpecies( go_id_to_terms,
                                b_html_writer,
                                b_tab_writer,
@@ -392,12 +396,12 @@ public class MetaOntologizer {
     }
 
     private static void writeHtmlDomains( final Writer writer,
-                                          final SortedSet<DomainId> domains,
-                                          final Set<DomainId> domain_ids_with_go_annot ) throws IOException {
+                                          final SortedSet<String> domains,
+                                          final Set<String> domain_ids_with_go_annot ) throws IOException {
         writer.write( "<tr>" );
         writer.write( "<td colspan=\"10\">" );
         if ( domains != null ) {
-            for( final DomainId domain : domains ) {
+            for( final String domain : domains ) {
                 if ( !domain_ids_with_go_annot.contains( domain ) ) {
                     writer.write( "[<a class=\"new_type\" href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain
                             + "\">" + domain + "</a>] " );
@@ -438,22 +442,18 @@ public class MetaOntologizer {
         w.write( ForesterUtil.LINE_SEPARATOR );
         w.write( "a.new_type:link { font-size: 7pt; color : #505050; text-decoration : none; }" );
         w.write( ForesterUtil.LINE_SEPARATOR );
-        w
-                .write( "a.new_type:hover { font-size: 7pt; color : #000000; background-color : #FFFF00; text-decoration : none; }" );
+        w.write( "a.new_type:hover { font-size: 7pt; color : #000000; background-color : #FFFF00; text-decoration : none; }" );
         w.write( ForesterUtil.LINE_SEPARATOR );
-        w
-                .write( "a.new_type:hover { font-size: 7pt; color : #000000; background-color : #FFFF00; text-decoration : none; }" );
+        w.write( "a.new_type:hover { font-size: 7pt; color : #000000; background-color : #FFFF00; text-decoration : none; }" );
         w.write( ForesterUtil.LINE_SEPARATOR );
         w.write( "td { text-align: left; vertical-align: top; font-family: Verdana, Arial, Helvetica; font-size: 8pt}" );
         w.write( ForesterUtil.LINE_SEPARATOR );
-        w
-                .write( "th { text-align: left; vertical-align: top; font-family: Verdana, Arial, Helvetica; font-size: 10pt; font-weight: bold }" );
+        w.write( "th { text-align: left; vertical-align: top; font-family: Verdana, Arial, Helvetica; font-size: 10pt; font-weight: bold }" );
         w.write( ForesterUtil.LINE_SEPARATOR );
         w.write( "h1 { color : #000000; font-family: Verdana, Arial, Helvetica; font-size: 18pt; font-weight: bold }" );
         w.write( ForesterUtil.LINE_SEPARATOR );
         w.write( "h2 { color : #000000; font-family: Verdana, Arial, Helvetica; font-size: 16pt; font-weight: bold }" );
-        w
-                .write( "h3 { margin-top: 12px;  margin-bottom: 0px; color : #000000; font-family: Verdana, Arial, Helvetica; font-size: 12pt; font-weight: bold }" );
+        w.write( "h3 { margin-top: 12px;  margin-bottom: 0px; color : #000000; font-family: Verdana, Arial, Helvetica; font-size: 12pt; font-weight: bold }" );
         w.write( ForesterUtil.LINE_SEPARATOR );
         w.write( "</style>" );
         w.write( ForesterUtil.LINE_SEPARATOR );
@@ -518,7 +518,7 @@ public class MetaOntologizer {
         writer.write( "<tr>" );
         writer.write( "<td><h3>" );
         writer.write( species );
-        SurfacingUtil.writeTaxonomyLinks( writer, species );
+        SurfacingUtil.writeTaxonomyLinks( writer, species, null );
         writer.write( "</h3></td>" );
         writer.write( "</tr>" );
         writer.write( ForesterUtil.LINE_SEPARATOR );
@@ -553,9 +553,9 @@ public class MetaOntologizer {
                                                  final double p_adjusted_upper_limit,
                                                  final String species,
                                                  final Map<GoId, GoTerm> go_id_to_terms,
-                                                 final SortedSet<DomainId> domains_per_species,
+                                                 final SortedSet<String> domains_per_species,
                                                  final List<PfamToGoMapping> pfam_to_go,
-                                                 final Set<DomainId> domain_ids_with_go_annot ) throws IOException {
+                                                 final Set<String> domain_ids_with_go_annot ) throws IOException {
         final Color p_adj_color = ForesterUtil.calcColor( ontologizer_result.getPAdjusted(),
                                                           0,
                                                           p_adjusted_upper_limit,
@@ -592,8 +592,11 @@ public class MetaOntologizer {
         writer.write( String.valueOf( ontologizer_result.getStudyTerm() ) );
         writer.write( "</td><td>" );
         if ( domains_per_species != null ) {
-            final StringBuilder sb = obtainDomainsForGoId( pfam_to_go, domains_per_species, go_id_to_terms, go_term
-                    .getGoId(), domain_ids_with_go_annot );
+            final StringBuilder sb = obtainDomainsForGoId( pfam_to_go,
+                                                           domains_per_species,
+                                                           go_id_to_terms,
+                                                           go_term.getGoId(),
+                                                           domain_ids_with_go_annot );
             writer.write( sb.toString() );
         }
         else {