in progress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Tue, 8 May 2012 00:59:00 +0000 (00:59 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Tue, 8 May 2012 00:59:00 +0000 (00:59 +0000)
forester/java/src/org/forester/application/surfacing.java
forester/java/src/org/forester/io/parsers/HmmPfamOutputParser.java
forester/java/src/org/forester/io/parsers/HmmscanPerDomainTableParser.java
forester/java/src/org/forester/protein/BasicDomain.java [moved from forester/java/src/org/forester/surfacing/BasicDomain.java with 85% similarity]
forester/java/src/org/forester/protein/Domain.java
forester/java/src/org/forester/surfacing/SimpleDomain.java
forester/java/src/org/forester/surfacing/SurfacingUtil.java
forester/java/src/org/forester/surfacing/TestSurfacing.java

index 08dde5e..20e5b1f 100644 (file)
@@ -64,6 +64,7 @@ import org.forester.phylogeny.PhylogenyNode;
 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
 import org.forester.protein.BinaryDomainCombination;
+import org.forester.protein.Domain;
 import org.forester.protein.DomainId;
 import org.forester.protein.Protein;
 import org.forester.species.BasicSpecies;
@@ -234,8 +235,8 @@ public class surfacing {
     final static private String                               INPUT_SPECIES_TREE_OPTION                                                     = "species_tree";
     final static private String                               SEQ_EXTRACT_OPTION                                                            = "prot_extract";
     final static private char                                 SEPARATOR_FOR_INPUT_VALUES                                                    = '#';
-    final static private String                               PRG_VERSION                                                                   = "2.240";
-    final static private String                               PRG_DATE                                                                      = "2012.05.04";
+    final static private String                               PRG_VERSION                                                                   = "2.250";
+    final static private String                               PRG_DATE                                                                      = "2012.05.07";
     final static private String                               E_MAIL                                                                        = "czmasek@burnham.org";
     final static private String                               WWW                                                                           = "www.phylosoft.org/forester/applications/surfacing";
     final static private boolean                              IGNORE_DUFS_DEFAULT                                                           = true;
@@ -1761,6 +1762,7 @@ public class surfacing {
         }
         final Map<String, DescriptiveStatistics> protein_length_stats_by_dc = new HashMap<String, DescriptiveStatistics>();
         final Map<String, DescriptiveStatistics> domain_number_stats_by_dc = new HashMap<String, DescriptiveStatistics>();
+        final Map<String, DescriptiveStatistics> domain_length_stats_by_domain = new HashMap<String, DescriptiveStatistics>();
         // Main loop:
         for( int i = 0; i < number_of_genomes; ++i ) {
             System.out.println();
@@ -1911,6 +1913,13 @@ public class surfacing {
                     dc_data_writer.write( SurfacingUtil.proteinToDomainCombinations( protein, count + "", "\t" )
                             .toString() );
                     ++count;
+                    for( final Domain d : protein.getProteinDomains() ) {
+                        final String d_str = d.getDomainId().toString();
+                        if ( !domain_length_stats_by_domain.containsKey( d_str ) ) {
+                            domain_length_stats_by_domain.put( d_str, new BasicDescriptiveStatistics() );
+                        }
+                        domain_length_stats_by_domain.get( d_str ).addValue( d.getLength() );
+                    }
                 }
             }
             catch ( final IOException e ) {
@@ -2261,7 +2270,8 @@ public class surfacing {
                                                         all_bin_domain_combinations_lost_fitch,
                                                         dc_type,
                                                         protein_length_stats_by_dc,
-                                                        domain_number_stats_by_dc );
+                                                        domain_number_stats_by_dc,
+                                                        domain_length_stats_by_domain );
                 // Listing of all domain combinations gained is only done if only one input tree is used. 
                 if ( ( domain_id_to_secondary_features_maps != null )
                         && ( domain_id_to_secondary_features_maps.length > 0 ) ) {
index 800606c..4c9914e 100644 (file)
@@ -40,11 +40,11 @@ import java.util.SortedSet;
 import java.util.TreeMap;
 import java.util.TreeSet;
 
+import org.forester.protein.BasicDomain;
 import org.forester.protein.BasicProtein;
 import org.forester.protein.Domain;
 import org.forester.protein.DomainId;
 import org.forester.protein.Protein;
-import org.forester.surfacing.BasicDomain;
 import org.forester.surfacing.SurfacingUtil;
 import org.forester.util.ForesterUtil;
 
index 84fbfe4..0bb224c 100644 (file)
@@ -41,11 +41,11 @@ import java.util.SortedSet;
 import java.util.TreeMap;
 import java.util.TreeSet;
 
+import org.forester.protein.BasicDomain;
 import org.forester.protein.BasicProtein;
 import org.forester.protein.Domain;
 import org.forester.protein.DomainId;
 import org.forester.protein.Protein;
-import org.forester.surfacing.BasicDomain;
 import org.forester.surfacing.SurfacingUtil;
 import org.forester.util.ForesterUtil;
 
 // Contact: phylosoft @ gmail . com
 // WWW: www.phylosoft.org/forester
 
-package org.forester.surfacing;
+package org.forester.protein;
 
 import org.forester.go.GoId;
-import org.forester.protein.Domain;
-import org.forester.protein.DomainId;
 import org.forester.util.ForesterUtil;
 
 public class BasicDomain implements Domain {
@@ -65,28 +63,7 @@ public class BasicDomain implements Domain {
                         final short total_count,
                         final double per_sequence_evalue,
                         final double per_sequence_score ) {
-        if ( ( from >= to ) || ( from < 0 ) ) {
-            throw new IllegalArgumentException( "attempt to create protein domain from " + from + " to " + to );
-        }
-        if ( ForesterUtil.isEmpty( id_str ) ) {
-            throw new IllegalArgumentException( "attempt to create protein domain with null or empty id" );
-        }
-        if ( ( number > total_count ) || ( number < 0 ) ) {
-            throw new IllegalArgumentException( "attempt to create protein domain number " + number + " out of "
-                    + total_count );
-        }
-        if ( per_sequence_evalue < 0.0 ) {
-            throw new IllegalArgumentException( "attempt to create protein domain with E-value" );
-        }
-        _id = new DomainId( id_str );
-        _from = from;
-        _to = to;
-        _number = number;
-        _total_count = total_count;
-        _per_sequence_evalue = per_sequence_evalue;
-        _per_sequence_score = per_sequence_score;
-        _per_domain_evalue = -1;
-        _per_domain_score = -1;
+        this( id_str, from, to, number, total_count, per_sequence_evalue, per_sequence_score, 0, 0 );
     }
 
     public BasicDomain( final String id_str,
@@ -109,7 +86,7 @@ public class BasicDomain implements Domain {
                     + total_count );
         }
         if ( ( per_sequence_evalue < 0.0 ) || ( per_domain_evalue < 0.0 ) ) {
-            throw new IllegalArgumentException( "attempt to create protein domain with E-value" );
+            throw new IllegalArgumentException( "attempt to create protein domain with negative E-value" );
         }
         _id = new DomainId( id_str );
         _from = from;
@@ -234,4 +211,9 @@ public class BasicDomain implements Domain {
     public StringBuffer toStringBuffer() {
         return new StringBuffer( getDomainId().getId() );
     }
+
+    @Override
+    public int getLength() { // length of this domain, derived from its from/to coordinates
+        return 1 + getTo() - getFrom(); // the +1 counts both end points, i.e. [from, to] is treated as an inclusive range
+    }
 }
index ef2e65a..c2a09b9 100644 (file)
@@ -34,6 +34,8 @@ public interface Domain extends Comparable<Domain> {
 
     public DomainId getDomainId();
 
+    public int getLength();
+
     public int getFrom();
 
     public GoId getGoId( int i );
index 71f6894..74173df 100644 (file)
@@ -74,6 +74,7 @@ public class SimpleDomain implements Domain {
         throw new RuntimeException( "method not implemented" );
     }
 
+    @Override // getLength() is not supported by this class: the body below throws unconditionally
     public int getLength() {
         throw new RuntimeException( "method not implemented" );
     }
index 2237717..974409d 100644 (file)
@@ -72,6 +72,7 @@ import org.forester.phylogeny.PhylogenyNodeI.NH_CONVERSION_SUPPORT_VALUE_STYLE;
 import org.forester.phylogeny.data.BinaryCharacters;
 import org.forester.phylogeny.data.Confidence;
 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.protein.BasicDomain;
 import org.forester.protein.BasicProtein;
 import org.forester.protein.BinaryDomainCombination;
 import org.forester.protein.Domain;
@@ -180,7 +181,8 @@ public final class SurfacingUtil {
                                                                     final String outfilename_for_ancestor_species_counts,
                                                                     final String outfilename_for_protein_stats,
                                                                     final Map<String, DescriptiveStatistics> protein_length_stats_by_dc,
-                                                                    final Map<String, DescriptiveStatistics> domain_number_stats_by_dc ) {
+                                                                    final Map<String, DescriptiveStatistics> domain_number_stats_by_dc,
+                                                                    final Map<String, DescriptiveStatistics> domain_length_stats_by_domain ) {
         try {
             //
             //            if ( protein_length_stats_by_dc != null ) {
@@ -227,6 +229,7 @@ public final class SurfacingUtil {
             final SortedMap<Integer, StringBuilder> domain_lists = new TreeMap<Integer, StringBuilder>();
             final SortedMap<Integer, DescriptiveStatistics> dc_reapp_counts_to_protein_length_stats = new TreeMap<Integer, DescriptiveStatistics>();
             final SortedMap<Integer, DescriptiveStatistics> dc_reapp_counts_to_domain_number_stats = new TreeMap<Integer, DescriptiveStatistics>();
+            final SortedMap<Integer, DescriptiveStatistics> dc_reapp_counts_to_domain_lengths_stats = new TreeMap<Integer, DescriptiveStatistics>();
             final SortedMap<Integer, PriorityQueue<String>> domain_lists_go = new TreeMap<Integer, PriorityQueue<String>>();
             final SortedMap<Integer, SortedSet<String>> domain_lists_go_unique = new TreeMap<Integer, SortedSet<String>>();
             final Set<String> dcs = dc_gain_counts.keySet();
@@ -235,6 +238,8 @@ public final class SurfacingUtil {
             final DescriptiveStatistics gained_once_domain_count_stats = new BasicDescriptiveStatistics();
             final DescriptiveStatistics gained_multiple_times_lengths_stats = new BasicDescriptiveStatistics();
             final DescriptiveStatistics gained_multiple_times_domain_count_stats = new BasicDescriptiveStatistics();
+            final DescriptiveStatistics gained_multiple_times_domain_length_stats = new BasicDescriptiveStatistics();
+            final DescriptiveStatistics gained_once_domain_length_stats = new BasicDescriptiveStatistics();
             for( final String dc : dcs ) {
                 final int count = dc_gain_counts.get( dc );
                 if ( histogram.containsKey( count ) ) {
@@ -267,6 +272,16 @@ public final class SurfacingUtil {
                     dc_reapp_counts_to_domain_number_stats.get( count ).addValue( domain_number_stats_by_dc.get( dc )
                             .arithmeticMean() );
                 }
+                if ( domain_length_stats_by_domain != null ) {
+                    if ( !dc_reapp_counts_to_domain_lengths_stats.containsKey( count ) ) {
+                        dc_reapp_counts_to_domain_lengths_stats.put( count, new BasicDescriptiveStatistics() );
+                    }
+                    final String[] ds = dc.split( "=" );
+                    dc_reapp_counts_to_domain_lengths_stats.get( count ).addValue( domain_length_stats_by_domain
+                            .get( ds[ 0 ] ).arithmeticMean() );
+                    dc_reapp_counts_to_domain_lengths_stats.get( count ).addValue( domain_length_stats_by_domain
+                            .get( ds[ 1 ] ).arithmeticMean() );
+                }
                 if ( count > 1 ) {
                     more_than_once.add( dc );
                     if ( protein_length_stats_by_dc != null ) {
@@ -283,6 +298,19 @@ public final class SurfacingUtil {
                             gained_multiple_times_domain_count_stats.addValue( element );
                         }
                     }
+                    if ( domain_length_stats_by_domain != null ) {
+                        final String[] ds = dc.split( "=" );
+                        final DescriptiveStatistics s0 = domain_length_stats_by_domain.get( ds[ 0 ] );
+                        final DescriptiveStatistics s1 = domain_length_stats_by_domain.get( ds[ 1 ] );
+                        final double[] a0 = s0.getDataAsDoubleArray();
+                        final double[] a1 = s1.getDataAsDoubleArray();
+                        for( final double element : a0 ) {
+                            gained_multiple_times_domain_length_stats.addValue( element );
+                        }
+                        for( final double element : a1 ) {
+                            gained_multiple_times_domain_length_stats.addValue( element );
+                        }
+                    }
                 }
                 else {
                     if ( protein_length_stats_by_dc != null ) {
@@ -299,6 +327,19 @@ public final class SurfacingUtil {
                             gained_once_domain_count_stats.addValue( element );
                         }
                     }
+                    if ( domain_length_stats_by_domain != null ) {
+                        final String[] ds = dc.split( "=" );
+                        final DescriptiveStatistics s0 = domain_length_stats_by_domain.get( ds[ 0 ] );
+                        final DescriptiveStatistics s1 = domain_length_stats_by_domain.get( ds[ 1 ] );
+                        final double[] a0 = s0.getDataAsDoubleArray();
+                        final double[] a1 = s1.getDataAsDoubleArray();
+                        for( final double element : a0 ) {
+                            gained_once_domain_length_stats.addValue( element );
+                        }
+                        for( final double element : a1 ) {
+                            gained_once_domain_length_stats.addValue( element );
+                        }
+                    }
                 }
             }
             final Set<Integer> histogram_keys = histogram.keySet();
@@ -322,7 +363,6 @@ public final class SurfacingUtil {
             out_dc.close();
             out_dc_for_go_mapping.close();
             out_dc_for_go_mapping_unique.close();
-            //
             final SortedMap<String, Integer> lca_rank_counts = new TreeMap<String, Integer>();
             final SortedMap<String, Integer> lca_ancestor_species_counts = new TreeMap<String, Integer>();
             for( final String dc : more_than_once ) {
@@ -371,13 +411,28 @@ public final class SurfacingUtil {
             if ( !ForesterUtil.isEmpty( outfilename_for_protein_stats )
                     && ( ( protein_length_stats_by_dc != null ) || ( domain_number_stats_by_dc != null ) ) ) {
                 final BufferedWriter w = new BufferedWriter( new FileWriter( outfilename_for_protein_stats ) );
-                w.write( "Lengths: " );
+                w.write( "Domain Lengths: " );
+                w.write( "\n" );
+                if ( domain_length_stats_by_domain != null ) {
+                    for( final Entry<Integer, DescriptiveStatistics> entry : dc_reapp_counts_to_domain_lengths_stats
+                            .entrySet() ) {
+                        w.write( entry.getKey().toString() );
+                        w.write( "\t" + entry.getValue().arithmeticMean() );
+                        w.write( "\t" + entry.getValue().median() );
+                        w.write( "\n" );
+                    }
+                }
+                w.flush();
+                w.write( "\n" );
+                w.write( "\n" );
+                w.write( "Protein Lengths: " );
                 w.write( "\n" );
                 if ( protein_length_stats_by_dc != null ) {
                     for( final Entry<Integer, DescriptiveStatistics> entry : dc_reapp_counts_to_protein_length_stats
                             .entrySet() ) {
                         w.write( entry.getKey().toString() );
-                        w.write( ": " + entry.getValue().arithmeticMean() );
+                        w.write( "\t" + entry.getValue().arithmeticMean() );
+                        w.write( "\t" + entry.getValue().median() );
                         w.write( "\n" );
                     }
                 }
@@ -390,13 +445,26 @@ public final class SurfacingUtil {
                     for( final Entry<Integer, DescriptiveStatistics> entry : dc_reapp_counts_to_domain_number_stats
                             .entrySet() ) {
                         w.write( entry.getKey().toString() );
-                        w.write( ": " + entry.getValue().arithmeticMean() );
+                        w.write( "\t" + entry.getValue().arithmeticMean() );
+                        w.write( "\t" + entry.getValue().median() );
                         w.write( "\n" );
                     }
                 }
                 w.flush();
                 w.write( "\n" );
                 w.write( "\n" );
+                w.write( "Gained once, domain lengths:" );
+                w.write( "\n" );
+                w.write( gained_once_domain_length_stats.toString() );
+                w.write( "\n" );
+                w.write( "\n" );
+                w.write( "Gained multiple times, domain lengths:" );
+                w.write( "\n" );
+                w.write( gained_multiple_times_domain_length_stats.toString() );
+                w.write( "\n" );
+                w.write( "\n" );
+                w.write( "\n" );
+                w.write( "\n" );
                 w.write( "Gained once, protein lengths:" );
                 w.write( "\n" );
                 w.write( gained_once_lengths_stats.toString() );
@@ -702,7 +770,8 @@ public final class SurfacingUtil {
                                                  final List<BinaryDomainCombination> all_binary_domains_combination_lost_fitch,
                                                  final BinaryDomainCombination.DomainCombinationType dc_type,
                                                  final Map<String, DescriptiveStatistics> protein_length_stats_by_dc,
-                                                 final Map<String, DescriptiveStatistics> domain_number_stats_by_dc ) {
+                                                 final Map<String, DescriptiveStatistics> domain_number_stats_by_dc,
+                                                 final Map<String, DescriptiveStatistics> domain_length_stats_by_domain ) {
         final String sep = ForesterUtil.LINE_SEPARATOR + "###################" + ForesterUtil.LINE_SEPARATOR;
         final String date_time = ForesterUtil.getCurrentDateTime();
         final SortedSet<String> all_pfams_encountered = new TreeSet<String>();
@@ -932,7 +1001,8 @@ public final class SurfacingUtil {
                                                         outfile_name + "_indep_dc_gains_fitch_lca_taxonomies.txt",
                                                         outfile_name + "_indep_dc_gains_fitch_protein_statistics.txt",
                                                         protein_length_stats_by_dc,
-                                                        domain_number_stats_by_dc );
+                                                        domain_number_stats_by_dc,
+                                                        domain_length_stats_by_domain );
         }
     }
 
@@ -1005,7 +1075,7 @@ public final class SurfacingUtil {
                 + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_SUFFIX, outfile_name
                 + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_UNIQUE_SUFFIX, outfile_name
                 + "_MAPPED_indep_dc_gains_fitch_lca_ranks.txt", outfile_name
-                + "_MAPPED_indep_dc_gains_fitch_lca_taxonomies.txt", null, null, null );
+                + "_MAPPED_indep_dc_gains_fitch_lca_taxonomies.txt", null, null, null, null );
     }
 
     public static void doit( final List<Protein> proteins,
@@ -1867,181 +1937,6 @@ public final class SurfacingUtil {
         ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote characters detailed HTML list: \"" + filename + "\"" );
     }
 
-    public static void writeBinaryStatesMatrixToListORIGIG( final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
-                                                            final Map<GoId, GoTerm> go_id_to_term_map,
-                                                            final GoNameSpace go_namespace_limit,
-                                                            final boolean domain_combinations,
-                                                            final CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix,
-                                                            final CharacterStateMatrix.GainLossStates state,
-                                                            final String filename,
-                                                            final String indentifier_characters_separator,
-                                                            final String character_separator,
-                                                            final String title_for_html,
-                                                            final String prefix_for_html,
-                                                            final Map<DomainId, Set<String>>[] domain_id_to_secondary_features_maps,
-                                                            final SortedSet<String> all_pfams_encountered,
-                                                            final SortedSet<String> pfams_gained_or_lost,
-                                                            final String suffix_for_per_node_events_file ) {
-        if ( ( go_namespace_limit != null ) && ( ( go_id_to_term_map == null ) || ( go_id_to_term_map.size() < 1 ) ) ) {
-            throw new IllegalArgumentException( "attempt to use GO namespace limit without a GO-id to term map" );
-        }
-        else if ( ( ( domain_id_to_go_ids_map == null ) || ( domain_id_to_go_ids_map.size() < 1 ) ) ) {
-            throw new IllegalArgumentException( "attempt to output detailed HTML without a Pfam to GO map" );
-        }
-        else if ( ( ( go_id_to_term_map == null ) || ( go_id_to_term_map.size() < 1 ) ) ) {
-            throw new IllegalArgumentException( "attempt to output detailed HTML without a GO-id to term map" );
-        }
-        final File outfile = new File( filename );
-        checkForOutputFileWriteability( outfile );
-        final SortedSet<String> sorted_ids = new TreeSet<String>();
-        for( int i = 0; i < matrix.getNumberOfIdentifiers(); ++i ) {
-            sorted_ids.add( matrix.getIdentifier( i ) );
-        }
-        try {
-            final Writer out = new BufferedWriter( new FileWriter( outfile ) );
-            final File per_node_go_mapped_domain_gain_loss_files_base_dir = createBaseDirForPerNodeDomainFiles( surfacing.BASE_DIRECTORY_PER_NODE_DOMAIN_GAIN_LOSS_FILES,
-                                                                                                                domain_combinations,
-                                                                                                                state,
-                                                                                                                filename );
-            Writer per_node_go_mapped_domain_gain_loss_outfile_writer = null;
-            File per_node_go_mapped_domain_gain_loss_outfile = null;
-            int per_node_counter = 0;
-            out.write( "<html>" );
-            out.write( SurfacingConstants.NL );
-            addHtmlHead( out, title_for_html );
-            out.write( SurfacingConstants.NL );
-            out.write( "<body>" );
-            out.write( SurfacingConstants.NL );
-            out.write( "<h1>" );
-            out.write( SurfacingConstants.NL );
-            out.write( title_for_html );
-            out.write( SurfacingConstants.NL );
-            out.write( "</h1>" );
-            out.write( SurfacingConstants.NL );
-            out.write( "<table>" );
-            out.write( SurfacingConstants.NL );
-            for( final String id : sorted_ids ) {
-                out.write( "<tr>" );
-                out.write( "<td>" );
-                out.write( "<a href=\"#" + id + "\">" + id + "</a>" );
-                writeTaxonomyLinks( out, id );
-                out.write( "</td>" );
-                out.write( "</tr>" );
-                out.write( SurfacingConstants.NL );
-            }
-            out.write( "</table>" );
-            out.write( SurfacingConstants.NL );
-            for( final String id : sorted_ids ) {
-                out.write( SurfacingConstants.NL );
-                out.write( "<h2>" );
-                out.write( "<a name=\"" + id + "\">" + id + "</a>" );
-                writeTaxonomyLinks( out, id );
-                out.write( "</h2>" );
-                out.write( SurfacingConstants.NL );
-                out.write( "<table>" );
-                out.write( SurfacingConstants.NL );
-                out.write( "<tr>" );
-                out.write( "<td><b>" );
-                out.write( "Pfam domain(s)" );
-                out.write( "</b></td><td><b>" );
-                out.write( "GO term acc" );
-                out.write( "</b></td><td><b>" );
-                out.write( "GO term" );
-                out.write( "</b></td><td><b>" );
-                out.write( "Penultimate GO term" );
-                out.write( "</b></td><td><b>" );
-                out.write( "GO namespace" );
-                out.write( "</b></td>" );
-                out.write( "</tr>" );
-                out.write( SurfacingConstants.NL );
-                out.write( "</tr>" );
-                out.write( SurfacingConstants.NL );
-                per_node_counter = 0;
-                if ( matrix.getNumberOfCharacters() > 0 ) {
-                    per_node_go_mapped_domain_gain_loss_outfile = new File( per_node_go_mapped_domain_gain_loss_files_base_dir
-                            + ForesterUtil.FILE_SEPARATOR + id + suffix_for_per_node_events_file );
-                    SurfacingUtil.checkForOutputFileWriteability( per_node_go_mapped_domain_gain_loss_outfile );
-                    per_node_go_mapped_domain_gain_loss_outfile_writer = ForesterUtil
-                            .createBufferedWriter( per_node_go_mapped_domain_gain_loss_outfile );
-                }
-                else {
-                    per_node_go_mapped_domain_gain_loss_outfile = null;
-                    per_node_go_mapped_domain_gain_loss_outfile_writer = null;
-                }
-                for( int c = 0; c < matrix.getNumberOfCharacters(); ++c ) {
-                    // Not nice:
-                    // using null to indicate either UNCHANGED_PRESENT or GAIN.
-                    if ( ( matrix.getState( id, c ) == state )
-                            || ( ( state == null ) && ( ( matrix.getState( id, c ) == CharacterStateMatrix.GainLossStates.UNCHANGED_PRESENT ) || ( matrix
-                                    .getState( id, c ) == CharacterStateMatrix.GainLossStates.GAIN ) ) ) ) {
-                        final String character = matrix.getCharacter( c );
-                        String domain_0 = "";
-                        String domain_1 = "";
-                        if ( character.indexOf( BinaryDomainCombination.SEPARATOR ) > 0 ) {
-                            final String[] s = character.split( BinaryDomainCombination.SEPARATOR );
-                            if ( s.length != 2 ) {
-                                throw new AssertionError( "this should not have happened: unexpected format for domain combination: ["
-                                        + character + "]" );
-                            }
-                            domain_0 = s[ 0 ];
-                            domain_1 = s[ 1 ];
-                        }
-                        else {
-                            domain_0 = character;
-                        }
-                        writeDomainData( domain_id_to_go_ids_map,
-                                         go_id_to_term_map,
-                                         go_namespace_limit,
-                                         out,
-                                         domain_0,
-                                         domain_1,
-                                         prefix_for_html,
-                                         character_separator,
-                                         domain_id_to_secondary_features_maps,
-                                         null );
-                        all_pfams_encountered.add( domain_0 );
-                        if ( pfams_gained_or_lost != null ) {
-                            pfams_gained_or_lost.add( domain_0 );
-                        }
-                        if ( !ForesterUtil.isEmpty( domain_1 ) ) {
-                            all_pfams_encountered.add( domain_1 );
-                            if ( pfams_gained_or_lost != null ) {
-                                pfams_gained_or_lost.add( domain_1 );
-                            }
-                        }
-                        if ( per_node_go_mapped_domain_gain_loss_outfile_writer != null ) {
-                            writeDomainsToIndividualFilePerTreeNode( per_node_go_mapped_domain_gain_loss_outfile_writer,
-                                                                     domain_0,
-                                                                     domain_1 );
-                            per_node_counter++;
-                        }
-                    }
-                }
-                if ( per_node_go_mapped_domain_gain_loss_outfile_writer != null ) {
-                    per_node_go_mapped_domain_gain_loss_outfile_writer.close();
-                    if ( per_node_counter < 1 ) {
-                        per_node_go_mapped_domain_gain_loss_outfile.delete();
-                    }
-                    per_node_counter = 0;
-                }
-                out.write( "</table>" );
-                out.write( SurfacingConstants.NL );
-                out.write( "<hr>" );
-                out.write( SurfacingConstants.NL );
-            } // for( final String id : sorted_ids ) {  
-            out.write( "</body>" );
-            out.write( SurfacingConstants.NL );
-            out.write( "</html>" );
-            out.write( SurfacingConstants.NL );
-            out.flush();
-            out.close();
-        }
-        catch ( final IOException e ) {
-            ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() );
-        }
-        ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote characters detailed HTML list: \"" + filename + "\"" );
-    }
-
     public static void writeDomainCombinationsCountsFile( final String[][] input_file_properties,
                                                           final File output_dir,
                                                           final Writer per_genome_domain_promiscuity_statistics_writer,
index 11f4246..44c9115 100644 (file)
@@ -48,6 +48,7 @@ import org.forester.phylogeny.Phylogeny;
 import org.forester.phylogeny.PhylogenyNode;
 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
 import org.forester.phylogeny.factories.PhylogenyFactory;
+import org.forester.protein.BasicDomain;
 import org.forester.protein.BasicProtein;
 import org.forester.protein.BinaryDomainCombination;
 import org.forester.protein.BinaryDomainCombination.DomainCombinationType;