in progress...
author	cmzmasek <chris.zma@outlook.com>
Wed, 10 May 2017 23:57:31 +0000 (16:57 -0700)
committer	cmzmasek <chris.zma@outlook.com>
Wed, 10 May 2017 23:57:31 +0000 (16:57 -0700)
forester/java/src/org/forester/archaeopteryx/TreePanel.java
forester/java/src/org/forester/phylogeny/PhylogenyMethods.java
forester/java/src/org/forester/surfacing/MinimalDomainomeCalculator.java
forester/java/src/org/forester/surfacing/MinimalDomainomeCalculatorOLD.java [new file with mode: 0644]
forester/java/src/org/forester/test/Test.java

diff --git a/forester/java/src/org/forester/archaeopteryx/TreePanel.java b/forester/java/src/org/forester/archaeopteryx/TreePanel.java
index 9bc89cc..75ef09b 100644
--- a/forester/java/src/org/forester/archaeopteryx/TreePanel.java
+++ b/forester/java/src/org/forester/archaeopteryx/TreePanel.java
@@ -1297,16 +1297,6 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee
         return _last_drag_point_y;
     }
 
-    final private short getMaxBranchesToLeaf( final PhylogenyNode node ) {
-        if ( !_nodeid_dist_to_leaf.containsKey( node.getId() ) ) {
-            final short m = PhylogenyMethods.calculateMaxBranchesToLeaf( node );
-            _nodeid_dist_to_leaf.put( node.getId(), m );
-            return m;
-        }
-        else {
-            return _nodeid_dist_to_leaf.get( node.getId() );
-        }
-    }
 
     final private double getMaxDistanceToRoot() {
         if ( _max_distance_to_root < 0 ) {
diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java
index 150177b..9f73532 100644
--- a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java
+++ b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java
@@ -151,6 +151,30 @@ public class PhylogenyMethods {
     }
 
     /**
+     * Calculates the level of a node: the maximum number of edges between the node
+     * and any of its external descendants. For external nodes the level is 0.
+     * @param node the node for which to calculate the level
+     * @return the level of the node (0 if the node is external)
+     */
+    public static int calculateLevel( final PhylogenyNode node ) {
+        if ( node.isExternal() ) {
+            return 0;
+        }
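+        // Walk from each external descendant up to this node, counting edges;
+        // the level is the maximum count over all external descendants.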
+        int level = 0;
+        for( PhylogenyNode ext : node.getAllExternalDescendants() ) {
+            int counter = 0;
+            while ( ext != node ) {
+                ext = ext.getParent();
+                ++counter;
+            }
+            if ( counter > level ) {
+                level = counter;
+            }
+        }
+        return level;
+    }
+
+    /**
      * Calculates the distance between PhylogenyNodes node1 and node2.
      *
      *
@@ -237,28 +261,7 @@ public class PhylogenyMethods {
         return node1;
     }
 
-    public static short calculateMaxBranchesToLeaf( final PhylogenyNode node ) {
-        if ( node.isExternal() ) {
-            return 0;
-        }
-        short max = 0;
-        for( PhylogenyNode d : node.getAllExternalDescendants() ) {
-            short steps = 0;
-            while ( d != node ) {
-                if ( d.isCollapse() ) {
-                    steps = 0;
-                }
-                else {
-                    steps++;
-                }
-                d = d.getParent();
-            }
-            if ( max < steps ) {
-                max = steps;
-            }
-        }
-        return max;
-    }
+   
 
     public static int calculateMaxDepth( final Phylogeny phy ) {
         int max = 0;
@@ -271,7 +274,7 @@ public class PhylogenyMethods {
         }
         return max;
     }
-    
+
     public static String[] obtainPresentRanksSorted( final Phylogeny phy ) {
         final Set<String> present_ranks = new HashSet<String>();
         for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
@@ -284,12 +287,12 @@ public class PhylogenyMethods {
                 }
             }
         }
-        final String ordered_ranks[] = new String[present_ranks.size() + 1];
+        final String ordered_ranks[] = new String[ present_ranks.size() + 1 ];
         int c = 0;
         for( final String rank : TaxonomyUtil.RANKS ) {
-             if ( present_ranks.contains( rank ) ) {
-                 ordered_ranks[ c++ ] = rank;
-             }
+            if ( present_ranks.contains( rank ) ) {
+                ordered_ranks[ c++ ] = rank;
+            }
         }
         ordered_ranks[ c ] = "off";
         return ordered_ranks;
@@ -1543,7 +1546,8 @@ public class PhylogenyMethods {
         return nodes_to_delete;
     }
 
-    final static public void transferInternalNamesToConfidenceValues( final Phylogeny phy, final String confidence_type ) {
+    final static public void transferInternalNamesToConfidenceValues( final Phylogeny phy,
+                                                                      final String confidence_type ) {
         final PhylogenyNodeIterator it = phy.iteratorPostorder();
         while ( it.hasNext() ) {
             final PhylogenyNode n = it.next();
@@ -2058,7 +2062,7 @@ public class PhylogenyMethods {
     }
 
     public final static void collapseToDepth( final Phylogeny phy, final int depth ) {
-         if ( phy.getNumberOfExternalNodes() < 3 ) {
+        if ( phy.getNumberOfExternalNodes() < 3 ) {
             return;
         }
         collapseToDepthHelper( phy.getRoot(), 0, depth );
@@ -2086,8 +2090,6 @@ public class PhylogenyMethods {
         }
     }
 
-   
-    
     public final static void collapseToRank( final Phylogeny phy, final int rank ) {
         if ( phy.getNumberOfExternalNodes() < 3 ) {
             return;
@@ -2112,7 +2114,6 @@ public class PhylogenyMethods {
             else {
                 if ( TaxonomyUtil.RANK_TO_INT.get( current_rank ) >= target_rank ) {
                     n.setCollapse( true );
-                    
                     final PhylogenyNodeIterator it = new PreorderTreeIterator( n );
                     while ( it.hasNext() ) {
                         it.next().setCollapse( true );
@@ -2127,7 +2128,7 @@ public class PhylogenyMethods {
             collapseToRankHelper( desc, target_rank );
         }
     }
-    
+
     public final static PhylogenyNode getFirstExternalNode( final PhylogenyNode node ) {
         PhylogenyNode n = node;
         while ( n.isInternal() ) {
@@ -2135,7 +2136,7 @@ public class PhylogenyMethods {
         }
         return n;
     }
-    
+
     public final static PhylogenyNode getLastExternalNode( final PhylogenyNode node ) {
         PhylogenyNode n = node;
         while ( n.isInternal() ) {
@@ -2153,5 +2154,4 @@ public class PhylogenyMethods {
         }
         return false;
     }
-    
 }
diff --git a/forester/java/src/org/forester/surfacing/MinimalDomainomeCalculator.java b/forester/java/src/org/forester/surfacing/MinimalDomainomeCalculator.java
index ab82419..8c25cd3 100644
--- a/forester/java/src/org/forester/surfacing/MinimalDomainomeCalculator.java
+++ b/forester/java/src/org/forester/surfacing/MinimalDomainomeCalculator.java
@@ -19,6 +19,7 @@ import java.util.TreeSet;
 
 import org.forester.application.surfacing;
 import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyMethods;
 import org.forester.phylogeny.PhylogenyNode;
 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
 import org.forester.protein.Domain;
@@ -236,6 +237,250 @@ public final class MinimalDomainomeCalculator {
         }
     }
 
+    public final static void calcNEW( final boolean use_domain_architectures,
+                                      final Phylogeny tre,
+                                      final int level,
+                                      final SortedMap<Species, List<Protein>> protein_lists_per_species,
+                                      final String separator,
+                                      final double ie_cutoff,
+                                      final String outfile_base,
+                                      final boolean write_protein_files )
+            throws IOException {
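+        // For every node of the tree, compute and write the minimal domainome (or DAome):
+        // the intersection of the domain (or domain architecture) sets of all external
+        // descendant genomes of that node.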
+        final SortedMap<String, SortedSet<String>> species_to_features_map = new TreeMap<String, SortedSet<String>>();
+        if ( protein_lists_per_species == null || tre == null ) {
+            throw new IllegalArgumentException( "argument is null" );
+        }
+        if ( protein_lists_per_species.size() < 2 ) {
+            throw new IllegalArgumentException( "not enough genomes" );
+        }
+        final String x;
+        if ( use_domain_architectures ) {
+            x = "DA";
+        }
+        else {
+            x = "domain";
+        }
+        final File outfile = new File( outfile_base + "_minimal_" + x + "ome.tsv" );
+        final File outfile_table = new File( outfile_base + "_minimal_" + x + "ome_matrix.tsv" );
+        SurfacingUtil.checkForOutputFileWriteability( outfile );
+        SurfacingUtil.checkForOutputFileWriteability( outfile_table );
+        final BufferedWriter out = new BufferedWriter( new FileWriter( outfile ) );
+        final BufferedWriter out_table = new BufferedWriter( new FileWriter( outfile_table ) );
+        out.write( "SPECIES\tCOMMON NAME\tCODE\tRANK\t#EXT NODES\tEXT NODE CODES\t#" + x + "\t" + x + "" );
+        out.write( ForesterUtil.LINE_SEPARATOR );
+        // When a level is specified, pool proteins per "quasi-species", i.e. per tree
+        // node at that level (see makeProteinListsPerQuasiSpecies below).
+        SortedMap<String, List<Protein>> protein_lists_per_quasi_species = null;
+        if ( level >= 1 ) {
+            protein_lists_per_quasi_species = makeProteinListsPerQuasiSpecies( tre, level, protein_lists_per_species );
+        }
+        for( final PhylogenyNodeIterator iter = tre.iteratorPostorder(); iter.hasNext(); ) {
+            final PhylogenyNode node = iter.next();
+            final String species_name = node.getNodeData().isHasTaxonomy()
+                    ? node.getNodeData().getTaxonomy().getScientificName() : node.getName();
+            final String common = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy().getCommonName()
+                    : "";
+            final String tcode = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy().getTaxonomyCode()
+                    : "";
+            final String rank = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy().getRank() : "";
+            out.write( species_name );
+            if ( !ForesterUtil.isEmpty( common ) ) {
+                out.write( "\t" + common );
+            }
+            else {
+                out.write( "\t" );
+            }
+            if ( !ForesterUtil.isEmpty( tcode ) ) {
+                out.write( "\t" + tcode );
+            }
+            else {
+                out.write( "\t" );
+            }
+            if ( !ForesterUtil.isEmpty( rank ) ) {
+                out.write( "\t" + rank );
+            }
+            else {
+                out.write( "\t" );
+            }
+            final List<PhylogenyNode> external_descs = node.getAllExternalDescendants();
+            if ( node.isInternal() ) {
+                out.write( "\t" + external_descs.size() + "\t" );
+            }
+            else {
+                out.write( "\t\t" );
+            }
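+            // Collect, for each external descendant genome, the set of features
+            // (domain IDs or domain architecture strings) it contains.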
+            final List<Set<String>> features_per_genome_list = new ArrayList<Set<String>>();
+            boolean first = true;
+            for( final PhylogenyNode external_desc : external_descs ) {
+                final String code = external_desc.getNodeData().getTaxonomy().getTaxonomyCode();
+                if ( node.isInternal() ) {
+                    if ( first ) {
+                        first = false;
+                    }
+                    else {
+                        out.write( ", " );
+                    }
+                    out.write( code );
+                }
+                final List<Protein> proteins_per_species = protein_lists_per_species.get( new BasicSpecies( code ) );
+                final int node_level = PhylogenyMethods.calculateLevel( node );
+                final List<Protein> proteins_per_quasi_species = protein_lists_per_species
+                        .get( new BasicSpecies( code ) );
+                if ( proteins_per_species != null ) {
+                    final SortedSet<String> features_per_genome = new TreeSet<String>();
+                    for( final Protein protein : proteins_per_species ) {
+                        if ( use_domain_architectures ) {
+                            final String da = protein.toDomainArchitectureString( separator, ie_cutoff );
+                            features_per_genome.add( da );
+                        }
+                        else {
+                            List<Domain> domains = protein.getProteinDomains();
+                            for( final Domain domain : domains ) {
+                                if ( ( ie_cutoff <= -1 ) || ( domain.getPerDomainEvalue() <= ie_cutoff ) ) {
+                                    features_per_genome.add( domain.getDomainId() );
+                                }
+                            }
+                        }
+                    }
+                    if ( features_per_genome.size() > 0 ) {
+                        features_per_genome_list.add( features_per_genome );
+                    }
+                }
+            } // for( final PhylogenyNode external_desc : external_descs )
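+            // The minimal feature set of this node is the intersection of the feature
+            // sets of all its external descendant genomes.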
+            if ( features_per_genome_list.size() > 0 ) {
+                SortedSet<String> intersection = calcIntersection( features_per_genome_list );
+                out.write( "\t" + intersection.size() + "\t" );
+                first = true;
+                for( final String s : intersection ) {
+                    if ( first ) {
+                        first = false;
+                    }
+                    else {
+                        out.write( ", " );
+                    }
+                    out.write( s );
+                }
+                out.write( ForesterUtil.LINE_SEPARATOR );
+                species_to_features_map.put( species_name, intersection );
+            }
+        }
+        final SortedSet<String> all_species_names = new TreeSet<String>();
+        final SortedSet<String> all_features = new TreeSet<String>();
+        for( final Entry<String, SortedSet<String>> e : species_to_features_map.entrySet() ) {
+            all_species_names.add( e.getKey() );
+            for( final String f : e.getValue() ) {
+                all_features.add( f );
+            }
+        }
+        out_table.write( '\t' );
+        boolean first = true;
+        for( final String species_name : all_species_names ) {
+            if ( first ) {
+                first = false;
+            }
+            else {
+                out_table.write( '\t' );
+            }
+            out_table.write( species_name );
+        }
+        out_table.write( ForesterUtil.LINE_SEPARATOR );
+        for( final String das : all_features ) {
+            out_table.write( das );
+            out_table.write( '\t' );
+            first = true;
+            for( final String species_name : all_species_names ) {
+                if ( first ) {
+                    first = false;
+                }
+                else {
+                    out_table.write( '\t' );
+                }
+                if ( species_to_features_map.get( species_name ).contains( das ) ) {
+                    out_table.write( '1' );
+                }
+                else {
+                    out_table.write( '0' );
+                }
+            }
+            out_table.write( ForesterUtil.LINE_SEPARATOR );
+        }
+        out.flush();
+        out.close();
+        out_table.flush();
+        out_table.close();
+        ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote minimal DAome data to           : " + outfile );
+        ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote minimal DAome data to (as table): " + outfile_table );
+        if ( write_protein_files ) {
+            final String protdirname;
+            final String a;
+            final String b;
+            if ( use_domain_architectures ) {
+                a = "_DA";
+                b = "domain architectures (DAs)";
+                protdirname = "_DAS";
+            }
+            else {
+                a = "_domain";
+                b = "domains";
+                protdirname = "_DOMAINS";
+            }
+            final File prot_dir = new File( outfile_base + protdirname );
+            final boolean success = prot_dir.mkdir();
+            if ( !success ) {
+                throw new IOException( "failed to create dir " + prot_dir );
+            }
+            int total = 0;
+            final String dir = outfile_base + protdirname + "/";
+            for( final String feat : all_features ) {
+                final File extract_outfile = new File( dir + feat + a + surfacing.SEQ_EXTRACT_SUFFIX );
+                SurfacingUtil.checkForOutputFileWriteability( extract_outfile );
+                final Writer proteins_file_writer = new BufferedWriter( new FileWriter( extract_outfile ) );
+                final int counter = extractProteinFeatures( use_domain_architectures,
+                                                            protein_lists_per_species,
+                                                            feat,
+                                                            proteins_file_writer,
+                                                            ie_cutoff,
+                                                            separator );
+                if ( counter < 1 ) {
+                    ForesterUtil.printWarningMessage( "surfacing", feat + " not present (in " + b + " extraction)" );
+                }
+                total += counter;
+                proteins_file_writer.close();
+            }
+            ForesterUtil.programMessage( "surfacing",
+                                         "Wrote " + total + " individual " + b + " from a total of "
+                                                 + all_features.size() + " into: " + dir );
+        }
+    }
+
+    private final static SortedMap<String, List<Protein>> makeProteinListsPerQuasiSpecies( final Phylogeny tre,
+                                                                                           final int level,
+                                                                                           final SortedMap<Species, List<Protein>> protein_lists_per_species ) {
+        final SortedMap<String, List<Protein>> protein_lists_per_quasi_species = new TreeMap<String, List<Protein>>();
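+        // A "quasi-species" is a tree node whose level equals the requested level; its
+        // protein list pools the proteins of all its external descendant genomes.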
+        for( final PhylogenyNodeIterator iter = tre.iteratorPostorder(); iter.hasNext(); ) {
+            final PhylogenyNode node = iter.next();
+            final int node_level = PhylogenyMethods.calculateLevel( node );
+            if ( node_level == level ) {
+                final List<PhylogenyNode> external_descs = node.getAllExternalDescendants();
+                final List<Protein> protein_list_per_quasi_species = new ArrayList<Protein>();
+                for( final PhylogenyNode external_desc : external_descs ) {
+                    final String code = external_desc.getNodeData().getTaxonomy().getTaxonomyCode();
+                    final List<Protein> proteins_per_species = protein_lists_per_species
+                            .get( new BasicSpecies( code ) );
+                    for( Protein protein : proteins_per_species ) {
+                        protein_list_per_quasi_species.add( protein );
+                    }
+                }
+                final String species_name = node.getNodeData().isHasTaxonomy()
+                        ? node.getNodeData().getTaxonomy().getScientificName() : node.getName();
+                protein_lists_per_quasi_species.put( species_name, protein_list_per_quasi_species );
+            }
+        }
+        return protein_lists_per_quasi_species;
+    }
+
     private final static SortedSet<String> calcIntersection( final List<Set<String>> features_per_genome_list ) {
         final Set<String> first = features_per_genome_list.get( 0 );
         final SortedSet<String> my_first = new TreeSet<String>();
diff --git a/forester/java/src/org/forester/surfacing/MinimalDomainomeCalculatorOLD.java b/forester/java/src/org/forester/surfacing/MinimalDomainomeCalculatorOLD.java
new file mode 100644
index 0000000..d306c0f
--- /dev/null
+++ b/forester/java/src/org/forester/surfacing/MinimalDomainomeCalculatorOLD.java
@@ -0,0 +1,378 @@
+
+package org.forester.surfacing;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.Writer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+import org.forester.application.surfacing;
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.protein.Domain;
+import org.forester.protein.Protein;
+import org.forester.species.BasicSpecies;
+import org.forester.species.Species;
+import org.forester.surfacing.SurfacingUtil.DomainComparator;
+import org.forester.util.ForesterUtil;
+
+public final class MinimalDomainomeCalculatorOLD {
+
+    public final static void calc( final boolean use_domain_architectures,
+                                   final Phylogeny tre,
+                                   final SortedMap<Species, List<Protein>> protein_lists_per_species,
+                                   final String separator,
+                                   final double ie_cutoff,
+                                   final String outfile_base,
+                                   final boolean write_protein_files )
+            throws IOException {
+        final SortedMap<String, SortedSet<String>> species_to_features_map = new TreeMap<String, SortedSet<String>>();
+        if ( protein_lists_per_species == null || tre == null ) {
+            throw new IllegalArgumentException( "argument is null" );
+        }
+        if ( protein_lists_per_species.size() < 2 ) {
+            throw new IllegalArgumentException( "not enough genomes" );
+        }
+        final String x;
+        if ( use_domain_architectures ) {
+            x = "DA";
+        }
+        else {
+            x = "domain";
+        }
+        final File outfile = new File( outfile_base + "_minimal_" + x + "ome.tsv" );
+        final File outfile_table = new File( outfile_base + "_minimal_" + x + "ome_matrix.tsv" );
+        SurfacingUtil.checkForOutputFileWriteability( outfile );
+        SurfacingUtil.checkForOutputFileWriteability( outfile_table );
+        final BufferedWriter out = new BufferedWriter( new FileWriter( outfile ) );
+        final BufferedWriter out_table = new BufferedWriter( new FileWriter( outfile_table ) );
+        out.write( "SPECIES\tCOMMON NAME\tCODE\tRANK\t#EXT NODES\tEXT NODE CODES\t#" + x + "\t" + x + "" );
+        out.write( ForesterUtil.LINE_SEPARATOR );
+        for( final PhylogenyNodeIterator iter = tre.iteratorPostorder(); iter.hasNext(); ) {
+            final PhylogenyNode node = iter.next();
+            final String species_name = node.getNodeData().isHasTaxonomy()
+                    ? node.getNodeData().getTaxonomy().getScientificName() : node.getName();
+            final String common = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy().getCommonName()
+                    : "";
+            final String tcode = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy().getTaxonomyCode()
+                    : "";
+            final String rank = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy().getRank() : "";
+            out.write( species_name );
+            if ( !ForesterUtil.isEmpty( common ) ) {
+                out.write( "\t" + common );
+            }
+            else {
+                out.write( "\t" );
+            }
+            if ( !ForesterUtil.isEmpty( tcode ) ) {
+                out.write( "\t" + tcode );
+            }
+            else {
+                out.write( "\t" );
+            }
+            if ( !ForesterUtil.isEmpty( rank ) ) {
+                out.write( "\t" + rank );
+            }
+            else {
+                out.write( "\t" );
+            }
+            final List<PhylogenyNode> external_descs = node.getAllExternalDescendants();
+            if ( node.isInternal() ) {
+                out.write( "\t" + external_descs.size() + "\t" );
+            }
+            else {
+                out.write( "\t\t" );
+            }
+            final List<Set<String>> features_per_genome_list = new ArrayList<Set<String>>();
+            boolean first = true;
+            for( final PhylogenyNode external_desc : external_descs ) {
+                final String code = external_desc.getNodeData().getTaxonomy().getTaxonomyCode();
+                if ( node.isInternal() ) {
+                    if ( first ) {
+                        first = false;
+                    }
+                    else {
+                        out.write( ", " );
+                    }
+                    out.write( code );
+                }
+                final List<Protein> proteins_per_species = protein_lists_per_species.get( new BasicSpecies( code ) );
+                if ( proteins_per_species != null ) {
+                    final SortedSet<String> features_per_genome = new TreeSet<String>();
+                    for( final Protein protein : proteins_per_species ) {
+                        if ( use_domain_architectures ) {
+                            final String da = protein.toDomainArchitectureString( separator, ie_cutoff );
+                            features_per_genome.add( da );
+                        }
+                        else {
+                            List<Domain> domains = protein.getProteinDomains();
+                            for( final Domain domain : domains ) {
+                                if ( ( ie_cutoff <= -1 ) || ( domain.getPerDomainEvalue() <= ie_cutoff ) ) {
+                                    features_per_genome.add( domain.getDomainId() );
+                                }
+                            }
+                        }
+                    }
+                    if ( features_per_genome.size() > 0 ) {
+                        features_per_genome_list.add( features_per_genome );
+                    }
+                }
+            }
+            if ( features_per_genome_list.size() > 0 ) {
+                SortedSet<String> intersection = calcIntersection( features_per_genome_list );
+                out.write( "\t" + intersection.size() + "\t" );
+                first = true;
+                for( final String s : intersection ) {
+                    if ( first ) {
+                        first = false;
+                    }
+                    else {
+                        out.write( ", " );
+                    }
+                    out.write( s );
+                }
+                out.write( ForesterUtil.LINE_SEPARATOR );
+                species_to_features_map.put( species_name, intersection );
+            }
+        }
+        final SortedSet<String> all_species_names = new TreeSet<String>();
+        final SortedSet<String> all_features = new TreeSet<String>();
+        for( final Entry<String, SortedSet<String>> e : species_to_features_map.entrySet() ) {
+            all_species_names.add( e.getKey() );
+            for( final String f : e.getValue() ) {
+                all_features.add( f );
+            }
+        }
+        out_table.write( '\t' );
+        boolean first = true;
+        for( final String species_name : all_species_names ) {
+            if ( first ) {
+                first = false;
+            }
+            else {
+                out_table.write( '\t' );
+            }
+            out_table.write( species_name );
+        }
+        out_table.write( ForesterUtil.LINE_SEPARATOR );
+        for( final String das : all_features ) {
+            out_table.write( das );
+            out_table.write( '\t' );
+            first = true;
+            for( final String species_name : all_species_names ) {
+                if ( first ) {
+                    first = false;
+                }
+                else {
+                    out_table.write( '\t' );
+                }
+                if ( species_to_features_map.get( species_name ).contains( das ) ) {
+                    out_table.write( '1' );
+                }
+                else {
+                    out_table.write( '0' );
+                }
+            }
+            out_table.write( ForesterUtil.LINE_SEPARATOR );
+        }
+        out.flush();
+        out.close();
+        out_table.flush();
+        out_table.close();
+        ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote minimal DAome data to           : " + outfile );
+        ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote minimal DAome data to (as table): " + outfile_table );
+        if ( write_protein_files ) {
+            final String protdirname;
+            final String a;
+            final String b;
+            if ( use_domain_architectures ) {
+                a = "_DA";
+                b = "domain architectures (DAs)";
+                protdirname = "_DAS";
+            }
+            else {
+                a = "_domain";
+                b = "domains";
+                protdirname = "_DOMAINS";
+            }
+            final File prot_dir = new File( outfile_base + protdirname );
+            final boolean success = prot_dir.mkdir();
+            if ( !success ) {
+                throw new IOException( "failed to create dir " + prot_dir );
+            }
+            int total = 0;
+            final String dir = outfile_base + protdirname + "/";
+            for( final String feat : all_features ) {
+                final File extract_outfile = new File( dir + feat + a + surfacing.SEQ_EXTRACT_SUFFIX );
+                SurfacingUtil.checkForOutputFileWriteability( extract_outfile );
+                final Writer proteins_file_writer = new BufferedWriter( new FileWriter( extract_outfile ) );
+                final int counter = extractProteinFeatures( use_domain_architectures,
+                                                            protein_lists_per_species,
+                                                            feat,
+                                                            proteins_file_writer,
+                                                            ie_cutoff,
+                                                            separator );
+                if ( counter < 1 ) {
+                    ForesterUtil.printWarningMessage( "surfacing", feat + " not present (in " + b + " extraction)" );
+                }
+                total += counter;
+                proteins_file_writer.close();
+            }
+            ForesterUtil.programMessage( "surfacing",
+                                         "Wrote " + total + " individual " + b + " from a total of "
+                                                 + all_features.size() + " into: " + dir );
+        }
+    }
+
+    private final static SortedSet<String> calcIntersection( final List<Set<String>> features_per_genome_list ) {
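+        // Start from a sorted copy of the first genome's feature set, then retain only
+        // the features present in every other genome.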
+        final Set<String> first = features_per_genome_list.get( 0 );
+        final SortedSet<String> my_first = new TreeSet<String>();
+        for( final String s : first ) {
+            my_first.add( s );
+        }
+        for( int i = 1; i < features_per_genome_list.size(); ++i ) {
+            my_first.retainAll( features_per_genome_list.get( i ) );
+        }
+        return my_first;
+    }
+
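+    /**
+     * Writes one tab-separated line for each protein that contains the given domain
+     * (or, in domain architecture mode, whose architecture string equals domain_id).
+     *
+     * @return the number of proteins written
+     */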
+    private final static int extractProteinFeatures( final boolean use_domain_architectures,
+                                                     final SortedMap<Species, List<Protein>> protein_lists_per_species,
+                                                     final String domain_id,
+                                                     final Writer out,
+                                                     final double ie_cutoff,
+                                                     final String domain_separator )
+            throws IOException {
+        int counter = 0;
+        final String separator_for_output = "\t";
+        for( final Species species : protein_lists_per_species.keySet() ) {
+            final List<Protein> proteins_per_species = protein_lists_per_species.get( species );
+            for( final Protein protein : proteins_per_species ) {
+                if ( use_domain_architectures ) {
+                    if ( domain_id.equals( protein.toDomainArchitectureString( domain_separator, ie_cutoff ) ) ) {
+                        int from = Integer.MAX_VALUE;
+                        int to = -1;
+                        for( final Domain d : protein.getProteinDomains() ) {
+                            if ( ( ie_cutoff <= -1 ) || ( d.getPerDomainEvalue() <= ie_cutoff ) ) {
+                                if ( d.getFrom() < from ) {
+                                    from = d.getFrom();
+                                }
+                                if ( d.getTo() > to ) {
+                                    to = d.getTo();
+                                }
+                            }
+                        }
+                        out.write( protein.getSpecies().getSpeciesId() );
+                        out.write( separator_for_output );
+                        out.write( protein.getProteinId().getId() );
+                        out.write( separator_for_output );
+                        out.write( domain_id );
+                        out.write( separator_for_output );
+                        out.write( "/" );
+                        out.write( from + "-" + to );
+                        out.write( "/" );
+                        out.write( SurfacingConstants.NL );
+                        ++counter;
+                    }
+                }
+                else {
+                    final List<Domain> domains = protein.getProteinDomains( domain_id );
+                    if ( domains.size() > 0 ) {
+                        out.write( protein.getSpecies().getSpeciesId() );
+                        out.write( separator_for_output );
+                        out.write( protein.getProteinId().getId() );
+                        out.write( separator_for_output );
+                        out.write( domain_id );
+                        out.write( separator_for_output );
+                        for( final Domain domain : domains ) {
+                            if ( ( ie_cutoff < 0 ) || ( domain.getPerDomainEvalue() <= ie_cutoff ) ) {
+                                out.write( "/" );
+                                out.write( domain.getFrom() + "-" + domain.getTo() );
+                            }
+                        }
+                        out.write( "/" );
+                        out.write( separator_for_output );
+                        final List<Domain> domain_list = new ArrayList<Domain>();
+                        for( final Domain domain : protein.getProteinDomains() ) {
+                            if ( ( ie_cutoff < 0 ) || ( domain.getPerDomainEvalue() <= ie_cutoff ) ) {
+                                domain_list.add( domain );
+                            }
+                        }
+                        final Domain domain_ary[] = new Domain[ domain_list.size() ];
+                        for( int i = 0; i < domain_list.size(); ++i ) {
+                            domain_ary[ i ] = domain_list.get( i );
+                        }
+                        Arrays.sort( domain_ary, new DomainComparator( true ) );
+                        out.write( "{" );
+                        boolean first = true;
+                        for( final Domain domain : domain_ary ) {
+                            if ( first ) {
+                                first = false;
+                            }
+                            else {
+                                out.write( "," );
+                            }
+                            out.write( domain.getDomainId().toString() );
+                            out.write( ":" + domain.getFrom() + "-" + domain.getTo() );
+                            out.write( ":" + domain.getPerDomainEvalue() );
+                        }
+                        out.write( "}" );
+                        if ( !( ForesterUtil.isEmpty( protein.getDescription() )
+                                || protein.getDescription().equals( SurfacingConstants.NONE ) ) ) {
+                            out.write( protein.getDescription() );
+                        }
+                        out.write( separator_for_output );
+                        if ( !( ForesterUtil.isEmpty( protein.getAccession() )
+                                || protein.getAccession().equals( SurfacingConstants.NONE ) ) ) {
+                            out.write( protein.getAccession() );
+                        }
+                        out.write( SurfacingConstants.NL );
+                        ++counter;
+                    }
+                }
+            }
+        }
+        out.flush();
+        return counter;
+    }
+
+    public static void main( final String[] args ) {
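+        // Ad-hoc sanity check for calcIntersection(): with the sets defined below,
+        // the expected output is [c].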
+        Set<String> a = new HashSet<String>();
+        Set<String> b = new HashSet<String>();
+        Set<String> c = new HashSet<String>();
+        Set<String> d = new HashSet<String>();
+        a.add( "x" );
+        a.add( "b" );
+        a.add( "c" );
+        b.add( "a" );
+        b.add( "b" );
+        b.add( "c" );
+        c.add( "a" );
+        c.add( "b" );
+        c.add( "c" );
+        c.add( "c" );
+        c.add( "f" );
+        d.add( "a" );
+        d.add( "c" );
+        d.add( "d" );
+        List<Set<String>> domains_per_genome_list = new ArrayList<Set<String>>();
+        domains_per_genome_list.add( a );
+        domains_per_genome_list.add( b );
+        domains_per_genome_list.add( c );
+        domains_per_genome_list.add( d );
+        Set<String> x = calcIntersection( domains_per_genome_list );
+        System.out.println( x );
+    }
+}
diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java
index de674db..949d9dd 100644
--- a/forester/java/src/org/forester/test/Test.java
+++ b/forester/java/src/org/forester/test/Test.java
@@ -551,6 +551,15 @@ public final class Test {
             System.out.println( "failed." );
             failed++;
         }
+        System.out.print( "Phylogeny methods:" );
+        if ( Test.testPhylogenyMethods() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
         System.out.print( "Postorder Iterator: " );
         if ( Test.testPostOrderIterator() ) {
             System.out.println( "OK." );
@@ -13444,6 +13453,75 @@ public final class Test {
         }
         return true;
     }
+    
+    private static boolean testPhylogenyMethods() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final Phylogeny t0 = factory.create( "((((A,B)ab,C)abc,D)abcd,E)r", new NHXParser() )[ 0 ];
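+            // In t0 all external nodes have level 0; ab, abc, abcd and r have levels 1, 2, 3 and 4.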
+          
+            if ( PhylogenyMethods.calculateLevel( t0.getNode( "A" ) ) != 0 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.calculateLevel( t0.getNode( "B" ) ) != 0 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.calculateLevel( t0.getNode( "ab" ) ) != 1 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.calculateLevel( t0.getNode( "C" ) ) != 0 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.calculateLevel( t0.getNode( "abc" ) ) != 2 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.calculateLevel( t0.getNode( "D" ) ) != 0 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.calculateLevel( t0.getNode( "abcd" ) ) != 3 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.calculateLevel( t0.getNode( "E" ) ) != 0 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.calculateLevel( t0.getNode( "r" ) ) != 4 ) {
+                return false;
+            }
+            final Phylogeny t1 = factory.create( "((((A,B)ab,C)abc,D)abcd,E,((((((X)1)2)3)4)5)6)r", new NHXParser() )[ 0 ];
+            if ( PhylogenyMethods.calculateLevel( t1.getNode( "r" ) ) != 7 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.calculateLevel( t1.getNode( "X" ) ) != 0 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.calculateLevel( t1.getNode( "6" ) ) != 6 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.calculateLevel( t1.getNode( "5" ) ) != 5 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.calculateLevel( t1.getNode( "4" ) ) != 4 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.calculateLevel( t1.getNode( "3" ) ) != 3 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.calculateLevel( t1.getNode( "2" ) ) != 2 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.calculateLevel( t1.getNode( "1" ) ) != 1 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.calculateLevel( t1.getNode( "abcd" ) ) != 3 ) {
+                return false;
+            }
+            
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
 
     private static boolean testUniprotEntryRetrieval() {
         try {