removed unneeded import
[jalview.git] / forester / java / src / org / forester / surfacing / MinimalDomainomeCalculator.java
index ab82419..0c9ca73 100644 (file)
@@ -19,6 +19,7 @@ import java.util.TreeSet;
 
 import org.forester.application.surfacing;
 import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyMethods;
 import org.forester.phylogeny.PhylogenyNode;
 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
 import org.forester.protein.Domain;
@@ -32,6 +33,7 @@ public final class MinimalDomainomeCalculator {
 
     public final static void calc( final boolean use_domain_architectures,
                                    final Phylogeny tre,
+                                   final int target_level,
                                    final SortedMap<Species, List<Protein>> protein_lists_per_species,
                                    final String separator,
                                    final double ie_cutoff,
@@ -60,8 +62,20 @@ public final class MinimalDomainomeCalculator {
         final BufferedWriter out_table = new BufferedWriter( new FileWriter( outfile_table ) );
         out.write( "SPECIES\tCOMMON NAME\tCODE\tRANK\t#EXT NODES\tEXT NODE CODES\t#" + x + "\t" + x + "" );
         out.write( ForesterUtil.LINE_SEPARATOR );
+        ///////////
+        //////////
+        SortedMap<String, List<Protein>> protein_lists_per_quasi_species = null;
+        if ( target_level >= 1 ) {
+            protein_lists_per_quasi_species = makeProteinListsPerQuasiSpecies( tre,
+                                                                               target_level,
+                                                                               protein_lists_per_species );
+           
+        }
+        /////////
+        ///////////
         for( final PhylogenyNodeIterator iter = tre.iteratorPostorder(); iter.hasNext(); ) {
             final PhylogenyNode node = iter.next();
+            final int node_level = PhylogenyMethods.calculateLevel( node );
             final String species_name = node.getNodeData().isHasTaxonomy()
                     ? node.getNodeData().getTaxonomy().getScientificName() : node.getName();
             final String common = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy().getCommonName()
@@ -69,67 +83,126 @@ public final class MinimalDomainomeCalculator {
             final String tcode = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy().getTaxonomyCode()
                     : "";
             final String rank = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy().getRank() : "";
-            out.write( species_name );
-            if ( !ForesterUtil.isEmpty( common ) ) {
-                out.write( "\t" + common );
-            }
-            else {
-                out.write( "\t" );
-            }
-            if ( !ForesterUtil.isEmpty( tcode ) ) {
-                out.write( "\t" + tcode );
-            }
-            else {
-                out.write( "\t" );
-            }
-            if ( !ForesterUtil.isEmpty( rank ) ) {
-                out.write( "\t" + rank );
-            }
-            else {
-                out.write( "\t" );
-            }
             final List<PhylogenyNode> external_descs = node.getAllExternalDescendants();
-            if ( node.isInternal() ) {
-                out.write( "\t" + external_descs.size() + "\t" );
-            }
-            else {
-                out.write( "\t\t" );
+            if ( ( target_level < 1 ) || ( node_level >= target_level ) ) {
+                out.write( species_name );
+                if ( !ForesterUtil.isEmpty( common ) ) {
+                    out.write( "\t" + common );
+                }
+                else {
+                    out.write( "\t" );
+                }
+                if ( !ForesterUtil.isEmpty( tcode ) ) {
+                    out.write( "\t" + tcode );
+                }
+                else {
+                    out.write( "\t" );
+                }
+                if ( !ForesterUtil.isEmpty( rank ) ) {
+                    out.write( "\t" + rank );
+                }
+                else {
+                    out.write( "\t" );
+                }
+                if ( node.isInternal() ) {
+                    out.write( "\t" + external_descs.size() + "\t" );
+                }
+                else {
+                    out.write( "\t\t" );
+                }
             }
             final List<Set<String>> features_per_genome_list = new ArrayList<Set<String>>();
             boolean first = true;
-            for( final PhylogenyNode external_desc : external_descs ) {
-                final String code = external_desc.getNodeData().getTaxonomy().getTaxonomyCode();
-                if ( node.isInternal() ) {
-                    if ( first ) {
-                        first = false;
-                    }
-                    else {
-                        out.write( ", " );
+            if ( target_level >= 1 ) {
+                ////////////
+                ////////////
+                if ( node_level >= target_level ) {
+                    final List<PhylogenyNode> given_level_descs = PhylogenyMethods
+                            .getAllDescendantsOfGivenLevel( node, target_level );
+                    for( final PhylogenyNode given_level_desc : given_level_descs ) {
+                        final String spec_name = given_level_desc.getNodeData().isHasTaxonomy()
+                                ? given_level_desc.getNodeData().getTaxonomy().getScientificName()
+                                : given_level_desc.getName();
+                        if ( node.isInternal() ) {
+                            if ( first ) {
+                                first = false;
+                            }
+                            else {
+                                out.write( ", " );
+                            }
+                            out.write( "sp_n=" + spec_name );
+                        }
+                        final List<Protein> proteins_per_species = protein_lists_per_quasi_species.get( spec_name );
+                        if ( proteins_per_species != null ) {
+                            final SortedSet<String> features_per_genome = new TreeSet<String>();
+                            for( final Protein protein : proteins_per_species ) {
+                                if ( use_domain_architectures ) {
+                                    final String da = protein.toDomainArchitectureString( separator, ie_cutoff );
+                                    features_per_genome.add( da );
+                                }
+                                else {
+                                    List<Domain> domains = protein.getProteinDomains();
+                                    for( final Domain domain : domains ) {
+                                        if ( ( ie_cutoff <= -1 ) || ( domain.getPerDomainEvalue() <= ie_cutoff ) ) {
+                                            features_per_genome.add( domain.getDomainId() );
+                                        }
+                                    }
+                                }
+                            }
+                            System.out.println( ">>>>>>>>>>>>>> features_per_genome.size()=" + features_per_genome.size() );
+                            if ( features_per_genome.size() > 0 ) {
+                                features_per_genome_list.add( features_per_genome );
+                            }
+                            else {
+                                System.out.println( "error!" );
+                                System.exit( -1 );
+                            }
+                        }
+                        else {
+                            System.out.println( "error!" );
+                            System.exit( -1 );
+                        }
                     }
-                    out.write( code );
                 }
-                final List<Protein> proteins_per_species = protein_lists_per_species.get( new BasicSpecies( code ) );
-                if ( proteins_per_species != null ) {
-                    final SortedSet<String> features_per_genome = new TreeSet<String>();
-                    for( final Protein protein : proteins_per_species ) {
-                        if ( use_domain_architectures ) {
-                            final String da = protein.toDomainArchitectureString( separator, ie_cutoff );
-                            features_per_genome.add( da );
+                ///////////
+                ///////////
+            }
+            else {
+                for( final PhylogenyNode external_desc : external_descs ) {
+                    final String code = external_desc.getNodeData().getTaxonomy().getTaxonomyCode();
+                    if ( node.isInternal() ) {
+                        if ( first ) {
+                            first = false;
                         }
                         else {
-                            List<Domain> domains = protein.getProteinDomains();
-                            for( final Domain domain : domains ) {
-                                if ( ( ie_cutoff <= -1 ) || ( domain.getPerDomainEvalue() <= ie_cutoff ) ) {
-                                    features_per_genome.add( domain.getDomainId() );
+                            out.write( ", " );
+                        }
+                        out.write( code );
+                    }
+                    final List<Protein> proteins_per_species = protein_lists_per_species
+                            .get( new BasicSpecies( code ) );
+                    if ( proteins_per_species != null ) {
+                        final SortedSet<String> features_per_genome = new TreeSet<String>();
+                        for( final Protein protein : proteins_per_species ) {
+                            if ( use_domain_architectures ) {
+                                final String da = protein.toDomainArchitectureString( separator, ie_cutoff );
+                                features_per_genome.add( da );
+                            }
+                            else {
+                                List<Domain> domains = protein.getProteinDomains();
+                                for( final Domain domain : domains ) {
+                                    if ( ( ie_cutoff <= -1 ) || ( domain.getPerDomainEvalue() <= ie_cutoff ) ) {
+                                        features_per_genome.add( domain.getDomainId() );
+                                    }
                                 }
                             }
                         }
+                        if ( features_per_genome.size() > 0 ) {
+                            features_per_genome_list.add( features_per_genome );
+                        }
                     }
-                    if ( features_per_genome.size() > 0 ) {
-                        features_per_genome_list.add( features_per_genome );
-                    }
-                }
-            }
+                } // for( final PhylogenyNode external_desc : external_descs )
+            } // else
             if ( features_per_genome_list.size() > 0 ) {
                 SortedSet<String> intersection = calcIntersection( features_per_genome_list );
                 out.write( "\t" + intersection.size() + "\t" );
@@ -236,6 +309,39 @@ public final class MinimalDomainomeCalculator {
         }
     }
 
+    /**
+     * For every node whose PhylogenyMethods.calculateLevel equals level, pools the proteins of all its
+     * external descendants under the node's scientific name (node name if it has no taxonomy).
+     */
+    private final static SortedMap<String, List<Protein>> makeProteinListsPerQuasiSpecies( final Phylogeny tre,
+                                                                                           final int level,
+                                                                                           final SortedMap<Species, List<Protein>> protein_lists_per_species ) {
+        final SortedMap<String, List<Protein>> protein_lists_per_quasi_species = new TreeMap<String, List<Protein>>();
+        System.out.println( "---------------------------------" );
+        System.out.println( "level=" + level );
+        for( final PhylogenyNodeIterator iter = tre.iteratorPostorder(); iter.hasNext(); ) {
+            final PhylogenyNode node = iter.next();
+            final int node_level = PhylogenyMethods.calculateLevel( node );
+            if ( node_level == level ) {
+                System.out.println( "level=" + level );
+                final List<PhylogenyNode> external_descs = node.getAllExternalDescendants();
+                final List<Protein> protein_list_per_quasi_species = new ArrayList<Protein>();
+                for( final PhylogenyNode external_desc : external_descs ) {
+                    final String code = external_desc.getNodeData().getTaxonomy().getTaxonomyCode();
+                    final List<Protein> proteins_per_species = protein_lists_per_species.get( new BasicSpecies( code ) );
+                    if ( proteins_per_species != null ) { // get() yields null for a code with no protein list; skip it
+                        protein_list_per_quasi_species.addAll( proteins_per_species );
+                    }
+                }
+                final String species_name = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy().getScientificName() : node.getName();
+                System.out.println( "species_name=" + species_name );
+                protein_lists_per_quasi_species.put( species_name, protein_list_per_quasi_species );
+                System.out.println( ">>>>" + protein_list_per_quasi_species.size() );
+            }
+        }
+        return protein_lists_per_quasi_species;
+    }
+
     private final static SortedSet<String> calcIntersection( final List<Set<String>> features_per_genome_list ) {
         final Set<String> first = features_per_genome_list.get( 0 );
         final SortedSet<String> my_first = new TreeSet<String>();