X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fsurfacing%2FMinimalDomainomeCalculator.java;h=0c9ca73ca250da1e43768e892fefa157e9f2c44d;hb=d605114bdf420c6cb680b02bb10ea25f09db769c;hp=ab8241968b4f7790b19d9e54b9dc6aa594c5153d;hpb=1ec82d057c07d5b936fb42fa129b809d65aeb5e5;p=jalview.git diff --git a/forester/java/src/org/forester/surfacing/MinimalDomainomeCalculator.java b/forester/java/src/org/forester/surfacing/MinimalDomainomeCalculator.java index ab82419..0c9ca73 100644 --- a/forester/java/src/org/forester/surfacing/MinimalDomainomeCalculator.java +++ b/forester/java/src/org/forester/surfacing/MinimalDomainomeCalculator.java @@ -19,6 +19,7 @@ import java.util.TreeSet; import org.forester.application.surfacing; import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.protein.Domain; @@ -32,6 +33,7 @@ public final class MinimalDomainomeCalculator { public final static void calc( final boolean use_domain_architectures, final Phylogeny tre, + final int target_level, final SortedMap> protein_lists_per_species, final String separator, final double ie_cutoff, @@ -60,8 +62,20 @@ public final class MinimalDomainomeCalculator { final BufferedWriter out_table = new BufferedWriter( new FileWriter( outfile_table ) ); out.write( "SPECIES\tCOMMON NAME\tCODE\tRANK\t#EXT NODES\tEXT NODE CODES\t#" + x + "\t" + x + "" ); out.write( ForesterUtil.LINE_SEPARATOR ); + /////////// + ////////// + SortedMap> protein_lists_per_quasi_species = null; + if ( target_level >= 1 ) { + protein_lists_per_quasi_species = makeProteinListsPerQuasiSpecies( tre, + target_level, + protein_lists_per_species ); + + } + ///////// + /////////// for( final PhylogenyNodeIterator iter = tre.iteratorPostorder(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); + final int node_level = PhylogenyMethods.calculateLevel( node ); final String species_name = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy().getScientificName() : node.getName(); final String common = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy().getCommonName() @@ -69,67 +83,126 @@ public final class MinimalDomainomeCalculator { final String tcode = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy().getTaxonomyCode() : ""; final String rank = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy().getRank() : ""; - out.write( species_name ); - if ( !ForesterUtil.isEmpty( common ) ) { - out.write( "\t" + common ); - } - else { - out.write( "\t" ); - } - if ( !ForesterUtil.isEmpty( tcode ) ) { - out.write( "\t" + tcode ); - } - else { - out.write( "\t" ); - } - if ( !ForesterUtil.isEmpty( rank ) ) { - out.write( "\t" + rank ); - } - else { - out.write( "\t" ); - } final List external_descs = node.getAllExternalDescendants(); - if ( node.isInternal() ) { - out.write( "\t" + external_descs.size() + "\t" ); - } - else { - out.write( "\t\t" ); + if ( ( target_level < 1 ) || ( node_level >= target_level ) ) { + out.write( species_name ); + if ( !ForesterUtil.isEmpty( common ) ) { + out.write( "\t" + common ); + } + else { + out.write( "\t" ); + } + if ( !ForesterUtil.isEmpty( tcode ) ) { + out.write( "\t" + tcode ); + } + else { + out.write( "\t" ); + } + if ( !ForesterUtil.isEmpty( rank ) ) { + out.write( "\t" + rank ); + } + else { + out.write( "\t" ); + } + if ( node.isInternal() ) { + out.write( "\t" + external_descs.size() + "\t" ); + } + else { + out.write( "\t\t" ); + } } final List> features_per_genome_list = new ArrayList>(); boolean first = true; - for( final PhylogenyNode external_desc : external_descs ) { - final String code = external_desc.getNodeData().getTaxonomy().getTaxonomyCode(); - if ( node.isInternal() ) { - if ( first ) { - first = false; - } - else { - out.write( ", " ); + if ( target_level >= 1 ) { + //////////// + //////////// + if ( node_level >= target_level ) { + final List given_level_descs = PhylogenyMethods + .getAllDescendantsOfGivenLevel( node, target_level ); + for( final PhylogenyNode given_level_desc : given_level_descs ) { + final String spec_name = given_level_desc.getNodeData().isHasTaxonomy() + ? given_level_desc.getNodeData().getTaxonomy().getScientificName() + : given_level_desc.getName(); + if ( node.isInternal() ) { + if ( first ) { + first = false; + } + else { + out.write( ", " ); + } + out.write( "sp_n=" + spec_name ); + } + final List proteins_per_species = protein_lists_per_quasi_species.get( spec_name ); + if ( proteins_per_species != null ) { + final SortedSet features_per_genome = new TreeSet(); + for( final Protein protein : proteins_per_species ) { + if ( use_domain_architectures ) { + final String da = protein.toDomainArchitectureString( separator, ie_cutoff ); + features_per_genome.add( da ); + } + else { + List domains = protein.getProteinDomains(); + for( final Domain domain : domains ) { + if ( ( ie_cutoff <= -1 ) || ( domain.getPerDomainEvalue() <= ie_cutoff ) ) { + features_per_genome.add( domain.getDomainId() ); + } + } + } + } + System.out.println( ">>>>>>>>>>>>>> features_per_genome.size()=" + features_per_genome.size() ); + if ( features_per_genome.size() > 0 ) { + features_per_genome_list.add( features_per_genome ); + } + else { + System.out.println( "error!" ); + System.exit( -1 ); + } + } + else { + System.out.println( "error!" ); + System.exit( -1 ); + } } - out.write( code ); } - final List proteins_per_species = protein_lists_per_species.get( new BasicSpecies( code ) ); - if ( proteins_per_species != null ) { - final SortedSet features_per_genome = new TreeSet(); - for( final Protein protein : proteins_per_species ) { - if ( use_domain_architectures ) { - final String da = protein.toDomainArchitectureString( separator, ie_cutoff ); - features_per_genome.add( da ); + /////////// + /////////// + } + else { + for( final PhylogenyNode external_desc : external_descs ) { + final String code = external_desc.getNodeData().getTaxonomy().getTaxonomyCode(); + if ( node.isInternal() ) { + if ( first ) { + first = false; } else { - List domains = protein.getProteinDomains(); - for( final Domain domain : domains ) { - if ( ( ie_cutoff <= -1 ) || ( domain.getPerDomainEvalue() <= ie_cutoff ) ) { - features_per_genome.add( domain.getDomainId() ); + out.write( ", " ); + } + out.write( code ); + } + final List proteins_per_species = protein_lists_per_species + .get( new BasicSpecies( code ) ); + if ( proteins_per_species != null ) { + final SortedSet features_per_genome = new TreeSet(); + for( final Protein protein : proteins_per_species ) { + if ( use_domain_architectures ) { + final String da = protein.toDomainArchitectureString( separator, ie_cutoff ); + features_per_genome.add( da ); + } + else { + List domains = protein.getProteinDomains(); + for( final Domain domain : domains ) { + if ( ( ie_cutoff <= -1 ) || ( domain.getPerDomainEvalue() <= ie_cutoff ) ) { + features_per_genome.add( domain.getDomainId() ); + } } } } + if ( features_per_genome.size() > 0 ) { + features_per_genome_list.add( features_per_genome ); + } } - if ( features_per_genome.size() > 0 ) { - features_per_genome_list.add( features_per_genome ); - } - } - } + } // for( final PhylogenyNode external_desc : external_descs ) + } // else if ( features_per_genome_list.size() > 0 ) { SortedSet intersection = calcIntersection( features_per_genome_list ); out.write( "\t" + intersection.size() + "\t" ); @@ -236,6 +309,39 @@ public final class MinimalDomainomeCalculator { } } + private final static SortedMap> makeProteinListsPerQuasiSpecies( final Phylogeny tre, + final int level, + final SortedMap> protein_lists_per_species ) { + final SortedMap> protein_lists_per_quasi_species = new TreeMap>(); + System.out.println( "---------------------------------" ); + System.out.println( "level=" + level ); + for( final PhylogenyNodeIterator iter = tre.iteratorPostorder(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + final int node_level = PhylogenyMethods.calculateLevel( node ); + if ( node_level == level ) { + System.out.println( "level=" + level ); + final List external_descs = node.getAllExternalDescendants(); + final List protein_list_per_quasi_species = new ArrayList(); + for( final PhylogenyNode external_desc : external_descs ) { + final String code = external_desc.getNodeData().getTaxonomy().getTaxonomyCode(); + final List proteins_per_species = protein_lists_per_species + .get( new BasicSpecies( code ) ); + //System.out.println( code ); + for( Protein protein : proteins_per_species ) { + protein_list_per_quasi_species.add( protein ); + } + } + final String species_name = node.getNodeData().isHasTaxonomy() + ? node.getNodeData().getTaxonomy().getScientificName() : node.getName(); + System.out.println( "species_name=" + species_name ); + protein_lists_per_quasi_species.put( species_name, protein_list_per_quasi_species ); + System.out.println( ">>>>" + protein_list_per_quasi_species.size() ); + } + } + + return protein_lists_per_quasi_species; + } + private final static SortedSet calcIntersection( final List> features_per_genome_list ) { final Set first = features_per_genome_list.get( 0 ); final SortedSet my_first = new TreeSet();