2 package org.forester.surfacing;
4 import java.io.BufferedWriter;
6 import java.io.FileWriter;
7 import java.io.IOException;
8 import java.util.ArrayList;
9 import java.util.HashMap;
10 import java.util.HashSet;
11 import java.util.List;
13 import java.util.Map.Entry;
15 import java.util.SortedMap;
16 import java.util.SortedSet;
17 import java.util.TreeMap;
18 import java.util.TreeSet;
20 import org.forester.application.surfacing;
21 import org.forester.phylogeny.Phylogeny;
22 import org.forester.phylogeny.PhylogenyNode;
23 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
24 import org.forester.protein.Domain;
25 import org.forester.protein.Protein;
26 import org.forester.species.BasicSpecies;
27 import org.forester.species.Species;
28 import org.forester.util.ForesterUtil;
30 public final class MinimalDomainomeCalculator {
32 static final public void calcDomainome( final Phylogeny tre,
33 final SortedMap<Species, List<Protein>> protein_lists_per_species,
34 final double ie_cutoff ) {
35 if ( protein_lists_per_species == null || tre == null ) {
36 throw new IllegalArgumentException( "argument is null" );
38 if ( protein_lists_per_species.size() < 2 ) {
39 throw new IllegalArgumentException( "not enough genomes" );
41 for( final PhylogenyNodeIterator iter = tre.iteratorPostorder(); iter.hasNext(); ) {
42 final PhylogenyNode node = iter.next();
43 if ( node.isInternal() ) {
45 if ( node.getNodeData().isHasTaxonomy() ) {
46 System.out.println( node.getNodeData().getTaxonomy().getScientificName() + ":" );
49 System.out.println( node.getName() + ":" );
51 final List<PhylogenyNode> external_descs = node.getAllExternalDescendants();
52 final List<Set<String>> domains_per_genome_list = new ArrayList<Set<String>>();
53 for( final PhylogenyNode external_desc : external_descs ) {
54 final String code = external_desc.getNodeData().getTaxonomy().getTaxonomyCode();
55 System.out.print( code + " " );
56 final List<Protein> proteins_per_species = protein_lists_per_species
57 .get( new BasicSpecies( code ) );
58 if ( proteins_per_species != null ) {
59 final SortedSet<String> domains_per_genome = new TreeSet<String>();
60 for( final Protein protein : proteins_per_species ) {
61 List<Domain> domains = protein.getProteinDomains();
62 for( final Domain domain : domains ) {
63 if ( ( domain.getPerDomainEvalue() <= ie_cutoff ) || ( ie_cutoff <= -1 ) ) {
64 domains_per_genome.add( domain.getDomainId() );
68 if ( domains_per_genome.size() > 0 ) {
69 domains_per_genome_list.add( domains_per_genome );
74 if ( domains_per_genome_list.size() > 0 ) {
75 Set<String> intersection = calcIntersection( domains_per_genome_list );
76 System.out.println( intersection );
82 static final public void calcDAome( final Phylogeny tre,
83 final SortedMap<Species, List<Protein>> protein_lists_per_species,
84 final String separator,
85 final double ie_cutoff,
86 final String outfile_base )
88 final SortedMap<String, SortedSet<String>> species_to_das_map = new TreeMap<String, SortedSet<String>>();
89 if ( protein_lists_per_species == null || tre == null ) {
90 throw new IllegalArgumentException( "argument is null" );
92 if ( protein_lists_per_species.size() < 2 ) {
93 throw new IllegalArgumentException( "not enough genomes" );
95 final File outfile = new File( outfile_base + "_minimal_daome.txt" );
96 final File outfile_table = new File( outfile_base + "_minimal_daome.tsv" );
97 SurfacingUtil.checkForOutputFileWriteability( outfile );
98 SurfacingUtil.checkForOutputFileWriteability( outfile_table );
99 final BufferedWriter out = new BufferedWriter( new FileWriter( outfile ) );
100 final BufferedWriter out_table = new BufferedWriter( new FileWriter( outfile_table ) );
101 for( final PhylogenyNodeIterator iter = tre.iteratorPostorder(); iter.hasNext(); ) {
102 final PhylogenyNode node = iter.next();
103 final String species_name = node.getNodeData().isHasTaxonomy()
104 ? node.getNodeData().getTaxonomy().getScientificName() : node.getName();
105 out.write( species_name );
106 final List<PhylogenyNode> external_descs = node.getAllExternalDescendants();
107 out.write( "\t[" + external_descs.size() + "]:" );
108 final List<Set<String>> das_per_genome_list = new ArrayList<Set<String>>();
109 for( final PhylogenyNode external_desc : external_descs ) {
110 final String code = external_desc.getNodeData().getTaxonomy().getTaxonomyCode();
111 out.write( '\t' + code );
112 final List<Protein> proteins_per_species = protein_lists_per_species.get( new BasicSpecies( code ) );
113 if ( proteins_per_species != null ) {
114 final SortedSet<String> das_per_genome = new TreeSet<String>();
115 for( final Protein protein : proteins_per_species ) {
116 final String da = protein.toDomainArchitectureString( separator, ie_cutoff );
117 das_per_genome.add( da );
119 if ( das_per_genome.size() > 0 ) {
120 das_per_genome_list.add( das_per_genome );
124 if ( das_per_genome_list.size() > 0 ) {
125 SortedSet<String> intersection = calcIntersection( das_per_genome_list );
126 out.write( "\t[" + intersection.size() + "]:" );
127 for( final String s : intersection ) {
128 out.write( '\t' + s );
130 out.write( ForesterUtil.LINE_SEPARATOR );
131 out.write( ForesterUtil.LINE_SEPARATOR );
132 species_to_das_map.put( species_name, intersection );
135 final SortedSet<String> all_species_names = new TreeSet<String>();
136 final SortedSet<String> all_das = new TreeSet<String>();
137 for( final Entry<String, SortedSet<String>> e : species_to_das_map.entrySet() ) {
138 all_species_names.add( e.getKey() );
139 for( final String das : e.getValue() ) {
143 out_table.write( '\t' );
144 boolean first = true;
145 for( final String species_name : all_species_names ) {
150 out_table.write( '\t' );
152 out_table.write( species_name );
154 out_table.write( ForesterUtil.LINE_SEPARATOR );
155 for( final String das : all_das ) {
156 out_table.write( das );
157 out_table.write( '\t' );
159 for( final String species_name : all_species_names ) {
164 out_table.write( '\t' );
166 if ( species_to_das_map.get( species_name ).contains( das ) ) {
167 out_table.write( '1' );
170 out_table.write( '0' );
173 out_table.write( ForesterUtil.LINE_SEPARATOR );
179 ForesterUtil.programMessage( surfacing.PRG_NAME,
180 "Wrote minimal DAome data to : " + outfile );
181 ForesterUtil.programMessage( surfacing.PRG_NAME,
182 "Wrote minimal DAome data to (as table): " + outfile_table );
185 private final static SortedSet<String> calcIntersection( final List<Set<String>> features_per_genome_list ) {
186 final Set<String> first = features_per_genome_list.get( 0 );
187 final SortedSet<String> my_first = new TreeSet<String>();
188 for( final String s : first ) {
191 for( int i = 1; i < features_per_genome_list.size(); ++i ) {
192 my_first.retainAll( features_per_genome_list.get( i ) );
197 public static void main( final String[] args ) {
198 Set<String> a = new HashSet<String>();
199 Set<String> b = new HashSet<String>();
200 Set<String> c = new HashSet<String>();
201 Set<String> d = new HashSet<String>();
216 List<Set<String>> domains_per_genome_list = new ArrayList<Set<String>>();
217 domains_per_genome_list.add( a );
218 domains_per_genome_list.add( b );
219 domains_per_genome_list.add( c );
220 domains_per_genome_list.add( d );
221 Set<String> x = calcIntersection( domains_per_genome_list );
222 System.out.println( x );