public final static void calc( final boolean use_domain_architectures,
final Phylogeny tre,
+ final int target_level,
final SortedMap<Species, List<Protein>> protein_lists_per_species,
final String separator,
final double ie_cutoff,
final BufferedWriter out_table = new BufferedWriter( new FileWriter( outfile_table ) );
out.write( "SPECIES\tCOMMON NAME\tCODE\tRANK\t#EXT NODES\tEXT NODE CODES\t#" + x + "\t" + x + "" );
out.write( ForesterUtil.LINE_SEPARATOR );
+ ///////////
+ //////////
+ SortedMap<String, List<Protein>> protein_lists_per_quasi_species = null;
+ if ( target_level >= 1 ) {
+ protein_lists_per_quasi_species = makeProteinListsPerQuasiSpecies( tre,
+ target_level,
+ protein_lists_per_species );
+
+ }
+ /////////
+ ///////////
for( final PhylogenyNodeIterator iter = tre.iteratorPostorder(); iter.hasNext(); ) {
final PhylogenyNode node = iter.next();
+ final int node_level = PhylogenyMethods.calculateLevel( node );
final String species_name = node.getNodeData().isHasTaxonomy()
? node.getNodeData().getTaxonomy().getScientificName() : node.getName();
final String common = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy().getCommonName()
final String tcode = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy().getTaxonomyCode()
: "";
final String rank = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy().getRank() : "";
- out.write( species_name );
- if ( !ForesterUtil.isEmpty( common ) ) {
- out.write( "\t" + common );
- }
- else {
- out.write( "\t" );
- }
- if ( !ForesterUtil.isEmpty( tcode ) ) {
- out.write( "\t" + tcode );
- }
- else {
- out.write( "\t" );
- }
- if ( !ForesterUtil.isEmpty( rank ) ) {
- out.write( "\t" + rank );
- }
- else {
- out.write( "\t" );
- }
final List<PhylogenyNode> external_descs = node.getAllExternalDescendants();
- if ( node.isInternal() ) {
- out.write( "\t" + external_descs.size() + "\t" );
- }
- else {
- out.write( "\t\t" );
- }
- final List<Set<String>> features_per_genome_list = new ArrayList<Set<String>>();
- boolean first = true;
- for( final PhylogenyNode external_desc : external_descs ) {
- final String code = external_desc.getNodeData().getTaxonomy().getTaxonomyCode();
- if ( node.isInternal() ) {
- if ( first ) {
- first = false;
- }
- else {
- out.write( ", " );
- }
- out.write( code );
+ if ( ( target_level < 1 ) || ( node_level >= target_level ) ) {
+ out.write( species_name + " " + node_level);
+ if ( !ForesterUtil.isEmpty( common ) ) {
+ out.write( "\t" + common );
}
- final List<Protein> proteins_per_species = protein_lists_per_species.get( new BasicSpecies( code ) );
- if ( proteins_per_species != null ) {
- final SortedSet<String> features_per_genome = new TreeSet<String>();
- for( final Protein protein : proteins_per_species ) {
- if ( use_domain_architectures ) {
- final String da = protein.toDomainArchitectureString( separator, ie_cutoff );
- features_per_genome.add( da );
- }
- else {
- List<Domain> domains = protein.getProteinDomains();
- for( final Domain domain : domains ) {
- if ( ( ie_cutoff <= -1 ) || ( domain.getPerDomainEvalue() <= ie_cutoff ) ) {
- features_per_genome.add( domain.getDomainId() );
- }
- }
- }
- }
- if ( features_per_genome.size() > 0 ) {
- features_per_genome_list.add( features_per_genome );
- }
+ else {
+ out.write( "\t" );
}
- }
- if ( features_per_genome_list.size() > 0 ) {
- SortedSet<String> intersection = calcIntersection( features_per_genome_list );
- out.write( "\t" + intersection.size() + "\t" );
- first = true;
- for( final String s : intersection ) {
- if ( first ) {
- first = false;
- }
- else {
- out.write( ", " );
- }
- out.write( s );
- }
- out.write( ForesterUtil.LINE_SEPARATOR );
- species_to_features_map.put( species_name, intersection );
- }
- }
- final SortedSet<String> all_species_names = new TreeSet<String>();
- final SortedSet<String> all_features = new TreeSet<String>();
- for( final Entry<String, SortedSet<String>> e : species_to_features_map.entrySet() ) {
- all_species_names.add( e.getKey() );
- for( final String f : e.getValue() ) {
- all_features.add( f );
- }
- }
- out_table.write( '\t' );
- boolean first = true;
- for( final String species_name : all_species_names ) {
- if ( first ) {
- first = false;
- }
- else {
- out_table.write( '\t' );
- }
- out_table.write( species_name );
- }
- out_table.write( ForesterUtil.LINE_SEPARATOR );
- for( final String das : all_features ) {
- out_table.write( das );
- out_table.write( '\t' );
- first = true;
- for( final String species_name : all_species_names ) {
- if ( first ) {
- first = false;
+ if ( !ForesterUtil.isEmpty( tcode ) ) {
+ out.write( "\t" + tcode );
}
else {
- out_table.write( '\t' );
+ out.write( "\t" );
}
- if ( species_to_features_map.get( species_name ).contains( das ) ) {
- out_table.write( '1' );
+ if ( !ForesterUtil.isEmpty( rank ) ) {
+ out.write( "\t" + rank );
}
else {
- out_table.write( '0' );
+ out.write( "\t" );
}
- }
- out_table.write( ForesterUtil.LINE_SEPARATOR );
- }
- out.flush();
- out.close();
- out_table.flush();
- out_table.close();
- ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote minimal DAome data to : " + outfile );
- ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote minimal DAome data to (as table): " + outfile_table );
- if ( write_protein_files ) {
- final String protdirname;
- final String a;
- final String b;
- if ( use_domain_architectures ) {
- a = "_DA";
- b = "domain architectures (DAs)";
- protdirname = "_DAS";
- }
- else {
- a = "_domain";
- b = "domains";
- protdirname = "_DOMAINS";
- }
- final File prot_dir = new File( outfile_base + protdirname );
- final boolean success = prot_dir.mkdir();
- if ( !success ) {
- throw new IOException( "failed to create dir " + prot_dir );
- }
- int total = 0;
- final String dir = outfile_base + protdirname + "/";
- for( final String feat : all_features ) {
- final File extract_outfile = new File( dir + feat + a + surfacing.SEQ_EXTRACT_SUFFIX );
- SurfacingUtil.checkForOutputFileWriteability( extract_outfile );
- final Writer proteins_file_writer = new BufferedWriter( new FileWriter( extract_outfile ) );
- final int counter = extractProteinFeatures( use_domain_architectures,
- protein_lists_per_species,
- feat,
- proteins_file_writer,
- ie_cutoff,
- separator );
- if ( counter < 1 ) {
- ForesterUtil.printWarningMessage( "surfacing", feat + " not present (in " + b + " extraction)" );
+ if ( node.isInternal() ) {
+ out.write( "\t" + external_descs.size() + "\t" );
+ }
+ else {
+ out.write( "\t\t" );
}
- total += counter;
- proteins_file_writer.close();
- }
- ForesterUtil.programMessage( "surfacing",
- "Wrote " + total + " individual " + b + " from a total of "
- + all_features.size() + " into: " + dir );
- }
- }
-
- public final static void calcNEW( final boolean use_domain_architectures,
- final Phylogeny tre,
- final int level,
- final SortedMap<Species, List<Protein>> protein_lists_per_species,
- final String separator,
- final double ie_cutoff,
- final String outfile_base,
- final boolean write_protein_files )
- throws IOException {
- final SortedMap<String, SortedSet<String>> species_to_features_map = new TreeMap<String, SortedSet<String>>();
- if ( protein_lists_per_species == null || tre == null ) {
- throw new IllegalArgumentException( "argument is null" );
- }
- if ( protein_lists_per_species.size() < 2 ) {
- throw new IllegalArgumentException( "not enough genomes" );
- }
- final String x;
- if ( use_domain_architectures ) {
- x = "DA";
- }
- else {
- x = "domain";
- }
- final File outfile = new File( outfile_base + "_minimal_" + x + "ome.tsv" );
- final File outfile_table = new File( outfile_base + "_minimal_" + x + "ome_matrix.tsv" );
- SurfacingUtil.checkForOutputFileWriteability( outfile );
- SurfacingUtil.checkForOutputFileWriteability( outfile_table );
- final BufferedWriter out = new BufferedWriter( new FileWriter( outfile ) );
- final BufferedWriter out_table = new BufferedWriter( new FileWriter( outfile_table ) );
- out.write( "SPECIES\tCOMMON NAME\tCODE\tRANK\t#EXT NODES\tEXT NODE CODES\t#" + x + "\t" + x + "" );
- out.write( ForesterUtil.LINE_SEPARATOR );
- ///////////
- //////////
- SortedMap<String, List<Protein>> protein_lists_per_quasi_species = null;
- if ( level >= 1 ) {
- protein_lists_per_quasi_species = makeProteinListsPerQuasiSpecies( tre, level, protein_lists_per_species );
- }
- /////////
- ///////////
- for( final PhylogenyNodeIterator iter = tre.iteratorPostorder(); iter.hasNext(); ) {
- final PhylogenyNode node = iter.next();
- final String species_name = node.getNodeData().isHasTaxonomy()
- ? node.getNodeData().getTaxonomy().getScientificName() : node.getName();
- final String common = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy().getCommonName()
- : "";
- final String tcode = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy().getTaxonomyCode()
- : "";
- final String rank = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy().getRank() : "";
- out.write( species_name );
- if ( !ForesterUtil.isEmpty( common ) ) {
- out.write( "\t" + common );
- }
- else {
- out.write( "\t" );
- }
- if ( !ForesterUtil.isEmpty( tcode ) ) {
- out.write( "\t" + tcode );
- }
- else {
- out.write( "\t" );
- }
- if ( !ForesterUtil.isEmpty( rank ) ) {
- out.write( "\t" + rank );
- }
- else {
- out.write( "\t" );
- }
- final List<PhylogenyNode> external_descs = node.getAllExternalDescendants();
- if ( node.isInternal() ) {
- out.write( "\t" + external_descs.size() + "\t" );
- }
- else {
- out.write( "\t\t" );
}
final List<Set<String>> features_per_genome_list = new ArrayList<Set<String>>();
boolean first = true;
- if ( level >= 1 ) {
+ if ( target_level >= 1 ) {
////////////
////////////
- final int node_level = PhylogenyMethods.calculateLevel( node );
- if ( node_level >= level ) {
+ if ( node_level >= target_level ) {
final List<PhylogenyNode> given_level_descs = PhylogenyMethods
- .getAllDescendantsOfGivenLevel( node, level );
+ .getAllDescendantsOfGivenLevel( node, target_level );
for( final PhylogenyNode given_level_desc : given_level_descs ) {
final String spec_name = given_level_desc.getNodeData().isHasTaxonomy()
- ? given_level_desc.getNodeData().getTaxonomy().getScientificName() : given_level_desc.getName();
+ ? given_level_desc.getNodeData().getTaxonomy().getScientificName()
+ : given_level_desc.getName();
+ if ( node.isInternal() ) {
+ if ( first ) {
+ first = false;
+ }
+ else {
+ out.write( ", " );
+ }
+ out.write( "sp_n=" + spec_name );
+ }
+ final List<Protein> proteins_per_species = protein_lists_per_quasi_species.get( spec_name );
+ if ( proteins_per_species != null ) {
+ final SortedSet<String> features_per_genome = new TreeSet<String>();
+ for( final Protein protein : proteins_per_species ) {
+ if ( use_domain_architectures ) {
+ final String da = protein.toDomainArchitectureString( separator, ie_cutoff );
+ features_per_genome.add( da );
+ }
+ else {
+ List<Domain> domains = protein.getProteinDomains();
+ for( final Domain domain : domains ) {
+ if ( ( ie_cutoff <= -1 ) || ( domain.getPerDomainEvalue() <= ie_cutoff ) ) {
+ features_per_genome.add( domain.getDomainId() );
+ }
+ }
+ }
+ }
+ System.out.println( ">>>>>>>>>>>>>> features_per_genome.size()=" + features_per_genome.size() );
+ if ( features_per_genome.size() > 0 ) {
+ features_per_genome_list.add( features_per_genome );
+ }
+ else {
+ System.out.println( "error!" );
+ System.exit( -1 );
+ }
+ }
+ else {
+ System.out.println( "error!" );
+ System.exit( -1 );
+ }
}
}
///////////
final int level,
final SortedMap<Species, List<Protein>> protein_lists_per_species ) {
final SortedMap<String, List<Protein>> protein_lists_per_quasi_species = new TreeMap<String, List<Protein>>();
+ System.out.println( "---------------------------------" );
+ System.out.println( "level=" + level );
for( final PhylogenyNodeIterator iter = tre.iteratorPostorder(); iter.hasNext(); ) {
final PhylogenyNode node = iter.next();
final int node_level = PhylogenyMethods.calculateLevel( node );
if ( node_level == level ) {
+ System.out.println( "level=" + level );
final List<PhylogenyNode> external_descs = node.getAllExternalDescendants();
final List<Protein> protein_list_per_quasi_species = new ArrayList<Protein>();
for( final PhylogenyNode external_desc : external_descs ) {
final String code = external_desc.getNodeData().getTaxonomy().getTaxonomyCode();
final List<Protein> proteins_per_species = protein_lists_per_species
.get( new BasicSpecies( code ) );
+ //System.out.println( code );
for( Protein protein : proteins_per_species ) {
protein_list_per_quasi_species.add( protein );
}
}
final String species_name = node.getNodeData().isHasTaxonomy()
? node.getNodeData().getTaxonomy().getScientificName() : node.getName();
+ System.out.println( "species_name=" + species_name );
protein_lists_per_quasi_species.put( species_name, protein_list_per_quasi_species );
+ System.out.println( ">>>>" + protein_list_per_quasi_species.size() );
}
}
+
return protein_lists_per_quasi_species;
}