final static private String INPUT_GENOMES_FILE_OPTION = "genomes";
final static private String INPUT_SPECIES_TREE_OPTION = "species_tree";
final static private String SEQ_EXTRACT_OPTION = "prot_extract";
- final static private String PRG_VERSION = "2.500";
- final static private String PRG_DATE = "170323";
+ final static private String PRG_VERSION = "2.501";
+ final static private String PRG_DATE = "170327";
final static private String E_MAIL = "czmasek@burnham.org";
final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester/surfacing";
final static private boolean IGNORE_DUFS_DEFAULT = true;
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_MAPPED.txt";
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique_MAPPED.txt";
private static final boolean CALC_SIMILARITY_SCORES = false;
+ private static final String SEPARATOR_FOR_DA = "--";
@SuppressWarnings( "unchecked")
public static void main( final String args[] ) {
.programMessage( PRG_NAME,
"Wrote domain promiscuities to: " + per_genome_domain_promiscuity_statistics_file );
try {
- MinimalDomainomeCalculator.calcOme( false,
- intrees[ 0 ],
- protein_lists_per_species,
- "---",
- -1,
- out_dir.toString() + "/" + output_file );
+ MinimalDomainomeCalculator.calc( false,
+ intrees[ 0 ],
+ protein_lists_per_species,
+ SEPARATOR_FOR_DA,
+ -1,
+ out_dir.toString() + "/" + output_file,
+ true );
}
catch ( IOException e ) {
ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() );
}
try {
- MinimalDomainomeCalculator.calcOme( true,
- intrees[ 0 ],
- protein_lists_per_species,
- "---",
- -1,
- out_dir.toString() + "/" + output_file );
+ MinimalDomainomeCalculator.calc( true,
+ intrees[ 0 ],
+ protein_lists_per_species,
+ SEPARATOR_FOR_DA,
+ -1,
+ out_dir.toString() + "/" + output_file,
+ true );
}
catch ( IOException e ) {
ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() );
"Free memory : " + free_memory + "MB, total memory: " + total_memory + "MB" );
ForesterUtil.programMessage( PRG_NAME, "If this application is useful to you, please cite:" );
ForesterUtil.programMessage( PRG_NAME, surfacing.WWW );
+ ForesterUtil
+ .programMessage( PRG_NAME,
+ "[next step for phylogenomic analysis pipeline (example, in \"DAS\" dir): % mse.rb .prot . FL_seqs DA_seqs ../../genome_locations.txt]" );
ForesterUtil.programMessage( PRG_NAME, "OK" );
System.out.println();
}
public final class MinimalDomainomeCalculator {
- static final public void calcOme( final boolean use_domain_architectures,
- final Phylogeny tre,
- final SortedMap<Species, List<Protein>> protein_lists_per_species,
- final String separator,
- final double ie_cutoff,
- final String outfile_base )
+ public final static void calc( final boolean use_domain_architectures,
+ final Phylogeny tre,
+ final SortedMap<Species, List<Protein>> protein_lists_per_species,
+ final String separator,
+ final double ie_cutoff,
+ final String outfile_base,
+ final boolean write_protein_files )
throws IOException {
final SortedMap<String, SortedSet<String>> species_to_features_map = new TreeMap<String, SortedSet<String>>();
if ( protein_lists_per_species == null || tre == null ) {
out_table.close();
ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote minimal DAome data to : " + outfile );
ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote minimal DAome data to (as table): " + outfile_table );
- for( String f : all_features ) {
+ if ( write_protein_files ) {
+ final String protdirname;
final String a;
+ final String b;
if ( use_domain_architectures ) {
- a = "DA_";
+ a = "_DA";
+ b = "domain architectures (DAs)";
+ protdirname = "_DAS";
}
else {
- a = "domain_";
+ a = "_domain";
+ b = "domains";
+ protdirname = "_DOMAINS";
}
- final File prot_dir = new File( outfile_base + "_prot" );
- prot_dir.mkdir();
- final File outt = new File( outfile_base + "_prot/" + a + f + surfacing.SEQ_EXTRACT_SUFFIX );
- final Writer proteins_file_writer = new BufferedWriter( new FileWriter( outt ) );
- extractProteinFeatures( use_domain_architectures,
- protein_lists_per_species,
- f,
- proteins_file_writer,
- ie_cutoff,
- separator );
- proteins_file_writer.close();
+ final File prot_dir = new File( outfile_base + protdirname );
+ final boolean success = prot_dir.mkdir();
+ if ( !success ) {
+ throw new IOException( "failed to create dir " + prot_dir );
+ }
+ int total = 0;
+ final String dir = outfile_base + protdirname + "/";
+ for( final String feat : all_features ) {
+ final File extract_outfile = new File( dir + feat + a + surfacing.SEQ_EXTRACT_SUFFIX );
+ SurfacingUtil.checkForOutputFileWriteability( extract_outfile );
+ final Writer proteins_file_writer = new BufferedWriter( new FileWriter( extract_outfile ) );
+ final int counter = extractProteinFeatures( use_domain_architectures,
+ protein_lists_per_species,
+ feat,
+ proteins_file_writer,
+ ie_cutoff,
+ separator );
+ if ( counter < 1 ) {
+ ForesterUtil.printWarningMessage( "surfacing", feat + " not present (in " + b + " extraction)" );
+ }
+ total += counter;
+ proteins_file_writer.close();
+ }
+ ForesterUtil.programMessage( "surfacing",
+ "Wrote " + total + " individual " + b + " from a total of "
+ + all_features.size() + " into: " + dir );
}
}
return my_first;
}
- public static void extractProteinFeatures( final boolean use_domain_architectures,
- final SortedMap<Species, List<Protein>> protein_lists_per_species,
- final String domain_id,
- final Writer out,
- final double ie_cutoff,
- final String domain_separator )
+ private final static int extractProteinFeatures( final boolean use_domain_architectures,
+ final SortedMap<Species, List<Protein>> protein_lists_per_species,
+ final String domain_id,
+ final Writer out,
+ final double ie_cutoff,
+ final String domain_separator )
throws IOException {
+ int counter = 0;
final String separator_for_output = "\t";
for( final Species species : protein_lists_per_species.keySet() ) {
final List<Protein> proteins_per_species = protein_lists_per_species.get( species );
out.write( from + "-" + to );
out.write( "/" );
out.write( SurfacingConstants.NL );
+ ++counter;
}
}
else {
out.write( protein.getAccession() );
}
out.write( SurfacingConstants.NL );
+ ++counter;
}
}
}
}
out.flush();
+ return counter;
}
public static void main( final String[] args ) {