package org.forester.rio;
import java.io.File;
-import java.io.FilenameFilter;
+import java.io.FileNotFoundException;
import java.io.IOException;
import java.math.RoundingMode;
import java.util.ArrayList;
+import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import java.util.Map.Entry;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeSet;
import org.forester.io.parsers.nexus.NexusPhylogeniesParser;
import org.forester.io.parsers.nhx.NHXParser;
import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
+import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
import org.forester.io.parsers.phyloxml.PhyloXmlParser;
import org.forester.io.parsers.util.ParserUtils;
import org.forester.io.writers.PhylogenyWriter;
import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyMethods;
import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.PhylogenyMethods.DESCENDANT_SORT_PRIORITY;
import org.forester.phylogeny.data.Sequence;
+import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
+import org.forester.phylogeny.factories.PhylogenyFactory;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
import org.forester.rio.RIO.REROOTING;
+import org.forester.sdi.GSDIR;
import org.forester.sdi.SDIException;
+import org.forester.sdi.SDIutil;
import org.forester.sdi.SDIutil.ALGORITHM;
import org.forester.util.BasicDescriptiveStatistics;
import org.forester.util.BasicTable;
public final class RIOUtil {
+ public final static String STRIPPED_SPECIES_TREE_SUFFIX = "_RIO_stripped_species_tree.xml"; // NOTE(review): no use visible here; presumably the output-name ending for the stripped species tree -- confirm at the call site
+ public final static String ORTHO_OUTTABLE_SUFFIX = "_RIO_orthologies.tsv"; // NOTE(review): no use visible here; presumably the output-name ending for the orthology table -- confirm at the call site
+ public final static String ORTHO_OUTTABLE_WITH_MAP_SUFFIX = "_RIO_orthologies_ext_map.tsv"; // NOTE(review): no use visible here; presumably the orthology table written together with the id map -- confirm
+ public final static String OUT_MIN_DUP_GENE_TREE_SUFFIX = "_RIO_gene_tree_min_dup_"; // NOTE(review): ends in '_' , so presumably an infix that is followed by a number and an extension -- confirm
+ public final static String OUT_MED_DUP_GENE_TREE_SUFFIX = "_RIO_gene_tree_med_dup_"; // NOTE(review): ends in '_' , so presumably an infix that is followed by a number and an extension -- confirm
+ public final static String BEST_TREE_SUFFIX = "_RIO_consensus_gene_tree_dup_"; // used by analyzeConsensusTree as an infix: the minimal duplications sum and ".xml" are appended after it
+ public final static String ORTHOLOG_GROUPS_SUFFIX = "_RIO_ortholog_groups.tsv"; // NOTE(review): no use visible here; presumably the output-name ending for the ortholog groups table -- confirm
+ public final static String LOGFILE_SUFFIX = "_RIO_log.tsv"; // NOTE(review): no use visible here; presumably the output-name ending for the log file -- confirm
+
public static final void executeAnalysis( final File gene_trees_file,
final File species_tree_file,
final File orthology_outtable,
final double ortholog_group_cutoff,
final boolean perform_id_mapping,
final File id_mapping_dir,
- final String id_mapping_suffix ) {
+ final String id_mapping_suffix,
+ final boolean perform_gsdir_on_best_tree,
+ final File outdir,
+ final File best_trees_indir,
+ final String best_trees_suffix ) {
try {
final SortedMap<String, String> id_map;
if ( perform_id_mapping ) {
else {
m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees(), true );
}
+ final GSDIR gsdir_for_best_tree;
+ if ( perform_gsdir_on_best_tree ) {
+ gsdir_for_best_tree = analyzeConsensusTree( gene_trees_file,
+ species_tree_file,
+ outdir,
+ best_trees_indir,
+ id_map,
+ best_trees_suffix );
+ }
+ else {
+ gsdir_for_best_tree = null;
+ }
final BasicDescriptiveStatistics stats = rio.getDuplicationsStatistics();
if ( perform_id_mapping ) {
writeOrthologyTable( orthology_outtable, stats.getN(), m, !use_gene_trees_dir, id_map, true );
log.print( "\t" );
log.print( Integer.toString( ortholog_groups_095 ) );
//
+ if ( true ) {
+ log.print( "\t" );
+ log.print( Integer.toString( gsdir_for_best_tree.getMinDuplicationsSum() ) );
+ log.print( "\t" );
+ log.print( df.format( median - gsdir_for_best_tree.getMinDuplicationsSum() ) );
+ }
+ //
log.print( "\t" );
if ( stats.getN() > 3 ) {
log.print( df.format( median ) );
}
}
+    /**
+     * Runs a GSDIR analysis on the single "best" (consensus) gene tree that belongs to
+     * {@code gene_trees_file} and writes the resulting gene tree to {@code outdir}.
+     * <p>
+     * The best tree file is looked up in {@code best_trees_indir} via
+     * {@code ForesterUtil.getMatchingFile}, using the name of {@code gene_trees_file} as prefix
+     * and {@code best_trees_suffix} as suffix. The first phylogeny in that file is parsed as
+     * phyloXML, both it and the species tree are marked as rooted, and taxonomy data is
+     * extracted (best effort) from the names of the external nodes before GSDIR is run.
+     * <p>
+     * The output file is named
+     * {@code <best tree file name without extension> + BEST_TREE_SUFFIX + <min duplications sum> + ".xml"}.
+     *
+     * @param gene_trees_file   the gene trees file; only its name is used here (for the lookup
+     *                          of the best tree and in the error message)
+     * @param species_tree_file the species tree file handed to {@code SDIutil.parseSpeciesTree}
+     * @param outdir            directory the resulting gene tree is written to
+     * @param best_trees_indir  directory searched for the best tree file
+     * @param id_map            id map passed through to {@code writeTree}
+     * @param best_trees_suffix file name suffix of the best tree files
+     * @return the GSDIR instance created for the best tree
+     * @throws IOException if the best tree file contains no phylogeny, if the best tree is not
+     *                     completely binary (three children at the root are allowed), or if
+     *                     reading/writing fails
+     * @throws FileNotFoundException       propagated from the called parsing/IO code
+     * @throws PhyloXmlDataFormatException propagated from the called parsing code
+     * @throws SDIException                propagated from the species tree parsing or from GSDIR
+     */
+    private final static GSDIR analyzeConsensusTree( final File gene_trees_file,
+                                                     final File species_tree_file,
+                                                     final File outdir,
+                                                     final File best_trees_indir,
+                                                     final SortedMap<String, String> id_map,
+                                                     final String best_trees_suffix )
+            throws IOException, FileNotFoundException, PhyloXmlDataFormatException, SDIException {
+        final File the_one = ForesterUtil.getMatchingFile( best_trees_indir,
+                                                           gene_trees_file.getName(),
+                                                           best_trees_suffix );
+        final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+        final Phylogeny[] parsed_trees = factory.create( the_one,
+                                                         PhyloXmlParser.createPhyloXmlParserXsdValidating() );
+        // Guard the index access: a file without any phylogeny must surface as a regular,
+        // descriptive IOException instead of an unchecked ArrayIndexOutOfBoundsException.
+        if ( ( parsed_trees == null ) || ( parsed_trees.length < 1 ) ) {
+            throw new IOException( "no tree found in [" + the_one + "]" );
+        }
+        final Phylogeny best_tree = parsed_trees[ 0 ];
+        // NOTE(review): the meaning of the two boolean flags is defined by SDIutil.parseSpeciesTree -- confirm there.
+        final Phylogeny species_tree = SDIutil
+                .parseSpeciesTree( best_tree, species_tree_file, false, true, TAXONOMY_EXTRACTION.NO );
+        PhylogenyMethods.deleteInternalNodesWithOnlyOneDescendent( species_tree );
+        best_tree.setRooted( true );
+        species_tree.setRooted( true );
+        if ( !best_tree.isCompletelyBinaryAllow3ChildrenAtRoot() ) {
+            throw new IOException( "gene tree matching to ["
+                    + ForesterUtil.removeFileExtension( gene_trees_file.getName() ) + "] is not completely binary" );
+        }
+        final PhylogenyNodeIterator it = best_tree.iteratorExternalForward();
+        while ( it.hasNext() ) {
+            final PhylogenyNode n = it.next();
+            final String name = n.getName().trim();
+            if ( !ForesterUtil.isEmpty( name ) ) {
+                try {
+                    ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE );
+                }
+                catch ( final PhyloXmlDataFormatException ignored ) {
+                    // Deliberate best effort: a failure to extract taxonomy data from one
+                    // node name must not abort the analysis of the whole tree.
+                }
+            }
+        }
+        // NOTE(review): the meaning of the three boolean flags is defined by the GSDIR constructor -- confirm there.
+        final GSDIR gsdir_for_best_tree = new GSDIR( best_tree, species_tree, true, true, true );
+        final Phylogeny result_gene_tree = gsdir_for_best_tree.getMinDuplicationsSumGeneTree();
+        result_gene_tree.setRerootable( false );
+        PhylogenyMethods.orderAppearance( result_gene_tree.getRoot(), true, true, DESCENDANT_SORT_PRIORITY.NODE_NAME );
+        final String outname = ForesterUtil.removeFileExtension( the_one.getName() );
+        // Let java.io.File join parent and child instead of concatenating with a hard-coded "/".
+        final File outfile = new File( outdir.getCanonicalFile(),
+                                       outname + RIOUtil.BEST_TREE_SUFFIX
+                                               + gsdir_for_best_tree.getMinDuplicationsSum() + ".xml" );
+        writeTree( result_gene_tree, outfile, null, id_map );
+        return gsdir_for_best_tree;
+    }
+
private static final void writeOrthologyTable( final File table_outfile,
final int gene_trees_analyzed,
final IntMatrix m,
}
if ( !replace_ids && id_map != null && id_map.size() > 0 ) {
w.println();
+
+ final Iterator<?> it = id_map.entrySet().iterator();
+ while (it.hasNext()) {
+ Map.Entry<String, String> pair = ( Entry<String, String> ) it.next();
+ w.println( pair.getKey() + "\t" + pair.getValue() );
+ } //TODO
+
+ /*
id_map.forEach( ( k, v ) -> {
try {
w.println( k + "\t" + v );
catch ( final IOException e ) {
//ignore
}
- } );
+ } );*/
}
w.close();
if ( verbose ) {
final String prefix,
final String suffix )
throws IOException {
- if ( !dir.exists() ) {
- throw new IOException( "[" + dir + "] does not exist" );
- }
- if ( !dir.isDirectory() ) {
- throw new IOException( "[" + dir + "] is not a directory" );
- }
- final File mapping_files[] = dir.listFiles( new FilenameFilter() {
-
- @Override
- public boolean accept( final File dir, final String name ) {
- return ( name.endsWith( suffix ) );
- }
- } );
- if ( mapping_files.length == 1 ) {
- throw new IOException( "no files ending with \"" + suffix + "\" found in [" + dir + "]" );
- }
- String my_prefix = ForesterUtil.removeFileExtension( prefix );
- boolean done = false;
- boolean more_than_one = false;
- File the_one = null;
- do {
- int matches = 0;
- for( File file : mapping_files ) {
- if ( file.getName().startsWith( my_prefix ) ) {
- matches++;
- if ( matches > 1 ) {
- the_one = null;
- break;
- }
- the_one = file;
- }
- }
- if ( matches > 1 ) {
- more_than_one = true;
- done = true;
- }
- if ( matches == 1 ) {
- done = true;
- }
- else {
- if ( my_prefix.length() <= 1 ) {
- throw new IOException( "no file matching \"" + ForesterUtil.removeFileExtension( prefix )
- + "\" and ending with \"" + suffix + "\" found in [" + dir + "]" );
- }
- my_prefix = my_prefix.substring( 0, my_prefix.length() - 1 );
- }
- } while ( !done );
- if ( more_than_one ) {
- throw new IOException( "multiple files matching \"" + ForesterUtil.removeFileExtension( prefix )
- + "\" and ending with \"" + suffix + "\" found in [" + dir + "]" );
- }
- else if ( the_one != null ) {
- }
- else {
- throw new IOException( "no file matching \"" + ForesterUtil.removeFileExtension( prefix )
- + "\" and ending with \"" + suffix + "\" found in [" + dir + "]" );
- }
+ final File the_one = ForesterUtil.getMatchingFile( dir, prefix, suffix );
final BasicTable<String> t = BasicTableParser.parse( the_one, '\t' );
return t.getColumnsAsMap( 0, 1 );
}