import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
-import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
+import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
private final REROOTING _rerooting;
private final Phylogeny _species_tree;
private Phylogeny _min_dub_gene_tree;
+ private Map<Integer, Phylogeny> _dup_to_tree_map;
private RIO( final IteratingPhylogenyParser p,
final Phylogeny species_tree,
int last,
final boolean produce_log,
final boolean verbose,
- final boolean transfer_taxonomy ) throws IOException, SDIException, RIOException {
+ final boolean transfer_taxonomy )
+ throws IOException, SDIException, RIOException {
if ( ( last == DEFAULT_RANGE ) && ( first >= 0 ) ) {
last = END_OF_GT;
}
int last,
final boolean produce_log,
final boolean verbose,
- final boolean transfer_taxonomy ) throws IOException, SDIException, RIOException {
+ final boolean transfer_taxonomy )
+ throws IOException, SDIException, RIOException {
if ( ( last == DEFAULT_RANGE ) && ( first >= 0 ) ) {
last = gene_trees.length - 1;
}
/**
* Returns the numbers of number of ext nodes in gene trees analyzed (after
* stripping).
- *
+ *
* @return number of ext nodes in gene trees analyzed (after stripping)
*/
public final int getExtNodesOfAnalyzedGeneTrees() {
/**
* Returns the numbers of number of int nodes in gene trees analyzed (after
* stripping).
- *
+ *
* @return number of int nodes in gene trees analyzed (after stripping)
*/
public final int getIntNodesOfAnalyzedGeneTrees() {
final String outgroup,
int first,
final int last,
- final boolean transfer_taxonomy ) throws SDIException, RIOException,
- FileNotFoundException, IOException {
+ final boolean transfer_taxonomy )
+ throws SDIException, RIOException, FileNotFoundException, IOException {
if ( !parser.hasNext() ) {
throw new RIOException( "no gene trees to analyze" );
}
if ( _verbose ) {
System.out.println();
}
- final DecimalFormat pf = new java.text.DecimalFormat( "000" );
int gene_tree_ext_nodes = 0;
int i = 0;
int counter = 0;
throw new RIOException( "gene tree #" + i + " has only one external node" );
}
if ( _verbose ) {
- ForesterUtil.updateProgress( i, pf );
+ System.out.print( "\r" + i );
}
if ( counter == 0 ) {
if ( algorithm == ALGORITHM.SDIR ) {
}
++i;
}
+ if ( _verbose ) {
+ System.out.print( "\rGene trees analyzed :\t" + counter );
+ }
if ( ( first >= 0 ) && ( counter == 0 ) && ( i > 0 ) ) {
throw new RIOException( "attempt to analyze first gene tree #" + first + " in a set of " + i );
}
first = 0;
}
if ( log() ) {
- postLog( species_tree, first, first + counter - 1 );
+ postLog( species_tree, first, ( first + counter ) - 1 );
}
if ( _verbose ) {
System.out.println();
final String outgroup,
final int first,
final int last,
- final boolean transfer_taxonomy ) throws SDIException, RIOException,
- FileNotFoundException, IOException {
+ final boolean transfer_taxonomy )
+ throws SDIException, RIOException, FileNotFoundException, IOException {
if ( algorithm == ALGORITHM.SDIR ) {
// Removes from species_tree all species not found in gene_tree.
PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( gene_trees[ 0 ], species_tree );
}
private final void logRemovedGeneTreeNodes() {
- log( "Species stripped from gene trees:" );
final SortedSet<String> rn = new TreeSet<String>();
for( final PhylogenyNode n : getRemovedGeneTreeNodes() ) {
final Taxonomy t = n.getNodeData().getTaxonomy();
}
}
}
+ final StringBuilder sb = new StringBuilder();
for( final String s : rn ) {
- log( s );
+ sb.append( '\t' );
+ sb.append( s );
}
- log( "" );
+ log( "Species stripped from gene trees :" + sb );
}
private final Phylogeny performOrthologInference( final Phylogeny gene_tree,
final ALGORITHM algorithm,
final String outgroup,
final int i,
- final boolean transfer_taxonomy ) throws SDIException,
- RIOException {
+ final boolean transfer_taxonomy )
+ throws SDIException, RIOException {
final Phylogeny assigned_tree;
switch ( algorithm ) {
case SDIR: {
break;
}
case GSDIR: {
- assigned_tree = performOrthologInferenceByGSDI( gene_tree, species_tree, outgroup, i, transfer_taxonomy );
+ assigned_tree = performOrthologInferenceByGSDI( gene_tree,
+ species_tree,
+ outgroup,
+ i,
+ transfer_taxonomy );
break;
}
default: {
final Phylogeny species_tree,
final String outgroup,
final int i,
- final boolean transfer_taxonomy ) throws SDIException,
- RIOException {
+ final boolean transfer_taxonomy )
+ throws SDIException, RIOException {
final Phylogeny assigned_tree;
final int dups;
if ( _rerooting == REROOTING.BY_ALGORITHM ) {
}
dups = gsdi.getDuplicationsSum();
}
+ assigned_tree.setRerootable( false );
+ double new_dist = -1;
if ( ( i == 0 ) || ( dups < _duplications_stats.getMin() ) ) {
_min_dub_gene_tree = assigned_tree;
}
+ else if ( dups == _duplications_stats.getMin() ) {
+ new_dist = PhylogenyMethods.calculateMaxDistanceToRoot( assigned_tree );
+ if ( new_dist < PhylogenyMethods.calculateMaxDistanceToRoot( _min_dub_gene_tree ) ) {
+ _min_dub_gene_tree = assigned_tree;
+ }
+ }
+ if ( _dup_to_tree_map == null ) {
+ _dup_to_tree_map = new HashMap<Integer, Phylogeny>();
+ }
+ if ( !_dup_to_tree_map.containsKey( dups ) ) {
+ _dup_to_tree_map.put( dups, assigned_tree );
+ }
+ else {
+ if ( new_dist == -1 ) {
+ new_dist = PhylogenyMethods.calculateMaxDistanceToRoot( assigned_tree );
+ }
+ if ( new_dist < PhylogenyMethods.calculateMaxDistanceToRoot( _dup_to_tree_map.get( dups ) ) ) {
+ _dup_to_tree_map.put( dups, assigned_tree );
+ }
+ }
_duplications_stats.addValue( dups );
return assigned_tree;
}
+ /**
+ * Returns the map from a duplication count to one gene tree having that
+ * count.
+ * <p>
+ * In the code visible here the map is created lazily while GSDI-based
+ * inference records a tree; for each duplication count the tree with the
+ * smaller maximal distance to the root is kept. The internal map itself is
+ * returned, not a copy.
+ *
+ * @return the duplications-to-tree map; may be null if no tree has been
+ *         recorded (NOTE(review): confirm the field is not initialized
+ *         elsewhere)
+ */
+ final public Map<Integer, Phylogeny> getDuplicationsToTreeMap() {
+ return _dup_to_tree_map;
+ }
+
+ /**
+ * Runs SDIR inference for one gene tree against the species tree, records
+ * the minimal number of duplications in the duplication statistics, and
+ * returns the resulting tree marked as not rerootable.
+ *
+ * @param gene_tree the gene tree to analyze
+ * @param species_tree the species tree to infer against
+ * @return the first tree returned by the SDIR inference
+ * @throws SDIException propagated from the SDIR calls
+ */
private final Phylogeny performOrthologInferenceBySDI( final Phylogeny gene_tree, final Phylogeny species_tree )
throws SDIException {
final SDIR sdir = new SDIR();
- return sdir.infer( gene_tree, species_tree, false, true, true, true, 1 )[ 0 ];
+ // Only the first element of the array returned by infer() is used.
+ final Phylogeny r = sdir.infer( gene_tree, species_tree, false, true, true, true, 1 )[ 0 ];
+ r.setRerootable( false );
+ // Feed this tree's duplication count into the running statistics that the log summary reads.
+ final int dups = sdir.getMinimalDuplications();
+ _duplications_stats.addValue( dups );
+ return r;
}
+ /**
+ * Writes the post-analysis summary to the log: the species stripped from
+ * the gene trees (if any), gene tree and species tree node counts, and the
+ * mean, median, minimum and maximum number of duplications together with
+ * how many gene trees show the median, minimum and maximum count.
+ *
+ * @param species_tree the species tree whose external node and polytomy
+ *            counts are reported
+ * @param first first analyzed gene tree index; the range is logged only
+ *            if both first and last are non-negative
+ * @param last last analyzed gene tree index
+ */
private final void postLog( final Phylogeny species_tree, final int first, final int last ) {
- log( "" );
+ final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.##" );
+ // The statistics hold the per-tree duplication counts as doubles; the casts
+ // truncate, so a fractional median (if any) is rounded down.
+ final int min = ( int ) getDuplicationsStatistics().getMin();
+ final int max = ( int ) getDuplicationsStatistics().getMax();
+ final int median = ( int ) getDuplicationsStatistics().median();
+ int min_count = 0;
+ int max_count = 0;
+ int median_count = 0;
+ // Count how many gene trees have exactly the min, max and median number of duplications.
+ for( final double d : getDuplicationsStatistics().getData() ) {
+ if ( ( ( int ) d ) == min ) {
+ ++min_count;
+ }
+ if ( ( ( int ) d ) == max ) {
+ ++max_count;
+ }
+ if ( ( ( int ) d ) == median ) {
+ ++median_count;
+ }
+ }
+ final double min_count_percentage = ( 100.0 * min_count ) / getDuplicationsStatistics().getN();
+ final double max_count_percentage = ( 100.0 * max_count ) / getDuplicationsStatistics().getN();
+ final double median_count_percentage = ( 100.0 * median_count ) / getDuplicationsStatistics().getN();
if ( ( getRemovedGeneTreeNodes() != null ) && ( getRemovedGeneTreeNodes().size() > 0 ) ) {
logRemovedGeneTreeNodes();
}
+ // The removed-node collection may be null (see the guard above); report 0 instead of failing.
+ final int removed_ext_nodes = ( getRemovedGeneTreeNodes() != null ) ? getRemovedGeneTreeNodes().size() : 0;
- log( "Species tree external nodes (after stripping) : " + species_tree.getNumberOfExternalNodes() );
- log( "Species tree polytomies (after stripping) : "
- + PhylogenyMethods.countNumberOfPolytomies( species_tree ) );
- log( "Taxonomy linking based on : " + getGSDIRtaxCompBase() );
- final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.#" );
+ log( "Gene trees analyzed :\t" + getDuplicationsStatistics().getN() );
if ( ( first >= 0 ) && ( last >= 0 ) ) {
- log( "Gene trees analyzed range : " + first + "-" + last );
- }
- log( "Gene trees analyzed : " + _duplications_stats.getN() );
- log( "Mean number of duplications : " + df.format( _duplications_stats.arithmeticMean() )
- + " (sd: " + df.format( _duplications_stats.sampleStandardDeviation() ) + ")" + " ("
- + df.format( ( 100.0 * _duplications_stats.arithmeticMean() ) / getIntNodesOfAnalyzedGeneTrees() )
- + "%)" );
- if ( _duplications_stats.getN() > 3 ) {
- log( "Median number of duplications : " + df.format( _duplications_stats.median() )
- + " (" + df.format( ( 100.0 * _duplications_stats.median() ) / getIntNodesOfAnalyzedGeneTrees() )
- + "%)" );
- }
- log( "Minimum duplications : " + ( int ) _duplications_stats.getMin() + " ("
- + df.format( ( 100.0 * _duplications_stats.getMin() ) / getIntNodesOfAnalyzedGeneTrees() ) + "%)" );
- log( "Maximum duplications : " + ( int ) _duplications_stats.getMax() + " ("
- + df.format( ( 100.0 * _duplications_stats.getMax() ) / getIntNodesOfAnalyzedGeneTrees() ) + "%)" );
- log( "Gene tree internal nodes : " + getIntNodesOfAnalyzedGeneTrees() );
- log( "Gene tree external nodes : " + getExtNodesOfAnalyzedGeneTrees() );
+ log( "Gene trees analyzed range :\t" + first + "-" + last );
+ }
+ log( "Gene tree internal nodes :\t" + getIntNodesOfAnalyzedGeneTrees() );
+ log( "Gene tree external nodes :\t" + getExtNodesOfAnalyzedGeneTrees() );
+ log( "Removed ext gene tree nodes :\t" + removed_ext_nodes );
+ log( "Spec tree ext nodes (after strip) :\t" + species_tree.getNumberOfExternalNodes() );
+ log( "Spec tree polytomies (after strip) :\t" + PhylogenyMethods.countNumberOfPolytomies( species_tree ) );
+ log( "Taxonomy linking based on :\t" + getGSDIRtaxCompBase() );
+ log( "Mean number of duplications :\t" + df.format( getDuplicationsStatistics().arithmeticMean() )
+ + "\t"
+ + df.format( ( 100.0 * getDuplicationsStatistics().arithmeticMean() )
+ / getIntNodesOfAnalyzedGeneTrees() )
+ + "%\t(sd: " + df.format( getDuplicationsStatistics().sampleStandardDeviation() ) + ")" );
+ if ( getDuplicationsStatistics().getN() > 3 ) {
+ log( "Median number of duplications :\t" + df.format( median ) + "\t"
+ + df.format( ( 100.0 * median ) / getIntNodesOfAnalyzedGeneTrees() ) + "%" );
+ }
+ log( "Minimum duplications :\t" + min + "\t"
+ + df.format( ( 100.0 * min ) / getIntNodesOfAnalyzedGeneTrees() ) + "%" );
+ log( "Maximum duplications :\t" + max + "\t"
+ + df.format( ( 100.0 * max ) / getIntNodesOfAnalyzedGeneTrees() ) + "%" );
+ log( "Gene trees with median duplications :\t" + median_count + "\t" + df.format( median_count_percentage )
+ + "%" );
+ log( "Gene trees with minimum duplications:\t" + min_count + "\t" + df.format( min_count_percentage ) + "%" );
+ log( "Gene trees with maximum duplications:\t" + max_count + "\t" + df.format( max_count_percentage ) + "%" );
}
private final void preLog( final int gene_trees,
final ALGORITHM algorithm,
final String outgroup ) {
if ( gene_trees > 0 ) {
- log( "Number of gene trees (total) : " + gene_trees );
+ log( "Number of gene trees (total) :\t" + gene_trees );
}
- log( "Algorithm : " + algorithm );
- log( "Species tree external nodes (prior to stripping): " + species_tree.getNumberOfExternalNodes() );
- log( "Species tree polytomies (prior to stripping) : "
- + PhylogenyMethods.countNumberOfPolytomies( species_tree ) );
+ log( "Algorithm :\t" + algorithm );
+ log( "Spec tree ext nodes (prior strip) :\t" + species_tree.getNumberOfExternalNodes() );
+ log( "Spec tree polytomies (prior strip) :\t" + PhylogenyMethods.countNumberOfPolytomies( species_tree ) );
String rs = "";
switch ( _rerooting ) {
case BY_ALGORITHM: {
break;
}
}
- log( "Re-rooting : " + rs );
+ log( "Re-rooting :\t" + rs );
}
public final static IntMatrix calculateOrthologTable( final Phylogeny[] analyzed_gene_trees, final boolean sort )
final int last,
final boolean produce_log,
final boolean verbose,
- final boolean transfer_taxonomy ) throws IOException, SDIException,
- RIOException {
+ final boolean transfer_taxonomy )
+ throws IOException, SDIException, RIOException {
final Phylogeny[] gene_trees = parseGeneTrees( gene_trees_file );
if ( gene_trees.length < 1 ) {
throw new RIOException( "\"" + gene_trees_file + "\" is devoid of appropriate gene trees" );
}
- final Phylogeny species_tree = SDIutil.parseSpeciesTree( gene_trees[ 0 ],
- species_tree_file,
- false,
- true,
- TAXONOMY_EXTRACTION.NO );
+ final Phylogeny species_tree = SDIutil
+ .parseSpeciesTree( gene_trees[ 0 ], species_tree_file, false, true, TAXONOMY_EXTRACTION.NO );
return new RIO( gene_trees,
species_tree,
algorithm,
final String outgroup,
final boolean produce_log,
final boolean verbose,
- final boolean transfer_taxonomy ) throws IOException, SDIException,
- RIOException {
+ final boolean transfer_taxonomy )
+ throws IOException, SDIException, RIOException {
return new RIO( parseGeneTrees( gene_trees_file ),
species_tree,
algorithm,
final int last,
final boolean produce_log,
final boolean verbose,
- final boolean transfer_taxonomy ) throws IOException, SDIException,
- RIOException {
+ final boolean transfer_taxonomy )
+ throws IOException, SDIException, RIOException {
return new RIO( parseGeneTrees( gene_trees_file ),
species_tree,
algorithm,
final int last,
final boolean produce_log,
final boolean verbose,
- final boolean transfer_taxonomy ) throws IOException, SDIException,
- RIOException {
+ final boolean transfer_taxonomy )
+ throws IOException, SDIException, RIOException {
final Phylogeny g0 = p.next();
if ( ( g0 == null ) || g0.isEmpty() || ( g0.getNumberOfExternalNodes() < 2 ) ) {
throw new RIOException( "input file does not seem to contain any gene trees" );
}
- final Phylogeny species_tree = SDIutil.parseSpeciesTree( g0,
- species_tree_file,
- false,
- true,
- TAXONOMY_EXTRACTION.NO );
+ final Phylogeny species_tree = SDIutil
+ .parseSpeciesTree( g0, species_tree_file, false, true, TAXONOMY_EXTRACTION.NO );
p.reset();
return new RIO( p,
species_tree,
final String outgroup,
final boolean produce_log,
final boolean verbose,
- final boolean transfer_taxonomy ) throws IOException, SDIException,
- RIOException {
+ final boolean transfer_taxonomy )
+ throws IOException, SDIException, RIOException {
return new RIO( p,
species_tree,
algorithm,
final int last,
final boolean produce_log,
final boolean verbose,
- final boolean transfer_taxonomy ) throws IOException, SDIException,
- RIOException {
+ final boolean transfer_taxonomy )
+ throws IOException, SDIException, RIOException {
return new RIO( p,
species_tree,
algorithm,
final String outgroup,
final boolean produce_log,
final boolean verbose,
- final boolean transfer_taxonomy ) throws IOException, SDIException,
- RIOException {
+ final boolean transfer_taxonomy )
+ throws IOException, SDIException, RIOException {
return new RIO( gene_trees,
species_tree,
algorithm,
final int last,
final boolean produce_log,
final boolean verbose,
- final boolean transfer_taxonomy ) throws IOException, SDIException,
- RIOException {
+ final boolean transfer_taxonomy )
+ throws IOException, SDIException, RIOException {
return new RIO( gene_trees,
species_tree,
algorithm,
final REROOTING rerooting,
final String outgroup,
final int first,
- final int last ) throws RIOException, IOException {
+ final int last )
+ throws RIOException, IOException {
final Phylogeny g0 = p.next();
if ( ( g0 == null ) || g0.isEmpty() ) {
throw new RIOException( "input file does not seem to contain any gene trees" );
final REROOTING rerooting,
final String outgroup,
final int first,
- final int last ) throws RIOException {
+ final int last )
+ throws RIOException {
if ( !species_tree.isRooted() ) {
throw new RIOException( "species tree is not rooted" );
}
if ( n.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getName() ) ) {
label = n.getNodeData().getSequence().getName();
}
- else if ( n.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getSymbol() ) ) {
+ else if ( n.getNodeData().isHasSequence()
+ && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getSymbol() ) ) {
label = n.getNodeData().getSequence().getSymbol();
}
+ else if ( n.getNodeData().isHasSequence()
+ && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getGeneName() ) ) {
+ label = n.getNodeData().getSequence().getGeneName();
+ }
else if ( !ForesterUtil.isEmpty( n.getName() ) ) {
label = n.getName();
}
return label;
}
- private final static Phylogeny[] parseGeneTrees( final File gene_trees_file ) throws FileNotFoundException,
- IOException {
+ /**
+ * Parses the gene trees contained in a file.
+ * <p>
+ * The parser is chosen from the file type. NHX and Nexus parsers are
+ * configured to keep underscores, ignore quotes, and extract taxonomy
+ * aggressively; any other parser is used as created.
+ *
+ * @param gene_trees_file the file to read the gene trees from
+ * @return the phylogenies created from the file
+ * @throws FileNotFoundException declared; propagated from parser creation or parsing
+ * @throws IOException declared; propagated from parser creation or parsing
+ */
+ private final static Phylogeny[] parseGeneTrees( final File gene_trees_file )
+ throws FileNotFoundException, IOException {
final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( gene_trees_file, true );
if ( p instanceof NHXParser ) {
final NHXParser nhx = ( NHXParser ) p;
nhx.setReplaceUnderscores( false );
nhx.setIgnoreQuotes( true );
- nhx.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGRESSIVE );
+ nhx.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGGRESSIVE );
}
else if ( p instanceof NexusPhylogeniesParser ) {
final NexusPhylogeniesParser nex = ( NexusPhylogeniesParser ) p;
nex.setReplaceUnderscores( false );
nex.setIgnoreQuotes( true );
- nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGRESSIVE );
+ nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGGRESSIVE );
}
return factory.create( gene_trees_file, p );
}
}
+ /**
+ * Gene tree re-rooting options. The selected value is held in
+ * {@code _rerooting}, is reported in the log under "Re-rooting", and is
+ * compared against {@code BY_ALGORITHM} during GSDI-based inference.
+ */
public enum REROOTING {
- NONE, BY_ALGORITHM, MIDPOINT, OUTGROUP;
+ NONE,
+ BY_ALGORITHM,
+ MIDPOINT,
+ OUTGROUP;
}
}