int first,
int last,
final boolean produce_log,
- final boolean verbose ) throws IOException, SDIException, RIOException {
+ final boolean verbose,
+ final boolean transfer_taxonomy ) throws IOException, SDIException, RIOException {
if ( ( last == DEFAULT_RANGE ) && ( first >= 0 ) ) {
last = END_OF_GT;
}
_removed_gene_tree_nodes = null;
_duplications_stats = new BasicDescriptiveStatistics();
p.reset();
- inferOrthologs( p, species_tree, algorithm, outgroup, first, last );
+ inferOrthologs( p, species_tree, algorithm, outgroup, first, last, transfer_taxonomy );
_species_tree = species_tree;
}
int first,
int last,
final boolean produce_log,
- final boolean verbose ) throws IOException, SDIException, RIOException {
+ final boolean verbose,
+ final boolean transfer_taxonomy ) throws IOException, SDIException, RIOException {
if ( ( last == DEFAULT_RANGE ) && ( first >= 0 ) ) {
last = gene_trees.length - 1;
}
_analyzed_gene_trees = null;
_removed_gene_tree_nodes = null;
_duplications_stats = new BasicDescriptiveStatistics();
- inferOrthologs( gene_trees, species_tree, algorithm, outgroup, first, last );
+ inferOrthologs( gene_trees, species_tree, algorithm, outgroup, first, last, transfer_taxonomy );
_species_tree = species_tree;
}
/**
* Returns the number of ext nodes in gene trees analyzed (after
* stripping).
- *
+ *
* @return number of ext nodes in gene trees analyzed (after stripping)
*/
public final int getExtNodesOfAnalyzedGeneTrees() {
/**
* Returns the number of int nodes in gene trees analyzed (after
* stripping).
- *
+ *
* @return number of int nodes in gene trees analyzed (after stripping)
*/
public final int getIntNodesOfAnalyzedGeneTrees() {
final ALGORITHM algorithm,
final String outgroup,
int first,
- final int last ) throws SDIException, RIOException, FileNotFoundException,
- IOException {
+ final int last,
+ final boolean transfer_taxonomy ) throws SDIException, RIOException,
+ FileNotFoundException, IOException {
if ( !parser.hasNext() ) {
throw new RIOException( "no gene trees to analyze" );
}
while ( parser.hasNext() ) {
final Phylogeny gt = parser.next();
if ( no_range || ( ( i >= first ) && ( i <= last ) ) ) {
+ if ( gt.isEmpty() ) {
+ throw new RIOException( "gene tree #" + i + " is empty" );
+ }
+ if ( gt.getNumberOfExternalNodes() == 1 ) {
+ throw new RIOException( "gene tree #" + i + " has only one external node" );
+ }
if ( _verbose ) {
ForesterUtil.updateProgress( i, pf );
}
throw new RIOException( "failed to establish species based mapping between gene and species trees" );
}
}
- final Phylogeny analyzed_gt = performOrthologInference( gt, species_tree, algorithm, outgroup, counter );
+ final Phylogeny analyzed_gt = performOrthologInference( gt,
+ species_tree,
+ algorithm,
+ outgroup,
+ counter,
+ transfer_taxonomy );
RIO.calculateOrthologTable( analyzed_gt, true, counter );
++counter;
}
first = 0;
}
if ( log() ) {
- postLog( species_tree, first, first + counter - 1 );
+ postLog( species_tree, first, ( first + counter ) - 1 );
}
if ( _verbose ) {
System.out.println();
final ALGORITHM algorithm,
final String outgroup,
final int first,
- final int last ) throws SDIException, RIOException, FileNotFoundException,
- IOException {
+ final int last,
+ final boolean transfer_taxonomy ) throws SDIException, RIOException,
+ FileNotFoundException, IOException {
if ( algorithm == ALGORITHM.SDIR ) {
// Removes from species_tree all species not found in gene_tree.
PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( gene_trees[ 0 ], species_tree );
int gene_tree_ext_nodes = 0;
for( int i = 0; i < my_gene_trees.length; ++i ) {
final Phylogeny gt = my_gene_trees[ i ];
+ if ( gt.isEmpty() ) {
+ throw new RIOException( "gene tree #" + i + " is empty" );
+ }
+ if ( gt.getNumberOfExternalNodes() == 1 ) {
+ throw new RIOException( "gene tree #" + i + " has only one external node" );
+ }
if ( _verbose && ( my_gene_trees.length > 4 ) ) {
ForesterUtil.updateProgress( ( ( double ) i ) / my_gene_trees.length );
}
throw new RIOException( "failed to establish species based mapping between gene and species trees" );
}
}
- _analyzed_gene_trees[ i ] = performOrthologInference( gt, species_tree, algorithm, outgroup, i );
+ _analyzed_gene_trees[ i ] = performOrthologInference( gt,
+ species_tree,
+ algorithm,
+ outgroup,
+ i,
+ transfer_taxonomy );
}
if ( log() ) {
postLog( species_tree, first, last );
final Phylogeny species_tree,
final ALGORITHM algorithm,
final String outgroup,
- final int i ) throws SDIException, RIOException {
+ final int i,
+ final boolean transfer_taxonomy ) throws SDIException,
+ RIOException {
final Phylogeny assigned_tree;
switch ( algorithm ) {
case SDIR: {
break;
}
case GSDIR: {
- assigned_tree = performOrthologInferenceByGSDI( gene_tree, species_tree, outgroup, i );
+ assigned_tree = performOrthologInferenceByGSDI( gene_tree, species_tree, outgroup, i, transfer_taxonomy );
break;
}
default: {
private final Phylogeny performOrthologInferenceByGSDI( final Phylogeny gene_tree,
final Phylogeny species_tree,
final String outgroup,
- final int i ) throws SDIException, RIOException {
+ final int i,
+ final boolean transfer_taxonomy ) throws SDIException,
+ RIOException {
final Phylogeny assigned_tree;
final int dups;
if ( _rerooting == REROOTING.BY_ALGORITHM ) {
- final GSDIR gsdir = new GSDIR( gene_tree, species_tree, true, i == 0 );
+ final GSDIR gsdir = new GSDIR( gene_tree, species_tree, true, i == 0, transfer_taxonomy );
assigned_tree = gsdir.getMinDuplicationsSumGeneTree();
if ( i == 0 ) {
_removed_gene_tree_nodes = gsdir.getStrippedExternalGeneTreeNodes();
final PhylogenyNode n = gene_tree.getNode( outgroup );
gene_tree.reRoot( n );
}
- final GSDI gsdi = new GSDI( gene_tree, species_tree, true, true, true );
+ final GSDI gsdi = new GSDI( gene_tree, species_tree, true, true, true, transfer_taxonomy );
_removed_gene_tree_nodes = gsdi.getStrippedExternalGeneTreeNodes();
for( final PhylogenyNode r : _removed_gene_tree_nodes ) {
if ( !r.getNodeData().isHasTaxonomy() ) {
throw new RIOException( "node with no (appropriate) taxonomic information found in gene tree #" + i
- + ": " + r.toString() );
+ + ": " + r.toString() );
}
}
assigned_tree = gene_tree;
}
log( "Gene trees analyzed : " + _duplications_stats.getN() );
log( "Mean number of duplications : " + df.format( _duplications_stats.arithmeticMean() )
- + " (sd: " + df.format( _duplications_stats.sampleStandardDeviation() ) + ")" + " ("
- + df.format( ( 100.0 * _duplications_stats.arithmeticMean() ) / getIntNodesOfAnalyzedGeneTrees() )
- + "%)" );
+ + " (sd: " + df.format( _duplications_stats.sampleStandardDeviation() ) + ")" + " ("
+ + df.format( ( 100.0 * _duplications_stats.arithmeticMean() ) / getIntNodesOfAnalyzedGeneTrees() )
+ + "%)" );
if ( _duplications_stats.getN() > 3 ) {
log( "Median number of duplications : " + df.format( _duplications_stats.median() )
- + " (" + df.format( ( 100.0 * _duplications_stats.median() ) / getIntNodesOfAnalyzedGeneTrees() )
- + "%)" );
+ + " (" + df.format( ( 100.0 * _duplications_stats.median() ) / getIntNodesOfAnalyzedGeneTrees() )
+ + "%)" );
}
log( "Minimum duplications : " + ( int ) _duplications_stats.getMin() + " ("
+ df.format( ( 100.0 * _duplications_stats.getMin() ) / getIntNodesOfAnalyzedGeneTrees() ) + "%)" );
final int first,
final int last,
final boolean produce_log,
- final boolean verbose ) throws IOException, SDIException, RIOException {
+ final boolean verbose,
+ final boolean transfer_taxonomy ) throws IOException, SDIException,
+ RIOException {
final Phylogeny[] gene_trees = parseGeneTrees( gene_trees_file );
if ( gene_trees.length < 1 ) {
throw new RIOException( "\"" + gene_trees_file + "\" is devoid of appropriate gene trees" );
false,
true,
TAXONOMY_EXTRACTION.NO );
- return new RIO( gene_trees, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose );
+ return new RIO( gene_trees,
+ species_tree,
+ algorithm,
+ rerooting,
+ outgroup,
+ first,
+ last,
+ produce_log,
+ verbose,
+ transfer_taxonomy );
}
public final static RIO executeAnalysis( final File gene_trees_file,
final REROOTING rerooting,
final String outgroup,
final boolean produce_log,
- final boolean verbose ) throws IOException, SDIException, RIOException {
+ final boolean verbose,
+ final boolean transfer_taxonomy ) throws IOException, SDIException,
+ RIOException {
return new RIO( parseGeneTrees( gene_trees_file ),
species_tree,
algorithm,
DEFAULT_RANGE,
DEFAULT_RANGE,
produce_log,
- verbose );
+ verbose,
+ transfer_taxonomy );
}
public final static RIO executeAnalysis( final File gene_trees_file,
final int first,
final int last,
final boolean produce_log,
- final boolean verbose ) throws IOException, SDIException, RIOException {
+ final boolean verbose,
+ final boolean transfer_taxonomy ) throws IOException, SDIException,
+ RIOException {
return new RIO( parseGeneTrees( gene_trees_file ),
species_tree,
algorithm,
first,
last,
produce_log,
- verbose );
+ verbose,
+ transfer_taxonomy );
}
public final static RIO executeAnalysis( final IteratingPhylogenyParser p,
final int first,
final int last,
final boolean produce_log,
- final boolean verbose ) throws IOException, SDIException, RIOException {
+ final boolean verbose,
+ final boolean transfer_taxonomy ) throws IOException, SDIException,
+ RIOException {
final Phylogeny g0 = p.next();
if ( ( g0 == null ) || g0.isEmpty() || ( g0.getNumberOfExternalNodes() < 2 ) ) {
throw new RIOException( "input file does not seem to contain any gene trees" );
true,
TAXONOMY_EXTRACTION.NO );
p.reset();
- return new RIO( p, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose );
+ return new RIO( p,
+ species_tree,
+ algorithm,
+ rerooting,
+ outgroup,
+ first,
+ last,
+ produce_log,
+ verbose,
+ transfer_taxonomy );
}
public final static RIO executeAnalysis( final IteratingPhylogenyParser p,
final REROOTING rerooting,
final String outgroup,
final boolean produce_log,
- final boolean verbose ) throws IOException, SDIException, RIOException {
+ final boolean verbose,
+ final boolean transfer_taxonomy ) throws IOException, SDIException,
+ RIOException {
return new RIO( p,
species_tree,
algorithm,
DEFAULT_RANGE,
DEFAULT_RANGE,
produce_log,
- verbose );
+ verbose,
+ transfer_taxonomy );
}
public final static RIO executeAnalysis( final IteratingPhylogenyParser p,
final int first,
final int last,
final boolean produce_log,
- final boolean verbose ) throws IOException, SDIException, RIOException {
- return new RIO( p, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose );
+ final boolean verbose,
+ final boolean transfer_taxonomy ) throws IOException, SDIException,
+ RIOException {
+ return new RIO( p,
+ species_tree,
+ algorithm,
+ rerooting,
+ outgroup,
+ first,
+ last,
+ produce_log,
+ verbose,
+ transfer_taxonomy );
}
public final static RIO executeAnalysis( final Phylogeny[] gene_trees, final Phylogeny species_tree )
DEFAULT_RANGE,
DEFAULT_RANGE,
false,
+ false,
false );
}
final REROOTING rerooting,
final String outgroup,
final boolean produce_log,
- final boolean verbose ) throws IOException, SDIException, RIOException {
+ final boolean verbose,
+ final boolean transfer_taxonomy ) throws IOException, SDIException,
+ RIOException {
return new RIO( gene_trees,
species_tree,
algorithm,
DEFAULT_RANGE,
DEFAULT_RANGE,
produce_log,
- verbose );
+ verbose,
+ transfer_taxonomy );
}
public final static RIO executeAnalysis( final Phylogeny[] gene_trees,
final int first,
final int last,
final boolean produce_log,
- final boolean verbose ) throws IOException, SDIException, RIOException {
- return new RIO( gene_trees, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose );
+ final boolean verbose,
+ final boolean transfer_taxonomy ) throws IOException, SDIException,
+ RIOException {
+ return new RIO( gene_trees,
+ species_tree,
+ algorithm,
+ rerooting,
+ outgroup,
+ first,
+ last,
+ produce_log,
+ verbose,
+ transfer_taxonomy );
}
private final static void calculateOrthologTable( final Phylogeny g, final boolean sort, final int counter )
else if ( n.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getSymbol() ) ) {
label = n.getNodeData().getSequence().getSymbol();
}
+ else if ( n.getNodeData().isHasSequence()
+ && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getGeneName() ) ) {
+ label = n.getNodeData().getSequence().getGeneName();
+ }
else if ( !ForesterUtil.isEmpty( n.getName() ) ) {
label = n.getName();
}
}
/**
* Reads the gene trees stored in the given file and returns them as an array
* of phylogenies.
* <p>
* The parser is obtained from
* ParserUtils.createParserDependingOnFileType. When that parser is an
* NHXParser or a NexusPhylogeniesParser it is reconfigured before use:
* underscore replacement is switched off, quote ignoring is switched on, and
* the taxonomy extraction mode is set. Any other parser type is used exactly
* as returned.
* <p>
* NOTE(review): this block is shown in patch form; lines marked "-" are the
* previous version and lines marked "+" are the new version. The functional
* change is the taxonomy extraction mode, which moves from
* TAXONOMY_EXTRACTION.YES to TAXONOMY_EXTRACTION.AGGRESSIVE for both parser
* types.
*
* @param gene_trees_file the file from which gene trees are parsed
* @return the phylogenies produced by the factory for the given file and parser
* @throws FileNotFoundException declared by the signature; presumably raised
*             when the file cannot be opened (confirm against the parser/factory)
* @throws IOException declared by the signature; presumably raised on read
*             failure (confirm against the parser/factory)
*/
private final static Phylogeny[] parseGeneTrees( final File gene_trees_file ) throws FileNotFoundException,
- IOException {
+ IOException {
final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
// NOTE(review): the meaning of the second argument (true) is not visible here; confirm in ParserUtils.
final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( gene_trees_file, true );
if ( p instanceof NHXParser ) {
// NHX input: adjust parser options before the factory consumes the file.
final NHXParser nhx = ( NHXParser ) p;
nhx.setReplaceUnderscores( false );
nhx.setIgnoreQuotes( true );
// Changed by this patch: extraction mode YES (old) becomes AGGRESSIVE (new).
- nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES );
+ nhx.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGGRESSIVE );
}
else if ( p instanceof NexusPhylogeniesParser ) {
// Nexus input: same three options as the NHX branch.
final NexusPhylogeniesParser nex = ( NexusPhylogeniesParser ) p;
nex.setReplaceUnderscores( false );
nex.setIgnoreQuotes( true );
// Changed by this patch: extraction mode YES (old) becomes AGGRESSIVE (new).
- nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.YES );
+ nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGGRESSIVE );
}
// The (possibly reconfigured) parser is handed to the factory together with the file.
return factory.create( gene_trees_file, p );
}
if ( o > 0 ) {
if ( verbose ) {
System.out.println( "warning: species tree has " + o
- + " internal nodes with only one descendent which are therefore going to be removed" );
+ + " internal nodes with only one descendent which are therefore going to be removed" );
}
PhylogenyMethods.deleteInternalNodesWithOnlyOneDescendent( species_tree );
}