import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
+import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
private final REROOTING _rerooting;
private final Phylogeny _species_tree;
private Phylogeny _min_dub_gene_tree;
+ private Map<Integer, Phylogeny> _dup_to_tree_map; // duplication count -> one gene tree with that count (on ties the tree with the smaller max distance to root is kept); created lazily on first insert
private RIO( final IteratingPhylogenyParser p,
final Phylogeny species_tree,
int last,
final boolean produce_log,
final boolean verbose,
- final boolean transfer_taxonomy ) throws IOException, SDIException, RIOException {
+ final boolean transfer_taxonomy )
+ throws IOException, SDIException, RIOException {
if ( ( last == DEFAULT_RANGE ) && ( first >= 0 ) ) {
last = END_OF_GT;
}
int last,
final boolean produce_log,
final boolean verbose,
- final boolean transfer_taxonomy ) throws IOException, SDIException, RIOException {
+ final boolean transfer_taxonomy )
+ throws IOException, SDIException, RIOException {
if ( ( last == DEFAULT_RANGE ) && ( first >= 0 ) ) {
last = gene_trees.length - 1;
}
final String outgroup,
int first,
final int last,
- final boolean transfer_taxonomy ) throws SDIException, RIOException,
- FileNotFoundException, IOException {
+ final boolean transfer_taxonomy )
+ throws SDIException, RIOException, FileNotFoundException, IOException {
if ( !parser.hasNext() ) {
throw new RIOException( "no gene trees to analyze" );
}
++i;
}
if ( _verbose ) {
- System.out.print( "\rGene trees analyzed :\t" + counter );
+ System.out.print( "\rGene trees analyzed :\t" + counter );
}
if ( ( first >= 0 ) && ( counter == 0 ) && ( i > 0 ) ) {
throw new RIOException( "attempt to analyze first gene tree #" + first + " in a set of " + i );
final String outgroup,
final int first,
final int last,
- final boolean transfer_taxonomy ) throws SDIException, RIOException,
- FileNotFoundException, IOException {
+ final boolean transfer_taxonomy )
+ throws SDIException, RIOException, FileNotFoundException, IOException {
if ( algorithm == ALGORITHM.SDIR ) {
// Removes from species_tree all species not found in gene_tree.
PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( gene_trees[ 0 ], species_tree );
sb.append( '\t' );
sb.append( s );
}
- log( "Species stripped from gene trees :" + sb);
+ log( "Species stripped from gene trees :" + sb );
}
private final Phylogeny performOrthologInference( final Phylogeny gene_tree,
final ALGORITHM algorithm,
final String outgroup,
final int i,
- final boolean transfer_taxonomy ) throws SDIException,
- RIOException {
+ final boolean transfer_taxonomy )
+ throws SDIException, RIOException {
final Phylogeny assigned_tree;
switch ( algorithm ) {
case SDIR: {
break;
}
case GSDIR: {
- assigned_tree = performOrthologInferenceByGSDI( gene_tree, species_tree, outgroup, i, transfer_taxonomy );
+ assigned_tree = performOrthologInferenceByGSDI( gene_tree,
+ species_tree,
+ outgroup,
+ i,
+ transfer_taxonomy );
break;
}
default: {
final Phylogeny species_tree,
final String outgroup,
final int i,
- final boolean transfer_taxonomy ) throws SDIException,
- RIOException {
+ final boolean transfer_taxonomy )
+ throws SDIException, RIOException {
final Phylogeny assigned_tree;
final int dups;
if ( _rerooting == REROOTING.BY_ALGORITHM ) {
for( final PhylogenyNode r : _removed_gene_tree_nodes ) {
if ( !r.getNodeData().isHasTaxonomy() ) {
throw new RIOException( "node with no (appropriate) taxonomic information found in gene tree #" + i
- + ": " + r.toString() );
+ + ": " + r.toString() );
}
}
assigned_tree = gene_tree;
}
dups = gsdi.getDuplicationsSum();
}
+ assigned_tree.setRerootable( false );
+ double new_dist = -1;
if ( ( i == 0 ) || ( dups < _duplications_stats.getMin() ) ) {
_min_dub_gene_tree = assigned_tree;
- _min_dub_gene_tree.setRerootable( false );
+ }
+ else if ( dups == _duplications_stats.getMin() ) {
+ new_dist = PhylogenyMethods.calculateMaxDistanceToRoot( assigned_tree );
+ if ( new_dist < PhylogenyMethods.calculateMaxDistanceToRoot( _min_dub_gene_tree ) ) {
+ _min_dub_gene_tree = assigned_tree;
+ }
+ }
+ if ( _dup_to_tree_map == null ) {
+ _dup_to_tree_map = new HashMap<Integer, Phylogeny>();
+ }
+ if ( !_dup_to_tree_map.containsKey( dups ) ) {
+ _dup_to_tree_map.put( dups, assigned_tree );
+ }
+ else {
+ if ( new_dist == -1 ) {
+ new_dist = PhylogenyMethods.calculateMaxDistanceToRoot( assigned_tree );
+ }
+ if ( new_dist < PhylogenyMethods.calculateMaxDistanceToRoot( _dup_to_tree_map.get( dups ) ) ) {
+ _dup_to_tree_map.put( dups, assigned_tree );
+ }
}
_duplications_stats.addValue( dups );
return assigned_tree;
}
+ final public Map<Integer, Phylogeny> getDuplicationsToTreeMap() { // accessor for the duplication-count -> gene-tree map
+ return _dup_to_tree_map; // hands out the internal map itself, not a copy; NOTE(review): map looks lazily created, so this may be null if nothing was recorded -- confirm
+ }
+
private final Phylogeny performOrthologInferenceBySDI( final Phylogeny gene_tree, final Phylogeny species_tree )
throws SDIException {
final SDIR sdir = new SDIR();
- return sdir.infer( gene_tree, species_tree, false, true, true, true, 1 )[ 0 ];
+ final Phylogeny r = sdir.infer( gene_tree, species_tree, false, true, true, true, 1 )[ 0 ]; // keep only the first element of the array returned by infer()
+ r.setRerootable( false ); // flag the result as not rerootable before returning it
+ final int dups = sdir.getMinimalDuplications();
+ _duplications_stats.addValue( dups ); // record the value from sdir.getMinimalDuplications() in _duplications_stats
+ return r;
}
private final void postLog( final Phylogeny species_tree, final int first, final int last ) {
final double min_count_percentage = ( 100.0 * min_count ) / getDuplicationsStatistics().getN();
final double max_count_percentage = ( 100.0 * max_count ) / getDuplicationsStatistics().getN();
final double median_count_percentage = ( 100.0 * median_count ) / getDuplicationsStatistics().getN();
-
-
if ( ( getRemovedGeneTreeNodes() != null ) && ( getRemovedGeneTreeNodes().size() > 0 ) ) {
logRemovedGeneTreeNodes();
}
-
log( "Gene trees analyzed :\t" + getDuplicationsStatistics().getN() );
if ( ( first >= 0 ) && ( last >= 0 ) ) {
log( "Gene trees analyzed range :\t" + first + "-" + last );
log( "Gene tree external nodes :\t" + getExtNodesOfAnalyzedGeneTrees() );
log( "Removed ext gene tree nodes :\t" + getRemovedGeneTreeNodes().size() );
log( "Spec tree ext nodes (after strip) :\t" + species_tree.getNumberOfExternalNodes() );
- log( "Spec tree polytomies (after strip) :\t"
- + PhylogenyMethods.countNumberOfPolytomies( species_tree ) );
+ log( "Spec tree polytomies (after strip) :\t" + PhylogenyMethods.countNumberOfPolytomies( species_tree ) );
log( "Taxonomy linking based on :\t" + getGSDIRtaxCompBase() );
log( "Mean number of duplications :\t" + df.format( getDuplicationsStatistics().arithmeticMean() )
- + "\t" + df.format( ( 100.0 * getDuplicationsStatistics().arithmeticMean() ) / getIntNodesOfAnalyzedGeneTrees() )
+ + "\t"
+ + df.format( ( 100.0 * getDuplicationsStatistics().arithmeticMean() )
+ / getIntNodesOfAnalyzedGeneTrees() )
+ "%\t(sd: " + df.format( getDuplicationsStatistics().sampleStandardDeviation() ) + ")" );
if ( getDuplicationsStatistics().getN() > 3 ) {
log( "Median number of duplications :\t" + df.format( median ) + "\t"
+ df.format( ( 100.0 * min ) / getIntNodesOfAnalyzedGeneTrees() ) + "%" );
log( "Maximum duplications :\t" + ( int ) max + "\t"
+ df.format( ( 100.0 * max ) / getIntNodesOfAnalyzedGeneTrees() ) + "%" );
- log( "Gene trees with median duplications :\t" + median_count + "\t"
- + df.format( median_count_percentage ) + "%" );
- log( "Gene trees with minimum duplications:\t" + min_count + "\t"
- + df.format( min_count_percentage ) + "%" );
- log( "Gene trees with maximum duplications:\t" + max_count + "\t"
- + df.format( max_count_percentage ) + "%" );
-
+ log( "Gene trees with median duplications :\t" + median_count + "\t" + df.format( median_count_percentage )
+ + "%" );
+ log( "Gene trees with minimum duplications:\t" + min_count + "\t" + df.format( min_count_percentage ) + "%" );
+ log( "Gene trees with maximum duplications:\t" + max_count + "\t" + df.format( max_count_percentage ) + "%" );
}
private final void preLog( final int gene_trees,
if ( gene_trees > 0 ) {
log( "Number of gene trees (total) :\t" + gene_trees );
}
-
log( "Algorithm :\t" + algorithm );
log( "Spec tree ext nodes (prior strip) :\t" + species_tree.getNumberOfExternalNodes() );
- log( "Spec tree polytomies (prior strip) :\t"
- + PhylogenyMethods.countNumberOfPolytomies( species_tree ) );
+ log( "Spec tree polytomies (prior strip) :\t" + PhylogenyMethods.countNumberOfPolytomies( species_tree ) );
String rs = "";
switch ( _rerooting ) {
case BY_ALGORITHM: {
}
}
log( "Re-rooting :\t" + rs );
-
}
public final static IntMatrix calculateOrthologTable( final Phylogeny[] analyzed_gene_trees, final boolean sort )
final int last,
final boolean produce_log,
final boolean verbose,
- final boolean transfer_taxonomy ) throws IOException, SDIException,
- RIOException {
+ final boolean transfer_taxonomy )
+ throws IOException, SDIException, RIOException {
final Phylogeny[] gene_trees = parseGeneTrees( gene_trees_file );
if ( gene_trees.length < 1 ) {
throw new RIOException( "\"" + gene_trees_file + "\" is devoid of appropriate gene trees" );
}
- final Phylogeny species_tree = SDIutil.parseSpeciesTree( gene_trees[ 0 ],
- species_tree_file,
- false,
- true,
- TAXONOMY_EXTRACTION.NO );
+ final Phylogeny species_tree = SDIutil
+ .parseSpeciesTree( gene_trees[ 0 ], species_tree_file, false, true, TAXONOMY_EXTRACTION.NO );
return new RIO( gene_trees,
species_tree,
algorithm,
final String outgroup,
final boolean produce_log,
final boolean verbose,
- final boolean transfer_taxonomy ) throws IOException, SDIException,
- RIOException {
+ final boolean transfer_taxonomy )
+ throws IOException, SDIException, RIOException {
return new RIO( parseGeneTrees( gene_trees_file ),
species_tree,
algorithm,
final int last,
final boolean produce_log,
final boolean verbose,
- final boolean transfer_taxonomy ) throws IOException, SDIException,
- RIOException {
+ final boolean transfer_taxonomy )
+ throws IOException, SDIException, RIOException {
return new RIO( parseGeneTrees( gene_trees_file ),
species_tree,
algorithm,
final int last,
final boolean produce_log,
final boolean verbose,
- final boolean transfer_taxonomy ) throws IOException, SDIException,
- RIOException {
+ final boolean transfer_taxonomy )
+ throws IOException, SDIException, RIOException {
final Phylogeny g0 = p.next();
if ( ( g0 == null ) || g0.isEmpty() || ( g0.getNumberOfExternalNodes() < 2 ) ) {
throw new RIOException( "input file does not seem to contain any gene trees" );
}
- final Phylogeny species_tree = SDIutil.parseSpeciesTree( g0,
- species_tree_file,
- false,
- true,
- TAXONOMY_EXTRACTION.NO );
+ final Phylogeny species_tree = SDIutil
+ .parseSpeciesTree( g0, species_tree_file, false, true, TAXONOMY_EXTRACTION.NO );
p.reset();
return new RIO( p,
species_tree,
final String outgroup,
final boolean produce_log,
final boolean verbose,
- final boolean transfer_taxonomy ) throws IOException, SDIException,
- RIOException {
+ final boolean transfer_taxonomy )
+ throws IOException, SDIException, RIOException {
return new RIO( p,
species_tree,
algorithm,
final int last,
final boolean produce_log,
final boolean verbose,
- final boolean transfer_taxonomy ) throws IOException, SDIException,
- RIOException {
+ final boolean transfer_taxonomy )
+ throws IOException, SDIException, RIOException {
return new RIO( p,
species_tree,
algorithm,
final String outgroup,
final boolean produce_log,
final boolean verbose,
- final boolean transfer_taxonomy ) throws IOException, SDIException,
- RIOException {
+ final boolean transfer_taxonomy )
+ throws IOException, SDIException, RIOException {
return new RIO( gene_trees,
species_tree,
algorithm,
final int last,
final boolean produce_log,
final boolean verbose,
- final boolean transfer_taxonomy ) throws IOException, SDIException,
- RIOException {
+ final boolean transfer_taxonomy )
+ throws IOException, SDIException, RIOException {
return new RIO( gene_trees,
species_tree,
algorithm,
final REROOTING rerooting,
final String outgroup,
final int first,
- final int last ) throws RIOException, IOException {
+ final int last )
+ throws RIOException, IOException {
final Phylogeny g0 = p.next();
if ( ( g0 == null ) || g0.isEmpty() ) {
throw new RIOException( "input file does not seem to contain any gene trees" );
final REROOTING rerooting,
final String outgroup,
final int first,
- final int last ) throws RIOException {
+ final int last )
+ throws RIOException {
if ( !species_tree.isRooted() ) {
throw new RIOException( "species tree is not rooted" );
}
if ( n.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getName() ) ) {
label = n.getNodeData().getSequence().getName();
}
- else if ( n.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getSymbol() ) ) {
+ else if ( n.getNodeData().isHasSequence()
+ && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getSymbol() ) ) {
label = n.getNodeData().getSequence().getSymbol();
}
else if ( n.getNodeData().isHasSequence()
return label;
}
- private final static Phylogeny[] parseGeneTrees( final File gene_trees_file ) throws FileNotFoundException,
- IOException {
+ private final static Phylogeny[] parseGeneTrees( final File gene_trees_file )
+ throws FileNotFoundException, IOException {
final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( gene_trees_file, true );
if ( p instanceof NHXParser ) {
if ( o > 0 ) {
if ( verbose ) {
System.out.println( "warning: species tree has " + o
- + " internal nodes with only one descendent which are therefore going to be removed" );
+ + " internal nodes with only one descendent which are therefore going to be removed" );
}
PhylogenyMethods.deleteInternalNodesWithOnlyOneDescendent( species_tree );
}
}
public enum REROOTING {
- NONE, BY_ALGORITHM, MIDPOINT, OUTGROUP;
+ NONE,
+ BY_ALGORITHM, // compared against _rerooting ( _rerooting == REROOTING.BY_ALGORITHM ) in the ortholog-inference code
+ MIDPOINT,
+ OUTGROUP;
}
}