+ /**
+  * Runs an analysis on the gene trees read from a file, without an explicit
+  * gene tree range.
+  * <p>
+  * This is a convenience overload: it forwards to the range-taking variant,
+  * supplying {@code DEFAULT_RANGE} for both range bounds.
+  *
+  * @param gene_trees_file file the gene trees are parsed from
+  * @param species_tree species tree passed on to the RIO constructor
+  * @param algorithm algorithm selector passed on to the RIO constructor
+  * @param rerooting re-rooting mode for the gene trees
+  * @param outgroup outgroup name, passed on unchanged
+  * @param produce_log passed on unchanged to the RIO constructor
+  * @param verbose passed on unchanged to the RIO constructor
+  * @param transfer_taxonomy passed on unchanged to the RIO constructor
+  * @return the newly constructed RIO object
+  * @throws IOException if parsing the gene tree file fails, or as propagated from the RIO constructor
+  * @throws SDIException as propagated from the RIO constructor
+  * @throws RIOException as propagated from the RIO constructor
+  */
+ public final static RIO executeAnalysis( final File gene_trees_file,
+                                          final Phylogeny species_tree,
+                                          final ALGORITHM algorithm,
+                                          final REROOTING rerooting,
+                                          final String outgroup,
+                                          final boolean produce_log,
+                                          final boolean verbose,
+                                          final boolean transfer_taxonomy ) throws IOException, SDIException,
+         RIOException {
+     // Same work as parsing here and constructing RIO directly; the ranged overload does exactly that.
+     return executeAnalysis( gene_trees_file,
+                             species_tree,
+                             algorithm,
+                             rerooting,
+                             outgroup,
+                             DEFAULT_RANGE,
+                             DEFAULT_RANGE,
+                             produce_log,
+                             verbose,
+                             transfer_taxonomy );
+ }
+
+ /**
+  * Runs an analysis on the gene trees read from a file, with an explicit
+  * gene tree range.
+  *
+  * @param gene_trees_file file the gene trees are parsed from (via {@code parseGeneTrees})
+  * @param species_tree species tree passed on to the RIO constructor
+  * @param algorithm algorithm selector passed on to the RIO constructor
+  * @param rerooting re-rooting mode for the gene trees
+  * @param outgroup outgroup name, passed on unchanged
+  * @param first first bound of the gene tree range, passed on unchanged
+  * @param last last bound of the gene tree range, passed on unchanged
+  * @param produce_log passed on unchanged to the RIO constructor
+  * @param verbose passed on unchanged to the RIO constructor
+  * @param transfer_taxonomy passed on unchanged to the RIO constructor
+  * @return the newly constructed RIO object
+  * @throws IOException if parsing the gene tree file fails, or as propagated from the RIO constructor
+  * @throws SDIException as propagated from the RIO constructor
+  * @throws RIOException as propagated from the RIO constructor
+  */
+ public final static RIO executeAnalysis( final File gene_trees_file,
+                                          final Phylogeny species_tree,
+                                          final ALGORITHM algorithm,
+                                          final REROOTING rerooting,
+                                          final String outgroup,
+                                          final int first,
+                                          final int last,
+                                          final boolean produce_log,
+                                          final boolean verbose,
+                                          final boolean transfer_taxonomy ) throws IOException, SDIException,
+         RIOException {
+     // Parse first, then hand the whole array to RIO.
+     final Phylogeny[] parsed_gene_trees = parseGeneTrees( gene_trees_file );
+     return new RIO( parsed_gene_trees,
+                     species_tree,
+                     algorithm,
+                     rerooting,
+                     outgroup,
+                     first,
+                     last,
+                     produce_log,
+                     verbose,
+                     transfer_taxonomy );
+ }
+
+ public final static RIO executeAnalysis( final IteratingPhylogenyParser p,
+ final File species_tree_file,
+ final ALGORITHM algorithm,
+ final REROOTING rerooting,
+ final String outgroup,
+ final int first,
+ final int last,
+ final boolean produce_log,
+ final boolean verbose,
+ final boolean transfer_taxonomy ) throws IOException, SDIException,
+ RIOException {
+ final Phylogeny g0 = p.next();
+ if ( ( g0 == null ) || g0.isEmpty() || ( g0.getNumberOfExternalNodes() < 2 ) ) {
+ throw new RIOException( "input file does not seem to contain any gene trees" );
+ }
+ final Phylogeny species_tree = SDIutil.parseSpeciesTree( g0,
+ species_tree_file,
+ false,
+ true,
+ TAXONOMY_EXTRACTION.NO );
+ p.reset();
+ return new RIO( p,
+ species_tree,
+ algorithm,
+ rerooting,
+ outgroup,
+ first,
+ last,
+ produce_log,
+ verbose,
+ transfer_taxonomy );
+ }
+
+ /**
+  * Runs an analysis on gene trees delivered by an iterating parser, without an
+  * explicit gene tree range.
+  * <p>
+  * Convenience overload: forwards to the range-taking parser variant with
+  * {@code DEFAULT_RANGE} for both range bounds.
+  *
+  * @param p source of the gene trees
+  * @param species_tree species tree passed on to the RIO constructor
+  * @param algorithm algorithm selector passed on to the RIO constructor
+  * @param rerooting re-rooting mode for the gene trees
+  * @param outgroup outgroup name, passed on unchanged
+  * @param produce_log passed on unchanged to the RIO constructor
+  * @param verbose passed on unchanged to the RIO constructor
+  * @param transfer_taxonomy passed on unchanged to the RIO constructor
+  * @return the newly constructed RIO object
+  * @throws IOException as propagated from the RIO constructor
+  * @throws SDIException as propagated from the RIO constructor
+  * @throws RIOException as propagated from the RIO constructor
+  */
+ public final static RIO executeAnalysis( final IteratingPhylogenyParser p,
+                                          final Phylogeny species_tree,
+                                          final ALGORITHM algorithm,
+                                          final REROOTING rerooting,
+                                          final String outgroup,
+                                          final boolean produce_log,
+                                          final boolean verbose,
+                                          final boolean transfer_taxonomy ) throws IOException, SDIException,
+         RIOException {
+     // The ranged overload constructs RIO with exactly these arguments.
+     return executeAnalysis( p,
+                             species_tree,
+                             algorithm,
+                             rerooting,
+                             outgroup,
+                             DEFAULT_RANGE,
+                             DEFAULT_RANGE,
+                             produce_log,
+                             verbose,
+                             transfer_taxonomy );
+ }
+
+ /**
+  * Runs an analysis on gene trees delivered by an iterating parser, with an
+  * explicit gene tree range.
+  * <p>
+  * All arguments are handed to the RIO constructor unchanged.
+  *
+  * @param p source of the gene trees
+  * @param species_tree species tree
+  * @param algorithm algorithm selector
+  * @param rerooting re-rooting mode for the gene trees
+  * @param outgroup outgroup name
+  * @param first first bound of the gene tree range
+  * @param last last bound of the gene tree range
+  * @param produce_log passed on unchanged to the RIO constructor
+  * @param verbose passed on unchanged to the RIO constructor
+  * @param transfer_taxonomy passed on unchanged to the RIO constructor
+  * @return the newly constructed RIO object
+  * @throws IOException as propagated from the RIO constructor
+  * @throws SDIException as propagated from the RIO constructor
+  * @throws RIOException as propagated from the RIO constructor
+  */
+ public final static RIO executeAnalysis( final IteratingPhylogenyParser p,
+                                          final Phylogeny species_tree,
+                                          final ALGORITHM algorithm,
+                                          final REROOTING rerooting,
+                                          final String outgroup,
+                                          final int first,
+                                          final int last,
+                                          final boolean produce_log,
+                                          final boolean verbose,
+                                          final boolean transfer_taxonomy ) throws IOException, SDIException,
+         RIOException {
+     final RIO rio = new RIO( p, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose, transfer_taxonomy );
+     return rio;
+ }
+
+ /**
+  * Runs an analysis on an array of gene trees using fixed default settings:
+  * {@code ALGORITHM.GSDIR}, {@code REROOTING.BY_ALGORITHM}, no outgroup,
+  * {@code DEFAULT_RANGE} for both range bounds, and the three boolean options
+  * (log production, verbosity, taxonomy transfer) all {@code false}.
+  *
+  * @param gene_trees gene trees to analyze
+  * @param species_tree species tree
+  * @return the newly constructed RIO object
+  * @throws IOException as propagated from the RIO constructor
+  * @throws SDIException as propagated from the RIO constructor
+  * @throws RIOException as propagated from the RIO constructor
+  */
+ public final static RIO executeAnalysis( final Phylogeny[] gene_trees, final Phylogeny species_tree )
+         throws IOException, SDIException, RIOException {
+     // The 8-argument array overload fills in DEFAULT_RANGE for both range bounds.
+     return executeAnalysis( gene_trees,
+                             species_tree,
+                             ALGORITHM.GSDIR,
+                             REROOTING.BY_ALGORITHM,
+                             null,
+                             false,
+                             false,
+                             false );
+ }
+
+ /**
+  * Runs an analysis on an array of gene trees, without an explicit gene tree
+  * range.
+  * <p>
+  * Convenience overload: forwards to the range-taking array variant with
+  * {@code DEFAULT_RANGE} for both range bounds.
+  *
+  * @param gene_trees gene trees to analyze
+  * @param species_tree species tree passed on to the RIO constructor
+  * @param algorithm algorithm selector passed on to the RIO constructor
+  * @param rerooting re-rooting mode for the gene trees
+  * @param outgroup outgroup name, passed on unchanged
+  * @param produce_log passed on unchanged to the RIO constructor
+  * @param verbose passed on unchanged to the RIO constructor
+  * @param transfer_taxonomy passed on unchanged to the RIO constructor
+  * @return the newly constructed RIO object
+  * @throws IOException as propagated from the RIO constructor
+  * @throws SDIException as propagated from the RIO constructor
+  * @throws RIOException as propagated from the RIO constructor
+  */
+ public final static RIO executeAnalysis( final Phylogeny[] gene_trees,
+                                          final Phylogeny species_tree,
+                                          final ALGORITHM algorithm,
+                                          final REROOTING rerooting,
+                                          final String outgroup,
+                                          final boolean produce_log,
+                                          final boolean verbose,
+                                          final boolean transfer_taxonomy ) throws IOException, SDIException,
+         RIOException {
+     // The ranged overload constructs RIO with exactly these arguments.
+     return executeAnalysis( gene_trees,
+                             species_tree,
+                             algorithm,
+                             rerooting,
+                             outgroup,
+                             DEFAULT_RANGE,
+                             DEFAULT_RANGE,
+                             produce_log,
+                             verbose,
+                             transfer_taxonomy );
+ }
+
+ /**
+  * Runs an analysis on an array of gene trees, with an explicit gene tree
+  * range.
+  * <p>
+  * All arguments are handed to the RIO constructor unchanged.
+  *
+  * @param gene_trees gene trees to analyze
+  * @param species_tree species tree
+  * @param algorithm algorithm selector
+  * @param rerooting re-rooting mode for the gene trees
+  * @param outgroup outgroup name
+  * @param first first bound of the gene tree range
+  * @param last last bound of the gene tree range
+  * @param produce_log passed on unchanged to the RIO constructor
+  * @param verbose passed on unchanged to the RIO constructor
+  * @param transfer_taxonomy passed on unchanged to the RIO constructor
+  * @return the newly constructed RIO object
+  * @throws IOException as propagated from the RIO constructor
+  * @throws SDIException as propagated from the RIO constructor
+  * @throws RIOException as propagated from the RIO constructor
+  */
+ public final static RIO executeAnalysis( final Phylogeny[] gene_trees,
+                                          final Phylogeny species_tree,
+                                          final ALGORITHM algorithm,
+                                          final REROOTING rerooting,
+                                          final String outgroup,
+                                          final int first,
+                                          final int last,
+                                          final boolean produce_log,
+                                          final boolean verbose,
+                                          final boolean transfer_taxonomy ) throws IOException, SDIException,
+         RIOException {
+     final RIO rio = new RIO( gene_trees, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose, transfer_taxonomy );
+     return rio;
+ }
+
+ /**
+  * Feeds one gene tree into the count matrix {@code _m}.
+  * <p>
+  * For the first tree ({@code counter == 0}) the matrix is created from the
+  * labels of the external nodes of {@code g}, optionally sorted. For every
+  * tree, including the first, the counts are then updated from {@code g}.
+  *
+  * @param g gene tree to process
+  * @param sort whether the labels are sorted before the matrix is created (only used when {@code counter == 0})
+  * @param counter index of the gene tree; 0 triggers creation of the matrix
+  * @throws RIOException if a node has no usable label, a label is not unique,
+  *         or a matrix label is missing from {@code g}
+  */
+ private final static void calculateOrthologTable( final Phylogeny g, final boolean sort, final int counter )
+         throws RIOException {
+     final boolean is_first_tree = ( counter == 0 );
+     if ( is_first_tree ) {
+         // 'seen' is only used by obtainLabel to reject duplicates; 'ordered' keeps the node order.
+         final Set<String> seen = new HashSet<String>();
+         final List<String> ordered = new ArrayList<String>();
+         for( final PhylogenyNode ext_node : g.getExternalNodes() ) {
+             final String node_label = obtainLabel( seen, ext_node );
+             ordered.add( node_label );
+             seen.add( node_label );
+         }
+         if ( sort ) {
+             Collections.sort( ordered );
+         }
+         _m = new IntMatrix( ordered );
+     }
+     updateCounts( _m, counter, g );
+ }
+
+ /**
+  * Validates the inputs of an analysis whose gene trees come from an
+  * iterating parser.
+  * <p>
+  * Only the first tree returned by {@code p.next()} is inspected. This method
+  * does not reset {@code p} afterwards.
+  * NOTE(review): callers presumably reset the parser themselves -- confirm.
+  *
+  * @param p source of the gene trees; its first tree is consumed
+  * @param species_tree species tree, must be rooted
+  * @param rerooting re-rooting mode
+  * @param outgroup outgroup name; must be set exactly when {@code rerooting} is {@code REROOTING.OUTGROUP}
+  * @param first first bound of the gene tree range
+  * @param last last bound of the gene tree range
+  * @throws RIOException if any precondition is violated
+  * @throws IOException as propagated from {@code p.next()}
+  */
+ private final static void checkPreconditions( final IteratingPhylogenyParser p,
+                                               final Phylogeny species_tree,
+                                               final REROOTING rerooting,
+                                               final String outgroup,
+                                               final int first,
+                                               final int last ) throws RIOException, IOException {
+     final Phylogeny g0 = p.next();
+     if ( ( g0 == null ) || g0.isEmpty() ) {
+         throw new RIOException( "input file does not seem to contain any gene trees" );
+     }
+     if ( g0.getNumberOfExternalNodes() < 2 ) {
+         throw new RIOException( "input file does not seem to contain any useable gene trees" );
+     }
+     if ( !species_tree.isRooted() ) {
+         throw new RIOException( "species tree is not rooted" );
+     }
+     // A range is only validated when at least one bound differs from DEFAULT_RANGE.
+     if ( !( ( last == DEFAULT_RANGE ) && ( first == DEFAULT_RANGE ) )
+             && ( ( last < first ) || ( last < 0 ) || ( first < 0 ) ) ) {
+         throw new RIOException( "attempt to set range (0-based) of gene to analyze to: from " + first + " to "
+                 + last );
+     }
+     // The two messages below used to say "midpoint rooting" although the
+     // conditions are about outgroup rooting.
+     if ( ( rerooting == REROOTING.OUTGROUP ) && ForesterUtil.isEmpty( outgroup ) ) {
+         throw new RIOException( "outgroup not set for outgroup rooting" );
+     }
+     if ( ( rerooting != REROOTING.OUTGROUP ) && !ForesterUtil.isEmpty( outgroup ) ) {
+         throw new RIOException( "outgroup only used for outgroup rooting" );
+     }
+     if ( ( rerooting == REROOTING.MIDPOINT ) && ( PhylogenyMethods.calculateMaxDistanceToRoot( g0 ) <= 0 ) ) {
+         throw new RIOException( "attempt to use midpoint rooting on gene trees which seem to have no (positive) branch lengths (cladograms)" );
+     }
+     if ( rerooting == REROOTING.OUTGROUP ) {
+         // getNode is used purely as a check here; its IllegalArgumentException is
+         // translated into a RIOException.
+         try {
+             g0.getNode( outgroup );
+         }
+         catch ( final IllegalArgumentException e ) {
+             throw new RIOException( "cannot perform re-rooting by outgroup: " + e.getLocalizedMessage() );
+         }
+     }
+ }
+
+ /**
+  * Validates the inputs of an analysis whose gene trees are given as an
+  * array.
+  * <p>
+  * The re-rooting related checks inspect only {@code gene_trees[ 0 ]}.
+  *
+  * @param gene_trees gene trees to analyze; must be non-null and non-empty
+  * @param species_tree species tree, must be rooted
+  * @param rerooting re-rooting mode
+  * @param outgroup outgroup name; must be set exactly when {@code rerooting} is {@code REROOTING.OUTGROUP}
+  * @param first first bound of the gene tree range
+  * @param last last bound of the gene tree range; must be smaller than {@code gene_trees.length}
+  *        when a range is set
+  * @throws RIOException if any precondition is violated
+  */
+ private final static void checkPreconditions( final Phylogeny[] gene_trees,
+                                               final Phylogeny species_tree,
+                                               final REROOTING rerooting,
+                                               final String outgroup,
+                                               final int first,
+                                               final int last ) throws RIOException {
+     // Without this guard a null or empty array surfaces below as a
+     // NullPointerException or ArrayIndexOutOfBoundsException rather than a RIOException.
+     if ( ( gene_trees == null ) || ( gene_trees.length < 1 ) ) {
+         throw new RIOException( "no gene trees to analyze" );
+     }
+     if ( !species_tree.isRooted() ) {
+         throw new RIOException( "species tree is not rooted" );
+     }
+     // A range is only validated when at least one bound differs from DEFAULT_RANGE.
+     if ( !( ( last == DEFAULT_RANGE ) && ( first == DEFAULT_RANGE ) )
+             && ( ( last < first ) || ( last >= gene_trees.length ) || ( last < 0 ) || ( first < 0 ) ) ) {
+         throw new RIOException( "attempt to set range (0-based) of gene to analyze to: from " + first + " to "
+                 + last + " (out of " + gene_trees.length + ")" );
+     }
+     // The two messages below used to say "midpoint rooting" although the
+     // conditions are about outgroup rooting.
+     if ( ( rerooting == REROOTING.OUTGROUP ) && ForesterUtil.isEmpty( outgroup ) ) {
+         throw new RIOException( "outgroup not set for outgroup rooting" );
+     }
+     if ( ( rerooting != REROOTING.OUTGROUP ) && !ForesterUtil.isEmpty( outgroup ) ) {
+         throw new RIOException( "outgroup only used for outgroup rooting" );
+     }
+     if ( ( rerooting == REROOTING.MIDPOINT )
+             && ( PhylogenyMethods.calculateMaxDistanceToRoot( gene_trees[ 0 ] ) <= 0 ) ) {
+         throw new RIOException( "attempt to use midpoint rooting on gene trees which seem to have no (positive) branch lengths (cladograms)" );
+     }
+     if ( rerooting == REROOTING.OUTGROUP ) {
+         // getNode is used purely as a check here; its IllegalArgumentException is
+         // translated into a RIOException.
+         try {
+             gene_trees[ 0 ].getNode( outgroup );
+         }
+         catch ( final IllegalArgumentException e ) {
+             throw new RIOException( "cannot perform re-rooting by outgroup: " + e.getLocalizedMessage() );
+         }
+     }
+ }
+
+ /**
+  * Determines the label to use for a node and verifies that it has not been
+  * used before.
+  * <p>
+  * Order of preference: sequence name, sequence symbol, sequence gene name
+  * (these three only if the node has a sequence), then the node name. The
+  * label is not added to {@code labels_set} here; that is left to the caller.
+  *
+  * @param labels_set labels already in use
+  * @param n node to obtain a label for
+  * @return the label chosen for {@code n}
+  * @throws RIOException if none of the candidates is non-empty, or if the chosen
+  *         label is already contained in {@code labels_set}
+  */
+ private final static String obtainLabel( final Set<String> labels_set, final PhylogenyNode n ) throws RIOException {
+     String label = null;
+     if ( n.getNodeData().isHasSequence() ) {
+         // Sequence-derived candidates, most preferred first.
+         if ( !ForesterUtil.isEmpty( n.getNodeData().getSequence().getName() ) ) {
+             label = n.getNodeData().getSequence().getName();
+         }
+         else if ( !ForesterUtil.isEmpty( n.getNodeData().getSequence().getSymbol() ) ) {
+             label = n.getNodeData().getSequence().getSymbol();
+         }
+         else if ( !ForesterUtil.isEmpty( n.getNodeData().getSequence().getGeneName() ) ) {
+             label = n.getNodeData().getSequence().getGeneName();
+         }
+     }
+     if ( label == null ) {
+         // No usable sequence field: fall back on the node's own name.
+         if ( ForesterUtil.isEmpty( n.getName() ) ) {
+             throw new RIOException( "node " + n + " has no appropriate label" );
+         }
+         label = n.getName();
+     }
+     if ( labels_set.contains( label ) ) {
+         throw new RIOException( "label " + label + " is not unique" );
+     }
+     return label;
+ }
+
+ /**
+  * Parses all gene trees contained in a file.
+  * <p>
+  * The parser is chosen by {@code ParserUtils.createParserDependingOnFileType}.
+  * NHX and Nexus parsers are configured identically: underscores are not
+  * replaced, quotes are ignored and taxonomy extraction is set to
+  * {@code TAXONOMY_EXTRACTION.AGGRESSIVE}. Any other parser is used as
+  * returned.
+  *
+  * @param gene_trees_file file to read the gene trees from
+  * @return the phylogenies created from the file
+  * @throws FileNotFoundException as propagated from parser creation or tree creation
+  * @throws IOException as propagated from parser creation or tree creation
+  */
+ private final static Phylogeny[] parseGeneTrees( final File gene_trees_file ) throws FileNotFoundException,
+         IOException {
+     final PhylogenyFactory phylogeny_factory = ParserBasedPhylogenyFactory.getInstance();
+     final PhylogenyParser parser = ParserUtils.createParserDependingOnFileType( gene_trees_file, true );
+     if ( parser instanceof NHXParser ) {
+         ( ( NHXParser ) parser ).setReplaceUnderscores( false );
+         ( ( NHXParser ) parser ).setIgnoreQuotes( true );
+         ( ( NHXParser ) parser ).setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGGRESSIVE );
+     }
+     else if ( parser instanceof NexusPhylogeniesParser ) {
+         ( ( NexusPhylogeniesParser ) parser ).setReplaceUnderscores( false );
+         ( ( NexusPhylogeniesParser ) parser ).setIgnoreQuotes( true );
+         ( ( NexusPhylogeniesParser ) parser ).setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGGRESSIVE );
+     }
+     final Phylogeny[] gene_trees = phylogeny_factory.create( gene_trees_file, parser );
+     return gene_trees;
+ }
+
+ /**
+  * Deletes internal nodes with only one descendant from the species tree, if
+  * there are any.
+  *
+  * @param species_tree species tree to clean up; modified in place when such nodes exist
+  * @param verbose if {@code true}, a warning with the number of affected nodes is
+  *        printed to standard output before they are removed
+  */
+ private final static void removeSingleDescendentsNodes( final Phylogeny species_tree, final boolean verbose ) {
+     final int one_desc_count = PhylogenyMethods.countNumberOfOneDescendantNodes( species_tree );
+     if ( one_desc_count <= 0 ) {
+         // Nothing to remove.
+         return;
+     }
+     if ( verbose ) {
+         System.out.println( "warning: species tree has " + one_desc_count
+                 + " internal nodes with only one descendent which are therefore going to be removed" );
+     }
+     PhylogenyMethods.deleteInternalNodesWithOnlyOneDescendent( species_tree );
+ }
+
+ /**
+  * Updates the count matrix from one gene tree.
+  * <p>
+  * For every ordered pair of matrix labels {@code (x, y)}, including
+  * {@code x == y}, the cell is increased by one when the node returned by
+  * {@code PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder} for the two
+  * corresponding external nodes of {@code g} is not a duplication.
+  * <p>
+  * All labels are resolved to nodes and validated before any cell is
+  * touched, so the matrix is left unchanged when a label is missing, and
+  * each label is looked up only once instead of once per pair.
+  *
+  * @param m matrix whose labels select the external nodes and whose cells are updated
+  * @param counter index of the gene tree, used only in the error message
+  * @param g gene tree; its node ids are reassigned by {@code PhylogenyMethods.preOrderReId}
+  * @throws RIOException if a label of {@code m} has no external node in {@code g}
+  */
+ private final static void updateCounts( final IntMatrix m, final int counter, final Phylogeny g )
+         throws RIOException {
+     PhylogenyMethods.preOrderReId( g );
+     final HashMap<String, PhylogenyNode> map = PhylogenyMethods.createNameToExtNodeMap( g );
+     final int size = m.size();
+     // Resolve every label once, in index order, so the first missing label is the one reported.
+     final PhylogenyNode[] nodes = new PhylogenyNode[ size ];
+     for( int i = 0; i < size; ++i ) {
+         final String label = m.getLabel( i );
+         final PhylogenyNode node = map.get( label );
+         if ( node == null ) {
+             throw new RIOException( "node \"" + label + "\" not present in gene tree #" + counter );
+         }
+         nodes[ i ] = node;
+     }
+     for( int x = 0; x < size; ++x ) {
+         for( int y = 0; y < size; ++y ) {
+             if ( !PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( nodes[ x ], nodes[ y ] ).isDuplication() ) {
+                 m.inreaseByOne( x, y );
+             }
+         }
+     }
+ }
+
+ public enum REROOTING {
+ NONE, BY_ALGORITHM, MIDPOINT, OUTGROUP;