final static private String PRG_NAME = "rio";
final static private String PRG_VERSION = "4.000 beta 3";
- final static private String PRG_DATE = "2012.12.17";
+ final static private String PRG_DATE = "2012.12.18";
final static private String E_MAIL = "czmasek@burnham.org";
final static private String WWW = "www.phylosoft.org/forester/";
final static private String HELP_OPTION_1 = "help";
else {
logfile = null;
}
+ boolean sdir = false;
+ if ( cla.isOptionSet( USE_SDIR ) ) {
+ if ( cla.isOptionHasAValue( USE_SDIR ) ) {
+ ForesterUtil.fatalError( PRG_NAME, "no value allowed for -" + USE_SDIR );
+ }
+ sdir = true;
+ if ( logfile != null ) {
+ ForesterUtil.fatalError( PRG_NAME, "no logfile output for SDIR algorithm" );
+ }
+ }
String outgroup = null;
if ( cla.isOptionSet( OUTGROUP ) ) {
if ( !cla.isOptionHasAValue( OUTGROUP ) ) {
ForesterUtil.fatalError( PRG_NAME, "no value for -" + OUTGROUP );
}
+ if ( sdir ) {
+ ForesterUtil.fatalError( PRG_NAME, "no outgroup option for SDIR algorithm" );
+ }
outgroup = cla.getOptionValueAsCleanString( OUTGROUP );
}
REROOTING rerooting = REROOTING.BY_ALGORITHM;
if ( !cla.isOptionHasAValue( REROOTING_OPT ) ) {
ForesterUtil.fatalError( PRG_NAME, "no value for -" + REROOTING_OPT );
}
+ if ( sdir ) {
+ ForesterUtil.fatalError( PRG_NAME, "no re-rooting option for SDIR algorithm" );
+ }
final String rerooting_str = cla.getOptionValueAsCleanString( REROOTING_OPT ).toLowerCase();
if ( rerooting_str.equals( "none" ) ) {
rerooting = REROOTING.NONE;
rerooting = REROOTING.OUTGROUP;
}
else {
- ForesterUtil.fatalError( PRG_NAME, "legal values for -" + REROOTING_OPT
- + " are: none, midpoint, or outgroup (minizming duplications is default)" );
+ ForesterUtil
+ .fatalError( PRG_NAME,
+ "values for re-rooting are: 'none', 'midpoint', or 'outgroup' (minizming duplications is default)" );
}
}
- int gt_first = -1;
- int gt_last = -1;
+ if ( ForesterUtil.isEmpty( outgroup ) && ( rerooting == REROOTING.OUTGROUP ) ) {
+ ForesterUtil.fatalError( PRG_NAME, "selected re-rooting by outgroup, but outgroup not set" );
+ }
+ if ( !ForesterUtil.isEmpty( outgroup ) && ( rerooting != REROOTING.OUTGROUP ) ) {
+ ForesterUtil.fatalError( PRG_NAME, "outgroup set, but selected re-rooting by other approach" );
+ }
+ int gt_first = RIO.DEFAULT_RANGE;
+ int gt_last = RIO.DEFAULT_RANGE;
if ( cla.isOptionSet( GT_FIRST ) ) {
if ( !cla.isOptionHasAValue( GT_FIRST ) ) {
ForesterUtil.fatalError( PRG_NAME, "no value for -" + GT_FIRST );
}
+ if ( sdir ) {
+ ForesterUtil.fatalError( PRG_NAME, "no gene tree range option for SDIR algorithm" );
+ }
try {
gt_first = cla.getOptionValueAsInt( GT_FIRST );
}
- catch ( IOException e ) {
- ForesterUtil.fatalError( PRG_NAME, "could not parse integer for -" + GT_FIRST );
+ catch ( final IOException e ) {
+ ForesterUtil.fatalError( PRG_NAME, "could not parse integer for -" + GT_FIRST + " option" );
+ }
+ if ( gt_first < 0 ) {
+ ForesterUtil.fatalError( PRG_NAME, "attempt to set index of first tree to analyze to: " + gt_first );
}
}
if ( cla.isOptionSet( GT_LAST ) ) {
if ( !cla.isOptionHasAValue( GT_LAST ) ) {
ForesterUtil.fatalError( PRG_NAME, "no value for -" + GT_LAST );
}
+ if ( sdir ) {
+ ForesterUtil.fatalError( PRG_NAME, "no gene tree range option for SDIR algorithm" );
+ }
try {
gt_last = cla.getOptionValueAsInt( GT_LAST );
}
- catch ( IOException e ) {
- ForesterUtil.fatalError( PRG_NAME, "could not parse integer for -" + GT_LAST );
+ catch ( final IOException e ) {
+ ForesterUtil.fatalError( PRG_NAME, "could not parse integer for -" + GT_LAST + " option" );
+ }
+ if ( gt_last < 0 ) {
+ ForesterUtil.fatalError( PRG_NAME, "attempt to set index of last tree to analyze to: " + gt_last );
}
}
+ if ( ( ( gt_last != RIO.DEFAULT_RANGE ) && ( gt_first != RIO.DEFAULT_RANGE ) ) && ( ( gt_last < gt_first ) ) ) {
+ ForesterUtil.fatalError( PRG_NAME, "attempt to set range (0-based) of gene to analyze to: from " + gt_first
+ + " to " + gt_last );
+ }
ForesterUtil.fatalErrorIfFileNotReadable( PRG_NAME, gene_trees_file );
ForesterUtil.fatalErrorIfFileNotReadable( PRG_NAME, species_tree_file );
if ( orthology_outtable.exists() ) {
ForesterUtil.fatalError( PRG_NAME, "\"" + orthology_outtable + "\" already exists" );
}
- boolean sdir = false;
- if ( cla.isOptionSet( USE_SDIR ) ) {
- if ( cla.isOptionHasAValue( USE_SDIR ) ) {
- ForesterUtil.fatalError( PRG_NAME, "no value allowed for -" + USE_SDIR );
- }
- sdir = true;
- if ( logfile != null ) {
- ForesterUtil.fatalError( PRG_NAME, "logfile output only for GSDIR algorithm" );
- }
- }
long time = 0;
System.out.println( "Gene trees : " + gene_trees_file );
System.out.println( "Species tree : " + species_tree_file );
System.out.println( "All vs all orthology table: " + orthology_outtable );
- if ( !sdir ) {
- if ( logfile != null ) {
- System.out.println( "Logfile : " + logfile );
+ if ( logfile != null ) {
+ System.out.println( "Logfile : " + logfile );
+ }
+ if ( gt_first != RIO.DEFAULT_RANGE ) {
+ System.out.println( "First gene tree to analyze: " + gt_first );
+ }
+ if ( gt_last != RIO.DEFAULT_RANGE ) {
+ System.out.println( "Last gene tree to analyze : " + gt_last );
+ }
+ String rerooting_str = "";
+ switch ( rerooting ) {
+ case BY_ALGORITHM: {
+ rerooting_str = "by minimizing duplications";
+ break;
+ }
+ case MIDPOINT: {
+ rerooting_str = "by midpoint method";
+ break;
}
- System.out.println( "Non binary species tree : allowed (GSDIR algorithm)" );
+ case OUTGROUP: {
+ rerooting_str = "by outgroup: " + outgroup;
+ break;
+ }
+ case NONE: {
+ rerooting_str = "none";
+ break;
+ }
+ }
+ System.out.println( "Re-rooting : " + rerooting_str );
+ if ( !sdir ) {
+ System.out.println( "Non binary species tree : allowed" );
}
else {
- System.out.println( "Non binary species tree : disallowed (SDIR algorithm)" );
+ System.out.println( "Non binary species tree : disallowed" );
}
time = System.currentTimeMillis();
Phylogeny species_tree = null;
algorithm,
rerooting,
outgroup,
+ gt_first,
+ gt_last,
logfile != null,
true );
if ( algorithm == ALGORITHM.GSDIR ) {
+ " [options] <gene trees infile> <species tree infile> <all vs all orthology table outfile> [logfile]" );
System.out.println();
System.out.println( " Options" );
- System.out.println( " -" + GT_FIRST + "=<first> : to" );
- System.out.println( " -" + GT_LAST + "=<last> : to" );
- System.out.println( " -" + REROOTING_OPT + " : to" );
- System.out.println( " -" + OUTGROUP + "=<outgroup>: tp" );
+ System.out.println( " -" + GT_FIRST + "=<first> : first gene tree to analyze (0-based index)" );
+ System.out.println( " -" + GT_LAST + "=<last> : last gene tree to analyze (0-based index)" );
+ System.out.println( " -" + REROOTING_OPT
+ + "=<re-rooting>: re-rooting method for gene trees, possible values or 'none', 'midpoint'," );
+ System.out.println( " or 'outgroup' (default: by minizming duplications)" );
+ System.out.println( " -" + OUTGROUP
+ + "=<outgroup> : for rooting by outgroup, name of outgroup (external gene tree node)" );
System.out.println( " -" + USE_SDIR
- + " : to use SDIR instead of GSDIR (faster, but non-binary species trees are disallowed)" );
+ + " : to use SDIR instead of GSDIR (faster, but non-binary species trees are" );
+ System.out.println( " disallowed, as are most options)" );
System.out.println();
System.out.println( " Formats" );
System.out.println( " The species tree is expected to be in phyloXML format." );
}
final List<PhylogenyNode> nodes = getNodes( name );
if ( ( nodes == null ) || ( nodes.size() < 1 ) ) {
- throw new IllegalArgumentException( "node named [" + name + "] not found" );
+ throw new IllegalArgumentException( "node named \"" + name + "\" not found" );
}
if ( nodes.size() > 1 ) {
- throw new IllegalArgumentException( "node named [" + name + "] not unique" );
+ throw new IllegalArgumentException( "node named \"" + name + "\" not unique" );
}
return nodes.get( 0 );
}
public final class RIO {
+ public static final int DEFAULT_RANGE = -1;
private Phylogeny[] _analyzed_gene_trees;
private List<PhylogenyNode> _removed_gene_tree_nodes;
private int _ext_nodes;
final ALGORITHM algorithm,
final REROOTING rerooting,
final String outgroup,
- final int first,
- final int last,
+ int first,
+ int last,
final boolean produce_log,
final boolean verbose ) throws IOException, SDIException, RIOException {
- if ( !ForesterUtil.isEmpty( outgroup ) && ( rerooting != REROOTING.OUTGROUP ) ) {
- throw new IllegalArgumentException( "can only use outgroup when re-rooting by outgroup" );
+ if ( ( last == DEFAULT_RANGE ) && ( first >= 0 ) ) {
+ last = gene_trees.length - 1;
}
- if ( !( ( last == -1 ) && ( first == -1 ) )
- && ( ( last < first ) || ( last >= gene_trees.length ) || ( first >= gene_trees.length ) || ( last < 0 ) || ( first < 0 ) ) ) {
- throw new IllegalArgumentException( "gene tree range is out of range: " + first + "-" + last );
+ else if ( ( first == DEFAULT_RANGE ) && ( last >= 0 ) ) {
+ first = 0;
}
+ checkPreconditions( gene_trees, rerooting, outgroup, first, last );
_produce_log = produce_log;
_verbose = verbose;
_rerooting = rerooting;
throw new RIOException( "failed to establish species based mapping between gene and species trees" );
}
}
- if ( log() ) {
- preLog( gene_trees, species_tree, algorithm, outgroup );
- }
final Phylogeny[] my_gene_trees;
if ( ( first >= 0 ) && ( last >= first ) && ( last < gene_trees.length ) ) {
- if ( log() ) {
- log( "Gene tree range: " + first + "-" + last );
- }
my_gene_trees = new Phylogeny[ 1 + last - first ];
int c = 0;
for( int i = first; i <= last; ++i ) {
else {
my_gene_trees = gene_trees;
}
- if ( _verbose && ( my_gene_trees.length > 10 ) ) {
+ if ( log() ) {
+ preLog( gene_trees, species_tree, algorithm, outgroup, first, last );
+ }
+ if ( _verbose && ( my_gene_trees.length >= 4 ) ) {
System.out.println();
}
_analyzed_gene_trees = new Phylogeny[ my_gene_trees.length ];
int gene_tree_ext_nodes = 0;
for( int i = 0; i < my_gene_trees.length; ++i ) {
final Phylogeny gt = my_gene_trees[ i ];
- if ( _verbose && ( my_gene_trees.length > 10 ) ) {
+ if ( _verbose && ( my_gene_trees.length > 4 ) ) {
ForesterUtil.updateProgress( ( ( double ) i ) / my_gene_trees.length );
}
if ( i == 0 ) {
if ( log() ) {
postLog( species_tree );
}
- if ( _verbose && ( my_gene_trees.length > 10 ) ) {
+ if ( _verbose && ( my_gene_trees.length > 4 ) ) {
System.out.println();
System.out.println();
}
PhylogenyMethods.midpointRoot( gene_tree );
}
else if ( _rerooting == REROOTING.OUTGROUP ) {
- PhylogenyNode n;
- try {
- n = gene_tree.getNode( outgroup );
- }
- catch ( final IllegalArgumentException e ) {
- throw new RIOException( "failed to perform re-rooting by outgroup: " + e.getLocalizedMessage() );
- }
+ final PhylogenyNode n = gene_tree.getNode( outgroup );
gene_tree.reRoot( n );
}
final GSDI gsdi = new GSDI( gene_tree, species_tree, true, true, true );
private final void preLog( final Phylogeny[] gene_trees,
final Phylogeny species_tree,
final ALGORITHM algorithm,
- final String outgroup ) {
+ final String outgroup,
+ final int first,
+ final int last ) {
log( "Number of gene tree (total) : " + gene_trees.length );
log( "Algorithm : " + algorithm );
log( "Species tree external nodes (prior to stripping): " + species_tree.getNumberOfExternalNodes() );
}
}
log( "Re-rooting : " + rs );
+ if ( ( first >= 0 ) || ( last >= 0 ) ) {
+ log( "Gene trees analyzed range : " + first + "-" + last );
+ }
if ( _rerooting == REROOTING.BY_ALGORITHM ) {
writeLogSubHeader();
}
nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES );
}
final Phylogeny[] gene_trees = factory.create( gene_trees_file, p );
- return new RIO( gene_trees, species_tree, algorithm, rerooting, outgroup, -1, -1, produce_log, verbose );
+ return new RIO( gene_trees,
+ species_tree,
+ algorithm,
+ rerooting,
+ outgroup,
+ DEFAULT_RANGE,
+ DEFAULT_RANGE,
+ produce_log,
+ verbose );
}
-
+
public final static RIO executeAnalysis( final File gene_trees_file,
final Phylogeny species_tree,
final ALGORITHM algorithm,
+ /**
+  * Convenience entry point that runs a RIO analysis with default settings:
+  * the GSDIR algorithm, re-rooting BY_ALGORITHM, no outgroup, logging off
+  * and verbose output off. Both ends of the gene tree range are passed as
+  * DEFAULT_RANGE, i.e. no explicit range is requested.
+  *
+  * @param gene_trees the gene trees handed to the RIO constructor
+  * @param species_tree the species tree handed to the RIO constructor
+  * @return the newly constructed RIO object
+  * @throws IOException propagated from the RIO constructor
+  * @throws SDIException propagated from the RIO constructor
+  * @throws RIOException propagated from the RIO constructor
+  */
public final static RIO executeAnalysis( final Phylogeny[] gene_trees, final Phylogeny species_tree )
throws IOException, SDIException, RIOException {
- return new RIO( gene_trees, species_tree, ALGORITHM.GSDIR, REROOTING.BY_ALGORITHM, null, -1, -1, false, false );
+ return new RIO( gene_trees,
+ species_tree,
+ ALGORITHM.GSDIR,
+ REROOTING.BY_ALGORITHM,
+ null,
+ DEFAULT_RANGE,
+ DEFAULT_RANGE,
+ false,
+ false );
}
public final static RIO executeAnalysis( final Phylogeny[] gene_trees,
final String outgroup,
final boolean produce_log,
final boolean verbose ) throws IOException, SDIException, RIOException {
- return new RIO( gene_trees, species_tree, algorithm, rerooting, outgroup, -1, -1, produce_log, verbose );
+ return new RIO( gene_trees,
+ species_tree,
+ algorithm,
+ rerooting,
+ outgroup,
+ DEFAULT_RANGE,
+ DEFAULT_RANGE,
+ produce_log,
+ verbose );
}
-
+
public final static RIO executeAnalysis( final Phylogeny[] gene_trees,
final Phylogeny species_tree,
final ALGORITHM algorithm,
return new RIO( gene_trees, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose );
}
+ /**
+  * Validates the arguments of a RIO analysis before any work is done.
+  * <p>
+  * Checks, in order:
+  * <ol>
+  * <li>unless both range ends are DEFAULT_RANGE, the 0-based range must
+  * satisfy 0 &lt;= first &lt;= last &lt; gene_trees.length;</li>
+  * <li>an outgroup name is present if, and only if, re-rooting is
+  * REROOTING.OUTGROUP;</li>
+  * <li>for REROOTING.MIDPOINT, the first gene tree must have a positive
+  * maximal distance to the root;</li>
+  * <li>for REROOTING.OUTGROUP, the outgroup must be resolvable via
+  * getNode on the first gene tree.</li>
+  * </ol>
+  * Only the first gene tree (index 0) is inspected for the last two checks.
+  *
+  * @param gene_trees the gene trees to be analyzed
+  * @param rerooting the selected re-rooting method
+  * @param outgroup the outgroup name, expected to be empty or null unless
+  *            re-rooting by outgroup
+  * @param first 0-based index of the first gene tree, or DEFAULT_RANGE
+  * @param last 0-based index of the last gene tree, or DEFAULT_RANGE
+  * @throws RIOException if any of the checks above fails
+  */
+ private final static void checkPreconditions( final Phylogeny[] gene_trees,
+                                               final REROOTING rerooting,
+                                               final String outgroup,
+                                               final int first,
+                                               final int last ) throws RIOException {
+     if ( !( ( last == DEFAULT_RANGE ) && ( first == DEFAULT_RANGE ) )
+             && ( ( last < first ) || ( last >= gene_trees.length ) || ( last < 0 ) || ( first < 0 ) ) ) {
+         throw new RIOException( "attempt to set range (0-based) of gene to analyze to: from " + first + " to "
+                 + last + " (out of " + gene_trees.length + ")" );
+     }
+     // The two messages below used to say "midpoint rooting" although both
+     // conditions are about re-rooting by outgroup.
+     if ( ( rerooting == REROOTING.OUTGROUP ) && ForesterUtil.isEmpty( outgroup ) ) {
+         throw new RIOException( "outgroup not set for re-rooting by outgroup" );
+     }
+     if ( ( rerooting != REROOTING.OUTGROUP ) && !ForesterUtil.isEmpty( outgroup ) ) {
+         throw new RIOException( "outgroup only used for re-rooting by outgroup" );
+     }
+     if ( ( rerooting == REROOTING.MIDPOINT )
+             && ( PhylogenyMethods.calculateMaxDistanceToRoot( gene_trees[ 0 ] ) <= 0 ) ) {
+         throw new RIOException( "attempt to use midpoint rooting on gene trees which seem to have no (positive) branch lengths (cladograms)" );
+     }
+     if ( rerooting == REROOTING.OUTGROUP ) {
+         try {
+             // Result is discarded: the lookup is done only to fail early
+             // when the outgroup cannot be resolved.
+             gene_trees[ 0 ].getNode( outgroup );
+         }
+         catch ( final IllegalArgumentException e ) {
+             throw new RIOException( "cannot perform re-rooting by outgroup: " + e.getLocalizedMessage() );
+         }
+     }
+ }
+
+ /**
+  * Methods for re-rooting gene trees prior to analysis.
+  * <ul>
+  * <li>NONE: reported as "none" by the command-line tool.</li>
+  * <li>BY_ALGORITHM: reported as "by minimizing duplications"; this is the
+  * command-line default.</li>
+  * <li>MIDPOINT: gene trees are rooted with PhylogenyMethods.midpointRoot.</li>
+  * <li>OUTGROUP: gene trees are re-rooted on the node named by the outgroup
+  * argument.</li>
+  * </ul>
+  */
public enum REROOTING {
NONE, BY_ALGORITHM, MIDPOINT, OUTGROUP;
}
+ /**
+  * Returns sumDeviations() divided by (getN() - 1), after calling
+  * validate(). With fewer than two values the result is 0 rather than an
+  * ArithmeticException, so callers no longer need to special-case small
+  * samples.
+  * NOTE(review): sumDeviations() is presumably the sum of squared
+  * deviations from the mean -- confirm against its definition.
+  *
+  * @return the sample variance, or 0 if getN() is less than 2
+  */
public double sampleVariance() {
validate();
if ( getN() < 2 ) {
- throw new ArithmeticException( "attempt to calculate sample variance for less then two values" );
+ return 0;
}
return ( sumDeviations() / ( getN() - 1 ) );
}