From: cmzmasek@gmail.com Date: Tue, 11 Dec 2012 04:59:28 +0000 (+0000) Subject: "rio" work X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;ds=sidebyside;h=9a9e40a5768c100cbbb70e32a2637c82890d2d53;p=jalview.git "rio" work --- diff --git a/forester/java/src/org/forester/application/gsdi.java b/forester/java/src/org/forester/application/gsdi.java index eab52df..e062730 100644 --- a/forester/java/src/org/forester/application/gsdi.java +++ b/forester/java/src/org/forester/application/gsdi.java @@ -50,6 +50,7 @@ import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.sdi.GSDI; import org.forester.sdi.GSDIR; import org.forester.sdi.SDI; +import org.forester.sdi.SDI.ALGORITHM; import org.forester.sdi.SDI.TaxonomyComparisonBase; import org.forester.sdi.SDIException; import org.forester.sdi.SDIse; @@ -60,9 +61,6 @@ import org.forester.util.ForesterUtil; public final class gsdi { - private enum BASE_ALGORITHM { - GSDIR, GSDI, SDI - } final static public boolean REPLACE_UNDERSCORES_IN_NH_SPECIES_TREE = true; final static private String ALLOW_STRIPPING_OF_GENE_TREE_OPTION = "g"; final static private String SDISE_OPTION = "b"; @@ -127,23 +125,23 @@ public final class gsdi { } private static void execute( final CommandLineArguments cla ) throws IOException { - BASE_ALGORITHM base_algorithm = BASE_ALGORITHM.GSDI; + ALGORITHM base_algorithm = ALGORITHM.GSDI; boolean most_parsimonous_duplication_model = false; boolean allow_stripping_of_gene_tree = false; if ( cla.isOptionSet( gsdi.GSDIR_OPTION ) ) { - base_algorithm = BASE_ALGORITHM.GSDIR; + base_algorithm = ALGORITHM.GSDIR; } else if ( cla.isOptionSet( gsdi.SDISE_OPTION ) ) { - base_algorithm = BASE_ALGORITHM.SDI; + base_algorithm = ALGORITHM.SDI; } if ( cla.isOptionSet( gsdi.MOST_PARSIMONIOUS_OPTION ) ) { - if ( base_algorithm == BASE_ALGORITHM.SDI ) { + if ( base_algorithm == ALGORITHM.SDI ) { ForesterUtil.fatalError( gsdi.PRG_NAME, "Cannot use most parsimonious duplication mode with SDI" ); } most_parsimonous_duplication_model = true; } if ( cla.isOptionSet( gsdi.ALLOW_STRIPPING_OF_GENE_TREE_OPTION ) ) { - if ( base_algorithm == BASE_ALGORITHM.SDI ) { + if ( base_algorithm == ALGORITHM.SDI ) { ForesterUtil.fatalError( gsdi.PRG_NAME, "Cannot allow stripping of gene tree with SDI" ); } allow_stripping_of_gene_tree = true; @@ -254,7 +252,7 @@ public final class gsdi { if ( !gene_tree.isCompletelyBinary() ) { fatalError( "user error", "gene tree is not completely binary", log_writer ); } - if ( base_algorithm == BASE_ALGORITHM.SDI ) { + if ( base_algorithm == ALGORITHM.SDI ) { if ( !species_tree.isCompletelyBinary() ) { fatalError( "user error", "species tree is not completely binary, use GSDI or GSDIR instead", @@ -285,12 +283,12 @@ public final class gsdi { SDI sdi = null; final long start_time = new Date().getTime(); try { - if ( ( base_algorithm == BASE_ALGORITHM.GSDI ) || ( base_algorithm == BASE_ALGORITHM.GSDIR ) ) { - if ( base_algorithm == BASE_ALGORITHM.GSDI ) { + if ( ( base_algorithm == ALGORITHM.GSDI ) || ( base_algorithm == ALGORITHM.GSDIR ) ) { + if ( base_algorithm == ALGORITHM.GSDI ) { System.out.println( "Algorithm : GSDI" ); log_writer.println( "Algorithm : GSDI" ); } - else if ( base_algorithm == BASE_ALGORITHM.GSDIR ) { + else if ( base_algorithm == ALGORITHM.GSDIR ) { System.out.println( "Algorithm : GSDIR" ); log_writer.println( "Algorithm : GSDIR" ); } @@ -299,14 +297,14 @@ public final class gsdi { log_writer.println( "Use most parsimonous duplication model : " + most_parsimonous_duplication_model ); log_writer.println( "Allow stripping of gene tree nodes : " + allow_stripping_of_gene_tree ); log_writer.flush(); - if ( base_algorithm == BASE_ALGORITHM.GSDI ) { + if ( base_algorithm == ALGORITHM.GSDI ) { sdi = new GSDI( gene_tree, species_tree, most_parsimonous_duplication_model, allow_stripping_of_gene_tree, true ); } - else if ( base_algorithm == BASE_ALGORITHM.GSDIR ) { + else if ( base_algorithm == ALGORITHM.GSDIR ) { sdi = new GSDIR( gene_tree, species_tree, allow_stripping_of_gene_tree, 1 ); } } @@ -332,12 +330,12 @@ public final class gsdi { + "ms" ); log_writer.println( "Running time (excluding I/O) : " + ( new Date().getTime() - start_time ) + "ms" ); - if ( ( base_algorithm == BASE_ALGORITHM.GSDI ) ) { + if ( ( base_algorithm == ALGORITHM.GSDI ) ) { final GSDI gsdi = ( GSDI ) sdi; System.out.println( "Mapping based on : " + gsdi.getTaxCompBase() ); log_writer.println( "Mapping based on : " + gsdi.getTaxCompBase() ); } - if ( ( base_algorithm == BASE_ALGORITHM.GSDIR ) ) { + if ( ( base_algorithm == ALGORITHM.GSDIR ) ) { final GSDIR gsdir = ( GSDIR ) sdi; System.out.println( "Mapping based on : " + gsdir.getTaxCompBase() ); log_writer.println( "Mapping based on : " + gsdir.getTaxCompBase() ); @@ -348,7 +346,7 @@ public final class gsdi { } try { final PhylogenyWriter writer = new PhylogenyWriter(); - if ( base_algorithm == BASE_ALGORITHM.GSDIR ) { + if ( base_algorithm == ALGORITHM.GSDIR ) { writer.toPhyloXML( out_file, ( ( GSDIR ) sdi ).getMinDuplicationsSumGeneTrees(), 0, @@ -364,12 +362,12 @@ public final class gsdi { } System.out.println( "Wrote resulting gene tree to : " + out_file.getCanonicalPath() ); log_writer.println( "Wrote resulting gene tree to : " + out_file.getCanonicalPath() ); - if ( base_algorithm == BASE_ALGORITHM.SDI ) { + if ( base_algorithm == ALGORITHM.SDI ) { sdi.computeMappingCostL(); System.out.println( "Mapping cost : " + sdi.computeMappingCostL() ); log_writer.println( "Mapping cost : " + sdi.computeMappingCostL() ); } - else if ( ( base_algorithm == BASE_ALGORITHM.GSDI ) || ( base_algorithm == BASE_ALGORITHM.GSDIR ) ) { + else if ( ( base_algorithm == ALGORITHM.GSDI ) || ( base_algorithm == ALGORITHM.GSDIR ) ) { final GSDI gsdi = ( GSDI ) sdi; final File species_tree_used_file = new File( ForesterUtil.removeSuffix( out_file.toString() ) + SUFFIX_FOR_SPECIES_TREE_USED ); @@ -400,7 +398,7 @@ public final class gsdi { + sdi.getSpeciesTree().getNumberOfExternalNodes() ); log_writer.println( "Number of external nodes in species tree : " + sdi.getSpeciesTree().getNumberOfExternalNodes() ); - if ( ( base_algorithm == BASE_ALGORITHM.GSDI ) ) { + if ( ( base_algorithm == ALGORITHM.GSDI ) ) { final GSDI gsdi = ( GSDI ) sdi; final int poly = PhylogenyMethods.countNumberOfPolytomies( gsdi.getSpeciesTree() ); System.out.println( "Number of polytomies in species tree : " + poly ); @@ -417,7 +415,7 @@ public final class gsdi { System.out.println(); System.out.println( "Number of duplications : " + sdi.getDuplicationsSum() ); log_writer.println( "Number of duplications : " + sdi.getDuplicationsSum() ); - if ( ( base_algorithm == BASE_ALGORITHM.GSDI ) ) { + if ( ( base_algorithm == ALGORITHM.GSDI ) ) { final GSDI gsdi = ( GSDI ) sdi; if ( !most_parsimonous_duplication_model ) { final int u = gsdi.getSpeciationOrDuplicationEventsSum(); diff --git a/forester/java/src/org/forester/application/rio.java b/forester/java/src/org/forester/application/rio.java index 7d7632c..80a34ab 100644 --- a/forester/java/src/org/forester/application/rio.java +++ b/forester/java/src/org/forester/application/rio.java @@ -41,6 +41,8 @@ import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.rio.RIO; import org.forester.rio.RIOException; +import org.forester.sdi.SDI; +import org.forester.sdi.SDI.ALGORITHM; import org.forester.sdi.SDIException; import org.forester.util.CommandLineArguments; import org.forester.util.EasyWriter; @@ -48,19 +50,20 @@ import org.forester.util.ForesterUtil; public class rio { - final static private String PRG_NAME = "rio"; - final static private String PRG_VERSION = "3.00 beta 3"; - final static private String PRG_DATE = "2012.12.05"; - final static private String E_MAIL = "czmasek@burnham.org"; - final static private String WWW = "www.phylosoft.org/forester/"; - final static private String HELP_OPTION_1 = "help"; - final static private String HELP_OPTION_2 = "h"; - final static private String QUERY_OPTION = "q"; - final static private String SORT_OPTION = "s"; - final static private String OUTPUT_ULTRA_P_OPTION = "u"; - final static private String CUTOFF_ULTRA_P_OPTION = "cu"; - final static private String CUTOFF_ORTHO_OPTION = "co"; - final static private String TABLE_OUTPUT_OPTION = "t"; + final static private String PRG_NAME = "rio"; + final static private String PRG_VERSION = "3.00 beta 4"; + final static private String PRG_DATE = "2012.12.10"; + final static private String E_MAIL = "czmasek@burnham.org"; + final static private String WWW = "www.phylosoft.org/forester/"; + final static private String HELP_OPTION_1 = "help"; + final static private String HELP_OPTION_2 = "h"; + final static private String QUERY_OPTION = "q"; + final static private String SORT_OPTION = "s"; + final static private String ALLOW_NON_BIN_SPECIES_TREE_OPTION = "g"; + final static private String OUTPUT_ULTRA_P_OPTION = "u"; + final static private String CUTOFF_ULTRA_P_OPTION = "cu"; + final static private String CUTOFF_ORTHO_OPTION = "co"; + final static private String TABLE_OUTPUT_OPTION = "t"; public static void main( final String[] args ) { ForesterUtil.printProgramInformation( PRG_NAME, @@ -93,6 +96,7 @@ public class rio { allowed_options.add( CUTOFF_ORTHO_OPTION ); allowed_options.add( TABLE_OUTPUT_OPTION ); allowed_options.add( OUTPUT_ULTRA_P_OPTION ); + allowed_options.add( ALLOW_NON_BIN_SPECIES_TREE_OPTION ); final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options ); if ( dissallowed_options.length() > 0 ) { ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options ); @@ -123,6 +127,10 @@ public class rio { if ( cla.isOptionSet( OUTPUT_ULTRA_P_OPTION ) ) { output_ultraparalogs = true; } + boolean gsdir = false; + if ( cla.isOptionSet( ALLOW_NON_BIN_SPECIES_TREE_OPTION ) ) { + gsdir = true; + } double cutoff_for_orthologs = 50; double cutoff_for_ultra_paralogs = 50; int sort = 1; @@ -159,6 +167,12 @@ public class rio { long time = 0; System.out.println( "Gene trees : " + gene_trees_file ); System.out.println( "Species tree : " + species_tree_file ); + if ( gsdir ) { + System.out.println( "Non binary species tree : allowed (GSDIR algorithm)" ); + } + else { + System.out.println( "Non binary species tree : disallowed (SDIR algorithm)" ); + } if ( query != null ) { System.out.println( "Query : " + query ); System.out.println( "Outfile : " + outfile ); @@ -185,16 +199,20 @@ public class rio { if ( !species_tree.isRooted() ) { ForesterUtil.fatalError( PRG_NAME, "species tree is not rooted" ); } - if ( !species_tree.isCompletelyBinary() ) { - ForesterUtil.fatalError( PRG_NAME, "species tree is not completely binary" ); + final SDI.ALGORITHM algorithm; + if ( gsdir ) { + algorithm = ALGORITHM.GSDIR; + } + else { + algorithm = ALGORITHM.SDIR; } try { final RIO rio; if ( ForesterUtil.isEmpty( query ) ) { - rio = new RIO( gene_trees_file, species_tree ); + rio = new RIO( gene_trees_file, species_tree, algorithm ); } else { - rio = new RIO( gene_trees_file, species_tree, query ); + rio = new RIO( gene_trees_file, species_tree, query, algorithm ); } if ( outfile != null ) { final StringBuilder output = new StringBuilder(); @@ -236,7 +254,7 @@ public class rio { } private static void tableOutput( final File table_outfile, final RIO rio ) throws IOException, RIOException { - final IntMatrix m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees() ); + final IntMatrix m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees(), true ); writeTable( table_outfile, rio, m ); } @@ -275,6 +293,7 @@ public class rio { System.out.println( PRG_NAME + " [options] [outfile]" ); System.out.println(); System.out.println( " Options" ); + System.out.println( " -" + ALLOW_NON_BIN_SPECIES_TREE_OPTION + " : to allow non-binary species tree" ); System.out.println( " -" + CUTOFF_ORTHO_OPTION + " : cutoff for ortholog output (default: 50)" ); System.out.println( " -" + TABLE_OUTPUT_OPTION + " : file-name for output table of all vs. all ortholgy support" ); diff --git a/forester/java/src/org/forester/rio/RIO.java b/forester/java/src/org/forester/rio/RIO.java index 5a9be41..dea0492 100644 --- a/forester/java/src/org/forester/rio/RIO.java +++ b/forester/java/src/org/forester/rio/RIO.java @@ -32,6 +32,7 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -47,6 +48,8 @@ import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; +import org.forester.sdi.GSDIR; +import org.forester.sdi.SDI; import org.forester.sdi.SDIException; import org.forester.sdi.SDIR; import org.forester.util.ForesterUtil; @@ -70,19 +73,21 @@ public final class RIO { * @throws IOException * @throws RIOException */ - public RIO( final File gene_trees_file, final Phylogeny species_tree, final String query ) throws IOException, - SDIException, RIOException { + public RIO( final File gene_trees_file, + final Phylogeny species_tree, + final String query, + final SDI.ALGORITHM algorithm ) throws IOException, SDIException, RIOException { if ( ForesterUtil.isEmpty( query ) ) { throw new IllegalArgumentException( "query is empty" ); } init(); - inferOrthologs( gene_trees_file, species_tree, query ); + inferOrthologs( gene_trees_file, species_tree, query, algorithm ); } - public RIO( final File gene_trees_file, final Phylogeny species_tree ) throws IOException, SDIException, - RIOException { + public RIO( final File gene_trees_file, final Phylogeny species_tree, final SDI.ALGORITHM algorithm ) + throws IOException, SDIException, RIOException { init(); - inferOrthologs( gene_trees_file, species_tree, null ); + inferOrthologs( gene_trees_file, species_tree, null, algorithm ); } public final Phylogeny[] getAnalyzedGeneTrees() { @@ -381,8 +386,11 @@ public final class RIO { * @throws IOException * @throws FileNotFoundException */ - private final void inferOrthologs( final File gene_trees_file, final Phylogeny species_tree, final String query ) - throws SDIException, RIOException, FileNotFoundException, IOException { + private final void inferOrthologs( final File gene_trees_file, + final Phylogeny species_tree, + final String query, + final SDI.ALGORITHM algorithm ) throws SDIException, RIOException, + FileNotFoundException, IOException { // Read in first tree to get its sequence names // and strip species_tree. final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); @@ -433,22 +441,37 @@ public final class RIO { + " has a different number of external nodes (" + gt.getNumberOfExternalNodes() + ") than those gene trees preceding it (" + gene_tree_ext_nodes + ")" ); } - _analyzed_gene_trees[ c++ ] = performOrthologInference( gt, species_tree, query ); + _analyzed_gene_trees[ c++ ] = performOrthologInference( gt, species_tree, query, algorithm ); } setNumberOfSamples( gene_trees.length ); } private final Phylogeny performOrthologInference( final Phylogeny gene_tree, final Phylogeny species_tree, - final String query ) throws SDIException, RIOException { - final SDIR sdiunrooted = new SDIR(); - final Phylogeny assigned_tree = sdiunrooted.infer( gene_tree, - species_tree, - false, - RIO.ROOT_BY_MINIMIZING_SUM_OF_DUPS, - RIO.ROOT_BY_MINIMIZING_TREE_HEIGHT, - true, - 1 )[ 0 ]; + final String query, + final SDI.ALGORITHM algorithm ) throws SDIException, RIOException { + final Phylogeny assigned_tree; + switch ( algorithm ) { + case SDIR: { + final SDIR sdir = new SDIR(); + assigned_tree = sdir.infer( gene_tree, + species_tree, + false, + RIO.ROOT_BY_MINIMIZING_SUM_OF_DUPS, + RIO.ROOT_BY_MINIMIZING_TREE_HEIGHT, + true, + 1 )[ 0 ]; + break; + } + case GSDIR: { + final GSDIR gsdir = new GSDIR( gene_tree, species_tree, true, 1 ); + assigned_tree = gsdir.getMinDuplicationsSumGeneTrees().get( 1 ); + break; + } + default: { + throw new IllegalArgumentException( "illegal algorithm: " + algorithm ); + } + } setExtNodesOfAnalyzedGeneTrees( assigned_tree.getNumberOfExternalNodes() ); if ( !ForesterUtil.isEmpty( query ) ) { final List nodes = getNodesViaSequenceName( assigned_tree, query ); @@ -513,7 +536,8 @@ public final class RIO { } } - public final static IntMatrix calculateOrthologTable( final Phylogeny[] analyzed_gene_trees ) throws RIOException { + public final static IntMatrix calculateOrthologTable( final Phylogeny[] analyzed_gene_trees, final boolean sort ) + throws RIOException { final List labels = new ArrayList(); final Set labels_set = new HashSet(); String label; @@ -537,6 +561,9 @@ public final class RIO { labels_set.add( label ); labels.add( label ); } + if ( sort ) { + Collections.sort( labels ); + } final IntMatrix m = new IntMatrix( labels ); int counter = 0; for( final Phylogeny gt : analyzed_gene_trees ) { diff --git a/forester/java/src/org/forester/sdi/GSDIR.java b/forester/java/src/org/forester/sdi/GSDIR.java index dc3fcae..8179811 100644 --- a/forester/java/src/org/forester/sdi/GSDIR.java +++ b/forester/java/src/org/forester/sdi/GSDIR.java @@ -28,6 +28,7 @@ import java.util.ArrayList; import java.util.List; import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.PhylogenyBranch; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; @@ -41,20 +42,23 @@ public class GSDIR extends GSDI { public GSDIR( final Phylogeny gene_tree, final Phylogeny species_tree, final boolean strip_gene_tree, final int x ) throws SDIException { - super( gene_tree, species_tree, true, strip_gene_tree, true, 1 ); + super( gene_tree.copy(), species_tree, true, strip_gene_tree, true, 1 ); _min_duplications_sum = Integer.MAX_VALUE; _min_duplications_sum_gene_trees = new ArrayList(); _duplications_sum_stats = new BasicDescriptiveStatistics(); linkNodesOfG(); - final List gene_tree_nodes_post_order = new ArrayList(); - for( final PhylogenyNodeIterator it = gene_tree.iteratorPostorder(); it.hasNext(); ) { - gene_tree_nodes_post_order.add( it.next() ); + final List gene_tree_branches_post_order = new ArrayList(); + for( final PhylogenyNodeIterator it = _gene_tree.iteratorPostorder(); it.hasNext(); ) { + final PhylogenyNode n = it.next(); + if ( !n.isRoot() ) { + gene_tree_branches_post_order.add( new PhylogenyBranch( n, n.getParent() ) ); + } } - for( final PhylogenyNode root : gene_tree_nodes_post_order ) { + for( final PhylogenyBranch branch : gene_tree_branches_post_order ) { _duplications_sum = 0; _speciation_or_duplication_events_sum = 0; _speciations_sum = 0; - _gene_tree.reRoot( root.getId() ); //TODO reRoot( root ) + _gene_tree.reRoot( branch ); PhylogenyMethods.preOrderReId( getSpeciesTree() ); //TEST, remove later for( final PhylogenyNodeIterator it = _gene_tree.iteratorPostorder(); it.hasNext(); ) { @@ -72,9 +76,9 @@ public class GSDIR extends GSDI { else if ( _duplications_sum == _min_duplications_sum ) { _min_duplications_sum_gene_trees.add( getGeneTree().copy() ); } - System.out.println( getDuplicationsSum() ); _duplications_sum_stats.addValue( _duplications_sum ); } + System.out.println( _duplications_sum_stats.getSummaryAsString() ); } public int getMinDuplicationsSum() { diff --git a/forester/java/src/org/forester/sdi/SDI.java b/forester/java/src/org/forester/sdi/SDI.java index 1f1e163..fbc3c65 100644 --- a/forester/java/src/org/forester/sdi/SDI.java +++ b/forester/java/src/org/forester/sdi/SDI.java @@ -39,6 +39,9 @@ import org.forester.util.ForesterUtil; public abstract class SDI { + public enum ALGORITHM { + GSDIR, GSDI, SDI, SDIR + } final Phylogeny _gene_tree; final Phylogeny _species_tree; int _duplications_sum; // Sum of duplications. @@ -76,9 +79,6 @@ public abstract class SDI { if ( species_tree.isEmpty() || gene_tree.isEmpty() ) { throw new IllegalArgumentException( "attempt to infer duplications using empty tree(s)" ); } - if ( !gene_tree.isRooted() ) { - throw new IllegalArgumentException( "attempt to infer duplications on unrooted gene tree" ); - } if ( !species_tree.isRooted() ) { throw new IllegalArgumentException( "attempt to infer duplications on unrooted species tree" ); } diff --git a/forester/java/src/org/forester/sdi/SDIR.java b/forester/java/src/org/forester/sdi/SDIR.java index c9224da..ce7fc00 100644 --- a/forester/java/src/org/forester/sdi/SDIR.java +++ b/forester/java/src/org/forester/sdi/SDIR.java @@ -266,20 +266,17 @@ public class SDIR { final PhylogenyNode n = iter.next(); if ( n.isRoot() ) { if ( ( n.getNumberOfDescendants() != 2 ) && ( n.getNumberOfDescendants() != 3 ) ) { - throw new IllegalArgumentException( "attempt to run SDI on gene tree with " - + n.getNumberOfDescendants() + " child nodes at its root" ); + throw new SDIException( "gene tree has " + n.getNumberOfDescendants() + " descendents at its root" ); } } else if ( !n.isExternal() && ( n.getNumberOfDescendants() != 2 ) ) { - throw new IllegalArgumentException( "attempt to run SDI on gene tree which is not completely binary [found node with " - + n.getNumberOfDescendants() + " child nodes]" ); + throw new SDIException( "gene tree is not completely binary" ); } } for( final PhylogenyNodeIterator iter = species_tree.iteratorPostorder(); iter.hasNext(); ) { final PhylogenyNode n = iter.next(); if ( !n.isExternal() && ( n.getNumberOfDescendants() != 2 ) ) { - throw new IllegalArgumentException( "attempt to run SDI with a species tree which is not completely binary (after stripping) [found node with " - + n.getNumberOfDescendants() + " child nodes]" ); + throw new SDIException( "species tree (after stripping) is not completely binary" ); } } g.reRoot( g.getFirstExternalNode() ); diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index 19d01c1..be7bee3 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -6888,7 +6888,7 @@ public final class Test { gt.setRooted( true ); final GSDI sdi = new GSDI( gt, s1, true, true, true ); } - final IntMatrix m = RIO.calculateOrthologTable( g1 ); + final IntMatrix m = RIO.calculateOrthologTable( g1, true ); // System.out.println( m.toString() ); } catch ( final Exception e ) { diff --git a/forester/java/src/org/forester/util/BasicDescriptiveStatistics.java b/forester/java/src/org/forester/util/BasicDescriptiveStatistics.java index 0818951..dbb1db9 100644 --- a/forester/java/src/org/forester/util/BasicDescriptiveStatistics.java +++ b/forester/java/src/org/forester/util/BasicDescriptiveStatistics.java @@ -50,9 +50,6 @@ public class BasicDescriptiveStatistics implements DescriptiveStatistics { setDescription( desc ); } - /* (non-Javadoc) - * @see org.forester.util.DescriptiveStatisticsI#addValue(double) - */ @Override public void addValue( final double d ) { _recalc_sigma = true; @@ -66,18 +63,12 @@ public class BasicDescriptiveStatistics implements DescriptiveStatistics { } } - /* (non-Javadoc) - * @see org.forester.util.DescriptiveStatisticsI#arithmeticMean() - */ @Override public double arithmeticMean() { validate(); return getSum() / getN(); } - /* (non-Javadoc) - * @see org.forester.util.DescriptiveStatisticsI#asSummary() - */ @Override public String asSummary() { if ( getN() > 1 ) { @@ -89,18 +80,17 @@ public class BasicDescriptiveStatistics implements DescriptiveStatistics { } } - /* (non-Javadoc) - * @see org.forester.util.DescriptiveStatisticsI#coefficientOfVariation() - */ @Override public double coefficientOfVariation() { validate(); return ( sampleStandardDeviation() / arithmeticMean() ); } - /* (non-Javadoc) - * @see org.forester.util.DescriptiveStatisticsI#getDataAsDoubleArray() - */ + @Override + public List getData() { + return _data; + } + @Override public double[] getDataAsDoubleArray() { validate(); @@ -111,35 +101,28 @@ public class BasicDescriptiveStatistics implements DescriptiveStatistics { return data_array; } - /* (non-Javadoc) - * @see org.forester.util.DescriptiveStatisticsI#getMax() - */ + @Override + public String getDescription() { + return _desc; + } + @Override public double getMax() { validate(); return _max; } - /* (non-Javadoc) - * @see org.forester.util.DescriptiveStatisticsI#getMin() - */ @Override public double getMin() { validate(); return _min; } - /* (non-Javadoc) - * @see org.forester.util.DescriptiveStatisticsI#getN() - */ @Override public int getN() { return _data.size(); } - /* (non-Javadoc) - * @see org.forester.util.DescriptiveStatisticsI#getSum() - */ @Override public double getSum() { validate(); @@ -151,7 +134,7 @@ public class BasicDescriptiveStatistics implements DescriptiveStatistics { validate(); final double mean = arithmeticMean(); final double sd = sampleStandardDeviation(); - return "" + mean + ( ( char ) 177 ) + sd + " [" + getMin() + "..." + getMax() + "]"; + return "" + mean + ( ( char ) 177 ) + sd + " [" + getN() + "] [" + getMin() + "-" + getMax() + "]"; } @Override @@ -160,19 +143,6 @@ public class BasicDescriptiveStatistics implements DescriptiveStatistics { return ( ( ( _data.get( index ) ) ).doubleValue() ); } - private void init() { - _data = new ArrayList(); - _sum = 0.0; - _min = Double.MAX_VALUE; - _max = -Double.MAX_VALUE; - _sigma = 0.0; - _recalc_sigma = true; - _desc = ""; - } - - /* (non-Javadoc) - * @see org.forester.util.DescriptiveStatisticsI#median() - */ @Override public double median() { validate(); @@ -195,18 +165,12 @@ public class BasicDescriptiveStatistics implements DescriptiveStatistics { return median; } - /* (non-Javadoc) - * @see org.forester.util.DescriptiveStatisticsI#midrange() - */ @Override public double midrange() { validate(); return ( _min + _max ) / 2.0; } - /* (non-Javadoc) - * @see org.forester.util.DescriptiveStatisticsI#pearsonianSkewness() - */ @Override public double pearsonianSkewness() { validate(); @@ -216,26 +180,17 @@ public class BasicDescriptiveStatistics implements DescriptiveStatistics { return ( ( 3 * ( mean - median ) ) / sd ); } - /* (non-Javadoc) - * @see org.forester.util.DescriptiveStatisticsI#sampleStandardDeviation() - */ @Override public double sampleStandardDeviation() { return Math.sqrt( sampleVariance() ); } - /* (non-Javadoc) - * @see org.forester.util.DescriptiveStatisticsI#sampleStandardUnit(double) - */ @Override public double sampleStandardUnit( final double value ) { validate(); return BasicDescriptiveStatistics.sampleStandardUnit( value, arithmeticMean(), sampleStandardDeviation() ); } - /* (non-Javadoc) - * @see org.forester.util.DescriptiveStatisticsI#sampleVariance() - */ @Override public double sampleVariance() { validate(); @@ -245,18 +200,17 @@ public class BasicDescriptiveStatistics implements DescriptiveStatistics { return ( sumDeviations() / ( getN() - 1 ) ); } - /* (non-Javadoc) - * @see org.forester.util.DescriptiveStatisticsI#standardErrorOfMean() - */ + @Override + public void setDescription( final String desc ) { + _desc = desc; + } + @Override public double standardErrorOfMean() { validate(); return ( sampleStandardDeviation() / Math.sqrt( getN() ) ); } - /* (non-Javadoc) - * @see org.forester.util.DescriptiveStatisticsI#sumDeviations() - */ @Override public double sumDeviations() { validate(); @@ -271,9 +225,6 @@ public class BasicDescriptiveStatistics implements DescriptiveStatistics { return _sigma; } - /* (non-Javadoc) - * @see org.forester.util.DescriptiveStatisticsI#toString() - */ @Override public String toString() { if ( getN() < 1 ) { @@ -308,6 +259,16 @@ public class BasicDescriptiveStatistics implements DescriptiveStatistics { return sb.toString(); } + private void init() { + _data = new ArrayList(); + _sum = 0.0; + _min = Double.MAX_VALUE; + _max = -Double.MAX_VALUE; + _sigma = 0.0; + _recalc_sigma = true; + _desc = ""; + } + private void validate() throws ArithmeticException { if ( getN() < 1 ) { throw new ArithmeticException( "attempt to get a result from empty data set statistics" ); @@ -357,19 +318,4 @@ public class BasicDescriptiveStatistics implements DescriptiveStatistics { public static double sampleStandardUnit( final double value, final double mean, final double sd ) { return ( value - mean ) / sd; } - - @Override - public List getData() { - return _data; - } - - @Override - public void setDescription( final String desc ) { - _desc = desc; - } - - @Override - public String getDescription() { - return _desc; - } }