From: cmzmasek@gmail.com Date: Tue, 15 Jan 2013 02:29:50 +0000 (+0000) Subject: fixed 2nd rio bug X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=5e4c941ad91dc4cb91e9e0a782708ad2fcf953b3;p=jalview.git fixed 2nd rio bug --- diff --git a/forester/java/src/org/forester/application/rio.java b/forester/java/src/org/forester/application/rio.java index 3230c41..6ef7350 100644 --- a/forester/java/src/org/forester/application/rio.java +++ b/forester/java/src/org/forester/application/rio.java @@ -55,8 +55,8 @@ import org.forester.util.ForesterUtil; public class rio { final static private String PRG_NAME = "rio"; - final static private String PRG_VERSION = "4.000 beta 8"; - final static private String PRG_DATE = "2013.01.11"; + final static private String PRG_VERSION = "4.000 beta 9"; + final static private String PRG_DATE = "2013.01.14"; final static private String E_MAIL = "phyloxml@gmail.com"; final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"; final static private String HELP_OPTION_1 = "help"; diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java index 8d43782..a3efa67 100644 --- a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java +++ b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java @@ -327,8 +327,8 @@ public class PhylogenyMethods { public static final HashMap createNameToExtNodeMap( final Phylogeny phy ) { final HashMap nodes = new HashMap(); - List ext = phy.getExternalNodes(); - for( PhylogenyNode n : ext ) { + final List ext = phy.getExternalNodes(); + for( final PhylogenyNode n : ext ) { nodes.put( n.getName(), n ); } // for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { diff --git a/forester/java/src/org/forester/rio/RIO.java b/forester/java/src/org/forester/rio/RIO.java index 68256f4..d360a1f 100644 --- a/forester/java/src/org/forester/rio/RIO.java +++ b/forester/java/src/org/forester/rio/RIO.java @@ -221,6 +221,12 @@ public final class RIO { while ( parser.hasNext() ) { final Phylogeny gt = parser.next(); if ( no_range || ( ( i >= first ) && ( i <= last ) ) ) { + if ( gt.isEmpty() ) { + throw new RIOException( "gene tree #" + i + " is empty" ); + } + if ( gt.getNumberOfExternalNodes() == 1 ) { + throw new RIOException( "gene tree #" + i + " has only one external node" ); + } if ( _verbose ) { ForesterUtil.updateProgress( i, pf ); } @@ -302,6 +308,12 @@ public final class RIO { int gene_tree_ext_nodes = 0; for( int i = 0; i < my_gene_trees.length; ++i ) { final Phylogeny gt = my_gene_trees[ i ]; + if ( gt.isEmpty() ) { + throw new RIOException( "gene tree #" + i + " is empty" ); + } + if ( gt.getNumberOfExternalNodes() == 1 ) { + throw new RIOException( "gene tree #" + i + " has only one external node" ); + } if ( _verbose && ( my_gene_trees.length > 4 ) ) { ForesterUtil.updateProgress( ( ( double ) i ) / my_gene_trees.length ); } diff --git a/forester/java/src/org/forester/rio/TestRIO.java b/forester/java/src/org/forester/rio/TestRIO.java index 58b2050..a0604d6 100644 --- a/forester/java/src/org/forester/rio/TestRIO.java +++ b/forester/java/src/org/forester/rio/TestRIO.java @@ -65,9 +65,6 @@ public final class TestRIO { "", true, false ); - //if ( rio.getAnalyzedGeneTrees().length != 5 ) { - // return false; - //} if ( rio.getExtNodesOfAnalyzedGeneTrees() != 6 ) { return false; } @@ -138,6 +135,179 @@ public final class TestRIO { nhx.setReplaceUnderscores( false ); nhx.setIgnoreQuotes( true ); nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES ); + // + final String gene_trees_00_str = "(MOUSE,RAT);(MOUSE,RAT);(MOUSE,RAT);(RAT,MOUSE);"; + final Phylogeny[] gene_trees_00 = factory.create( gene_trees_00_str, nhx ); + final String species_trees_00_str = "(MOUSE,RAT);"; + final Phylogeny species_tree_00 = factory.create( species_trees_00_str, new NHXParser() )[ 0 ]; + species_tree_00.setRooted( true ); + PhylogenyMethods.transferNodeNameToField( species_tree_00, PhylogenyNodeField.TAXONOMY_CODE, true ); + RIO rio = RIO.executeAnalysis( gene_trees_00, + species_tree_00, + ALGORITHM.GSDIR, + REROOTING.BY_ALGORITHM, + "", + true, + false ); + if ( rio.getAnalyzedGeneTrees().length != 4 ) { + return false; + } + if ( rio.getExtNodesOfAnalyzedGeneTrees() != 2 ) { + return false; + } + if ( rio.getGSDIRtaxCompBase() != TaxonomyComparisonBase.CODE ) { + return false; + } + if ( rio.getRemovedGeneTreeNodes().size() != 0 ) { + return false; + } + IntMatrix m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees(), true ); + if ( !m.getRowAsString( 0, ',' ).equals( "MOUSE,4,4" ) ) { + System.out.println( m.toString() ); + return false; + } + if ( !m.getRowAsString( 1, ',' ).equals( "RAT,4,4" ) ) { + System.out.println( m.toString() ); + return false; + } + // + final String gene_trees_000_str = "(MOUSE1[&&NHX:S=MOUSE],MOUSE2[&&NHX:S=MOUSE]);(MOUSE1[&&NHX:S=MOUSE],MOUSE2[&&NHX:S=MOUSE])"; + final Phylogeny[] gene_trees_000 = factory.create( gene_trees_000_str, nhx ); + final String species_trees_000_str = "[&&NHX:S=MOUSE];"; + final Phylogeny species_tree_000 = factory.create( species_trees_000_str, new NHXParser() )[ 0 ]; + species_tree_000.setRooted( true ); + rio = RIO.executeAnalysis( gene_trees_000, + species_tree_000, + ALGORITHM.GSDIR, + REROOTING.BY_ALGORITHM, + "", + true, + false ); + if ( rio.getAnalyzedGeneTrees().length != 2 ) { + return false; + } + if ( rio.getExtNodesOfAnalyzedGeneTrees() != 2 ) { + return false; + } + if ( rio.getGSDIRtaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) { + return false; + } + if ( rio.getRemovedGeneTreeNodes().size() != 0 ) { + return false; + } + m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees(), true ); + if ( !m.getRowAsString( 0, ',' ).equals( "MOUSE1,2,0" ) ) { + System.out.println( m.toString() ); + return false; + } + if ( !m.getRowAsString( 1, ',' ).equals( "MOUSE2,0,2" ) ) { + System.out.println( m.toString() ); + return false; + } + // + // + final String gene_trees_0000_str = "(MOUSE1[&&NHX:S=MOUSE],MOUSE2[&&NHX:S=MOUSE]);(MOUSE1[&&NHX:S=MOUSE],MOUSE2[&&NHX:S=MOUSE]);(MOUSE1[&&NHX:S=MOUSE],MOUSE2[&&NHX:S=MOUSE])"; + final Phylogeny[] gene_trees_0000 = factory.create( gene_trees_0000_str, nhx ); + final String species_trees_0000_str = "([&&NHX:S=MOUSE]);"; + final Phylogeny species_tree_0000 = factory.create( species_trees_0000_str, new NHXParser() )[ 0 ]; + species_tree_0000.setRooted( true ); + rio = RIO.executeAnalysis( gene_trees_0000, + species_tree_0000, + ALGORITHM.GSDIR, + REROOTING.BY_ALGORITHM, + "", + true, + false ); + if ( rio.getAnalyzedGeneTrees().length != 3 ) { + return false; + } + if ( rio.getExtNodesOfAnalyzedGeneTrees() != 2 ) { + return false; + } + if ( rio.getGSDIRtaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) { + return false; + } + if ( rio.getRemovedGeneTreeNodes().size() != 0 ) { + return false; + } + m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees(), true ); + if ( !m.getRowAsString( 0, ',' ).equals( "MOUSE1,3,0" ) ) { + System.out.println( m.toString() ); + return false; + } + if ( !m.getRowAsString( 1, ',' ).equals( "MOUSE2,0,3" ) ) { + System.out.println( m.toString() ); + return false; + } + // + final String gene_trees_x_str = "(MOUSE1[&&NHX:S=MOUSE],MOUSE2[&&NHX:S=MOUSE])"; + final Phylogeny[] gene_trees_x = factory.create( gene_trees_x_str, nhx ); + final String species_trees_x_str = "[&&NHX:S=MOUSE];"; + final Phylogeny species_tree_x = factory.create( species_trees_x_str, new NHXParser() )[ 0 ]; + species_tree_x.setRooted( true ); + rio = RIO.executeAnalysis( gene_trees_x, + species_tree_x, + ALGORITHM.GSDIR, + REROOTING.BY_ALGORITHM, + "", + true, + false ); + if ( rio.getAnalyzedGeneTrees().length != 1 ) { + return false; + } + if ( rio.getExtNodesOfAnalyzedGeneTrees() != 2 ) { + return false; + } + if ( rio.getGSDIRtaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) { + return false; + } + if ( rio.getRemovedGeneTreeNodes().size() != 0 ) { + return false; + } + m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees(), true ); + if ( !m.getRowAsString( 0, ',' ).equals( "MOUSE1,1,0" ) ) { + System.out.println( m.toString() ); + return false; + } + if ( !m.getRowAsString( 1, ',' ).equals( "MOUSE2,0,1" ) ) { + System.out.println( m.toString() ); + return false; + } + // + final String gene_trees_xx_str = "(MOUSE1[&&NHX:S=MOUSE],RAT1[&&NHX:S=RAT])"; + final Phylogeny[] gene_trees_xx = factory.create( gene_trees_xx_str, nhx ); + final String species_trees_xx_str = "([&&NHX:S=MOUSE],[&&NHX:S=RAT]);"; + final Phylogeny species_tree_xx = factory.create( species_trees_xx_str, new NHXParser() )[ 0 ]; + species_tree_xx.setRooted( true ); + rio = RIO.executeAnalysis( gene_trees_xx, + species_tree_xx, + ALGORITHM.GSDIR, + REROOTING.BY_ALGORITHM, + "", + true, + false ); + if ( rio.getAnalyzedGeneTrees().length != 1 ) { + return false; + } + if ( rio.getExtNodesOfAnalyzedGeneTrees() != 2 ) { + return false; + } + if ( rio.getGSDIRtaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) { + return false; + } + if ( rio.getRemovedGeneTreeNodes().size() != 0 ) { + return false; + } + m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees(), true ); + if ( !m.getRowAsString( 0, ',' ).equals( "MOUSE1,1,1" ) ) { + System.out.println( m.toString() ); + return false; + } + if ( !m.getRowAsString( 1, ',' ).equals( "RAT1,1,1" ) ) { + System.out.println( m.toString() ); + return false; + } + // final String gene_trees_1_str = "(((((MOUSE,RAT),HUMAN),CAEEL),YEAST),ARATH);" + "((((MOUSE,RAT),HUMAN),(ARATH,YEAST)),CAEEL);" + "((MOUSE,RAT),(((ARATH,YEAST),CAEEL),HUMAN));" + "(((((MOUSE,HUMAN),RAT),CAEEL),YEAST),ARATH);" + "((((HUMAN,MOUSE),RAT),(ARATH,YEAST)),CAEEL);"; @@ -146,14 +316,13 @@ public final class TestRIO { final Phylogeny species_tree_1 = factory.create( species_trees_1_str, new NHXParser() )[ 0 ]; species_tree_1.setRooted( true ); PhylogenyMethods.transferNodeNameToField( species_tree_1, PhylogenyNodeField.TAXONOMY_CODE, true ); - //Archaeopteryx.createApplication( species_trees_1 ); - RIO rio = RIO.executeAnalysis( gene_trees_1, - species_tree_1, - ALGORITHM.GSDIR, - REROOTING.BY_ALGORITHM, - "", - true, - false ); + rio = RIO.executeAnalysis( gene_trees_1, + species_tree_1, + ALGORITHM.GSDIR, + REROOTING.BY_ALGORITHM, + "", + true, + false ); if ( rio.getAnalyzedGeneTrees().length != 5 ) { return false; } @@ -166,7 +335,7 @@ public final class TestRIO { if ( rio.getRemovedGeneTreeNodes().size() != 0 ) { return false; } - IntMatrix m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees(), true ); + m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees(), true ); // System.out.println( m.toString() ); if ( !m.getRowAsString( 0, ',' ).equals( "ARATH,5,5,5,5,5,5" ) ) { return false; diff --git a/forester/java/src/org/forester/sdi/GSDI.java b/forester/java/src/org/forester/sdi/GSDI.java index 87d28bd..4caeb72 100644 --- a/forester/java/src/org/forester/sdi/GSDI.java +++ b/forester/java/src/org/forester/sdi/GSDI.java @@ -337,7 +337,7 @@ public final class GSDI implements GSDII { final Set set = new HashSet(); for( PhylogenyNode n : g.getChildNode1().getAllExternalDescendants() ) { n = n.getLink(); - while ( n.getParent() != s ) { + while ( ( n.getParent() != s ) && ( n.getParent() != null ) ) { n = n.getParent(); if ( n.isRoot() ) { break; @@ -348,7 +348,7 @@ public final class GSDI implements GSDII { boolean multiple = false; for( PhylogenyNode n : g.getChildNode2().getAllExternalDescendants() ) { n = n.getLink(); - while ( n.getParent() != s ) { + while ( ( n.getParent() != s ) && ( n.getParent() != null ) ) { n = n.getParent(); if ( n.isRoot() ) { break; diff --git a/forester/java/src/org/forester/sdi/GSDIR.java b/forester/java/src/org/forester/sdi/GSDIR.java index d0ee1a7..80a0d3a 100644 --- a/forester/java/src/org/forester/sdi/GSDIR.java +++ b/forester/java/src/org/forester/sdi/GSDIR.java @@ -77,7 +77,7 @@ public class GSDIR implements GSDII { gene_tree.reRoot( branch ); PhylogenyMethods.preOrderReId( species_tree ); //TEST, remove later - // for( final PhylogenyNodeIterator it = _gene_tree.iteratorPostorder(); it.hasNext(); ) { + // for( final PhylogenyNodeIterator it = gene_tree.iteratorPostorder(); it.hasNext(); ) { // final PhylogenyNode g = it.next(); // if ( g.isInternal() ) { // g.setLink( null ); diff --git a/forester/java/src/org/forester/sdi/TestGSDI.java b/forester/java/src/org/forester/sdi/TestGSDI.java index 4e39e5c..3f84384 100644 --- a/forester/java/src/org/forester/sdi/TestGSDI.java +++ b/forester/java/src/org/forester/sdi/TestGSDI.java @@ -56,13 +56,6 @@ public final class TestGSDI { else { System.out.println( "OK" ); } - // boolean success = test(); - // if ( success ) { - // System.out.println( "OK" ); - // } - // else { - // System.out.println( "failed" ); - // } } public static boolean test() { @@ -90,6 +83,23 @@ public final class TestGSDI { private static boolean testGSDI_against_binary_gene_tree() { try { + final PhylogenyFactory factory0 = ParserBasedPhylogenyFactory.getInstance(); + final String s0 = "([&&NHX:S=1]);"; + final String gene_0_str = "([&&NHX:S=1],[&&NHX:S=1]);"; + final Phylogeny s_0 = factory0.create( s0, new NHXParser() )[ 0 ]; + final Phylogeny gene_0 = factory0.create( gene_0_str, new NHXParser() )[ 0 ]; + s_0.setRooted( true ); + gene_0.setRooted( true ); + final GSDI sdi0 = new GSDI( gene_0, s_0, false, false, false ); + if ( sdi0.getSpeciationOrDuplicationEventsSum() != 0 ) { + return false; + } + if ( sdi0.getDuplicationsSum() != 1 ) { + return false; + } + if ( sdi0.getSpeciationsSum() != 0 ) { + return false; + } final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final String multi_species_2_str = "(((((([&&NHX:S=1],[&&NHX:S=2])," + "([&&NHX:S=3],[&&NHX:S=4],[&&NHX:S=5]))," @@ -1450,6 +1460,36 @@ public final class TestGSDI { private static boolean testGSDIR_general() { try { + final PhylogenyFactory factory0 = ParserBasedPhylogenyFactory.getInstance(); + final String s0 = "([&&NHX:S=1]);"; + final String gene_0_str = "([&&NHX:S=1],[&&NHX:S=1]);"; + final Phylogeny s_0 = factory0.create( s0, new NHXParser() )[ 0 ]; + final Phylogeny gene_0 = factory0.create( gene_0_str, new NHXParser() )[ 0 ]; + s_0.setRooted( true ); + gene_0.setRooted( true ); + final GSDIR sdi0 = new GSDIR( gene_0, s_0, true, true ); + if ( sdi0.getSpeciationsSum() != 0 ) { + return false; + } + if ( sdi0.getMinDuplicationsSum() != 1 ) { + return false; + } + // + final PhylogenyFactory factory00 = ParserBasedPhylogenyFactory.getInstance(); + final String s00 = "[&&NHX:S=1];"; + final String gene_00_str = "([&&NHX:S=1],[&&NHX:S=1]);"; + final Phylogeny s_00 = factory00.create( s00, new NHXParser() )[ 0 ]; + final Phylogeny gene_00 = factory00.create( gene_00_str, new NHXParser() )[ 0 ]; + s_00.setRooted( true ); + gene_00.setRooted( true ); + final GSDIR sdi00 = new GSDIR( gene_00, s_00, true, true ); + if ( sdi00.getSpeciationsSum() != 0 ) { + return false; + } + if ( sdi00.getMinDuplicationsSum() != 1 ) { + return false; + } + // final String s1str = "(((([&&NHX:S=HUMAN],([&&NHX:S=MOUSE],[&&NHX:S=RAT])),([&&NHX:S=CAEEL],[&&NHX:S=CAEBR])),[&&NHX:S=YEAST]),[&&NHX:S=ARATH])"; final Phylogeny s1 = ParserBasedPhylogenyFactory.getInstance().create( s1str, new NHXParser() )[ 0 ]; s1.setRooted( true ); diff --git a/forester/java/src/org/forester/util/ForesterConstants.java b/forester/java/src/org/forester/util/ForesterConstants.java index f147188..ab5db92 100644 --- a/forester/java/src/org/forester/util/ForesterConstants.java +++ b/forester/java/src/org/forester/util/ForesterConstants.java @@ -27,8 +27,8 @@ package org.forester.util; public final class ForesterConstants { - public final static String FORESTER_VERSION = "1.018"; - public final static String FORESTER_DATE = "130108"; + public final static String FORESTER_VERSION = "1.019"; + public final static String FORESTER_DATE = "130114"; public final static String PHYLO_XML_VERSION = "1.10"; public final static String PHYLO_XML_LOCATION = "http://www.phyloxml.org"; public final static String PHYLO_XML_XSD = "phyloxml.xsd";