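Fixed 2nd RIO bug: reject empty or single-external-node gene trees in RIO, and stop the GSDI ancestor walk when a node has no parent.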
author: cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Tue, 15 Jan 2013 02:29:50 +0000 (02:29 +0000)
committer: cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Tue, 15 Jan 2013 02:29:50 +0000 (02:29 +0000)
forester/java/src/org/forester/application/rio.java
forester/java/src/org/forester/phylogeny/PhylogenyMethods.java
forester/java/src/org/forester/rio/RIO.java
forester/java/src/org/forester/rio/TestRIO.java
forester/java/src/org/forester/sdi/GSDI.java
forester/java/src/org/forester/sdi/GSDIR.java
forester/java/src/org/forester/sdi/TestGSDI.java
forester/java/src/org/forester/util/ForesterConstants.java

index 3230c41..6ef7350 100644 (file)
@@ -55,8 +55,8 @@ import org.forester.util.ForesterUtil;
 public class rio {
 
     final static private String PRG_NAME              = "rio";
-    final static private String PRG_VERSION           = "4.000 beta 8";
-    final static private String PRG_DATE              = "2013.01.11";
+    final static private String PRG_VERSION           = "4.000 beta 9";
+    final static private String PRG_DATE              = "2013.01.14";
     final static private String E_MAIL                = "phyloxml@gmail.com";
     final static private String WWW                   = "https://sites.google.com/site/cmzmasek/home/software/forester";
     final static private String HELP_OPTION_1         = "help";
index 8d43782..a3efa67 100644 (file)
@@ -327,8 +327,8 @@ public class PhylogenyMethods {
 
     public static final HashMap<String, PhylogenyNode> createNameToExtNodeMap( final Phylogeny phy ) {
         final HashMap<String, PhylogenyNode> nodes = new HashMap<String, PhylogenyNode>();
-        List<PhylogenyNode> ext = phy.getExternalNodes();
-        for( PhylogenyNode n : ext ) {
+        final List<PhylogenyNode> ext = phy.getExternalNodes();
+        for( final PhylogenyNode n : ext ) {
             nodes.put( n.getName(), n );
         }
         // for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
index 68256f4..d360a1f 100644 (file)
@@ -221,6 +221,12 @@ public final class RIO {
         while ( parser.hasNext() ) {
             final Phylogeny gt = parser.next();
             if ( no_range || ( ( i >= first ) && ( i <= last ) ) ) {
+                if ( gt.isEmpty() ) {
+                    throw new RIOException( "gene tree #" + i + " is empty" );
+                }
+                if ( gt.getNumberOfExternalNodes() == 1 ) {
+                    throw new RIOException( "gene tree #" + i + " has only one external node" );
+                }
                 if ( _verbose ) {
                     ForesterUtil.updateProgress( i, pf );
                 }
@@ -302,6 +308,12 @@ public final class RIO {
         int gene_tree_ext_nodes = 0;
         for( int i = 0; i < my_gene_trees.length; ++i ) {
             final Phylogeny gt = my_gene_trees[ i ];
+            if ( gt.isEmpty() ) {
+                throw new RIOException( "gene tree #" + i + " is empty" );
+            }
+            if ( gt.getNumberOfExternalNodes() == 1 ) {
+                throw new RIOException( "gene tree #" + i + " has only one external node" );
+            }
             if ( _verbose && ( my_gene_trees.length > 4 ) ) {
                 ForesterUtil.updateProgress( ( ( double ) i ) / my_gene_trees.length );
             }
index 58b2050..a0604d6 100644 (file)
@@ -65,9 +65,6 @@ public final class TestRIO {
                                            "",
                                            true,
                                            false );
-            //if ( rio.getAnalyzedGeneTrees().length != 5 ) {
-            //    return false;
-            //}
             if ( rio.getExtNodesOfAnalyzedGeneTrees() != 6 ) {
                 return false;
             }
@@ -138,6 +135,179 @@ public final class TestRIO {
             nhx.setReplaceUnderscores( false );
             nhx.setIgnoreQuotes( true );
             nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES );
+            //
+            final String gene_trees_00_str = "(MOUSE,RAT);(MOUSE,RAT);(MOUSE,RAT);(RAT,MOUSE);";
+            final Phylogeny[] gene_trees_00 = factory.create( gene_trees_00_str, nhx );
+            final String species_trees_00_str = "(MOUSE,RAT);";
+            final Phylogeny species_tree_00 = factory.create( species_trees_00_str, new NHXParser() )[ 0 ];
+            species_tree_00.setRooted( true );
+            PhylogenyMethods.transferNodeNameToField( species_tree_00, PhylogenyNodeField.TAXONOMY_CODE, true );
+            RIO rio = RIO.executeAnalysis( gene_trees_00,
+                                           species_tree_00,
+                                           ALGORITHM.GSDIR,
+                                           REROOTING.BY_ALGORITHM,
+                                           "",
+                                           true,
+                                           false );
+            if ( rio.getAnalyzedGeneTrees().length != 4 ) {
+                return false;
+            }
+            if ( rio.getExtNodesOfAnalyzedGeneTrees() != 2 ) {
+                return false;
+            }
+            if ( rio.getGSDIRtaxCompBase() != TaxonomyComparisonBase.CODE ) {
+                return false;
+            }
+            if ( rio.getRemovedGeneTreeNodes().size() != 0 ) {
+                return false;
+            }
+            IntMatrix m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees(), true );
+            if ( !m.getRowAsString( 0, ',' ).equals( "MOUSE,4,4" ) ) {
+                System.out.println( m.toString() );
+                return false;
+            }
+            if ( !m.getRowAsString( 1, ',' ).equals( "RAT,4,4" ) ) {
+                System.out.println( m.toString() );
+                return false;
+            }
+            //
+            final String gene_trees_000_str = "(MOUSE1[&&NHX:S=MOUSE],MOUSE2[&&NHX:S=MOUSE]);(MOUSE1[&&NHX:S=MOUSE],MOUSE2[&&NHX:S=MOUSE])";
+            final Phylogeny[] gene_trees_000 = factory.create( gene_trees_000_str, nhx );
+            final String species_trees_000_str = "[&&NHX:S=MOUSE];";
+            final Phylogeny species_tree_000 = factory.create( species_trees_000_str, new NHXParser() )[ 0 ];
+            species_tree_000.setRooted( true );
+            rio = RIO.executeAnalysis( gene_trees_000,
+                                       species_tree_000,
+                                       ALGORITHM.GSDIR,
+                                       REROOTING.BY_ALGORITHM,
+                                       "",
+                                       true,
+                                       false );
+            if ( rio.getAnalyzedGeneTrees().length != 2 ) {
+                return false;
+            }
+            if ( rio.getExtNodesOfAnalyzedGeneTrees() != 2 ) {
+                return false;
+            }
+            if ( rio.getGSDIRtaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) {
+                return false;
+            }
+            if ( rio.getRemovedGeneTreeNodes().size() != 0 ) {
+                return false;
+            }
+            m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees(), true );
+            if ( !m.getRowAsString( 0, ',' ).equals( "MOUSE1,2,0" ) ) {
+                System.out.println( m.toString() );
+                return false;
+            }
+            if ( !m.getRowAsString( 1, ',' ).equals( "MOUSE2,0,2" ) ) {
+                System.out.println( m.toString() );
+                return false;
+            }
+            //
+            //
+            final String gene_trees_0000_str = "(MOUSE1[&&NHX:S=MOUSE],MOUSE2[&&NHX:S=MOUSE]);(MOUSE1[&&NHX:S=MOUSE],MOUSE2[&&NHX:S=MOUSE]);(MOUSE1[&&NHX:S=MOUSE],MOUSE2[&&NHX:S=MOUSE])";
+            final Phylogeny[] gene_trees_0000 = factory.create( gene_trees_0000_str, nhx );
+            final String species_trees_0000_str = "([&&NHX:S=MOUSE]);";
+            final Phylogeny species_tree_0000 = factory.create( species_trees_0000_str, new NHXParser() )[ 0 ];
+            species_tree_0000.setRooted( true );
+            rio = RIO.executeAnalysis( gene_trees_0000,
+                                       species_tree_0000,
+                                       ALGORITHM.GSDIR,
+                                       REROOTING.BY_ALGORITHM,
+                                       "",
+                                       true,
+                                       false );
+            if ( rio.getAnalyzedGeneTrees().length != 3 ) {
+                return false;
+            }
+            if ( rio.getExtNodesOfAnalyzedGeneTrees() != 2 ) {
+                return false;
+            }
+            if ( rio.getGSDIRtaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) {
+                return false;
+            }
+            if ( rio.getRemovedGeneTreeNodes().size() != 0 ) {
+                return false;
+            }
+            m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees(), true );
+            if ( !m.getRowAsString( 0, ',' ).equals( "MOUSE1,3,0" ) ) {
+                System.out.println( m.toString() );
+                return false;
+            }
+            if ( !m.getRowAsString( 1, ',' ).equals( "MOUSE2,0,3" ) ) {
+                System.out.println( m.toString() );
+                return false;
+            }
+            //
+            final String gene_trees_x_str = "(MOUSE1[&&NHX:S=MOUSE],MOUSE2[&&NHX:S=MOUSE])";
+            final Phylogeny[] gene_trees_x = factory.create( gene_trees_x_str, nhx );
+            final String species_trees_x_str = "[&&NHX:S=MOUSE];";
+            final Phylogeny species_tree_x = factory.create( species_trees_x_str, new NHXParser() )[ 0 ];
+            species_tree_x.setRooted( true );
+            rio = RIO.executeAnalysis( gene_trees_x,
+                                       species_tree_x,
+                                       ALGORITHM.GSDIR,
+                                       REROOTING.BY_ALGORITHM,
+                                       "",
+                                       true,
+                                       false );
+            if ( rio.getAnalyzedGeneTrees().length != 1 ) {
+                return false;
+            }
+            if ( rio.getExtNodesOfAnalyzedGeneTrees() != 2 ) {
+                return false;
+            }
+            if ( rio.getGSDIRtaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) {
+                return false;
+            }
+            if ( rio.getRemovedGeneTreeNodes().size() != 0 ) {
+                return false;
+            }
+            m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees(), true );
+            if ( !m.getRowAsString( 0, ',' ).equals( "MOUSE1,1,0" ) ) {
+                System.out.println( m.toString() );
+                return false;
+            }
+            if ( !m.getRowAsString( 1, ',' ).equals( "MOUSE2,0,1" ) ) {
+                System.out.println( m.toString() );
+                return false;
+            }
+            //
+            final String gene_trees_xx_str = "(MOUSE1[&&NHX:S=MOUSE],RAT1[&&NHX:S=RAT])";
+            final Phylogeny[] gene_trees_xx = factory.create( gene_trees_xx_str, nhx );
+            final String species_trees_xx_str = "([&&NHX:S=MOUSE],[&&NHX:S=RAT]);";
+            final Phylogeny species_tree_xx = factory.create( species_trees_xx_str, new NHXParser() )[ 0 ];
+            species_tree_xx.setRooted( true );
+            rio = RIO.executeAnalysis( gene_trees_xx,
+                                       species_tree_xx,
+                                       ALGORITHM.GSDIR,
+                                       REROOTING.BY_ALGORITHM,
+                                       "",
+                                       true,
+                                       false );
+            if ( rio.getAnalyzedGeneTrees().length != 1 ) {
+                return false;
+            }
+            if ( rio.getExtNodesOfAnalyzedGeneTrees() != 2 ) {
+                return false;
+            }
+            if ( rio.getGSDIRtaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) {
+                return false;
+            }
+            if ( rio.getRemovedGeneTreeNodes().size() != 0 ) {
+                return false;
+            }
+            m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees(), true );
+            if ( !m.getRowAsString( 0, ',' ).equals( "MOUSE1,1,1" ) ) {
+                System.out.println( m.toString() );
+                return false;
+            }
+            if ( !m.getRowAsString( 1, ',' ).equals( "RAT1,1,1" ) ) {
+                System.out.println( m.toString() );
+                return false;
+            }
+            //
             final String gene_trees_1_str = "(((((MOUSE,RAT),HUMAN),CAEEL),YEAST),ARATH);"
                     + "((((MOUSE,RAT),HUMAN),(ARATH,YEAST)),CAEEL);" + "((MOUSE,RAT),(((ARATH,YEAST),CAEEL),HUMAN));"
                     + "(((((MOUSE,HUMAN),RAT),CAEEL),YEAST),ARATH);" + "((((HUMAN,MOUSE),RAT),(ARATH,YEAST)),CAEEL);";
@@ -146,14 +316,13 @@ public final class TestRIO {
             final Phylogeny species_tree_1 = factory.create( species_trees_1_str, new NHXParser() )[ 0 ];
             species_tree_1.setRooted( true );
             PhylogenyMethods.transferNodeNameToField( species_tree_1, PhylogenyNodeField.TAXONOMY_CODE, true );
-            //Archaeopteryx.createApplication( species_trees_1 );
-            RIO rio = RIO.executeAnalysis( gene_trees_1,
-                                           species_tree_1,
-                                           ALGORITHM.GSDIR,
-                                           REROOTING.BY_ALGORITHM,
-                                           "",
-                                           true,
-                                           false );
+            rio = RIO.executeAnalysis( gene_trees_1,
+                                       species_tree_1,
+                                       ALGORITHM.GSDIR,
+                                       REROOTING.BY_ALGORITHM,
+                                       "",
+                                       true,
+                                       false );
             if ( rio.getAnalyzedGeneTrees().length != 5 ) {
                 return false;
             }
@@ -166,7 +335,7 @@ public final class TestRIO {
             if ( rio.getRemovedGeneTreeNodes().size() != 0 ) {
                 return false;
             }
-            IntMatrix m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees(), true );
+            m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees(), true );
             // System.out.println( m.toString() );
             if ( !m.getRowAsString( 0, ',' ).equals( "ARATH,5,5,5,5,5,5" ) ) {
                 return false;
index 87d28bd..4caeb72 100644 (file)
@@ -337,7 +337,7 @@ public final class GSDI implements GSDII {
                 final Set<PhylogenyNode> set = new HashSet<PhylogenyNode>();
                 for( PhylogenyNode n : g.getChildNode1().getAllExternalDescendants() ) {
                     n = n.getLink();
-                    while ( n.getParent() != s ) {
+                    while ( ( n.getParent() != s ) && ( n.getParent() != null ) ) {
                         n = n.getParent();
                         if ( n.isRoot() ) {
                             break;
@@ -348,7 +348,7 @@ public final class GSDI implements GSDII {
                 boolean multiple = false;
                 for( PhylogenyNode n : g.getChildNode2().getAllExternalDescendants() ) {
                     n = n.getLink();
-                    while ( n.getParent() != s ) {
+                    while ( ( n.getParent() != s ) && ( n.getParent() != null ) ) {
                         n = n.getParent();
                         if ( n.isRoot() ) {
                             break;
index d0ee1a7..80a0d3a 100644 (file)
@@ -77,7 +77,7 @@ public class GSDIR implements GSDII {
             gene_tree.reRoot( branch );\r
             PhylogenyMethods.preOrderReId( species_tree );\r
             //TEST, remove later\r
-            //            for( final PhylogenyNodeIterator it = _gene_tree.iteratorPostorder(); it.hasNext(); ) {\r
+            //            for( final PhylogenyNodeIterator it = gene_tree.iteratorPostorder(); it.hasNext(); ) {\r
             //                final PhylogenyNode g = it.next();\r
             //                if ( g.isInternal() ) {\r
             //                    g.setLink( null );\r
index 4e39e5c..3f84384 100644 (file)
@@ -56,13 +56,6 @@ public final class TestGSDI {
         else {
             System.out.println( "OK" );
         }
-        //        boolean success = test();
-        //        if ( success ) {
-        //            System.out.println( "OK" );
-        //        }
-        //        else {
-        //            System.out.println( "failed" );
-        //        }
     }
 
     public static boolean test() {
@@ -90,6 +83,23 @@ public final class TestGSDI {
 
     private static boolean testGSDI_against_binary_gene_tree() {
         try {
+            final PhylogenyFactory factory0 = ParserBasedPhylogenyFactory.getInstance();
+            final String s0 = "([&&NHX:S=1]);";
+            final String gene_0_str = "([&&NHX:S=1],[&&NHX:S=1]);";
+            final Phylogeny s_0 = factory0.create( s0, new NHXParser() )[ 0 ];
+            final Phylogeny gene_0 = factory0.create( gene_0_str, new NHXParser() )[ 0 ];
+            s_0.setRooted( true );
+            gene_0.setRooted( true );
+            final GSDI sdi0 = new GSDI( gene_0, s_0, false, false, false );
+            if ( sdi0.getSpeciationOrDuplicationEventsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi0.getDuplicationsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi0.getSpeciationsSum() != 0 ) {
+                return false;
+            }
             final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
             final String multi_species_2_str = "(((((([&&NHX:S=1],[&&NHX:S=2]),"
                     + "([&&NHX:S=3],[&&NHX:S=4],[&&NHX:S=5])),"
@@ -1450,6 +1460,36 @@ public final class TestGSDI {
 
     private static boolean testGSDIR_general() {
         try {
+            final PhylogenyFactory factory0 = ParserBasedPhylogenyFactory.getInstance();
+            final String s0 = "([&&NHX:S=1]);";
+            final String gene_0_str = "([&&NHX:S=1],[&&NHX:S=1]);";
+            final Phylogeny s_0 = factory0.create( s0, new NHXParser() )[ 0 ];
+            final Phylogeny gene_0 = factory0.create( gene_0_str, new NHXParser() )[ 0 ];
+            s_0.setRooted( true );
+            gene_0.setRooted( true );
+            final GSDIR sdi0 = new GSDIR( gene_0, s_0, true, true );
+            if ( sdi0.getSpeciationsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi0.getMinDuplicationsSum() != 1 ) {
+                return false;
+            }
+            //
+            final PhylogenyFactory factory00 = ParserBasedPhylogenyFactory.getInstance();
+            final String s00 = "[&&NHX:S=1];";
+            final String gene_00_str = "([&&NHX:S=1],[&&NHX:S=1]);";
+            final Phylogeny s_00 = factory00.create( s00, new NHXParser() )[ 0 ];
+            final Phylogeny gene_00 = factory00.create( gene_00_str, new NHXParser() )[ 0 ];
+            s_00.setRooted( true );
+            gene_00.setRooted( true );
+            final GSDIR sdi00 = new GSDIR( gene_00, s_00, true, true );
+            if ( sdi00.getSpeciationsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi00.getMinDuplicationsSum() != 1 ) {
+                return false;
+            }
+            //
             final String s1str = "(((([&&NHX:S=HUMAN],([&&NHX:S=MOUSE],[&&NHX:S=RAT])),([&&NHX:S=CAEEL],[&&NHX:S=CAEBR])),[&&NHX:S=YEAST]),[&&NHX:S=ARATH])";
             final Phylogeny s1 = ParserBasedPhylogenyFactory.getInstance().create( s1str, new NHXParser() )[ 0 ];
             s1.setRooted( true );
index f147188..ab5db92 100644 (file)
@@ -27,8 +27,8 @@ package org.forester.util;
 
 public final class ForesterConstants {
 
-    public final static String  FORESTER_VERSION            = "1.018";
-    public final static String  FORESTER_DATE               = "130108";
+    public final static String  FORESTER_VERSION            = "1.019";
+    public final static String  FORESTER_DATE               = "130114";
     public final static String  PHYLO_XML_VERSION           = "1.10";
     public final static String  PHYLO_XML_LOCATION          = "http://www.phyloxml.org";
     public final static String  PHYLO_XML_XSD               = "phyloxml.xsd";