in progress
[jalview.git] / forester / java / src / org / forester / sdi / TestGSDI.java
index 6cf3c72..045eebb 100644 (file)
@@ -5,7 +5,7 @@
 // Copyright (C) 2008-2009 Christian M. Zmasek
 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
 // All rights reserved
-// 
+//
 // This library is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
 // License as published by the Free Software Foundation; either
@@ -15,7 +15,7 @@
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 // Lesser General Public License for more details.
-// 
+//
 // You should have received a copy of the GNU Lesser General Public
 // License along with this library; if not, write to the Free Software
 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
@@ -29,14 +29,20 @@ import java.io.IOException;
 
 import org.forester.development.DevelopmentTools;
 import org.forester.io.parsers.nhx.NHXParser;
+import org.forester.io.parsers.util.ParserUtils;
 import org.forester.phylogeny.Phylogeny;
 import org.forester.phylogeny.PhylogenyMethods;
 import org.forester.phylogeny.data.Event;
 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
 import org.forester.phylogeny.factories.PhylogenyFactory;
+import org.forester.sdi.SDI.TaxonomyComparisonBase;
+import org.forester.util.ForesterUtil;
 
 public final class TestGSDI {
 
+    private final static String PATH_TO_TEST_DATA = System.getProperty( "user.dir" ) + ForesterUtil.getFileSeparator()
+                                                          + "test_data" + ForesterUtil.getFileSeparator(); // "test_data" directory under the JVM working directory ("user.dir"), ending with a separator
+
     private final static Phylogeny createPhylogeny( final String nhx ) throws IOException {
         final Phylogeny p = ParserBasedPhylogenyFactory.getInstance().create( nhx, new NHXParser() )[ 0 ];
         p.setRooted( true );
@@ -44,7 +50,7 @@ public final class TestGSDI {
     }
 
     private final static Event getEvent( final Phylogeny p, final String n1, final String n2 ) {
-        return PhylogenyMethods.getInstance().obtainLCA( p.getNode( n1 ), p.getNode( n2 ) ).getNodeData().getEvent();
+        return PhylogenyMethods.obtainLCA( p.getNode( n1 ), p.getNode( n2 ) ).getNodeData().getEvent(); // event stored on the node obtainLCA returns for p.getNode( n1 ) and p.getNode( n2 )
     }
 
     public static boolean test() {
@@ -92,12 +98,33 @@ public final class TestGSDI {
     private static boolean testGSDI_general() {
         try {
             final PhylogenyMethods pm = PhylogenyMethods.getInstance();
+            final String s2_ = "((" + "([&&NHX:S=a1],[&&NHX:S=a2],[&&NHX:S=a3],[&&NHX:S=a4]),"
+                    + "([&&NHX:S=b1],[&&NHX:S=b2],[&&NHX:S=b3],[&&NHX:S=b4]),"
+                    + "([&&NHX:S=c1],[&&NHX:S=c2],[&&NHX:S=c3],[&&NHX:S=c4]),"
+                    + "([&&NHX:S=d1],[&&NHX:S=d2],[&&NHX:S=d3],[&&NHX:S=d4])),("
+                    + "([&&NHX:S=e1],[&&NHX:S=e2],[&&NHX:S=e3],[&&NHX:S=e4]),"
+                    + "([&&NHX:S=f1],[&&NHX:S=f2],[&&NHX:S=f3],[&&NHX:S=f4]),"
+                    + "([&&NHX:S=g1],[&&NHX:S=g2],[&&NHX:S=g3],[&&NHX:S=g4]),"
+                    + "([&&NHX:S=h1],[&&NHX:S=h2],[&&NHX:S=h3],[&&NHX:S=h4])),("
+                    + "([&&NHX:S=i1],[&&NHX:S=i2],[&&NHX:S=i3],[&&NHX:S=i4]),"
+                    + "([&&NHX:S=j1],[&&NHX:S=j2],[&&NHX:S=j3],[&&NHX:S=j4]),"
+                    + "([&&NHX:S=k1],[&&NHX:S=k2],[&&NHX:S=k3],[&&NHX:S=k4]),"
+                    + "([&&NHX:S=l1],[&&NHX:S=l2],[&&NHX:S=l3],[&&NHX:S=l4])),("
+                    + "([&&NHX:S=m1],[&&NHX:S=m2],[&&NHX:S=m3],[&&NHX:S=m4]),"
+                    + "([&&NHX:S=n1],[&&NHX:S=n2],[&&NHX:S=n3],[&&NHX:S=n4]),"
+                    + "([&&NHX:S=o1],[&&NHX:S=o2],[&&NHX:S=o3],[&&NHX:S=o4]),"
+                    + "([&&NHX:S=p1],[&&NHX:S=p2],[&&NHX:S=p3],[&&NHX:S=p4])"
+                    + "),[&&NHX:S=x],[&&NHX:S=y],[&&NHX:S=z])";
+            final Phylogeny s2 = ParserBasedPhylogenyFactory.getInstance().create( s2_, new NHXParser() )[ 0 ];
+            s2.setRooted( true );
             final String s1_ = "((([&&NHX:S=A2],[&&NHX:S=A1]),[&&NHX:S=B],[&&NHX:S=C]),[&&NHX:S=D])";
             final Phylogeny s1 = ParserBasedPhylogenyFactory.getInstance().create( s1_, new NHXParser() )[ 0 ];
             s1.setRooted( true );
             final Phylogeny g1 = TestGSDI
                     .createPhylogeny( "((((B[&&NHX:S=B],A1[&&NHX:S=A1]),C[&&NHX:S=C]),A2[&&NHX:S=A2]),D[&&NHX:S=D])" );
             final GSDI sdi1 = new GSDI( g1, s1, false );
+            // Archaeopteryx.createApplication( g1 );
+            // Archaeopteryx.createApplication( s1 );
             if ( sdi1.getDuplicationsSum() != 1 ) {
                 return false;
             }
@@ -227,25 +254,6 @@ public final class TestGSDI {
             if ( !TestGSDI.getEvent( gene7_2, "a1", "z" ).isSpeciation() ) {
                 return false;
             }
-            final String s2_ = "((" + "([&&NHX:S=a1],[&&NHX:S=a2],[&&NHX:S=a3],[&&NHX:S=a4]),"
-                    + "([&&NHX:S=b1],[&&NHX:S=b2],[&&NHX:S=b3],[&&NHX:S=b4]),"
-                    + "([&&NHX:S=c1],[&&NHX:S=c2],[&&NHX:S=c3],[&&NHX:S=c4]),"
-                    + "([&&NHX:S=d1],[&&NHX:S=d2],[&&NHX:S=d3],[&&NHX:S=d4])),("
-                    + "([&&NHX:S=e1],[&&NHX:S=e2],[&&NHX:S=e3],[&&NHX:S=e4]),"
-                    + "([&&NHX:S=f1],[&&NHX:S=f2],[&&NHX:S=f3],[&&NHX:S=f4]),"
-                    + "([&&NHX:S=g1],[&&NHX:S=g2],[&&NHX:S=g3],[&&NHX:S=g4]),"
-                    + "([&&NHX:S=h1],[&&NHX:S=h2],[&&NHX:S=h3],[&&NHX:S=h4])),("
-                    + "([&&NHX:S=i1],[&&NHX:S=i2],[&&NHX:S=i3],[&&NHX:S=i4]),"
-                    + "([&&NHX:S=j1],[&&NHX:S=j2],[&&NHX:S=j3],[&&NHX:S=j4]),"
-                    + "([&&NHX:S=k1],[&&NHX:S=k2],[&&NHX:S=k3],[&&NHX:S=k4]),"
-                    + "([&&NHX:S=l1],[&&NHX:S=l2],[&&NHX:S=l3],[&&NHX:S=l4])),("
-                    + "([&&NHX:S=m1],[&&NHX:S=m2],[&&NHX:S=m3],[&&NHX:S=m4]),"
-                    + "([&&NHX:S=n1],[&&NHX:S=n2],[&&NHX:S=n3],[&&NHX:S=n4]),"
-                    + "([&&NHX:S=o1],[&&NHX:S=o2],[&&NHX:S=o3],[&&NHX:S=o4]),"
-                    + "([&&NHX:S=p1],[&&NHX:S=p2],[&&NHX:S=p3],[&&NHX:S=p4])"
-                    + "),[&&NHX:S=x],[&&NHX:S=y],[&&NHX:S=z])";
-            final Phylogeny s2 = ParserBasedPhylogenyFactory.getInstance().create( s2_, new NHXParser() )[ 0 ];
-            s2.setRooted( true );
             final Phylogeny g2_0 = TestGSDI.createPhylogeny( "(m1[&&NHX:S=m1],m3[&&NHX:S=m3])" );
             final GSDI sdi2_0 = new GSDI( g2_0, s2, false );
             if ( sdi2_0.getDuplicationsSum() != 0 ) {
@@ -629,6 +637,8 @@ public final class TestGSDI {
             final Phylogeny g2_22 = TestGSDI
                     .createPhylogeny( "((n1[&&NHX:S=n1],n2[&&NHX:S=n2]),(n3[&&NHX:S=n3],n4[&&NHX:S=n4]))" );
             final GSDI sdi2_22 = new GSDI( g2_22, s2, false );
+            //Archaeopteryx.createApplication( g2_22 );
+            //Archaeopteryx.createApplication( s2 );
             if ( sdi2_22.getDuplicationsSum() != 0 ) {
                 return false;
             }
@@ -923,6 +933,18 @@ public final class TestGSDI {
             if ( !TestGSDI.getEvent( g2_33, "a1", "z" ).isSpeciationOrDuplication() ) {
                 return false;
             }
+            final Phylogeny g2_33_d = TestGSDI
+                    .createPhylogeny( "((((((((((((a1[&&NHX:S=a1],a2[&&NHX:S=a2])[&&NHX:D=N],b1[&&NHX:S=b1])[&&NHX:D=N],c1[&&NHX:S=c1])[&&NHX:D=?],d1[&&NHX:S=d1])[&&NHX:D=?],x[&&NHX:S=x])[&&NHX:D=N],p1[&&NHX:S=p1])[&&NHX:D=?],i1[&&NHX:S=i1])[&&NHX:D=?],k2[&&NHX:S=k2])[&&NHX:D=Y],e1[&&NHX:S=e1])[&&NHX:D=Y],y[&&NHX:S=y])[&&NHX:D=Y],z[&&NHX:S=z])[&&NHX:D=?],(((((((((((a1[&&NHX:S=a1],a2[&&NHX:S=a2])[&&NHX:D=N],b1[&&NHX:S=b1])[&&NHX:D=N],c1[&&NHX:S=c1])[&&NHX:D=?],d1[&&NHX:S=d1])[&&NHX:D=?],x[&&NHX:S=x])[&&NHX:D=N],p1[&&NHX:S=p1])[&&NHX:D=?],i1[&&NHX:S=i1])[&&NHX:D=?],k2[&&NHX:S=k2])[&&NHX:D=Y],e1[&&NHX:S=e1])[&&NHX:D=Y],y[&&NHX:S=y])[&&NHX:D=Y],z[&&NHX:S=z])[&&NHX:D=?])" );
+            final GSDI sdi2_33_d = new GSDI( g2_33_d, s2, false );
+            if ( sdi2_33_d.getDuplicationsSum() != 3 ) {
+                return false;
+            }
+            if ( sdi2_33_d.getSpeciationOrDuplicationEventsSum() != 14 ) {
+                return false;
+            }
+            if ( sdi2_33_d.getSpeciationsSum() != 6 ) {
+                return false;
+            }
             final Phylogeny g2_34 = TestGSDI
                     .createPhylogeny( "(((n1_0[&&NHX:S=n1],n2_0[&&NHX:S=n2]),(n1_1[&&NHX:S=n1],n3_0[&&NHX:S=n3])),n4_0[&&NHX:S=n4])" );
             final GSDI sdi2_34 = new GSDI( g2_34, s2, false );
@@ -1188,23 +1210,183 @@ public final class TestGSDI {
             if ( sdi7_4_2.getSpeciationsSum() != 5 ) {
                 return false;
             }
-            // final String g2_0_ =
-            // "(([&&NHX:S=a1],[&&NHX:S=a2]),([&&NHX:S=o2],[&&NHX:S=o4]))";
-            // final Phylogeny g2_0 = factory.create( g2_0_, new NHXParser() )[
-            // 0 ];
-            // g2_0.setRooted( true );
-            // final GSDI sdi2_0 = new GSDI( g2_0, s2, false );
-            // if ( sdi2_0.getDuplicationsSum() != 0 ) {
-            // return false;
-            // }
-            // final String g2_1_= "";
-            // final Phylogeny g2_1 = factory.create( g2_1_, new NHXParser() )[
-            // 0 ];
-            // g2_1.setRooted( true );
-            // final GSDI sdi2_1 = new GSDI( g2_1, s2, false );
-            // if ( sdi2_1.getDuplicationsSum() != 0 ) {
-            // return false;
-            // }
+            final String g2_0_ = "(([&&NHX:S=a1],[&&NHX:S=a2]),([&&NHX:S=o2],[&&NHX:S=o4]))";
+            final Phylogeny g2_0p = TestGSDI.createPhylogeny( g2_0_ ); // gene tree with unnamed leaves for species a1, a2, o2, o4
+            g2_0p.setRooted( true ); // act on the tree built above; g2_0 is a different gene tree declared earlier in this method
+            final GSDI sdi2_0p = new GSDI( g2_0p, s2, false );
+            if ( sdi2_0p.getDuplicationsSum() != 0 ) { // this sub-test expects zero duplications against species tree s2
+                return false;
+            }
+            //--
+            final Phylogeny tol_143_ = ParserUtils.readPhylogenies( PATH_TO_TEST_DATA + "tol_143.xml" )[ 0 ];
+            final Phylogeny gene_tree_tax_code_4_ = ParserUtils.readPhylogenies( PATH_TO_TEST_DATA
+                    + "gene_tree_tax_code_4.xml" )[ 0 ];
+            final GSDI gsdi_143_4_1 = new GSDI( gene_tree_tax_code_4_.copy(), tol_143_.copy(), false, true, true );
+            if ( gsdi_143_4_1.getDuplicationsSum() != 21 ) {
+                return false;
+            }
+            if ( gsdi_143_4_1.getSpeciationsSum() != 28 ) {
+                return false;
+            }
+            if ( gsdi_143_4_1.getSpeciationOrDuplicationEventsSum() != 6 ) {
+                return false;
+            }
+            //--
+            final Phylogeny gsdi_test_gene_tree_sn_wnt = ParserUtils.readPhylogenies( PATH_TO_TEST_DATA
+                    + "gsdi_test_gene_tree_sn_wnt.xml" )[ 0 ];
+            gsdi_test_gene_tree_sn_wnt.setRooted( true );
+            final GSDI a = new GSDI( gsdi_test_gene_tree_sn_wnt.copy(), tol_143_.copy(), false, true, true );
+            if ( a.getDuplicationsSum() != 33 ) {
+                return false;
+            }
+            if ( a.getSpeciationsSum() != 31 ) {
+                return false;
+            }
+            if ( a.getSpeciationOrDuplicationEventsSum() != 0 ) {
+                return false;
+            }
+            if ( a.getTaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) {
+                return false;
+            }
+            if ( a.getMappedExternalSpeciesTreeNodes().size() != 26 ) {
+                return false;
+            }
+            if ( a.getReMappedScientificNamesFromGeneTree().size() != 0 ) {
+                return false;
+            }
+            //--
+            final Phylogeny gsdi_test_species_tree_sn_xml = ParserUtils.readPhylogenies( PATH_TO_TEST_DATA
+                    + "gsdi_test_species_tree_sn.xml" )[ 0 ];
+            final GSDI b = new GSDI( gsdi_test_gene_tree_sn_wnt.copy(),
+                                     gsdi_test_species_tree_sn_xml.copy(),
+                                     false,
+                                     true,
+                                     true );
+            if ( b.getDuplicationsSum() != 8 ) {
+                return false;
+            }
+            if ( b.getSpeciationsSum() != 2 ) {
+                return false;
+            }
+            if ( b.getSpeciationOrDuplicationEventsSum() != 0 ) {
+                return false;
+            }
+            if ( b.getTaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) {
+                return false;
+            }
+            if ( b.getMappedExternalSpeciesTreeNodes().size() != 2 ) {
+                return false;
+            }
+            if ( b.getReMappedScientificNamesFromGeneTree().size() != 0 ) {
+                return false;
+            }
+            if ( b.getStrippedExternalGeneTreeNodes().size() != 87 ) {
+                return false;
+            }
+            if ( b.getStrippedSpeciesTreeNodes().size() != 17 ) {
+                return false;
+            }
+            //--
+            final Phylogeny gsdi_test_species_tree_sn_nh = TestGSDI
+                    .createPhylogeny( "((((((('Homo sapiens','Mus musculus')Euarchontoglires,'Petromyzon marinus')Vertebrata,'Nematostella vectensis')'Bilateria Cnidaria',(('Mycosphaerella graminicola','Mycosphaerella pini')Mycosphaerella,'Saccharomyces cerevisiae')'Pezizomycotina Saccharomycetales')Opisthokonta,('Plasmodium chabaudi','Plasmodium falciparum','Plasmodium yoelii yoelii')Plasmodium)Eukaryota,'Pyrococcus horikoshii')Neomura,(('Kineococcus radiotolerans','Kocuria rhizophila','Streptomyces coelicolor','Thermobifida fusca','Microlunatus phosphovorus'),'Bacteroides thetaiotaomicron'))'cellular organisms';" );
+            PhylogenyMethods.transferNodeNameToField( gsdi_test_species_tree_sn_nh,
+                                                      PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME,
+                                                      true );
+            final GSDI c = new GSDI( gsdi_test_gene_tree_sn_wnt.copy(),
+                                     gsdi_test_species_tree_sn_nh.copy(),
+                                     false,
+                                     true,
+                                     true );
+            if ( c.getDuplicationsSum() != 8 ) {
+                return false;
+            }
+            if ( c.getSpeciationsSum() != 2 ) {
+                return false;
+            }
+            if ( c.getSpeciationOrDuplicationEventsSum() != 0 ) {
+                return false;
+            }
+            if ( c.getTaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) {
+                return false;
+            }
+            if ( c.getMappedExternalSpeciesTreeNodes().size() != 2 ) {
+                return false;
+            }
+            if ( c.getReMappedScientificNamesFromGeneTree().size() != 0 ) {
+                return false;
+            }
+            if ( c.getStrippedExternalGeneTreeNodes().size() != 87 ) {
+                return false;
+            }
+            if ( c.getStrippedSpeciesTreeNodes().size() != 15 ) {
+                return false;
+            }
+            //--
+            final Phylogeny gsdi_test_gene_tree_codes_xml = ParserUtils.readPhylogenies( PATH_TO_TEST_DATA
+                    + "gsdi_test_gene_tree_codes.xml" )[ 0 ];
+            final Phylogeny gsdi_test_species_tree_codes_xml = ParserUtils.readPhylogenies( PATH_TO_TEST_DATA
+                    + "gsdi_test_species_tree_codes.xml" )[ 0 ];
+            final GSDI d = new GSDI( gsdi_test_gene_tree_codes_xml.copy(),
+                                     gsdi_test_species_tree_codes_xml.copy(),
+                                     false,
+                                     true,
+                                     true );
+            if ( d.getDuplicationsSum() != 21 ) {
+                return false;
+            }
+            if ( d.getSpeciationsSum() != 28 ) {
+                return false;
+            }
+            if ( d.getSpeciationOrDuplicationEventsSum() != 6 ) {
+                return false;
+            }
+            if ( d.getTaxCompBase() != TaxonomyComparisonBase.CODE ) {
+                return false;
+            }
+            if ( d.getMappedExternalSpeciesTreeNodes().size() != 17 ) {
+                return false;
+            }
+            if ( d.getReMappedScientificNamesFromGeneTree().size() != 0 ) {
+                return false;
+            }
+            if ( d.getStrippedExternalGeneTreeNodes().size() != 12 ) {
+                return false;
+            }
+            if ( d.getStrippedSpeciesTreeNodes().size() != 3 ) {
+                return false;
+            }
+            //--
+            final Phylogeny gsdi_test_gene_tree_sn_xml = ParserUtils.readPhylogenies( PATH_TO_TEST_DATA
+                    + "gsdi_test_gene_tree_sn.xml" )[ 0 ];
+            final GSDI e = new GSDI( gsdi_test_gene_tree_sn_xml.copy(),
+                                     gsdi_test_species_tree_sn_xml.copy(),
+                                     false,
+                                     true,
+                                     true );
+            if ( e.getDuplicationsSum() != 7 ) {
+                return false;
+            }
+            if ( e.getSpeciationsSum() != 9 ) {
+                return false;
+            }
+            if ( e.getSpeciationOrDuplicationEventsSum() != 1 ) {
+                return false;
+            }
+            if ( e.getTaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) {
+                return false;
+            }
+            if ( e.getMappedExternalSpeciesTreeNodes().size() != 12 ) {
+                return false;
+            }
+            if ( e.getReMappedScientificNamesFromGeneTree().size() != 8 ) {
+                return false;
+            }
+            if ( e.getStrippedExternalGeneTreeNodes().size() != 3 ) {
+                return false;
+            }
+            if ( e.getStrippedSpeciesTreeNodes().size() != 7 ) {
+                return false;
+            }
         }
         catch ( final Exception e ) {
             e.printStackTrace( System.out );
@@ -1212,4 +1394,23 @@ public final class TestGSDI {
         }
         return true;
     }
+
+    // Ad-hoc command-line driver; args is not read. Runs testGSDI_against_binary_gene_tree()
+    // and testGSDI_general() and reports on System.out. The binary group prints only on
+    // failure; the general group prints either "general failed" or "general OK".
+    public static void main( final String[] args ) {
+        if ( !TestGSDI.testGSDI_against_binary_gene_tree() ) {
+            System.out.println( "binary failed" );
+        }
+        if ( !TestGSDI.testGSDI_general() ) {
+            System.out.println( "general failed" );
+        }
+        else {
+            System.out.println( "general OK" );
+        }
+        // Disabled alternative that reports one overall verdict from test():
+        //        boolean success = test();
+        //        if ( success ) { System.out.println( "OK" ); }
+        //        else { System.out.println( "failed" ); }
+    }
 }