"rio" work
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Thu, 27 Dec 2012 03:29:34 +0000 (03:29 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Thu, 27 Dec 2012 03:29:34 +0000 (03:29 +0000)
forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java
forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java
forester/java/src/org/forester/io/parsers/util/ParserUtils.java
forester/java/src/org/forester/rio/TestRIO.java
forester/java/src/org/forester/test/Test.java

index ff467b8..d3b700d 100644 (file)
@@ -1130,10 +1130,10 @@ public final class MainFrameApplication extends MainFrame {
                 .setToolTipText( "To collapse branches with confidence values below a threshold into multifurcations (in the case of multiple confidences per branch: without at least one confidence value above a threshold)" );
         _tools_menu.addSeparator();
         _tools_menu
-                .add( _extract_tax_code_from_node_names_jmi = new JMenuItem( "Extract Taxonomic Codes or Ids from Node Names" ) );
+                .add( _extract_tax_code_from_node_names_jmi = new JMenuItem( "Extract Taxonomic Data from Node Names" ) );
         customizeJMenuItem( _extract_tax_code_from_node_names_jmi );
         _extract_tax_code_from_node_names_jmi
-                .setToolTipText( "To extract SwissProt/Uniprot taxonomic codes (mnemonics) from nodes names in the form of 'xyz_CAEEL', or Uniprot/NCBI identifiers from nodes names in the form of 'xyz_6239'" );
+                .setToolTipText( "To extract SwissProt/Uniprot taxonomic codes (mnemonics) from nodes names in the form of 'xyz_CAEEL', Uniprot/NCBI identifiers form of 'xyz_6239', or scientific names form of 'xyz_Caenorhabditis_elegans'" );
         _tools_menu
                 .add( _move_node_names_to_tax_sn_jmi = new JMenuItem( "Transfer Node Names to Taxonomic Scientific Names" ) );
         customizeJMenuItem( _move_node_names_to_tax_sn_jmi );
@@ -1923,8 +1923,8 @@ public final class MainFrameApplication extends MainFrame {
                 else {
                     JOptionPane
                             .showMessageDialog( this,
-                                                "Could not extract any taxonomic data. Maybe node names are empty\n"
-                                                        + "or not in the forms \"XYZ_CAEEL\", \"XYZ_CAEEL/12-394\", or \"XYZ_6239\",\n"
+                                                "Could not extract any taxonomic data.\nMaybe node names are empty\n"
+                                                        + "or not in the forms \"XYZ_CAEEL\", \"XYZ_6239\", or \"XYZ_Caenorhabditis_elegans\"\n"
                                                         + "or nodes already have taxonomic data?\n",
                                                 "No Taxonomic Data Extracted",
                                                 JOptionPane.ERROR_MESSAGE );
index 2594b5a..8b6e638 100644 (file)
@@ -335,10 +335,10 @@ public class NexusPhylogeniesParser implements PhylogenyParser {
         for( final String pair : s.split( "," ) ) {
             final String[] kv = pair.trim().split( "\\s+" );
             if ( ( kv.length < 2 ) || ( kv.length > 3 ) ) {
-                throw new IOException( "ill formatted translate values: " + translate_sb );
+                throw new IOException( "ill-formatted translate values: " + translate_sb );
             }
             if ( ( kv.length == 3 ) && !kv[ 0 ].toLowerCase().trim().equals( translate ) ) {
-                throw new IOException( "ill formatted translate values: " + translate_sb );
+                throw new IOException( "ill-formatted translate values: " + translate_sb );
             }
             String key = "";
             String value = "";
index c6afab7..51f7f89 100644 (file)
@@ -55,6 +55,8 @@ import org.forester.util.ForesterUtil;
 
 public final class ParserUtils {
 
+    final public static Pattern  TAXOMONY_SN_PATTERN            = Pattern
+                                                                        .compile( "[^_]{2,}_([A-Z][a-z]+_[a-z]{2,}(_[A-Za-z]\\w+|))\\b" );
     final public static Pattern  TAXOMONY_CODE_PATTERN_1        = Pattern.compile( "\\b[A-Z0-9]{5}|RAT|PIG|PEA|CAP\\b" );
     final private static Pattern TAXOMONY_CODE_PATTERN_2        = Pattern
                                                                         .compile( "([A-Z0-9]{5}|RAT|PIG|PEA|CAP)[^0-9A-Za-z].*" );
@@ -256,6 +258,14 @@ public final class ParserUtils {
         return null;
     }
 
+    public final static String extractScientificNameFromNodeName( final String name ) { // Returns the scientific-name portion of a node name (e.g. "blag_Mus_musculus" yields "Mus musculus"), or null if none is recognized.
+        final Matcher m1 = TAXOMONY_SN_PATTERN.matcher( name ); // NOTE(review): a null name would throw here; callers presumably pass non-null names -- confirm
+        if ( m1.matches() ) { // matches() requires the entire name to fit the pattern, not merely a substring
+            return m1.group( 1 ).replace( '_', ' ' ); // group 1 is everything after the leading "prefix_"; underscores become spaces
+        }
+        return null; // name does not have the form prefix_Genus_species[_suffix]
+    }
+
     public final static String extractTaxonomyDataFromNodeName( final PhylogenyNode node,
                                                                 final NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction )
             throws PhyloXmlDataFormatException {
@@ -281,6 +291,18 @@ public final class ParserUtils {
                     return code;
                 }
             }
+            else if ( taxonomy_extraction == TAXONOMY_EXTRACTION.YES ) {
+                final String sn = extractScientificNameFromNodeName( node.getName() );
+                if ( !ForesterUtil.isEmpty( sn ) ) {
+                    if ( !node.getNodeData().isHasTaxonomy() ) {
+                        node.getNodeData().setTaxonomy( new Taxonomy() );
+                    }
+                    if ( ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getScientificName() ) ) {
+                        node.getNodeData().getTaxonomy().setScientificName( sn );
+                        return sn;
+                    }
+                }
+            }
         }
         return null;
     }
index 81d2db2..13d1da4 100644 (file)
@@ -1,6 +1,8 @@
 
 package org.forester.rio;
 
+import java.io.File;
+
 import org.forester.datastructures.IntMatrix;
 import org.forester.io.parsers.nhx.NHXParser;
 import org.forester.phylogeny.Phylogeny;
@@ -116,6 +118,422 @@ public final class TestRIO {
             if ( !m.getRowAsString( 5, ',' ).equals( "YEAST,5,5,5,5,5,5" ) ) {
                 return false;
             }
+            //
+            RIO r0 = RIO.executeAnalysis( new File( PATH_TO_TEST_DATA + "rio_mb_taxcode.run1.t" ),
+                                          new File( PATH_TO_TEST_DATA + "rio_tol_1.xml" ),
+                                          ALGORITHM.GSDIR,
+                                          REROOTING.BY_ALGORITHM,
+                                          "",
+                                          -1,
+                                          -1,
+                                          true,
+                                          false );
+            if ( r0.getGSDIRtaxCompBase() != TaxonomyComparisonBase.CODE ) {
+                return false;
+            }
+            if ( r0.getAnalyzedGeneTrees().length != 201 ) {
+                return false;
+            }
+            if ( r0.getExtNodesOfAnalyzedGeneTrees() != 6 ) {
+                return false;
+            }
+            if ( r0.getIntNodesOfAnalyzedGeneTrees() != 5 ) {
+                return false;
+            }
+            if ( r0.getRemovedGeneTreeNodes().size() != 0 ) {
+                return false;
+            }
+            if ( ForesterUtil.roundToInt( r0.getDuplicationsStatistics().median() ) != 1 ) {
+                return false;
+            }
+            m = RIO.calculateOrthologTable( r0.getAnalyzedGeneTrees(), true );
+            if ( !m.getRowAsString( 0, ',' ).equals( "A7SHU1_NEMVE,201,201,200,200,200,200" ) ) {
+                System.out.println( m.getRowAsString( 0, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 1, ',' ).equals( "BCDO2_HUMAN,201,201,200,200,200,43" ) ) {
+                System.out.println( m.getRowAsString( 1, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 2, ',' ).equals( "BCDO2_MOUSE,200,200,201,201,201,43" ) ) {
+                System.out.println( m.getRowAsString( 2, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 3, ',' ).equals( "H2ZH97_CIOSA,200,200,201,201,201,201" ) ) {
+                System.out.println( m.getRowAsString( 3, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 4, ',' ).equals( "Q1RLW1_DANRE,200,200,201,201,201,43" ) ) {
+                System.out.println( m.getRowAsString( 4, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 5, ',' ).equals( "Q6DIN7_XENTR,200,43,43,201,43,201" ) ) {
+                System.out.println( m.getRowAsString( 5, ',' ) );
+                return false;
+            }
+            //
+            r0 = RIO.executeAnalysis( new File( PATH_TO_TEST_DATA + "rio_mb_taxid.run1.t" ),
+                                      new File( PATH_TO_TEST_DATA + "rio_tol_1.xml" ),
+                                      ALGORITHM.GSDIR,
+                                      REROOTING.BY_ALGORITHM,
+                                      "",
+                                      -1,
+                                      -1,
+                                      true,
+                                      false );
+            if ( r0.getGSDIRtaxCompBase() != TaxonomyComparisonBase.ID ) {
+                return false;
+            }
+            if ( r0.getAnalyzedGeneTrees().length != 201 ) {
+                return false;
+            }
+            if ( r0.getExtNodesOfAnalyzedGeneTrees() != 6 ) {
+                return false;
+            }
+            if ( r0.getIntNodesOfAnalyzedGeneTrees() != 5 ) {
+                return false;
+            }
+            if ( r0.getRemovedGeneTreeNodes().size() != 0 ) {
+                return false;
+            }
+            if ( ForesterUtil.roundToInt( r0.getDuplicationsStatistics().median() ) != 1 ) {
+                return false;
+            }
+            m = RIO.calculateOrthologTable( r0.getAnalyzedGeneTrees(), true );
+            if ( !m.getRowAsString( 0, ',' ).equals( "A7SHU1_45351,201,200,201,200,200,200" ) ) {
+                System.out.println( m.getRowAsString( 0, ',' ) );
+                return false;
+            }
+            //mouse
+            if ( !m.getRowAsString( 1, ',' ).equals( "BCDO2_10090,200,201,200,201,201,43" ) ) {
+                System.out.println( m.getRowAsString( 1, ',' ) );
+                return false;
+            }
+            //human
+            if ( !m.getRowAsString( 2, ',' ).equals( "BCDO2_9606,201,200,201,200,200,43" ) ) {
+                System.out.println( m.getRowAsString( 2, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 3, ',' ).equals( "H2ZH97_51511,200,201,200,201,201,201" ) ) {
+                System.out.println( m.getRowAsString( 3, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 4, ',' ).equals( "Q1RLW1_7955,200,201,200,201,201,43" ) ) {
+                System.out.println( m.getRowAsString( 4, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 5, ',' ).equals( "Q6DIN7_8364,200,43,43,201,43,201" ) ) {
+                System.out.println( m.getRowAsString( 5, ',' ) );
+                return false;
+            }
+            r0 = RIO.executeAnalysis( new File( PATH_TO_TEST_DATA + "rio_mb_taxsn.run1.t" ),
+                                      new File( PATH_TO_TEST_DATA + "rio_tol_1.xml" ),
+                                      ALGORITHM.GSDIR,
+                                      REROOTING.BY_ALGORITHM,
+                                      "",
+                                      -1,
+                                      -1,
+                                      true,
+                                      false );
+            if ( r0.getGSDIRtaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) {
+                return false;
+            }
+            if ( r0.getAnalyzedGeneTrees().length != 201 ) {
+                return false;
+            }
+            if ( r0.getExtNodesOfAnalyzedGeneTrees() != 6 ) {
+                return false;
+            }
+            if ( r0.getIntNodesOfAnalyzedGeneTrees() != 5 ) {
+                return false;
+            }
+            if ( r0.getRemovedGeneTreeNodes().size() != 0 ) {
+                return false;
+            }
+            if ( ForesterUtil.roundToInt( r0.getDuplicationsStatistics().median() ) != 1 ) {
+                return false;
+            }
+            m = RIO.calculateOrthologTable( r0.getAnalyzedGeneTrees(), true );
+            if ( !m.getRowAsString( 0, ',' ).equals( "A7SHU1_Nematostella_vectensis,201,201,200,200,200,200" ) ) {
+                System.out.println( m.getRowAsString( 0, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 1, ',' ).equals( "BCDO2_Homo_sapiens,201,201,200,200,200,43" ) ) {
+                System.out.println( m.getRowAsString( 1, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 2, ',' ).equals( "BCDO2_Mus_musculus,200,200,201,201,201,43" ) ) {
+                System.out.println( m.getRowAsString( 2, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 3, ',' ).equals( "H2ZH97_Ciona_savignyi,200,200,201,201,201,201" ) ) {
+                System.out.println( m.getRowAsString( 3, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 4, ',' ).equals( "Q1RLW1_Danio_rerio,200,200,201,201,201,43" ) ) {
+                System.out.println( m.getRowAsString( 4, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 5, ',' ).equals( "Q6DIN7_Xenopus_tropicalis,200,43,43,201,43,201" ) ) {
+                System.out.println( m.getRowAsString( 5, ',' ) );
+                return false;
+            }
+            //
+            r0 = RIO.executeAnalysis( new File( PATH_TO_TEST_DATA + "rio_mb_taxsn.run1.t" ),
+                                      new File( PATH_TO_TEST_DATA + "rio_tol_1.xml" ),
+                                      ALGORITHM.GSDIR,
+                                      REROOTING.MIDPOINT,
+                                      "",
+                                      -1,
+                                      -1,
+                                      true,
+                                      false );
+            if ( r0.getGSDIRtaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) {
+                return false;
+            }
+            if ( r0.getAnalyzedGeneTrees().length != 201 ) {
+                return false;
+            }
+            if ( r0.getExtNodesOfAnalyzedGeneTrees() != 6 ) {
+                return false;
+            }
+            if ( r0.getIntNodesOfAnalyzedGeneTrees() != 5 ) {
+                return false;
+            }
+            if ( r0.getRemovedGeneTreeNodes().size() != 0 ) {
+                return false;
+            }
+            if ( ForesterUtil.roundToInt( r0.getDuplicationsStatistics().median() ) != 2 ) {
+                return false;
+            }
+            m = RIO.calculateOrthologTable( r0.getAnalyzedGeneTrees(), true );
+            if ( !m.getRowAsString( 0, ',' ).equals( "A7SHU1_Nematostella_vectensis,201,94,93,160,93,93" ) ) {
+                System.out.println( m.getRowAsString( 0, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 1, ',' ).equals( "BCDO2_Homo_sapiens,94,201,200,53,200,43" ) ) {
+                System.out.println( m.getRowAsString( 1, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 2, ',' ).equals( "BCDO2_Mus_musculus,93,200,201,53,201,43" ) ) {
+                System.out.println( m.getRowAsString( 2, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 3, ',' ).equals( "H2ZH97_Ciona_savignyi,160,53,53,201,53,53" ) ) {
+                System.out.println( m.getRowAsString( 3, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 4, ',' ).equals( "Q1RLW1_Danio_rerio,93,200,201,53,201,43" ) ) {
+                System.out.println( m.getRowAsString( 4, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 5, ',' ).equals( "Q6DIN7_Xenopus_tropicalis,93,43,43,53,43,201" ) ) {
+                System.out.println( m.getRowAsString( 5, ',' ) );
+                return false;
+            }
+            //
+            r0 = RIO.executeAnalysis( new File( PATH_TO_TEST_DATA + "rio_mb_taxsn.run1.t" ),
+                                      new File( PATH_TO_TEST_DATA + "rio_tol_1.xml" ),
+                                      ALGORITHM.GSDIR,
+                                      REROOTING.OUTGROUP,
+                                      "H2ZH97_Ciona_savignyi",
+                                      -1,
+                                      -1,
+                                      true,
+                                      false );
+            if ( r0.getGSDIRtaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) {
+                return false;
+            }
+            if ( r0.getAnalyzedGeneTrees().length != 201 ) {
+                return false;
+            }
+            if ( r0.getExtNodesOfAnalyzedGeneTrees() != 6 ) {
+                return false;
+            }
+            if ( r0.getIntNodesOfAnalyzedGeneTrees() != 5 ) {
+                return false;
+            }
+            if ( r0.getRemovedGeneTreeNodes().size() != 0 ) {
+                return false;
+            }
+            if ( ForesterUtil.roundToInt( r0.getDuplicationsStatistics().median() ) != 2 ) {
+                return false;
+            }
+            m = RIO.calculateOrthologTable( r0.getAnalyzedGeneTrees(), true );
+            if ( !m.getRowAsString( 0, ',' ).equals( "A7SHU1_Nematostella_vectensis,201,201,200,0,200,200" ) ) {
+                System.out.println( m.getRowAsString( 0, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 1, ',' ).equals( "BCDO2_Homo_sapiens,201,201,200,0,200,43" ) ) {
+                System.out.println( m.getRowAsString( 1, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 2, ',' ).equals( "BCDO2_Mus_musculus,200,200,201,0,201,43" ) ) {
+                System.out.println( m.getRowAsString( 2, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 3, ',' ).equals( "H2ZH97_Ciona_savignyi,0,0,0,201,0,0" ) ) {
+                System.out.println( m.getRowAsString( 3, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 4, ',' ).equals( "Q1RLW1_Danio_rerio,200,200,201,0,201,43" ) ) {
+                System.out.println( m.getRowAsString( 4, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 5, ',' ).equals( "Q6DIN7_Xenopus_tropicalis,200,43,43,0,43,201" ) ) {
+                System.out.println( m.getRowAsString( 5, ',' ) );
+                return false;
+            }
+            //
+            //
+            r0 = RIO.executeAnalysis( new File( PATH_TO_TEST_DATA + "rio_mb_taxsn.run1.t" ),
+                                      new File( PATH_TO_TEST_DATA + "rio_tol_1.xml" ),
+                                      ALGORITHM.GSDIR,
+                                      REROOTING.NONE,
+                                      null,
+                                      10,
+                                      19,
+                                      true,
+                                      false );
+            if ( r0.getGSDIRtaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) {
+                return false;
+            }
+            if ( r0.getAnalyzedGeneTrees().length != 10 ) {
+                return false;
+            }
+            if ( r0.getExtNodesOfAnalyzedGeneTrees() != 6 ) {
+                return false;
+            }
+            if ( r0.getIntNodesOfAnalyzedGeneTrees() != 5 ) {
+                return false;
+            }
+            if ( r0.getRemovedGeneTreeNodes().size() != 0 ) {
+                return false;
+            }
+            if ( ForesterUtil.roundToInt( r0.getDuplicationsStatistics().median() ) != 4 ) {
+                return false;
+            }
+            m = RIO.calculateOrthologTable( r0.getAnalyzedGeneTrees(), true );
+            if ( !m.getRowAsString( 0, ',' ).equals( "A7SHU1_Nematostella_vectensis,10,0,0,10,0,0" ) ) {
+                System.out.println( m.getRowAsString( 0, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 1, ',' ).equals( "BCDO2_Homo_sapiens,0,10,0,0,0,0" ) ) {
+                System.out.println( m.getRowAsString( 1, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 2, ',' ).equals( "BCDO2_Mus_musculus,0,0,10,0,0,0" ) ) {
+                System.out.println( m.getRowAsString( 2, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 3, ',' ).equals( "H2ZH97_Ciona_savignyi,10,0,0,10,0,0" ) ) {
+                System.out.println( m.getRowAsString( 3, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 4, ',' ).equals( "Q1RLW1_Danio_rerio,0,0,0,0,10,0" ) ) {
+                System.out.println( m.getRowAsString( 4, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 5, ',' ).equals( "Q6DIN7_Xenopus_tropicalis,0,0,0,0,0,10" ) ) {
+                System.out.println( m.getRowAsString( 5, ',' ) );
+                return false;
+            }
+            //
+            r0 = RIO.executeAnalysis( new File( PATH_TO_TEST_DATA + "rio_mb_taxcode_1.run1.t" ),
+                                      new File( PATH_TO_TEST_DATA + "rio_tol_1.xml" ),
+                                      ALGORITHM.GSDIR,
+                                      REROOTING.BY_ALGORITHM,
+                                      "",
+                                      -1,
+                                      -1,
+                                      true,
+                                      false );
+            if ( r0.getGSDIRtaxCompBase() != TaxonomyComparisonBase.CODE ) {
+                return false;
+            }
+            if ( r0.getAnalyzedGeneTrees().length != 201 ) {
+                return false;
+            }
+            if ( r0.getExtNodesOfAnalyzedGeneTrees() != 3 ) {
+                return false;
+            }
+            if ( r0.getIntNodesOfAnalyzedGeneTrees() != 2 ) {
+                return false;
+            }
+            if ( r0.getRemovedGeneTreeNodes().size() != 3 ) {
+                return false;
+            }
+            if ( ForesterUtil.roundToInt( r0.getDuplicationsStatistics().median() ) != 0 ) {
+                return false;
+            }
+            m = RIO.calculateOrthologTable( r0.getAnalyzedGeneTrees(), true );
+            if ( !m.getRowAsString( 0, ',' ).equals( "BCDO2_HUMAN,201,201,201" ) ) {
+                System.out.println( m.getRowAsString( 0, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 1, ',' ).equals( "Q1RLW1_DANRE,201,201,201" ) ) {
+                System.out.println( m.getRowAsString( 1, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 2, ',' ).equals( "Q6DIN7_XENTR,201,201,201" ) ) {
+                System.out.println( m.getRowAsString( 2, ',' ) );
+                return false;
+            }
+            //
+            //
+            r0 = RIO.executeAnalysis( new File( PATH_TO_TEST_DATA + "rio_mb_taxcode_2.run1.t" ),
+                                      new File( PATH_TO_TEST_DATA + "rio_tol_1.xml" ),
+                                      ALGORITHM.GSDIR,
+                                      REROOTING.BY_ALGORITHM,
+                                      "",
+                                      -1,
+                                      -1,
+                                      true,
+                                      false );
+            if ( r0.getGSDIRtaxCompBase() != TaxonomyComparisonBase.CODE ) {
+                return false;
+            }
+            if ( r0.getAnalyzedGeneTrees().length != 201 ) {
+                return false;
+            }
+            if ( r0.getExtNodesOfAnalyzedGeneTrees() != 6 ) {
+                return false;
+            }
+            if ( r0.getIntNodesOfAnalyzedGeneTrees() != 5 ) {
+                return false;
+            }
+            if ( r0.getRemovedGeneTreeNodes().size() != 0 ) {
+                return false;
+            }
+            if ( ForesterUtil.roundToInt( r0.getDuplicationsStatistics().median() ) != 1 ) {
+                return false;
+            }
+            m = RIO.calculateOrthologTable( r0.getAnalyzedGeneTrees(), true );
+            if ( !m.getRowAsString( 0, ',' ).equals( "A7SHU1_NEMVE&1,201,201,200,200,200,200" ) ) {
+                System.out.println( m.getRowAsString( 0, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 1, ',' ).equals( "BCDO2_HUMAN+,201,201,200,200,200,43" ) ) {
+                System.out.println( m.getRowAsString( 1, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 2, ',' ).equals( "BCDO2_MOUSE,200,200,201,201,201,43" ) ) {
+                System.out.println( m.getRowAsString( 2, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 3, ',' ).equals( "CIOSA,200,200,201,201,201,201" ) ) {
+                System.out.println( m.getRowAsString( 3, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 4, ',' ).equals( "Q1RLW1_DANRE/12-45,200,200,201,201,201,43" ) ) {
+                System.out.println( m.getRowAsString( 4, ',' ) );
+                return false;
+            }
+            if ( !m.getRowAsString( 5, ',' ).equals( "Q6DIN7_XENTR-LOUSE,200,43,43,201,43,201" ) ) {
+                System.out.println( m.getRowAsString( 5, ',' ) );
+                return false;
+            }
+            //
         }
         catch ( final Exception e ) {
             e.printStackTrace( System.out );
index e2412b8..7960f72 100644 (file)
@@ -4953,6 +4953,48 @@ public final class Test {
                 System.out.println( n10.toString() );
                 return false;
             }
+            final PhylogenyNode n11 = PhylogenyNode.createInstanceFromNhxString( "blag_Mus_musculus",
+                                                                                 NHXParser.TAXONOMY_EXTRACTION.YES );
+            if ( !n11.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus" ) ) {
+                System.out.println( n11.toString() );
+                return false;
+            }
+            final PhylogenyNode n12 = PhylogenyNode.createInstanceFromNhxString( "blag_Mus_musculus_musculus",
+                                                                                 NHXParser.TAXONOMY_EXTRACTION.YES );
+            if ( !n12.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus musculus" ) ) {
+                System.out.println( n12.toString() );
+                return false;
+            }
+            final PhylogenyNode n13 = PhylogenyNode.createInstanceFromNhxString( "blag_Mus_musculus1",
+                                                                                 NHXParser.TAXONOMY_EXTRACTION.YES );
+            if ( n13.getNodeData().isHasTaxonomy() ) {
+                System.out.println( n13.toString() );
+                return false;
+            }
+            final PhylogenyNode n14 = PhylogenyNode.createInstanceFromNhxString( "blag_Mus_musculus_11",
+                                                                                 NHXParser.TAXONOMY_EXTRACTION.YES );
+            if ( n14.getNodeData().isHasTaxonomy() ) {
+                System.out.println( n14.toString() );
+                return false;
+            }
+            final PhylogenyNode n15 = PhylogenyNode.createInstanceFromNhxString( "blag_Mus_musculus_v11",
+                                                                                 NHXParser.TAXONOMY_EXTRACTION.YES );
+            if ( !n15.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus v11" ) ) {
+                System.out.println( n15.toString() );
+                return false;
+            }
+            final PhylogenyNode n16 = PhylogenyNode.createInstanceFromNhxString( "blag_Mus_musculus_/11",
+                                                                                 NHXParser.TAXONOMY_EXTRACTION.YES );
+            if ( n16.getNodeData().isHasTaxonomy() ) {
+                System.out.println( n16.toString() );
+                return false;
+            }
+            final PhylogenyNode n17 = PhylogenyNode.createInstanceFromNhxString( "blag_Mus_musculus_v",
+                                                                                 NHXParser.TAXONOMY_EXTRACTION.YES );
+            if ( n17.getNodeData().isHasTaxonomy() ) {
+                System.out.println( n17.toString() );
+                return false;
+            }
         }
         catch ( final Exception e ) {
             e.printStackTrace( System.out );