"rio" work + clean up
author: cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Wed, 19 Dec 2012 03:28:42 +0000 (03:28 +0000)
committer: cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Wed, 19 Dec 2012 03:28:42 +0000 (03:28 +0000)
forester/java/src/org/forester/application/gsdi.java
forester/java/src/org/forester/application/rio.java
forester/java/src/org/forester/rio/RIO.java
forester/java/src/org/forester/rio/TestRIO.java
forester/java/src/org/forester/sdi/GSDI.java
forester/java/src/org/forester/sdi/GSDII.java
forester/java/src/org/forester/sdi/NodesLinkingResult.java
forester/java/src/org/forester/sdi/SDIutil.java
forester/java/src/org/forester/sdi/TestGSDI.java
forester/java/src/org/forester/util/ForesterUtil.java

index 71bd6d9..0ec4958 100644 (file)
@@ -36,11 +36,9 @@ import java.util.SortedSet;
 import java.util.TreeMap;
 import java.util.TreeSet;
 
-import org.forester.io.parsers.PhylogenyParser;
-import org.forester.io.parsers.nhx.NHXParser;
+import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
 import org.forester.io.parsers.phyloxml.PhyloXmlParser;
-import org.forester.io.parsers.util.ParserUtils;
 import org.forester.io.writers.PhylogenyWriter;
 import org.forester.phylogeny.Phylogeny;
 import org.forester.phylogeny.PhylogenyMethods;
@@ -53,7 +51,6 @@ import org.forester.sdi.GSDIR;
 import org.forester.sdi.SDIException;
 import org.forester.sdi.SDIutil;
 import org.forester.sdi.SDIutil.ALGORITHM;
-import org.forester.sdi.SDIutil.TaxonomyComparisonBase;
 import org.forester.util.CommandLineArguments;
 import org.forester.util.EasyWriter;
 import org.forester.util.ForesterConstants;
@@ -180,68 +177,27 @@ public final class gsdi {
             gene_tree = factory.create( gene_tree_file, new PhyloXmlParser() )[ 0 ];
         }
         catch ( final IOException e ) {
-            fatalError( "ERROR",
-                        "Failed to read gene tree from [" + gene_tree_file + "]: " + e.getMessage(),
+            fatalError( "error",
+                        "failed to read gene tree from [" + gene_tree_file + "]: " + e.getMessage(),
                         log_writer );
         }
         try {
-            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
-            final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( species_tree_file, true );
-            if ( p instanceof PhyloXmlParser ) {
-                species_tree = factory.create( species_tree_file, p )[ 0 ];
-            }
-            else {
-                if ( REPLACE_UNDERSCORES_IN_NH_SPECIES_TREE && ( p instanceof NHXParser ) ) {
-                    ( ( NHXParser ) p ).setReplaceUnderscores( true );
-                }
-                species_tree = factory.create( species_tree_file, p )[ 0 ];
-                final TaxonomyComparisonBase comp_base = SDIutil.determineTaxonomyComparisonBase( gene_tree );
-                switch ( comp_base ) {
-                    case SCIENTIFIC_NAME:
-                        try {
-                            PhylogenyMethods
-                                    .transferNodeNameToField( species_tree,
-                                                              PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME,
-                                                              true );
-                        }
-                        catch ( final PhyloXmlDataFormatException e ) {
-                            fatalError( "USER ERROR", "Failed to transfer general node name to scientific name, in ["
-                                    + species_tree_file + "]: " + e.getMessage(), log_writer );
-                        }
-                        break;
-                    case CODE:
-                        try {
-                            PhylogenyMethods
-                                    .transferNodeNameToField( species_tree,
-                                                              PhylogenyMethods.PhylogenyNodeField.TAXONOMY_CODE,
-                                                              true );
-                        }
-                        catch ( final PhyloXmlDataFormatException e ) {
-                            fatalError( "USER ERROR", "Failed to transfer general node name to taxonomy code, in ["
-                                    + species_tree_file + "]: " + e.getMessage(), log_writer );
-                        }
-                        break;
-                    case ID:
-                        try {
-                            PhylogenyMethods.transferNodeNameToField( species_tree,
-                                                                      PhylogenyMethods.PhylogenyNodeField.TAXONOMY_ID,
-                                                                      true );
-                        }
-                        catch ( final PhyloXmlDataFormatException e ) {
-                            fatalError( "USER ERROR", "Failed to transfer general node name to taxonomy id, in ["
-                                    + species_tree_file + "]: " + e.getMessage(), log_writer );
-                        }
-                        break;
-                    default:
-                        fatalError( "UNEXPECTED ERROR", "unable to determine comparison base", log_writer );
-                }
-            }
+            species_tree = SDIutil.parseSpeciesTree( gene_tree,
+                                                     species_tree_file,
+                                                     REPLACE_UNDERSCORES_IN_NH_SPECIES_TREE,
+                                                     true,
+                                                     TAXONOMY_EXTRACTION.NO );
+        }
+        catch ( final PhyloXmlDataFormatException e ) {
+            fatalError( "user error",
+                        "failed to transfer general node name, in [" + species_tree_file + "]: " + e.getMessage(),
+                        log_writer );
         }
         catch ( final SDIException e ) {
             fatalError( "user error", e.getMessage(), log_writer );
         }
         catch ( final IOException e ) {
-            fatalError( "ERROR",
+            fatalError( "error",
                         "Failed to read species tree from [" + species_tree_file + "]: " + e.getMessage(),
                         log_writer );
         }
@@ -405,30 +361,6 @@ public final class gsdi {
         log_writer.close();
     }
 
-    private static void writeToRemappedFile( final File out_file,
-                                             final SortedSet<String> remapped,
-                                             final EasyWriter log_writer ) throws IOException {
-        final File file = new File( ForesterUtil.removeSuffix( out_file.toString() ) + REMAPPED_SUFFIX );
-        final EasyWriter remapped_writer = ForesterUtil.createEasyWriter( file );
-        for( final String s : remapped ) {
-            remapped_writer.println( s );
-        }
-        remapped_writer.close();
-        System.out.println( "Wrote remapped gene tree species to      : " + file.getCanonicalPath() );
-        log_writer.println( "Wrote remapped gene tree species to      : " + file.getCanonicalPath() );
-    }
-
-    private static void printMappedNodesToLog( final EasyWriter log_writer, final GSDII gsdi ) throws IOException {
-        final SortedSet<String> ss = new TreeSet<String>();
-        for( final PhylogenyNode n : gsdi.getMappedExternalSpeciesTreeNodes() ) {
-            ss.add( n.toString() );
-        }
-        log_writer.println( "The following " + ss.size() + " species were used: " );
-        for( final String s : ss ) {
-            log_writer.println( "  " + s );
-        }
-    }
-
     private static void fatalError( final String type, final String msg, final EasyWriter log_writer ) {
         try {
             log_writer.flush();
@@ -443,30 +375,6 @@ public final class gsdi {
         ForesterUtil.fatalError( gsdi.PRG_NAME, msg );
     }
 
-    private static void printStrippedGeneTreeNodesToLog( final EasyWriter log_writer, final GSDII gsdi )
-            throws IOException {
-        final SortedMap<String, Integer> sm = new TreeMap<String, Integer>();
-        for( final PhylogenyNode n : gsdi.getStrippedExternalGeneTreeNodes() ) {
-            final String s = n.toString();
-            if ( sm.containsKey( s ) ) {
-                sm.put( s, sm.get( s ) + 1 );
-            }
-            else {
-                sm.put( s, 1 );
-            }
-        }
-        log_writer.println( "The following " + sm.size() + " nodes were stripped from the gene tree: " );
-        for( final String s : sm.keySet() ) {
-            final int count = sm.get( s );
-            if ( count == 1 ) {
-                log_writer.println( "  " + s );
-            }
-            else {
-                log_writer.println( "  " + s + " [" + count + "]" );
-            }
-        }
-    }
-
     private static void print_help() {
         System.out.println( "Usage: " + gsdi.PRG_NAME
                 + " [-options] <gene tree in phyloXML format> <species tree> <outfile>" );
@@ -493,4 +401,85 @@ public final class gsdi {
                 + " gene_tree.xml tree_of_life.xml out.xml" );
         System.out.println();
     }
+
+    /**
+     * Logs the species that were used in the analysis: the string forms of the
+     * nodes returned by <code>getMappedExternalSpeciesTreeNodes()</code>, de-duplicated
+     * and sorted, preceded by a header line stating how many there are.
+     *
+     * @param log_writer writer receiving the listing
+     * @param gsdi analysis result queried for its mapped external species tree nodes
+     * @throws IOException propagated from the underlying calls
+     */
+    private static void printMappedNodesToLog( final EasyWriter log_writer, final GSDII gsdi ) throws IOException {
+        final SortedSet<String> ss = new TreeSet<String>();
+        for( final PhylogenyNode n : gsdi.getMappedExternalSpeciesTreeNodes() ) {
+            ss.add( n.toString() );
+        }
+        log_writer.println( "The following " + ss.size() + " species were used: " );
+        for( final String s : ss ) {
+            log_writer.println( "  " + s );
+        }
+    }
+
+    /**
+     * Logs the nodes that were stripped from the gene tree. Nodes are grouped by
+     * their string form and listed in sorted order; a form occurring more than once
+     * is followed by its number of occurrences in square brackets. Note that the
+     * number in the header line is the number of distinct string forms.
+     *
+     * @param log_writer writer receiving the listing
+     * @param gsdi analysis result queried for its stripped external gene tree nodes
+     * @throws IOException propagated from the underlying calls
+     */
+    private static void printStrippedGeneTreeNodesToLog( final EasyWriter log_writer, final GSDII gsdi )
+            throws IOException {
+        final SortedMap<String, Integer> sm = new TreeMap<String, Integer>();
+        for( final PhylogenyNode n : gsdi.getStrippedExternalGeneTreeNodes() ) {
+            final String s = n.toString();
+            // Single lookup: an absent key means this is the first occurrence.
+            final Integer seen = sm.get( s );
+            sm.put( s, seen == null ? 1 : seen + 1 );
+        }
+        log_writer.println( "The following " + sm.size() + " nodes were stripped from the gene tree: " );
+        for( final String s : sm.keySet() ) {
+            final int count = sm.get( s );
+            if ( count == 1 ) {
+                log_writer.println( "  " + s );
+            }
+            else {
+                log_writer.println( "  " + s + " [" + count + "]" );
+            }
+        }
+    }
+
+    /**
+     * Writes the given remapped gene tree species, one per line, to a file named
+     * after <code>out_file</code> (suffix removed, <code>REMAPPED_SUFFIX</code>
+     * appended) and reports the canonical path of that file on standard output
+     * and in the log.
+     *
+     * @param out_file output file from which the name of the remapped file is derived
+     * @param remapped entries to write, one per line
+     * @param log_writer writer receiving the confirmation message
+     * @throws IOException propagated from file creation, writing or path resolution
+     */
+    private static void writeToRemappedFile( final File out_file,
+                                             final SortedSet<String> remapped,
+                                             final EasyWriter log_writer ) throws IOException {
+        final File file = new File( ForesterUtil.removeSuffix( out_file.toString() ) + REMAPPED_SUFFIX );
+        final EasyWriter remapped_writer = ForesterUtil.createEasyWriter( file );
+        try {
+            for( final String s : remapped ) {
+                remapped_writer.println( s );
+            }
+        }
+        finally {
+            // Release the file handle even when a write fails part-way through.
+            remapped_writer.close();
+        }
+        final String msg = "Wrote remapped gene tree species to      : " + file.getCanonicalPath();
+        System.out.println( msg );
+        log_writer.println( msg );
+    }
 }
index b56c289..065b676 100644 (file)
@@ -33,11 +33,6 @@ import java.util.ArrayList;
 import java.util.List;
 
 import org.forester.datastructures.IntMatrix;
-import org.forester.io.parsers.phyloxml.PhyloXmlParser;
-import org.forester.phylogeny.Phylogeny;
-import org.forester.phylogeny.PhylogenyMethods;
-import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
-import org.forester.phylogeny.factories.PhylogenyFactory;
 import org.forester.rio.RIO;
 import org.forester.rio.RIO.REROOTING;
 import org.forester.rio.RIOException;
@@ -76,14 +71,14 @@ public class rio {
             cla = new CommandLineArguments( args );
         }
         catch ( final Exception e ) {
-            ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
+            ForesterUtil.fatalError( e.getMessage() );
         }
         if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length == 0 ) ) {
             printHelp();
         }
-        if ( ( args.length < 3 ) || ( args.length > 8 ) ) {
+        if ( ( args.length < 3 ) || ( args.length > 9 ) ) {
             System.out.println();
-            System.out.println( "[" + PRG_NAME + "] incorrect number of arguments" );
+            System.out.println( "error: incorrect number of arguments" );
             System.out.println();
             printHelp();
         }
@@ -95,7 +90,7 @@ public class rio {
         allowed_options.add( USE_SDIR );
         final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
         if ( dissallowed_options.length() > 0 ) {
-            ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options );
+            ForesterUtil.fatalError( "unknown option(s): " + dissallowed_options );
         }
         final File gene_trees_file = cla.getFile( 0 );
         final File species_tree_file = cla.getFile( 1 );
@@ -104,7 +99,7 @@ public class rio {
         if ( cla.getNumberOfNames() > 3 ) {
             logfile = cla.getFile( 3 );
             if ( logfile.exists() ) {
-                ForesterUtil.fatalError( PRG_NAME, "\"" + logfile + "\" already exists" );
+                ForesterUtil.fatalError( "\"" + logfile + "\" already exists" );
             }
         }
         else {
@@ -113,30 +108,30 @@ public class rio {
         boolean sdir = false;
         if ( cla.isOptionSet( USE_SDIR ) ) {
             if ( cla.isOptionHasAValue( USE_SDIR ) ) {
-                ForesterUtil.fatalError( PRG_NAME, "no value allowed for -" + USE_SDIR );
+                ForesterUtil.fatalError( "no value allowed for -" + USE_SDIR );
             }
             sdir = true;
             if ( logfile != null ) {
-                ForesterUtil.fatalError( PRG_NAME, "no logfile output for SDIR algorithm" );
+                ForesterUtil.fatalError( "no logfile output for SDIR algorithm" );
             }
         }
         String outgroup = null;
         if ( cla.isOptionSet( OUTGROUP ) ) {
             if ( !cla.isOptionHasAValue( OUTGROUP ) ) {
-                ForesterUtil.fatalError( PRG_NAME, "no value for -" + OUTGROUP );
+                ForesterUtil.fatalError( "no value for -" + OUTGROUP );
             }
             if ( sdir ) {
-                ForesterUtil.fatalError( PRG_NAME, "no outgroup option for SDIR algorithm" );
+                ForesterUtil.fatalError( "no outgroup option for SDIR algorithm" );
             }
             outgroup = cla.getOptionValueAsCleanString( OUTGROUP );
         }
         REROOTING rerooting = REROOTING.BY_ALGORITHM;
         if ( cla.isOptionSet( REROOTING_OPT ) ) {
             if ( !cla.isOptionHasAValue( REROOTING_OPT ) ) {
-                ForesterUtil.fatalError( PRG_NAME, "no value for -" + REROOTING_OPT );
+                ForesterUtil.fatalError( "no value for -" + REROOTING_OPT );
             }
             if ( sdir ) {
-                ForesterUtil.fatalError( PRG_NAME, "no re-rooting option for SDIR algorithm" );
+                ForesterUtil.fatalError( "no re-rooting option for SDIR algorithm" );
             }
             final String rerooting_str = cla.getOptionValueAsCleanString( REROOTING_OPT ).toLowerCase();
             if ( rerooting_str.equals( "none" ) ) {
@@ -150,60 +145,59 @@ public class rio {
             }
             else {
                 ForesterUtil
-                        .fatalError( PRG_NAME,
-                                     "values for re-rooting are: 'none', 'midpoint', or 'outgroup' (minizming duplications is default)" );
+                        .fatalError( "values for re-rooting are: 'none', 'midpoint', or 'outgroup' (minizming duplications is default)" );
             }
         }
         if ( ForesterUtil.isEmpty( outgroup ) && ( rerooting == REROOTING.OUTGROUP ) ) {
-            ForesterUtil.fatalError( PRG_NAME, "selected re-rooting by outgroup, but outgroup not set" );
+            ForesterUtil.fatalError( "selected re-rooting by outgroup, but outgroup not set" );
         }
         if ( !ForesterUtil.isEmpty( outgroup ) && ( rerooting != REROOTING.OUTGROUP ) ) {
-            ForesterUtil.fatalError( PRG_NAME, "outgroup set, but selected re-rooting by other approach" );
+            ForesterUtil.fatalError( "outgroup set, but selected re-rooting by other approach" );
         }
         int gt_first = RIO.DEFAULT_RANGE;
         int gt_last = RIO.DEFAULT_RANGE;
         if ( cla.isOptionSet( GT_FIRST ) ) {
             if ( !cla.isOptionHasAValue( GT_FIRST ) ) {
-                ForesterUtil.fatalError( PRG_NAME, "no value for -" + GT_FIRST );
+                ForesterUtil.fatalError( "no value for -" + GT_FIRST );
             }
             if ( sdir ) {
-                ForesterUtil.fatalError( PRG_NAME, "no gene tree range option for SDIR algorithm" );
+                ForesterUtil.fatalError( "no gene tree range option for SDIR algorithm" );
             }
             try {
                 gt_first = cla.getOptionValueAsInt( GT_FIRST );
             }
             catch ( final IOException e ) {
-                ForesterUtil.fatalError( PRG_NAME, "could not parse integer for -" + GT_FIRST + " option" );
+                ForesterUtil.fatalError( "could not parse integer for -" + GT_FIRST + " option" );
             }
             if ( gt_first < 0 ) {
-                ForesterUtil.fatalError( PRG_NAME, "attempt to set index of first tree to analyze to: " + gt_first );
+                ForesterUtil.fatalError( "attempt to set index of first tree to analyze to: " + gt_first );
             }
         }
         if ( cla.isOptionSet( GT_LAST ) ) {
             if ( !cla.isOptionHasAValue( GT_LAST ) ) {
-                ForesterUtil.fatalError( PRG_NAME, "no value for -" + GT_LAST );
+                ForesterUtil.fatalError( "no value for -" + GT_LAST );
             }
             if ( sdir ) {
-                ForesterUtil.fatalError( PRG_NAME, "no gene tree range option for SDIR algorithm" );
+                ForesterUtil.fatalError( "no gene tree range option for SDIR algorithm" );
             }
             try {
                 gt_last = cla.getOptionValueAsInt( GT_LAST );
             }
             catch ( final IOException e ) {
-                ForesterUtil.fatalError( PRG_NAME, "could not parse integer for -" + GT_LAST + " option" );
+                ForesterUtil.fatalError( "could not parse integer for -" + GT_LAST + " option" );
             }
             if ( gt_last < 0 ) {
-                ForesterUtil.fatalError( PRG_NAME, "attempt to set index of last tree to analyze to: " + gt_last );
+                ForesterUtil.fatalError( "attempt to set index of last tree to analyze to: " + gt_last );
             }
         }
         if ( ( ( gt_last != RIO.DEFAULT_RANGE ) && ( gt_first != RIO.DEFAULT_RANGE ) ) && ( ( gt_last < gt_first ) ) ) {
-            ForesterUtil.fatalError( PRG_NAME, "attempt to set range (0-based) of gene to analyze to: from " + gt_first
-                    + " to " + gt_last );
+            ForesterUtil.fatalError( "attempt to set range (0-based) of gene to analyze to: from " + gt_first + " to "
+                    + gt_last );
         }
-        ForesterUtil.fatalErrorIfFileNotReadable( PRG_NAME, gene_trees_file );
-        ForesterUtil.fatalErrorIfFileNotReadable( PRG_NAME, species_tree_file );
+        ForesterUtil.fatalErrorIfFileNotReadable( gene_trees_file );
+        ForesterUtil.fatalErrorIfFileNotReadable( species_tree_file );
         if ( orthology_outtable.exists() ) {
-            ForesterUtil.fatalError( PRG_NAME, "\"" + orthology_outtable + "\" already exists" );
+            ForesterUtil.fatalError( "\"" + orthology_outtable + "\" already exists" );
         }
         long time = 0;
         System.out.println( "Gene trees                : " + gene_trees_file );
@@ -245,27 +239,15 @@ public class rio {
             System.out.println( "Non binary species tree   : disallowed" );
         }
         time = System.currentTimeMillis();
-        Phylogeny species_tree = null;
-        try {
-            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
-            species_tree = factory.create( species_tree_file, new PhyloXmlParser() )[ 0 ];
-        }
-        catch ( final Exception e ) {
-            e.printStackTrace();
-            System.exit( -1 );
-        }
-        if ( !species_tree.isRooted() ) {
-            ForesterUtil.fatalError( PRG_NAME, "species tree is not rooted" );
-        }
-        final int o = PhylogenyMethods.countNumberOfOneDescendantNodes( species_tree );
-        if ( o > 0 ) {
-            ForesterUtil.printWarningMessage( PRG_NAME, "species tree has " + o
-                    + " internal nodes with only one descendent! Going to strip them." );
-            PhylogenyMethods.deleteInternalNodesWithOnlyOneDescendent( species_tree );
-            if ( PhylogenyMethods.countNumberOfOneDescendantNodes( species_tree ) > 0 ) {
-                ForesterUtil.unexpectedFatalError( PRG_NAME, "stripping of one-desc nodes failed" );
-            }
-        }
+        //        Phylogeny species_tree = null;
+        //        try {
+        //            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+        //            species_tree = factory.create( species_tree_file, new PhyloXmlParser() )[ 0 ];
+        //        }
+        //        catch ( final Exception e ) {
+        //            e.printStackTrace();
+        //            System.exit( -1 );
+        //        }
         final ALGORITHM algorithm;
         if ( sdir ) {
             algorithm = ALGORITHM.SDIR;
@@ -275,7 +257,7 @@ public class rio {
         }
         try {
             final RIO rio = RIO.executeAnalysis( gene_trees_file,
-                                                 species_tree,
+                                                 species_tree_file,
                                                  algorithm,
                                                  rerooting,
                                                  outgroup,
@@ -284,7 +266,7 @@ public class rio {
                                                  logfile != null,
                                                  true );
             if ( algorithm == ALGORITHM.GSDIR ) {
-                ForesterUtil.programMessage( PRG_NAME, "taxonomy linking based on: " + rio.getGSDIRtaxCompBase() );
+                System.out.println( "Taxonomy linking based on : " + rio.getGSDIRtaxCompBase() );
             }
             tableOutput( orthology_outtable, rio );
             if ( ( algorithm != ALGORITHM.SDIR ) && ( logfile != null ) ) {
@@ -300,30 +282,29 @@ public class rio {
             }
             final BasicDescriptiveStatistics stats = rio.getDuplicationsStatistics();
             final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.#" );
-            ForesterUtil.programMessage( PRG_NAME,
-                                         "Mean number of duplications  : " + df.format( stats.arithmeticMean() )
-                                                 + " (sd: " + df.format( stats.sampleStandardDeviation() ) + ")" );
+            System.out.println( "Mean number of duplications  : " + df.format( stats.arithmeticMean() ) + " (sd: "
+                    + df.format( stats.sampleStandardDeviation() ) + ")" );
             if ( stats.getN() > 3 ) {
-                ForesterUtil.programMessage( PRG_NAME, "Median number of duplications: " + df.format( stats.median() ) );
+                System.out.println( "Median number of duplications: " + df.format( stats.median() ) );
             }
-            ForesterUtil.programMessage( PRG_NAME, "Minimum duplications         : " + ( int ) stats.getMin() );
-            ForesterUtil.programMessage( PRG_NAME, "Maximum duplications         : " + ( int ) stats.getMax() );
+            System.out.println( "Minimum duplications         : " + ( int ) stats.getMin() );
+            System.out.println( "Maximum duplications         : " + ( int ) stats.getMax() );
         }
         catch ( final RIOException e ) {
-            ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() );
+            ForesterUtil.fatalError( e.getLocalizedMessage() );
         }
         catch ( final SDIException e ) {
-            ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() );
+            ForesterUtil.fatalError( e.getLocalizedMessage() );
         }
         catch ( final IOException e ) {
-            ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() );
+            ForesterUtil.fatalError( e.getLocalizedMessage() );
         }
         catch ( final Exception e ) {
-            ForesterUtil.unexpectedFatalError( PRG_NAME, e );
+            ForesterUtil.unexpectedFatalError( e );
         }
         time = System.currentTimeMillis() - time;
-        ForesterUtil.programMessage( PRG_NAME, "time: " + time + "ms" );
-        ForesterUtil.programMessage( PRG_NAME, "OK" );
+        System.out.println( "Time: " + time + "ms" );
+        System.out.println( "OK" );
         System.exit( 0 );
     }
 
@@ -387,7 +368,7 @@ public class rio {
         out.flush();
         out.println( rio.getLog().toString() );
         out.close();
-        ForesterUtil.programMessage( PRG_NAME, "wrote log to \"" + logfile + "\"" );
+        System.out.println( "Wrote log to \"" + logfile + "\"" );
     }
 
     private static void writeTable( final File table_outfile, final RIO rio, final IntMatrix m ) throws IOException {
@@ -405,7 +386,7 @@ public class rio {
                 w.print( "\t" );
                 if ( x == y ) {
                     if ( m.get( x, y ) != rio.getAnalyzedGeneTrees().length ) {
-                        ForesterUtil.unexpectedFatalError( PRG_NAME, "diagonal value is off" );
+                        ForesterUtil.unexpectedFatalError( "diagonal value is off" );
                     }
                     w.print( "-" );
                 }
@@ -416,6 +397,6 @@ public class rio {
             w.println();
         }
         w.close();
-        ForesterUtil.programMessage( PRG_NAME, "wrote table to \"" + table_outfile + "\"" );
+        System.out.println( "Wrote table to \"" + table_outfile + "\"" );
     }
 }
index 84ae28d..38efa35 100644 (file)
@@ -42,6 +42,7 @@ import java.util.TreeSet;
 import org.forester.datastructures.IntMatrix;
 import org.forester.io.parsers.PhylogenyParser;
 import org.forester.io.parsers.nhx.NHXParser;
+import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
 import org.forester.io.parsers.util.ParserUtils;
 import org.forester.phylogeny.Phylogeny;
 import org.forester.phylogeny.PhylogenyMethods;
@@ -53,6 +54,7 @@ import org.forester.sdi.GSDI;
 import org.forester.sdi.GSDIR;
 import org.forester.sdi.SDIException;
 import org.forester.sdi.SDIR;
+import org.forester.sdi.SDIutil;
 import org.forester.sdi.SDIutil.ALGORITHM;
 import org.forester.sdi.SDIutil.TaxonomyComparisonBase;
 import org.forester.util.BasicDescriptiveStatistics;
@@ -86,7 +88,8 @@ public final class RIO {
         else if ( ( first == DEFAULT_RANGE ) && ( last >= 0 ) ) {
             first = 0;
         }
-        checkPreconditions( gene_trees, rerooting, outgroup, first, last );
+        removeSingleDescendentsNodes( species_tree, verbose );
+        checkPreconditions( gene_trees, species_tree, rerooting, outgroup, first, last );
         _produce_log = produce_log;
         _verbose = verbose;
         _rerooting = rerooting;
@@ -465,6 +468,35 @@ public final class RIO {
     }
 
+    /**
+     * Parses gene trees and species tree from the given files (the first gene tree is handed to
+     * SDIutil.parseSpeciesTree with the species tree file), then runs the analysis on them.
+     */
     public final static RIO executeAnalysis( final File gene_trees_file,
+                                             final File species_tree_file,
+                                             final ALGORITHM algorithm,
+                                             final REROOTING rerooting,
+                                             final String outgroup,
+                                             final int first,
+                                             final int last,
+                                             final boolean produce_log,
+                                             final boolean verbose ) throws IOException, SDIException, RIOException {
+        final PhylogenyParser parser = ParserUtils.createParserDependingOnFileType( gene_trees_file, true );
+        if ( parser instanceof NHXParser ) {
+            final NHXParser nhx_parser = ( NHXParser ) parser;
+            nhx_parser.setReplaceUnderscores( false );
+            nhx_parser.setIgnoreQuotes( true );
+            nhx_parser.setTaxonomyExtraction( TAXONOMY_EXTRACTION.YES );
+        }
+        final Phylogeny[] gene_trees = ParserBasedPhylogenyFactory.getInstance().create( gene_trees_file, parser );
+        if ( gene_trees.length < 1 ) {
+            throw new RIOException( "\"" + gene_trees_file + "\" is devoid of appropriate gene trees" );
+        }
+        final Phylogeny species_tree = SDIutil
+                .parseSpeciesTree( gene_trees[ 0 ], species_tree_file, false, true, TAXONOMY_EXTRACTION.NO );
+        return new RIO( gene_trees, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose );
+    }
+
+    public final static RIO executeAnalysis( final File gene_trees_file,
                                              final Phylogeny species_tree,
                                              final ALGORITHM algorithm,
                                              final REROOTING rerooting,
@@ -509,6 +541,9 @@ public final class RIO {
             nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES );
         }
         final Phylogeny[] gene_trees = factory.create( gene_trees_file, p );
+        if ( gene_trees.length < 1 ) {
+            throw new RIOException( "\"" + gene_trees_file + "\" is devoid of appropriate gene trees" );
+        }
         return new RIO( gene_trees, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose );
     }
 
@@ -556,10 +591,14 @@ public final class RIO {
     }
 
     private final static void checkPreconditions( final Phylogeny[] gene_trees,
+                                                  final Phylogeny species_tree,
                                                   final REROOTING rerooting,
                                                   final String outgroup,
                                                   final int first,
                                                   final int last ) throws RIOException {
+        if ( !species_tree.isRooted() ) {
+            throw new RIOException( "species tree is not rooted" );
+        }
         if ( !( ( last == DEFAULT_RANGE ) && ( first == DEFAULT_RANGE ) )
                 && ( ( last < first ) || ( last >= gene_trees.length ) || ( last < 0 ) || ( first < 0 ) ) ) {
             throw new RIOException( "attempt to set range (0-based) of gene to analyze to: from " + first + " to "
@@ -585,6 +624,17 @@ public final class RIO {
         }
     }
 
+    /**
+     * Removes internal nodes with only one descendant from the species tree.
+     * <p>
+     * The number of such nodes is obtained from
+     * PhylogenyMethods.countNumberOfOneDescendantNodes; if it is zero nothing
+     * happens. Otherwise the tree is handed to
+     * PhylogenyMethods.deleteInternalNodesWithOnlyOneDescendent, preceded by a
+     * warning on standard output when verbose is set.
+     *
+     * @param species_tree the species tree to inspect and clean up
+     * @param verbose whether to print the warning before removal
+     */
+    private final static void removeSingleDescendentsNodes( final Phylogeny species_tree, final boolean verbose ) {
+        final int single_desc_count = PhylogenyMethods.countNumberOfOneDescendantNodes( species_tree );
+        if ( single_desc_count <= 0 ) {
+            // Nothing to remove.
+            return;
+        }
+        if ( verbose ) {
+            System.out.println( "warning: species tree has " + single_desc_count
+                    + " internal nodes with only one descendent which are therefore going to be removed" );
+        }
+        PhylogenyMethods.deleteInternalNodesWithOnlyOneDescendent( species_tree );
+    }
+
     public enum REROOTING {
         NONE, BY_ALGORITHM, MIDPOINT, OUTGROUP;
     }
index 5ba32cb..81d2db2 100644 (file)
@@ -18,6 +18,15 @@ public final class TestRIO {
     private final static String PATH_TO_TEST_DATA = System.getProperty( "user.dir" ) + ForesterUtil.getFileSeparator()
                                                           + "test_data" + ForesterUtil.getFileSeparator();
 
+    /**
+     * Runs testRIO_GSDIR() and prints "OK" on success or a failure notice
+     * otherwise.
+     *
+     * @param args command line arguments (not used)
+     */
+    public static void main( final String[] args ) {
+        final boolean passed = testRIO_GSDIR();
+        System.out.println( passed ? "OK" : "testRIO GSDIR failed" );
+    }
+
     public static boolean test() {
         if ( !testRIO_GSDIR() ) {
             return false;
@@ -114,13 +123,4 @@ public final class TestRIO {
         }
         return true;
     }
-
-    public static void main( final String[] args ) {
-        if ( !testRIO_GSDIR() ) {
-            System.out.println( "testRIO GSDIR failed" );
-        }
-        else {
-            System.out.println( "OK" );
-        }
-    }
 }
\ No newline at end of file
index f9cc14e..7c41e0c 100644 (file)
@@ -59,6 +59,9 @@ public final class GSDI implements GSDII {
                  final boolean strip_gene_tree,
                  final boolean strip_species_tree ) throws SDIException {
         _most_parsimonious_duplication_model = most_parsimonious_duplication_model;
+        if ( gene_tree.getRoot().getNumberOfDescendants() == 3 ) {
+            gene_tree.reRoot( gene_tree.getRoot().getChildNode( 2 ) );
+        }
         final NodesLinkingResult nodes_linking_result = linkNodesOfG( gene_tree,
                                                                       species_tree,
                                                                       null,
@@ -141,14 +144,20 @@ public final class GSDI implements GSDII {
      * the species tree must be labeled in preorder.
      * <p>
      * @return 
+     * @throws SDIException 
      * 
      */
     final static GSDIsummaryResult geneTreePostOrderTraversal( final Phylogeny gene_tree,
-                                                               final boolean most_parsimonious_duplication_model ) {
+                                                               final boolean most_parsimonious_duplication_model )
+            throws SDIException {
         final GSDIsummaryResult res = new GSDIsummaryResult();
         for( final PhylogenyNodeIterator it = gene_tree.iteratorPostorder(); it.hasNext(); ) {
             final PhylogenyNode g = it.next();
             if ( g.isInternal() ) {
+                if ( g.getNumberOfDescendants() != 2 ) {
+                    throw new SDIException( "gene tree contains internal node with " + g.getNumberOfDescendants()
+                            + " descendents" );
+                }
                 PhylogenyNode s1 = g.getChildNode1().getLink();
                 PhylogenyNode s2 = g.getChildNode2().getLink();
                 while ( s1 != s2 ) {
index f5f99e8..1f2445f 100644 (file)
@@ -10,12 +10,12 @@ import org.forester.sdi.SDIutil.TaxonomyComparisonBase;
 \r
 public interface GSDII {\r
 \r
-    public abstract int getSpeciationsSum();\r
-\r
     public abstract Set<PhylogenyNode> getMappedExternalSpeciesTreeNodes();\r
 \r
     public abstract SortedSet<String> getReMappedScientificNamesFromGeneTree();\r
 \r
+    public abstract int getSpeciationsSum();\r
+\r
     public abstract List<PhylogenyNode> getStrippedExternalGeneTreeNodes();\r
 \r
     public abstract List<PhylogenyNode> getStrippedSpeciesTreeNodes();\r
index dd5bc83..1ae5661 100644 (file)
@@ -27,6 +27,14 @@ final class NodesLinkingResult {
         _tax_comp_base = null;
     }
 
+    /**
+     * @return the set held in the _mapped_species_tree_nodes field
+     */
+    final Set<PhylogenyNode> getMappedSpeciesTreeNodes() {
+        return _mapped_species_tree_nodes;
+    }
+
+    /**
+     * @return the sorted set held in the
+     *         _scientific_names_mapped_to_reduced_specificity field
+     */
+    final SortedSet<String> getScientificNamesMappedToReducedSpecificity() {
+        return _scientific_names_mapped_to_reduced_specificity;
+    }
+
     final List<PhylogenyNode> getStrippedGeneTreeNodes() {
         return _stripped_gene_tree_nodes;
     }
@@ -35,10 +43,6 @@ final class NodesLinkingResult {
         return _stripped_species_tree_nodes;
     }
 
-    final Set<PhylogenyNode> getMappedSpeciesTreeNodes() {
-        return _mapped_species_tree_nodes;
-    }
-
     final TaxonomyComparisonBase getTaxCompBase() {
         return _tax_comp_base;
     }
@@ -46,8 +50,4 @@ final class NodesLinkingResult {
     final void setTaxCompBase( final TaxonomyComparisonBase tax_comp_base ) {
         _tax_comp_base = tax_comp_base;
     }
-
-    final SortedSet<String> getScientificNamesMappedToReducedSpecificity() {
-        return _scientific_names_mapped_to_reduced_specificity;
-    }
 }
index 5ddd905..5526618 100644 (file)
 
 package org.forester.sdi;
 
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+
+import org.forester.io.parsers.PhylogenyParser;
+import org.forester.io.parsers.nhx.NHXParser;
+import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
+import org.forester.io.parsers.phyloxml.PhyloXmlParser;
+import org.forester.io.parsers.util.ParserUtils;
 import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyMethods;
 import org.forester.phylogeny.PhylogenyNode;
 import org.forester.phylogeny.data.Identifier;
 import org.forester.phylogeny.data.Taxonomy;
+import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
+import org.forester.phylogeny.factories.PhylogenyFactory;
 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
 import org.forester.util.ForesterUtil;
 
 public class SDIutil {
 
+    /**
+     * Chooses the taxonomy field on which gene tree and species tree nodes are
+     * compared.
+     * <p>
+     * Over all external nodes of the gene tree that have taxonomy data, this
+     * counts how many carry a non-empty identifier value, a non-empty taxonomy
+     * code and a non-empty scientific name. The field with the highest count
+     * wins; on ties the identifier is preferred over the scientific name, which
+     * is preferred over the code.
+     *
+     * @param gene_tree the gene tree whose external nodes are inspected
+     * @return the comparison base corresponding to the most frequent field
+     * @throws SDIException if the highest count is zero or one
+     */
+    public final static TaxonomyComparisonBase determineTaxonomyComparisonBase( final Phylogeny gene_tree )
+            throws SDIException {
+        int id_hits = 0;
+        int code_hits = 0;
+        int sn_hits = 0;
+        final PhylogenyNodeIterator ext_nodes = gene_tree.iteratorExternalForward();
+        while ( ext_nodes.hasNext() ) {
+            final PhylogenyNode ext_node = ext_nodes.next();
+            if ( !ext_node.getNodeData().isHasTaxonomy() ) {
+                continue;
+            }
+            final Taxonomy t = ext_node.getNodeData().getTaxonomy();
+            if ( ( t.getIdentifier() != null ) && !ForesterUtil.isEmpty( t.getIdentifier().getValue() ) ) {
+                ++id_hits;
+            }
+            if ( !ForesterUtil.isEmpty( t.getTaxonomyCode() ) ) {
+                ++code_hits;
+            }
+            if ( !ForesterUtil.isEmpty( t.getScientificName() ) ) {
+                ++sn_hits;
+            }
+        }
+        // The counters only ever grow, so the running maximum equals the
+        // maximum of the final counts.
+        final int best = Math.max( id_hits, Math.max( code_hits, sn_hits ) );
+        if ( best == 0 ) {
+            throw new SDIException( "gene tree has no taxonomic data" );
+        }
+        if ( best == 1 ) {
+            throw new SDIException( "gene tree has only one node with taxonomic data" );
+        }
+        if ( best == id_hits ) {
+            return TaxonomyComparisonBase.ID;
+        }
+        if ( best == sn_hits ) {
+            return TaxonomyComparisonBase.SCIENTIFIC_NAME;
+        }
+        return TaxonomyComparisonBase.CODE;
+    }
+
+    /**
+     * Reads the first phylogeny from a species tree file.
+     * <p>
+     * The parser is chosen by ParserUtils.createParserDependingOnFileType. A
+     * tree read with a PhyloXmlParser is returned as parsed. For any other
+     * parser the tree is marked as rooted and its node names are transferred to
+     * the taxonomy field (scientific name, code or id) that
+     * determineTaxonomyComparisonBase selects for the gene tree. The three NHX
+     * options are applied only if the parser is an NHXParser.
+     *
+     * @param gene_tree the gene tree used to determine the comparison base
+     * @param species_tree_file the file to read the species tree from
+     * @param replace_undescores_in_nhx_trees passed to NHXParser.setReplaceUnderscores
+     * @param ignore_quotes_in_nhx_trees passed to NHXParser.setIgnoreQuotes
+     * @param taxonomy_extraction_in_nhx_trees passed to NHXParser.setTaxonomyExtraction
+     * @return the first phylogeny found in the file
+     * @throws FileNotFoundException declared for the parsing calls
+     * @throws PhyloXmlDataFormatException declared for the parsing and name transfer calls
+     * @throws IOException declared for the parsing calls
+     * @throws SDIException if the file yields no phylogeny, or if no comparison
+     *             base can be determined for the gene tree
+     */
+    public final static Phylogeny parseSpeciesTree( final Phylogeny gene_tree,
+                                                    final File species_tree_file,
+                                                    final boolean replace_undescores_in_nhx_trees,
+                                                    final boolean ignore_quotes_in_nhx_trees,
+                                                    final NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction_in_nhx_trees )
+            throws FileNotFoundException, PhyloXmlDataFormatException, IOException, SDIException {
+        final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+        final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( species_tree_file, true );
+        final boolean is_phyloxml = p instanceof PhyloXmlParser;
+        if ( !is_phyloxml && ( p instanceof NHXParser ) ) {
+            // Parser options must be set before the file is parsed.
+            final NHXParser nhx = ( NHXParser ) p;
+            nhx.setReplaceUnderscores( replace_undescores_in_nhx_trees );
+            nhx.setIgnoreQuotes( ignore_quotes_in_nhx_trees );
+            nhx.setTaxonomyExtraction( taxonomy_extraction_in_nhx_trees );
+        }
+        final Phylogeny[] species_trees = factory.create( species_tree_file, p );
+        if ( species_trees.length < 1 ) {
+            // Report an empty file as a checked error instead of failing with an
+            // ArrayIndexOutOfBoundsException on the access below.
+            throw new SDIException( "\"" + species_tree_file + "\" is devoid of appropriate species trees" );
+        }
+        final Phylogeny species_tree = species_trees[ 0 ];
+        if ( !is_phyloxml ) {
+            species_tree.setRooted( true );
+            final TaxonomyComparisonBase comp_base = determineTaxonomyComparisonBase( gene_tree );
+            switch ( comp_base ) {
+                case SCIENTIFIC_NAME:
+                    PhylogenyMethods
+                            .transferNodeNameToField( species_tree,
+                                                      PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME,
+                                                      true );
+                    break;
+                case CODE:
+                    PhylogenyMethods.transferNodeNameToField( species_tree,
+                                                              PhylogenyMethods.PhylogenyNodeField.TAXONOMY_CODE,
+                                                              true );
+                    break;
+                case ID:
+                    PhylogenyMethods.transferNodeNameToField( species_tree,
+                                                              PhylogenyMethods.PhylogenyNodeField.TAXONOMY_ID,
+                                                              true );
+                    break;
+                default:
+                    throw new SDIException( "unable to determine comparison base" );
+            }
+        }
+        return species_tree;
+    }
+
     static String taxonomyToString( final PhylogenyNode n, final TaxonomyComparisonBase base ) {
         switch ( base ) {
             case ID:
@@ -54,48 +156,4 @@ public class SDIutil {
             }
         }
     }
-
-    public final static TaxonomyComparisonBase determineTaxonomyComparisonBase( final Phylogeny gene_tree )
-            throws SDIException {
-        int with_id_count = 0;
-        int with_code_count = 0;
-        int with_sn_count = 0;
-        int max = 0;
-        for( final PhylogenyNodeIterator iter = gene_tree.iteratorExternalForward(); iter.hasNext(); ) {
-            final PhylogenyNode g = iter.next();
-            if ( g.getNodeData().isHasTaxonomy() ) {
-                final Taxonomy tax = g.getNodeData().getTaxonomy();
-                if ( ( tax.getIdentifier() != null ) && !ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) ) {
-                    if ( ++with_id_count > max ) {
-                        max = with_id_count;
-                    }
-                }
-                if ( !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) {
-                    if ( ++with_code_count > max ) {
-                        max = with_code_count;
-                    }
-                }
-                if ( !ForesterUtil.isEmpty( tax.getScientificName() ) ) {
-                    if ( ++with_sn_count > max ) {
-                        max = with_sn_count;
-                    }
-                }
-            }
-        }
-        if ( max == 0 ) {
-            throw new SDIException( "gene tree has no taxonomic data" );
-        }
-        else if ( max == 1 ) {
-            throw new SDIException( "gene tree has only one node with taxonomic data" );
-        }
-        else if ( max == with_id_count ) {
-            return TaxonomyComparisonBase.ID;
-        }
-        else if ( max == with_sn_count ) {
-            return TaxonomyComparisonBase.SCIENTIFIC_NAME;
-        }
-        else {
-            return TaxonomyComparisonBase.CODE;
-        }
-    }
 }
index 16f6655..e7a9b46 100644 (file)
@@ -43,14 +43,26 @@ public final class TestGSDI {
     private final static String PATH_TO_TEST_DATA = System.getProperty( "user.dir" ) + ForesterUtil.getFileSeparator()
                                                           + "test_data" + ForesterUtil.getFileSeparator();
 
-    private final static Phylogeny createPhylogeny( final String nhx ) throws IOException {
-        final Phylogeny p = ParserBasedPhylogenyFactory.getInstance().create( nhx, new NHXParser() )[ 0 ];
-        p.setRooted( true );
-        return p;
-    }
-
-    private final static Event getEvent( final Phylogeny p, final String n1, final String n2 ) {
-        return PhylogenyMethods.calculateLCA( p.getNode( n1 ), p.getNode( n2 ) ).getNodeData().getEvent();
+    /**
+     * Runs the three GSDI test groups, printing a notice for each group that
+     * fails and "OK" only if all of them pass.
+     *
+     * @param args command line arguments (not used)
+     */
+    public static void main( final String[] args ) {
+        // Track overall success so that "OK" is not printed after an earlier
+        // group has already reported a failure.
+        boolean all_passed = true;
+        if ( !TestGSDI.testGSDI_against_binary_gene_tree() ) {
+            System.out.println( "binary failed" );
+            all_passed = false;
+        }
+        if ( !TestGSDI.testGSDI_general() ) {
+            System.out.println( "general failed" );
+            all_passed = false;
+        }
+        if ( !TestGSDI.testGSDIR_general() ) {
+            System.out.println( "general re-rooting failed" );
+            all_passed = false;
+        }
+        if ( all_passed ) {
+            System.out.println( "OK" );
+        }
+        //        boolean success = test();
+        //        if ( success ) {
+        //            System.out.println( "OK" );
+        //        }
+        //        else {
+        //            System.out.println( "failed" );
+        //        }
     }
 
     public static boolean test() {
@@ -66,6 +78,16 @@ public final class TestGSDI {
         return true;
     }
 
+    /**
+     * Parses the given string with a new NHXParser and returns the first
+     * resulting phylogeny, marked as rooted.
+     *
+     * @param nhx the tree description handed to the parser
+     * @return the first parsed phylogeny, with its rooted flag set
+     * @throws IOException declared for the parsing call
+     */
+    private final static Phylogeny createPhylogeny( final String nhx ) throws IOException {
+        final Phylogeny[] parsed = ParserBasedPhylogenyFactory.getInstance().create( nhx, new NHXParser() );
+        final Phylogeny first = parsed[ 0 ];
+        first.setRooted( true );
+        return first;
+    }
+
+    /**
+     * Looks up the nodes named n1 and n2 in p and returns the event stored in
+     * the node data of the node that PhylogenyMethods.calculateLCA returns for
+     * them.
+     *
+     * @param p the phylogeny containing both nodes
+     * @param n1 name of the first node
+     * @param n2 name of the second node
+     * @return the event of the node returned by calculateLCA
+     */
+    private final static Event getEvent( final Phylogeny p, final String n1, final String n2 ) {
+        return PhylogenyMethods.calculateLCA( p.getNode( n1 ), p.getNode( n2 ) ).getNodeData().getEvent();
+    }
+
     private static boolean testGSDI_against_binary_gene_tree() {
         try {
             final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
@@ -1462,26 +1484,4 @@ public final class TestGSDI {
         }
         return true;
     }
-
-    public static void main( final String[] args ) {
-        if ( !TestGSDI.testGSDI_against_binary_gene_tree() ) {
-            System.out.println( "binary failed" );
-        }
-        if ( !TestGSDI.testGSDI_general() ) {
-            System.out.println( "general failed" );
-        }
-        if ( !TestGSDI.testGSDIR_general() ) {
-            System.out.println( "general re-rooting failed" );
-        }
-        else {
-            System.out.println( "OK" );
-        }
-        //        boolean success = test();
-        //        if ( success ) {
-        //            System.out.println( "OK" );
-        //        }
-        //        else {
-        //            System.out.println( "failed" );
-        //        }
-    }
 }
index 1ca9ba2..bf9dc7f 100644 (file)
@@ -305,6 +305,13 @@ public final class ForesterUtil {
         }
     }
 
+    /**
+     * Prints the message, prefixed with "error: " and framed by blank lines, to
+     * standard error and terminates the JVM with exit status -1.
+     *
+     * @param message the error description to print
+     */
+    public static void fatalError( final String message ) {
+        final String report = "error: " + message;
+        System.err.println();
+        System.err.println( report );
+        System.err.println();
+        System.exit( -1 );
+    }
+
     public static void fatalError( final String prg_name, final String message ) {
         System.err.println();
         System.err.println( "[" + prg_name + "] > " + message );
@@ -312,6 +319,16 @@ public final class ForesterUtil {
         System.exit( -1 );
     }
 
+    /**
+     * Terminates the program if isReadableFile reports a problem for the file.
+     * <p>
+     * A non-empty result of isReadableFile is printed to standard error,
+     * prefixed with "error: " and framed by blank lines, and the JVM exits with
+     * status -1. An empty result lets the method return normally.
+     *
+     * @param file the file to check
+     */
+    public static void fatalErrorIfFileNotReadable( final File file ) {
+        final String problem = isReadableFile( file );
+        if ( isEmpty( problem ) ) {
+            return;
+        }
+        // fatalError( String ) prints the same framed "error: " report and exits.
+        fatalError( problem );
+    }
+
     public static void fatalErrorIfFileNotReadable( final String prg_name, final File file ) {
         final String error = isReadableFile( file );
         if ( !isEmpty( error ) ) {
@@ -914,10 +931,26 @@ public final class ForesterUtil {
         return str_array;
     }
 
+    /**
+     * Prints a notice about an unexpected error followed by the stack trace of
+     * the exception to standard error and terminates the JVM with exit
+     * status -1.
+     *
+     * @param e the exception whose stack trace is printed
+     */
+    final public static void unexpectedFatalError( final Exception e ) {
+        final String notice = "unexpected error: should not have occured! Please contact program author(s).";
+        System.err.println();
+        System.err.println( notice );
+        e.printStackTrace( System.err );
+        System.err.println();
+        System.exit( -1 );
+    }
+
+    /**
+     * Prints a notice about an unexpected error followed by the given message
+     * to standard error and terminates the JVM with exit status -1.
+     *
+     * @param message additional detail printed below the notice
+     */
+    final public static void unexpectedFatalError( final String message ) {
+        final String notice = "unexpected error: should not have occured! Please contact program author(s).";
+        System.err.println();
+        System.err.println( notice );
+        System.err.println( message );
+        System.err.println();
+        System.exit( -1 );
+    }
+
     final public static void unexpectedFatalError( final String prg_name, final Exception e ) {
         System.err.println();
         System.err.println( "[" + prg_name
-                + "] > Unexpected error. Should not have occured! Please contact program author(s)." );
+                + "] > unexpected error; should not have occured! Please contact program author(s)." );
         e.printStackTrace( System.err );
         System.err.println();
         System.exit( -1 );
@@ -926,7 +959,7 @@ public final class ForesterUtil {
     final public static void unexpectedFatalError( final String prg_name, final String message ) {
         System.err.println();
         System.err.println( "[" + prg_name
-                + "] > Unexpected error. Should not have occured! Please contact program author(s)." );
+                + "] > unexpected error: should not have occured! Please contact program author(s)." );
         System.err.println( message );
         System.err.println();
         System.exit( -1 );
@@ -935,7 +968,7 @@ public final class ForesterUtil {
     final public static void unexpectedFatalError( final String prg_name, final String message, final Exception e ) {
         System.err.println();
         System.err.println( "[" + prg_name
-                + "] > Unexpected error. Should not have occured! Please contact program author(s)." );
+                + "] > unexpected error: should not have occured! Please contact program author(s)." );
         System.err.println( message );
         e.printStackTrace( System.err );
         System.err.println();