import java.util.TreeMap;
import java.util.TreeSet;
-import org.forester.io.parsers.PhylogenyParser;
-import org.forester.io.parsers.nhx.NHXParser;
+import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
import org.forester.io.parsers.phyloxml.PhyloXmlParser;
-import org.forester.io.parsers.util.ParserUtils;
import org.forester.io.writers.PhylogenyWriter;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyMethods;
import org.forester.sdi.SDIException;
import org.forester.sdi.SDIutil;
import org.forester.sdi.SDIutil.ALGORITHM;
-import org.forester.sdi.SDIutil.TaxonomyComparisonBase;
import org.forester.util.CommandLineArguments;
import org.forester.util.EasyWriter;
import org.forester.util.ForesterConstants;
gene_tree = factory.create( gene_tree_file, new PhyloXmlParser() )[ 0 ];
}
catch ( final IOException e ) {
- fatalError( "ERROR",
- "Failed to read gene tree from [" + gene_tree_file + "]: " + e.getMessage(),
+ fatalError( "error",
+ "failed to read gene tree from [" + gene_tree_file + "]: " + e.getMessage(),
log_writer );
}
try {
- final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
- final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( species_tree_file, true );
- if ( p instanceof PhyloXmlParser ) {
- species_tree = factory.create( species_tree_file, p )[ 0 ];
- }
- else {
- if ( REPLACE_UNDERSCORES_IN_NH_SPECIES_TREE && ( p instanceof NHXParser ) ) {
- ( ( NHXParser ) p ).setReplaceUnderscores( true );
- }
- species_tree = factory.create( species_tree_file, p )[ 0 ];
- final TaxonomyComparisonBase comp_base = SDIutil.determineTaxonomyComparisonBase( gene_tree );
- switch ( comp_base ) {
- case SCIENTIFIC_NAME:
- try {
- PhylogenyMethods
- .transferNodeNameToField( species_tree,
- PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME,
- true );
- }
- catch ( final PhyloXmlDataFormatException e ) {
- fatalError( "USER ERROR", "Failed to transfer general node name to scientific name, in ["
- + species_tree_file + "]: " + e.getMessage(), log_writer );
- }
- break;
- case CODE:
- try {
- PhylogenyMethods
- .transferNodeNameToField( species_tree,
- PhylogenyMethods.PhylogenyNodeField.TAXONOMY_CODE,
- true );
- }
- catch ( final PhyloXmlDataFormatException e ) {
- fatalError( "USER ERROR", "Failed to transfer general node name to taxonomy code, in ["
- + species_tree_file + "]: " + e.getMessage(), log_writer );
- }
- break;
- case ID:
- try {
- PhylogenyMethods.transferNodeNameToField( species_tree,
- PhylogenyMethods.PhylogenyNodeField.TAXONOMY_ID,
- true );
- }
- catch ( final PhyloXmlDataFormatException e ) {
- fatalError( "USER ERROR", "Failed to transfer general node name to taxonomy id, in ["
- + species_tree_file + "]: " + e.getMessage(), log_writer );
- }
- break;
- default:
- fatalError( "UNEXPECTED ERROR", "unable to determine comparison base", log_writer );
- }
- }
+ species_tree = SDIutil.parseSpeciesTree( gene_tree,
+ species_tree_file,
+ REPLACE_UNDERSCORES_IN_NH_SPECIES_TREE,
+ true,
+ TAXONOMY_EXTRACTION.NO );
+ }
+ catch ( final PhyloXmlDataFormatException e ) {
+ fatalError( "user error",
+ "failed to transfer general node name, in [" + species_tree_file + "]: " + e.getMessage(),
+ log_writer );
}
catch ( final SDIException e ) {
fatalError( "user error", e.getMessage(), log_writer );
}
catch ( final IOException e ) {
- fatalError( "ERROR",
+ fatalError( "error",
"Failed to read species tree from [" + species_tree_file + "]: " + e.getMessage(),
log_writer );
}
log_writer.close();
}
- private static void writeToRemappedFile( final File out_file,
- final SortedSet<String> remapped,
- final EasyWriter log_writer ) throws IOException {
- final File file = new File( ForesterUtil.removeSuffix( out_file.toString() ) + REMAPPED_SUFFIX );
- final EasyWriter remapped_writer = ForesterUtil.createEasyWriter( file );
- for( final String s : remapped ) {
- remapped_writer.println( s );
- }
- remapped_writer.close();
- System.out.println( "Wrote remapped gene tree species to : " + file.getCanonicalPath() );
- log_writer.println( "Wrote remapped gene tree species to : " + file.getCanonicalPath() );
- }
-
- private static void printMappedNodesToLog( final EasyWriter log_writer, final GSDII gsdi ) throws IOException {
- final SortedSet<String> ss = new TreeSet<String>();
- for( final PhylogenyNode n : gsdi.getMappedExternalSpeciesTreeNodes() ) {
- ss.add( n.toString() );
- }
- log_writer.println( "The following " + ss.size() + " species were used: " );
- for( final String s : ss ) {
- log_writer.println( " " + s );
- }
- }
-
private static void fatalError( final String type, final String msg, final EasyWriter log_writer ) {
try {
log_writer.flush();
ForesterUtil.fatalError( gsdi.PRG_NAME, msg );
}
- private static void printStrippedGeneTreeNodesToLog( final EasyWriter log_writer, final GSDII gsdi )
- throws IOException {
- final SortedMap<String, Integer> sm = new TreeMap<String, Integer>();
- for( final PhylogenyNode n : gsdi.getStrippedExternalGeneTreeNodes() ) {
- final String s = n.toString();
- if ( sm.containsKey( s ) ) {
- sm.put( s, sm.get( s ) + 1 );
- }
- else {
- sm.put( s, 1 );
- }
- }
- log_writer.println( "The following " + sm.size() + " nodes were stripped from the gene tree: " );
- for( final String s : sm.keySet() ) {
- final int count = sm.get( s );
- if ( count == 1 ) {
- log_writer.println( " " + s );
- }
- else {
- log_writer.println( " " + s + " [" + count + "]" );
- }
- }
- }
-
private static void print_help() {
System.out.println( "Usage: " + gsdi.PRG_NAME
+ " [-options] <gene tree in phyloXML format> <species tree> <outfile>" );
+ " gene_tree.xml tree_of_life.xml out.xml" );
System.out.println();
}
+
+    /**
+     * Logs the species tree nodes reported by
+     * {@code gsdi.getMappedExternalSpeciesTreeNodes()}.
+     * <p>
+     * Each node is rendered with {@code toString()}; the resulting strings are
+     * de-duplicated and sorted (natural {@code String} order) before being
+     * written, one per line, below a header line stating how many there are.
+     *
+     * @param log_writer the log to write to
+     * @param gsdi the analysis result to read the mapped nodes from
+     * @throws IOException if writing to the log fails
+     */
+    private static void printMappedNodesToLog( final EasyWriter log_writer, final GSDII gsdi ) throws IOException {
+        final SortedSet<String> species_labels = new TreeSet<String>();
+        for( final PhylogenyNode mapped_node : gsdi.getMappedExternalSpeciesTreeNodes() ) {
+            final String label = mapped_node.toString();
+            species_labels.add( label );
+        }
+        final String header = "The following " + species_labels.size() + " species were used: ";
+        log_writer.println( header );
+        for( final String label : species_labels ) {
+            log_writer.println( " " + label );
+        }
+    }
+
+    /**
+     * Logs the external gene tree nodes reported by
+     * {@code gsdi.getStrippedExternalGeneTreeNodes()}.
+     * <p>
+     * Nodes are grouped by their {@code toString()} form. The header line
+     * states the number of distinct forms; each form is then written on its
+     * own line in sorted order, followed by {@code [count]} when it occurred
+     * more than once.
+     *
+     * @param log_writer the log to write to
+     * @param gsdi the analysis result to read the stripped nodes from
+     * @throws IOException if writing to the log fails
+     */
+    private static void printStrippedGeneTreeNodesToLog( final EasyWriter log_writer, final GSDII gsdi )
+            throws IOException {
+        final SortedMap<String, Integer> occurrences = new TreeMap<String, Integer>();
+        for( final PhylogenyNode stripped_node : gsdi.getStrippedExternalGeneTreeNodes() ) {
+            final String label = stripped_node.toString();
+            // Values are never null, so a null lookup means "not seen yet".
+            final Integer seen = occurrences.get( label );
+            occurrences.put( label, ( seen == null ) ? 1 : seen + 1 );
+        }
+        log_writer.println( "The following " + occurrences.size() + " nodes were stripped from the gene tree: " );
+        for( final String label : occurrences.keySet() ) {
+            final int times = occurrences.get( label );
+            final String suffix = ( times == 1 ) ? "" : " [" + times + "]";
+            log_writer.println( " " + label + suffix );
+        }
+    }
+
+    /**
+     * Writes the given names, one per line, to a file whose name is derived
+     * from {@code out_file} by replacing its suffix with
+     * {@code REMAPPED_SUFFIX}, then reports the canonical path of that file on
+     * standard output and in the log.
+     *
+     * @param out_file the main output file the remapped-file name is derived from
+     * @param remapped the names to write, in iteration order
+     * @param log_writer the log that receives the "Wrote ..." notice
+     * @throws IOException if the file cannot be created or written, if its
+     *             canonical path cannot be resolved, or if logging fails
+     */
+    private static void writeToRemappedFile( final File out_file,
+                                             final SortedSet<String> remapped,
+                                             final EasyWriter log_writer ) throws IOException {
+        final File file = new File( ForesterUtil.removeSuffix( out_file.toString() ) + REMAPPED_SUFFIX );
+        final EasyWriter remapped_writer = ForesterUtil.createEasyWriter( file );
+        try {
+            for( final String s : remapped ) {
+                remapped_writer.println( s );
+            }
+        }
+        finally {
+            // Release the file handle even when a write fails part-way.
+            remapped_writer.close();
+        }
+        // Resolve once: getCanonicalPath() touches the file system.
+        final String message = "Wrote remapped gene tree species to : " + file.getCanonicalPath();
+        System.out.println( message );
+        log_writer.println( message );
+    }
}
import java.util.List;
import org.forester.datastructures.IntMatrix;
-import org.forester.io.parsers.phyloxml.PhyloXmlParser;
-import org.forester.phylogeny.Phylogeny;
-import org.forester.phylogeny.PhylogenyMethods;
-import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
-import org.forester.phylogeny.factories.PhylogenyFactory;
import org.forester.rio.RIO;
import org.forester.rio.RIO.REROOTING;
import org.forester.rio.RIOException;
cla = new CommandLineArguments( args );
}
catch ( final Exception e ) {
- ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
+ ForesterUtil.fatalError( e.getMessage() );
}
if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length == 0 ) ) {
printHelp();
}
- if ( ( args.length < 3 ) || ( args.length > 8 ) ) {
+ if ( ( args.length < 3 ) || ( args.length > 9 ) ) {
System.out.println();
- System.out.println( "[" + PRG_NAME + "] incorrect number of arguments" );
+ System.out.println( "error: incorrect number of arguments" );
System.out.println();
printHelp();
}
allowed_options.add( USE_SDIR );
final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
if ( dissallowed_options.length() > 0 ) {
- ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options );
+ ForesterUtil.fatalError( "unknown option(s): " + dissallowed_options );
}
final File gene_trees_file = cla.getFile( 0 );
final File species_tree_file = cla.getFile( 1 );
if ( cla.getNumberOfNames() > 3 ) {
logfile = cla.getFile( 3 );
if ( logfile.exists() ) {
- ForesterUtil.fatalError( PRG_NAME, "\"" + logfile + "\" already exists" );
+ ForesterUtil.fatalError( "\"" + logfile + "\" already exists" );
}
}
else {
boolean sdir = false;
if ( cla.isOptionSet( USE_SDIR ) ) {
if ( cla.isOptionHasAValue( USE_SDIR ) ) {
- ForesterUtil.fatalError( PRG_NAME, "no value allowed for -" + USE_SDIR );
+ ForesterUtil.fatalError( "no value allowed for -" + USE_SDIR );
}
sdir = true;
if ( logfile != null ) {
- ForesterUtil.fatalError( PRG_NAME, "no logfile output for SDIR algorithm" );
+ ForesterUtil.fatalError( "no logfile output for SDIR algorithm" );
}
}
String outgroup = null;
if ( cla.isOptionSet( OUTGROUP ) ) {
if ( !cla.isOptionHasAValue( OUTGROUP ) ) {
- ForesterUtil.fatalError( PRG_NAME, "no value for -" + OUTGROUP );
+ ForesterUtil.fatalError( "no value for -" + OUTGROUP );
}
if ( sdir ) {
- ForesterUtil.fatalError( PRG_NAME, "no outgroup option for SDIR algorithm" );
+ ForesterUtil.fatalError( "no outgroup option for SDIR algorithm" );
}
outgroup = cla.getOptionValueAsCleanString( OUTGROUP );
}
REROOTING rerooting = REROOTING.BY_ALGORITHM;
if ( cla.isOptionSet( REROOTING_OPT ) ) {
if ( !cla.isOptionHasAValue( REROOTING_OPT ) ) {
- ForesterUtil.fatalError( PRG_NAME, "no value for -" + REROOTING_OPT );
+ ForesterUtil.fatalError( "no value for -" + REROOTING_OPT );
}
if ( sdir ) {
- ForesterUtil.fatalError( PRG_NAME, "no re-rooting option for SDIR algorithm" );
+ ForesterUtil.fatalError( "no re-rooting option for SDIR algorithm" );
}
final String rerooting_str = cla.getOptionValueAsCleanString( REROOTING_OPT ).toLowerCase();
if ( rerooting_str.equals( "none" ) ) {
}
else {
ForesterUtil
- .fatalError( PRG_NAME,
- "values for re-rooting are: 'none', 'midpoint', or 'outgroup' (minizming duplications is default)" );
+ .fatalError( "values for re-rooting are: 'none', 'midpoint', or 'outgroup' (minizming duplications is default)" );
}
}
if ( ForesterUtil.isEmpty( outgroup ) && ( rerooting == REROOTING.OUTGROUP ) ) {
- ForesterUtil.fatalError( PRG_NAME, "selected re-rooting by outgroup, but outgroup not set" );
+ ForesterUtil.fatalError( "selected re-rooting by outgroup, but outgroup not set" );
}
if ( !ForesterUtil.isEmpty( outgroup ) && ( rerooting != REROOTING.OUTGROUP ) ) {
- ForesterUtil.fatalError( PRG_NAME, "outgroup set, but selected re-rooting by other approach" );
+ ForesterUtil.fatalError( "outgroup set, but selected re-rooting by other approach" );
}
int gt_first = RIO.DEFAULT_RANGE;
int gt_last = RIO.DEFAULT_RANGE;
if ( cla.isOptionSet( GT_FIRST ) ) {
if ( !cla.isOptionHasAValue( GT_FIRST ) ) {
- ForesterUtil.fatalError( PRG_NAME, "no value for -" + GT_FIRST );
+ ForesterUtil.fatalError( "no value for -" + GT_FIRST );
}
if ( sdir ) {
- ForesterUtil.fatalError( PRG_NAME, "no gene tree range option for SDIR algorithm" );
+ ForesterUtil.fatalError( "no gene tree range option for SDIR algorithm" );
}
try {
gt_first = cla.getOptionValueAsInt( GT_FIRST );
}
catch ( final IOException e ) {
- ForesterUtil.fatalError( PRG_NAME, "could not parse integer for -" + GT_FIRST + " option" );
+ ForesterUtil.fatalError( "could not parse integer for -" + GT_FIRST + " option" );
}
if ( gt_first < 0 ) {
- ForesterUtil.fatalError( PRG_NAME, "attempt to set index of first tree to analyze to: " + gt_first );
+ ForesterUtil.fatalError( "attempt to set index of first tree to analyze to: " + gt_first );
}
}
if ( cla.isOptionSet( GT_LAST ) ) {
if ( !cla.isOptionHasAValue( GT_LAST ) ) {
- ForesterUtil.fatalError( PRG_NAME, "no value for -" + GT_LAST );
+ ForesterUtil.fatalError( "no value for -" + GT_LAST );
}
if ( sdir ) {
- ForesterUtil.fatalError( PRG_NAME, "no gene tree range option for SDIR algorithm" );
+ ForesterUtil.fatalError( "no gene tree range option for SDIR algorithm" );
}
try {
gt_last = cla.getOptionValueAsInt( GT_LAST );
}
catch ( final IOException e ) {
- ForesterUtil.fatalError( PRG_NAME, "could not parse integer for -" + GT_LAST + " option" );
+ ForesterUtil.fatalError( "could not parse integer for -" + GT_LAST + " option" );
}
if ( gt_last < 0 ) {
- ForesterUtil.fatalError( PRG_NAME, "attempt to set index of last tree to analyze to: " + gt_last );
+ ForesterUtil.fatalError( "attempt to set index of last tree to analyze to: " + gt_last );
}
}
if ( ( ( gt_last != RIO.DEFAULT_RANGE ) && ( gt_first != RIO.DEFAULT_RANGE ) ) && ( ( gt_last < gt_first ) ) ) {
- ForesterUtil.fatalError( PRG_NAME, "attempt to set range (0-based) of gene to analyze to: from " + gt_first
- + " to " + gt_last );
+ ForesterUtil.fatalError( "attempt to set range (0-based) of gene to analyze to: from " + gt_first + " to "
+ + gt_last );
}
- ForesterUtil.fatalErrorIfFileNotReadable( PRG_NAME, gene_trees_file );
- ForesterUtil.fatalErrorIfFileNotReadable( PRG_NAME, species_tree_file );
+ ForesterUtil.fatalErrorIfFileNotReadable( gene_trees_file );
+ ForesterUtil.fatalErrorIfFileNotReadable( species_tree_file );
if ( orthology_outtable.exists() ) {
- ForesterUtil.fatalError( PRG_NAME, "\"" + orthology_outtable + "\" already exists" );
+ ForesterUtil.fatalError( "\"" + orthology_outtable + "\" already exists" );
}
long time = 0;
System.out.println( "Gene trees : " + gene_trees_file );
System.out.println( "Non binary species tree : disallowed" );
}
time = System.currentTimeMillis();
- Phylogeny species_tree = null;
- try {
- final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
- species_tree = factory.create( species_tree_file, new PhyloXmlParser() )[ 0 ];
- }
- catch ( final Exception e ) {
- e.printStackTrace();
- System.exit( -1 );
- }
- if ( !species_tree.isRooted() ) {
- ForesterUtil.fatalError( PRG_NAME, "species tree is not rooted" );
- }
- final int o = PhylogenyMethods.countNumberOfOneDescendantNodes( species_tree );
- if ( o > 0 ) {
- ForesterUtil.printWarningMessage( PRG_NAME, "species tree has " + o
- + " internal nodes with only one descendent! Going to strip them." );
- PhylogenyMethods.deleteInternalNodesWithOnlyOneDescendent( species_tree );
- if ( PhylogenyMethods.countNumberOfOneDescendantNodes( species_tree ) > 0 ) {
- ForesterUtil.unexpectedFatalError( PRG_NAME, "stripping of one-desc nodes failed" );
- }
- }
+ // Phylogeny species_tree = null;
+ // try {
+ // final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+ // species_tree = factory.create( species_tree_file, new PhyloXmlParser() )[ 0 ];
+ // }
+ // catch ( final Exception e ) {
+ // e.printStackTrace();
+ // System.exit( -1 );
+ // }
final ALGORITHM algorithm;
if ( sdir ) {
algorithm = ALGORITHM.SDIR;
}
try {
final RIO rio = RIO.executeAnalysis( gene_trees_file,
- species_tree,
+ species_tree_file,
algorithm,
rerooting,
outgroup,
logfile != null,
true );
if ( algorithm == ALGORITHM.GSDIR ) {
- ForesterUtil.programMessage( PRG_NAME, "taxonomy linking based on: " + rio.getGSDIRtaxCompBase() );
+ System.out.println( "Taxonomy linking based on : " + rio.getGSDIRtaxCompBase() );
}
tableOutput( orthology_outtable, rio );
if ( ( algorithm != ALGORITHM.SDIR ) && ( logfile != null ) ) {
}
final BasicDescriptiveStatistics stats = rio.getDuplicationsStatistics();
final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.#" );
- ForesterUtil.programMessage( PRG_NAME,
- "Mean number of duplications : " + df.format( stats.arithmeticMean() )
- + " (sd: " + df.format( stats.sampleStandardDeviation() ) + ")" );
+ System.out.println( "Mean number of duplications : " + df.format( stats.arithmeticMean() ) + " (sd: "
+ + df.format( stats.sampleStandardDeviation() ) + ")" );
if ( stats.getN() > 3 ) {
- ForesterUtil.programMessage( PRG_NAME, "Median number of duplications: " + df.format( stats.median() ) );
+ System.out.println( "Median number of duplications: " + df.format( stats.median() ) );
}
- ForesterUtil.programMessage( PRG_NAME, "Minimum duplications : " + ( int ) stats.getMin() );
- ForesterUtil.programMessage( PRG_NAME, "Maximum duplications : " + ( int ) stats.getMax() );
+ System.out.println( "Minimum duplications : " + ( int ) stats.getMin() );
+ System.out.println( "Maximum duplications : " + ( int ) stats.getMax() );
}
catch ( final RIOException e ) {
- ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() );
+ ForesterUtil.fatalError( e.getLocalizedMessage() );
}
catch ( final SDIException e ) {
- ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() );
+ ForesterUtil.fatalError( e.getLocalizedMessage() );
}
catch ( final IOException e ) {
- ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() );
+ ForesterUtil.fatalError( e.getLocalizedMessage() );
}
catch ( final Exception e ) {
- ForesterUtil.unexpectedFatalError( PRG_NAME, e );
+ ForesterUtil.unexpectedFatalError( e );
}
time = System.currentTimeMillis() - time;
- ForesterUtil.programMessage( PRG_NAME, "time: " + time + "ms" );
- ForesterUtil.programMessage( PRG_NAME, "OK" );
+ System.out.println( "Time: " + time + "ms" );
+ System.out.println( "OK" );
System.exit( 0 );
}
out.flush();
out.println( rio.getLog().toString() );
out.close();
- ForesterUtil.programMessage( PRG_NAME, "wrote log to \"" + logfile + "\"" );
+ System.out.println( "Wrote log to \"" + logfile + "\"" );
}
private static void writeTable( final File table_outfile, final RIO rio, final IntMatrix m ) throws IOException {
w.print( "\t" );
if ( x == y ) {
if ( m.get( x, y ) != rio.getAnalyzedGeneTrees().length ) {
- ForesterUtil.unexpectedFatalError( PRG_NAME, "diagonal value is off" );
+ ForesterUtil.unexpectedFatalError( "diagonal value is off" );
}
w.print( "-" );
}
w.println();
}
w.close();
- ForesterUtil.programMessage( PRG_NAME, "wrote table to \"" + table_outfile + "\"" );
+ System.out.println( "Wrote table to \"" + table_outfile + "\"" );
}
}
import org.forester.datastructures.IntMatrix;
import org.forester.io.parsers.PhylogenyParser;
import org.forester.io.parsers.nhx.NHXParser;
+import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
import org.forester.io.parsers.util.ParserUtils;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyMethods;
import org.forester.sdi.GSDIR;
import org.forester.sdi.SDIException;
import org.forester.sdi.SDIR;
+import org.forester.sdi.SDIutil;
import org.forester.sdi.SDIutil.ALGORITHM;
import org.forester.sdi.SDIutil.TaxonomyComparisonBase;
import org.forester.util.BasicDescriptiveStatistics;
else if ( ( first == DEFAULT_RANGE ) && ( last >= 0 ) ) {
first = 0;
}
- checkPreconditions( gene_trees, rerooting, outgroup, first, last );
+ removeSingleDescendentsNodes( species_tree, verbose );
+ checkPreconditions( gene_trees, species_tree, rerooting, outgroup, first, last );
_produce_log = produce_log;
_verbose = verbose;
_rerooting = rerooting;
}
+    /**
+     * Runs a RIO analysis with both the gene trees and the species tree read
+     * from files.
+     * <p>
+     * The gene tree parser is chosen from the file type. When it is an
+     * {@link NHXParser}, underscores are left untouched, quotes are ignored
+     * and taxonomy extraction is switched on. The species tree is read with
+     * {@link SDIutil#parseSpeciesTree}, using the first gene tree as the
+     * reference gene tree.
+     *
+     * @throws RIOException if the gene trees file yields no gene tree
+     */
public final static RIO executeAnalysis( final File gene_trees_file,
+                                             final File species_tree_file,
+                                             final ALGORITHM algorithm,
+                                             final REROOTING rerooting,
+                                             final String outgroup,
+                                             final int first,
+                                             final int last,
+                                             final boolean produce_log,
+                                             final boolean verbose ) throws IOException, SDIException, RIOException {
+        final PhylogenyFactory phylogeny_factory = ParserBasedPhylogenyFactory.getInstance();
+        final PhylogenyParser gene_trees_parser = ParserUtils.createParserDependingOnFileType( gene_trees_file, true );
+        if ( gene_trees_parser instanceof NHXParser ) {
+            final NHXParser nhx_parser = ( NHXParser ) gene_trees_parser;
+            nhx_parser.setReplaceUnderscores( false );
+            nhx_parser.setIgnoreQuotes( true );
+            nhx_parser.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES );
+        }
+        final Phylogeny[] gene_trees = phylogeny_factory.create( gene_trees_file, gene_trees_parser );
+        if ( gene_trees.length == 0 ) {
+            throw new RIOException( "\"" + gene_trees_file + "\" is devoid of appropriate gene trees" );
+        }
+        final Phylogeny reference_gene_tree = gene_trees[ 0 ];
+        final Phylogeny species_tree = SDIutil.parseSpeciesTree( reference_gene_tree,
+                                                                 species_tree_file,
+                                                                 false,
+                                                                 true,
+                                                                 TAXONOMY_EXTRACTION.NO );
+        return new RIO( gene_trees, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose );
+    }
+
+ public final static RIO executeAnalysis( final File gene_trees_file,
final Phylogeny species_tree,
final ALGORITHM algorithm,
final REROOTING rerooting,
nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES );
}
final Phylogeny[] gene_trees = factory.create( gene_trees_file, p );
+ if ( gene_trees.length < 1 ) {
+ throw new RIOException( "\"" + gene_trees_file + "\" is devoid of appropriate gene trees" );
+ }
return new RIO( gene_trees, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose );
}
}
private final static void checkPreconditions( final Phylogeny[] gene_trees,
+ final Phylogeny species_tree,
final REROOTING rerooting,
final String outgroup,
final int first,
final int last ) throws RIOException {
+ if ( !species_tree.isRooted() ) {
+ throw new RIOException( "species tree is not rooted" );
+ }
if ( !( ( last == DEFAULT_RANGE ) && ( first == DEFAULT_RANGE ) )
&& ( ( last < first ) || ( last >= gene_trees.length ) || ( last < 0 ) || ( first < 0 ) ) ) {
throw new RIOException( "attempt to set range (0-based) of gene to analyze to: from " + first + " to "
}
}
+    /**
+     * Deletes internal nodes with only one descendant from the species tree,
+     * if {@code PhylogenyMethods.countNumberOfOneDescendantNodes} reports any.
+     *
+     * @param species_tree the species tree to clean up; modified in place
+     * @param verbose if true, a warning with the number of affected nodes is
+     *            printed to standard output before they are removed
+     */
+    private final static void removeSingleDescendentsNodes( final Phylogeny species_tree, final boolean verbose ) {
+        final int single_desc_count = PhylogenyMethods.countNumberOfOneDescendantNodes( species_tree );
+        if ( single_desc_count <= 0 ) {
+            return;
+        }
+        if ( verbose ) {
+            final String warning = "warning: species tree has " + single_desc_count
+                    + " internal nodes with only one descendent which are therefore going to be removed";
+            System.out.println( warning );
+        }
+        PhylogenyMethods.deleteInternalNodesWithOnlyOneDescendent( species_tree );
+    }
+
public enum REROOTING {
NONE, BY_ALGORITHM, MIDPOINT, OUTGROUP;
}
private final static String PATH_TO_TEST_DATA = System.getProperty( "user.dir" ) + ForesterUtil.getFileSeparator()
+ "test_data" + ForesterUtil.getFileSeparator();
+    /**
+     * Command line entry point: runs {@code testRIO_GSDIR()} and prints
+     * "OK" when it passes or a failure notice when it does not.
+     *
+     * @param args ignored
+     */
+    public static void main( final String[] args ) {
+        final boolean passed = testRIO_GSDIR();
+        final String verdict = passed ? "OK" : "testRIO GSDIR failed";
+        System.out.println( verdict );
+    }
+
public static boolean test() {
if ( !testRIO_GSDIR() ) {
return false;
}
return true;
}
-
- public static void main( final String[] args ) {
- if ( !testRIO_GSDIR() ) {
- System.out.println( "testRIO GSDIR failed" );
- }
- else {
- System.out.println( "OK" );
- }
- }
}
\ No newline at end of file
final boolean strip_gene_tree,
final boolean strip_species_tree ) throws SDIException {
_most_parsimonious_duplication_model = most_parsimonious_duplication_model;
+ if ( gene_tree.getRoot().getNumberOfDescendants() == 3 ) {
+ gene_tree.reRoot( gene_tree.getRoot().getChildNode( 2 ) );
+ }
final NodesLinkingResult nodes_linking_result = linkNodesOfG( gene_tree,
species_tree,
null,
* the species tree must be labeled in preorder.
* <p>
* @return
+ * @throws SDIException
*
*/
final static GSDIsummaryResult geneTreePostOrderTraversal( final Phylogeny gene_tree,
- final boolean most_parsimonious_duplication_model ) {
+ final boolean most_parsimonious_duplication_model )
+ throws SDIException {
final GSDIsummaryResult res = new GSDIsummaryResult();
for( final PhylogenyNodeIterator it = gene_tree.iteratorPostorder(); it.hasNext(); ) {
final PhylogenyNode g = it.next();
if ( g.isInternal() ) {
+ if ( g.getNumberOfDescendants() != 2 ) {
+ throw new SDIException( "gene tree contains internal node with " + g.getNumberOfDescendants()
+ + " descendents" );
+ }
PhylogenyNode s1 = g.getChildNode1().getLink();
PhylogenyNode s2 = g.getChildNode2().getLink();
while ( s1 != s2 ) {
\r
public interface GSDII {\r
\r
- public abstract int getSpeciationsSum();\r
-\r
public abstract Set<PhylogenyNode> getMappedExternalSpeciesTreeNodes();\r
\r
public abstract SortedSet<String> getReMappedScientificNamesFromGeneTree();\r
\r
+ public abstract int getSpeciationsSum();\r
+\r
public abstract List<PhylogenyNode> getStrippedExternalGeneTreeNodes();\r
\r
public abstract List<PhylogenyNode> getStrippedSpeciesTreeNodes();\r
_tax_comp_base = null;
}
+ /**
+ * Returns the mapped species tree nodes held by this object. The
+ * {@code _mapped_species_tree_nodes} field itself is returned, not a copy.
+ */
+ final Set<PhylogenyNode> getMappedSpeciesTreeNodes() {
+ return _mapped_species_tree_nodes;
+ }
+
+ /**
+ * Returns the sorted set of scientific names held in
+ * {@code _scientific_names_mapped_to_reduced_specificity}. The field itself
+ * is returned, not a copy.
+ */
+ final SortedSet<String> getScientificNamesMappedToReducedSpecificity() {
+ return _scientific_names_mapped_to_reduced_specificity;
+ }
+
final List<PhylogenyNode> getStrippedGeneTreeNodes() {
return _stripped_gene_tree_nodes;
}
return _stripped_species_tree_nodes;
}
- final Set<PhylogenyNode> getMappedSpeciesTreeNodes() {
- return _mapped_species_tree_nodes;
- }
-
final TaxonomyComparisonBase getTaxCompBase() {
return _tax_comp_base;
}
final void setTaxCompBase( final TaxonomyComparisonBase tax_comp_base ) {
_tax_comp_base = tax_comp_base;
}
-
- final SortedSet<String> getScientificNamesMappedToReducedSpecificity() {
- return _scientific_names_mapped_to_reduced_specificity;
- }
}
package org.forester.sdi;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+
+import org.forester.io.parsers.PhylogenyParser;
+import org.forester.io.parsers.nhx.NHXParser;
+import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
+import org.forester.io.parsers.phyloxml.PhyloXmlParser;
+import org.forester.io.parsers.util.ParserUtils;
import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyMethods;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.data.Identifier;
import org.forester.phylogeny.data.Taxonomy;
+import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
+import org.forester.phylogeny.factories.PhylogenyFactory;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
import org.forester.util.ForesterUtil;
public class SDIutil {
+    /**
+     * Decides which taxonomy field of the gene tree is to be used for
+     * comparing taxonomies.
+     * <p>
+     * For every external node carrying taxonomy data, it is counted whether a
+     * non-empty identifier value, taxonomy code and scientific name are
+     * present. The field with the highest count wins; ties are resolved in
+     * the order identifier, scientific name, taxonomy code.
+     *
+     * @param gene_tree the gene tree whose external nodes are inspected
+     * @return the comparison base backed by the most external nodes
+     * @throws SDIException if no field, or no field on more than one external
+     *             node, is populated
+     */
+    public final static TaxonomyComparisonBase determineTaxonomyComparisonBase( final Phylogeny gene_tree )
+            throws SDIException {
+        int ids = 0;
+        int codes = 0;
+        int names = 0;
+        final PhylogenyNodeIterator externals = gene_tree.iteratorExternalForward();
+        while ( externals.hasNext() ) {
+            final PhylogenyNode node = externals.next();
+            if ( !node.getNodeData().isHasTaxonomy() ) {
+                continue;
+            }
+            final Taxonomy tax = node.getNodeData().getTaxonomy();
+            if ( ( tax.getIdentifier() != null ) && !ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) ) {
+                ++ids;
+            }
+            if ( !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) {
+                ++codes;
+            }
+            if ( !ForesterUtil.isEmpty( tax.getScientificName() ) ) {
+                ++names;
+            }
+        }
+        // The counters only ever grow, so the largest final count equals the
+        // running maximum.
+        final int best = Math.max( ids, Math.max( codes, names ) );
+        if ( best == 0 ) {
+            throw new SDIException( "gene tree has no taxonomic data" );
+        }
+        if ( best == 1 ) {
+            throw new SDIException( "gene tree has only one node with taxonomic data" );
+        }
+        if ( best == ids ) {
+            return TaxonomyComparisonBase.ID;
+        }
+        if ( best == names ) {
+            return TaxonomyComparisonBase.SCIENTIFIC_NAME;
+        }
+        return TaxonomyComparisonBase.CODE;
+    }
+
+    /**
+     * Reads the first phylogeny from a species tree file.
+     * <p>
+     * The parser is chosen from the file type. A tree read by a
+     * {@link PhyloXmlParser} is returned as parsed. Any other tree is marked
+     * as rooted and its node names are transferred into the taxonomy field
+     * (scientific name, code or id) that
+     * {@link #determineTaxonomyComparisonBase(Phylogeny)} selects for the
+     * gene tree. The three NHX settings are applied only when the chosen
+     * parser is an {@link NHXParser}.
+     *
+     * @param gene_tree the gene tree used to select the taxonomy field
+     * @param species_tree_file the file to read the species tree from
+     * @param replace_undescores_in_nhx_trees passed to {@code setReplaceUnderscores}
+     * @param ignore_quotes_in_nhx_trees passed to {@code setIgnoreQuotes}
+     * @param taxonomy_extraction_in_nhx_trees passed to {@code setTaxonomyExtraction}
+     * @return the first phylogeny found in {@code species_tree_file}
+     * @throws SDIException if the file contains no phylogeny, or if no
+     *             comparison base can be determined from the gene tree
+     * @throws PhyloXmlDataFormatException if a node name cannot be transferred
+     *             into the selected taxonomy field
+     * @throws IOException if the file cannot be read
+     */
+    public final static Phylogeny parseSpeciesTree( final Phylogeny gene_tree,
+                                                    final File species_tree_file,
+                                                    final boolean replace_undescores_in_nhx_trees,
+                                                    final boolean ignore_quotes_in_nhx_trees,
+                                                    final NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction_in_nhx_trees )
+            throws FileNotFoundException, PhyloXmlDataFormatException, IOException, SDIException {
+        final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+        final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( species_tree_file, true );
+        final boolean is_phyloxml = p instanceof PhyloXmlParser;
+        if ( !is_phyloxml && ( p instanceof NHXParser ) ) {
+            final NHXParser nhx = ( NHXParser ) p;
+            nhx.setReplaceUnderscores( replace_undescores_in_nhx_trees );
+            nhx.setIgnoreQuotes( ignore_quotes_in_nhx_trees );
+            nhx.setTaxonomyExtraction( taxonomy_extraction_in_nhx_trees );
+        }
+        final Phylogeny[] species_trees = factory.create( species_tree_file, p );
+        // A file without any tree must surface as a user-level error rather
+        // than as an ArrayIndexOutOfBoundsException.
+        if ( ( species_trees == null ) || ( species_trees.length < 1 ) ) {
+            throw new SDIException( "\"" + species_tree_file + "\" is devoid of appropriate species trees" );
+        }
+        final Phylogeny species_tree = species_trees[ 0 ];
+        if ( is_phyloxml ) {
+            return species_tree;
+        }
+        species_tree.setRooted( true );
+        final TaxonomyComparisonBase comp_base = determineTaxonomyComparisonBase( gene_tree );
+        switch ( comp_base ) {
+            case SCIENTIFIC_NAME:
+                PhylogenyMethods.transferNodeNameToField( species_tree,
+                                                          PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME,
+                                                          true );
+                break;
+            case CODE:
+                PhylogenyMethods.transferNodeNameToField( species_tree,
+                                                          PhylogenyMethods.PhylogenyNodeField.TAXONOMY_CODE,
+                                                          true );
+                break;
+            case ID:
+                PhylogenyMethods.transferNodeNameToField( species_tree,
+                                                          PhylogenyMethods.PhylogenyNodeField.TAXONOMY_ID,
+                                                          true );
+                break;
+            default:
+                throw new SDIException( "unable to determine comparison base" );
+        }
+        return species_tree;
+    }
+
static String taxonomyToString( final PhylogenyNode n, final TaxonomyComparisonBase base ) {
switch ( base ) {
case ID:
}
}
}
-
- public final static TaxonomyComparisonBase determineTaxonomyComparisonBase( final Phylogeny gene_tree )
- throws SDIException {
- int with_id_count = 0;
- int with_code_count = 0;
- int with_sn_count = 0;
- int max = 0;
- for( final PhylogenyNodeIterator iter = gene_tree.iteratorExternalForward(); iter.hasNext(); ) {
- final PhylogenyNode g = iter.next();
- if ( g.getNodeData().isHasTaxonomy() ) {
- final Taxonomy tax = g.getNodeData().getTaxonomy();
- if ( ( tax.getIdentifier() != null ) && !ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) ) {
- if ( ++with_id_count > max ) {
- max = with_id_count;
- }
- }
- if ( !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) {
- if ( ++with_code_count > max ) {
- max = with_code_count;
- }
- }
- if ( !ForesterUtil.isEmpty( tax.getScientificName() ) ) {
- if ( ++with_sn_count > max ) {
- max = with_sn_count;
- }
- }
- }
- }
- if ( max == 0 ) {
- throw new SDIException( "gene tree has no taxonomic data" );
- }
- else if ( max == 1 ) {
- throw new SDIException( "gene tree has only one node with taxonomic data" );
- }
- else if ( max == with_id_count ) {
- return TaxonomyComparisonBase.ID;
- }
- else if ( max == with_sn_count ) {
- return TaxonomyComparisonBase.SCIENTIFIC_NAME;
- }
- else {
- return TaxonomyComparisonBase.CODE;
- }
- }
}
private final static String PATH_TO_TEST_DATA = System.getProperty( "user.dir" ) + ForesterUtil.getFileSeparator()
+ "test_data" + ForesterUtil.getFileSeparator();
- private final static Phylogeny createPhylogeny( final String nhx ) throws IOException {
- final Phylogeny p = ParserBasedPhylogenyFactory.getInstance().create( nhx, new NHXParser() )[ 0 ];
- p.setRooted( true );
- return p;
- }
-
- private final static Event getEvent( final Phylogeny p, final String n1, final String n2 ) {
- return PhylogenyMethods.calculateLCA( p.getNode( n1 ), p.getNode( n2 ) ).getNodeData().getEvent();
+    /**
+     * Command line entry point: runs the three GSDI test groups and prints a
+     * failure notice for each group that fails.
+     * <p>
+     * "OK" is printed only when all three groups pass.
+     *
+     * @param args ignored
+     */
+    public static void main( final String[] args ) {
+        boolean all_passed = true;
+        if ( !TestGSDI.testGSDI_against_binary_gene_tree() ) {
+            System.out.println( "binary failed" );
+            all_passed = false;
+        }
+        if ( !TestGSDI.testGSDI_general() ) {
+            System.out.println( "general failed" );
+            all_passed = false;
+        }
+        if ( !TestGSDI.testGSDIR_general() ) {
+            System.out.println( "general re-rooting failed" );
+            all_passed = false;
+        }
+        // Report success only when no group failed; previously a failure of
+        // the first or second group could still be followed by "OK".
+        if ( all_passed ) {
+            System.out.println( "OK" );
+        }
}
public static boolean test() {
return true;
}
+    /**
+     * Parses the given string with a fresh {@link NHXParser} and returns the
+     * first resulting phylogeny, marked as rooted.
+     *
+     * @param nhx the tree description handed to the parser
+     * @return the first parsed phylogeny, with its rooted flag set
+     * @throws IOException if parsing fails
+     */
+    private final static Phylogeny createPhylogeny( final String nhx ) throws IOException {
+        final Phylogeny[] parsed = ParserBasedPhylogenyFactory.getInstance().create( nhx, new NHXParser() );
+        final Phylogeny first = parsed[ 0 ];
+        first.setRooted( true );
+        return first;
+    }
+
+ /**
+ * Returns the event stored in the node data of the node that
+ * {@code PhylogenyMethods.calculateLCA} yields for the two nodes obtained
+ * from {@code p.getNode( n1 )} and {@code p.getNode( n2 )}.
+ */
+ private final static Event getEvent( final Phylogeny p, final String n1, final String n2 ) {
+ return PhylogenyMethods.calculateLCA( p.getNode( n1 ), p.getNode( n2 ) ).getNodeData().getEvent();
+ }
+
private static boolean testGSDI_against_binary_gene_tree() {
try {
final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
}
return true;
}
-
- public static void main( final String[] args ) {
- if ( !TestGSDI.testGSDI_against_binary_gene_tree() ) {
- System.out.println( "binary failed" );
- }
- if ( !TestGSDI.testGSDI_general() ) {
- System.out.println( "general failed" );
- }
- if ( !TestGSDI.testGSDIR_general() ) {
- System.out.println( "general re-rooting failed" );
- }
- else {
- System.out.println( "OK" );
- }
- // boolean success = test();
- // if ( success ) {
- // System.out.println( "OK" );
- // }
- // else {
- // System.out.println( "failed" );
- // }
- }
}
}
}
+ /**
+ * Reports a fatal error without a program name prefix and terminates the
+ * JVM.
+ * <p>
+ * Writes a blank line, {@code "error: "} followed by the message, and
+ * another blank line to standard error, then exits with status -1. This
+ * method does not return.
+ *
+ * @param message the text to print after {@code "error: "}
+ */
+ public static void fatalError( final String message ) {
+ System.err.println();
+ System.err.println( "error: " + message );
+ System.err.println();
+ System.exit( -1 );
+ }
+
public static void fatalError( final String prg_name, final String message ) {
System.err.println();
System.err.println( "[" + prg_name + "] > " + message );
System.exit( -1 );
}
+    /**
+     * Terminates the program with an error report when the given file is not
+     * readable.
+     * <p>
+     * The file is checked with {@code isReadableFile}; when that returns a
+     * non-empty description, it is reported through
+     * {@link #fatalError(String)}, which exits with status -1. Otherwise this
+     * method returns normally.
+     *
+     * @param file the file to check
+     */
+    public static void fatalErrorIfFileNotReadable( final File file ) {
+        final String problem = isReadableFile( file );
+        if ( isEmpty( problem ) ) {
+            return;
+        }
+        fatalError( problem );
+    }
+
public static void fatalErrorIfFileNotReadable( final String prg_name, final File file ) {
final String error = isReadableFile( file );
if ( !isEmpty( error ) ) {
return str_array;
}
+ /**
+ * Reports an unexpected error without a program name prefix and terminates
+ * the JVM.
+ * <p>
+ * Writes a blank line, a fixed "unexpected error" notice, the stack trace
+ * of {@code e} and another blank line to standard error, then exits with
+ * status -1. This method does not return.
+ *
+ * @param e the exception whose stack trace is printed
+ */
+ final public static void unexpectedFatalError( final Exception e ) {
+ System.err.println();
+ System.err.println( "unexpected error: should not have occured! Please contact program author(s)." );
+ e.printStackTrace( System.err );
+ System.err.println();
+ System.exit( -1 );
+ }
+
+ /**
+ * Reports an unexpected error without a program name prefix and terminates
+ * the JVM.
+ * <p>
+ * Writes a blank line, a fixed "unexpected error" notice, the given
+ * message and another blank line to standard error, then exits with
+ * status -1. This method does not return.
+ *
+ * @param message additional detail printed on its own line after the notice
+ */
+ final public static void unexpectedFatalError( final String message ) {
+ System.err.println();
+ System.err.println( "unexpected error: should not have occured! Please contact program author(s)." );
+ System.err.println( message );
+ System.err.println();
+ System.exit( -1 );
+ }
+
final public static void unexpectedFatalError( final String prg_name, final Exception e ) {
System.err.println();
System.err.println( "[" + prg_name
- + "] > Unexpected error. Should not have occured! Please contact program author(s)." );
+ + "] > unexpected error; should not have occured! Please contact program author(s)." );
e.printStackTrace( System.err );
System.err.println();
System.exit( -1 );
final public static void unexpectedFatalError( final String prg_name, final String message ) {
System.err.println();
System.err.println( "[" + prg_name
- + "] > Unexpected error. Should not have occured! Please contact program author(s)." );
+ + "] > unexpected error: should not have occured! Please contact program author(s)." );
System.err.println( message );
System.err.println();
System.exit( -1 );
final public static void unexpectedFatalError( final String prg_name, final String message, final Exception e ) {
System.err.println();
System.err.println( "[" + prg_name
- + "] > Unexpected error. Should not have occured! Please contact program author(s)." );
+ + "] > unexpected error: should not have occured! Please contact program author(s)." );
System.err.println( message );
e.printStackTrace( System.err );
System.err.println();