From: cmzmasek@gmail.com Date: Mon, 25 Mar 2013 19:58:48 +0000 (+0000) Subject: transfer of taxonomy in GSDI and RIO X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=b819fa043cac2722618af63f0d4752ffa1a40890;p=jalview.git transfer of taxonomy in GSDI and RIO --- diff --git a/forester/java/src/org/forester/application/gsdi.java b/forester/java/src/org/forester/application/gsdi.java index d5c30a3..711b1e6 100644 --- a/forester/java/src/org/forester/application/gsdi.java +++ b/forester/java/src/org/forester/application/gsdi.java @@ -63,17 +63,18 @@ public final class gsdi { final static private String GSDIR_OPTION = "r"; final static private String MOST_PARSIMONIOUS_OPTION = "m"; final static private String GUESS_FORMAT_OF_SPECIES_TREE = "q"; + final static private String TRANSFER_TAXONOMY_OPTION = "t"; final static private String HELP_OPTION_1 = "help"; final static private String HELP_OPTION_2 = "h"; final static private String SUFFIX_FOR_SPECIES_TREE_USED = "_species_tree_used.xml"; final static private String LOGFILE_SUFFIX = "_gsdi_log.txt"; final static private String REMAPPED_SUFFIX = "_gsdi_remapped.txt"; final static private String PRG_NAME = "gsdi"; - final static private String PRG_VERSION = "1.000"; - final static private String PRG_DATE = "120629"; + final static private String PRG_VERSION = "1.001"; + final static private String PRG_DATE = "130325"; final static private String PRG_DESC = "general speciation duplication inference"; final static private String E_MAIL = "phylosoft@gmail.com"; - final static private String WWW = "www.phylosoft.org/forester"; + final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"; public static void main( final String args[] ) { try { @@ -108,6 +109,7 @@ public final class gsdi { allowed_options.add( gsdi.GUESS_FORMAT_OF_SPECIES_TREE ); allowed_options.add( gsdi.MOST_PARSIMONIOUS_OPTION ); allowed_options.add( gsdi.ALLOW_STRIPPING_OF_GENE_TREE_OPTION ); + allowed_options.add( TRANSFER_TAXONOMY_OPTION ); final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options ); if ( dissallowed_options.length() > 0 ) { ForesterUtil.fatalError( gsdi.PRG_NAME, "unknown option(s): " + dissallowed_options ); @@ -138,6 +140,10 @@ public final class gsdi { } allow_stripping_of_gene_tree = true; } + boolean transfer_taxonomy = false; + if ( cla.isOptionSet( TRANSFER_TAXONOMY_OPTION ) ) { + transfer_taxonomy = true; + } Phylogeny species_tree = null; Phylogeny gene_tree = null; File gene_tree_file = null; @@ -234,6 +240,7 @@ public final class gsdi { + ( ForesterUtil.isEmpty( species_tree.getName() ) ? "" : gene_tree.getName() ) ); System.out.println( "Species tree name : " + ( ForesterUtil.isEmpty( species_tree.getName() ) ? "" : gene_tree.getName() ) ); + System.out.println( "Transfer taxonomy : " + transfer_taxonomy ); GSDII gsdii = null; final long start_time = new Date().getTime(); try { @@ -255,10 +262,11 @@ public final class gsdi { species_tree, most_parsimonous_duplication_model, allow_stripping_of_gene_tree, - true ); + true, + transfer_taxonomy ); } else if ( base_algorithm == ALGORITHM.GSDIR ) { - gsdii = new GSDIR( gene_tree, species_tree, allow_stripping_of_gene_tree, true ); + gsdii = new GSDIR( gene_tree, species_tree, allow_stripping_of_gene_tree, true, transfer_taxonomy ); } } catch ( final SDIException e ) { @@ -385,6 +393,8 @@ public final class gsdi { + ": to allow species tree in other formats than phyloXML (i.e. Newick, NHX, Nexus)" ); System.out.println( " -" + gsdi.GSDIR_OPTION + ": to use GSDIR algorithm instead of GSDI algorithm (re-rooting)" ); + System.out.println( " -" + TRANSFER_TAXONOMY_OPTION + + ": to transfer taxonomic data from species tree to gene tree\n" ); System.out.println(); System.out.println( "Gene tree:" ); System.out.println( " in phyloXM format, with taxonomy and sequence data in appropriate fields" ); diff --git a/forester/java/src/org/forester/application/rio.java b/forester/java/src/org/forester/application/rio.java index 3ccacf5..d3414a7 100644 --- a/forester/java/src/org/forester/application/rio.java +++ b/forester/java/src/org/forester/application/rio.java @@ -54,20 +54,21 @@ import org.forester.util.ForesterUtil; public class rio { - final static private String PRG_NAME = "rio"; - final static private String PRG_VERSION = "4.000 beta 9"; - final static private String PRG_DATE = "2013.01.14"; - final static private String E_MAIL = "phyloxml@gmail.com"; - final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"; - final static private String HELP_OPTION_1 = "help"; - final static private String HELP_OPTION_2 = "h"; - final static private String GT_FIRST = "f"; - final static private String GT_LAST = "l"; - final static private String REROOTING_OPT = "r"; - final static private String OUTGROUP = "o"; - final static private String RETURN_SPECIES_TREE = "s"; - final static private String RETURN_BEST_GENE_TREE = "g"; - final static private String USE_SDIR = "b"; + final static private String PRG_NAME = "rio"; + final static private String PRG_VERSION = "4.000 beta 10"; + final static private String PRG_DATE = "130325"; + final static private String E_MAIL = "phyloxml@gmail.com"; + final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"; + final static private String HELP_OPTION_1 = "help"; + final static private String HELP_OPTION_2 = "h"; + final static private String GT_FIRST = "f"; + final static private String GT_LAST = "l"; + final static private String REROOTING_OPT = "r"; + final static private String OUTGROUP = "o"; + final static private String RETURN_SPECIES_TREE = "s"; + final static private String RETURN_BEST_GENE_TREE = "g"; + final static private String USE_SDIR = "b"; + final static private String TRANSFER_TAXONOMY_OPTION = "t"; public static void main( final String[] args ) { ForesterUtil.printProgramInformation( PRG_NAME, @@ -101,6 +102,7 @@ public class rio { allowed_options.add( USE_SDIR ); allowed_options.add( RETURN_SPECIES_TREE ); allowed_options.add( RETURN_BEST_GENE_TREE ); + allowed_options.add( TRANSFER_TAXONOMY_OPTION ); final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options ); if ( dissallowed_options.length() > 0 ) { ForesterUtil.fatalError( "unknown option(s): " + dissallowed_options ); @@ -229,6 +231,13 @@ public class rio { ForesterUtil.fatalError( "\"" + return_gene_tree + "\" already exists" ); } } + boolean transfer_taxonomy = false; + if ( !sdir && cla.isOptionSet( TRANSFER_TAXONOMY_OPTION ) ) { + if ( return_gene_tree == null ) { + ForesterUtil.fatalError( "no point in transferring taxonomy data without returning best gene tree" ); + } + transfer_taxonomy = true; + } ForesterUtil.fatalErrorIfFileNotReadable( gene_trees_file ); ForesterUtil.fatalErrorIfFileNotReadable( species_tree_file ); if ( orthology_outtable.exists() ) { @@ -300,7 +309,8 @@ public class rio { gt_first, gt_last, logfile != null, - true ); + true, + transfer_taxonomy ); } else { iterating = true; @@ -329,7 +339,8 @@ public class rio { gt_first, gt_last, logfile != null, - true ); + true, + transfer_taxonomy ); } if ( algorithm == ALGORITHM.GSDIR ) { System.out.println( "Taxonomy linking based on : " + rio.getGSDIRtaxCompBase() ); @@ -420,6 +431,11 @@ public class rio { .println( " -" + RETURN_SPECIES_TREE + "= : to write the (stripped) species tree to file" ); System.out.println( " -" + RETURN_BEST_GENE_TREE + "= : to write (one) minimal duplication gene tree to file" ); + System.out + .println( " -" + + TRANSFER_TAXONOMY_OPTION + + " : to transfer taxonomic data from species tree to returned minimal duplication gene tree\n" + + " (if -" + RETURN_BEST_GENE_TREE + " option is used)" ); System.out.println( " -" + USE_SDIR + " : to use SDIR instead of GSDIR (faster, but non-binary species trees are" ); System.out.println( " disallowed, as are most options)" ); diff --git a/forester/java/src/org/forester/archaeopteryx/Archaeopteryx.java b/forester/java/src/org/forester/archaeopteryx/Archaeopteryx.java index 73afe58..c23dcca 100644 --- a/forester/java/src/org/forester/archaeopteryx/Archaeopteryx.java +++ b/forester/java/src/org/forester/archaeopteryx/Archaeopteryx.java @@ -36,7 +36,6 @@ import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.util.ForesterUtil; - public final class Archaeopteryx { public static MainFrame createApplication( final Phylogeny phylogeny ) { diff --git a/forester/java/src/org/forester/archaeopteryx/ArchaeopteryxE.java b/forester/java/src/org/forester/archaeopteryx/ArchaeopteryxE.java index 57a0532..a855d90 100644 --- a/forester/java/src/org/forester/archaeopteryx/ArchaeopteryxE.java +++ b/forester/java/src/org/forester/archaeopteryx/ArchaeopteryxE.java @@ -892,7 +892,7 @@ public class ArchaeopteryxE extends JApplet implements ActionListener { GSDI gsdi = null; final Phylogeny species_tree = _species_tree.copy(); try { - gsdi = new GSDI( gene_tree, species_tree, false, true, true ); + gsdi = new GSDI( gene_tree, species_tree, false, true, true, true ); } catch ( final SDIException e ) { JOptionPane.showMessageDialog( this, @@ -965,7 +965,7 @@ public class ArchaeopteryxE extends JApplet implements ActionListener { GSDIR gsdir = null; final Phylogeny species_tree = _species_tree.copy(); try { - gsdir = new GSDIR( gene_tree, species_tree, true, true ); + gsdir = new GSDIR( gene_tree, species_tree, true, true, true ); } catch ( final SDIException e ) { JOptionPane.showMessageDialog( this, diff --git a/forester/java/src/org/forester/archaeopteryx/Constants.java b/forester/java/src/org/forester/archaeopteryx/Constants.java index 464b428..8aaf495 100644 --- a/forester/java/src/org/forester/archaeopteryx/Constants.java +++ b/forester/java/src/org/forester/archaeopteryx/Constants.java @@ -42,8 +42,8 @@ public final class Constants { public final static boolean __SYNTH_LF = false; // TODO remove me public final static boolean ALLOW_DDBJ_BLAST = false; public final static String PRG_NAME = "Archaeopteryx"; - final static String VERSION = "0.9809 A1ST"; - final static String PRG_DATE = "130314"; + final static String VERSION = "0.9810 A1ST"; + final static String PRG_DATE = "130325"; final static String DEFAULT_CONFIGURATION_FILE_NAME = "_aptx_configuration_file"; final static String[] DEFAULT_FONT_CHOICES = { "Verdana", "Tahoma", "Arial", "Helvetica", "Dialog", "Lucida Sans", "SansSerif", "Sans-serif", "Sans" }; diff --git a/forester/java/src/org/forester/archaeopteryx/MainFrame.java b/forester/java/src/org/forester/archaeopteryx/MainFrame.java index e0db7b3..c26c630 100644 --- a/forester/java/src/org/forester/archaeopteryx/MainFrame.java +++ b/forester/java/src/org/forester/archaeopteryx/MainFrame.java @@ -800,7 +800,7 @@ public abstract class MainFrame extends JFrame implements ActionListener { GSDI gsdi = null; final Phylogeny species_tree = getSpeciesTree().copy(); try { - gsdi = new GSDI( gene_tree, species_tree, false, true, true ); + gsdi = new GSDI( gene_tree, species_tree, false, true, true, true ); } catch ( final SDIException e ) { JOptionPane.showMessageDialog( this, @@ -873,7 +873,7 @@ public abstract class MainFrame extends JFrame implements ActionListener { GSDIR gsdir = null; final Phylogeny species_tree = getSpeciesTree().copy(); try { - gsdir = new GSDIR( gene_tree, species_tree, true, true ); + gsdir = new GSDIR( gene_tree, species_tree, true, true, true ); } catch ( final SDIException e ) { JOptionPane.showMessageDialog( this, diff --git a/forester/java/src/org/forester/rio/RIO.java b/forester/java/src/org/forester/rio/RIO.java index cde7804..b19d327 100644 --- a/forester/java/src/org/forester/rio/RIO.java +++ b/forester/java/src/org/forester/rio/RIO.java @@ -89,7 +89,8 @@ public final class RIO { int first, int last, final boolean produce_log, - final boolean verbose ) throws IOException, SDIException, RIOException { + final boolean verbose, + final boolean transfer_taxonomy ) throws IOException, SDIException, RIOException { if ( ( last == DEFAULT_RANGE ) && ( first >= 0 ) ) { last = END_OF_GT; } @@ -110,7 +111,7 @@ public final class RIO { _removed_gene_tree_nodes = null; _duplications_stats = new BasicDescriptiveStatistics(); p.reset(); - inferOrthologs( p, species_tree, algorithm, outgroup, first, last ); + inferOrthologs( p, species_tree, algorithm, outgroup, first, last, transfer_taxonomy ); _species_tree = species_tree; } @@ -122,7 +123,8 @@ public final class RIO { int first, int last, final boolean produce_log, - final boolean verbose ) throws IOException, SDIException, RIOException { + final boolean verbose, + final boolean transfer_taxonomy ) throws IOException, SDIException, RIOException { if ( ( last == DEFAULT_RANGE ) && ( first >= 0 ) ) { last = gene_trees.length - 1; } @@ -141,7 +143,7 @@ public final class RIO { _analyzed_gene_trees = null; _removed_gene_tree_nodes = null; _duplications_stats = new BasicDescriptiveStatistics(); - inferOrthologs( gene_trees, species_tree, algorithm, outgroup, first, last ); + inferOrthologs( gene_trees, species_tree, algorithm, outgroup, first, last, transfer_taxonomy ); _species_tree = species_tree; } @@ -202,8 +204,9 @@ public final class RIO { final ALGORITHM algorithm, final String outgroup, int first, - final int last ) throws SDIException, RIOException, FileNotFoundException, - IOException { + final int last, + final boolean transfer_taxonomy ) throws SDIException, RIOException, + FileNotFoundException, IOException { if ( !parser.hasNext() ) { throw new RIOException( "no gene trees to analyze" ); } @@ -252,7 +255,12 @@ public final class RIO { throw new RIOException( "failed to establish species based mapping between gene and species trees" ); } } - final Phylogeny analyzed_gt = performOrthologInference( gt, species_tree, algorithm, outgroup, counter ); + final Phylogeny analyzed_gt = performOrthologInference( gt, + species_tree, + algorithm, + outgroup, + counter, + transfer_taxonomy ); RIO.calculateOrthologTable( analyzed_gt, true, counter ); ++counter; } @@ -278,8 +286,9 @@ public final class RIO { final ALGORITHM algorithm, final String outgroup, final int first, - final int last ) throws SDIException, RIOException, FileNotFoundException, - IOException { + final int last, + final boolean transfer_taxonomy ) throws SDIException, RIOException, + FileNotFoundException, IOException { if ( algorithm == ALGORITHM.SDIR ) { // Removes from species_tree all species not found in gene_tree. PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( gene_trees[ 0 ], species_tree ); @@ -332,7 +341,12 @@ public final class RIO { throw new RIOException( "failed to establish species based mapping between gene and species trees" ); } } - _analyzed_gene_trees[ i ] = performOrthologInference( gt, species_tree, algorithm, outgroup, i ); + _analyzed_gene_trees[ i ] = performOrthologInference( gt, + species_tree, + algorithm, + outgroup, + i, + transfer_taxonomy ); } if ( log() ) { postLog( species_tree, first, last ); @@ -382,7 +396,9 @@ public final class RIO { final Phylogeny species_tree, final ALGORITHM algorithm, final String outgroup, - final int i ) throws SDIException, RIOException { + final int i, + final boolean transfer_taxonomy ) throws SDIException, + RIOException { final Phylogeny assigned_tree; switch ( algorithm ) { case SDIR: { @@ -390,7 +406,7 @@ public final class RIO { break; } case GSDIR: { - assigned_tree = performOrthologInferenceByGSDI( gene_tree, species_tree, outgroup, i ); + assigned_tree = performOrthologInferenceByGSDI( gene_tree, species_tree, outgroup, i, transfer_taxonomy ); break; } default: { @@ -412,11 +428,13 @@ public final class RIO { private final Phylogeny performOrthologInferenceByGSDI( final Phylogeny gene_tree, final Phylogeny species_tree, final String outgroup, - final int i ) throws SDIException, RIOException { + final int i, + final boolean transfer_taxonomy ) throws SDIException, + RIOException { final Phylogeny assigned_tree; final int dups; if ( _rerooting == REROOTING.BY_ALGORITHM ) { - final GSDIR gsdir = new GSDIR( gene_tree, species_tree, true, i == 0 ); + final GSDIR gsdir = new GSDIR( gene_tree, species_tree, true, i == 0, transfer_taxonomy ); assigned_tree = gsdir.getMinDuplicationsSumGeneTree(); if ( i == 0 ) { _removed_gene_tree_nodes = gsdir.getStrippedExternalGeneTreeNodes(); @@ -440,7 +458,7 @@ public final class RIO { final PhylogenyNode n = gene_tree.getNode( outgroup ); gene_tree.reRoot( n ); } - final GSDI gsdi = new GSDI( gene_tree, species_tree, true, true, true ); + final GSDI gsdi = new GSDI( gene_tree, species_tree, true, true, true, transfer_taxonomy ); _removed_gene_tree_nodes = gsdi.getStrippedExternalGeneTreeNodes(); for( final PhylogenyNode r : _removed_gene_tree_nodes ) { if ( !r.getNodeData().isHasTaxonomy() ) { @@ -560,7 +578,9 @@ public final class RIO { final int first, final int last, final boolean produce_log, - final boolean verbose ) throws IOException, SDIException, RIOException { + final boolean verbose, + final boolean transfer_taxonomy ) throws IOException, SDIException, + RIOException { final Phylogeny[] gene_trees = parseGeneTrees( gene_trees_file ); if ( gene_trees.length < 1 ) { throw new RIOException( "\"" + gene_trees_file + "\" is devoid of appropriate gene trees" ); @@ -570,7 +590,16 @@ public final class RIO { false, true, TAXONOMY_EXTRACTION.NO ); - return new RIO( gene_trees, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose ); + return new RIO( gene_trees, + species_tree, + algorithm, + rerooting, + outgroup, + first, + last, + produce_log, + verbose, + transfer_taxonomy ); } public final static RIO executeAnalysis( final File gene_trees_file, @@ -579,7 +608,9 @@ public final class RIO { final REROOTING rerooting, final String outgroup, final boolean produce_log, - final boolean verbose ) throws IOException, SDIException, RIOException { + final boolean verbose, + final boolean transfer_taxonomy ) throws IOException, SDIException, + RIOException { return new RIO( parseGeneTrees( gene_trees_file ), species_tree, algorithm, @@ -588,7 +619,8 @@ public final class RIO { DEFAULT_RANGE, DEFAULT_RANGE, produce_log, - verbose ); + verbose, + transfer_taxonomy ); } public final static RIO executeAnalysis( final File gene_trees_file, @@ -599,7 +631,9 @@ public final class RIO { final int first, final int last, final boolean produce_log, - final boolean verbose ) throws IOException, SDIException, RIOException { + final boolean verbose, + final boolean transfer_taxonomy ) throws IOException, SDIException, + RIOException { return new RIO( parseGeneTrees( gene_trees_file ), species_tree, algorithm, @@ -608,7 +642,8 @@ public final class RIO { first, last, produce_log, - verbose ); + verbose, + transfer_taxonomy ); } public final static RIO executeAnalysis( final IteratingPhylogenyParser p, @@ -619,7 +654,9 @@ public final class RIO { final int first, final int last, final boolean produce_log, - final boolean verbose ) throws IOException, SDIException, RIOException { + final boolean verbose, + final boolean transfer_taxonomy ) throws IOException, SDIException, + RIOException { final Phylogeny g0 = p.next(); if ( ( g0 == null ) || g0.isEmpty() || ( g0.getNumberOfExternalNodes() < 2 ) ) { throw new RIOException( "input file does not seem to contain any gene trees" ); @@ -630,7 +667,16 @@ public final class RIO { true, TAXONOMY_EXTRACTION.NO ); p.reset(); - return new RIO( p, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose ); + return new RIO( p, + species_tree, + algorithm, + rerooting, + outgroup, + first, + last, + produce_log, + verbose, + transfer_taxonomy ); } public final static RIO executeAnalysis( final IteratingPhylogenyParser p, @@ -639,7 +685,9 @@ public final class RIO { final REROOTING rerooting, final String outgroup, final boolean produce_log, - final boolean verbose ) throws IOException, SDIException, RIOException { + final boolean verbose, + final boolean transfer_taxonomy ) throws IOException, SDIException, + RIOException { return new RIO( p, species_tree, algorithm, @@ -648,7 +696,8 @@ public final class RIO { DEFAULT_RANGE, DEFAULT_RANGE, produce_log, - verbose ); + verbose, + transfer_taxonomy ); } public final static RIO executeAnalysis( final IteratingPhylogenyParser p, @@ -659,8 +708,19 @@ public final class RIO { final int first, final int last, final boolean produce_log, - final boolean verbose ) throws IOException, SDIException, RIOException { - return new RIO( p, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose ); + final boolean verbose, + final boolean transfer_taxonomy ) throws IOException, SDIException, + RIOException { + return new RIO( p, + species_tree, + algorithm, + rerooting, + outgroup, + first, + last, + produce_log, + verbose, + transfer_taxonomy ); } public final static RIO executeAnalysis( final Phylogeny[] gene_trees, final Phylogeny species_tree ) @@ -673,6 +733,7 @@ public final class RIO { DEFAULT_RANGE, DEFAULT_RANGE, false, + false, false ); } @@ -682,7 +743,9 @@ public final class RIO { final REROOTING rerooting, final String outgroup, final boolean produce_log, - final boolean verbose ) throws IOException, SDIException, RIOException { + final boolean verbose, + final boolean transfer_taxonomy ) throws IOException, SDIException, + RIOException { return new RIO( gene_trees, species_tree, algorithm, @@ -691,7 +754,8 @@ public final class RIO { DEFAULT_RANGE, DEFAULT_RANGE, produce_log, - verbose ); + verbose, + transfer_taxonomy ); } public final static RIO executeAnalysis( final Phylogeny[] gene_trees, @@ -702,8 +766,19 @@ public final class RIO { final int first, final int last, final boolean produce_log, - final boolean verbose ) throws IOException, SDIException, RIOException { - return new RIO( gene_trees, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose ); + final boolean verbose, + final boolean transfer_taxonomy ) throws IOException, SDIException, + RIOException { + return new RIO( gene_trees, + species_tree, + algorithm, + rerooting, + outgroup, + first, + last, + produce_log, + verbose, + transfer_taxonomy ); } private final static void calculateOrthologTable( final Phylogeny g, final boolean sort, final int counter ) diff --git a/forester/java/src/org/forester/rio/TestRIO.java b/forester/java/src/org/forester/rio/TestRIO.java index 5d2c34e..9cacc29 100644 --- a/forester/java/src/org/forester/rio/TestRIO.java +++ b/forester/java/src/org/forester/rio/TestRIO.java @@ -42,92 +42,6 @@ public final class TestRIO { return true; } - private static boolean testRIO_GSDIR_Iterating() { - try { - final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final NHXParser nhx = new NHXParser(); - nhx.setReplaceUnderscores( false ); - nhx.setIgnoreQuotes( true ); - nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.AGRESSIVE ); - final String gene_trees_1_str = "(((((MOUSE,RAT),HUMAN),CAEEL),YEAST),ARATH);" - + "((((MOUSE,RAT),HUMAN),(ARATH,YEAST)),CAEEL);" + "((MOUSE,RAT),(((ARATH,YEAST),CAEEL),HUMAN));" - + "(((((MOUSE,HUMAN),RAT),CAEEL),YEAST),ARATH);" + "((((HUMAN,MOUSE),RAT),(ARATH,YEAST)),CAEEL);"; - nhx.setSource( gene_trees_1_str ); - final String species_trees_1_str = "(((((MOUSE,RAT),HUMAN),CAEEL),YEAST),ARATH);"; - final Phylogeny species_tree_1 = factory.create( species_trees_1_str, new NHXParser() )[ 0 ]; - species_tree_1.setRooted( true ); - PhylogenyMethods.transferNodeNameToField( species_tree_1, PhylogenyNodeField.TAXONOMY_CODE, true ); - //Archaeopteryx.createApplication( species_trees_1 ); - RIO rio = RIO.executeAnalysis( nhx, - species_tree_1, - ALGORITHM.GSDIR, - REROOTING.BY_ALGORITHM, - "", - true, - false ); - if ( rio.getExtNodesOfAnalyzedGeneTrees() != 6 ) { - return false; - } - if ( rio.getGSDIRtaxCompBase() != TaxonomyComparisonBase.CODE ) { - return false; - } - if ( rio.getRemovedGeneTreeNodes().size() != 0 ) { - return false; - } - IntMatrix m = rio.getOrthologTable(); - //System.out.println( m.toString() ); - if ( !m.getRowAsString( 0, ',' ).equals( "ARATH,5,5,5,5,5,5" ) ) { - return false; - } - if ( !m.getRowAsString( 1, ',' ).equals( "CAEEL,5,5,5,5,5,5" ) ) { - return false; - } - if ( !m.getRowAsString( 2, ',' ).equals( "HUMAN,5,5,5,5,3,5" ) ) { - return false; - } - if ( !m.getRowAsString( 3, ',' ).equals( "MOUSE,5,5,5,5,3,5" ) ) { - return false; - } - if ( !m.getRowAsString( 4, ',' ).equals( "RAT,5,5,3,3,5,5" ) ) { - return false; - } - if ( !m.getRowAsString( 5, ',' ).equals( "YEAST,5,5,5,5,5,5" ) ) { - return false; - } - // - final String species_trees_2_str = "((((MOUSE,RAT,HUMAN),CAEEL),YEAST),ARATH);"; - final Phylogeny species_tree_2 = factory.create( species_trees_2_str, new NHXParser() )[ 0 ]; - species_tree_2.setRooted( true ); - PhylogenyMethods.transferNodeNameToField( species_tree_2, PhylogenyNodeField.TAXONOMY_CODE, true ); - rio = RIO.executeAnalysis( nhx, species_tree_2, ALGORITHM.GSDIR, REROOTING.BY_ALGORITHM, "", true, false ); - m = rio.getOrthologTable(); - // System.out.println( m.toString() ); - if ( !m.getRowAsString( 0, ',' ).equals( "ARATH,5,5,5,5,5,5" ) ) { - return false; - } - if ( !m.getRowAsString( 1, ',' ).equals( "CAEEL,5,5,5,5,5,5" ) ) { - return false; - } - if ( !m.getRowAsString( 2, ',' ).equals( "HUMAN,5,5,5,5,5,5" ) ) { - return false; - } - if ( !m.getRowAsString( 3, ',' ).equals( "MOUSE,5,5,5,5,5,5" ) ) { - return false; - } - if ( !m.getRowAsString( 4, ',' ).equals( "RAT,5,5,5,5,5,5" ) ) { - return false; - } - if ( !m.getRowAsString( 5, ',' ).equals( "YEAST,5,5,5,5,5,5" ) ) { - return false; - } - } - catch ( final Exception e ) { - e.printStackTrace( System.out ); - return false; - } - return true; - } - private static boolean testRIO_GSDIR() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); @@ -148,7 +62,8 @@ public final class TestRIO { REROOTING.BY_ALGORITHM, "", true, - false ); + false, + true ); if ( rio.getAnalyzedGeneTrees().length != 4 ) { return false; } @@ -182,7 +97,8 @@ public final class TestRIO { REROOTING.BY_ALGORITHM, "", true, - false ); + false, + true ); if ( rio.getAnalyzedGeneTrees().length != 2 ) { return false; } @@ -217,7 +133,8 @@ public final class TestRIO { REROOTING.BY_ALGORITHM, "", true, - false ); + false, + true ); if ( rio.getAnalyzedGeneTrees().length != 3 ) { return false; } @@ -251,7 +168,8 @@ public final class TestRIO { REROOTING.BY_ALGORITHM, "", true, - false ); + false, + true ); if ( rio.getAnalyzedGeneTrees().length != 1 ) { return false; } @@ -285,7 +203,8 @@ public final class TestRIO { REROOTING.BY_ALGORITHM, "", true, - false ); + false, + true ); if ( rio.getAnalyzedGeneTrees().length != 1 ) { return false; } @@ -322,7 +241,8 @@ public final class TestRIO { REROOTING.BY_ALGORITHM, "", true, - false ); + false, + true ); if ( rio.getAnalyzedGeneTrees().length != 5 ) { return false; } @@ -391,7 +311,8 @@ public final class TestRIO { -1, -1, true, - false ); + false, + true ); if ( r0.getGSDIRtaxCompBase() != TaxonomyComparisonBase.CODE ) { return false; } @@ -444,7 +365,8 @@ public final class TestRIO { -1, -1, true, - false ); + false, + true ); if ( r0.getGSDIRtaxCompBase() != TaxonomyComparisonBase.ID ) { return false; } @@ -498,7 +420,8 @@ public final class TestRIO { -1, -1, true, - false ); + false, + true ); if ( r0.getGSDIRtaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) { return false; } @@ -551,7 +474,8 @@ public final class TestRIO { -1, -1, true, - false ); + false, + true ); if ( r0.getGSDIRtaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) { return false; } @@ -604,7 +528,8 @@ public final class TestRIO { -1, -1, true, - false ); + false, + true ); if ( r0.getGSDIRtaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) { return false; } @@ -658,7 +583,8 @@ public final class TestRIO { 10, 19, true, - false ); + false, + true ); if ( r0.getGSDIRtaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) { return false; } @@ -711,7 +637,8 @@ public final class TestRIO { -1, -1, true, - false ); + false, + true ); if ( r0.getGSDIRtaxCompBase() != TaxonomyComparisonBase.CODE ) { return false; } @@ -753,7 +680,8 @@ public final class TestRIO { -1, -1, true, - false ); + false, + true ); if ( r0.getGSDIRtaxCompBase() != TaxonomyComparisonBase.CODE ) { return false; } @@ -805,4 +733,98 @@ public final class TestRIO { } return true; } + + private static boolean testRIO_GSDIR_Iterating() { + try { + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final NHXParser nhx = new NHXParser(); + nhx.setReplaceUnderscores( false ); + nhx.setIgnoreQuotes( true ); + nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.AGRESSIVE ); + final String gene_trees_1_str = "(((((MOUSE,RAT),HUMAN),CAEEL),YEAST),ARATH);" + + "((((MOUSE,RAT),HUMAN),(ARATH,YEAST)),CAEEL);" + "((MOUSE,RAT),(((ARATH,YEAST),CAEEL),HUMAN));" + + "(((((MOUSE,HUMAN),RAT),CAEEL),YEAST),ARATH);" + "((((HUMAN,MOUSE),RAT),(ARATH,YEAST)),CAEEL);"; + nhx.setSource( gene_trees_1_str ); + final String species_trees_1_str = "(((((MOUSE,RAT),HUMAN),CAEEL),YEAST),ARATH);"; + final Phylogeny species_tree_1 = factory.create( species_trees_1_str, new NHXParser() )[ 0 ]; + species_tree_1.setRooted( true ); + PhylogenyMethods.transferNodeNameToField( species_tree_1, PhylogenyNodeField.TAXONOMY_CODE, true ); + //Archaeopteryx.createApplication( species_trees_1 ); + RIO rio = RIO.executeAnalysis( nhx, + species_tree_1, + ALGORITHM.GSDIR, + REROOTING.BY_ALGORITHM, + "", + true, + false, + true ); + if ( rio.getExtNodesOfAnalyzedGeneTrees() != 6 ) { + return false; + } + if ( rio.getGSDIRtaxCompBase() != TaxonomyComparisonBase.CODE ) { + return false; + } + if ( rio.getRemovedGeneTreeNodes().size() != 0 ) { + return false; + } + IntMatrix m = rio.getOrthologTable(); + //System.out.println( m.toString() ); + if ( !m.getRowAsString( 0, ',' ).equals( "ARATH,5,5,5,5,5,5" ) ) { + return false; + } + if ( !m.getRowAsString( 1, ',' ).equals( "CAEEL,5,5,5,5,5,5" ) ) { + return false; + } + if ( !m.getRowAsString( 2, ',' ).equals( "HUMAN,5,5,5,5,3,5" ) ) { + return false; + } + if ( !m.getRowAsString( 3, ',' ).equals( "MOUSE,5,5,5,5,3,5" ) ) { + return false; + } + if ( !m.getRowAsString( 4, ',' ).equals( "RAT,5,5,3,3,5,5" ) ) { + return false; + } + if ( !m.getRowAsString( 5, ',' ).equals( "YEAST,5,5,5,5,5,5" ) ) { + return false; + } + // + final String species_trees_2_str = "((((MOUSE,RAT,HUMAN),CAEEL),YEAST),ARATH);"; + final Phylogeny species_tree_2 = factory.create( species_trees_2_str, new NHXParser() )[ 0 ]; + species_tree_2.setRooted( true ); + PhylogenyMethods.transferNodeNameToField( species_tree_2, PhylogenyNodeField.TAXONOMY_CODE, true ); + rio = RIO.executeAnalysis( nhx, + species_tree_2, + ALGORITHM.GSDIR, + REROOTING.BY_ALGORITHM, + "", + true, + false, + true ); + m = rio.getOrthologTable(); + // System.out.println( m.toString() ); + if ( !m.getRowAsString( 0, ',' ).equals( "ARATH,5,5,5,5,5,5" ) ) { + return false; + } + if ( !m.getRowAsString( 1, ',' ).equals( "CAEEL,5,5,5,5,5,5" ) ) { + return false; + } + if ( !m.getRowAsString( 2, ',' ).equals( "HUMAN,5,5,5,5,5,5" ) ) { + return false; + } + if ( !m.getRowAsString( 3, ',' ).equals( "MOUSE,5,5,5,5,5,5" ) ) { + return false; + } + if ( !m.getRowAsString( 4, ',' ).equals( "RAT,5,5,5,5,5,5" ) ) { + return false; + } + if ( !m.getRowAsString( 5, ',' ).equals( "YEAST,5,5,5,5,5,5" ) ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } } \ No newline at end of file diff --git a/forester/java/src/org/forester/sdi/GSDI.java b/forester/java/src/org/forester/sdi/GSDI.java index b9fa794..fa7a52c 100644 --- a/forester/java/src/org/forester/sdi/GSDI.java +++ b/forester/java/src/org/forester/sdi/GSDI.java @@ -37,7 +37,6 @@ import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.Event; -import org.forester.phylogeny.data.Taxonomy; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.sdi.SDIutil.TaxonomyComparisonBase; import org.forester.util.ForesterUtil; @@ -59,6 +58,15 @@ public final class GSDI implements GSDII { final boolean most_parsimonious_duplication_model, final boolean strip_gene_tree, final boolean strip_species_tree ) throws SDIException { + this( gene_tree, species_tree, most_parsimonious_duplication_model, strip_gene_tree, strip_species_tree, true ); + } + + public GSDI( final Phylogeny gene_tree, + final Phylogeny species_tree, + final boolean most_parsimonious_duplication_model, + final boolean strip_gene_tree, + final boolean strip_species_tree, + final boolean transfer_taxonomy ) throws SDIException { _most_parsimonious_duplication_model = most_parsimonious_duplication_model; if ( gene_tree.getRoot().getNumberOfDescendants() == 3 ) { gene_tree.reRoot( gene_tree.getRoot().getChildNode( 2 ) ); @@ -76,7 +84,7 @@ public final class GSDI implements GSDII { PhylogenyMethods.preOrderReId( species_tree ); final GSDIsummaryResult gsdi_summary_result = geneTreePostOrderTraversal( gene_tree, _most_parsimonious_duplication_model, - true ); + transfer_taxonomy ); _speciation_or_duplication_events_sum = gsdi_summary_result.getSpeciationOrDuplicationEventsSum(); _speciations_sum = gsdi_summary_result.getSpeciationsSum(); _duplications_sum = gsdi_summary_result.getDuplicationsSum(); @@ -171,46 +179,13 @@ public final class GSDI implements GSDII { } } g.setLink( s1 ); - if ( transfer_taxonomy ) { - transferTaxonomy( g, s1 ); - } determineEvent( s1, g, most_parsimonious_duplication_model, res ); } - } - return res; - } - - private static final void transferTaxonomy( final PhylogenyNode g, final PhylogenyNode s ) { - if ( s.getNodeData().isHasTaxonomy() ) { - g.getNodeData().setTaxonomy( s.getNodeData().getTaxonomy() ); - if ( g.isInternal() ) { - if ( g.getChildNode1().isInternal() ) { - if ( g.getChildNode1().getNodeData().isHasTaxonomy() && g.getChildNode1().getNodeData().getTaxonomy() == s.getNodeData().getTaxonomy() ) { - g.getChildNode1().getNodeData().setTaxonomy( null ); - } - } - if ( g.getChildNode2().isInternal() ) { - if ( g.getChildNode2().getNodeData().isHasTaxonomy() && g.getChildNode2().getNodeData().getTaxonomy() == s.getNodeData().getTaxonomy() ) { - g.getChildNode2().getNodeData().setTaxonomy( null ); - } - } - } - } - else if ( ForesterUtil.isEmpty( g.getName() ) && !ForesterUtil.isEmpty( s.getName() ) ) { - g.setName( s.getName() ); - if ( g.isInternal() ) { - if ( g.getChildNode1().isInternal() ) { - if ( g.getChildNode1().getName() == s.getName() ) { - g.getChildNode1().setName( "" ); - } - } - if ( g.getChildNode2().isInternal() ) { - if ( g.getChildNode2().getName() == s.getName() ) { - g.getChildNode2().setName( "" ); - } - } + if ( transfer_taxonomy ) { + transferTaxonomy( g ); } } + return res; } final static GSDIsummaryResult geneTreePostOrderTraversal( final Phylogeny gene_tree, @@ -347,6 +322,40 @@ public final class GSDI implements GSDII { return res; } + static final void transferTaxonomy( final PhylogenyNode g ) { + if ( g == null ) { + throw new IllegalArgumentException( "gene tree node is null" ); + } + final PhylogenyNode s = g.getLink(); + if ( s == null ) { + throw new IllegalArgumentException( "mapped species tree node is null" ); + } + if ( s.getNodeData().isHasTaxonomy() ) { + g.getNodeData().setTaxonomy( s.getNodeData().getTaxonomy() ); + if ( g.isInternal() ) { + if ( g.getChildNode1().isInternal() && g.getChildNode1().getNodeData().isHasTaxonomy() + && ( g.getChildNode1().getNodeData().getTaxonomy() == s.getNodeData().getTaxonomy() ) ) { + g.getChildNode1().getNodeData().setTaxonomy( null ); + } + if ( g.getChildNode2().isInternal() && g.getChildNode2().getNodeData().isHasTaxonomy() + && ( g.getChildNode2().getNodeData().getTaxonomy() == s.getNodeData().getTaxonomy() ) ) { + g.getChildNode2().getNodeData().setTaxonomy( null ); + } + } + } + else if ( ForesterUtil.isEmpty( g.getName() ) && !ForesterUtil.isEmpty( s.getName() ) ) { + g.setName( s.getName() ); + if ( g.isInternal() ) { + if ( g.getChildNode1().isInternal() && ( g.getChildNode1().getName() == s.getName() ) ) { + g.getChildNode1().setName( "" ); + } + if ( g.getChildNode2().isInternal() && ( g.getChildNode2().getName() == s.getName() ) ) { + g.getChildNode2().setName( "" ); + } + } + } + } + private final static void addScientificNamesMappedToReducedSpecificity( final String s1, final String s2, final SortedSet scientific_names_mapped_to_reduced_specificity ) { diff --git a/forester/java/src/org/forester/sdi/GSDIR.java b/forester/java/src/org/forester/sdi/GSDIR.java index 6b170d9..7ca8dca 100644 --- a/forester/java/src/org/forester/sdi/GSDIR.java +++ b/forester/java/src/org/forester/sdi/GSDIR.java @@ -52,7 +52,8 @@ public class GSDIR implements GSDII { public GSDIR( final Phylogeny gene_tree, final Phylogeny species_tree, final boolean strip_gene_tree, - final boolean strip_species_tree ) throws SDIException { + final boolean strip_species_tree, + final boolean transfer_taxonomy ) throws SDIException { final NodesLinkingResult nodes_linking_result = GSDI.linkNodesOfG( gene_tree, species_tree, strip_gene_tree, @@ -80,7 +81,6 @@ public class GSDIR implements GSDII { for( final PhylogenyBranch branch : gene_tree_branches_post_order ) { reRoot( branch, gene_tree ); PhylogenyMethods.preOrderReId( species_tree ); - final GSDIsummaryResult gsdi_result = GSDI.geneTreePostOrderTraversal( gene_tree, true, min_duplications_sum ); @@ -90,6 +90,9 @@ public class GSDIR implements GSDII { if ( gsdi_result.getDuplicationsSum() < min_duplications_sum ) { min_duplications_sum = gsdi_result.getDuplicationsSum(); speciations_sum = gsdi_result.getSpeciationsSum(); + if ( transfer_taxonomy ) { + transferTaxonomy( gene_tree ); + } _min_duplications_sum_gene_tree = gene_tree.copy(); } else if ( gsdi_result.getDuplicationsSum() == min_duplications_sum ) { @@ -98,6 +101,9 @@ public class GSDIR implements GSDII { l.add( gene_tree ); final int index = getIndexesOfShortestTree( l ).get( 0 ); if ( index == 1 ) { + if ( transfer_taxonomy ) { + transferTaxonomy( gene_tree ); + } _min_duplications_sum_gene_tree = gene_tree.copy(); } } @@ -208,4 +214,10 @@ public class GSDIR implements GSDII { throw new IllegalArgumentException( "reRoot( Branch b ): b is not a branch." ); } } + + private final static void transferTaxonomy( final Phylogeny gt ) { + for( final PhylogenyNodeIterator it = gt.iteratorPostorder(); it.hasNext(); ) { + GSDI.transferTaxonomy( it.next() ); + } + } } diff --git a/forester/java/src/org/forester/sdi/TestGSDI.java b/forester/java/src/org/forester/sdi/TestGSDI.java index ed1e4d1..027e09b 100644 --- a/forester/java/src/org/forester/sdi/TestGSDI.java +++ b/forester/java/src/org/forester/sdi/TestGSDI.java @@ -1467,7 +1467,7 @@ public final class TestGSDI { final Phylogeny gene_0 = factory0.create( gene_0_str, new NHXParser() )[ 0 ]; s_0.setRooted( true ); gene_0.setRooted( true ); - final GSDIR sdi0 = new GSDIR( gene_0, s_0, true, true ); + final GSDIR sdi0 = new GSDIR( gene_0, s_0, true, true, true ); if ( sdi0.getSpeciationsSum() != 0 ) { return false; } @@ -1482,7 +1482,7 @@ public final class TestGSDI { final Phylogeny gene_00 = factory00.create( gene_00_str, new NHXParser() )[ 0 ]; s_00.setRooted( true ); gene_00.setRooted( true ); - final GSDIR sdi00 = new GSDIR( gene_00, s_00, true, true ); + final GSDIR sdi00 = new GSDIR( gene_00, s_00, true, true, true ); if ( sdi00.getSpeciationsSum() != 0 ) { return false; } @@ -1495,25 +1495,25 @@ public final class TestGSDI { s1.setRooted( true ); final Phylogeny g1 = TestGSDI .createPhylogeny( "(HUMAN[&&NHX:S=HUMAN],(RAT[&&NHX:S=RAT],(CAEEL[&&NHX:T=:S=CAEEL],YEAST[&&NHX:S=YEAST])))" ); - final GSDIR sdi1 = new GSDIR( g1.copy(), s1.copy(), false, false ); + final GSDIR sdi1 = new GSDIR( g1.copy(), s1.copy(), false, false, true ); if ( sdi1.getMinDuplicationsSum() != 0 ) { return false; } final Phylogeny g2 = TestGSDI .createPhylogeny( "(((HUMAN[&&NHX:S=HUMAN],RAT[&&NHX:S=RAT]),CAEEL[&&NHX:T=:S=CAEEL]),YEAST[&&NHX:S=YEAST])" ); - final GSDIR sdi2 = new GSDIR( g2.copy(), s1.copy(), false, false ); + final GSDIR sdi2 = new GSDIR( g2.copy(), s1.copy(), false, false, true ); if ( sdi2.getMinDuplicationsSum() != 0 ) { return false; } final Phylogeny g3 = TestGSDI .createPhylogeny( "(RAT[&&NHX:S=RAT],HUMAN[&&NHX:S=HUMAN],(YEAST[&&NHX:S=YEAST],CAEEL[&&NHX:T=:S=CAEEL]))" ); - final GSDIR sdi3 = new GSDIR( g3.copy(), s1.copy(), false, false ); + final GSDIR sdi3 = new GSDIR( g3.copy(), s1.copy(), false, false, true ); if ( sdi3.getMinDuplicationsSum() != 0 ) { return false; } final Phylogeny g4 = TestGSDI .createPhylogeny( "(((((MOUSE[&&NHX:S=MOUSE],[&&NHX:S=RAT]),[&&NHX:S=HUMAN]),([&&NHX:S=ARATH],[&&NHX:S=YEAST])),[&&NHX:S=CAEEL]),[&&NHX:S=CAEBR])" ); - final GSDIR sdi4 = new GSDIR( g4.copy(), s1.copy(), false, false ); + final GSDIR sdi4 = new GSDIR( g4.copy(), s1.copy(), false, false, true ); if ( sdi4.getMinDuplicationsSum() != 0 ) { return false; } @@ -1522,7 +1522,7 @@ public final class TestGSDI { final Phylogeny s2 = ParserBasedPhylogenyFactory.getInstance().create( s2str, new NHXParser() )[ 0 ]; s2.setRooted( true ); final Phylogeny g5 = TestGSDI.createPhylogeny( s2str ); - final GSDIR sdi5 = new GSDIR( g5, s2, false, false ); + final GSDIR sdi5 = new GSDIR( g5, s2, false, false, true ); if ( sdi5.getMinDuplicationsSum() != 0 ) { System.out.println( sdi5.getMinDuplicationsSum() ); return false; diff --git a/forester/java/src/org/forester/util/ForesterConstants.java b/forester/java/src/org/forester/util/ForesterConstants.java index 3bf1249..e3f2eb1 100644 --- a/forester/java/src/org/forester/util/ForesterConstants.java +++ b/forester/java/src/org/forester/util/ForesterConstants.java @@ -27,8 +27,8 @@ package org.forester.util; public final class ForesterConstants { - public final static String FORESTER_VERSION = "1.025"; - public final static String FORESTER_DATE = "130314"; + public final static String FORESTER_VERSION = "1.025+"; + public final static String FORESTER_DATE = "130325"; public final static String PHYLO_XML_VERSION = "1.10"; public final static String PHYLO_XML_LOCATION = "http://www.phyloxml.org"; public final static String PHYLO_XML_XSD = "phyloxml.xsd";