final static private String GSDIR_OPTION = "r";
final static private String MOST_PARSIMONIOUS_OPTION = "m";
final static private String GUESS_FORMAT_OF_SPECIES_TREE = "q";
+ final static private String TRANSFER_TAXONOMY_OPTION = "t";
final static private String HELP_OPTION_1 = "help";
final static private String HELP_OPTION_2 = "h";
final static private String SUFFIX_FOR_SPECIES_TREE_USED = "_species_tree_used.xml";
final static private String LOGFILE_SUFFIX = "_gsdi_log.txt";
final static private String REMAPPED_SUFFIX = "_gsdi_remapped.txt";
final static private String PRG_NAME = "gsdi";
- final static private String PRG_VERSION = "1.000";
- final static private String PRG_DATE = "120629";
+ final static private String PRG_VERSION = "1.001";
+ final static private String PRG_DATE = "130325";
final static private String PRG_DESC = "general speciation duplication inference";
final static private String E_MAIL = "phylosoft@gmail.com";
- final static private String WWW = "www.phylosoft.org/forester";
+ final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester";
public static void main( final String args[] ) {
try {
allowed_options.add( gsdi.GUESS_FORMAT_OF_SPECIES_TREE );
allowed_options.add( gsdi.MOST_PARSIMONIOUS_OPTION );
allowed_options.add( gsdi.ALLOW_STRIPPING_OF_GENE_TREE_OPTION );
+ allowed_options.add( TRANSFER_TAXONOMY_OPTION );
final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
if ( dissallowed_options.length() > 0 ) {
ForesterUtil.fatalError( gsdi.PRG_NAME, "unknown option(s): " + dissallowed_options );
}
allow_stripping_of_gene_tree = true;
}
+ boolean transfer_taxonomy = false;
+ if ( cla.isOptionSet( TRANSFER_TAXONOMY_OPTION ) ) {
+ transfer_taxonomy = true;
+ }
Phylogeny species_tree = null;
Phylogeny gene_tree = null;
File gene_tree_file = null;
+ ( ForesterUtil.isEmpty( species_tree.getName() ) ? "" : gene_tree.getName() ) );
// Report the species tree's own name (empty string when it has none); the gene tree's name
// must not be printed under the species tree label.
System.out.println( "Species tree name : "
+ ( ForesterUtil.isEmpty( species_tree.getName() ) ? "" : species_tree.getName() ) );
+ System.out.println( "Transfer taxonomy : " + transfer_taxonomy );
GSDII gsdii = null;
final long start_time = new Date().getTime();
try {
species_tree,
most_parsimonous_duplication_model,
allow_stripping_of_gene_tree,
- true );
+ true,
+ transfer_taxonomy );
}
else if ( base_algorithm == ALGORITHM.GSDIR ) {
- gsdii = new GSDIR( gene_tree, species_tree, allow_stripping_of_gene_tree, true );
+ gsdii = new GSDIR( gene_tree, species_tree, allow_stripping_of_gene_tree, true, transfer_taxonomy );
}
}
catch ( final SDIException e ) {
+ ": to allow species tree in other formats than phyloXML (i.e. Newick, NHX, Nexus)" );
System.out.println( " -" + gsdi.GSDIR_OPTION
+ ": to use GSDIR algorithm instead of GSDI algorithm (re-rooting)" );
+ System.out.println( " -" + TRANSFER_TAXONOMY_OPTION
+ + ": to transfer taxonomic data from species tree to gene tree\n" );
System.out.println();
System.out.println( "Gene tree:" );
// Help text: the expected gene tree format is phyloXML (spelled as in the species tree option text).
System.out.println( " in phyloXML format, with taxonomy and sequence data in appropriate fields" );
public class rio {
- final static private String PRG_NAME = "rio";
- final static private String PRG_VERSION = "4.000 beta 9";
- final static private String PRG_DATE = "2013.01.14";
- final static private String E_MAIL = "phyloxml@gmail.com";
- final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester";
- final static private String HELP_OPTION_1 = "help";
- final static private String HELP_OPTION_2 = "h";
- final static private String GT_FIRST = "f";
- final static private String GT_LAST = "l";
- final static private String REROOTING_OPT = "r";
- final static private String OUTGROUP = "o";
- final static private String RETURN_SPECIES_TREE = "s";
- final static private String RETURN_BEST_GENE_TREE = "g";
- final static private String USE_SDIR = "b";
+ final static private String PRG_NAME = "rio";
+ final static private String PRG_VERSION = "4.000 beta 10";
+ final static private String PRG_DATE = "130325";
+ final static private String E_MAIL = "phyloxml@gmail.com";
+ final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester";
+ final static private String HELP_OPTION_1 = "help";
+ final static private String HELP_OPTION_2 = "h";
+ final static private String GT_FIRST = "f";
+ final static private String GT_LAST = "l";
+ final static private String REROOTING_OPT = "r";
+ final static private String OUTGROUP = "o";
+ final static private String RETURN_SPECIES_TREE = "s";
+ final static private String RETURN_BEST_GENE_TREE = "g";
+ final static private String USE_SDIR = "b";
+ final static private String TRANSFER_TAXONOMY_OPTION = "t";
public static void main( final String[] args ) {
ForesterUtil.printProgramInformation( PRG_NAME,
allowed_options.add( USE_SDIR );
allowed_options.add( RETURN_SPECIES_TREE );
allowed_options.add( RETURN_BEST_GENE_TREE );
+ allowed_options.add( TRANSFER_TAXONOMY_OPTION );
final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
if ( dissallowed_options.length() > 0 ) {
ForesterUtil.fatalError( "unknown option(s): " + dissallowed_options );
ForesterUtil.fatalError( "\"" + return_gene_tree + "\" already exists" );
}
}
+ boolean transfer_taxonomy = false;
+ if ( !sdir && cla.isOptionSet( TRANSFER_TAXONOMY_OPTION ) ) {
+ if ( return_gene_tree == null ) {
+ ForesterUtil.fatalError( "no point in transferring taxonomy data without returning best gene tree" );
+ }
+ transfer_taxonomy = true;
+ }
ForesterUtil.fatalErrorIfFileNotReadable( gene_trees_file );
ForesterUtil.fatalErrorIfFileNotReadable( species_tree_file );
if ( orthology_outtable.exists() ) {
gt_first,
gt_last,
logfile != null,
- true );
+ true,
+ transfer_taxonomy );
}
else {
iterating = true;
gt_first,
gt_last,
logfile != null,
- true );
+ true,
+ transfer_taxonomy );
}
if ( algorithm == ALGORITHM.GSDIR ) {
System.out.println( "Taxonomy linking based on : " + rio.getGSDIRtaxCompBase() );
.println( " -" + RETURN_SPECIES_TREE + "=<outfile> : to write the (stripped) species tree to file" );
System.out.println( " -" + RETURN_BEST_GENE_TREE
+ "=<outfile> : to write (one) minimal duplication gene tree to file" );
+ System.out
+ .println( " -"
+ + TRANSFER_TAXONOMY_OPTION
+ + " : to transfer taxonomic data from species tree to returned minimal duplication gene tree\n"
+ + " (if -" + RETURN_BEST_GENE_TREE + " option is used)" );
System.out.println( " -" + USE_SDIR
+ " : to use SDIR instead of GSDIR (faster, but non-binary species trees are" );
System.out.println( " disallowed, as are most options)" );
import org.forester.phylogeny.PhylogenyMethods;
import org.forester.util.ForesterUtil;
-
public final class Archaeopteryx {
public static MainFrame createApplication( final Phylogeny phylogeny ) {
GSDI gsdi = null;
final Phylogeny species_tree = _species_tree.copy();
try {
- gsdi = new GSDI( gene_tree, species_tree, false, true, true );
+ gsdi = new GSDI( gene_tree, species_tree, false, true, true, true );
}
catch ( final SDIException e ) {
JOptionPane.showMessageDialog( this,
GSDIR gsdir = null;
final Phylogeny species_tree = _species_tree.copy();
try {
- gsdir = new GSDIR( gene_tree, species_tree, true, true );
+ gsdir = new GSDIR( gene_tree, species_tree, true, true, true );
}
catch ( final SDIException e ) {
JOptionPane.showMessageDialog( this,
public final static boolean __SYNTH_LF = false; // TODO remove me
public final static boolean ALLOW_DDBJ_BLAST = false;
public final static String PRG_NAME = "Archaeopteryx";
- final static String VERSION = "0.9809 A1ST";
- final static String PRG_DATE = "130314";
+ final static String VERSION = "0.9810 A1ST";
+ final static String PRG_DATE = "130325";
final static String DEFAULT_CONFIGURATION_FILE_NAME = "_aptx_configuration_file";
final static String[] DEFAULT_FONT_CHOICES = { "Verdana", "Tahoma",
"Arial", "Helvetica", "Dialog", "Lucida Sans", "SansSerif", "Sans-serif", "Sans" };
GSDI gsdi = null;
final Phylogeny species_tree = getSpeciesTree().copy();
try {
- gsdi = new GSDI( gene_tree, species_tree, false, true, true );
+ gsdi = new GSDI( gene_tree, species_tree, false, true, true, true );
}
catch ( final SDIException e ) {
JOptionPane.showMessageDialog( this,
GSDIR gsdir = null;
final Phylogeny species_tree = getSpeciesTree().copy();
try {
- gsdir = new GSDIR( gene_tree, species_tree, true, true );
+ gsdir = new GSDIR( gene_tree, species_tree, true, true, true );
}
catch ( final SDIException e ) {
JOptionPane.showMessageDialog( this,
int first,
int last,
final boolean produce_log,
- final boolean verbose ) throws IOException, SDIException, RIOException {
+ final boolean verbose,
+ final boolean transfer_taxonomy ) throws IOException, SDIException, RIOException {
if ( ( last == DEFAULT_RANGE ) && ( first >= 0 ) ) {
last = END_OF_GT;
}
_removed_gene_tree_nodes = null;
_duplications_stats = new BasicDescriptiveStatistics();
p.reset();
- inferOrthologs( p, species_tree, algorithm, outgroup, first, last );
+ inferOrthologs( p, species_tree, algorithm, outgroup, first, last, transfer_taxonomy );
_species_tree = species_tree;
}
int first,
int last,
final boolean produce_log,
- final boolean verbose ) throws IOException, SDIException, RIOException {
+ final boolean verbose,
+ final boolean transfer_taxonomy ) throws IOException, SDIException, RIOException {
if ( ( last == DEFAULT_RANGE ) && ( first >= 0 ) ) {
last = gene_trees.length - 1;
}
_analyzed_gene_trees = null;
_removed_gene_tree_nodes = null;
_duplications_stats = new BasicDescriptiveStatistics();
- inferOrthologs( gene_trees, species_tree, algorithm, outgroup, first, last );
+ inferOrthologs( gene_trees, species_tree, algorithm, outgroup, first, last, transfer_taxonomy );
_species_tree = species_tree;
}
final ALGORITHM algorithm,
final String outgroup,
int first,
- final int last ) throws SDIException, RIOException, FileNotFoundException,
- IOException {
+ final int last,
+ final boolean transfer_taxonomy ) throws SDIException, RIOException,
+ FileNotFoundException, IOException {
if ( !parser.hasNext() ) {
throw new RIOException( "no gene trees to analyze" );
}
throw new RIOException( "failed to establish species based mapping between gene and species trees" );
}
}
- final Phylogeny analyzed_gt = performOrthologInference( gt, species_tree, algorithm, outgroup, counter );
+ final Phylogeny analyzed_gt = performOrthologInference( gt,
+ species_tree,
+ algorithm,
+ outgroup,
+ counter,
+ transfer_taxonomy );
RIO.calculateOrthologTable( analyzed_gt, true, counter );
++counter;
}
final ALGORITHM algorithm,
final String outgroup,
final int first,
- final int last ) throws SDIException, RIOException, FileNotFoundException,
- IOException {
+ final int last,
+ final boolean transfer_taxonomy ) throws SDIException, RIOException,
+ FileNotFoundException, IOException {
if ( algorithm == ALGORITHM.SDIR ) {
// Removes from species_tree all species not found in gene_tree.
PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( gene_trees[ 0 ], species_tree );
throw new RIOException( "failed to establish species based mapping between gene and species trees" );
}
}
- _analyzed_gene_trees[ i ] = performOrthologInference( gt, species_tree, algorithm, outgroup, i );
+ _analyzed_gene_trees[ i ] = performOrthologInference( gt,
+ species_tree,
+ algorithm,
+ outgroup,
+ i,
+ transfer_taxonomy );
}
if ( log() ) {
postLog( species_tree, first, last );
final Phylogeny species_tree,
final ALGORITHM algorithm,
final String outgroup,
- final int i ) throws SDIException, RIOException {
+ final int i,
+ final boolean transfer_taxonomy ) throws SDIException,
+ RIOException {
final Phylogeny assigned_tree;
switch ( algorithm ) {
case SDIR: {
break;
}
case GSDIR: {
- assigned_tree = performOrthologInferenceByGSDI( gene_tree, species_tree, outgroup, i );
+ assigned_tree = performOrthologInferenceByGSDI( gene_tree, species_tree, outgroup, i, transfer_taxonomy );
break;
}
default: {
private final Phylogeny performOrthologInferenceByGSDI( final Phylogeny gene_tree,
final Phylogeny species_tree,
final String outgroup,
- final int i ) throws SDIException, RIOException {
+ final int i,
+ final boolean transfer_taxonomy ) throws SDIException,
+ RIOException {
final Phylogeny assigned_tree;
final int dups;
if ( _rerooting == REROOTING.BY_ALGORITHM ) {
- final GSDIR gsdir = new GSDIR( gene_tree, species_tree, true, i == 0 );
+ final GSDIR gsdir = new GSDIR( gene_tree, species_tree, true, i == 0, transfer_taxonomy );
assigned_tree = gsdir.getMinDuplicationsSumGeneTree();
if ( i == 0 ) {
_removed_gene_tree_nodes = gsdir.getStrippedExternalGeneTreeNodes();
final PhylogenyNode n = gene_tree.getNode( outgroup );
gene_tree.reRoot( n );
}
- final GSDI gsdi = new GSDI( gene_tree, species_tree, true, true, true );
+ final GSDI gsdi = new GSDI( gene_tree, species_tree, true, true, true, transfer_taxonomy );
_removed_gene_tree_nodes = gsdi.getStrippedExternalGeneTreeNodes();
for( final PhylogenyNode r : _removed_gene_tree_nodes ) {
if ( !r.getNodeData().isHasTaxonomy() ) {
final int first,
final int last,
final boolean produce_log,
- final boolean verbose ) throws IOException, SDIException, RIOException {
+ final boolean verbose,
+ final boolean transfer_taxonomy ) throws IOException, SDIException,
+ RIOException {
final Phylogeny[] gene_trees = parseGeneTrees( gene_trees_file );
if ( gene_trees.length < 1 ) {
throw new RIOException( "\"" + gene_trees_file + "\" is devoid of appropriate gene trees" );
false,
true,
TAXONOMY_EXTRACTION.NO );
- return new RIO( gene_trees, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose );
+ return new RIO( gene_trees,
+ species_tree,
+ algorithm,
+ rerooting,
+ outgroup,
+ first,
+ last,
+ produce_log,
+ verbose,
+ transfer_taxonomy );
}
public final static RIO executeAnalysis( final File gene_trees_file,
final REROOTING rerooting,
final String outgroup,
final boolean produce_log,
- final boolean verbose ) throws IOException, SDIException, RIOException {
+ final boolean verbose,
+ final boolean transfer_taxonomy ) throws IOException, SDIException,
+ RIOException {
return new RIO( parseGeneTrees( gene_trees_file ),
species_tree,
algorithm,
DEFAULT_RANGE,
DEFAULT_RANGE,
produce_log,
- verbose );
+ verbose,
+ transfer_taxonomy );
}
public final static RIO executeAnalysis( final File gene_trees_file,
final int first,
final int last,
final boolean produce_log,
- final boolean verbose ) throws IOException, SDIException, RIOException {
+ final boolean verbose,
+ final boolean transfer_taxonomy ) throws IOException, SDIException,
+ RIOException {
return new RIO( parseGeneTrees( gene_trees_file ),
species_tree,
algorithm,
first,
last,
produce_log,
- verbose );
+ verbose,
+ transfer_taxonomy );
}
public final static RIO executeAnalysis( final IteratingPhylogenyParser p,
final int first,
final int last,
final boolean produce_log,
- final boolean verbose ) throws IOException, SDIException, RIOException {
+ final boolean verbose,
+ final boolean transfer_taxonomy ) throws IOException, SDIException,
+ RIOException {
final Phylogeny g0 = p.next();
if ( ( g0 == null ) || g0.isEmpty() || ( g0.getNumberOfExternalNodes() < 2 ) ) {
throw new RIOException( "input file does not seem to contain any gene trees" );
true,
TAXONOMY_EXTRACTION.NO );
p.reset();
- return new RIO( p, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose );
+ return new RIO( p,
+ species_tree,
+ algorithm,
+ rerooting,
+ outgroup,
+ first,
+ last,
+ produce_log,
+ verbose,
+ transfer_taxonomy );
}
public final static RIO executeAnalysis( final IteratingPhylogenyParser p,
final REROOTING rerooting,
final String outgroup,
final boolean produce_log,
- final boolean verbose ) throws IOException, SDIException, RIOException {
+ final boolean verbose,
+ final boolean transfer_taxonomy ) throws IOException, SDIException,
+ RIOException {
return new RIO( p,
species_tree,
algorithm,
DEFAULT_RANGE,
DEFAULT_RANGE,
produce_log,
- verbose );
+ verbose,
+ transfer_taxonomy );
}
public final static RIO executeAnalysis( final IteratingPhylogenyParser p,
final int first,
final int last,
final boolean produce_log,
- final boolean verbose ) throws IOException, SDIException, RIOException {
- return new RIO( p, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose );
+ final boolean verbose,
+ final boolean transfer_taxonomy ) throws IOException, SDIException,
+ RIOException {
+ return new RIO( p,
+ species_tree,
+ algorithm,
+ rerooting,
+ outgroup,
+ first,
+ last,
+ produce_log,
+ verbose,
+ transfer_taxonomy );
}
public final static RIO executeAnalysis( final Phylogeny[] gene_trees, final Phylogeny species_tree )
DEFAULT_RANGE,
DEFAULT_RANGE,
false,
+ false,
false );
}
final REROOTING rerooting,
final String outgroup,
final boolean produce_log,
- final boolean verbose ) throws IOException, SDIException, RIOException {
+ final boolean verbose,
+ final boolean transfer_taxonomy ) throws IOException, SDIException,
+ RIOException {
return new RIO( gene_trees,
species_tree,
algorithm,
DEFAULT_RANGE,
DEFAULT_RANGE,
produce_log,
- verbose );
+ verbose,
+ transfer_taxonomy );
}
public final static RIO executeAnalysis( final Phylogeny[] gene_trees,
final int first,
final int last,
final boolean produce_log,
- final boolean verbose ) throws IOException, SDIException, RIOException {
- return new RIO( gene_trees, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose );
+ final boolean verbose,
+ final boolean transfer_taxonomy ) throws IOException, SDIException,
+ RIOException {
+ return new RIO( gene_trees,
+ species_tree,
+ algorithm,
+ rerooting,
+ outgroup,
+ first,
+ last,
+ produce_log,
+ verbose,
+ transfer_taxonomy );
}
private final static void calculateOrthologTable( final Phylogeny g, final boolean sort, final int counter )
return true;
}
- private static boolean testRIO_GSDIR_Iterating() {
- try {
- final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
- final NHXParser nhx = new NHXParser();
- nhx.setReplaceUnderscores( false );
- nhx.setIgnoreQuotes( true );
- nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.AGRESSIVE );
- final String gene_trees_1_str = "(((((MOUSE,RAT),HUMAN),CAEEL),YEAST),ARATH);"
- + "((((MOUSE,RAT),HUMAN),(ARATH,YEAST)),CAEEL);" + "((MOUSE,RAT),(((ARATH,YEAST),CAEEL),HUMAN));"
- + "(((((MOUSE,HUMAN),RAT),CAEEL),YEAST),ARATH);" + "((((HUMAN,MOUSE),RAT),(ARATH,YEAST)),CAEEL);";
- nhx.setSource( gene_trees_1_str );
- final String species_trees_1_str = "(((((MOUSE,RAT),HUMAN),CAEEL),YEAST),ARATH);";
- final Phylogeny species_tree_1 = factory.create( species_trees_1_str, new NHXParser() )[ 0 ];
- species_tree_1.setRooted( true );
- PhylogenyMethods.transferNodeNameToField( species_tree_1, PhylogenyNodeField.TAXONOMY_CODE, true );
- //Archaeopteryx.createApplication( species_trees_1 );
- RIO rio = RIO.executeAnalysis( nhx,
- species_tree_1,
- ALGORITHM.GSDIR,
- REROOTING.BY_ALGORITHM,
- "",
- true,
- false );
- if ( rio.getExtNodesOfAnalyzedGeneTrees() != 6 ) {
- return false;
- }
- if ( rio.getGSDIRtaxCompBase() != TaxonomyComparisonBase.CODE ) {
- return false;
- }
- if ( rio.getRemovedGeneTreeNodes().size() != 0 ) {
- return false;
- }
- IntMatrix m = rio.getOrthologTable();
- //System.out.println( m.toString() );
- if ( !m.getRowAsString( 0, ',' ).equals( "ARATH,5,5,5,5,5,5" ) ) {
- return false;
- }
- if ( !m.getRowAsString( 1, ',' ).equals( "CAEEL,5,5,5,5,5,5" ) ) {
- return false;
- }
- if ( !m.getRowAsString( 2, ',' ).equals( "HUMAN,5,5,5,5,3,5" ) ) {
- return false;
- }
- if ( !m.getRowAsString( 3, ',' ).equals( "MOUSE,5,5,5,5,3,5" ) ) {
- return false;
- }
- if ( !m.getRowAsString( 4, ',' ).equals( "RAT,5,5,3,3,5,5" ) ) {
- return false;
- }
- if ( !m.getRowAsString( 5, ',' ).equals( "YEAST,5,5,5,5,5,5" ) ) {
- return false;
- }
- //
- final String species_trees_2_str = "((((MOUSE,RAT,HUMAN),CAEEL),YEAST),ARATH);";
- final Phylogeny species_tree_2 = factory.create( species_trees_2_str, new NHXParser() )[ 0 ];
- species_tree_2.setRooted( true );
- PhylogenyMethods.transferNodeNameToField( species_tree_2, PhylogenyNodeField.TAXONOMY_CODE, true );
- rio = RIO.executeAnalysis( nhx, species_tree_2, ALGORITHM.GSDIR, REROOTING.BY_ALGORITHM, "", true, false );
- m = rio.getOrthologTable();
- // System.out.println( m.toString() );
- if ( !m.getRowAsString( 0, ',' ).equals( "ARATH,5,5,5,5,5,5" ) ) {
- return false;
- }
- if ( !m.getRowAsString( 1, ',' ).equals( "CAEEL,5,5,5,5,5,5" ) ) {
- return false;
- }
- if ( !m.getRowAsString( 2, ',' ).equals( "HUMAN,5,5,5,5,5,5" ) ) {
- return false;
- }
- if ( !m.getRowAsString( 3, ',' ).equals( "MOUSE,5,5,5,5,5,5" ) ) {
- return false;
- }
- if ( !m.getRowAsString( 4, ',' ).equals( "RAT,5,5,5,5,5,5" ) ) {
- return false;
- }
- if ( !m.getRowAsString( 5, ',' ).equals( "YEAST,5,5,5,5,5,5" ) ) {
- return false;
- }
- }
- catch ( final Exception e ) {
- e.printStackTrace( System.out );
- return false;
- }
- return true;
- }
-
private static boolean testRIO_GSDIR() {
try {
final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
REROOTING.BY_ALGORITHM,
"",
true,
- false );
+ false,
+ true );
if ( rio.getAnalyzedGeneTrees().length != 4 ) {
return false;
}
REROOTING.BY_ALGORITHM,
"",
true,
- false );
+ false,
+ true );
if ( rio.getAnalyzedGeneTrees().length != 2 ) {
return false;
}
REROOTING.BY_ALGORITHM,
"",
true,
- false );
+ false,
+ true );
if ( rio.getAnalyzedGeneTrees().length != 3 ) {
return false;
}
REROOTING.BY_ALGORITHM,
"",
true,
- false );
+ false,
+ true );
if ( rio.getAnalyzedGeneTrees().length != 1 ) {
return false;
}
REROOTING.BY_ALGORITHM,
"",
true,
- false );
+ false,
+ true );
if ( rio.getAnalyzedGeneTrees().length != 1 ) {
return false;
}
REROOTING.BY_ALGORITHM,
"",
true,
- false );
+ false,
+ true );
if ( rio.getAnalyzedGeneTrees().length != 5 ) {
return false;
}
-1,
-1,
true,
- false );
+ false,
+ true );
if ( r0.getGSDIRtaxCompBase() != TaxonomyComparisonBase.CODE ) {
return false;
}
-1,
-1,
true,
- false );
+ false,
+ true );
if ( r0.getGSDIRtaxCompBase() != TaxonomyComparisonBase.ID ) {
return false;
}
-1,
-1,
true,
- false );
+ false,
+ true );
if ( r0.getGSDIRtaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) {
return false;
}
-1,
-1,
true,
- false );
+ false,
+ true );
if ( r0.getGSDIRtaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) {
return false;
}
-1,
-1,
true,
- false );
+ false,
+ true );
if ( r0.getGSDIRtaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) {
return false;
}
10,
19,
true,
- false );
+ false,
+ true );
if ( r0.getGSDIRtaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) {
return false;
}
-1,
-1,
true,
- false );
+ false,
+ true );
if ( r0.getGSDIRtaxCompBase() != TaxonomyComparisonBase.CODE ) {
return false;
}
-1,
-1,
true,
- false );
+ false,
+ true );
if ( r0.getGSDIRtaxCompBase() != TaxonomyComparisonBase.CODE ) {
return false;
}
}
return true;
}
+
+ /**
+  * Exercises RIO.executeAnalysis with ALGORITHM.GSDIR and REROOTING.BY_ALGORITHM, handing it the
+  * NHXParser itself (loaded with five six-taxon gene trees) instead of parsed trees, and passing
+  * true as the final (transfer taxonomy) argument.
+  * <p>
+  * The analysis is run twice against the same parser: first with a fully resolved species tree,
+  * then with a species tree in which MOUSE, RAT and HUMAN form a single multifurcation. After each
+  * run the rows of the resulting ortholog table are compared with the expected strings.
+  *
+  * @return true if every check passes; false at the first failing check, or if any exception is
+  *         thrown (its stack trace is printed to System.out)
+  */
+ private static boolean testRIO_GSDIR_Iterating() {
+     try {
+         final PhylogenyFactory phylogeny_factory = ParserBasedPhylogenyFactory.getInstance();
+         final NHXParser gene_tree_parser = new NHXParser();
+         gene_tree_parser.setReplaceUnderscores( false );
+         gene_tree_parser.setIgnoreQuotes( true );
+         gene_tree_parser.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.AGRESSIVE );
+         final String gene_trees = "(((((MOUSE,RAT),HUMAN),CAEEL),YEAST),ARATH);"
+                 + "((((MOUSE,RAT),HUMAN),(ARATH,YEAST)),CAEEL);"
+                 + "((MOUSE,RAT),(((ARATH,YEAST),CAEEL),HUMAN));"
+                 + "(((((MOUSE,HUMAN),RAT),CAEEL),YEAST),ARATH);"
+                 + "((((HUMAN,MOUSE),RAT),(ARATH,YEAST)),CAEEL);";
+         gene_tree_parser.setSource( gene_trees );
+         // First run: binary species tree.
+         final String resolved_species_str = "(((((MOUSE,RAT),HUMAN),CAEEL),YEAST),ARATH);";
+         final Phylogeny resolved_species_tree = phylogeny_factory.create( resolved_species_str, new NHXParser() )[ 0 ];
+         resolved_species_tree.setRooted( true );
+         PhylogenyMethods.transferNodeNameToField( resolved_species_tree, PhylogenyNodeField.TAXONOMY_CODE, true );
+         RIO analysis = RIO.executeAnalysis( gene_tree_parser,
+                                             resolved_species_tree,
+                                             ALGORITHM.GSDIR,
+                                             REROOTING.BY_ALGORITHM,
+                                             "",
+                                             true,
+                                             false,
+                                             true );
+         if ( analysis.getExtNodesOfAnalyzedGeneTrees() != 6 ) {
+             return false;
+         }
+         if ( analysis.getGSDIRtaxCompBase() != TaxonomyComparisonBase.CODE ) {
+             return false;
+         }
+         if ( analysis.getRemovedGeneTreeNodes().size() != 0 ) {
+             return false;
+         }
+         IntMatrix table = analysis.getOrthologTable();
+         final String[] expected_resolved = { "ARATH,5,5,5,5,5,5", "CAEEL,5,5,5,5,5,5", "HUMAN,5,5,5,5,3,5",
+                 "MOUSE,5,5,5,5,3,5", "RAT,5,5,3,3,5,5", "YEAST,5,5,5,5,5,5" };
+         for( int row = 0; row < expected_resolved.length; ++row ) {
+             if ( !table.getRowAsString( row, ',' ).equals( expected_resolved[ row ] ) ) {
+                 return false;
+             }
+         }
+         // Second run: same gene trees, species tree with a MOUSE/RAT/HUMAN multifurcation.
+         final String multifurcating_species_str = "((((MOUSE,RAT,HUMAN),CAEEL),YEAST),ARATH);";
+         final Phylogeny multifurcating_species_tree = phylogeny_factory.create( multifurcating_species_str,
+                                                                                 new NHXParser() )[ 0 ];
+         multifurcating_species_tree.setRooted( true );
+         PhylogenyMethods.transferNodeNameToField( multifurcating_species_tree,
+                                                   PhylogenyNodeField.TAXONOMY_CODE,
+                                                   true );
+         analysis = RIO.executeAnalysis( gene_tree_parser,
+                                         multifurcating_species_tree,
+                                         ALGORITHM.GSDIR,
+                                         REROOTING.BY_ALGORITHM,
+                                         "",
+                                         true,
+                                         false,
+                                         true );
+         table = analysis.getOrthologTable();
+         final String[] expected_multifurcating = { "ARATH,5,5,5,5,5,5", "CAEEL,5,5,5,5,5,5", "HUMAN,5,5,5,5,5,5",
+                 "MOUSE,5,5,5,5,5,5", "RAT,5,5,5,5,5,5", "YEAST,5,5,5,5,5,5" };
+         for( int row = 0; row < expected_multifurcating.length; ++row ) {
+             if ( !table.getRowAsString( row, ',' ).equals( expected_multifurcating[ row ] ) ) {
+                 return false;
+             }
+         }
+         return true;
+     }
+     catch ( final Exception e ) {
+         e.printStackTrace( System.out );
+         return false;
+     }
+ }
}
\ No newline at end of file
import org.forester.phylogeny.PhylogenyMethods;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.data.Event;
-import org.forester.phylogeny.data.Taxonomy;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
import org.forester.sdi.SDIutil.TaxonomyComparisonBase;
import org.forester.util.ForesterUtil;
final boolean most_parsimonious_duplication_model,
final boolean strip_gene_tree,
final boolean strip_species_tree ) throws SDIException {
+ this( gene_tree, species_tree, most_parsimonious_duplication_model, strip_gene_tree, strip_species_tree, true );
+ }
+
+ public GSDI( final Phylogeny gene_tree,
+ final Phylogeny species_tree,
+ final boolean most_parsimonious_duplication_model,
+ final boolean strip_gene_tree,
+ final boolean strip_species_tree,
+ final boolean transfer_taxonomy ) throws SDIException {
_most_parsimonious_duplication_model = most_parsimonious_duplication_model;
if ( gene_tree.getRoot().getNumberOfDescendants() == 3 ) {
gene_tree.reRoot( gene_tree.getRoot().getChildNode( 2 ) );
PhylogenyMethods.preOrderReId( species_tree );
final GSDIsummaryResult gsdi_summary_result = geneTreePostOrderTraversal( gene_tree,
_most_parsimonious_duplication_model,
- true );
+ transfer_taxonomy );
_speciation_or_duplication_events_sum = gsdi_summary_result.getSpeciationOrDuplicationEventsSum();
_speciations_sum = gsdi_summary_result.getSpeciationsSum();
_duplications_sum = gsdi_summary_result.getDuplicationsSum();
}
}
g.setLink( s1 );
- if ( transfer_taxonomy ) {
- transferTaxonomy( g, s1 );
- }
determineEvent( s1, g, most_parsimonious_duplication_model, res );
}
- }
- return res;
- }
-
- private static final void transferTaxonomy( final PhylogenyNode g, final PhylogenyNode s ) {
- if ( s.getNodeData().isHasTaxonomy() ) {
- g.getNodeData().setTaxonomy( s.getNodeData().getTaxonomy() );
- if ( g.isInternal() ) {
- if ( g.getChildNode1().isInternal() ) {
- if ( g.getChildNode1().getNodeData().isHasTaxonomy() && g.getChildNode1().getNodeData().getTaxonomy() == s.getNodeData().getTaxonomy() ) {
- g.getChildNode1().getNodeData().setTaxonomy( null );
- }
- }
- if ( g.getChildNode2().isInternal() ) {
- if ( g.getChildNode2().getNodeData().isHasTaxonomy() && g.getChildNode2().getNodeData().getTaxonomy() == s.getNodeData().getTaxonomy() ) {
- g.getChildNode2().getNodeData().setTaxonomy( null );
- }
- }
- }
- }
- else if ( ForesterUtil.isEmpty( g.getName() ) && !ForesterUtil.isEmpty( s.getName() ) ) {
- g.setName( s.getName() );
- if ( g.isInternal() ) {
- if ( g.getChildNode1().isInternal() ) {
- if ( g.getChildNode1().getName() == s.getName() ) {
- g.getChildNode1().setName( "" );
- }
- }
- if ( g.getChildNode2().isInternal() ) {
- if ( g.getChildNode2().getName() == s.getName() ) {
- g.getChildNode2().setName( "" );
- }
- }
+ if ( transfer_taxonomy ) {
+ transferTaxonomy( g );
}
}
+ return res;
}
final static GSDIsummaryResult geneTreePostOrderTraversal( final Phylogeny gene_tree,
return res;
}
+ static final void transferTaxonomy( final PhylogenyNode g ) {
+ if ( g == null ) {
+ throw new IllegalArgumentException( "gene tree node is null" );
+ }
+ final PhylogenyNode s = g.getLink();
+ if ( s == null ) {
+ throw new IllegalArgumentException( "mapped species tree node is null" );
+ }
+ if ( s.getNodeData().isHasTaxonomy() ) {
+ g.getNodeData().setTaxonomy( s.getNodeData().getTaxonomy() );
+ if ( g.isInternal() ) {
+ if ( g.getChildNode1().isInternal() && g.getChildNode1().getNodeData().isHasTaxonomy()
+ && ( g.getChildNode1().getNodeData().getTaxonomy() == s.getNodeData().getTaxonomy() ) ) {
+ g.getChildNode1().getNodeData().setTaxonomy( null );
+ }
+ if ( g.getChildNode2().isInternal() && g.getChildNode2().getNodeData().isHasTaxonomy()
+ && ( g.getChildNode2().getNodeData().getTaxonomy() == s.getNodeData().getTaxonomy() ) ) {
+ g.getChildNode2().getNodeData().setTaxonomy( null );
+ }
+ }
+ }
+ else if ( ForesterUtil.isEmpty( g.getName() ) && !ForesterUtil.isEmpty( s.getName() ) ) {
+ g.setName( s.getName() );
+ if ( g.isInternal() ) {
+ if ( g.getChildNode1().isInternal() && ( g.getChildNode1().getName() == s.getName() ) ) {
+ g.getChildNode1().setName( "" );
+ }
+ if ( g.getChildNode2().isInternal() && ( g.getChildNode2().getName() == s.getName() ) ) {
+ g.getChildNode2().setName( "" );
+ }
+ }
+ }
+ }
+
private final static void addScientificNamesMappedToReducedSpecificity( final String s1,
final String s2,
final SortedSet<String> scientific_names_mapped_to_reduced_specificity ) {
public GSDIR( final Phylogeny gene_tree,\r
final Phylogeny species_tree,\r
final boolean strip_gene_tree,\r
- final boolean strip_species_tree ) throws SDIException {\r
+ final boolean strip_species_tree,\r
+ final boolean transfer_taxonomy ) throws SDIException {\r
final NodesLinkingResult nodes_linking_result = GSDI.linkNodesOfG( gene_tree,\r
species_tree,\r
strip_gene_tree,\r
for( final PhylogenyBranch branch : gene_tree_branches_post_order ) {\r
reRoot( branch, gene_tree );\r
PhylogenyMethods.preOrderReId( species_tree );\r
- \r
final GSDIsummaryResult gsdi_result = GSDI.geneTreePostOrderTraversal( gene_tree,\r
true,\r
min_duplications_sum );\r
if ( gsdi_result.getDuplicationsSum() < min_duplications_sum ) {\r
min_duplications_sum = gsdi_result.getDuplicationsSum();\r
speciations_sum = gsdi_result.getSpeciationsSum();\r
+ if ( transfer_taxonomy ) {\r
+ transferTaxonomy( gene_tree );\r
+ }\r
_min_duplications_sum_gene_tree = gene_tree.copy();\r
}\r
else if ( gsdi_result.getDuplicationsSum() == min_duplications_sum ) {\r
l.add( gene_tree );\r
final int index = getIndexesOfShortestTree( l ).get( 0 );\r
if ( index == 1 ) {\r
+ if ( transfer_taxonomy ) {\r
+ transferTaxonomy( gene_tree );\r
+ }\r
_min_duplications_sum_gene_tree = gene_tree.copy();\r
}\r
}\r
throw new IllegalArgumentException( "reRoot( Branch b ): b is not a branch." );\r
}\r
}\r
+\r
+ /**\r
+  * Calls GSDI.transferTaxonomy on every node of the given gene tree, visiting the nodes in the\r
+  * order produced by the tree's post-order iterator.\r
+  *\r
+  * @param gt the gene tree whose nodes are to be processed\r
+  */\r
+ private final static void transferTaxonomy( final Phylogeny gt ) {\r
+     final PhylogenyNodeIterator nodes = gt.iteratorPostorder();\r
+     while ( nodes.hasNext() ) {\r
+         GSDI.transferTaxonomy( nodes.next() );\r
+     }\r
+ }\r
}\r
final Phylogeny gene_0 = factory0.create( gene_0_str, new NHXParser() )[ 0 ];
s_0.setRooted( true );
gene_0.setRooted( true );
- final GSDIR sdi0 = new GSDIR( gene_0, s_0, true, true );
+ final GSDIR sdi0 = new GSDIR( gene_0, s_0, true, true, true );
if ( sdi0.getSpeciationsSum() != 0 ) {
return false;
}
final Phylogeny gene_00 = factory00.create( gene_00_str, new NHXParser() )[ 0 ];
s_00.setRooted( true );
gene_00.setRooted( true );
- final GSDIR sdi00 = new GSDIR( gene_00, s_00, true, true );
+ final GSDIR sdi00 = new GSDIR( gene_00, s_00, true, true, true );
if ( sdi00.getSpeciationsSum() != 0 ) {
return false;
}
s1.setRooted( true );
final Phylogeny g1 = TestGSDI
.createPhylogeny( "(HUMAN[&&NHX:S=HUMAN],(RAT[&&NHX:S=RAT],(CAEEL[&&NHX:T=:S=CAEEL],YEAST[&&NHX:S=YEAST])))" );
- final GSDIR sdi1 = new GSDIR( g1.copy(), s1.copy(), false, false );
+ final GSDIR sdi1 = new GSDIR( g1.copy(), s1.copy(), false, false, true );
if ( sdi1.getMinDuplicationsSum() != 0 ) {
return false;
}
final Phylogeny g2 = TestGSDI
.createPhylogeny( "(((HUMAN[&&NHX:S=HUMAN],RAT[&&NHX:S=RAT]),CAEEL[&&NHX:T=:S=CAEEL]),YEAST[&&NHX:S=YEAST])" );
- final GSDIR sdi2 = new GSDIR( g2.copy(), s1.copy(), false, false );
+ final GSDIR sdi2 = new GSDIR( g2.copy(), s1.copy(), false, false, true );
if ( sdi2.getMinDuplicationsSum() != 0 ) {
return false;
}
final Phylogeny g3 = TestGSDI
.createPhylogeny( "(RAT[&&NHX:S=RAT],HUMAN[&&NHX:S=HUMAN],(YEAST[&&NHX:S=YEAST],CAEEL[&&NHX:T=:S=CAEEL]))" );
- final GSDIR sdi3 = new GSDIR( g3.copy(), s1.copy(), false, false );
+ final GSDIR sdi3 = new GSDIR( g3.copy(), s1.copy(), false, false, true );
if ( sdi3.getMinDuplicationsSum() != 0 ) {
return false;
}
final Phylogeny g4 = TestGSDI
.createPhylogeny( "(((((MOUSE[&&NHX:S=MOUSE],[&&NHX:S=RAT]),[&&NHX:S=HUMAN]),([&&NHX:S=ARATH],[&&NHX:S=YEAST])),[&&NHX:S=CAEEL]),[&&NHX:S=CAEBR])" );
- final GSDIR sdi4 = new GSDIR( g4.copy(), s1.copy(), false, false );
+ final GSDIR sdi4 = new GSDIR( g4.copy(), s1.copy(), false, false, true );
if ( sdi4.getMinDuplicationsSum() != 0 ) {
return false;
}
final Phylogeny s2 = ParserBasedPhylogenyFactory.getInstance().create( s2str, new NHXParser() )[ 0 ];
s2.setRooted( true );
final Phylogeny g5 = TestGSDI.createPhylogeny( s2str );
- final GSDIR sdi5 = new GSDIR( g5, s2, false, false );
+ final GSDIR sdi5 = new GSDIR( g5, s2, false, false, true );
if ( sdi5.getMinDuplicationsSum() != 0 ) {
System.out.println( sdi5.getMinDuplicationsSum() );
return false;
public final class ForesterConstants {
- public final static String FORESTER_VERSION = "1.025";
- public final static String FORESTER_DATE = "130314";
+ public final static String FORESTER_VERSION = "1.025+";
+ public final static String FORESTER_DATE = "130325";
public final static String PHYLO_XML_VERSION = "1.10";
public final static String PHYLO_XML_LOCATION = "http://www.phyloxml.org";
public final static String PHYLO_XML_XSD = "phyloxml.xsd";