import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
import org.forester.phylogeny.factories.PhylogenyFactory;
import org.forester.sdi.GSDI;
+import org.forester.sdi.GSDIR;
import org.forester.sdi.SDI;
import org.forester.sdi.SDI.TaxonomyComparisonBase;
import org.forester.sdi.SDIException;
public final class gsdi {
private enum BASE_ALGORITHM {
- GSDI, SDI
+ GSDIR, GSDI, SDI
}
final static public boolean REPLACE_UNDERSCORES_IN_NH_SPECIES_TREE = true;
final static private String ALLOW_STRIPPING_OF_GENE_TREE_OPTION = "g";
final static private String SDISE_OPTION = "b";
+ final static private String GSDIR_OPTION = "r";
final static private String MOST_PARSIMONIOUS_OPTION = "m";
final static private String GUESS_FORMAT_OF_SPECIES_TREE = "q";
final static private String HELP_OPTION_1 = "help";
}
final List<String> allowed_options = new ArrayList<String>();
allowed_options.add( gsdi.SDISE_OPTION );
+ allowed_options.add( gsdi.GSDIR_OPTION );
allowed_options.add( gsdi.GUESS_FORMAT_OF_SPECIES_TREE );
allowed_options.add( gsdi.MOST_PARSIMONIOUS_OPTION );
allowed_options.add( gsdi.ALLOW_STRIPPING_OF_GENE_TREE_OPTION );
BASE_ALGORITHM base_algorithm = BASE_ALGORITHM.GSDI;
boolean most_parsimonous_duplication_model = false;
boolean allow_stripping_of_gene_tree = false;
- if ( cla.isOptionSet( gsdi.SDISE_OPTION ) ) {
+ if ( cla.isOptionSet( gsdi.GSDIR_OPTION ) ) {
+ base_algorithm = BASE_ALGORITHM.GSDIR;
+ }
+ else if ( cla.isOptionSet( gsdi.SDISE_OPTION ) ) {
base_algorithm = BASE_ALGORITHM.SDI;
}
if ( cla.isOptionSet( gsdi.MOST_PARSIMONIOUS_OPTION ) ) {
- if ( base_algorithm != BASE_ALGORITHM.GSDI ) {
- ForesterUtil.fatalError( gsdi.PRG_NAME, "Can only use most parsimonious duplication mode with GSDI" );
+ if ( base_algorithm == BASE_ALGORITHM.SDI ) {
+ ForesterUtil.fatalError( gsdi.PRG_NAME, "Cannot use most parsimonious duplication mode with SDI" );
}
most_parsimonous_duplication_model = true;
}
if ( cla.isOptionSet( gsdi.ALLOW_STRIPPING_OF_GENE_TREE_OPTION ) ) {
- if ( base_algorithm != BASE_ALGORITHM.GSDI ) {
- ForesterUtil.fatalError( gsdi.PRG_NAME, "Can only allow stripping of gene tree with GSDI" );
+ if ( base_algorithm == BASE_ALGORITHM.SDI ) {
+ ForesterUtil.fatalError( gsdi.PRG_NAME, "Cannot allow stripping of gene tree with SDI" );
}
allow_stripping_of_gene_tree = true;
}
if ( !gene_tree.isCompletelyBinary() ) {
fatalError( "user error", "gene tree is not completely binary", log_writer );
}
- if ( base_algorithm != BASE_ALGORITHM.GSDI ) {
+ if ( base_algorithm == BASE_ALGORITHM.SDI ) {
if ( !species_tree.isCompletelyBinary() ) {
- fatalError( "user error", "species tree is not completely binary, use GSDI instead", log_writer );
+ fatalError( "user error",
+ "species tree is not completely binary, use GSDI or GSDIR instead",
+ log_writer );
}
}
log_writer.println( PRG_NAME + " - " + PRG_DESC );
SDI sdi = null;
final long start_time = new Date().getTime();
try {
- if ( base_algorithm == BASE_ALGORITHM.GSDI ) {
+ if ( ( base_algorithm == BASE_ALGORITHM.GSDI ) || ( base_algorithm == BASE_ALGORITHM.GSDIR ) ) {
+ if ( base_algorithm == BASE_ALGORITHM.GSDI ) {
+ System.out.println( "Algorithm : GSDI" );
+ log_writer.println( "Algorithm : GSDI" );
+ }
+ else if ( base_algorithm == BASE_ALGORITHM.GSDIR ) {
+ System.out.println( "Algorithm : GSDIR" );
+ log_writer.println( "Algorithm : GSDIR" );
+ }
System.out.println( "Use most parsimonous duplication model : " + most_parsimonous_duplication_model );
System.out.println( "Allow stripping of gene tree nodes : " + allow_stripping_of_gene_tree );
log_writer.println( "Use most parsimonous duplication model : " + most_parsimonous_duplication_model );
log_writer.println( "Allow stripping of gene tree nodes : " + allow_stripping_of_gene_tree );
log_writer.flush();
- sdi = new GSDI( gene_tree,
- species_tree,
- most_parsimonous_duplication_model,
- allow_stripping_of_gene_tree,
- true );
+ if ( base_algorithm == BASE_ALGORITHM.GSDI ) {
+ sdi = new GSDI( gene_tree,
+ species_tree,
+ most_parsimonous_duplication_model,
+ allow_stripping_of_gene_tree,
+ true );
+ }
+ else if ( base_algorithm == BASE_ALGORITHM.GSDIR ) {
+ sdi = new GSDIR( gene_tree, species_tree, allow_stripping_of_gene_tree, 1 );
+ }
}
else {
System.out.println();
- System.out.println( "Using SDIse algorithm" );
- log_writer.println( "Using SDIse algorithm" );
+ System.out.println( "Algorithm : SDI" );
+ log_writer.println( "Algorithm : SDI" );
log_writer.flush();
sdi = new SDIse( gene_tree, species_tree );
}
System.out.println( "Mapping based on : " + gsdi.getTaxCompBase() );
log_writer.println( "Mapping based on : " + gsdi.getTaxCompBase() );
}
+ if ( ( base_algorithm == BASE_ALGORITHM.GSDIR ) ) {
+ final GSDIR gsdir = ( GSDIR ) sdi;
+ System.out.println( "Mapping based on : " + gsdir.getTaxCompBase() );
+ log_writer.println( "Mapping based on : " + gsdir.getTaxCompBase() );
+ System.out.println( "Minimal duplications sum : " + gsdir.getMinDuplicationsSum() );
+ log_writer.println( "Minimal duplications sum : " + gsdir.getMinDuplicationsSum() );
+ System.out.println( "Duplications sum statistics : " + gsdir.getMinDuplicationsSum() );
+ log_writer.println( "Duplications sum statistics : " + gsdir.getMinDuplicationsSum() );
+ }
try {
final PhylogenyWriter writer = new PhylogenyWriter();
- writer.toPhyloXML( out_file, gene_tree, 0 );
+ if ( base_algorithm == BASE_ALGORITHM.GSDIR ) {
+ writer.toPhyloXML( out_file,
+ ( ( GSDIR ) sdi ).getMinDuplicationsSumGeneTrees(),
+ 0,
+ ForesterUtil.LINE_SEPARATOR );
+ }
+ else {
+ writer.toPhyloXML( out_file, gene_tree, 0 );
+ }
}
catch ( final IOException e ) {
ForesterUtil.fatalError( PRG_NAME,
System.out.println( "Mapping cost : " + sdi.computeMappingCostL() );
log_writer.println( "Mapping cost : " + sdi.computeMappingCostL() );
}
- else if ( ( base_algorithm == BASE_ALGORITHM.GSDI ) ) {
+ else if ( ( base_algorithm == BASE_ALGORITHM.GSDI ) || ( base_algorithm == BASE_ALGORITHM.GSDIR ) ) {
final GSDI gsdi = ( GSDI ) sdi;
final File species_tree_used_file = new File( ForesterUtil.removeSuffix( out_file.toString() )
+ SUFFIX_FOR_SPECIES_TREE_USED );
+ ": to allow species tree in other formats than phyloXML (i.e. Newick, NHX, Nexus)" );
System.out.println( " -" + gsdi.SDISE_OPTION
+ ": to use SDIse algorithm instead of GSDI algorithm (for binary species trees)" );
+ System.out.println( " -" + gsdi.GSDIR_OPTION
+ + ": to use GSDIR algorithm instead of GSDI algorithm (re-rooting)" );
System.out.println();
System.out.println( "Gene tree:" );
System.out.println( " in phyloXM format, with taxonomy and sequence data in appropriate fields" );
*
* @author Christian M. Zmasek
*/
-public final class GSDI extends SDI {
+public class GSDI extends SDI {
private final boolean _most_parsimonious_duplication_model;
private final boolean _strip_gene_tree;
throws SDIException {
this( gene_tree, species_tree, most_parsimonious_duplication_model, false, false );
}
+
+ public GSDI( final Phylogeny gene_tree,
+ final Phylogeny species_tree,
+ final boolean most_parsimonious_duplication_model,
+ final boolean strip_gene_tree,
+ final boolean strip_species_tree,
+ int x ) throws SDIException {
+ super( gene_tree, species_tree );
+ _speciation_or_duplication_events_sum = 0;
+ _speciations_sum = 0;
+ _most_parsimonious_duplication_model = most_parsimonious_duplication_model;
+ _duplications_sum = 0;
+ _strip_gene_tree = strip_gene_tree;
+ _strip_species_tree = strip_species_tree;
+ _stripped_gene_tree_nodes = new ArrayList<PhylogenyNode>();
+ _stripped_species_tree_nodes = new ArrayList<PhylogenyNode>();
+ _mapped_species_tree_nodes = new HashSet<PhylogenyNode>();
+ _scientific_names_mapped_to_reduced_specificity = new TreeSet<String>();
+
+ }
// s is the node on the species tree g maps to.
private final void determineEvent( final PhylogenyNode s, final PhylogenyNode g ) {
--- /dev/null
+// $Id:\r
+// FORESTER -- software libraries and applications\r
+// for evolutionary biology research and applications.\r
+//\r
+// Copyright (C) 2008-2013 Christian M. Zmasek\r
+// All rights reserved\r
+//\r
+// This library is free software; you can redistribute it and/or\r
+// modify it under the terms of the GNU Lesser General Public\r
+// License as published by the Free Software Foundation; either\r
+// version 2.1 of the License, or (at your option) any later version.\r
+//\r
+// This library is distributed in the hope that it will be useful,\r
+// but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+// Lesser General Public License for more details.\r
+//\r
+// You should have received a copy of the GNU Lesser General Public\r
+// License along with this library; if not, write to the Free Software\r
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA\r
+//\r
+// Contact: phylosoft @ gmail . com\r
+// WWW: www.phylosoft.org\r
+\r
+package org.forester.sdi;\r
+\r
+import java.util.ArrayList;\r
+import java.util.List;\r
+\r
+import org.forester.phylogeny.Phylogeny;\r
+import org.forester.phylogeny.PhylogenyMethods;\r
+import org.forester.phylogeny.PhylogenyNode;\r
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;\r
+import org.forester.util.BasicDescriptiveStatistics;\r
+\r
+public class GSDIR extends GSDI {\r
+\r
+ private int _min_duplications_sum;\r
+ private final BasicDescriptiveStatistics _duplications_sum_stats;\r
+ private final List<Phylogeny> _min_duplications_sum_gene_trees;\r
+\r
+ public GSDIR( final Phylogeny gene_tree, final Phylogeny species_tree, final boolean strip_gene_tree, final int x )\r
+ throws SDIException {\r
+ super( gene_tree, species_tree, true, strip_gene_tree, true, 1 );\r
+ _min_duplications_sum = Integer.MAX_VALUE;\r
+ _min_duplications_sum_gene_trees = new ArrayList<Phylogeny>();\r
+ _duplications_sum_stats = new BasicDescriptiveStatistics();\r
+ linkNodesOfG();\r
+ final List<PhylogenyNode> gene_tree_nodes_post_order = new ArrayList<PhylogenyNode>();\r
+ for( final PhylogenyNodeIterator it = gene_tree.iteratorPostorder(); it.hasNext(); ) {\r
+ gene_tree_nodes_post_order.add( it.next() );\r
+ }\r
+ for( final PhylogenyNode root : gene_tree_nodes_post_order ) {\r
+ _gene_tree.reRoot( root );\r
+ PhylogenyMethods.preOrderReId( getSpeciesTree() );\r
+ //TEST, remove later\r
+ for( final PhylogenyNodeIterator it = getGeneTree().iteratorPostorder(); it.hasNext(); ) {\r
+ final PhylogenyNode g = it.next();\r
+ if ( g.isInternal() ) {\r
+ g.setLink( null );\r
+ }\r
+ }\r
+ geneTreePostOrderTraversal();\r
+ _duplications_sum_stats.addValue( getMinDuplicationsSum() );\r
+ if ( getDuplicationsSum() < getMinDuplicationsSum() ) {\r
+ _min_duplications_sum = getDuplicationsSum();\r
+ _min_duplications_sum_gene_trees.clear();\r
+ _min_duplications_sum_gene_trees.add( getGeneTree().copy() );\r
+ }\r
+ else if ( getDuplicationsSum() == getMinDuplicationsSum() ) {\r
+ _min_duplications_sum_gene_trees.add( getGeneTree().copy() );\r
+ }\r
+ }\r
+ }\r
+\r
+ public int getMinDuplicationsSum() {\r
+ return _min_duplications_sum;\r
+ }\r
+\r
+ public List<Phylogeny> getMinDuplicationsSumGeneTrees() {\r
+ return _min_duplications_sum_gene_trees;\r
+ }\r
+\r
+ public BasicDescriptiveStatistics getDuplicationsSumStats() {\r
+ return _duplications_sum_stats;\r
+ }\r
+}\r
private static boolean testGSDI_general() {
try {
- final PhylogenyMethods pm = PhylogenyMethods.getInstance();
+
final String s2_ = "((" + "([&&NHX:S=a1],[&&NHX:S=a2],[&&NHX:S=a3],[&&NHX:S=a4]),"
+ "([&&NHX:S=b1],[&&NHX:S=b2],[&&NHX:S=b3],[&&NHX:S=b4]),"
+ "([&&NHX:S=c1],[&&NHX:S=c2],[&&NHX:S=c3],[&&NHX:S=c4]),"
}
return true;
}
+
+ private static boolean testGSDIR_general() {
+ try {
+
+ final String s2_ = "((" + "([&&NHX:S=a1],[&&NHX:S=a2],[&&NHX:S=a3],[&&NHX:S=a4]),"
+ + "([&&NHX:S=b1],[&&NHX:S=b2],[&&NHX:S=b3],[&&NHX:S=b4]),"
+ + "([&&NHX:S=c1],[&&NHX:S=c2],[&&NHX:S=c3],[&&NHX:S=c4]),"
+ + "([&&NHX:S=d1],[&&NHX:S=d2],[&&NHX:S=d3],[&&NHX:S=d4])),("
+ + "([&&NHX:S=e1],[&&NHX:S=e2],[&&NHX:S=e3],[&&NHX:S=e4]),"
+ + "([&&NHX:S=f1],[&&NHX:S=f2],[&&NHX:S=f3],[&&NHX:S=f4]),"
+ + "([&&NHX:S=g1],[&&NHX:S=g2],[&&NHX:S=g3],[&&NHX:S=g4]),"
+ + "([&&NHX:S=h1],[&&NHX:S=h2],[&&NHX:S=h3],[&&NHX:S=h4])),("
+ + "([&&NHX:S=i1],[&&NHX:S=i2],[&&NHX:S=i3],[&&NHX:S=i4]),"
+ + "([&&NHX:S=j1],[&&NHX:S=j2],[&&NHX:S=j3],[&&NHX:S=j4]),"
+ + "([&&NHX:S=k1],[&&NHX:S=k2],[&&NHX:S=k3],[&&NHX:S=k4]),"
+ + "([&&NHX:S=l1],[&&NHX:S=l2],[&&NHX:S=l3],[&&NHX:S=l4])),("
+ + "([&&NHX:S=m1],[&&NHX:S=m2],[&&NHX:S=m3],[&&NHX:S=m4]),"
+ + "([&&NHX:S=n1],[&&NHX:S=n2],[&&NHX:S=n3],[&&NHX:S=n4]),"
+ + "([&&NHX:S=o1],[&&NHX:S=o2],[&&NHX:S=o3],[&&NHX:S=o4]),"
+ + "([&&NHX:S=p1],[&&NHX:S=p2],[&&NHX:S=p3],[&&NHX:S=p4])"
+ + "),[&&NHX:S=x],[&&NHX:S=y],[&&NHX:S=z])";
+ final Phylogeny s2 = ParserBasedPhylogenyFactory.getInstance().create( s2_, new NHXParser() )[ 0 ];
+ s2.setRooted( true );
+ final String s1_ = "((([&&NHX:S=A2],[&&NHX:S=A1]),[&&NHX:S=B],[&&NHX:S=C]),[&&NHX:S=D])";
+ final Phylogeny s1 = ParserBasedPhylogenyFactory.getInstance().create( s1_, new NHXParser() )[ 0 ];
+ s1.setRooted( true );
+ final Phylogeny g1 = TestGSDI
+ .createPhylogeny( "((((B[&&NHX:S=B],A1[&&NHX:S=A1]),C[&&NHX:S=C]),A2[&&NHX:S=A2]),D[&&NHX:S=D])" );
+ final GSDIR sdi1 = new GSDIR( g1, s1, false, 1 );
+ // Archaeopteryx.createApplication( g1 );
+ // Archaeopteryx.createApplication( s1 );
+ if ( sdi1.getDuplicationsSum() != 1 ) {
+ return false;
+ }
+ if ( !PhylogenyMethods.calculateLCA( g1.getNode( "B" ), g1.getNode( "A1" ) ).getNodeData().getEvent()
+ .isSpeciation() ) {
+ return false;
+ }
+ if ( !PhylogenyMethods.calculateLCA( g1.getNode( "C" ), g1.getNode( "A1" ) ).getNodeData().getEvent()
+ .isSpeciationOrDuplication() ) {
+ return false;
+ }
+ if ( !( PhylogenyMethods.calculateLCA( g1.getNode( "A2" ), g1.getNode( "A1" ) ).getNodeData().getEvent()
+ .isDuplication() ) ) {
+ return false;
+ }
+ if ( !PhylogenyMethods.calculateLCA( g1.getNode( "D" ), g1.getNode( "A1" ) ).getNodeData().getEvent()
+ .isSpeciation() ) {
+ return false;
+ }
+
+ }
+ catch ( final Exception e ) {
+ e.printStackTrace( System.out );
+ return false;
+ }
+ return true;
+ }
public static void main( final String[] args ) {
if ( !TestGSDI.testGSDI_against_binary_gene_tree() ) {
if ( !TestGSDI.testGSDI_general() ) {
System.out.println( "general failed" );
}
+ if ( !TestGSDI.testGSDIR_general() ) {
+ System.out.println( "general re-rooting failed" );
+ }
else {
- System.out.println( "general OK" );
+ System.out.println( "OK" );
}
// boolean success = test();
// if ( success ) {
return _sum;
}
- /* (non-Javadoc)
- * @see org.forester.util.DescriptiveStatisticsI#getSummaryAsString()
- */
+
@Override
public String getSummaryAsString() {
validate();
return "" + mean + ( ( char ) 177 ) + sd + " [" + getMin() + "..." + getMax() + "]";
}
- /* (non-Javadoc)
- * @see org.forester.util.DescriptiveStatisticsI#getValue(int)
- */
+
@Override
public double getValue( final int index ) {
validate();