import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
import org.forester.phylogeny.factories.PhylogenyFactory;
import org.forester.sdi.GSDI;
+import org.forester.sdi.GSDII;
import org.forester.sdi.GSDIR;
-import org.forester.sdi.SDI;
import org.forester.sdi.SDIException;
import org.forester.sdi.SDIutil;
import org.forester.sdi.SDIutil.ALGORITHM;
final static public boolean REPLACE_UNDERSCORES_IN_NH_SPECIES_TREE = true;
final static private String ALLOW_STRIPPING_OF_GENE_TREE_OPTION = "g";
- final static private String SDISE_OPTION = "b";
final static private String GSDIR_OPTION = "r";
final static private String MOST_PARSIMONIOUS_OPTION = "m";
final static private String GUESS_FORMAT_OF_SPECIES_TREE = "q";
System.exit( -1 );
}
final List<String> allowed_options = new ArrayList<String>();
- allowed_options.add( gsdi.SDISE_OPTION );
+
allowed_options.add( gsdi.GSDIR_OPTION );
allowed_options.add( gsdi.GUESS_FORMAT_OF_SPECIES_TREE );
allowed_options.add( gsdi.MOST_PARSIMONIOUS_OPTION );
if ( cla.isOptionSet( gsdi.GSDIR_OPTION ) ) {
base_algorithm = ALGORITHM.GSDIR;
}
- else if ( cla.isOptionSet( gsdi.SDISE_OPTION ) ) {
- base_algorithm = ALGORITHM.SDI;
- }
+
if ( cla.isOptionSet( gsdi.MOST_PARSIMONIOUS_OPTION ) ) {
if ( base_algorithm == ALGORITHM.SDI ) {
ForesterUtil.fatalError( gsdi.PRG_NAME, "Cannot use most parsimonious duplication mode with SDI" );
+ ( ForesterUtil.isEmpty( species_tree.getName() ) ? "" : gene_tree.getName() ) );
// Print the species tree's own name: the emptiness guard and the printed value must refer to the same tree.
System.out.println( "Species tree name : "
+ ( ForesterUtil.isEmpty( species_tree.getName() ) ? "" : species_tree.getName() ) );
- Object sdi = null;
+ GSDII gsdii = null;
final long start_time = new Date().getTime();
try {
- if ( ( base_algorithm == ALGORITHM.GSDI ) || ( base_algorithm == ALGORITHM.GSDIR ) ) {
- if ( base_algorithm == ALGORITHM.GSDI ) {
- System.out.println( "Algorithm : GSDI" );
- log_writer.println( "Algorithm : GSDI" );
- }
- else if ( base_algorithm == ALGORITHM.GSDIR ) {
- System.out.println( "Algorithm : GSDIR" );
- log_writer.println( "Algorithm : GSDIR" );
- }
- System.out.println( "Use most parsimonous duplication model : " + most_parsimonous_duplication_model );
- System.out.println( "Allow stripping of gene tree nodes : " + allow_stripping_of_gene_tree );
- log_writer.println( "Use most parsimonous duplication model : " + most_parsimonous_duplication_model );
- log_writer.println( "Allow stripping of gene tree nodes : " + allow_stripping_of_gene_tree );
- log_writer.flush();
- if ( base_algorithm == ALGORITHM.GSDI ) {
- sdi = new GSDI( gene_tree,
- species_tree,
- most_parsimonous_duplication_model,
- allow_stripping_of_gene_tree,
- true );
- }
- else if ( base_algorithm == ALGORITHM.GSDIR ) {
- sdi = new GSDIR( gene_tree, species_tree, allow_stripping_of_gene_tree, true );
- }
+ if ( base_algorithm == ALGORITHM.GSDI ) {
+ System.out.println( "Algorithm : GSDI" );
+ log_writer.println( "Algorithm : GSDI" );
}
- else {
- System.out.println();
- System.out.println( "Algorithm : SDI" );
- log_writer.println( "Algorithm : SDI" );
- log_writer.flush();
- sdi = new SDI( gene_tree, species_tree );
+ else if ( base_algorithm == ALGORITHM.GSDIR ) {
+ System.out.println( "Algorithm : GSDIR" );
+ log_writer.println( "Algorithm : GSDIR" );
+ }
+ System.out.println( "Use most parsimonous duplication model : " + most_parsimonous_duplication_model );
+ System.out.println( "Allow stripping of gene tree nodes : " + allow_stripping_of_gene_tree );
+ log_writer.println( "Use most parsimonous duplication model : " + most_parsimonous_duplication_model );
+ log_writer.println( "Allow stripping of gene tree nodes : " + allow_stripping_of_gene_tree );
+ log_writer.flush();
+ if ( base_algorithm == ALGORITHM.GSDI ) {
+ gsdii = new GSDI( gene_tree,
+ species_tree,
+ most_parsimonous_duplication_model,
+ allow_stripping_of_gene_tree,
+ true );
+ }
+ else if ( base_algorithm == ALGORITHM.GSDIR ) {
+ gsdii = new GSDIR( gene_tree, species_tree, allow_stripping_of_gene_tree, true );
}
}
catch ( final SDIException e ) {
+ "ms" );
log_writer.println( "Running time (excluding I/O) : " + ( new Date().getTime() - start_time )
+ "ms" );
- if ( ( base_algorithm == ALGORITHM.GSDI ) ) {
- final GSDI gsdi = ( GSDI ) sdi;
- System.out.println( "Mapping based on : " + gsdi.getTaxCompBase() );
- log_writer.println( "Mapping based on : " + gsdi.getTaxCompBase() );
- }
+ System.out.println( "Mapping based on : " + gsdii.getTaxCompBase() );
+ log_writer.println( "Mapping based on : " + gsdii.getTaxCompBase() );
// Extra console/log output emitted only when the GSDIR algorithm was selected.
if ( ( base_algorithm == ALGORITHM.GSDIR ) ) {
- final GSDIR gsdir = ( GSDIR ) sdi;
- System.out.println( "Mapping based on : " + gsdir.getTaxCompBase() );
- log_writer.println( "Mapping based on : " + gsdir.getTaxCompBase() );
- System.out.println( "Minimal duplications sum : " + gsdir.getMinDuplicationsSum() );
- log_writer.println( "Minimal duplications sum : " + gsdir.getMinDuplicationsSum() );
- System.out.println( "Duplications sum statistics : " + gsdir.getMinDuplicationsSum() );
+ final GSDIR gsdir = ( GSDIR ) gsdii;
// NOTE(review): both lines below are labelled as statistics but print getMinDuplicationsSum(),
// the same value reported later as the minimal number of duplications. Presumably the accessor
// backed by GSDIR's _duplications_sum_stats field was intended - confirm and switch if so.
+ System.out.println( "Duplications sum statistics : " + gsdir.getMinDuplicationsSum() );
log_writer.println( "Duplications sum statistics : " + gsdir.getMinDuplicationsSum() );
}
try {
final PhylogenyWriter writer = new PhylogenyWriter();
if ( base_algorithm == ALGORITHM.GSDIR ) {
writer.toPhyloXML( out_file,
- ( ( GSDIR ) sdi ).getMinDuplicationsSumGeneTrees(),
+ ( ( GSDIR ) gsdii ).getMinDuplicationsSumGeneTrees(),
0,
ForesterUtil.LINE_SEPARATOR );
}
}
System.out.println( "Wrote resulting gene tree to : " + out_file.getCanonicalPath() );
log_writer.println( "Wrote resulting gene tree to : " + out_file.getCanonicalPath() );
- if ( base_algorithm == ALGORITHM.SDI ) {
- sdi = sdi;
- sdi.computeMappingCostL();
- System.out.println( "Mapping cost : " + sdi.computeMappingCostL() );
- log_writer.println( "Mapping cost : " + sdi.computeMappingCostL() );
- }
- else if ( ( base_algorithm == ALGORITHM.GSDI ) || ( base_algorithm == ALGORITHM.GSDIR ) ) {
- final GSDI gsdi = ( GSDI ) sdi;
- final File species_tree_used_file = new File( ForesterUtil.removeSuffix( out_file.toString() )
- + SUFFIX_FOR_SPECIES_TREE_USED );
- try {
- final PhylogenyWriter writer = new PhylogenyWriter();
- writer.toPhyloXML( species_tree_used_file, gsdi.getSpeciesTree(), 0 );
- }
- catch ( final IOException e ) {
- ForesterUtil.fatalError( PRG_NAME, "Failed to write to [" + species_tree_used_file.getCanonicalPath()
- + "]: " + e.getMessage() );
- }
- System.out.println( "Wrote (stripped) species tree to : "
- + species_tree_used_file.getCanonicalPath() );
- log_writer.println( "Wrote (stripped) species tree to : "
- + species_tree_used_file.getCanonicalPath() );
- if ( ( gsdi.getReMappedScientificNamesFromGeneTree() != null )
- && !gsdi.getReMappedScientificNamesFromGeneTree().isEmpty() ) {
- System.out.println( "Number of gene tree species remapped : "
- + gsdi.getReMappedScientificNamesFromGeneTree().size() );
- log_writer.println( "Number of gene tree species remapped : "
- + gsdi.getReMappedScientificNamesFromGeneTree().size() );
- writeToRemappedFile( out_file, gsdi.getReMappedScientificNamesFromGeneTree(), log_writer );
- }
+ final File species_tree_used_file = new File( ForesterUtil.removeSuffix( out_file.toString() )
+ + SUFFIX_FOR_SPECIES_TREE_USED );
+ try {
+ final PhylogenyWriter writer = new PhylogenyWriter();
+ writer.toPhyloXML( species_tree_used_file, species_tree, 0 );
+ }
+ catch ( final IOException e ) {
+ ForesterUtil.fatalError( PRG_NAME, "Failed to write to [" + species_tree_used_file.getCanonicalPath()
+ + "]: " + e.getMessage() );
+ }
+ System.out.println( "Wrote (stripped) species tree to : " + species_tree_used_file.getCanonicalPath() );
+ log_writer.println( "Wrote (stripped) species tree to : " + species_tree_used_file.getCanonicalPath() );
+ if ( ( gsdii.getReMappedScientificNamesFromGeneTree() != null )
+ && !gsdii.getReMappedScientificNamesFromGeneTree().isEmpty() ) {
+ System.out.println( "Number of gene tree species remapped : "
+ + gsdii.getReMappedScientificNamesFromGeneTree().size() );
+ log_writer.println( "Number of gene tree species remapped : "
+ + gsdii.getReMappedScientificNamesFromGeneTree().size() );
+ writeToRemappedFile( out_file, gsdii.getReMappedScientificNamesFromGeneTree(), log_writer );
}
System.out.println( "Number of external nodes in gene tree : " + gene_tree.getNumberOfExternalNodes() );
log_writer.println( "Number of external nodes in gene tree : " + gene_tree.getNumberOfExternalNodes() );
- System.out.println( "Number of external nodes in species tree : "
- + sdi.getSpeciesTree().getNumberOfExternalNodes() );
- log_writer.println( "Number of external nodes in species tree : "
- + sdi.getSpeciesTree().getNumberOfExternalNodes() );
- if ( ( base_algorithm == ALGORITHM.GSDI ) ) {
- final GSDI gsdi = ( GSDI ) sdi;
- final int poly = PhylogenyMethods.countNumberOfPolytomies( gsdi.getSpeciesTree() );
- System.out.println( "Number of polytomies in species tree : " + poly );
- log_writer.println( "Number of polytomies in species tree : " + poly );
- System.out.println( "External nodes stripped from gene tree : "
- + gsdi.getStrippedExternalGeneTreeNodes().size() );
- log_writer.println( "External nodes stripped from gene tree : "
- + gsdi.getStrippedExternalGeneTreeNodes().size() );
- System.out.println( "External nodes stripped from species tree: "
- + gsdi.getStrippedSpeciesTreeNodes().size() );
- log_writer.println( "External nodes stripped from species tree: "
- + gsdi.getStrippedSpeciesTreeNodes().size() );
- }
+ System.out.println( "Number of external nodes in species tree : " + species_tree.getNumberOfExternalNodes() );
+ log_writer.println( "Number of external nodes in species tree : " + species_tree.getNumberOfExternalNodes() );
+ final int poly = PhylogenyMethods.countNumberOfPolytomies( species_tree );
+ System.out.println( "Number of polytomies in species tree : " + poly );
+ log_writer.println( "Number of polytomies in species tree : " + poly );
+ System.out.println( "External nodes stripped from gene tree : "
+ + gsdii.getStrippedExternalGeneTreeNodes().size() );
+ log_writer.println( "External nodes stripped from gene tree : "
+ + gsdii.getStrippedExternalGeneTreeNodes().size() );
+ System.out.println( "External nodes stripped from species tree: " + gsdii.getStrippedSpeciesTreeNodes().size() );
+ log_writer.println( "External nodes stripped from species tree: " + gsdii.getStrippedSpeciesTreeNodes().size() );
System.out.println();
- System.out.println( "Number of duplications : " + sdi.getDuplicationsSum() );
- log_writer.println( "Number of duplications : " + sdi.getDuplicationsSum() );
- if ( ( base_algorithm == ALGORITHM.GSDI ) ) {
- final GSDI gsdi = ( GSDI ) sdi;
+ System.out.println( "Number of speciations : " + gsdii.getSpeciationsSum() );
+ log_writer.println( "Number of speciations : " + gsdii.getSpeciationsSum() );
+ if ( ( base_algorithm == ALGORITHM.GSDIR ) ) {
+ final GSDIR gsdir = ( GSDIR ) gsdii;
+ System.out.println( "Minimal number of duplications : " + gsdir.getMinDuplicationsSum() );
+ log_writer.println( "Minimal number of duplications : " + gsdir.getMinDuplicationsSum() );
+ }
+ else if ( ( base_algorithm == ALGORITHM.GSDI ) ) {
+ final GSDI gsdi = ( GSDI ) gsdii;
+ System.out.println( "Number of duplications : " + gsdi.getDuplicationsSum() );
+ log_writer.println( "Number of duplications : " + gsdi.getDuplicationsSum() );
if ( !most_parsimonous_duplication_model ) {
final int u = gsdi.getSpeciationOrDuplicationEventsSum();
System.out.println( "Number of potential duplications : " + u );
log_writer.println( "Number of potential duplications : " + u );
}
- System.out.println( "Number of speciations : " + gsdi.getSpeciationsSum() );
- log_writer.println( "Number of speciations : " + gsdi.getSpeciationsSum() );
- log_writer.println();
- printMappedNodesToLog( log_writer, gsdi );
- log_writer.println();
- printStrippedGeneTreeNodesToLog( log_writer, gsdi );
+
}
+ log_writer.println();
+ printMappedNodesToLog( log_writer, gsdii );
+ log_writer.println();
+ printStrippedGeneTreeNodesToLog( log_writer, gsdii );
System.out.println();
System.out.println( "Wrote log to : " + log_file.getCanonicalPath() );
System.out.println();
log_writer.println( "Wrote remapped gene tree species to : " + file.getCanonicalPath() );
}
- private static void printMappedNodesToLog( final EasyWriter log_writer, final GSDI gsdi ) throws IOException {
+ private static void printMappedNodesToLog( final EasyWriter log_writer, final GSDII gsdi ) throws IOException {
final SortedSet<String> ss = new TreeSet<String>();
for( final PhylogenyNode n : gsdi.getMappedExternalSpeciesTreeNodes() ) {
ss.add( n.toString() );
ForesterUtil.fatalError( gsdi.PRG_NAME, msg );
}
- private static void printStrippedGeneTreeNodesToLog( final EasyWriter log_writer, final GSDI gsdi )
+ private static void printStrippedGeneTreeNodesToLog( final EasyWriter log_writer, final GSDII gsdi )
throws IOException {
final SortedMap<String, Integer> sm = new TreeMap<String, Integer>();
for( final PhylogenyNode n : gsdi.getStrippedExternalGeneTreeNodes() ) {
System.out.println( " as potential duplications due to polytomies in the species tree" );
System.out.println( " -" + gsdi.GUESS_FORMAT_OF_SPECIES_TREE
+ ": to allow species tree in other formats than phyloXML (i.e. Newick, NHX, Nexus)" );
- System.out.println( " -" + gsdi.SDISE_OPTION
- + ": to use SDIse algorithm instead of GSDI algorithm (for binary species trees)" );
System.out.println( " -" + gsdi.GSDIR_OPTION
+ ": to use GSDIR algorithm instead of GSDI algorithm (re-rooting)" );
System.out.println();
import org.forester.sdi.SDIutil.TaxonomyComparisonBase;
import org.forester.util.ForesterUtil;
-public final class GSDI {
+public final class GSDI implements GSDII {
private final boolean _most_parsimonious_duplication_model;
private final int _speciation_or_duplication_events_sum;
.getScientificNamesMappedToReducedSpecificity();
_tax_comp_base = nodes_linking_result.getTaxCompBase();
PhylogenyMethods.preOrderReId( species_tree );
- final GSDIsummaryResult gsdi_summary_result = new GSDIsummaryResult();
- geneTreePostOrderTraversal( gene_tree, _most_parsimonious_duplication_model, gsdi_summary_result );
+ final GSDIsummaryResult gsdi_summary_result = geneTreePostOrderTraversal( gene_tree,
+ _most_parsimonious_duplication_model );
_speciation_or_duplication_events_sum = gsdi_summary_result.getSpeciationOrDuplicationEventsSum();
_speciations_sum = gsdi_summary_result.getSpeciationsSum();
_duplications_sum = gsdi_summary_result.getDuplicationsSum();
* Preconditions: Mapping M for external nodes must have been calculated and
* the species tree must be labeled in preorder.
* <p>
+ * @return the {@code GSDIsummaryResult} created by this method and filled in while traversing the gene tree
*
*/
- final static void geneTreePostOrderTraversal( final Phylogeny gene_tree,
- final boolean most_parsimonious_duplication_model,
- final GSDIsummaryResult res ) {
+ final static GSDIsummaryResult geneTreePostOrderTraversal( final Phylogeny gene_tree,
+ final boolean most_parsimonious_duplication_model ) {
+ final GSDIsummaryResult res = new GSDIsummaryResult();
for( final PhylogenyNodeIterator it = gene_tree.iteratorPostorder(); it.hasNext(); ) {
final PhylogenyNode g = it.next();
if ( g.isInternal() ) {
determineEvent( s1, g, most_parsimonious_duplication_model, res );
}
}
+ return res;
}
/**
}
}
if ( strip_species_tree ) {
- stripSpeciesTree( species_tree, species_tree_ext_nodes, res.getMappedSpeciesTreeNodes() );
+ stripSpeciesTree( species_tree, species_tree_ext_nodes, res );
}
return res;
}
scientific_names_mapped_to_reduced_specificity.add( s1 + " -> " + s2 );
}
- // Used by GSDIR
- // protected GSDI( final Phylogeny gene_tree, final Phylogeny species_tree, final boolean strip_gene_tree )
- // throws SDIException {
- // super( gene_tree, species_tree );
- // _speciation_or_duplication_events_sum = -1;
- // _speciations_sum = 0;
- // _most_parsimonious_duplication_model = true;
- // _duplications_sum = 0;
- // _stripped_gene_tree_nodes = new ArrayList<PhylogenyNode>();
- // _stripped_species_tree_nodes = new ArrayList<PhylogenyNode>();
- // _mapped_species_tree_nodes = new HashSet<PhylogenyNode>();
- // _scientific_names_mapped_to_reduced_specificity = new TreeSet<String>();
- // }
- // s is the node on the species tree g maps to.
private final static void determineEvent( final PhylogenyNode s,
final PhylogenyNode g,
final boolean most_parsimonious_duplication_model,
}
}
/**
 * Strips unused external nodes from the species tree.
 * <p>
 * Each node of {@code species_tree_ext_nodes} that is not contained in
 * {@code res.getMappedSpeciesTreeNodes()} is deleted from {@code species_tree} via
 * {@code deleteSubtree( s, true )} and appended to {@code res.getStrippedSpeciesTreeNodes()}.
 * Afterwards the tree is told to drop its id-to-node map and that its external nodes changed
 * (presumably so cached lookups are rebuilt - confirm against Phylogeny).
 * <p>
 * With this change the stripped nodes are recorded on {@code res} instead of being
 * returned as a separate list.
 *
 * @param species_tree the species tree to modify in place
 * @param species_tree_ext_nodes the candidate external nodes to examine
 * @param res supplies the set of nodes to keep and receives the stripped nodes
 */
- private final static List<PhylogenyNode> stripSpeciesTree( final Phylogeny species_tree,
- final List<PhylogenyNode> species_tree_ext_nodes,
- final Set<PhylogenyNode> keep ) {
- final List<PhylogenyNode> stripped_species_tree_nodes = new ArrayList<PhylogenyNode>();
+ private final static void stripSpeciesTree( final Phylogeny species_tree,
+ final List<PhylogenyNode> species_tree_ext_nodes,
+ final NodesLinkingResult res ) {
for( final PhylogenyNode s : species_tree_ext_nodes ) {
- if ( !keep.contains( s ) ) {
+ if ( !res.getMappedSpeciesTreeNodes().contains( s ) ) {
species_tree.deleteSubtree( s, true );
- stripped_species_tree_nodes.add( s );
+ res.getStrippedSpeciesTreeNodes().add( s );
}
}
species_tree.clearHashIdToNodeMap();
species_tree.externalNodesHaveChanged();
- return stripped_species_tree_nodes;
}
private final static void stripTree( final Phylogeny phy, final List<PhylogenyNode> strip_nodes ) {
import org.forester.sdi.SDIutil.TaxonomyComparisonBase;\r
import org.forester.util.BasicDescriptiveStatistics;\r
\r
-public class GSDIR {\r
+public class GSDIR implements GSDII {\r
+\r
+ private final int _min_duplications_sum;\r
+ private final int _speciations_sum;\r
+\r
+ \r
// Returns the speciation count stored by the constructor. That value comes from the first\r
// re-rooting whose duplication sum was strictly lower than all earlier ones; rootings that\r
// only tie the minimum do not update it, and it stays 0 if no rooting was evaluated.\r
+ @Override\r
+ public int getSpeciationsSum() {\r
+ return _speciations_sum;\r
+ }\r
\r
- private int _min_duplications_sum;\r
private final BasicDescriptiveStatistics _duplications_sum_stats;\r
private final List<Phylogeny> _min_duplications_sum_gene_trees;\r
- protected int _speciations_sum;\r
- protected int _duplications_sum;\r
+ \r
private final List<PhylogenyNode> _stripped_gene_tree_nodes;\r
private final List<PhylogenyNode> _stripped_species_tree_nodes;\r
private final Set<PhylogenyNode> _mapped_species_tree_nodes;\r
private final TaxonomyComparisonBase _tax_comp_base;\r
private final SortedSet<String> _scientific_names_mapped_to_reduced_specificity;\r
-\r
+ \r
public GSDIR( final Phylogeny gene_tree,\r
final Phylogeny species_tree,\r
final boolean strip_gene_tree,\r
final boolean strip_species_tree ) throws SDIException {\r
- _speciations_sum = 0;\r
- _duplications_sum = 0;\r
+ \r
final NodesLinkingResult nodes_linking_result = GSDI.linkNodesOfG( gene_tree,\r
species_tree,\r
null,\r
gene_tree_branches_post_order.add( new PhylogenyBranch( n, n.getParent() ) );\r
}\r
}\r
- _min_duplications_sum = Integer.MAX_VALUE;\r
+ int min_duplications_sum = Integer.MAX_VALUE;\r
+ int speciations_sum = 0;\r
_min_duplications_sum_gene_trees = new ArrayList<Phylogeny>();\r
_duplications_sum_stats = new BasicDescriptiveStatistics();\r
for( final PhylogenyBranch branch : gene_tree_branches_post_order ) {\r
- _duplications_sum = 0;\r
- _speciations_sum = 0;\r
+ \r
gene_tree.reRoot( branch );\r
PhylogenyMethods.preOrderReId( species_tree );\r
//TEST, remove later\r
// g.setLink( null );\r
// }\r
// }\r
- final GSDIsummaryResult gsdi_summary_result = new GSDIsummaryResult();\r
- GSDI.geneTreePostOrderTraversal( gene_tree, true, gsdi_summary_result );\r
- if ( _duplications_sum < _min_duplications_sum ) {\r
- _min_duplications_sum = _duplications_sum;\r
+ final GSDIsummaryResult gsdi_result = GSDI.geneTreePostOrderTraversal( gene_tree, true );\r
+ if ( gsdi_result.getDuplicationsSum() < min_duplications_sum ) {\r
+ min_duplications_sum = gsdi_result.getDuplicationsSum();\r
+ speciations_sum = gsdi_result.getSpeciationsSum();\r
_min_duplications_sum_gene_trees.clear();\r
_min_duplications_sum_gene_trees.add( gene_tree.copy() );\r
//_speciations_sum\r
}\r
- else if ( _duplications_sum == _min_duplications_sum ) {\r
+ else if ( gsdi_result.getDuplicationsSum()== min_duplications_sum ) {\r
_min_duplications_sum_gene_trees.add( gene_tree.copy() );\r
}\r
- _duplications_sum_stats.addValue( _duplications_sum );\r
+ _duplications_sum_stats.addValue( gsdi_result.getDuplicationsSum() );\r
+ \r
}\r
+ _min_duplications_sum = min_duplications_sum; \r
+ _speciations_sum = speciations_sum;\r
}\r
\r
public int getMinDuplicationsSum() {\r
return _duplications_sum_stats;\r
}\r
\r
// Accessor marked @Override now that GSDIR declares "implements GSDII".\r
// Hands back the _mapped_species_tree_nodes field itself; no copy is made here.\r
+ @Override\r
public Set<PhylogenyNode> getMappedExternalSpeciesTreeNodes() {\r
return _mapped_species_tree_nodes;\r
}\r
\r
// Accessor marked @Override now that GSDIR declares "implements GSDII".\r
// Hands back the _scientific_names_mapped_to_reduced_specificity field itself; no copy is made here.\r
+ @Override\r
public final SortedSet<String> getReMappedScientificNamesFromGeneTree() {\r
return _scientific_names_mapped_to_reduced_specificity;\r
}\r
\r
// Accessor marked @Override now that GSDIR declares "implements GSDII".\r
// Hands back the _stripped_gene_tree_nodes field itself; no copy is made here.\r
+ @Override\r
public List<PhylogenyNode> getStrippedExternalGeneTreeNodes() {\r
return _stripped_gene_tree_nodes;\r
}\r
\r
// Accessor marked @Override now that GSDIR declares "implements GSDII".\r
// Hands back the _stripped_species_tree_nodes field itself; no copy is made here.\r
+ @Override\r
public List<PhylogenyNode> getStrippedSpeciesTreeNodes() {\r
return _stripped_species_tree_nodes;\r
}\r
\r
// Accessor marked @Override now that GSDIR declares "implements GSDII".\r
// Returns the _tax_comp_base field, which the gsdi tool prints under "Mapping based on".\r
+ @Override\r
public TaxonomyComparisonBase getTaxCompBase() {\r
return _tax_comp_base;\r
}\r