final static public boolean REPLACE_UNDERSCORES_IN_NH_SPECIES_TREE = true;
final static private String STRIP_OPTION = "s";
+ final static private String ALLOW_STRIPPING_OF_GENE_TREE_OPTION = "g";
+
final static private String SDI_OPTION = "b";
final static private String MOST_PARSIMONIOUS_OPTION = "m";
final static private String GUESS_FORMAT_OF_SPECIES_TREE = "q";
final static private String HELP_OPTION_1 = "help";
final static private String HELP_OPTION_2 = "h";
final static private String DEFAULT_OUTFILE_SUFFIX = "_gsdi_out.phylo.xml";
+ final static private String SUFFIX_FOR_LIST_OF_STIPPED_GENE_TREE_NODES = "_stripped_gene_tree_nodes.txt";
+ final static private String SUFFIX_FOR_LOG_FILE = "_gsdi_log.txt";
final static private String PRG_NAME = "gsdi";
final static private String PRG_VERSION = "0.901";
final static private String PRG_DATE = "120608";
allowed_options.add( gsdi.SDI_OPTION );
allowed_options.add( gsdi.GUESS_FORMAT_OF_SPECIES_TREE );
allowed_options.add( gsdi.MOST_PARSIMONIOUS_OPTION );
+ allowed_options.add( gsdi.ALLOW_STRIPPING_OF_GENE_TREE_OPTION );
+
final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
if ( dissallowed_options.length() > 0 ) {
ForesterUtil.fatalError( gsdi.PRG_NAME, "unknown option(s): " + dissallowed_options );
boolean strip = false;
boolean most_parsimonous_duplication_model = false;
boolean species_tree_in_phyloxml = true;
+ boolean allow_stripping_of_gene_tree = false;
if ( cla.isOptionSet( gsdi.STRIP_OPTION ) ) {
strip = true;
}
if ( cla.isOptionSet( gsdi.GUESS_FORMAT_OF_SPECIES_TREE ) ) {
species_tree_in_phyloxml = false;
}
+ if ( cla.isOptionSet( gsdi.ALLOW_STRIPPING_OF_GENE_TREE_OPTION ) ) {
+ if ( use_sdise ) {
+ ForesterUtil.fatalError( gsdi.PRG_NAME, "Can only allow stripping of gene tree with GSDI" );
+ }
+ allow_stripping_of_gene_tree = true;
+ }
+
+
+
Phylogeny species_tree = null;
Phylogeny gene_tree = null;
File gene_tree_file = null;
System.out.println( "Using GSDI algorithm" );
System.out.println();
System.out.println( "Use most parsimonous duplication model: " + most_parsimonous_duplication_model );
- sdi = new GSDI( gene_tree, species_tree, most_parsimonous_duplication_model, true );
+ System.out.println( "Allow stripping of gene tree nodes : " + allow_stripping_of_gene_tree );
+ sdi = new GSDI( gene_tree, species_tree, most_parsimonous_duplication_model, allow_stripping_of_gene_tree );
+
}
}
catch ( final Exception e ) {
+ ( ( GSDI ) sdi ).getSpeciationOrDuplicationEventsSum() );
}
if ( !use_sdise ) {
- System.out.println( "Number speciations : " + ( ( GSDI ) sdi ).getSpeciationsSum() );
+ System.out.println( "Number of speciations : " + ( ( GSDI ) sdi ).getSpeciationsSum() );
}
System.out.println();
+ some stat on gene tree:
+ filename, name
+ number of external nodes, strppided nodes
+ some stats on sepcies tree, external nodes,
+ filename, name
+ internal nodes
+ how many of which are polytomies
+ //wrote log file to
+ if ( allow_stripping_of_gene_tree ) {
+ stripped x nodes, y external nodes remain
+
+
+ }
}
private static void print_help() {
+ " [-options] <gene tree in phyloXML format> <species tree in phyloXML format> [outfile]" );
System.out.println();
System.out.println( "Options:" );
- System.out.println( " -" + gsdi.STRIP_OPTION + ": to strip the species tree prior to duplication inference" );
- System.out.println( " -" + gsdi.SDI_OPTION + ": to use SDI algorithm instead of GSDI algorithm" );
+ System.out.println( " -" + gsdi.ALLOW_STRIPPING_OF_GENE_TREE_OPTION + ": to allow stripping of gene tree nodes without a matching species in the species tree (writes list of stripped nodes to " + );
+ System.out.println( " -" + gsdi.STRIP_OPTION + ": to strip the species tree of unneeded species prior to duplication inference" );
+ System.out.println( " -" + gsdi.SDI_OPTION + ": to use SDI algorithm instead of GSDI algorithm" );//TODO gsdi.ALLOW_STRIPPING_OF_GENE_TREE_OPTION not allowed
System.out.println( " -" + gsdi.MOST_PARSIMONIOUS_OPTION
+ ": use most parimonious duplication model for GSDI: " );
System.out.println( " assign nodes as speciations which would otherwise be assiged" );
System.out.println( " as unknown because of polytomies in the species tree" );
System.out.println( " -" + gsdi.GUESS_FORMAT_OF_SPECIES_TREE + ": to allow species tree in other formats than" );
System.out.println( " phyloXML (Newick, NHX, Nexus)" );
+
+
System.out.println();
System.out.println( "Species tree:" );
System.out.println( " In phyloXML format (unless option -" + gsdi.GUESS_FORMAT_OF_SPECIES_TREE
- + " is used), with taxonomy data in appropriate fields" );
+ + " is used, in which case the species matching between gene tree and species tree must be via scientific name), with taxonomy data in appropriate fields" );
System.out.println();
System.out.println( "Gene tree:" );
System.out.println( " In phyloXM format, with taxonomy and sequence data in appropriate fields" );
System.out.println();
+ System.out.println( "Example:" );
+ System.out.println( "gsdi
+ System.out.println();
System.out.println( "Note -- GSDI algorithm is under development" );
System.out.println();
}
private final boolean _strip_gene_tree;
private int _speciation_or_duplication_events_sum;
private int _speciations_sum;
+ private final Set<PhylogenyNode> _stripped_gene_tree_nodes;
/**
* Constructor which sets the gene tree and the species tree to be compared.
_speciations_sum = 0;
_most_parsimonious_duplication_model = most_parsimonious_duplication_model;
_transversal_counts = new HashMap<PhylogenyNode, Integer>();
- _duplications_sum = 0;
+ _duplications_sum = 0;
_strip_gene_tree = strip_gene_tree;
+ _stripped_gene_tree_nodes = new HashSet<PhylogenyNode>();
getSpeciesTree().preOrderReId();
linkNodesOfG();
geneTreePostOrderTraversal( getGeneTree().getRoot() );
*/
@Override
final void linkNodesOfG() {
- final HashMap<Taxonomy, PhylogenyNode> speciestree_ext_nodes = new HashMap<Taxonomy, PhylogenyNode>();
- for( final PhylogenyNodeIterator iter = _species_tree.iteratorLevelOrder(); iter.hasNext(); ) {
- final PhylogenyNode n = iter.next();
- if ( n.getNodeData().isHasTaxonomy() ) {
- if ( speciestree_ext_nodes.containsKey( n.getNodeData().getTaxonomy() ) ) {
- throw new IllegalArgumentException( "taxonomy [" + n.getNodeData().getTaxonomy()
- + "] is not unique in species phylogeny" );
- }
- speciestree_ext_nodes.put( n.getNodeData().getTaxonomy(), n );
- }
- }
+ final HashMap<Taxonomy, PhylogenyNode> speciestree_ext_nodes = createTaxonomyToNodeMap();
if ( _strip_gene_tree ) {
stripGeneTree( speciestree_ext_nodes );
if ( ( _gene_tree == null ) || ( _gene_tree.getNumberOfExternalNodes() < 2 ) ) {
}
}
+ final private HashMap<Taxonomy, PhylogenyNode> createTaxonomyToNodeMap() {
+ final HashMap<Taxonomy, PhylogenyNode> speciestree_ext_nodes = new HashMap<Taxonomy, PhylogenyNode>();
+ for( final PhylogenyNodeIterator iter = _species_tree.iteratorLevelOrder(); iter.hasNext(); ) {
+ final PhylogenyNode n = iter.next();
+ if ( n.getNodeData().isHasTaxonomy() ) {
+ if ( speciestree_ext_nodes.containsKey( n.getNodeData().getTaxonomy() ) ) {
+ throw new IllegalArgumentException( "taxonomy [" + n.getNodeData().getTaxonomy()
+ + "] is not unique in species phylogeny" );
+ }
+ speciestree_ext_nodes.put( n.getNodeData().getTaxonomy(), n );
+ }
+ }
+ return speciestree_ext_nodes;
+ }
+
private final void stripGeneTree( final HashMap<Taxonomy, PhylogenyNode> speciestree_ext_nodes ) {
- final Set<PhylogenyNode> to_delete = new HashSet<PhylogenyNode>();
+ // final Set<PhylogenyNode> to_delete = new HashSet<PhylogenyNode>();
+
for( final PhylogenyNodeIterator iter = _gene_tree.iteratorExternalForward(); iter.hasNext(); ) {
final PhylogenyNode g = iter.next();
if ( !g.getNodeData().isHasTaxonomy() ) {
}
final PhylogenyNode s = speciestree_ext_nodes.get( g.getNodeData().getTaxonomy() );
if ( s == null ) {
- to_delete.add( g );
+ _stripped_gene_tree_nodes.add( g );
}
}
- for( final PhylogenyNode n : to_delete ) {
+ for( final PhylogenyNode n : _stripped_gene_tree_nodes ) {
_gene_tree.deleteSubtree( n, true );
- System.out.println( "deleted node from gene tree: " + n );
+
}
+
+ }
+
+ public Set<PhylogenyNode> getStrippedExternalGeneTreeNodes() {
+ return _stripped_gene_tree_nodes;
}
@Override