import java.util.ArrayList;
import java.util.Date;
import java.util.List;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
import org.forester.io.parsers.PhylogenyParser;
import org.forester.io.parsers.nhx.NHXParser;
ForesterUtil.fatalError( gsdi.PRG_NAME, "species tree is not completely binary, use GSDI instead" );
}
}
- // For timing.
- // gene_tree = Helper.createBalancedTree( 10 );
- // species_tree = Helper.createBalancedTree( 13 );
- // species_tree = Helper.createUnbalancedTree( 1024 );
- // gene_tree = Helper.createUnbalancedTree( 8192 );
- // species_tree = gene_tree.copyTree();
- // gene_tree = species_tree.copyTree();
- // Helper.numberSpeciesInOrder( species_tree );
- // Helper.numberSpeciesInOrder( gene_tree );
- // Helper.randomizeSpecies( 1, 8192, gene_tree );
- // Helper.intervalNumberSpecies( gene_tree, 4096 );
- // Helper.numberSpeciesInDescOrder( gene_tree );
log_writer.println( PRG_NAME + " - " + PRG_DESC );
log_writer.println( " version : " + PRG_VERSION );
log_writer.println( " date : " + PRG_DATE );
log_writer.println( " forester version: " + ForesterConstants.FORESTER_VERSION );
- log_writer.println( "Start time: " + new SimpleDateFormat( "yyyyMMdd HH:mm:ss" ).format( new Date() ) );
- log_writer.println( "Gene tree file: " + gene_tree_file.getCanonicalPath() );
- log_writer.println( "Gene tree name: "
+ log_writer.println();
+ log_writer.println( "Start time : "
+ + new SimpleDateFormat( "yyyyMMdd HH:mm:ss" ).format( new Date() ) );
+ System.out.println( "Start time : "
+ + new SimpleDateFormat( "yyyyMMdd HH:mm:ss" ).format( new Date() ) );
+ log_writer.println( "Gene tree file : " + gene_tree_file.getCanonicalPath() );
+ System.out.println( "Gene tree file : " + gene_tree_file.getCanonicalPath() );
+ log_writer.println( "Gene tree name : "
+ ( ForesterUtil.isEmpty( gene_tree.getName() ) ? "" : gene_tree.getName() ) );
- log_writer.println( "Species tree file: " + species_tree_file.getCanonicalPath() );
- log_writer.println( "Species tree name: "
+ System.out.println( "Gene tree name : "
+ + ( ForesterUtil.isEmpty( gene_tree.getName() ) ? "" : gene_tree.getName() ) );
+ log_writer.println( "Species tree file : " + species_tree_file.getCanonicalPath() );
+ System.out.println( "Species tree file : " + species_tree_file.getCanonicalPath() );
+ log_writer.println( "Species tree name : "
+ + ( ForesterUtil.isEmpty( species_tree.getName() ) ? "" : gene_tree.getName() ) );
+ System.out.println( "Species tree name : "
+ ( ForesterUtil.isEmpty( species_tree.getName() ) ? "" : gene_tree.getName() ) );
- System.out.println();
SDI sdi = null;
final long start_time = new Date().getTime();
try {
if ( base_algorithm == BASE_ALGORITHM.GSDI ) {
- System.out.println();
- System.out.println( "Use most parsimonous duplication model: " + most_parsimonous_duplication_model );
- System.out.println( "Allow stripping of gene tree nodes : " + allow_stripping_of_gene_tree );
- log_writer.println( "Use most parsimonous duplication model: " + most_parsimonous_duplication_model );
- log_writer.write( "Allow stripping of gene tree nodes : " + allow_stripping_of_gene_tree );
+ System.out.println( "Use most parsimonous duplication model : " + most_parsimonous_duplication_model );
+ System.out.println( "Allow stripping of gene tree nodes : " + allow_stripping_of_gene_tree );
+ log_writer.println( "Use most parsimonous duplication model : " + most_parsimonous_duplication_model );
+ log_writer.println( "Allow stripping of gene tree nodes : " + allow_stripping_of_gene_tree );
log_writer.flush();
sdi = new GSDI( gene_tree,
species_tree,
e.printStackTrace();
System.exit( -1 );
}
- System.out.println();
- System.out.println( "Running time (excluding I/O): " + ( new Date().getTime() - start_time ) + "ms" );
- log_writer.println( "Running time (excluding I/O): " + ( new Date().getTime() - start_time ) + "ms" );
+ System.out.println( "Running time (excluding I/O) : " + ( new Date().getTime() - start_time )
+ + "ms" );
+ log_writer.println( "Running time (excluding I/O) : " + ( new Date().getTime() - start_time )
+ + "ms" );
+ if ( ( base_algorithm == BASE_ALGORITHM.GSDI ) ) {
+ final GSDI gsdi = ( GSDI ) sdi;
+ System.out.println( "Mapping based on : " + gsdi.getTaxCompBase() );
+ log_writer.println( "Mapping based on : " + gsdi.getTaxCompBase() );
+ }
try {
final PhylogenyWriter writer = new PhylogenyWriter();
writer.toPhyloXML( out_file, gene_tree, 0 );
catch ( final IOException e ) {
ForesterUtil.fatalError( PRG_NAME, "Failed to write to [" + out_file + "]: " + e.getMessage() );
}
- System.out.println();
- System.out.println( "Wrote resulting gene tree to: " + out_file );
- System.out.println();
- log_writer.println( "Wrote resulting gene tree to: " + out_file );
+ System.out.println( "Wrote resulting gene tree to : " + out_file );
+ log_writer.println( "Wrote resulting gene tree to : " + out_file );
if ( base_algorithm == BASE_ALGORITHM.SDI ) {
sdi.computeMappingCostL();
- System.out.println( "Mapping cost : " + sdi.computeMappingCostL() );
- log_writer.println( "Mapping cost : " + sdi.computeMappingCostL() );
+ System.out.println( "Mapping cost : " + sdi.computeMappingCostL() );
+ log_writer.println( "Mapping cost : " + sdi.computeMappingCostL() );
}
- System.out.println( "Number of duplications : " + sdi.getDuplicationsSum() );
- log_writer.println( "Number of duplications : " + sdi.getDuplicationsSum() );
- if ( ( base_algorithm == BASE_ALGORITHM.GSDI ) ) {
+ else if ( ( base_algorithm == BASE_ALGORITHM.GSDI ) ) {
final GSDI gsdi = ( GSDI ) sdi;
final File species_tree_used_file = new File( out_file + SUFFIX_FOR_SPECIES_TREE_USED );
try {
ForesterUtil.fatalError( PRG_NAME,
"Failed to write to [" + species_tree_used_file + "]: " + e.getMessage() );
}
- System.out.println();
- System.out.println( "Wrote used species tree to: " + species_tree_used_file );
- System.out.println();
- log_writer.println( "Wrote used species tree to: " + species_tree_used_file );
+ System.out.println( "Wrote (stripped) species tree to : " + species_tree_used_file );
+ log_writer.println( "Wrote (stripped) species tree to : " + species_tree_used_file );
+ }
+ System.out.println( "Number of external nodes in gene tree : " + gene_tree.getNumberOfExternalNodes() );
+ log_writer.println( "Number of external nodes in gene tree : " + gene_tree.getNumberOfExternalNodes() );
+ System.out.println( "Number of external nodes in species tree : "
+ + sdi.getSpeciesTree().getNumberOfExternalNodes() );
+ log_writer.println( "Number of external nodes in species tree : "
+ + sdi.getSpeciesTree().getNumberOfExternalNodes() );
+ if ( ( base_algorithm == BASE_ALGORITHM.GSDI ) ) {
+ final GSDI gsdi = ( GSDI ) sdi;
+ final int poly = PhylogenyMethods.countNumberOfPolytomies( gsdi.getSpeciesTree() );
+ System.out.println( "Number of polytomies in species tree : " + poly );
+ log_writer.println( "Number of polytomies in species tree : " + poly );
+ System.out.println( "External nodes stripped from gene tree : "
+ + gsdi.getStrippedExternalGeneTreeNodes().size() );
+ log_writer.println( "External nodes stripped from gene tree : "
+ + gsdi.getStrippedExternalGeneTreeNodes().size() );
+ System.out.println( "External nodes stripped from species tree: "
+ + gsdi.getStrippedSpeciesTreeNodes().size() );
+ log_writer.println( "External nodes stripped from species tree: "
+ + gsdi.getStrippedSpeciesTreeNodes().size() );
+ }
+ System.out.println();
+ System.out.println( "Number of duplications : " + sdi.getDuplicationsSum() );
+ log_writer.println( "Number of duplications : " + sdi.getDuplicationsSum() );
+ if ( ( base_algorithm == BASE_ALGORITHM.GSDI ) ) {
+ final GSDI gsdi = ( GSDI ) sdi;
if ( !most_parsimonous_duplication_model ) {
- final int duplications = gsdi.getSpeciationOrDuplicationEventsSum();
- System.out.println( "Number of potential duplications: " + duplications );
- log_writer.println( "Number of potential duplications: " + duplications );
- }
- final int spec = gsdi.getSpeciationsSum();
- System.out.println( "Number of speciations : " + spec );
- log_writer.println( "Number of speciations : " + spec );
- for( final PhylogenyNode n : gsdi.getMappedExternalSpeciesTreeNodes() ) {
- System.out.println( n.toString() );
+ final int u = gsdi.getSpeciationOrDuplicationEventsSum();
+ System.out.println( "Number of potential duplications : " + u );
+ log_writer.println( "Number of potential duplications : " + u );
}
+ System.out.println( "Number of speciations : " + gsdi.getSpeciationsSum() );
+ log_writer.println( "Number of speciations : " + gsdi.getSpeciationsSum() );
+ log_writer.println();
+ printMappedNodesToLog( log_writer, gsdi );
+ log_writer.println();
+ printStrippedGeneTreeNodesToLog( log_writer, gsdi );
+ log_writer.println();
+ printStrippedSpeciesTreeNodesToLog( log_writer, gsdi );
}
System.out.println();
+ System.out.println( "Wrote log to : " + log_file );
+ System.out.println();
log_writer.close();
- // some stat on gene tree:
- // filename, name
- // number of external nodes, strppided nodes
- // some stats on sepcies tree, external nodes,
- // filename, name
- // internal nodes
- // how many of which are polytomies
- //wrote log file to
- // if ( allow_stripping_of_gene_tree ) {
- // stripped x nodes, y external nodes remain
- // }
+ }
+
+ private static void printMappedNodesToLog( final EasyWriter log_writer, final GSDI gsdi ) throws IOException {
+ final SortedSet<String> ss = new TreeSet<String>();
+ for( final PhylogenyNode n : gsdi.getMappedExternalSpeciesTreeNodes() ) {
+ ss.add( n.toString() );
+ }
+ log_writer.println( "The following " + ss.size() + " species were used: " );
+ for( final String s : ss ) {
+ log_writer.println( " " + s );
+ }
+ }
+
+ private static void printStrippedGeneTreeNodesToLog( final EasyWriter log_writer, final GSDI gsdi )
+ throws IOException {
+ final SortedMap<String, Integer> sm = new TreeMap<String, Integer>();
+ for( final PhylogenyNode n : gsdi.getStrippedExternalGeneTreeNodes() ) {
+ final String s = n.toString();
+ if ( sm.containsKey( s ) ) {
+ sm.put( s, sm.get( s ) + 1 );
+ }
+ else {
+ sm.put( s, 1 );
+ }
+ }
+ log_writer.println( "The following " + sm.size() + " nodes were stripped from the gene tree: " );
+ for( final String s : sm.keySet() ) {
+ final int count = sm.get( s );
+ if ( count == 1 ) {
+ log_writer.println( " " + s );
+ }
+ else {
+ log_writer.println( " " + s + " [" + count + "]" );
+ }
+ }
+ }
+
+ private static void printStrippedSpeciesTreeNodesToLog( final EasyWriter log_writer, final GSDI gsdi )
+ throws IOException {
+ final SortedSet<String> ss = new TreeSet<String>();
+ for( final PhylogenyNode n : gsdi.getStrippedSpeciesTreeNodes() ) {
+ ss.add( n.toString() );
+ }
+ log_writer.println( "The following " + ss.size() + " nodes were stripped from the species tree: " );
+ for( final String n : ss ) {
+ log_writer.println( " " + n );
+ }
}
private static void print_help() {
private final List<PhylogenyNode> _stripped_gene_tree_nodes;
private final List<PhylogenyNode> _stripped_species_tree_nodes;
private final Set<PhylogenyNode> _mapped_species_tree_nodes;
+ private TaxonomyComparisonBase _tax_comp_base;
public GSDI( final Phylogeny gene_tree,
final Phylogeny species_tree,
g.getNodeData().setEvent( createDuplicationEvent() );
}
else {
- g.getNodeData().setEvent( createSingleSpeciationOrDuplicationEvent() );
+ if ( _most_parsimonious_duplication_model ) {
+ g.getNodeData().setEvent( createSpeciationEvent() );
+ }
+ else {
+ g.getNodeData().setEvent( createSingleSpeciationOrDuplicationEvent() );
+ }
}
}
else {
final void linkNodesOfG() throws SDIException {
final Map<String, PhylogenyNode> species_to_node_map = new HashMap<String, PhylogenyNode>();
final List<PhylogenyNode> species_tree_ext_nodes = new ArrayList<PhylogenyNode>();
- final TaxonomyComparisonBase tax_comp_base = determineTaxonomyComparisonBase( _gene_tree );
+ _tax_comp_base = determineTaxonomyComparisonBase( _gene_tree );
// System.out.println( "comp base is: " + tax_comp_base );
// Stringyfied taxonomy is the key, node is the value.
for( final PhylogenyNodeIterator iter = _species_tree.iteratorExternalForward(); iter.hasNext(); ) {
final PhylogenyNode s = iter.next();
species_tree_ext_nodes.add( s );
- final String tax_str = taxonomyToString( s, tax_comp_base );
+ final String tax_str = taxonomyToString( s, _tax_comp_base );
if ( !ForesterUtil.isEmpty( tax_str ) ) {
if ( species_to_node_map.containsKey( tax_str ) ) {
throw new SDIException( "taxonomy \"" + s + "\" is not unique in species tree" );
}
}
else {
- final String tax_str = taxonomyToString( g, tax_comp_base );
+ final String tax_str = taxonomyToString( g, _tax_comp_base );
if ( ForesterUtil.isEmpty( tax_str ) ) {
if ( _strip_gene_tree ) {
_stripped_gene_tree_nodes.add( g );
}
} // for loop
if ( _strip_gene_tree ) {
- for( final PhylogenyNode g : _stripped_gene_tree_nodes ) {
- _gene_tree.deleteSubtree( g, true );
- }
+ stripGeneTree();
}
if ( _strip_species_tree ) {
- for( final PhylogenyNode s : species_tree_ext_nodes ) {
- if ( !_mapped_species_tree_nodes.contains( s ) ) {
- _species_tree.deleteSubtree( s, true );
- }
+ stripSpeciesTree( species_tree_ext_nodes );
+ }
+ }
+
+ public TaxonomyComparisonBase getTaxCompBase() {
+ return _tax_comp_base;
+ }
+
+ private void stripSpeciesTree( final List<PhylogenyNode> species_tree_ext_nodes ) {
+ for( final PhylogenyNode s : species_tree_ext_nodes ) {
+ if ( !_mapped_species_tree_nodes.contains( s ) ) {
+ _species_tree.deleteSubtree( s, true );
+ _stripped_species_tree_nodes.add( s );
}
}
}
+ public List<PhylogenyNode> getStrippedSpeciesTreeNodes() {
+ return _stripped_species_tree_nodes;
+ }
+
+ private void stripGeneTree() {
+ for( final PhylogenyNode g : _stripped_gene_tree_nodes ) {
+ _gene_tree.deleteSubtree( g, true );
+ }
+ }
+
public Set<PhylogenyNode> getMappedExternalSpeciesTreeNodes() {
return _mapped_species_tree_nodes;
}
import org.forester.archaeopteryx.Archaeopteryx;
import org.forester.development.DevelopmentTools;
import org.forester.io.parsers.nhx.NHXParser;
+import org.forester.io.parsers.util.ParserUtils;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyMethods;
import org.forester.phylogeny.data.Event;
import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
import org.forester.phylogeny.factories.PhylogenyFactory;
+import org.forester.util.ForesterUtil;
public final class TestGSDI {
+ private final static String PATH_TO_TEST_DATA = System.getProperty( "user.dir" ) + ForesterUtil.getFileSeparator()
+ + "test_data" + ForesterUtil.getFileSeparator();
+
private final static Phylogeny createPhylogeny( final String nhx ) throws IOException {
final Phylogeny p = ParserBasedPhylogenyFactory.getInstance().create( nhx, new NHXParser() )[ 0 ];
p.setRooted( true );
if ( !TestGSDI.getEvent( g2_32, "a1", "z" ).isSpeciationOrDuplication() ) {
return false;
}
- // // //-
- final Phylogeny g2_33_d = TestGSDI
- .createPhylogeny( "((((((((((((a1[&&NHX:S=a1],a2[&&NHX:S=a2])[&&NHX:D=N],b1[&&NHX:S=b1])[&&NHX:D=N],c1[&&NHX:S=c1])[&&NHX:D=?],d1[&&NHX:S=d1])[&&NHX:D=?],x[&&NHX:S=x])[&&NHX:D=N],p1[&&NHX:S=p1])[&&NHX:D=?],i1[&&NHX:S=i1])[&&NHX:D=?],k2[&&NHX:S=k2])[&&NHX:D=Y],e1[&&NHX:S=e1])[&&NHX:D=Y],y[&&NHX:S=y])[&&NHX:D=Y],z[&&NHX:S=z])[&&NHX:D=?],(((((((((((a1[&&NHX:S=a1],a2[&&NHX:S=a2])[&&NHX:D=N],b1[&&NHX:S=b1])[&&NHX:D=N],c1[&&NHX:S=c1])[&&NHX:D=?],d1[&&NHX:S=d1])[&&NHX:D=?],x[&&NHX:S=x])[&&NHX:D=N],p1[&&NHX:S=p1])[&&NHX:D=?],i1[&&NHX:S=i1])[&&NHX:D=?],k2[&&NHX:S=k2])[&&NHX:D=Y],e1[&&NHX:S=e1])[&&NHX:D=Y],y[&&NHX:S=y])[&&NHX:D=Y],z[&&NHX:S=z])[&&NHX:D=?])" );
- final GSDI sdi2_33_d = new GSDI( g2_33_d, s2, false );
- Archaeopteryx.createApplication( g2_33_d );
- // Archaeopteryx.createApplication( s2 );
- //-
final Phylogeny g2_33 = TestGSDI
.createPhylogeny( "(((((((((((a1[&&NHX:S=a1],a2[&&NHX:S=a2]),b1[&&NHX:S=b1]),c1[&&NHX:S=c1]),d1[&&NHX:S=d1]),x[&&NHX:S=x]),p1[&&NHX:S=p1]),i1[&&NHX:S=i1]),k2[&&NHX:S=k2]),e1[&&NHX:S=e1]),y[&&NHX:S=y]),z[&&NHX:S=z])" );
final GSDI sdi2_33 = new GSDI( g2_33, s2, false );
- Archaeopteryx.createApplication( g2_33 );
- Archaeopteryx.createApplication( s2 );
if ( sdi2_33.getDuplicationsSum() != 1 ) {
return false;
}
if ( !TestGSDI.getEvent( g2_33, "a1", "z" ).isSpeciationOrDuplication() ) {
return false;
}
+ final Phylogeny g2_33_d = TestGSDI
+ .createPhylogeny( "((((((((((((a1[&&NHX:S=a1],a2[&&NHX:S=a2])[&&NHX:D=N],b1[&&NHX:S=b1])[&&NHX:D=N],c1[&&NHX:S=c1])[&&NHX:D=?],d1[&&NHX:S=d1])[&&NHX:D=?],x[&&NHX:S=x])[&&NHX:D=N],p1[&&NHX:S=p1])[&&NHX:D=?],i1[&&NHX:S=i1])[&&NHX:D=?],k2[&&NHX:S=k2])[&&NHX:D=Y],e1[&&NHX:S=e1])[&&NHX:D=Y],y[&&NHX:S=y])[&&NHX:D=Y],z[&&NHX:S=z])[&&NHX:D=?],(((((((((((a1[&&NHX:S=a1],a2[&&NHX:S=a2])[&&NHX:D=N],b1[&&NHX:S=b1])[&&NHX:D=N],c1[&&NHX:S=c1])[&&NHX:D=?],d1[&&NHX:S=d1])[&&NHX:D=?],x[&&NHX:S=x])[&&NHX:D=N],p1[&&NHX:S=p1])[&&NHX:D=?],i1[&&NHX:S=i1])[&&NHX:D=?],k2[&&NHX:S=k2])[&&NHX:D=Y],e1[&&NHX:S=e1])[&&NHX:D=Y],y[&&NHX:S=y])[&&NHX:D=Y],z[&&NHX:S=z])[&&NHX:D=?])" );
+ final GSDI sdi2_33_d = new GSDI( g2_33_d, s2, false );
+ if ( sdi2_33_d.getDuplicationsSum() != 3 ) {
+ return false;
+ }
+ if ( sdi2_33_d.getSpeciationOrDuplicationEventsSum() != 14 ) {
+ return false;
+ }
+ if ( sdi2_33_d.getSpeciationsSum() != 6 ) {
+ return false;
+ }
final Phylogeny g2_34 = TestGSDI
.createPhylogeny( "(((n1_0[&&NHX:S=n1],n2_0[&&NHX:S=n2]),(n1_1[&&NHX:S=n1],n3_0[&&NHX:S=n3])),n4_0[&&NHX:S=n4])" );
final GSDI sdi2_34 = new GSDI( g2_34, s2, false );
if ( sdi7_4_2.getSpeciationsSum() != 5 ) {
return false;
}
- //---------------------
final String g2_0_ = "(([&&NHX:S=a1],[&&NHX:S=a2]),([&&NHX:S=o2],[&&NHX:S=o4]))";
final Phylogeny g2_0p = TestGSDI.createPhylogeny( g2_0_ );
g2_0.setRooted( true );
if ( sdi2_0p.getDuplicationsSum() != 0 ) {
return false;
}
+ //--
+ final Phylogeny tol_143_ = ParserUtils.readPhylogenies( PATH_TO_TEST_DATA + "tol_143.xml" )[ 0 ];
+ final Phylogeny gene_tree_tax_code_4_ = ParserUtils.readPhylogenies( PATH_TO_TEST_DATA
+ + "gene_tree_tax_code_4.xml" )[ 0 ];
+ final GSDI gsdi_143_4_1 = new GSDI( gene_tree_tax_code_4_.copy(), tol_143_.copy(), false, true, true );
+ Archaeopteryx.createApplication( gsdi_143_4_1.getGeneTree() );
+ if ( gsdi_143_4_1.getDuplicationsSum() != 21 ) {
+ return false;
+ }
+ if ( gsdi_143_4_1.getSpeciationsSum() != 28 ) {
+ return false;
+ }
+ if ( gsdi_143_4_1.getSpeciationOrDuplicationEventsSum() != 6 ) {
+ return false;
+ }
}
catch ( final Exception e ) {
e.printStackTrace( System.out );