import java.util.Date;
import java.util.List;
+import org.forester.archaeopteryx.Archaeopteryx;
+import org.forester.io.parsers.PhylogenyParser;
import org.forester.io.parsers.phyloxml.PhyloXmlParser;
+import org.forester.io.parsers.util.ParserUtils;
import org.forester.io.writers.PhylogenyWriter;
import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyMethods;
import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
import org.forester.phylogeny.factories.PhylogenyFactory;
import org.forester.sdi.GSDI;
public final class gsdi {
// Command-line option letters and program metadata.
// The '-' lines are the previous set of constants; the '+' lines replace them.
- final static private String STRIP_OPTION = "s";
- final static private String GSDI_OPTION = "g";
- final static private String MOST_PARSIMONIOUS_OPTION = "m";
- final static private String HELP_OPTION_1 = "help";
- final static private String HELP_OPTION_2 = "h";
- final static private String DEFAULT_OUTFILE = "sdi_out.xml";
- final static private String PRG_NAME = "gsdi";
- final static private String PRG_VERSION = "0.4";
- final static private String PRG_DATE = "120607";
- final static private String PRG_DESC = "general speciation duplication inference";
- final static private String E_MAIL = "phylosoft@gmail.com";
- final static private String WWW = "www.phylosoft.org/forester/";
// -s: strip the species tree prior to duplication inference.
+ final static private String STRIP_OPTION = "s";
// -b: use the SDI algorithm instead of GSDI (sets use_sdise in main).
+ final static private String SDI_OPTION = "b";
// -m: use the most parsimonious duplication model for GSDI.
+ final static private String MOST_PARSIMONIOUS_OPTION = "m";
// -q: allow the species tree to be in a format other than phyloXML.
+ final static private String GUESS_FORMAT_OF_SPECIES_TREE = "q";
+ final static private String HELP_OPTION_1 = "help";
+ final static private String HELP_OPTION_2 = "h";
// NOTE(review): presumably the output name used when [outfile] is omitted -- confirm in main.
+ final static private String DEFAULT_OUTFILE = "gsdi_out.phylo.xml";
+ final static private String PRG_NAME = "gsdi";
+ final static private String PRG_VERSION = "0.4";
+ final static private String PRG_DATE = "120607";
+ final static private String PRG_DESC = "general speciation duplication inference";
+ final static private String E_MAIL = "phylosoft@gmail.com";
+ final static private String WWW = "www.phylosoft.org/forester";
public static void main( final String args[] ) {
ForesterUtil.printProgramInformation( PRG_NAME,
}
final List<String> allowed_options = new ArrayList<String>();
allowed_options.add( gsdi.STRIP_OPTION );
- allowed_options.add( gsdi.GSDI_OPTION );
+ allowed_options.add( gsdi.SDI_OPTION );
+ allowed_options.add( gsdi.GUESS_FORMAT_OF_SPECIES_TREE );
allowed_options.add( gsdi.MOST_PARSIMONIOUS_OPTION );
final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
if ( dissallowed_options.length() > 0 ) {
ForesterUtil.fatalError( gsdi.PRG_NAME, "unknown option(s): " + dissallowed_options );
}
- boolean use_sdise = true;
+ boolean use_sdise = false;
boolean strip = false;
boolean most_parsimonous_duplication_model = false;
+ boolean species_tree_in_phyloxml = true;
if ( cla.isOptionSet( gsdi.STRIP_OPTION ) ) {
strip = true;
}
- if ( cla.isOptionSet( gsdi.GSDI_OPTION ) ) {
- use_sdise = false;
+ if ( cla.isOptionSet( gsdi.SDI_OPTION ) ) {
+ use_sdise = true;
}
if ( cla.isOptionSet( gsdi.MOST_PARSIMONIOUS_OPTION ) ) {
if ( use_sdise ) {
}
most_parsimonous_duplication_model = true;
}
+ if ( cla.isOptionSet( gsdi.GUESS_FORMAT_OF_SPECIES_TREE ) ) {
+ species_tree_in_phyloxml = false;
+ }
Phylogeny species_tree = null;
Phylogeny gene_tree = null;
File gene_tree_file = null;
}
// Load the species tree: parsed as phyloXML by default; when the format is to be
// guessed, ParserUtils selects a parser for the file and the node names are then
// transferred into the taxonomy scientific-name field.
try {
final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
- species_tree = factory.create( species_tree_file, new PhyloXmlParser() )[ 0 ];
+ if ( species_tree_in_phyloxml ) {
+ species_tree = factory.create( species_tree_file, new PhyloXmlParser() )[ 0 ];
+ }
+ else {
+ final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( species_tree_file, true );
+ species_tree = factory.create( species_tree_file, p )[ 0 ];
+ PhylogenyMethods.transferNodeNameToField( species_tree,
+ PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME );
+
+ }
}
catch ( final IOException e ) {
// Name the file that was actually being read (the species tree, not the gene tree).
ForesterUtil.fatalError( gsdi.PRG_NAME,
"Failed to read species tree from [" + species_tree_file + "]: " + e.getMessage() );
}
+
try {
final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
gene_tree = factory.create( gene_tree_file, new PhyloXmlParser() )[ 0 ];
gene_tree.setRooted( true );
species_tree.setRooted( true );
if ( !gene_tree.isCompletelyBinary() ) {
- ForesterUtil.fatalError( gsdi.PRG_NAME, "gene tree (\"" + gene_tree_file + "\") is not completely binary." );
+ ForesterUtil.fatalError( gsdi.PRG_NAME, "gene tree (\"" + gene_tree_file + "\") is not completely binary" );
}
if ( use_sdise ) {
if ( !species_tree.isCompletelyBinary() ) {
ForesterUtil.fatalError( gsdi.PRG_NAME, "species tree (\"" + species_tree_file
- + "\") is not completely binary." );
+ + "\") is not completely binary" );
}
}
// For timing.
try {
if ( use_sdise ) {
System.out.println();
- System.out.println( "Using SDIse algorithm." );
+ System.out.println( "Using SDIse algorithm" );
sdi = new SDIse( gene_tree, species_tree );
}
else {
System.out.println();
- System.out.println( "Using GSDI algorithm." );
+ System.out.println( "Using GSDI algorithm" );
System.out.println();
System.out.println( "Use most parsimonous duplication model: " + most_parsimonous_duplication_model );
sdi = new GSDI( gene_tree, species_tree, most_parsimonous_duplication_model );
writer.toPhyloXML( out_file, gene_tree, 0 );
}
catch ( final IOException e ) {
- ForesterUtil.fatalError( PRG_NAME, "Failed to write to \"" + out_file + "\" [" + e.getMessage() + "]" );
+ ForesterUtil.fatalError( PRG_NAME, "Failed to write to [" + out_file + "]: " + e.getMessage() );
}
System.out.println();
System.out.println( "Successfully wrote resulting gene tree to: " + out_file );
System.out.println( "Number speciations : " + ( ( GSDI ) sdi ).getSpeciationsSum() );
}
System.out.println();
- }
+ }
/**
 * Prints the usage line, the supported options and the expected input
 * formats for the gene tree and species tree to standard output.
 */
private static void print_help() {
- System.out.println( "Usage: \"" + gsdi.PRG_NAME
- + " [-options] <gene tree in phyloXML format> <species tree in phyloXML format> [outfile]\"" );
+ System.out.println( "Usage: " + gsdi.PRG_NAME
+ + " [-options] <gene tree in phyloXML format> <species tree in phyloXML format> [outfile]" );
System.out.println();
System.out.println( "Options:" );
System.out.println( " -" + gsdi.STRIP_OPTION + ": to strip the species tree prior to duplication inference" );
- System.out.println( " -" + gsdi.GSDI_OPTION
- + ": to use GSDI algorithm instead of SDIse algorithm" );
- System.out
- .println( " -" + gsdi.MOST_PARSIMONIOUS_OPTION + ": use most parimonious duplication model for GSDI: " );
+ System.out.println( " -" + gsdi.SDI_OPTION + ": to use SDI algorithm instead of GSDI algorithm" );
+ System.out.println( " -" + gsdi.MOST_PARSIMONIOUS_OPTION
+ + ": use most parsimonious duplication model for GSDI: " );
System.out.println( " assign nodes as speciations which would otherwise be assigned" );
System.out.println( " as unknown because of polytomies in the species tree" );
+ System.out.println( " -" + gsdi.GUESS_FORMAT_OF_SPECIES_TREE + ": to allow species tree in other formats than" );
+ System.out.println( " phyloXML (Newick, NHX, Nexus)" );
System.out.println();
System.out.println( "Species tree:" );
- System.out.println( " In phyloXML format, with taxonomy data in appropriate fields." );
+ System.out.println( " In phyloXML format (unless option " + gsdi.GUESS_FORMAT_OF_SPECIES_TREE
+ + " is used), with taxonomy data in appropriate fields" );
System.out.println();
System.out.println( "Gene tree:" );
- System.out.println( " In phyloXM format, with taxonomy and sequence data in appropriate fields." );
+ System.out.println( " In phyloXML format, with taxonomy and sequence data in appropriate fields" );
System.out.println();
- System.out.println( "Note. GSDI algorithm is under development." );
+ System.out.println( "Note -- GSDI algorithm is under development" );
System.out.println();
}
}
*
* @author Christian M. Zmasek
*/
-public class GSDI extends SDI {
+public final class GSDI extends SDI {
// Per-node counters; read by getTraversalCount() and updated by increaseTraversalCount().
private final HashMap<PhylogenyNode, Integer> _transversal_counts;
// Reported by toString(). NOTE(review): presumably set from the constructor's flag -- confirm.
private final boolean _most_parsimonious_duplication_model;
geneTreePostOrderTraversal( getGeneTree().getRoot() );
}
/**
 * Creates a single-duplication event and counts it.
 *
 * @return the event obtained from Event.createSingleDuplicationEvent()
 */
- private Event createDuplicationEvent() {
+ private final Event createDuplicationEvent() {
final Event event = Event.createSingleDuplicationEvent();
// Keep the running duplication total in step with the events handed out.
++_duplications_sum;
return event;
}
/**
 * Creates a single speciation-or-duplication event and counts it.
 *
 * @return the event obtained from Event.createSingleSpeciationOrDuplicationEvent()
 */
- private Event createSingleSpeciationOrDuplicationEvent() {
+ private final Event createSingleSpeciationOrDuplicationEvent() {
final Event event = Event.createSingleSpeciationOrDuplicationEvent();
// Keep the running total of ambiguous events in step with the events handed out.
++_speciation_or_duplication_events_sum;
return event;
}
/**
 * Creates a single-speciation event and counts it.
 *
 * @return the event obtained from Event.createSingleSpeciationEvent()
 */
- private Event createSpeciationEvent() {
+ private final Event createSpeciationEvent() {
final Event event = Event.createSingleSpeciationEvent();
// Keep the running speciation total in step with the events handed out.
++_speciations_sum;
return event;
}
// s is the node on the species tree g maps to.
- private void determineEvent( final PhylogenyNode s, final PhylogenyNode g ) {
+ private final void determineEvent( final PhylogenyNode s, final PhylogenyNode g ) {
Event event = null;
// Determine how many children map to same node as parent.
int sum_g_childs_mapping_to_s = 0;
* @param g
* starting node of a gene tree - normally the root
*/
- void geneTreePostOrderTraversal( final PhylogenyNode g ) {
+ final void geneTreePostOrderTraversal( final PhylogenyNode g ) {
if ( !g.isExternal() ) {
for( final PhylogenyNodeIterator iter = g.iterateChildNodesForward(); iter.hasNext(); ) {
geneTreePostOrderTraversal( iter.next() );
}
}
/**
 * Returns the counter that createSingleSpeciationOrDuplicationEvent() increments.
 *
 * @return the current value of _speciation_or_duplication_events_sum
 */
- public int getSpeciationOrDuplicationEventsSum() {
+ public final int getSpeciationOrDuplicationEventsSum() {
return _speciation_or_duplication_events_sum;
}
/**
 * Returns the counter that createSpeciationEvent() increments.
 *
 * @return the current value of _speciations_sum
 */
- public int getSpeciationsSum() {
+ public final int getSpeciationsSum() {
return _speciations_sum;
}
/**
 * Looks up the count stored for the given node in _transversal_counts.
 *
 * @param node
 *            the node to look up
 * @return the stored count, or 0 when the node has no entry
 */
- private int getTraversalCount( final PhylogenyNode node ) {
+ private final int getTraversalCount( final PhylogenyNode node ) {
if ( _transversal_counts.containsKey( node ) ) {
return _transversal_counts.get( node );
}
// No entry recorded for this node.
return 0;
}
- private void increaseTraversalCount( final PhylogenyNode node ) {
+ private final void increaseTraversalCount( final PhylogenyNode node ) {
if ( _transversal_counts.containsKey( node ) ) {
_transversal_counts.put( node, _transversal_counts.get( node ) + 1 );
}
*
*/
@Override
- void linkNodesOfG() {
+ final void linkNodesOfG() {
final HashMap<Taxonomy, PhylogenyNode> speciestree_ext_nodes = new HashMap<Taxonomy, PhylogenyNode>();
for( final PhylogenyNodeIterator iter = _species_tree.iteratorLevelOrder(); iter.hasNext(); ) {
final PhylogenyNode n = iter.next();
final PhylogenyNode s = speciestree_ext_nodes.get( g.getNodeData().getTaxonomy() );
if ( s == null ) {
throw new IllegalArgumentException( "species " + g.getNodeData().getTaxonomy()
- + " not present in species tree." );
+ + " not present in species tree" );
}
+
+
g.setLink( s );
}
}
/**
 * Builds a textual summary of this object. The visible part reports the
 * most-parsimonious-duplication-model setting followed by a line separator.
 * NOTE(review): further appended lines may exist outside this view -- confirm.
 *
 * @return the contents of the assembled buffer
 */
@Override
- public String toString() {
+ public final String toString() {
final StringBuffer sb = new StringBuffer();
sb.append( "Most parsimonious duplication model: " + _most_parsimonious_duplication_model );
sb.append( ForesterUtil.getLineSeparator() );
return sb.toString();
}
- static int[] obtainMinMaxIdIndices( final PhylogenyNode[] linked_nodes ) {
+ static final int[] obtainMinMaxIdIndices( final PhylogenyNode[] linked_nodes ) {
int max_i = 0;
int min_i = 0;
int max_i_id = -Integer.MAX_VALUE;
// n.getPhylogenyNodeData().setEvent( event );
// }
// } // calculateMforNode( PhylogenyNode )
-} // End of class GSDI.
+}