From: cmzmasek@gmail.com Date: Sat, 12 Jan 2013 04:42:12 +0000 (+0000) Subject: int => long for node id X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=aafd947d5ebcf9ed3218c269f432be59781ce322;p=jalview.git int => long for node id + work on rio --- diff --git a/forester/java/src/org/forester/application/rio.java b/forester/java/src/org/forester/application/rio.java index c371af5..3230c41 100644 --- a/forester/java/src/org/forester/application/rio.java +++ b/forester/java/src/org/forester/application/rio.java @@ -40,6 +40,8 @@ import org.forester.io.parsers.nhx.NHXParser; import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION; import org.forester.io.parsers.phyloxml.PhyloXmlParser; import org.forester.io.parsers.util.ParserUtils; +import org.forester.io.writers.PhylogenyWriter; +import org.forester.phylogeny.Phylogeny; import org.forester.rio.RIO; import org.forester.rio.RIO.REROOTING; import org.forester.rio.RIOException; @@ -52,18 +54,20 @@ import org.forester.util.ForesterUtil; public class rio { - final static private String PRG_NAME = "rio"; - final static private String PRG_VERSION = "4.000 beta 7"; - final static private String PRG_DATE = "2013.01.08"; - final static private String E_MAIL = "phyloxml@gmail.com"; - final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"; - final static private String HELP_OPTION_1 = "help"; - final static private String HELP_OPTION_2 = "h"; - final static private String GT_FIRST = "f"; - final static private String GT_LAST = "l"; - final static private String REROOTING_OPT = "r"; - final static private String OUTGROUP = "o"; - final static private String USE_SDIR = "b"; + final static private String PRG_NAME = "rio"; + final static private String PRG_VERSION = "4.000 beta 8"; + final static private String PRG_DATE = "2013.01.11"; + final static private String E_MAIL = "phyloxml@gmail.com"; + final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"; + final static private String HELP_OPTION_1 = "help"; + final static private String HELP_OPTION_2 = "h"; + final static private String GT_FIRST = "f"; + final static private String GT_LAST = "l"; + final static private String REROOTING_OPT = "r"; + final static private String OUTGROUP = "o"; + final static private String RETURN_SPECIES_TREE = "s"; + final static private String RETURN_BEST_GENE_TREE = "g"; + final static private String USE_SDIR = "b"; public static void main( final String[] args ) { ForesterUtil.printProgramInformation( PRG_NAME, @@ -83,7 +87,7 @@ public class rio { if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length == 0 ) ) { printHelp(); } - if ( ( args.length < 3 ) || ( args.length > 9 ) ) { + if ( ( args.length < 3 ) || ( args.length > 11 ) ) { System.out.println(); System.out.println( "error: incorrect number of arguments" ); System.out.println(); @@ -95,6 +99,8 @@ public class rio { allowed_options.add( REROOTING_OPT ); allowed_options.add( OUTGROUP ); allowed_options.add( USE_SDIR ); + allowed_options.add( RETURN_SPECIES_TREE ); + allowed_options.add( RETURN_BEST_GENE_TREE ); final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options ); if ( dissallowed_options.length() > 0 ) { ForesterUtil.fatalError( "unknown option(s): " + dissallowed_options ); @@ -201,6 +207,28 @@ public class rio { ForesterUtil.fatalError( "attempt to set range (0-based) of gene to analyze to: from " + gt_first + " to " + gt_last ); } + File return_species_tree = null; + if ( !sdir && cla.isOptionSet( RETURN_SPECIES_TREE ) ) { + if ( !cla.isOptionHasAValue( RETURN_SPECIES_TREE ) ) { + ForesterUtil.fatalError( "no value for -" + RETURN_SPECIES_TREE ); + } + final String s = cla.getOptionValueAsCleanString( RETURN_SPECIES_TREE ); + return_species_tree = new File( s ); + if ( return_species_tree.exists() ) { + ForesterUtil.fatalError( "\"" + return_species_tree + "\" already exists" ); + } + } + File return_gene_tree = null; + if ( !sdir && cla.isOptionSet( RETURN_BEST_GENE_TREE ) ) { + if ( !cla.isOptionHasAValue( RETURN_BEST_GENE_TREE ) ) { + ForesterUtil.fatalError( "no value for -" + RETURN_BEST_GENE_TREE ); + } + final String s = cla.getOptionValueAsCleanString( RETURN_BEST_GENE_TREE ); + return_gene_tree = new File( s ); + if ( return_gene_tree.exists() ) { + ForesterUtil.fatalError( "\"" + return_gene_tree + "\" already exists" ); + } + } ForesterUtil.fatalErrorIfFileNotReadable( gene_trees_file ); ForesterUtil.fatalErrorIfFileNotReadable( species_tree_file ); if ( orthology_outtable.exists() ) { @@ -245,6 +273,12 @@ public class rio { else { System.out.println( "Non binary species tree : disallowed" ); } + if ( return_species_tree != null ) { + System.out.println( "Write used species tree to: " + return_species_tree ); + } + if ( return_gene_tree != null ) { + System.out.println( "Write best gene tree to : " + return_gene_tree ); + } time = System.currentTimeMillis(); final ALGORITHM algorithm; if ( sdir ) { @@ -320,6 +354,14 @@ public class rio { PRG_DATE, ForesterUtil.getForesterLibraryInformation() ); } + if ( return_species_tree != null ) { + writeTree( rio.getSpeciesTree(), return_species_tree, "Wrote (stripped) species tree to" ); + } + if ( return_gene_tree != null ) { + writeTree( rio.getMinDuplicationsGeneTree(), + return_gene_tree, + "Wrote (one) minimal duplication gene tree to" ); + } final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.#" ); System.out.println( "Mean number of duplications : " + df.format( stats.arithmeticMean() ) + " (sd: " + df.format( stats.sampleStandardDeviation() ) + ") (" @@ -374,6 +416,10 @@ public class rio { System.out.println( " or 'outgroup' (default: by minizming duplications)" ); System.out.println( " -" + OUTGROUP + "= : for rooting by outgroup, name of outgroup (external gene tree node)" ); + System.out + .println( " -" + RETURN_SPECIES_TREE + "= : to write the (stripped) species tree to file" ); + System.out.println( " -" + RETURN_BEST_GENE_TREE + + "= : to write (one) minimal duplication gene tree to file" ); System.out.println( " -" + USE_SDIR + " : to use SDIR instead of GSDIR (faster, but non-binary species trees are" ); System.out.println( " disallowed, as are most options)" ); @@ -448,4 +494,10 @@ public class rio { w.close(); System.out.println( "Wrote table to \"" + table_outfile + "\"" ); } + + private static void writeTree( final Phylogeny p, final File f, final String comment ) throws IOException { + final PhylogenyWriter writer = new PhylogenyWriter(); + writer.toPhyloXML( f, p, 0 ); + System.out.println( comment + " \"" + f + "\"" ); + } } diff --git a/forester/java/src/org/forester/archaeopteryx/ControlPanel.java b/forester/java/src/org/forester/archaeopteryx/ControlPanel.java index 7c338cb..0703f63 100644 --- a/forester/java/src/org/forester/archaeopteryx/ControlPanel.java +++ b/forester/java/src/org/forester/archaeopteryx/ControlPanel.java @@ -1615,7 +1615,7 @@ final class ControlPanel extends JPanel implements ActionListener { } } if ( ( nodes != null ) && ( nodes.size() > 0 ) ) { - main_panel.getCurrentTreePanel().setFoundNodes( new HashSet() ); + main_panel.getCurrentTreePanel().setFoundNodes( new HashSet() ); for( final PhylogenyNode node : nodes ) { main_panel.getCurrentTreePanel().getFoundNodes().add( node.getId() ); } diff --git a/forester/java/src/org/forester/archaeopteryx/MainFrame.java b/forester/java/src/org/forester/archaeopteryx/MainFrame.java index ec2e600..0cc981e 100644 --- a/forester/java/src/org/forester/archaeopteryx/MainFrame.java +++ b/forester/java/src/org/forester/archaeopteryx/MainFrame.java @@ -1211,7 +1211,7 @@ public abstract class MainFrame extends JFrame implements ActionListener { private void annotateSequences() { if ( getCurrentTreePanel() != null ) { - final Set nodes = getCurrentTreePanel().getFoundNodes(); + final Set nodes = getCurrentTreePanel().getFoundNodes(); if ( ( nodes == null ) || nodes.isEmpty() ) { JOptionPane .showMessageDialog( this, @@ -1260,7 +1260,7 @@ public abstract class MainFrame extends JFrame implements ActionListener { desc = desc.replaceAll( "\\s+", " " ); } if ( !ForesterUtil.isEmpty( ref ) || !ForesterUtil.isEmpty( desc ) ) { - for( final Integer id : nodes ) { + for( final Long id : nodes ) { final PhylogenyNode n = phy.getNode( id ); ForesterUtil.ensurePresenceOfSequence( n ); final Annotation ann = ForesterUtil.isEmpty( ref ) ? new Annotation() diff --git a/forester/java/src/org/forester/archaeopteryx/MainPanel.java b/forester/java/src/org/forester/archaeopteryx/MainPanel.java index f59a3d4..24fc5f7 100644 --- a/forester/java/src/org/forester/archaeopteryx/MainPanel.java +++ b/forester/java/src/org/forester/archaeopteryx/MainPanel.java @@ -66,7 +66,7 @@ public class MainPanel extends JPanel implements ComponentListener { private TreeColorSet _colorset; private TreeFontSet _fontset; private Phylogeny _cut_or_copied_tree; - private Set _copied_and_pasted_nodes; + private Set _copied_and_pasted_nodes; private Hashtable _image_map; private static Map _lineage_to_rank_map; @@ -230,7 +230,7 @@ public class MainPanel extends JPanel implements ComponentListener { return _control_panel; } - public Set getCopiedAndPastedNodes() { + public Set getCopiedAndPastedNodes() { return _copied_and_pasted_nodes; } @@ -388,7 +388,7 @@ public class MainPanel extends JPanel implements ComponentListener { repaint(); } - public void setCopiedAndPastedNodes( final Set node_ids ) { + public void setCopiedAndPastedNodes( final Set node_ids ) { _copied_and_pasted_nodes = node_ids; } diff --git a/forester/java/src/org/forester/archaeopteryx/TreePanel.java b/forester/java/src/org/forester/archaeopteryx/TreePanel.java index f58553a..a309225 100644 --- a/forester/java/src/org/forester/archaeopteryx/TreePanel.java +++ b/forester/java/src/org/forester/archaeopteryx/TreePanel.java @@ -129,113 +129,112 @@ import org.forester.util.SequenceIdParser; public final class TreePanel extends JPanel implements ActionListener, MouseWheelListener, Printable { - private static final float PI = ( float ) ( Math.PI ); - private static final double TWO_PI = 2 * Math.PI; - private static final float ONEHALF_PI = ( float ) ( 1.5 * Math.PI ); - private static final float HALF_PI = ( float ) ( Math.PI / 2.0 ); - private static final float ANGLE_ROTATION_UNIT = ( float ) ( Math.PI / 32 ); - private static final short OV_BORDER = 10; - final static Cursor CUT_CURSOR = Cursor.getPredefinedCursor( Cursor.CROSSHAIR_CURSOR ); - final static Cursor MOVE_CURSOR = Cursor.getPredefinedCursor( Cursor.MOVE_CURSOR ); - final static Cursor ARROW_CURSOR = Cursor.getPredefinedCursor( Cursor.DEFAULT_CURSOR ); - final static Cursor HAND_CURSOR = Cursor.getPredefinedCursor( Cursor.HAND_CURSOR ); - final static Cursor WAIT_CURSOR = Cursor.getPredefinedCursor( Cursor.WAIT_CURSOR ); - private final static long serialVersionUID = -978349745916505029L; - private final static int EURO_D = 10; - private final static String NODE_POPMENU_NODE_CLIENT_PROPERTY = "node"; - private final static int MIN_ROOT_LENGTH = 3; - private final static int MAX_SUBTREES = 100; - private final static int MAX_NODE_FRAMES = 10; - private final static int MOVE = 20; - private final static NumberFormat FORMATTER_CONFIDENCE; - private final static NumberFormat FORMATTER_BRANCH_LENGTH; - private final static int WIGGLE = 2; - private final static int LIMIT_FOR_HQ_RENDERING = 1000; - private final static int CONFIDENCE_LEFT_MARGIN = 4; - private final RenderingHints _rendering_hints = new RenderingHints( RenderingHints.KEY_RENDERING, - RenderingHints.VALUE_RENDER_DEFAULT ); - private File _treefile = null; - private Configuration _configuration = null; - private final NodeFrame[] _node_frames = new NodeFrame[ TreePanel.MAX_NODE_FRAMES ]; - private int _node_frame_index = 0; - private Phylogeny _phylogeny = null; - private final Phylogeny[] _sub_phylogenies = new Phylogeny[ TreePanel.MAX_SUBTREES ]; - private final PhylogenyNode[] _sub_phylogenies_temp_roots = new PhylogenyNode[ TreePanel.MAX_SUBTREES ]; - private int _subtree_index = 0; - private MainPanel _main_panel = null; - private Set _found_nodes = null; - private PhylogenyNode _highlight_node = null; - private JPopupMenu _node_popup_menu = null; - private JMenuItem _node_popup_menu_items[] = null; - private int _longest_ext_node_info = 0; - private float _x_correction_factor = 0.0f; - private float _ov_x_correction_factor = 0.0f; - private float _x_distance = 0.0f; - private float _y_distance = 0.0f; - private PHYLOGENY_GRAPHICS_TYPE _graphics_type = PHYLOGENY_GRAPHICS_TYPE.RECTANGULAR; - private double _domain_structure_width = Constants.DOMAIN_STRUCTURE_DEFAULT_WIDTH; - private int _domain_structure_e_value_thr_exp = Constants.DOMAIN_STRUCTURE_E_VALUE_THR_DEFAULT_EXP; - private float _last_drag_point_x = 0; - private float _last_drag_point_y = 0; - private ControlPanel _control_panel = null; - private int _external_node_index = 0; - private final Polygon _polygon = new Polygon(); - private final StringBuilder _sb = new StringBuilder(); - private JColorChooser _color_chooser = null; - private double _scale_distance = 0.0; - private String _scale_label = null; - private final CubicCurve2D _cubic_curve = new CubicCurve2D.Float(); - private final QuadCurve2D _quad_curve = new QuadCurve2D.Float(); - private final Line2D _line = new Line2D.Float(); - private final Ellipse2D _ellipse = new Ellipse2D.Float(); - private final Rectangle2D _rectangle = new Rectangle2D.Float(); - private Options _options = null; - private float _ov_max_width = 0; - private float _ov_max_height = 0; - private int _ov_x_position = 0; - private int _ov_y_position = 0; - private int _ov_y_start = 0; - private float _ov_y_distance = 0; - private float _ov_x_distance = 0; - private boolean _ov_on = false; - private double _urt_starting_angle = ( float ) ( Math.PI / 2 ); - private float _urt_factor = 1; - private float _urt_factor_ov = 1; - private final boolean _phy_has_branch_lengths; - private final Rectangle2D _ov_rectangle = new Rectangle2D.Float(); - private boolean _in_ov_rect = false; - private boolean _in_ov = false; - private final Rectangle _ov_virtual_rectangle = new Rectangle(); - final private static double _180_OVER_PI = 180.0 / Math.PI; - private static final float ROUNDED_D = 8; - private int _circ_max_depth; - private PhylogenyNode _root; - final private Arc2D _arc = new Arc2D.Double(); - final private HashMap _urt_nodeid_angle_map = new HashMap(); - final private HashMap _urt_nodeid_index_map = new HashMap(); - final private Set _collapsed_external_nodeid_set = new HashSet(); - HashMap _nodeid_dist_to_leaf = new HashMap(); - private AffineTransform _at; - private double _max_distance_to_root = -1; - private int _dynamic_hiding_factor = 0; - private boolean _edited = false; - private Popup _node_desc_popup; - private JTextArea _rollover_popup; - private final StringBuffer _popup_buffer = new StringBuffer(); - final private static Font POPUP_FONT = new Font( Configuration - .getDefaultFontFamilyName(), - Font.PLAIN, - 12 ); - private Sequence _query_sequence = null; - private final FontRenderContext _frc = new FontRenderContext( null, - false, - false ); + private static final float PI = ( float ) ( Math.PI ); + private static final double TWO_PI = 2 * Math.PI; + private static final float ONEHALF_PI = ( float ) ( 1.5 * Math.PI ); + private static final float HALF_PI = ( float ) ( Math.PI / 2.0 ); + private static final float ANGLE_ROTATION_UNIT = ( float ) ( Math.PI / 32 ); + private static final short OV_BORDER = 10; + final static Cursor CUT_CURSOR = Cursor.getPredefinedCursor( Cursor.CROSSHAIR_CURSOR ); + final static Cursor MOVE_CURSOR = Cursor.getPredefinedCursor( Cursor.MOVE_CURSOR ); + final static Cursor ARROW_CURSOR = Cursor.getPredefinedCursor( Cursor.DEFAULT_CURSOR ); + final static Cursor HAND_CURSOR = Cursor.getPredefinedCursor( Cursor.HAND_CURSOR ); + final static Cursor WAIT_CURSOR = Cursor.getPredefinedCursor( Cursor.WAIT_CURSOR ); + private final static long serialVersionUID = -978349745916505029L; + private final static int EURO_D = 10; + private final static String NODE_POPMENU_NODE_CLIENT_PROPERTY = "node"; + private final static int MIN_ROOT_LENGTH = 3; + private final static int MAX_SUBTREES = 100; + private final static int MAX_NODE_FRAMES = 10; + private final static int MOVE = 20; + private final static NumberFormat FORMATTER_CONFIDENCE; + private final static NumberFormat FORMATTER_BRANCH_LENGTH; + private final static int WIGGLE = 2; + private final static int LIMIT_FOR_HQ_RENDERING = 1000; + private final static int CONFIDENCE_LEFT_MARGIN = 4; + private final RenderingHints _rendering_hints = new RenderingHints( RenderingHints.KEY_RENDERING, + RenderingHints.VALUE_RENDER_DEFAULT ); + private File _treefile = null; + private Configuration _configuration = null; + private final NodeFrame[] _node_frames = new NodeFrame[ TreePanel.MAX_NODE_FRAMES ]; + private int _node_frame_index = 0; + private Phylogeny _phylogeny = null; + private final Phylogeny[] _sub_phylogenies = new Phylogeny[ TreePanel.MAX_SUBTREES ]; + private final PhylogenyNode[] _sub_phylogenies_temp_roots = new PhylogenyNode[ TreePanel.MAX_SUBTREES ]; + private int _subtree_index = 0; + private MainPanel _main_panel = null; + private Set _found_nodes = null; + private PhylogenyNode _highlight_node = null; + private JPopupMenu _node_popup_menu = null; + private JMenuItem _node_popup_menu_items[] = null; + private int _longest_ext_node_info = 0; + private float _x_correction_factor = 0.0f; + private float _ov_x_correction_factor = 0.0f; + private float _x_distance = 0.0f; + private float _y_distance = 0.0f; + private PHYLOGENY_GRAPHICS_TYPE _graphics_type = PHYLOGENY_GRAPHICS_TYPE.RECTANGULAR; + private double _domain_structure_width = Constants.DOMAIN_STRUCTURE_DEFAULT_WIDTH; + private int _domain_structure_e_value_thr_exp = Constants.DOMAIN_STRUCTURE_E_VALUE_THR_DEFAULT_EXP; + private float _last_drag_point_x = 0; + private float _last_drag_point_y = 0; + private ControlPanel _control_panel = null; + private int _external_node_index = 0; + private final Polygon _polygon = new Polygon(); + private final StringBuilder _sb = new StringBuilder(); + private JColorChooser _color_chooser = null; + private double _scale_distance = 0.0; + private String _scale_label = null; + private final CubicCurve2D _cubic_curve = new CubicCurve2D.Float(); + private final QuadCurve2D _quad_curve = new QuadCurve2D.Float(); + private final Line2D _line = new Line2D.Float(); + private final Ellipse2D _ellipse = new Ellipse2D.Float(); + private final Rectangle2D _rectangle = new Rectangle2D.Float(); + private Options _options = null; + private float _ov_max_width = 0; + private float _ov_max_height = 0; + private int _ov_x_position = 0; + private int _ov_y_position = 0; + private int _ov_y_start = 0; + private float _ov_y_distance = 0; + private float _ov_x_distance = 0; + private boolean _ov_on = false; + private double _urt_starting_angle = ( float ) ( Math.PI / 2 ); + private float _urt_factor = 1; + private float _urt_factor_ov = 1; + private final boolean _phy_has_branch_lengths; + private final Rectangle2D _ov_rectangle = new Rectangle2D.Float(); + private boolean _in_ov_rect = false; + private boolean _in_ov = false; + private final Rectangle _ov_virtual_rectangle = new Rectangle(); + final private static double _180_OVER_PI = 180.0 / Math.PI; + private static final float ROUNDED_D = 8; + private int _circ_max_depth; + private PhylogenyNode _root; + final private Arc2D _arc = new Arc2D.Double(); + final private HashMap _urt_nodeid_angle_map = new HashMap(); + final private HashMap _urt_nodeid_index_map = new HashMap(); + final private Set _collapsed_external_nodeid_set = new HashSet(); + HashMap _nodeid_dist_to_leaf = new HashMap(); + private AffineTransform _at; + private double _max_distance_to_root = -1; + private int _dynamic_hiding_factor = 0; + private boolean _edited = false; + private Popup _node_desc_popup; + private JTextArea _rollover_popup; + private final StringBuffer _popup_buffer = new StringBuffer(); + final private static Font POPUP_FONT = new Font( Configuration.getDefaultFontFamilyName(), + Font.PLAIN, + 12 ); + private Sequence _query_sequence = null; + private final FontRenderContext _frc = new FontRenderContext( null, + false, + false ); // expression values menu: - private DescriptiveStatistics _statistics_for_vector_data; - private PhylogenyNode[] _nodes_in_preorder = null; - private StringBuilder _current_external_nodes_data_buffer = new StringBuilder(); - private int _current_external_nodes_data_buffer_change_counter = 0; - private Set _current_external_nodes = null; + private DescriptiveStatistics _statistics_for_vector_data; + private PhylogenyNode[] _nodes_in_preorder = null; + private StringBuilder _current_external_nodes_data_buffer = new StringBuilder(); + private int _current_external_nodes_data_buffer_change_counter = 0; + private Set _current_external_nodes = null; // private Image offscreenImage; // private Graphics offscreenGraphics; // private Dimension offscreenDimension; @@ -931,7 +930,7 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee return _domain_structure_e_value_thr_exp; } - final Set getFoundNodes() { + final Set getFoundNodes() { return _found_nodes; } @@ -1188,7 +1187,7 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee if ( ( e.getModifiers() & InputEvent.SHIFT_MASK ) != 0 ) { // Yes, so add to _found_nodes if ( getFoundNodes() == null ) { - setFoundNodes( new HashSet() ); + setFoundNodes( new HashSet() ); } getFoundNodes().add( node.getId() ); // Check if control key is down @@ -1741,7 +1740,7 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee } final void resetNodeIdToDistToLeafMap() { - _nodeid_dist_to_leaf = new HashMap(); + _nodeid_dist_to_leaf = new HashMap(); } final void resetPreferredSize() { @@ -1788,7 +1787,7 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee getControlPanel().getSearchResetButton().setEnabled( true ); getControlPanel().getSearchResetButton().setVisible( true ); if ( getFoundNodes() == null ) { - setFoundNodes( new HashSet() ); + setFoundNodes( new HashSet() ); } getFoundNodes().add( node.getId() ); getControlPanel().setSearchFoundCountsOnLabel( getFoundNodes().size() ); @@ -1809,7 +1808,7 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee _current_external_nodes_data_buffer = sb; } - final void setFoundNodes( final Set found_nodes ) { + final void setFoundNodes( final Set found_nodes ) { _found_nodes = found_nodes; } @@ -2198,9 +2197,9 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee repaint(); } - final private void addToCurrentExternalNodes( final int i ) { + final private void addToCurrentExternalNodes( final long i ) { if ( _current_external_nodes == null ) { - _current_external_nodes = new HashSet(); + _current_external_nodes = new HashSet(); } _current_external_nodes.add( i ); } @@ -2416,7 +2415,7 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee setNodeInPreorderToNull(); setCutOrCopiedTree( _phylogeny.copy( node ) ); final List nodes = PhylogenyMethods.getAllDescendants( node ); - final Set node_ids = new HashSet( nodes.size() ); + final Set node_ids = new HashSet( nodes.size() ); for( final PhylogenyNode n : nodes ) { node_ids.add( n.getId() ); } @@ -2709,11 +2708,11 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee JOptionPane.ERROR_MESSAGE ); } - final private Set getCopiedAndPastedNodes() { + final private Set getCopiedAndPastedNodes() { return getMainPanel().getCopiedAndPastedNodes(); } - final private Set getCurrentExternalNodes() { + final private Set getCurrentExternalNodes() { return _current_external_nodes; } @@ -4915,10 +4914,10 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee } } if ( getCopiedAndPastedNodes() == null ) { - setCopiedAndPastedNodes( new HashSet() ); + setCopiedAndPastedNodes( new HashSet() ); } final List nodes = PhylogenyMethods.obtainAllNodesAsList( buffer_phy ); - final Set node_ids = new HashSet( nodes.size() ); + final Set node_ids = new HashSet( nodes.size() ); for( final PhylogenyNode n : nodes ) { node_ids.add( n.getId() ); } @@ -4958,7 +4957,7 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee return sb; } - final private void setCopiedAndPastedNodes( final Set nodeIds ) { + final private void setCopiedAndPastedNodes( final Set nodeIds ) { getMainPanel().setCopiedAndPastedNodes( nodeIds ); } diff --git a/forester/java/src/org/forester/phylogeny/Phylogeny.java b/forester/java/src/org/forester/phylogeny/Phylogeny.java index 67cdeb5..6234358 100644 --- a/forester/java/src/org/forester/phylogeny/Phylogeny.java +++ b/forester/java/src/org/forester/phylogeny/Phylogeny.java @@ -68,7 +68,7 @@ public class Phylogeny { private Confidence _confidence; private Identifier _identifier; private boolean _rerootable; - private HashMap _id_to_node_map; + private HashMap _id_to_node_map; private List _external_nodes_set; private Collection _sequenceRelationQueries; private Collection _relevant_sequence_relation_types; @@ -390,7 +390,7 @@ public class Phylogeny { * Finds the PhylogenyNode of this Phylogeny which has a matching ID number. * @return PhylogenyNode with matching ID, null if not found */ - public PhylogenyNode getNode( final int id ) throws NoSuchElementException { + public PhylogenyNode getNode( final long id ) throws NoSuchElementException { if ( isEmpty() ) { throw new NoSuchElementException( "attempt to get node in an empty phylogeny" ); } @@ -741,7 +741,7 @@ public class Phylogeny { return; } _id_to_node_map = null; - int max = 0; + long max = 0; for( final PhylogenyNodeIterator it = iteratorPreorder(); it.hasNext(); ) { final PhylogenyNode node = it.next(); if ( node.isRoot() ) { @@ -811,7 +811,7 @@ public class Phylogeny { * @param id * ID (int) of PhylogenyNode of this Phylogeny */ - public void reRoot( final int id ) { + public void reRoot( final long id ) { reRoot( getNode( id ) ); } @@ -1040,7 +1040,7 @@ public class Phylogeny { _identifier = identifier; } - public void setIdToNodeMap( final HashMap idhash ) { + public void setIdToNodeMap( final HashMap idhash ) { _id_to_node_map = idhash; } @@ -1168,7 +1168,7 @@ public class Phylogeny { return; } // unRoot() - private HashMap getIdToNodeMap() { + private HashMap getIdToNodeMap() { return _id_to_node_map; } @@ -1273,7 +1273,7 @@ public class Phylogeny { if ( isEmpty() ) { return; } - setIdToNodeMap( new HashMap() ); + setIdToNodeMap( new HashMap() ); for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); getIdToNodeMap().put( node.getId(), node ); diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java index d87188b..8d43782 100644 --- a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java +++ b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java @@ -327,10 +327,14 @@ public class PhylogenyMethods { public static final HashMap createNameToExtNodeMap( final Phylogeny phy ) { final HashMap nodes = new HashMap(); - for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { - final PhylogenyNode n = iter.next(); + List ext = phy.getExternalNodes(); + for( PhylogenyNode n : ext ) { nodes.put( n.getName(), n ); } + // for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { + // final PhylogenyNode n = iter.next(); + // nodes.put( n.getName(), n ); + // } return nodes; } @@ -439,7 +443,7 @@ public class PhylogenyMethods { public static List getAllDescendants( final PhylogenyNode node ) { final List descs = new ArrayList(); - final Set encountered = new HashSet(); + final Set encountered = new HashSet(); if ( !node.isExternal() ) { final List exts = node.getAllExternalDescendants(); for( PhylogenyNode current : exts ) { @@ -882,7 +886,7 @@ public class PhylogenyMethods { return; } phy.setIdToNodeMap( null ); - int i = PhylogenyNode.getNodeCount(); + long i = PhylogenyNode.getNodeCount(); for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) { it.next().setId( i++ ); } diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyNode.java b/forester/java/src/org/forester/phylogeny/PhylogenyNode.java index 9663845..17a839e 100644 --- a/forester/java/src/org/forester/phylogeny/PhylogenyNode.java +++ b/forester/java/src/org/forester/phylogeny/PhylogenyNode.java @@ -52,9 +52,9 @@ public final class PhylogenyNode implements Comparable { public enum NH_CONVERSION_SUPPORT_VALUE_STYLE { NONE, IN_SQUARE_BRACKETS, AS_INTERNAL_NODE_NAMES; } - private static int _node_count = 0; + private static long NODE_COUNT = 0; private byte _indicator; - private int _id; + private long _id; private int _sum_ext_nodes; private float _x; private float _y; @@ -487,7 +487,7 @@ public final class PhylogenyNode implements Comparable { /** * Returns the ID (int) of this PhylogenyNode. */ - final public int getId() { + final public long getId() { return _id; } @@ -903,7 +903,7 @@ public final class PhylogenyNode implements Comparable { * should not be set to values lower than getNodeCount() -- which this method * does not allow. */ - synchronized final protected void setId( final int i ) { + synchronized final protected void setId( final long i ) { if ( i < getNodeCount() ) { throw new IllegalArgumentException( "attempt to set node id to a value less than total node count (thus violating the uniqueness of node ids)" ); } @@ -1119,30 +1119,30 @@ public final class PhylogenyNode implements Comparable { * Decreases the total number of all Nodes created so far by one. */ final static synchronized void decreaseNodeCount() { - --PhylogenyNode._node_count; + --NODE_COUNT; } /** * Returns the total number of all Nodes created so far. * - * @return total number of Nodes (int) + * @return total number of Nodes (long) */ - synchronized final public static int getNodeCount() { - return PhylogenyNode._node_count; + synchronized final public static long getNodeCount() { + return NODE_COUNT; } /** * Increases the total number of all Nodes created so far by one. */ synchronized final private static void increaseNodeCount() { - ++PhylogenyNode._node_count; + ++NODE_COUNT; } /** - * Sets the total number of all Nodes created so far to i (int). + * Sets the total number of all Nodes created so far to i. */ - synchronized final static void setNodeCount( final int i ) { - PhylogenyNode._node_count = i; + synchronized final static void setNodeCount( final long i ) { + PhylogenyNode.NODE_COUNT = i; } public static PhylogenyNode createInstanceFromNhxString( final String nhx ) throws NHXFormatException, diff --git a/forester/java/src/org/forester/rio/RIO.java b/forester/java/src/org/forester/rio/RIO.java index 0cbe6dd..68256f4 100644 --- a/forester/java/src/org/forester/rio/RIO.java +++ b/forester/java/src/org/forester/rio/RIO.java @@ -78,8 +78,10 @@ public final class RIO { private final boolean _produce_log; private final boolean _verbose; private final REROOTING _rerooting; + private final Phylogeny _species_tree; + private Phylogeny _min_dub_gene_tree; - private RIO( final Phylogeny[] gene_trees, + private RIO( final IteratingPhylogenyParser p, final Phylogeny species_tree, final ALGORITHM algorithm, final REROOTING rerooting, @@ -89,13 +91,14 @@ public final class RIO { final boolean produce_log, final boolean verbose ) throws IOException, SDIException, RIOException { if ( ( last == DEFAULT_RANGE ) && ( first >= 0 ) ) { - last = gene_trees.length - 1; + last = END_OF_GT; } else if ( ( first == DEFAULT_RANGE ) && ( last >= 0 ) ) { first = 0; } removeSingleDescendentsNodes( species_tree, verbose ); - checkPreconditions( gene_trees, species_tree, rerooting, outgroup, first, last ); + p.reset(); + checkPreconditions( p, species_tree, rerooting, outgroup, first, last ); _produce_log = produce_log; _verbose = verbose; _rerooting = rerooting; @@ -106,10 +109,12 @@ public final class RIO { _analyzed_gene_trees = null; _removed_gene_tree_nodes = null; _duplications_stats = new BasicDescriptiveStatistics(); - inferOrthologs( gene_trees, species_tree, algorithm, outgroup, first, last ); + p.reset(); + inferOrthologs( p, species_tree, algorithm, outgroup, first, last ); + _species_tree = species_tree; } - private RIO( final IteratingPhylogenyParser p, + private RIO( final Phylogeny[] gene_trees, final Phylogeny species_tree, final ALGORITHM algorithm, final REROOTING rerooting, @@ -119,14 +124,13 @@ public final class RIO { final boolean produce_log, final boolean verbose ) throws IOException, SDIException, RIOException { if ( ( last == DEFAULT_RANGE ) && ( first >= 0 ) ) { - last = END_OF_GT; + last = gene_trees.length - 1; } else if ( ( first == DEFAULT_RANGE ) && ( last >= 0 ) ) { first = 0; } removeSingleDescendentsNodes( species_tree, verbose ); - p.reset(); - checkPreconditions( p, species_tree, rerooting, outgroup, first, last ); + checkPreconditions( gene_trees, species_tree, rerooting, outgroup, first, last ); _produce_log = produce_log; _verbose = verbose; _rerooting = rerooting; @@ -137,8 +141,8 @@ public final class RIO { _analyzed_gene_trees = null; _removed_gene_tree_nodes = null; _duplications_stats = new BasicDescriptiveStatistics(); - p.reset(); - inferOrthologs( p, species_tree, algorithm, outgroup, first, last ); + inferOrthologs( gene_trees, species_tree, algorithm, outgroup, first, last ); + _species_tree = species_tree; } public final Phylogeny[] getAnalyzedGeneTrees() { @@ -159,6 +163,10 @@ public final class RIO { return _ext_nodes; } + public final TaxonomyComparisonBase getGSDIRtaxCompBase() { + return _gsdir_tax_comp_base; + } + /** * Returns the numbers of number of int nodes in gene trees analyzed (after * stripping). @@ -169,80 +177,24 @@ public final class RIO { return _int_nodes; } - public final TaxonomyComparisonBase getGSDIRtaxCompBase() { - return _gsdir_tax_comp_base; - } - public final StringBuilder getLog() { return _log; } + final public Phylogeny getMinDuplicationsGeneTree() { + return _min_dub_gene_tree; + } + + public final IntMatrix getOrthologTable() { + return _m; + } + public final List getRemovedGeneTreeNodes() { return _removed_gene_tree_nodes; } - private final void inferOrthologs( final Phylogeny[] gene_trees, - final Phylogeny species_tree, - final ALGORITHM algorithm, - final String outgroup, - final int first, - final int last ) throws SDIException, RIOException, FileNotFoundException, - IOException { - if ( algorithm == ALGORITHM.SDIR ) { - // Removes from species_tree all species not found in gene_tree. - PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( gene_trees[ 0 ], species_tree ); - if ( species_tree.isEmpty() ) { - throw new RIOException( "failed to establish species based mapping between gene and species trees" ); - } - } - final Phylogeny[] my_gene_trees; - if ( ( first >= 0 ) && ( last >= first ) && ( last < gene_trees.length ) ) { - my_gene_trees = new Phylogeny[ ( 1 + last ) - first ]; - int c = 0; - for( int i = first; i <= last; ++i ) { - my_gene_trees[ c++ ] = gene_trees[ i ]; - } - } - else { - my_gene_trees = gene_trees; - } - if ( log() ) { - preLog( gene_trees.length, species_tree, algorithm, outgroup ); - } - if ( _verbose && ( my_gene_trees.length >= 4 ) ) { - System.out.println(); - } - _analyzed_gene_trees = new Phylogeny[ my_gene_trees.length ]; - int gene_tree_ext_nodes = 0; - for( int i = 0; i < my_gene_trees.length; ++i ) { - final Phylogeny gt = my_gene_trees[ i ]; - if ( _verbose && ( my_gene_trees.length > 4 ) ) { - ForesterUtil.updateProgress( ( ( double ) i ) / my_gene_trees.length ); - } - if ( i == 0 ) { - gene_tree_ext_nodes = gt.getNumberOfExternalNodes(); - } - else if ( gene_tree_ext_nodes != gt.getNumberOfExternalNodes() ) { - throw new RIOException( "gene tree #" + i + " has a different number of external nodes (" - + gt.getNumberOfExternalNodes() + ") than the preceding gene tree(s) (" + gene_tree_ext_nodes - + ")" ); - } - if ( algorithm == ALGORITHM.SDIR ) { - // Removes from gene_tree all species not found in species_tree. - PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gt ); - if ( gt.isEmpty() ) { - throw new RIOException( "failed to establish species based mapping between gene and species trees" ); - } - } - _analyzed_gene_trees[ i ] = performOrthologInference( gt, species_tree, algorithm, outgroup, i ); - } - if ( log() ) { - postLog( species_tree, first, last ); - } - if ( _verbose && ( my_gene_trees.length > 4 ) ) { - System.out.println(); - System.out.println(); - } + public final Phylogeny getSpeciesTree() { + return _species_tree; } private final void inferOrthologs( final IteratingPhylogenyParser parser, @@ -315,6 +267,70 @@ public final class RIO { } } + private final void inferOrthologs( final Phylogeny[] gene_trees, + final Phylogeny species_tree, + final ALGORITHM algorithm, + final String outgroup, + final int first, + final int last ) throws SDIException, RIOException, FileNotFoundException, + IOException { + if ( algorithm == ALGORITHM.SDIR ) { + // Removes from species_tree all species not found in gene_tree. + PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( gene_trees[ 0 ], species_tree ); + if ( species_tree.isEmpty() ) { + throw new RIOException( "failed to establish species based mapping between gene and species trees" ); + } + } + final Phylogeny[] my_gene_trees; + if ( ( first >= 0 ) && ( last >= first ) && ( last < gene_trees.length ) ) { + my_gene_trees = new Phylogeny[ ( 1 + last ) - first ]; + int c = 0; + for( int i = first; i <= last; ++i ) { + my_gene_trees[ c++ ] = gene_trees[ i ]; + } + } + else { + my_gene_trees = gene_trees; + } + if ( log() ) { + preLog( gene_trees.length, species_tree, algorithm, outgroup ); + } + if ( _verbose && ( my_gene_trees.length >= 4 ) ) { + System.out.println(); + } + _analyzed_gene_trees = new Phylogeny[ my_gene_trees.length ]; + int gene_tree_ext_nodes = 0; + for( int i = 0; i < my_gene_trees.length; ++i ) { + final Phylogeny gt = my_gene_trees[ i ]; + if ( _verbose && ( my_gene_trees.length > 4 ) ) { + ForesterUtil.updateProgress( ( ( double ) i ) / my_gene_trees.length ); + } + if ( i == 0 ) { + gene_tree_ext_nodes = gt.getNumberOfExternalNodes(); + } + else if ( gene_tree_ext_nodes != gt.getNumberOfExternalNodes() ) { + throw new RIOException( "gene tree #" + i + " has a different number of external nodes (" + + gt.getNumberOfExternalNodes() + ") than the preceding gene tree(s) (" + gene_tree_ext_nodes + + ")" ); + } + if ( algorithm == ALGORITHM.SDIR ) { + // Removes from gene_tree all species not found in species_tree. + PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gt ); + if ( gt.isEmpty() ) { + throw new RIOException( "failed to establish species based mapping between gene and species trees" ); + } + } + _analyzed_gene_trees[ i ] = performOrthologInference( gt, species_tree, algorithm, outgroup, i ); + } + if ( log() ) { + postLog( species_tree, first, last ); + } + if ( _verbose && ( my_gene_trees.length > 4 ) ) { + System.out.println(); + System.out.println(); + } + } + private final boolean log() { return _produce_log; } @@ -386,6 +402,7 @@ public final class RIO { final String outgroup, final int i ) throws SDIException, RIOException { final Phylogeny assigned_tree; + final int dups; if ( _rerooting == REROOTING.BY_ALGORITHM ) { final GSDIR gsdir = new GSDIR( gene_tree, species_tree, true, i == 0 ); assigned_tree = gsdir.getMinDuplicationsSumGeneTree(); @@ -401,7 +418,7 @@ public final class RIO { if ( i == 0 ) { _gsdir_tax_comp_base = gsdir.getTaxCompBase(); } - _duplications_stats.addValue( gsdir.getMinDuplicationsSum() ); + dups = gsdir.getMinDuplicationsSum(); } else { if ( _rerooting == REROOTING.MIDPOINT ) { @@ -423,8 +440,12 @@ public final class RIO { if ( i == 0 ) { _gsdir_tax_comp_base = gsdi.getTaxCompBase(); } - _duplications_stats.addValue( gsdi.getDuplicationsSum() ); + dups = gsdi.getDuplicationsSum(); + } + if ( ( i == 0 ) || ( dups < _duplications_stats.getMin() ) ) { + _min_dub_gene_tree = assigned_tree; } + _duplications_stats.addValue( dups ); return assigned_tree; } @@ -498,73 +519,6 @@ public final class RIO { log( "Re-rooting : " + rs ); } - public final IntMatrix getOrthologTable() { - return _m; - } - - private final static void calculateOrthologTable( final Phylogeny g, final boolean sort, final int counter ) - throws RIOException { - final List labels = new ArrayList(); - final Set labels_set = new HashSet(); - if ( counter == 0 ) { - for( final PhylogenyNode n : g.getExternalNodes() ) { - final String label = obtainLabel( labels_set, n ); - labels_set.add( label ); - labels.add( label ); - } - if ( sort ) { - Collections.sort( labels ); - } - _m = new IntMatrix( labels ); - } - updateCounts( _m, counter, g ); - } - - private final static String obtainLabel( final Set labels_set, final PhylogenyNode n ) throws RIOException { - String label; - if ( n.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getName() ) ) { - label = n.getNodeData().getSequence().getName(); - } - else if ( n.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getSymbol() ) ) { - label = n.getNodeData().getSequence().getSymbol(); - } - else if ( !ForesterUtil.isEmpty( n.getName() ) ) { - label = n.getName(); - } - else { - throw new RIOException( "node " + n + " has no appropriate label" ); - } - if ( labels_set.contains( label ) ) { - throw new RIOException( "label " + label + " is not unique" ); - } - return label; - } - - private final static void updateCounts( final IntMatrix m, final int counter, final Phylogeny g ) - throws RIOException { - PhylogenyMethods.preOrderReId( g ); - final HashMap map = PhylogenyMethods.createNameToExtNodeMap( g ); - for( int x = 0; x < m.size(); ++x ) { - final String mx = m.getLabel( x ); - final PhylogenyNode nx = map.get( mx ); - if ( nx == null ) { - throw new RIOException( "node \"" + mx + "\" not present in gene tree #" + counter ); - } - String my; - PhylogenyNode ny; - for( int y = 0; y < m.size(); ++y ) { - my = m.getLabel( y ); - ny = map.get( my ); - if ( ny == null ) { - throw new RIOException( "node \"" + my + "\" not present in gene tree #" + counter ); - } - if ( !PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( nx, ny ).isDuplication() ) { - m.inreaseByOne( x, y ); - } - } - } - } - public final static IntMatrix calculateOrthologTable( final Phylogeny[] analyzed_gene_trees, final boolean sort ) throws RIOException { final List labels = new ArrayList(); @@ -607,26 +561,22 @@ public final class RIO { return new RIO( gene_trees, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose ); } - public final static RIO executeAnalysis( final IteratingPhylogenyParser p, - final File species_tree_file, + public final static RIO executeAnalysis( final File gene_trees_file, + final Phylogeny species_tree, final ALGORITHM algorithm, final REROOTING rerooting, final String outgroup, - final int first, - final int last, final boolean produce_log, final boolean verbose ) throws IOException, SDIException, RIOException { - final Phylogeny g0 = p.next(); - if ( ( g0 == null ) || g0.isEmpty() || ( g0.getNumberOfExternalNodes() < 2 ) ) { - throw new RIOException( "input file does not seem to contain any gene trees" ); - } - final Phylogeny species_tree = SDIutil.parseSpeciesTree( g0, - species_tree_file, - false, - true, - TAXONOMY_EXTRACTION.NO ); - p.reset(); - return new RIO( p, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose ); + return new RIO( parseGeneTrees( gene_trees_file ), + species_tree, + algorithm, + rerooting, + outgroup, + DEFAULT_RANGE, + DEFAULT_RANGE, + produce_log, + verbose ); } public final static RIO executeAnalysis( final File gene_trees_file, @@ -634,6 +584,8 @@ public final class RIO { final ALGORITHM algorithm, final REROOTING rerooting, final String outgroup, + final int first, + final int last, final boolean produce_log, final boolean verbose ) throws IOException, SDIException, RIOException { return new RIO( parseGeneTrees( gene_trees_file ), @@ -641,13 +593,35 @@ public final class RIO { algorithm, rerooting, outgroup, - DEFAULT_RANGE, - DEFAULT_RANGE, + first, + last, produce_log, verbose ); } public final static RIO executeAnalysis( final IteratingPhylogenyParser p, + final File species_tree_file, + final ALGORITHM algorithm, + final REROOTING rerooting, + final String outgroup, + final int first, + final int last, + final boolean produce_log, + final boolean verbose ) throws IOException, SDIException, RIOException { + final Phylogeny g0 = p.next(); + if ( ( g0 == null ) || g0.isEmpty() || ( g0.getNumberOfExternalNodes() < 2 ) ) { + throw new RIOException( "input file does not seem to contain any gene trees" ); + } + final Phylogeny species_tree = SDIutil.parseSpeciesTree( g0, + species_tree_file, + false, + true, + TAXONOMY_EXTRACTION.NO ); + p.reset(); + return new RIO( p, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose ); + } + + public final static RIO executeAnalysis( final IteratingPhylogenyParser p, final Phylogeny species_tree, final ALGORITHM algorithm, final REROOTING rerooting, @@ -665,7 +639,7 @@ public final class RIO { verbose ); } - public final static RIO executeAnalysis( final File gene_trees_file, + public final static RIO executeAnalysis( final IteratingPhylogenyParser p, final Phylogeny species_tree, final ALGORITHM algorithm, final REROOTING rerooting, @@ -674,15 +648,7 @@ public final class RIO { final int last, final boolean produce_log, final boolean verbose ) throws IOException, SDIException, RIOException { - return new RIO( parseGeneTrees( gene_trees_file ), - species_tree, - algorithm, - rerooting, - outgroup, - first, - last, - produce_log, - verbose ); + return new RIO( p, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose ); } public final static RIO executeAnalysis( final Phylogeny[] gene_trees, final Phylogeny species_tree ) @@ -728,31 +694,44 @@ public final class RIO { return new RIO( gene_trees, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose ); } - public final static RIO executeAnalysis( final IteratingPhylogenyParser p, - final Phylogeny species_tree, - final ALGORITHM algorithm, - final REROOTING rerooting, - final String outgroup, - final int first, - final int last, - final boolean produce_log, - final boolean verbose ) throws IOException, SDIException, RIOException { - return new RIO( p, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose ); + private final static void calculateOrthologTable( final Phylogeny g, final boolean sort, final int counter ) + throws RIOException { + if ( counter == 0 ) { + final List labels = new ArrayList(); + final Set labels_set = new HashSet(); + for( final PhylogenyNode n : g.getExternalNodes() ) { + final String label = obtainLabel( labels_set, n ); + labels_set.add( label ); + labels.add( label ); + } + if ( sort ) { + Collections.sort( labels ); + } + _m = new IntMatrix( labels ); + } + updateCounts( _m, counter, g ); } - private final static void checkPreconditions( final Phylogeny[] gene_trees, + private final static void checkPreconditions( final IteratingPhylogenyParser p, final Phylogeny species_tree, final REROOTING rerooting, final String outgroup, final int first, - final int last ) throws RIOException { + final int last ) throws RIOException, IOException { + final Phylogeny g0 = p.next(); + if ( ( g0 == null ) || g0.isEmpty() ) { + throw new RIOException( "input file does not seem to contain any gene trees" ); + } + if ( g0.getNumberOfExternalNodes() < 2 ) { + throw new RIOException( "input file does not seem to contain any useable gene trees" ); + } if ( !species_tree.isRooted() ) { throw new RIOException( "species tree is not rooted" ); } if ( !( ( last == DEFAULT_RANGE ) && ( first == DEFAULT_RANGE ) ) - && ( ( last < first ) || ( last >= gene_trees.length ) || ( last < 0 ) || ( first < 0 ) ) ) { + && ( ( last < first ) || ( last < 0 ) || ( first < 0 ) ) ) { throw new RIOException( "attempt to set range (0-based) of gene to analyze to: from " + first + " to " - + last + " (out of " + gene_trees.length + ")" ); + + last ); } if ( ( rerooting == REROOTING.OUTGROUP ) && ForesterUtil.isEmpty( outgroup ) ) { throw new RIOException( "outgroup not set for midpoint rooting" ); @@ -760,13 +739,12 @@ public final class RIO { if ( ( rerooting != REROOTING.OUTGROUP ) && !ForesterUtil.isEmpty( outgroup ) ) { throw new RIOException( "outgroup only used for midpoint rooting" ); } - if ( ( rerooting == REROOTING.MIDPOINT ) - && ( PhylogenyMethods.calculateMaxDistanceToRoot( gene_trees[ 0 ] ) <= 0 ) ) { + if ( ( rerooting == REROOTING.MIDPOINT ) && ( PhylogenyMethods.calculateMaxDistanceToRoot( g0 ) <= 0 ) ) { throw new RIOException( "attempt to use midpoint rooting on gene trees which seem to have no (positive) branch lengths (cladograms)" ); } if ( rerooting == REROOTING.OUTGROUP ) { try { - gene_trees[ 0 ].getNode( outgroup ); + g0.getNode( outgroup ); } catch ( final IllegalArgumentException e ) { throw new RIOException( "cannot perform re-rooting by outgroup: " + e.getLocalizedMessage() ); @@ -774,26 +752,19 @@ public final class RIO { } } - private final static void checkPreconditions( final IteratingPhylogenyParser p, + private final static void checkPreconditions( final Phylogeny[] gene_trees, final Phylogeny species_tree, final REROOTING rerooting, final String outgroup, final int first, - final int last ) throws RIOException, IOException { - final Phylogeny g0 = p.next(); - if ( ( g0 == null ) || g0.isEmpty() ) { - throw new RIOException( "input file does not seem to contain any gene trees" ); - } - if ( g0.getNumberOfExternalNodes() < 2 ) { - throw new RIOException( "input file does not seem to contain any useable gene trees" ); - } + final int last ) throws RIOException { if ( !species_tree.isRooted() ) { throw new RIOException( "species tree is not rooted" ); } if ( !( ( last == DEFAULT_RANGE ) && ( first == DEFAULT_RANGE ) ) - && ( ( last < first ) || ( last < 0 ) || ( first < 0 ) ) ) { + && ( ( last < first ) || ( last >= gene_trees.length ) || ( last < 0 ) || ( first < 0 ) ) ) { throw new RIOException( "attempt to set range (0-based) of gene to analyze to: from " + first + " to " - + last ); + + last + " (out of " + gene_trees.length + ")" ); } if ( ( rerooting == REROOTING.OUTGROUP ) && ForesterUtil.isEmpty( outgroup ) ) { throw new RIOException( "outgroup not set for midpoint rooting" ); @@ -801,12 +772,13 @@ public final class RIO { if ( ( rerooting != REROOTING.OUTGROUP ) && !ForesterUtil.isEmpty( outgroup ) ) { throw new RIOException( "outgroup only used for midpoint rooting" ); } - if ( ( rerooting == REROOTING.MIDPOINT ) && ( PhylogenyMethods.calculateMaxDistanceToRoot( g0 ) <= 0 ) ) { + if ( ( rerooting == REROOTING.MIDPOINT ) + && ( PhylogenyMethods.calculateMaxDistanceToRoot( gene_trees[ 0 ] ) <= 0 ) ) { throw new RIOException( "attempt to use midpoint rooting on gene trees which seem to have no (positive) branch lengths (cladograms)" ); } if ( rerooting == REROOTING.OUTGROUP ) { try { - g0.getNode( outgroup ); + gene_trees[ 0 ].getNode( outgroup ); } catch ( final IllegalArgumentException e ) { throw new RIOException( "cannot perform re-rooting by outgroup: " + e.getLocalizedMessage() ); @@ -814,6 +786,26 @@ public final class RIO { } } + private final static String obtainLabel( final Set labels_set, final PhylogenyNode n ) throws RIOException { + String label; + if ( n.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getName() ) ) { + label = n.getNodeData().getSequence().getName(); + } + else if ( n.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getSymbol() ) ) { + label = n.getNodeData().getSequence().getSymbol(); + } + else if ( !ForesterUtil.isEmpty( n.getName() ) ) { + label = n.getName(); + } + else { + throw new RIOException( "node " + n + " has no appropriate label" ); + } + if ( labels_set.contains( label ) ) { + throw new RIOException( "label " + label + " is not unique" ); + } + return label; + } + private final static Phylogeny[] parseGeneTrees( final File gene_trees_file ) throws FileNotFoundException, IOException { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); @@ -844,6 +836,31 @@ public final class RIO { } } + private final static void updateCounts( final IntMatrix m, final int counter, final Phylogeny g ) + throws RIOException { + PhylogenyMethods.preOrderReId( g ); + final HashMap map = PhylogenyMethods.createNameToExtNodeMap( g ); + for( int x = 0; x < m.size(); ++x ) { + final String mx = m.getLabel( x ); + final PhylogenyNode nx = map.get( mx ); + if ( nx == null ) { + throw new RIOException( "node \"" + mx + "\" not present in gene tree #" + counter ); + } + String my; + PhylogenyNode ny; + for( int y = 0; y < m.size(); ++y ) { + my = m.getLabel( y ); + ny = map.get( my ); + if ( ny == null ) { + throw new RIOException( "node \"" + my + "\" not present in gene tree #" + counter ); + } + if ( !PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( nx, ny ).isDuplication() ) { + m.inreaseByOne( x, y ); + } + } + } + } + public enum REROOTING { NONE, BY_ALGORITHM, MIDPOINT, OUTGROUP; } diff --git a/forester/java/src/org/forester/sdi/GSDI.java b/forester/java/src/org/forester/sdi/GSDI.java index 9f95b24..87d28bd 100644 --- a/forester/java/src/org/forester/sdi/GSDI.java +++ b/forester/java/src/org/forester/sdi/GSDI.java @@ -174,6 +174,37 @@ public final class GSDI implements GSDII { return res; } + final static GSDIsummaryResult geneTreePostOrderTraversal( final Phylogeny gene_tree, + final boolean most_parsimonious_duplication_model, + final int min_duplications ) throws SDIException { + final GSDIsummaryResult res = new GSDIsummaryResult(); + for( final PhylogenyNodeIterator it = gene_tree.iteratorPostorder(); it.hasNext(); ) { + final PhylogenyNode g = it.next(); + if ( g.isInternal() ) { + if ( g.getNumberOfDescendants() != 2 ) { + throw new SDIException( "gene tree contains internal node with " + g.getNumberOfDescendants() + + " descendents" ); + } + PhylogenyNode s1 = g.getChildNode1().getLink(); + PhylogenyNode s2 = g.getChildNode2().getLink(); + while ( s1 != s2 ) { + if ( s1.getId() > s2.getId() ) { + s1 = s1.getParent(); + } + else { + s2 = s2.getParent(); + } + } + g.setLink( s1 ); + determineEvent( s1, g, most_parsimonious_duplication_model, res ); + if ( res.getDuplicationsSum() > min_duplications ) { + return null; + } + } + } + return res; + } + final static NodesLinkingResult linkNodesOfG( final Phylogeny gene_tree, final Phylogeny species_tree, final boolean strip_gene_tree, diff --git a/forester/java/src/org/forester/sdi/GSDIR.java b/forester/java/src/org/forester/sdi/GSDIR.java index 8ada705..d0ee1a7 100644 --- a/forester/java/src/org/forester/sdi/GSDIR.java +++ b/forester/java/src/org/forester/sdi/GSDIR.java @@ -83,7 +83,12 @@ public class GSDIR implements GSDII { // g.setLink( null ); // } // } - final GSDIsummaryResult gsdi_result = GSDI.geneTreePostOrderTraversal( gene_tree, true ); + final GSDIsummaryResult gsdi_result = GSDI.geneTreePostOrderTraversal( gene_tree, + true, + min_duplications_sum ); + if ( gsdi_result == null ) { + continue; + } if ( gsdi_result.getDuplicationsSum() < min_duplications_sum ) { min_duplications_sum = gsdi_result.getDuplicationsSum(); speciations_sum = gsdi_result.getSpeciationsSum(); diff --git a/forester/java/src/org/forester/sdi/SDIR.java b/forester/java/src/org/forester/sdi/SDIR.java index 85481ff..88a2f2c 100644 --- a/forester/java/src/org/forester/sdi/SDIR.java +++ b/forester/java/src/org/forester/sdi/SDIR.java @@ -494,8 +494,8 @@ public class SDIR { branches.add( new PhylogenyBranch( t.getRoot().getChildNode1(), t.getRoot().getChildNode2() ) ); return branches; } - final Set one = new HashSet(); - final Set two = new HashSet(); + final Set one = new HashSet(); + final Set two = new HashSet(); PhylogenyNode node = t.getRoot(); while ( !node.isRoot() || !two.contains( node.getId() ) ) { if ( !node.isExternal() && !two.contains( node.getId() ) ) { diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index fc7ce47..e2e93c7 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -7201,7 +7201,7 @@ public final class Test { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final Phylogeny p = factory.create( "((1,2)A,(((X,Y,Z)a,b)3)B,(4,5,6)C)r", new NHXParser() )[ 0 ]; - final int count = PhylogenyNode.getNodeCount(); + final long count = PhylogenyNode.getNodeCount(); p.levelOrderReID(); if ( p.getNode( "r" ).getId() != count ) { return false; diff --git a/forester/java/src/org/forester/tools/SupportCount.java b/forester/java/src/org/forester/tools/SupportCount.java index 50702d4..283668c 100644 --- a/forester/java/src/org/forester/tools/SupportCount.java +++ b/forester/java/src/org/forester/tools/SupportCount.java @@ -64,7 +64,7 @@ public final class SupportCount { phylogeny.reRoot( phylogeny.getNode( child0_name ) ); evaluator_phylogeny.reRoot( evaluator_phylogeny.getNode( child0_name ) ); } - final Map> phylogeny_external_names_per_node = SupportCount + final Map> phylogeny_external_names_per_node = SupportCount .extractExternalNamesPerNode( phylogeny ); return ( SupportCount.compare( phylogeny, evaluator_phylogeny, @@ -97,7 +97,7 @@ public final class SupportCount { */ private static double compare( final Phylogeny phylogeny, final Phylogeny evaluator_phylogeny, - final Map> phylogeny_external_names_per_node, + final Map> phylogeny_external_names_per_node, final boolean update_support_in_phylogeny, final double similarity_threshold ) { int matching_branches = 0; @@ -114,7 +114,7 @@ public final class SupportCount { for( final Object element : evaluator_phylogeny_it.next().getAllExternalDescendants() ) { c1.add( ( ( PhylogenyNode ) element ).getName() ); } - for( final Integer id : phylogeny_external_names_per_node.keySet() ) { + for( final Long id : phylogeny_external_names_per_node.keySet() ) { final List c2 = phylogeny_external_names_per_node.get( id ); if ( ( c2.size() == c1.size() ) && c2.containsAll( c1 ) ) { if ( c2.size() > 1 ) { @@ -181,7 +181,7 @@ public final class SupportCount { } final String child0_name = phylogeny.getFirstExternalNode().getName(); phylogeny.reRoot( phylogeny.getNode( child0_name ) ); - final Map> phylogeny_external_names_per_node = SupportCount + final Map> phylogeny_external_names_per_node = SupportCount .extractExternalNamesPerNode( phylogeny ); if ( verbose ) { System.out.println(); @@ -239,14 +239,14 @@ public final class SupportCount { return evaluator_phylogenies_above_threshold; } - private static Map> extractExternalNamesPerNode( final Phylogeny phylogeny ) + private static Map> extractExternalNamesPerNode( final Phylogeny phylogeny ) throws NoSuchElementException { - final HashMap> phylogeny_external_names_per_node = new HashMap>(); + final HashMap> phylogeny_external_names_per_node = new HashMap>(); for( final PhylogenyNodeIterator it = phylogeny.iteratorPostorder(); it.hasNext(); ) { final PhylogenyNode n = it.next(); final List l = n.getAllExternalDescendants(); final ArrayList c = new ArrayList(); - phylogeny_external_names_per_node.put( new Integer( n.getId() ), c ); + phylogeny_external_names_per_node.put( new Long( n.getId() ), c ); for( final PhylogenyNode phylogenyNode : l ) { c.add( phylogenyNode.getName() ); }