3 // FORESTER -- software libraries and applications
4 // for evolutionary biology research and applications.
6 // Copyright (C) 2008-2009 Christian M. Zmasek
7 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 // Contact: phylosoft @ gmail . com
25 // WWW: www.phylosoft.org/forester
27 package org.forester.evoinference.parsimony;
29 import java.util.ArrayList;
30 import java.util.HashMap;
31 import java.util.List;
34 import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix;
35 import org.forester.evoinference.matrix.character.CharacterStateMatrix;
36 import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates;
37 import org.forester.evoinference.matrix.character.CharacterStateMatrix.GainLossStates;
38 import org.forester.phylogeny.Phylogeny;
39 import org.forester.phylogeny.PhylogenyNode;
40 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
41 import org.forester.util.ForesterUtil;
43 public class DolloParsimony {
45 final static private BinaryStates PRESENT = BinaryStates.PRESENT;
46 final static private BinaryStates ABSENT = BinaryStates.ABSENT;
47 final static private BinaryStates UNKNOWN = BinaryStates.UNKNOWN;
48 final static private GainLossStates LOSS = GainLossStates.LOSS;
49 final static private GainLossStates GAIN = GainLossStates.GAIN;
50 final static private GainLossStates UNCHANGED_PRESENT = GainLossStates.UNCHANGED_PRESENT;
51 final static private GainLossStates UNCHANGED_ABSENT = GainLossStates.UNCHANGED_ABSENT;
52 private static final boolean RETURN_INTERNAL_STATES_DEFAULT = false;
53 private static final boolean RETURN_GAIN_LOSS_MATRIX_DEFAULT = false;
54 private boolean _return_internal_states = false;
55 private boolean _return_gain_loss = false;
56 private int _total_gains;
57 private int _total_losses;
58 private int _total_unchanged;
59 private CharacterStateMatrix<BinaryStates> _internal_states_matrix;
60 private CharacterStateMatrix<GainLossStates> _gain_loss_matrix;
62 private DolloParsimony() {
66 public void execute( final Phylogeny p, final CharacterStateMatrix<BinaryStates> external_node_states_matrix ) {
67 if ( !p.isRooted() ) {
68 throw new IllegalArgumentException( "attempt to execute Dollo parsimony on unroored phylogeny" );
70 if ( external_node_states_matrix.isEmpty() ) {
71 throw new IllegalArgumentException( "character matrix is empty" );
73 if ( external_node_states_matrix.getNumberOfIdentifiers() != p.getNumberOfExternalNodes() ) {
74 throw new IllegalArgumentException( "number of external nodes in phylogeny ["
75 + p.getNumberOfExternalNodes() + "] and number of indentifiers ["
76 + external_node_states_matrix.getNumberOfIdentifiers() + "] in matrix are not equal" );
79 if ( isReturnInternalStates() ) {
80 initializeInternalStates( p, external_node_states_matrix );
82 if ( isReturnGainLossMatrix() ) {
83 initializeGainLossMatrix( p, external_node_states_matrix );
85 for( int character_index = 0; character_index < external_node_states_matrix.getNumberOfCharacters(); ++character_index ) {
86 executeForOneCharacter( p,
87 getStatesForCharacter( p, external_node_states_matrix, character_index ),
90 if ( ( external_node_states_matrix.getNumberOfCharacters() * p.getNumberOfBranches() ) != ( getTotalGains()
91 + getTotalLosses() + getTotalUnchanged() ) ) {
92 throw new AssertionError( "this should not have happened: something is deeply wrong with Dollo parsimony implementation" );
96 private void executeForOneCharacter( final Phylogeny p,
97 final Map<PhylogenyNode, BinaryStates> states,
98 final int character_state_column ) {
99 postOrderTraversal( p, states );
100 preOrderTraversal( p, states, character_state_column );
104 * @see org.forester.phylogenyinference.Parsimony#getCost()
106 public int getCost() {
107 return getTotalGains() + getTotalLosses();
111 * @see org.forester.phylogenyinference.Parsimony#getGainLossMatrix()
113 public CharacterStateMatrix<CharacterStateMatrix.GainLossStates> getGainLossMatrix() {
114 if ( !isReturnGainLossMatrix() ) {
115 throw new RuntimeException( "creation of gain-loss matrix has not been enabled" );
117 return _gain_loss_matrix;
121 * @see org.forester.phylogenyinference.Parsimony#getInternalStatesMatrix()
123 public CharacterStateMatrix<BinaryStates> getInternalStatesMatrix() {
124 if ( !isReturnInternalStates() ) {
125 throw new RuntimeException( "creation of internal state matrix has not been enabled" );
127 return _internal_states_matrix;
130 private Map<PhylogenyNode, BinaryStates> getStatesForCharacter( final Phylogeny p,
131 final CharacterStateMatrix<BinaryStates> matrix,
132 final int character_index ) {
133 final Map<PhylogenyNode, BinaryStates> states = new HashMap<PhylogenyNode, BinaryStates>( matrix
134 .getNumberOfIdentifiers() );
135 for( int indentifier_index = 0; indentifier_index < matrix.getNumberOfIdentifiers(); ++indentifier_index ) {
136 final BinaryStates state = matrix.getState( indentifier_index, character_index );
137 if ( state == null ) {
138 throw new IllegalArgumentException( "value at [" + indentifier_index + ", " + character_index
141 states.put( p.getNode( matrix.getIdentifier( indentifier_index ) ), state );
147 * @see org.forester.phylogenyinference.Parsimony#getTotalGains()
149 public int getTotalGains() {
154 * @see org.forester.phylogenyinference.Parsimony#getTotalLosses()
156 public int getTotalLosses() {
157 return _total_losses;
161 * @see org.forester.phylogenyinference.Parsimony#getTotalUnchanged()
163 public int getTotalUnchanged() {
164 return _total_unchanged;
167 private void init() {
168 setReturnInternalStates( RETURN_INTERNAL_STATES_DEFAULT );
169 setReturnGainLossMatrix( RETURN_GAIN_LOSS_MATRIX_DEFAULT );
173 private void initializeGainLossMatrix( final Phylogeny p,
174 final CharacterStateMatrix<BinaryStates> external_node_states_matrix ) {
175 final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
176 for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) {
177 nodes.add( postorder.next() );
179 setGainLossMatrix( new BasicCharacterStateMatrix<CharacterStateMatrix.GainLossStates>( nodes.size(),
180 external_node_states_matrix
181 .getNumberOfCharacters() ) );
182 int identifier_index = 0;
183 for( final PhylogenyNode node : nodes ) {
184 getGainLossMatrix().setIdentifier( identifier_index++,
185 ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node
188 for( int character_index = 0; character_index < external_node_states_matrix.getNumberOfCharacters(); ++character_index ) {
189 getGainLossMatrix().setCharacter( character_index,
190 external_node_states_matrix.getCharacter( character_index ) );
194 private void initializeInternalStates( final Phylogeny p,
195 final CharacterStateMatrix<BinaryStates> external_node_states_matrix ) {
196 final List<PhylogenyNode> internal_nodes = new ArrayList<PhylogenyNode>();
197 for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) {
198 final PhylogenyNode node = postorder.next();
199 if ( node.isInternal() ) {
200 internal_nodes.add( node );
203 setInternalStatesMatrix( new BasicCharacterStateMatrix<BinaryStates>( internal_nodes.size(),
204 external_node_states_matrix
205 .getNumberOfCharacters() ) );
206 int identifier_index = 0;
207 for( final PhylogenyNode node : internal_nodes ) {
208 getInternalStatesMatrix().setIdentifier( identifier_index++,
209 ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node
212 for( int character_index = 0; character_index < external_node_states_matrix.getNumberOfCharacters(); ++character_index ) {
213 getInternalStatesMatrix().setCharacter( character_index,
214 external_node_states_matrix.getCharacter( character_index ) );
218 private boolean isReturnGainLossMatrix() {
219 return _return_gain_loss;
222 private boolean isReturnInternalStates() {
223 return _return_internal_states;
226 private void postOrderTraversal( final Phylogeny p, final Map<PhylogenyNode, BinaryStates> states )
227 throws AssertionError {
228 for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) {
229 final PhylogenyNode node = postorder.next();
230 if ( !node.isExternal() ) {
231 final int present_unknown = getNumberOfChildNodesWithPresentOrUnknownState( states, node );
232 if ( present_unknown < 1 ) {
233 states.put( node, ABSENT );
235 else if ( present_unknown == 1 ) {
236 states.put( node, UNKNOWN );
239 states.put( node, PRESENT );
245 private void preOrderTraversal( final Phylogeny p,
246 final Map<PhylogenyNode, BinaryStates> states,
247 final int character_state_column ) throws AssertionError {
248 boolean gain = false;
249 for( final PhylogenyNodeIterator preorder = p.iteratorPreorder(); preorder.hasNext(); ) {
250 final PhylogenyNode node = preorder.next();
251 BinaryStates parent_state = null;
252 if ( !node.isRoot() ) {
253 parent_state = states.get( node.getParent() );
255 if ( !node.isExternal() ) {
256 if ( states.get( node ) == UNKNOWN ) {
257 if ( parent_state == PRESENT ) {
258 states.put( node, PRESENT );
261 if ( isCharacterPresentOrUnknownInAtLeastTwoChildNodes( states, node ) ) {
262 states.put( node, PRESENT );
265 states.put( node, ABSENT );
269 if ( isReturnInternalStates() ) {
270 setInternalNodeState( states, character_state_column, node );
273 final BinaryStates current_state = states.get( node );
274 if ( ( parent_state == PRESENT ) && ( current_state == ABSENT ) ) {
276 if ( isReturnGainLossMatrix() ) {
277 setGainLossState( character_state_column, node, LOSS );
280 else if ( ( ( parent_state == ABSENT ) ) && ( current_state == PRESENT ) ) {
282 throw new RuntimeException( "this should not have happened: dollo parsimony cannot have more than one gain" );
286 if ( isReturnGainLossMatrix() ) {
287 setGainLossState( character_state_column, node, GAIN );
292 if ( isReturnGainLossMatrix() ) {
293 if ( current_state == PRESENT ) {
294 setGainLossState( character_state_column, node, UNCHANGED_PRESENT );
296 else if ( current_state == ABSENT ) {
297 setGainLossState( character_state_column, node, UNCHANGED_ABSENT );
304 private void reset() {
307 setTotalUnchanged( 0 );
310 private void setGainLossMatrix( final CharacterStateMatrix<CharacterStateMatrix.GainLossStates> gain_loss_matrix ) {
311 _gain_loss_matrix = gain_loss_matrix;
314 private void setGainLossState( final int character_state_column,
315 final PhylogenyNode node,
316 final GainLossStates state ) {
317 getGainLossMatrix().setState( ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node.getName(),
318 character_state_column,
322 private void setInternalNodeState( final Map<PhylogenyNode, BinaryStates> states,
323 final int character_state_column,
324 final PhylogenyNode node ) {
325 getInternalStatesMatrix()
326 .setState( ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node.getName(),
327 character_state_column,
328 states.get( node ) );
331 private void setInternalStatesMatrix( final CharacterStateMatrix<BinaryStates> internal_states_matrix ) {
332 _internal_states_matrix = internal_states_matrix;
336 * @see org.forester.phylogenyinference.Parsimony#setReturnGainLossMatrix(boolean)
338 public void setReturnGainLossMatrix( final boolean return_gain_loss ) {
339 _return_gain_loss = return_gain_loss;
343 * @see org.forester.phylogenyinference.Parsimony#setReturnInternalStates(boolean)
345 public void setReturnInternalStates( final boolean return_internal_states ) {
346 _return_internal_states = return_internal_states;
349 private void setTotalGains( final int total_gains ) {
350 _total_gains = total_gains;
353 private void setTotalLosses( final int total_losses ) {
354 _total_losses = total_losses;
357 private void setTotalUnchanged( final int total_unchanged ) {
358 _total_unchanged = total_unchanged;
361 public static DolloParsimony createInstance() {
362 return new DolloParsimony();
365 private static int getNumberOfChildNodesWithPresentOrUnknownState( final Map<PhylogenyNode, BinaryStates> states,
366 final PhylogenyNode node ) {
368 for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
369 final PhylogenyNode node_child = node.getChildNode( i );
370 if ( !states.containsKey( node_child ) ) {
371 throw new RuntimeException( "this should not have happened: node [" + node_child.getName()
372 + "] not found in node state map" );
374 if ( ( states.get( node_child ) == BinaryStates.PRESENT )
375 || ( states.get( node_child ) == BinaryStates.UNKNOWN ) ) {
382 private static boolean isCharacterPresentOrUnknownInAtLeastTwoChildNodes( final Map<PhylogenyNode, BinaryStates> states,
383 final PhylogenyNode node ) {
385 for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
386 final PhylogenyNode node_child = node.getChildNode( i );
387 if ( ( states.get( node_child ) == PRESENT ) || ( states.get( node_child ) == UNKNOWN ) ) {