3 // FORESTER -- software libraries and applications
4 // for evolutionary biology research and applications.
6 // Copyright (C) 2008-2009 Christian M. Zmasek
7 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 // Contact: phylosoft @ gmail . com
25 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
27 package org.forester.evoinference.parsimony;
29 import java.text.DecimalFormat;
30 import java.util.ArrayList;
31 import java.util.HashMap;
32 import java.util.Iterator;
33 import java.util.List;
35 import java.util.Random;
36 import java.util.SortedSet;
37 import java.util.TreeSet;
39 import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix;
40 import org.forester.evoinference.matrix.character.CharacterStateMatrix;
41 import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates;
42 import org.forester.evoinference.matrix.character.CharacterStateMatrix.GainLossStates;
43 import org.forester.phylogeny.Phylogeny;
44 import org.forester.phylogeny.PhylogenyNode;
45 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
46 import org.forester.util.FailedConditionCheckException;
47 import org.forester.util.ForesterUtil;
49 public class FitchParsimony<STATE_TYPE> {
51 final static private BinaryStates ABSENT = BinaryStates.ABSENT;
52 final static private GainLossStates GAIN = GainLossStates.GAIN;
53 final static private GainLossStates LOSS = GainLossStates.LOSS;
54 final static private BinaryStates PRESENT = BinaryStates.PRESENT;
55 private static final long RANDOM_NUMBER_SEED_DEFAULT = 21;
56 private static final boolean RANDOMIZE_DEFAULT = false;
57 private static final boolean RETURN_GAIN_LOSS_MATRIX_DEFAULT = false;
58 private static final boolean RETURN_INTERNAL_STATES_DEFAULT = false;
59 final static private GainLossStates UNCHANGED_ABSENT = GainLossStates.UNCHANGED_ABSENT;
60 final static private GainLossStates UNCHANGED_PRESENT = GainLossStates.UNCHANGED_PRESENT;
61 private static final boolean USE_LAST_DEFAULT = false;
63 private CharacterStateMatrix<GainLossStates> _gain_loss_matrix;
64 private CharacterStateMatrix<STATE_TYPE> _internal_states_matrix_after_traceback;
65 private CharacterStateMatrix<List<STATE_TYPE>> _internal_states_matrix_prior_to_traceback;
66 private Random _random_generator;
67 private long _random_number_seed;
68 private boolean _randomize;
69 private boolean _return_gain_loss = false;
70 private boolean _return_internal_states = false;
71 private int _total_gains;
72 private int _total_losses;
73 private int _total_unchanged;
74 private boolean _use_last;
75 private boolean _verbose = false;
77 public FitchParsimony() {
81 public void execute( final Phylogeny p, final CharacterStateMatrix<STATE_TYPE> external_node_states_matrix ) {
82 execute( p, external_node_states_matrix, false );
85 public void execute( final Phylogeny p,
86 final CharacterStateMatrix<STATE_TYPE> external_node_states_matrix,
87 final boolean verbose ) {
88 if ( !p.isRooted() ) {
89 throw new IllegalArgumentException( "attempt to execute Fitch parsimony on unroored phylogeny" );
91 if ( external_node_states_matrix.isEmpty() ) {
92 throw new IllegalArgumentException( "character matrix is empty" );
94 if ( external_node_states_matrix.getNumberOfIdentifiers() != p.getNumberOfExternalNodes() ) {
95 throw new IllegalArgumentException( "number of external nodes in phylogeny ["
96 + p.getNumberOfExternalNodes() + "] and number of indentifiers ["
97 + external_node_states_matrix.getNumberOfIdentifiers() + "] in matrix are not equal" );
99 setVerbose( verbose );
101 if ( isReturnInternalStates() ) {
102 initializeInternalStates( p, external_node_states_matrix );
104 if ( isReturnGainLossMatrix() ) {
105 initializeGainLossMatrix( p, external_node_states_matrix );
107 final DecimalFormat pf = new java.text.DecimalFormat( "000000" );
109 System.out.println( "Number of characters: " + external_node_states_matrix.getNumberOfCharacters() );
111 for( int character_index = 0; character_index < external_node_states_matrix.getNumberOfCharacters(); ++character_index ) {
113 ForesterUtil.updateProgress( character_index, pf );
115 executeForOneCharacter( p,
116 getStatesForCharacter( p, external_node_states_matrix, character_index ),
117 getStatesForCharacterForTraceback( p, external_node_states_matrix, character_index ),
121 System.out.println();
123 if ( external_node_states_matrix.getState( 0, 0 ) instanceof BinaryStates ) {
124 if ( ( external_node_states_matrix.getNumberOfCharacters() * p.getNumberOfBranches() ) != ( getTotalGains()
125 + getTotalLosses() + getTotalUnchanged() ) ) {
126 throw new FailedConditionCheckException( "this should not have happened: something is deeply wrong with Fitch parsimony implementation" );
131 public int getCost() {
135 public CharacterStateMatrix<CharacterStateMatrix.GainLossStates> getGainLossMatrix() {
136 if ( !isReturnGainLossMatrix() ) {
137 throw new RuntimeException( "creation of gain-loss matrix has not been enabled" );
139 return _gain_loss_matrix;
142 public CharacterStateMatrix<STATE_TYPE> getInternalStatesMatrix() {
143 if ( !isReturnInternalStates() ) {
144 throw new RuntimeException( "creation of internal state matrix has not been enabled" );
146 return _internal_states_matrix_after_traceback;
150 * Returns a view of the internal states prior to trace-back.
154 public CharacterStateMatrix<List<STATE_TYPE>> getInternalStatesMatrixPriorToTraceback() {
155 if ( !isReturnInternalStates() ) {
156 throw new RuntimeException( "creation of internal state matrix has not been enabled" );
158 return _internal_states_matrix_prior_to_traceback;
161 public int getTotalGains() {
165 public int getTotalLosses() {
166 return _total_losses;
169 public int getTotalUnchanged() {
170 return _total_unchanged;
173 public boolean isVerbose() {
177 public void setRandomize( final boolean randomize ) {
178 if ( randomize && isUseLast() ) {
179 throw new IllegalArgumentException( "attempt to allways use last state (ordered) if more than one choices and randomization at the same time" );
181 _randomize = randomize;
184 public void setRandomNumberSeed( final long random_number_seed ) {
185 if ( !isRandomize() ) {
186 throw new IllegalArgumentException( "attempt to set random number generator seed without randomization enabled" );
188 _random_number_seed = random_number_seed;
191 public void setReturnGainLossMatrix( final boolean return_gain_loss ) {
192 _return_gain_loss = return_gain_loss;
195 public void setReturnInternalStates( final boolean return_internal_states ) {
196 _return_internal_states = return_internal_states;
200 * This sets whether to use the first or last state in the sorted
201 * states at the undecided internal nodes.
202 * For randomized choices set randomize to true (and this to false).
204 * Note. It might be advisable to set this to false
205 * for BinaryStates if absence at the root is preferred
206 * (given the enum BinaryStates sorts in the following order:
207 * ABSENT, UNKNOWN, PRESENT).
212 public void setUseLast( final boolean use_last ) {
213 if ( use_last && isRandomize() ) {
214 throw new IllegalArgumentException( "attempt to allways use last state (ordered) if more than one choices and randomization at the same time" );
216 _use_last = use_last;
219 public void setVerbose( final boolean verbose ) {
223 private int determineIndex( final SortedSet<STATE_TYPE> current_node_states, int i ) {
224 if ( isRandomize() ) {
225 i = getRandomGenerator().nextInt( current_node_states.size() );
227 else if ( isUseLast() ) {
228 i = current_node_states.size() - 1;
233 private void executeForOneCharacter( final Phylogeny p,
234 final Map<PhylogenyNode, SortedSet<STATE_TYPE>> states,
235 final Map<PhylogenyNode, STATE_TYPE> traceback_states,
236 final int character_state_column ) {
237 postOrderTraversal( p, states );
238 preOrderTraversal( p, states, traceback_states, character_state_column );
241 private SortedSet<STATE_TYPE> getIntersectionOfStatesOfChildNodes( final Map<PhylogenyNode, SortedSet<STATE_TYPE>> states,
242 final PhylogenyNode node ) throws AssertionError {
243 final SortedSet<STATE_TYPE> states_in_child_nodes = new TreeSet<STATE_TYPE>();
244 for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
245 final PhylogenyNode node_child = node.getChildNode( i );
246 if ( !states.containsKey( node_child ) ) {
247 throw new AssertionError( "this should not have happened: node [" + node_child.getName()
248 + "] not found in node state map" );
251 states_in_child_nodes.addAll( states.get( node_child ) );
254 states_in_child_nodes.retainAll( states.get( node_child ) );
257 return states_in_child_nodes;
260 private Random getRandomGenerator() {
261 return _random_generator;
264 private long getRandomNumberSeed() {
265 return _random_number_seed;
268 private STATE_TYPE getStateAt( final int i, final SortedSet<STATE_TYPE> states ) {
269 final Iterator<STATE_TYPE> it = states.iterator();
270 for( int j = 0; j < i; ++j ) {
276 private Map<PhylogenyNode, SortedSet<STATE_TYPE>> getStatesForCharacter( final Phylogeny p,
277 final CharacterStateMatrix<STATE_TYPE> matrix,
278 final int character_index ) {
279 final Map<PhylogenyNode, SortedSet<STATE_TYPE>> states = new HashMap<PhylogenyNode, SortedSet<STATE_TYPE>>( matrix
280 .getNumberOfIdentifiers() );
281 for( int indentifier_index = 0; indentifier_index < matrix.getNumberOfIdentifiers(); ++indentifier_index ) {
282 final STATE_TYPE state = matrix.getState( indentifier_index, character_index );
283 if ( state == null ) {
284 throw new IllegalArgumentException( "value at [" + indentifier_index + ", " + character_index
287 final SortedSet<STATE_TYPE> l = new TreeSet<STATE_TYPE>();
289 states.put( p.getNode( matrix.getIdentifier( indentifier_index ) ), l );
294 private Map<PhylogenyNode, STATE_TYPE> getStatesForCharacterForTraceback( final Phylogeny p,
295 final CharacterStateMatrix<STATE_TYPE> matrix,
296 final int character_index ) {
297 final Map<PhylogenyNode, STATE_TYPE> states = new HashMap<PhylogenyNode, STATE_TYPE>( matrix.getNumberOfIdentifiers() );
298 for( int indentifier_index = 0; indentifier_index < matrix.getNumberOfIdentifiers(); ++indentifier_index ) {
299 final STATE_TYPE state = matrix.getState( indentifier_index, character_index );
300 if ( state == null ) {
301 throw new IllegalArgumentException( "value at [" + indentifier_index + ", " + character_index
304 states.put( p.getNode( matrix.getIdentifier( indentifier_index ) ), state );
309 private SortedSet<STATE_TYPE> getUnionOfStatesOfChildNodes( final Map<PhylogenyNode, SortedSet<STATE_TYPE>> states,
310 final PhylogenyNode node ) throws AssertionError {
311 final SortedSet<STATE_TYPE> states_in_child_nodes = new TreeSet<STATE_TYPE>();
312 for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
313 final PhylogenyNode node_child = node.getChildNode( i );
314 if ( !states.containsKey( node_child ) ) {
315 throw new AssertionError( "this should not have happened: node [" + node_child.getName()
316 + "] not found in node state map" );
318 states_in_child_nodes.addAll( states.get( node_child ) );
320 return states_in_child_nodes;
323 private void increaseCost() {
327 private void init() {
328 setReturnInternalStates( RETURN_INTERNAL_STATES_DEFAULT );
329 setReturnGainLossMatrix( RETURN_GAIN_LOSS_MATRIX_DEFAULT );
330 setRandomize( RANDOMIZE_DEFAULT );
331 setUseLast( USE_LAST_DEFAULT );
332 _random_number_seed = RANDOM_NUMBER_SEED_DEFAULT;
336 private void initializeGainLossMatrix( final Phylogeny p,
337 final CharacterStateMatrix<STATE_TYPE> external_node_states_matrix ) {
338 final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
339 for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) {
340 nodes.add( postorder.next() );
342 setGainLossMatrix( new BasicCharacterStateMatrix<CharacterStateMatrix.GainLossStates>( nodes.size(),
343 external_node_states_matrix
344 .getNumberOfCharacters() ) );
345 int identifier_index = 0;
346 for( final PhylogenyNode node : nodes ) {
347 getGainLossMatrix().setIdentifier( identifier_index++,
348 ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node
351 for( int character_index = 0; character_index < external_node_states_matrix.getNumberOfCharacters(); ++character_index ) {
352 getGainLossMatrix().setCharacter( character_index,
353 external_node_states_matrix.getCharacter( character_index ) );
357 private void initializeInternalStates( final Phylogeny p,
358 final CharacterStateMatrix<STATE_TYPE> external_node_states_matrix ) {
359 final List<PhylogenyNode> internal_nodes = new ArrayList<PhylogenyNode>();
360 for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) {
361 final PhylogenyNode node = postorder.next();
362 if ( node.isInternal() ) {
363 internal_nodes.add( node );
366 setInternalStatesMatrixPriorToTraceback( new BasicCharacterStateMatrix<List<STATE_TYPE>>( internal_nodes.size(),
367 external_node_states_matrix
368 .getNumberOfCharacters() ) );
369 setInternalStatesMatrixTraceback( new BasicCharacterStateMatrix<STATE_TYPE>( internal_nodes.size(),
370 external_node_states_matrix
371 .getNumberOfCharacters() ) );
372 int identifier_index = 0;
373 for( final PhylogenyNode node : internal_nodes ) {
374 getInternalStatesMatrix().setIdentifier( identifier_index,
375 ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node
377 getInternalStatesMatrixPriorToTraceback().setIdentifier( identifier_index,
378 ForesterUtil.isEmpty( node.getName() ) ? node
379 .getId() + "" : node.getName() );
382 for( int character_index = 0; character_index < external_node_states_matrix.getNumberOfCharacters(); ++character_index ) {
383 getInternalStatesMatrix().setCharacter( character_index,
384 external_node_states_matrix.getCharacter( character_index ) );
385 getInternalStatesMatrixPriorToTraceback().setCharacter( character_index,
386 external_node_states_matrix
387 .getCharacter( character_index ) );
391 private boolean isRandomize() {
395 private boolean isReturnGainLossMatrix() {
396 return _return_gain_loss;
399 private boolean isReturnInternalStates() {
400 return _return_internal_states;
403 private boolean isUseLast() {
407 private void postOrderTraversal( final Phylogeny p, final Map<PhylogenyNode, SortedSet<STATE_TYPE>> states ) {
408 for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) {
409 final PhylogenyNode node = postorder.next();
410 if ( !node.isExternal() ) {
411 SortedSet<STATE_TYPE> states_in_children = getIntersectionOfStatesOfChildNodes( states, node );
412 if ( states_in_children.isEmpty() ) {
413 states_in_children = getUnionOfStatesOfChildNodes( states, node );
415 states.put( node, states_in_children );
420 private void preOrderTraversal( final Phylogeny p,
421 final Map<PhylogenyNode, SortedSet<STATE_TYPE>> states,
422 final Map<PhylogenyNode, STATE_TYPE> traceback_states,
423 final int character_state_column ) {
424 for( final PhylogenyNodeIterator preorder = p.iteratorPreorder(); preorder.hasNext(); ) {
425 final PhylogenyNode current_node = preorder.next();
426 final SortedSet<STATE_TYPE> current_node_states = states.get( current_node );
427 STATE_TYPE parent_state = null;
428 if ( current_node.isRoot() ) {
430 i = determineIndex( current_node_states, i );
431 traceback_states.put( current_node, getStateAt( i, current_node_states ) );
434 parent_state = traceback_states.get( current_node.getParent() );
435 if ( current_node_states.contains( parent_state ) ) {
436 traceback_states.put( current_node, parent_state );
441 i = determineIndex( current_node_states, i );
442 traceback_states.put( current_node, getStateAt( i, current_node_states ) );
445 if ( isReturnInternalStates() ) {
446 if ( !current_node.isExternal() ) {
447 setInternalNodeStatePriorToTraceback( states, character_state_column, current_node );
448 setInternalNodeState( traceback_states, character_state_column, current_node );
451 if ( isReturnGainLossMatrix() && !current_node.isRoot() ) {
452 if ( !( parent_state instanceof BinaryStates ) ) {
453 throw new RuntimeException( "attempt to create gain loss matrix for not binary states" );
455 final BinaryStates parent_binary_state = ( BinaryStates ) parent_state;
456 final BinaryStates current_binary_state = ( BinaryStates ) traceback_states.get( current_node );
457 if ( ( parent_binary_state == PRESENT ) && ( current_binary_state == ABSENT ) ) {
459 setGainLossState( character_state_column, current_node, LOSS );
461 else if ( ( ( parent_binary_state == ABSENT ) || ( parent_binary_state == null ) )
462 && ( current_binary_state == PRESENT ) ) {
464 setGainLossState( character_state_column, current_node, GAIN );
468 if ( current_binary_state == PRESENT ) {
469 setGainLossState( character_state_column, current_node, UNCHANGED_PRESENT );
471 else if ( current_binary_state == ABSENT ) {
472 setGainLossState( character_state_column, current_node, UNCHANGED_ABSENT );
476 else if ( isReturnGainLossMatrix() && current_node.isRoot() ) {
477 final BinaryStates current_binary_state = ( BinaryStates ) traceback_states.get( current_node );
478 ++_total_unchanged; //new
479 if ( current_binary_state == PRESENT ) {//new
480 setGainLossState( character_state_column, current_node, UNCHANGED_PRESENT );//new
482 else if ( current_binary_state == ABSENT ) {//new
483 setGainLossState( character_state_column, current_node, UNCHANGED_ABSENT );//new
485 // setGainLossState( character_state_column, current_node, UNKNOWN_GAIN_LOSS );
490 private void reset() {
494 setTotalUnchanged( 0 );
495 setRandomGenerator( new Random( getRandomNumberSeed() ) );
498 private void setCost( final int cost ) {
502 private void setGainLossMatrix( final CharacterStateMatrix<GainLossStates> gain_loss_matrix ) {
503 _gain_loss_matrix = gain_loss_matrix;
506 private void setGainLossState( final int character_state_column,
507 final PhylogenyNode node,
508 final GainLossStates state ) {
509 getGainLossMatrix().setState( ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node.getName(),
510 character_state_column,
514 private void setInternalNodeState( final Map<PhylogenyNode, STATE_TYPE> states,
515 final int character_state_column,
516 final PhylogenyNode node ) {
517 getInternalStatesMatrix()
518 .setState( ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node.getName(),
519 character_state_column,
520 states.get( node ) );
523 private void setInternalNodeStatePriorToTraceback( final Map<PhylogenyNode, SortedSet<STATE_TYPE>> states,
524 final int character_state_column,
525 final PhylogenyNode node ) {
526 getInternalStatesMatrixPriorToTraceback()
527 .setState( ForesterUtil.isEmpty( node.getName() ) ? String.valueOf( node.getId() ) : node.getName(),
528 character_state_column,
529 toListSorted( states.get( node ) ) );
532 private void setInternalStatesMatrixPriorToTraceback( final CharacterStateMatrix<List<STATE_TYPE>> internal_states_matrix_prior_to_traceback ) {
533 _internal_states_matrix_prior_to_traceback = internal_states_matrix_prior_to_traceback;
536 private void setInternalStatesMatrixTraceback( final CharacterStateMatrix<STATE_TYPE> internal_states_matrix_after_traceback ) {
537 _internal_states_matrix_after_traceback = internal_states_matrix_after_traceback;
540 private void setRandomGenerator( final Random random_generator ) {
541 _random_generator = random_generator;
544 private void setTotalGains( final int total_gains ) {
545 _total_gains = total_gains;
548 private void setTotalLosses( final int total_losses ) {
549 _total_losses = total_losses;
552 private void setTotalUnchanged( final int total_unchanged ) {
553 _total_unchanged = total_unchanged;
556 private List<STATE_TYPE> toListSorted( final SortedSet<STATE_TYPE> states ) {
557 final List<STATE_TYPE> l = new ArrayList<STATE_TYPE>( states.size() );
558 for( final STATE_TYPE state : states ) {