3 // FORESTER -- software libraries and applications
4 // for evolutionary biology research and applications.
6 // Copyright (C) 2008-2009 Christian M. Zmasek
7 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 // Contact: phylosoft @ gmail . com
25 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
27 package org.forester.evoinference.parsimony;
29 import java.util.ArrayList;
30 import java.util.HashMap;
31 import java.util.List;
34 import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix;
35 import org.forester.evoinference.matrix.character.CharacterStateMatrix;
36 import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates;
37 import org.forester.evoinference.matrix.character.CharacterStateMatrix.GainLossStates;
38 import org.forester.phylogeny.Phylogeny;
39 import org.forester.phylogeny.PhylogenyNode;
40 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
41 import org.forester.util.ForesterUtil;
43 public class DolloParsimony {
45 final static private BinaryStates PRESENT = BinaryStates.PRESENT;
46 final static private BinaryStates ABSENT = BinaryStates.ABSENT;
47 final static private BinaryStates UNKNOWN = BinaryStates.UNKNOWN;
48 final static private GainLossStates LOSS = GainLossStates.LOSS;
49 final static private GainLossStates GAIN = GainLossStates.GAIN;
50 final static private GainLossStates UNCHANGED_PRESENT = GainLossStates.UNCHANGED_PRESENT;
51 final static private GainLossStates UNCHANGED_ABSENT = GainLossStates.UNCHANGED_ABSENT;
52 private static final boolean RETURN_INTERNAL_STATES_DEFAULT = false;
53 private static final boolean RETURN_GAIN_LOSS_MATRIX_DEFAULT = false;
54 private boolean _return_internal_states = false;
55 private boolean _return_gain_loss = false;
56 private int _total_gains;
57 private int _total_losses;
58 private int _total_unchanged;
59 private CharacterStateMatrix<BinaryStates> _internal_states_matrix;
60 private CharacterStateMatrix<GainLossStates> _gain_loss_matrix;
62 private DolloParsimony() {
66 public void execute( final Phylogeny p, final CharacterStateMatrix<BinaryStates> external_node_states_matrix ) {
67 if ( !p.isRooted() ) {
68 throw new IllegalArgumentException( "attempt to execute Dollo parsimony on unroored phylogeny" );
70 if ( external_node_states_matrix.isEmpty() ) {
71 throw new IllegalArgumentException( "character matrix is empty" );
73 if ( external_node_states_matrix.getNumberOfIdentifiers() != p.getNumberOfExternalNodes() ) {
74 throw new IllegalArgumentException( "number of external nodes in phylogeny ["
75 + p.getNumberOfExternalNodes() + "] and number of indentifiers ["
76 + external_node_states_matrix.getNumberOfIdentifiers() + "] in matrix are not equal" );
79 if ( isReturnInternalStates() ) {
80 initializeInternalStates( p, external_node_states_matrix );
82 if ( isReturnGainLossMatrix() ) {
83 initializeGainLossMatrix( p, external_node_states_matrix );
85 for( int character_index = 0; character_index < external_node_states_matrix.getNumberOfCharacters(); ++character_index ) {
86 executeForOneCharacter( p,
87 getStatesForCharacter( p, external_node_states_matrix, character_index ),
90 if ( ( external_node_states_matrix.getNumberOfCharacters() * p.getNumberOfBranches() ) != ( getTotalGains()
91 + getTotalLosses() + getTotalUnchanged() ) ) {
92 throw new AssertionError( "this should not have happened: something is deeply wrong with Dollo parsimony implementation" );
96 private void executeForOneCharacter( final Phylogeny p,
97 final Map<PhylogenyNode, BinaryStates> states,
98 final int character_state_column ) {
99 postOrderTraversal( p, states );
100 preOrderTraversal( p, states, character_state_column );
104 * @see org.forester.phylogenyinference.Parsimony#getCost()
106 public int getCost() {
107 return getTotalGains() + getTotalLosses();
111 * @see org.forester.phylogenyinference.Parsimony#getGainLossMatrix()
113 public CharacterStateMatrix<CharacterStateMatrix.GainLossStates> getGainLossMatrix() {
114 if ( !isReturnGainLossMatrix() ) {
115 throw new RuntimeException( "creation of gain-loss matrix has not been enabled" );
117 return _gain_loss_matrix;
121 * @see org.forester.phylogenyinference.Parsimony#getInternalStatesMatrix()
123 public CharacterStateMatrix<BinaryStates> getInternalStatesMatrix() {
124 if ( !isReturnInternalStates() ) {
125 throw new RuntimeException( "creation of internal state matrix has not been enabled" );
127 return _internal_states_matrix;
130 private Map<PhylogenyNode, BinaryStates> getStatesForCharacter( final Phylogeny p,
131 final CharacterStateMatrix<BinaryStates> matrix,
132 final int character_index ) {
133 final Map<PhylogenyNode, BinaryStates> states = new HashMap<PhylogenyNode, BinaryStates>( matrix.getNumberOfIdentifiers() );
134 for( int indentifier_index = 0; indentifier_index < matrix.getNumberOfIdentifiers(); ++indentifier_index ) {
135 final BinaryStates state = matrix.getState( indentifier_index, character_index );
136 if ( state == null ) {
137 throw new IllegalArgumentException( "value at [" + indentifier_index + ", " + character_index
140 states.put( p.getNode( matrix.getIdentifier( indentifier_index ) ), state );
146 * @see org.forester.phylogenyinference.Parsimony#getTotalGains()
148 public int getTotalGains() {
153 * @see org.forester.phylogenyinference.Parsimony#getTotalLosses()
155 public int getTotalLosses() {
156 return _total_losses;
160 * @see org.forester.phylogenyinference.Parsimony#getTotalUnchanged()
162 public int getTotalUnchanged() {
163 return _total_unchanged;
166 private void init() {
167 setReturnInternalStates( RETURN_INTERNAL_STATES_DEFAULT );
168 setReturnGainLossMatrix( RETURN_GAIN_LOSS_MATRIX_DEFAULT );
172 private void initializeGainLossMatrix( final Phylogeny p,
173 final CharacterStateMatrix<BinaryStates> external_node_states_matrix ) {
174 final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
175 for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) {
176 nodes.add( postorder.next() );
178 setGainLossMatrix( new BasicCharacterStateMatrix<CharacterStateMatrix.GainLossStates>( nodes.size(),
179 external_node_states_matrix
180 .getNumberOfCharacters() ) );
181 int identifier_index = 0;
182 for( final PhylogenyNode node : nodes ) {
183 getGainLossMatrix().setIdentifier( identifier_index++,
184 ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node
187 for( int character_index = 0; character_index < external_node_states_matrix.getNumberOfCharacters(); ++character_index ) {
188 getGainLossMatrix().setCharacter( character_index,
189 external_node_states_matrix.getCharacter( character_index ) );
193 private void initializeInternalStates( final Phylogeny p,
194 final CharacterStateMatrix<BinaryStates> external_node_states_matrix ) {
195 final List<PhylogenyNode> internal_nodes = new ArrayList<PhylogenyNode>();
196 for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) {
197 final PhylogenyNode node = postorder.next();
198 if ( node.isInternal() ) {
199 internal_nodes.add( node );
202 setInternalStatesMatrix( new BasicCharacterStateMatrix<BinaryStates>( internal_nodes.size(),
203 external_node_states_matrix
204 .getNumberOfCharacters() ) );
205 int identifier_index = 0;
206 for( final PhylogenyNode node : internal_nodes ) {
207 getInternalStatesMatrix().setIdentifier( identifier_index++,
208 ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node
211 for( int character_index = 0; character_index < external_node_states_matrix.getNumberOfCharacters(); ++character_index ) {
212 getInternalStatesMatrix().setCharacter( character_index,
213 external_node_states_matrix.getCharacter( character_index ) );
217 private boolean isReturnGainLossMatrix() {
218 return _return_gain_loss;
221 private boolean isReturnInternalStates() {
222 return _return_internal_states;
225 private void postOrderTraversal( final Phylogeny p, final Map<PhylogenyNode, BinaryStates> states )
226 throws AssertionError {
227 for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) {
228 final PhylogenyNode node = postorder.next();
229 if ( !node.isExternal() ) {
230 final int present_unknown = getNumberOfChildNodesWithPresentOrUnknownState( states, node );
231 if ( present_unknown < 1 ) {
232 states.put( node, ABSENT );
234 else if ( present_unknown == 1 ) {
235 states.put( node, UNKNOWN );
238 states.put( node, PRESENT );
244 private void preOrderTraversal( final Phylogeny p,
245 final Map<PhylogenyNode, BinaryStates> states,
246 final int character_state_column ) throws AssertionError {
247 boolean gain = false;
248 for( final PhylogenyNodeIterator preorder = p.iteratorPreorder(); preorder.hasNext(); ) {
249 final PhylogenyNode node = preorder.next();
250 BinaryStates parent_state = null;
251 if ( !node.isRoot() ) {
252 parent_state = states.get( node.getParent() );
254 if ( !node.isExternal() ) {
255 if ( states.get( node ) == UNKNOWN ) {
256 if ( parent_state == PRESENT ) {
257 states.put( node, PRESENT );
260 if ( isCharacterPresentOrUnknownInAtLeastTwoChildNodes( states, node ) ) {
261 states.put( node, PRESENT );
264 states.put( node, ABSENT );
268 if ( isReturnInternalStates() ) {
269 setInternalNodeState( states, character_state_column, node );
272 final BinaryStates current_state = states.get( node );
273 if ( ( parent_state == PRESENT ) && ( current_state == ABSENT ) ) {
275 if ( isReturnGainLossMatrix() ) {
276 setGainLossState( character_state_column, node, LOSS );
279 else if ( ( ( parent_state == ABSENT ) ) && ( current_state == PRESENT ) ) {
281 throw new RuntimeException( "this should not have happened: dollo parsimony cannot have more than one gain" );
285 if ( isReturnGainLossMatrix() ) {
286 setGainLossState( character_state_column, node, GAIN );
291 if ( isReturnGainLossMatrix() ) {
292 if ( current_state == PRESENT ) {
293 setGainLossState( character_state_column, node, UNCHANGED_PRESENT );
295 else if ( current_state == ABSENT ) {
296 setGainLossState( character_state_column, node, UNCHANGED_ABSENT );
303 private void reset() {
306 setTotalUnchanged( 0 );
309 private void setGainLossMatrix( final CharacterStateMatrix<CharacterStateMatrix.GainLossStates> gain_loss_matrix ) {
310 _gain_loss_matrix = gain_loss_matrix;
313 private void setGainLossState( final int character_state_column,
314 final PhylogenyNode node,
315 final GainLossStates state ) {
316 getGainLossMatrix().setState( ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node.getName(),
317 character_state_column,
321 private void setInternalNodeState( final Map<PhylogenyNode, BinaryStates> states,
322 final int character_state_column,
323 final PhylogenyNode node ) {
324 getInternalStatesMatrix()
325 .setState( ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node.getName(),
326 character_state_column,
327 states.get( node ) );
330 private void setInternalStatesMatrix( final CharacterStateMatrix<BinaryStates> internal_states_matrix ) {
331 _internal_states_matrix = internal_states_matrix;
335 * @see org.forester.phylogenyinference.Parsimony#setReturnGainLossMatrix(boolean)
337 public void setReturnGainLossMatrix( final boolean return_gain_loss ) {
338 _return_gain_loss = return_gain_loss;
342 * @see org.forester.phylogenyinference.Parsimony#setReturnInternalStates(boolean)
344 public void setReturnInternalStates( final boolean return_internal_states ) {
345 _return_internal_states = return_internal_states;
348 private void setTotalGains( final int total_gains ) {
349 _total_gains = total_gains;
352 private void setTotalLosses( final int total_losses ) {
353 _total_losses = total_losses;
356 private void setTotalUnchanged( final int total_unchanged ) {
357 _total_unchanged = total_unchanged;
360 public static DolloParsimony createInstance() {
361 return new DolloParsimony();
364 private static int getNumberOfChildNodesWithPresentOrUnknownState( final Map<PhylogenyNode, BinaryStates> states,
365 final PhylogenyNode node ) {
367 for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
368 final PhylogenyNode node_child = node.getChildNode( i );
369 if ( !states.containsKey( node_child ) ) {
370 throw new RuntimeException( "this should not have happened: node [" + node_child.getName()
371 + "] not found in node state map" );
373 if ( ( states.get( node_child ) == BinaryStates.PRESENT )
374 || ( states.get( node_child ) == BinaryStates.UNKNOWN ) ) {
381 private static boolean isCharacterPresentOrUnknownInAtLeastTwoChildNodes( final Map<PhylogenyNode, BinaryStates> states,
382 final PhylogenyNode node ) {
384 for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
385 final PhylogenyNode node_child = node.getChildNode( i );
386 if ( ( states.get( node_child ) == PRESENT ) || ( states.get( node_child ) == UNKNOWN ) ) {