initial commit
[jalview.git] / forester / java / src / org / forester / evoinference / parsimony / DolloParsimony.java
1 // $Id:
2 //
3 // FORESTER -- software libraries and applications
4 // for evolutionary biology research and applications.
5 //
6 // Copyright (C) 2008-2009 Christian M. Zmasek
7 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
8 // All rights reserved
9 // 
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 // 
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 //
24 // Contact: phylosoft @ gmail . com
25 // WWW: www.phylosoft.org/forester
26
27 package org.forester.evoinference.parsimony;
28
29 import java.util.ArrayList;
30 import java.util.HashMap;
31 import java.util.List;
32 import java.util.Map;
33
34 import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix;
35 import org.forester.evoinference.matrix.character.CharacterStateMatrix;
36 import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates;
37 import org.forester.evoinference.matrix.character.CharacterStateMatrix.GainLossStates;
38 import org.forester.phylogeny.Phylogeny;
39 import org.forester.phylogeny.PhylogenyNode;
40 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
41 import org.forester.util.ForesterUtil;
42
43 public class DolloParsimony {
44
45     final static private BinaryStates            PRESENT                         = BinaryStates.PRESENT;
46     final static private BinaryStates            ABSENT                          = BinaryStates.ABSENT;
47     final static private BinaryStates            UNKNOWN                         = BinaryStates.UNKNOWN;
48     final static private GainLossStates          LOSS                            = GainLossStates.LOSS;
49     final static private GainLossStates          GAIN                            = GainLossStates.GAIN;
50     final static private GainLossStates          UNCHANGED_PRESENT               = GainLossStates.UNCHANGED_PRESENT;
51     final static private GainLossStates          UNCHANGED_ABSENT                = GainLossStates.UNCHANGED_ABSENT;
52     private static final boolean                 RETURN_INTERNAL_STATES_DEFAULT  = false;
53     private static final boolean                 RETURN_GAIN_LOSS_MATRIX_DEFAULT = false;
54     private boolean                              _return_internal_states         = false;
55     private boolean                              _return_gain_loss               = false;
56     private int                                  _total_gains;
57     private int                                  _total_losses;
58     private int                                  _total_unchanged;
59     private CharacterStateMatrix<BinaryStates>   _internal_states_matrix;
60     private CharacterStateMatrix<GainLossStates> _gain_loss_matrix;
61
62     private DolloParsimony() {
63         init();
64     }
65
66     public void execute( final Phylogeny p, final CharacterStateMatrix<BinaryStates> external_node_states_matrix ) {
67         if ( !p.isRooted() ) {
68             throw new IllegalArgumentException( "attempt to execute Dollo parsimony on unroored phylogeny" );
69         }
70         if ( external_node_states_matrix.isEmpty() ) {
71             throw new IllegalArgumentException( "character matrix is empty" );
72         }
73         if ( external_node_states_matrix.getNumberOfIdentifiers() != p.getNumberOfExternalNodes() ) {
74             throw new IllegalArgumentException( "number of external nodes in phylogeny ["
75                     + p.getNumberOfExternalNodes() + "] and number of indentifiers ["
76                     + external_node_states_matrix.getNumberOfIdentifiers() + "] in matrix are not equal" );
77         }
78         reset();
79         if ( isReturnInternalStates() ) {
80             initializeInternalStates( p, external_node_states_matrix );
81         }
82         if ( isReturnGainLossMatrix() ) {
83             initializeGainLossMatrix( p, external_node_states_matrix );
84         }
85         for( int character_index = 0; character_index < external_node_states_matrix.getNumberOfCharacters(); ++character_index ) {
86             executeForOneCharacter( p,
87                                     getStatesForCharacter( p, external_node_states_matrix, character_index ),
88                                     character_index );
89         }
90         if ( ( external_node_states_matrix.getNumberOfCharacters() * p.getNumberOfBranches() ) != ( getTotalGains()
91                 + getTotalLosses() + getTotalUnchanged() ) ) {
92             throw new AssertionError( "this should not have happened: something is deeply wrong with Dollo parsimony implementation" );
93         }
94     }
95
96     private void executeForOneCharacter( final Phylogeny p,
97                                          final Map<PhylogenyNode, BinaryStates> states,
98                                          final int character_state_column ) {
99         postOrderTraversal( p, states );
100         preOrderTraversal( p, states, character_state_column );
101     }
102
103     /* (non-Javadoc)
104      * @see org.forester.phylogenyinference.Parsimony#getCost()
105      */
106     public int getCost() {
107         return getTotalGains() + getTotalLosses();
108     }
109
110     /* (non-Javadoc)
111      * @see org.forester.phylogenyinference.Parsimony#getGainLossMatrix()
112      */
113     public CharacterStateMatrix<CharacterStateMatrix.GainLossStates> getGainLossMatrix() {
114         if ( !isReturnGainLossMatrix() ) {
115             throw new RuntimeException( "creation of gain-loss matrix has not been enabled" );
116         }
117         return _gain_loss_matrix;
118     }
119
120     /* (non-Javadoc)
121      * @see org.forester.phylogenyinference.Parsimony#getInternalStatesMatrix()
122      */
123     public CharacterStateMatrix<BinaryStates> getInternalStatesMatrix() {
124         if ( !isReturnInternalStates() ) {
125             throw new RuntimeException( "creation of internal state matrix has not been enabled" );
126         }
127         return _internal_states_matrix;
128     }
129
130     private Map<PhylogenyNode, BinaryStates> getStatesForCharacter( final Phylogeny p,
131                                                                     final CharacterStateMatrix<BinaryStates> matrix,
132                                                                     final int character_index ) {
133         final Map<PhylogenyNode, BinaryStates> states = new HashMap<PhylogenyNode, BinaryStates>( matrix
134                 .getNumberOfIdentifiers() );
135         for( int indentifier_index = 0; indentifier_index < matrix.getNumberOfIdentifiers(); ++indentifier_index ) {
136             final BinaryStates state = matrix.getState( indentifier_index, character_index );
137             if ( state == null ) {
138                 throw new IllegalArgumentException( "value at [" + indentifier_index + ", " + character_index
139                         + "] is null" );
140             }
141             states.put( p.getNode( matrix.getIdentifier( indentifier_index ) ), state );
142         }
143         return states;
144     }
145
146     /* (non-Javadoc)
147      * @see org.forester.phylogenyinference.Parsimony#getTotalGains()
148      */
149     public int getTotalGains() {
150         return _total_gains;
151     }
152
153     /* (non-Javadoc)
154      * @see org.forester.phylogenyinference.Parsimony#getTotalLosses()
155      */
156     public int getTotalLosses() {
157         return _total_losses;
158     }
159
160     /* (non-Javadoc)
161      * @see org.forester.phylogenyinference.Parsimony#getTotalUnchanged()
162      */
163     public int getTotalUnchanged() {
164         return _total_unchanged;
165     }
166
167     private void init() {
168         setReturnInternalStates( RETURN_INTERNAL_STATES_DEFAULT );
169         setReturnGainLossMatrix( RETURN_GAIN_LOSS_MATRIX_DEFAULT );
170         reset();
171     }
172
173     private void initializeGainLossMatrix( final Phylogeny p,
174                                            final CharacterStateMatrix<BinaryStates> external_node_states_matrix ) {
175         final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
176         for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) {
177             nodes.add( postorder.next() );
178         }
179         setGainLossMatrix( new BasicCharacterStateMatrix<CharacterStateMatrix.GainLossStates>( nodes.size(),
180                                                                                                external_node_states_matrix
181                                                                                                        .getNumberOfCharacters() ) );
182         int identifier_index = 0;
183         for( final PhylogenyNode node : nodes ) {
184             getGainLossMatrix().setIdentifier( identifier_index++,
185                                                ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node
186                                                        .getName() );
187         }
188         for( int character_index = 0; character_index < external_node_states_matrix.getNumberOfCharacters(); ++character_index ) {
189             getGainLossMatrix().setCharacter( character_index,
190                                               external_node_states_matrix.getCharacter( character_index ) );
191         }
192     }
193
194     private void initializeInternalStates( final Phylogeny p,
195                                            final CharacterStateMatrix<BinaryStates> external_node_states_matrix ) {
196         final List<PhylogenyNode> internal_nodes = new ArrayList<PhylogenyNode>();
197         for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) {
198             final PhylogenyNode node = postorder.next();
199             if ( node.isInternal() ) {
200                 internal_nodes.add( node );
201             }
202         }
203         setInternalStatesMatrix( new BasicCharacterStateMatrix<BinaryStates>( internal_nodes.size(),
204                                                                               external_node_states_matrix
205                                                                                       .getNumberOfCharacters() ) );
206         int identifier_index = 0;
207         for( final PhylogenyNode node : internal_nodes ) {
208             getInternalStatesMatrix().setIdentifier( identifier_index++,
209                                                      ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node
210                                                              .getName() );
211         }
212         for( int character_index = 0; character_index < external_node_states_matrix.getNumberOfCharacters(); ++character_index ) {
213             getInternalStatesMatrix().setCharacter( character_index,
214                                                     external_node_states_matrix.getCharacter( character_index ) );
215         }
216     }
217
218     private boolean isReturnGainLossMatrix() {
219         return _return_gain_loss;
220     }
221
222     private boolean isReturnInternalStates() {
223         return _return_internal_states;
224     }
225
226     private void postOrderTraversal( final Phylogeny p, final Map<PhylogenyNode, BinaryStates> states )
227             throws AssertionError {
228         for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) {
229             final PhylogenyNode node = postorder.next();
230             if ( !node.isExternal() ) {
231                 final int present_unknown = getNumberOfChildNodesWithPresentOrUnknownState( states, node );
232                 if ( present_unknown < 1 ) {
233                     states.put( node, ABSENT );
234                 }
235                 else if ( present_unknown == 1 ) {
236                     states.put( node, UNKNOWN );
237                 }
238                 else {
239                     states.put( node, PRESENT );
240                 }
241             }
242         }
243     }
244
245     private void preOrderTraversal( final Phylogeny p,
246                                     final Map<PhylogenyNode, BinaryStates> states,
247                                     final int character_state_column ) throws AssertionError {
248         boolean gain = false;
249         for( final PhylogenyNodeIterator preorder = p.iteratorPreorder(); preorder.hasNext(); ) {
250             final PhylogenyNode node = preorder.next();
251             BinaryStates parent_state = null;
252             if ( !node.isRoot() ) {
253                 parent_state = states.get( node.getParent() );
254             }
255             if ( !node.isExternal() ) {
256                 if ( states.get( node ) == UNKNOWN ) {
257                     if ( parent_state == PRESENT ) {
258                         states.put( node, PRESENT );
259                     }
260                     else {
261                         if ( isCharacterPresentOrUnknownInAtLeastTwoChildNodes( states, node ) ) {
262                             states.put( node, PRESENT );
263                         }
264                         else {
265                             states.put( node, ABSENT );
266                         }
267                     }
268                 }
269                 if ( isReturnInternalStates() ) {
270                     setInternalNodeState( states, character_state_column, node );
271                 }
272             }
273             final BinaryStates current_state = states.get( node );
274             if ( ( parent_state == PRESENT ) && ( current_state == ABSENT ) ) {
275                 ++_total_losses;
276                 if ( isReturnGainLossMatrix() ) {
277                     setGainLossState( character_state_column, node, LOSS );
278                 }
279             }
280             else if ( ( ( parent_state == ABSENT ) ) && ( current_state == PRESENT ) ) {
281                 if ( gain ) {
282                     throw new RuntimeException( "this should not have happened: dollo parsimony cannot have more than one gain" );
283                 }
284                 gain = true;
285                 ++_total_gains;
286                 if ( isReturnGainLossMatrix() ) {
287                     setGainLossState( character_state_column, node, GAIN );
288                 }
289             }
290             else {
291                 ++_total_unchanged;
292                 if ( isReturnGainLossMatrix() ) {
293                     if ( current_state == PRESENT ) {
294                         setGainLossState( character_state_column, node, UNCHANGED_PRESENT );
295                     }
296                     else if ( current_state == ABSENT ) {
297                         setGainLossState( character_state_column, node, UNCHANGED_ABSENT );
298                     }
299                 }
300             }
301         }
302     }
303
304     private void reset() {
305         setTotalLosses( 0 );
306         setTotalGains( 0 );
307         setTotalUnchanged( 0 );
308     }
309
310     private void setGainLossMatrix( final CharacterStateMatrix<CharacterStateMatrix.GainLossStates> gain_loss_matrix ) {
311         _gain_loss_matrix = gain_loss_matrix;
312     }
313
314     private void setGainLossState( final int character_state_column,
315                                    final PhylogenyNode node,
316                                    final GainLossStates state ) {
317         getGainLossMatrix().setState( ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node.getName(),
318                                       character_state_column,
319                                       state );
320     }
321
322     private void setInternalNodeState( final Map<PhylogenyNode, BinaryStates> states,
323                                        final int character_state_column,
324                                        final PhylogenyNode node ) {
325         getInternalStatesMatrix()
326                 .setState( ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node.getName(),
327                            character_state_column,
328                            states.get( node ) );
329     }
330
331     private void setInternalStatesMatrix( final CharacterStateMatrix<BinaryStates> internal_states_matrix ) {
332         _internal_states_matrix = internal_states_matrix;
333     }
334
335     /* (non-Javadoc)
336      * @see org.forester.phylogenyinference.Parsimony#setReturnGainLossMatrix(boolean)
337      */
338     public void setReturnGainLossMatrix( final boolean return_gain_loss ) {
339         _return_gain_loss = return_gain_loss;
340     }
341
342     /* (non-Javadoc)
343      * @see org.forester.phylogenyinference.Parsimony#setReturnInternalStates(boolean)
344      */
345     public void setReturnInternalStates( final boolean return_internal_states ) {
346         _return_internal_states = return_internal_states;
347     }
348
349     private void setTotalGains( final int total_gains ) {
350         _total_gains = total_gains;
351     }
352
353     private void setTotalLosses( final int total_losses ) {
354         _total_losses = total_losses;
355     }
356
357     private void setTotalUnchanged( final int total_unchanged ) {
358         _total_unchanged = total_unchanged;
359     }
360
361     public static DolloParsimony createInstance() {
362         return new DolloParsimony();
363     }
364
365     private static int getNumberOfChildNodesWithPresentOrUnknownState( final Map<PhylogenyNode, BinaryStates> states,
366                                                                        final PhylogenyNode node ) {
367         int presents = 0;
368         for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
369             final PhylogenyNode node_child = node.getChildNode( i );
370             if ( !states.containsKey( node_child ) ) {
371                 throw new RuntimeException( "this should not have happened: node [" + node_child.getName()
372                         + "] not found in node state map" );
373             }
374             if ( ( states.get( node_child ) == BinaryStates.PRESENT )
375                     || ( states.get( node_child ) == BinaryStates.UNKNOWN ) ) {
376                 ++presents;
377             }
378         }
379         return presents;
380     }
381
382     private static boolean isCharacterPresentOrUnknownInAtLeastTwoChildNodes( final Map<PhylogenyNode, BinaryStates> states,
383                                                                               final PhylogenyNode node ) {
384         int count = 0;
385         for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
386             final PhylogenyNode node_child = node.getChildNode( i );
387             if ( ( states.get( node_child ) == PRESENT ) || ( states.get( node_child ) == UNKNOWN ) ) {
388                 ++count;
389                 if ( count > 1 ) {
390                     return true;
391                 }
392             }
393         }
394         return false;
395     }
396 }