(no commit message)
[jalview.git] / forester / java / src / org / forester / evoinference / parsimony / DolloParsimony.java
1 // $Id:
2 //
3 // FORESTER -- software libraries and applications
4 // for evolutionary biology research and applications.
5 //
6 // Copyright (C) 2008-2009 Christian M. Zmasek
7 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
8 // All rights reserved
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 //
24 // Contact: phylosoft @ gmail . com
25 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26
27 package org.forester.evoinference.parsimony;
28
29 import java.util.ArrayList;
30 import java.util.HashMap;
31 import java.util.List;
32 import java.util.Map;
33
34 import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix;
35 import org.forester.evoinference.matrix.character.CharacterStateMatrix;
36 import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates;
37 import org.forester.evoinference.matrix.character.CharacterStateMatrix.GainLossStates;
38 import org.forester.phylogeny.Phylogeny;
39 import org.forester.phylogeny.PhylogenyNode;
40 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
41 import org.forester.util.ForesterUtil;
42
43 public class DolloParsimony {
44
45     final static private BinaryStates            PRESENT                         = BinaryStates.PRESENT;
46     final static private BinaryStates            ABSENT                          = BinaryStates.ABSENT;
47     final static private BinaryStates            UNKNOWN                         = BinaryStates.UNKNOWN;
48     final static private GainLossStates          LOSS                            = GainLossStates.LOSS;
49     final static private GainLossStates          GAIN                            = GainLossStates.GAIN;
50     final static private GainLossStates          UNCHANGED_PRESENT               = GainLossStates.UNCHANGED_PRESENT;
51     final static private GainLossStates          UNCHANGED_ABSENT                = GainLossStates.UNCHANGED_ABSENT;
52     private static final boolean                 RETURN_INTERNAL_STATES_DEFAULT  = false;
53     private static final boolean                 RETURN_GAIN_LOSS_MATRIX_DEFAULT = false;
54     private boolean                              _return_internal_states         = false;
55     private boolean                              _return_gain_loss               = false;
56     private int                                  _total_gains;
57     private int                                  _total_losses;
58     private int                                  _total_unchanged;
59     private CharacterStateMatrix<BinaryStates>   _internal_states_matrix;
60     private CharacterStateMatrix<GainLossStates> _gain_loss_matrix;
61
62     private DolloParsimony() {
63         init();
64     }
65
66     public void execute( final Phylogeny p, final CharacterStateMatrix<BinaryStates> external_node_states_matrix ) {
67         if ( !p.isRooted() ) {
68             throw new IllegalArgumentException( "attempt to execute Dollo parsimony on unroored phylogeny" );
69         }
70         if ( external_node_states_matrix.isEmpty() ) {
71             throw new IllegalArgumentException( "character matrix is empty" );
72         }
73         if ( external_node_states_matrix.getNumberOfIdentifiers() != p.getNumberOfExternalNodes() ) {
74             throw new IllegalArgumentException( "number of external nodes in phylogeny ["
75                     + p.getNumberOfExternalNodes() + "] and number of indentifiers ["
76                     + external_node_states_matrix.getNumberOfIdentifiers() + "] in matrix are not equal" );
77         }
78         reset();
79         if ( isReturnInternalStates() ) {
80             initializeInternalStates( p, external_node_states_matrix );
81         }
82         if ( isReturnGainLossMatrix() ) {
83             initializeGainLossMatrix( p, external_node_states_matrix );
84         }
85         for( int character_index = 0; character_index < external_node_states_matrix.getNumberOfCharacters(); ++character_index ) {
86             executeForOneCharacter( p,
87                                     getStatesForCharacter( p, external_node_states_matrix, character_index ),
88                                     character_index );
89         }
90         if ( ( external_node_states_matrix.getNumberOfCharacters() * p.getNumberOfBranches() ) != ( getTotalGains()
91                 + getTotalLosses() + getTotalUnchanged() ) ) {
92             throw new AssertionError( "this should not have happened: something is deeply wrong with Dollo parsimony implementation" );
93         }
94     }
95
96     private void executeForOneCharacter( final Phylogeny p,
97                                          final Map<PhylogenyNode, BinaryStates> states,
98                                          final int character_state_column ) {
99         postOrderTraversal( p, states );
100         preOrderTraversal( p, states, character_state_column );
101     }
102
103     /* (non-Javadoc)
104      * @see org.forester.phylogenyinference.Parsimony#getCost()
105      */
106     public int getCost() {
107         return getTotalGains() + getTotalLosses();
108     }
109
110     /* (non-Javadoc)
111      * @see org.forester.phylogenyinference.Parsimony#getGainLossMatrix()
112      */
113     public CharacterStateMatrix<CharacterStateMatrix.GainLossStates> getGainLossMatrix() {
114         if ( !isReturnGainLossMatrix() ) {
115             throw new RuntimeException( "creation of gain-loss matrix has not been enabled" );
116         }
117         return _gain_loss_matrix;
118     }
119
120     /* (non-Javadoc)
121      * @see org.forester.phylogenyinference.Parsimony#getInternalStatesMatrix()
122      */
123     public CharacterStateMatrix<BinaryStates> getInternalStatesMatrix() {
124         if ( !isReturnInternalStates() ) {
125             throw new RuntimeException( "creation of internal state matrix has not been enabled" );
126         }
127         return _internal_states_matrix;
128     }
129
130     private Map<PhylogenyNode, BinaryStates> getStatesForCharacter( final Phylogeny p,
131                                                                     final CharacterStateMatrix<BinaryStates> matrix,
132                                                                     final int character_index ) {
133         final Map<PhylogenyNode, BinaryStates> states = new HashMap<PhylogenyNode, BinaryStates>( matrix.getNumberOfIdentifiers() );
134         for( int indentifier_index = 0; indentifier_index < matrix.getNumberOfIdentifiers(); ++indentifier_index ) {
135             final BinaryStates state = matrix.getState( indentifier_index, character_index );
136             if ( state == null ) {
137                 throw new IllegalArgumentException( "value at [" + indentifier_index + ", " + character_index
138                                                     + "] is null" );
139             }
140             states.put( p.getNode( matrix.getIdentifier( indentifier_index ) ), state );
141         }
142         return states;
143     }
144
145     /* (non-Javadoc)
146      * @see org.forester.phylogenyinference.Parsimony#getTotalGains()
147      */
148     public int getTotalGains() {
149         return _total_gains;
150     }
151
152     /* (non-Javadoc)
153      * @see org.forester.phylogenyinference.Parsimony#getTotalLosses()
154      */
155     public int getTotalLosses() {
156         return _total_losses;
157     }
158
159     /* (non-Javadoc)
160      * @see org.forester.phylogenyinference.Parsimony#getTotalUnchanged()
161      */
162     public int getTotalUnchanged() {
163         return _total_unchanged;
164     }
165
166     private void init() {
167         setReturnInternalStates( RETURN_INTERNAL_STATES_DEFAULT );
168         setReturnGainLossMatrix( RETURN_GAIN_LOSS_MATRIX_DEFAULT );
169         reset();
170     }
171
172     private void initializeGainLossMatrix( final Phylogeny p,
173                                            final CharacterStateMatrix<BinaryStates> external_node_states_matrix ) {
174         final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
175         for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) {
176             nodes.add( postorder.next() );
177         }
178         setGainLossMatrix( new BasicCharacterStateMatrix<CharacterStateMatrix.GainLossStates>( nodes.size(),
179                 external_node_states_matrix
180                 .getNumberOfCharacters() ) );
181         int identifier_index = 0;
182         for( final PhylogenyNode node : nodes ) {
183             getGainLossMatrix().setIdentifier( identifier_index++,
184                                                ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node
185                                                        .getName() );
186         }
187         for( int character_index = 0; character_index < external_node_states_matrix.getNumberOfCharacters(); ++character_index ) {
188             getGainLossMatrix().setCharacter( character_index,
189                                               external_node_states_matrix.getCharacter( character_index ) );
190         }
191     }
192
193     private void initializeInternalStates( final Phylogeny p,
194                                            final CharacterStateMatrix<BinaryStates> external_node_states_matrix ) {
195         final List<PhylogenyNode> internal_nodes = new ArrayList<PhylogenyNode>();
196         for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) {
197             final PhylogenyNode node = postorder.next();
198             if ( node.isInternal() ) {
199                 internal_nodes.add( node );
200             }
201         }
202         setInternalStatesMatrix( new BasicCharacterStateMatrix<BinaryStates>( internal_nodes.size(),
203                 external_node_states_matrix
204                 .getNumberOfCharacters() ) );
205         int identifier_index = 0;
206         for( final PhylogenyNode node : internal_nodes ) {
207             getInternalStatesMatrix().setIdentifier( identifier_index++,
208                                                      ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node
209                                                              .getName() );
210         }
211         for( int character_index = 0; character_index < external_node_states_matrix.getNumberOfCharacters(); ++character_index ) {
212             getInternalStatesMatrix().setCharacter( character_index,
213                                                     external_node_states_matrix.getCharacter( character_index ) );
214         }
215     }
216
217     private boolean isReturnGainLossMatrix() {
218         return _return_gain_loss;
219     }
220
221     private boolean isReturnInternalStates() {
222         return _return_internal_states;
223     }
224
225     private void postOrderTraversal( final Phylogeny p, final Map<PhylogenyNode, BinaryStates> states )
226             throws AssertionError {
227         for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) {
228             final PhylogenyNode node = postorder.next();
229             if ( !node.isExternal() ) {
230                 final int present_unknown = getNumberOfChildNodesWithPresentOrUnknownState( states, node );
231                 if ( present_unknown < 1 ) {
232                     states.put( node, ABSENT );
233                 }
234                 else if ( present_unknown == 1 ) {
235                     states.put( node, UNKNOWN );
236                 }
237                 else {
238                     states.put( node, PRESENT );
239                 }
240             }
241         }
242     }
243
244     private void preOrderTraversal( final Phylogeny p,
245                                     final Map<PhylogenyNode, BinaryStates> states,
246                                     final int character_state_column ) throws AssertionError {
247         boolean gain = false;
248         for( final PhylogenyNodeIterator preorder = p.iteratorPreorder(); preorder.hasNext(); ) {
249             final PhylogenyNode node = preorder.next();
250             BinaryStates parent_state = null;
251             if ( !node.isRoot() ) {
252                 parent_state = states.get( node.getParent() );
253             }
254             if ( !node.isExternal() ) {
255                 if ( states.get( node ) == UNKNOWN ) {
256                     if ( parent_state == PRESENT ) {
257                         states.put( node, PRESENT );
258                     }
259                     else {
260                         if ( isCharacterPresentOrUnknownInAtLeastTwoChildNodes( states, node ) ) {
261                             states.put( node, PRESENT );
262                         }
263                         else {
264                             states.put( node, ABSENT );
265                         }
266                     }
267                 }
268                 if ( isReturnInternalStates() ) {
269                     setInternalNodeState( states, character_state_column, node );
270                 }
271             }
272             final BinaryStates current_state = states.get( node );
273             if ( ( parent_state == PRESENT ) && ( current_state == ABSENT ) ) {
274                 ++_total_losses;
275                 if ( isReturnGainLossMatrix() ) {
276                     setGainLossState( character_state_column, node, LOSS );
277                 }
278             }
279             else if ( ( ( parent_state == ABSENT ) ) && ( current_state == PRESENT ) ) {
280                 if ( gain ) {
281                     throw new RuntimeException( "this should not have happened: dollo parsimony cannot have more than one gain" );
282                 }
283                 gain = true;
284                 ++_total_gains;
285                 if ( isReturnGainLossMatrix() ) {
286                     setGainLossState( character_state_column, node, GAIN );
287                 }
288             }
289             else {
290                 ++_total_unchanged;
291                 if ( isReturnGainLossMatrix() ) {
292                     if ( current_state == PRESENT ) {
293                         setGainLossState( character_state_column, node, UNCHANGED_PRESENT );
294                     }
295                     else if ( current_state == ABSENT ) {
296                         setGainLossState( character_state_column, node, UNCHANGED_ABSENT );
297                     }
298                 }
299             }
300         }
301     }
302
303     private void reset() {
304         setTotalLosses( 0 );
305         setTotalGains( 0 );
306         setTotalUnchanged( 0 );
307     }
308
309     private void setGainLossMatrix( final CharacterStateMatrix<CharacterStateMatrix.GainLossStates> gain_loss_matrix ) {
310         _gain_loss_matrix = gain_loss_matrix;
311     }
312
313     private void setGainLossState( final int character_state_column,
314                                    final PhylogenyNode node,
315                                    final GainLossStates state ) {
316         getGainLossMatrix().setState( ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node.getName(),
317                 character_state_column,
318                 state );
319     }
320
321     private void setInternalNodeState( final Map<PhylogenyNode, BinaryStates> states,
322                                        final int character_state_column,
323                                        final PhylogenyNode node ) {
324         getInternalStatesMatrix()
325         .setState( ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node.getName(),
326                 character_state_column,
327                 states.get( node ) );
328     }
329
330     private void setInternalStatesMatrix( final CharacterStateMatrix<BinaryStates> internal_states_matrix ) {
331         _internal_states_matrix = internal_states_matrix;
332     }
333
334     /* (non-Javadoc)
335      * @see org.forester.phylogenyinference.Parsimony#setReturnGainLossMatrix(boolean)
336      */
337     public void setReturnGainLossMatrix( final boolean return_gain_loss ) {
338         _return_gain_loss = return_gain_loss;
339     }
340
341     /* (non-Javadoc)
342      * @see org.forester.phylogenyinference.Parsimony#setReturnInternalStates(boolean)
343      */
344     public void setReturnInternalStates( final boolean return_internal_states ) {
345         _return_internal_states = return_internal_states;
346     }
347
348     private void setTotalGains( final int total_gains ) {
349         _total_gains = total_gains;
350     }
351
352     private void setTotalLosses( final int total_losses ) {
353         _total_losses = total_losses;
354     }
355
356     private void setTotalUnchanged( final int total_unchanged ) {
357         _total_unchanged = total_unchanged;
358     }
359
360     public static DolloParsimony createInstance() {
361         return new DolloParsimony();
362     }
363
364     private static int getNumberOfChildNodesWithPresentOrUnknownState( final Map<PhylogenyNode, BinaryStates> states,
365                                                                        final PhylogenyNode node ) {
366         int presents = 0;
367         for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
368             final PhylogenyNode node_child = node.getChildNode( i );
369             if ( !states.containsKey( node_child ) ) {
370                 throw new RuntimeException( "this should not have happened: node [" + node_child.getName()
371                                             + "] not found in node state map" );
372             }
373             if ( ( states.get( node_child ) == BinaryStates.PRESENT )
374                     || ( states.get( node_child ) == BinaryStates.UNKNOWN ) ) {
375                 ++presents;
376             }
377         }
378         return presents;
379     }
380
381     private static boolean isCharacterPresentOrUnknownInAtLeastTwoChildNodes( final Map<PhylogenyNode, BinaryStates> states,
382                                                                               final PhylogenyNode node ) {
383         int count = 0;
384         for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
385             final PhylogenyNode node_child = node.getChildNode( i );
386             if ( ( states.get( node_child ) == PRESENT ) || ( states.get( node_child ) == UNKNOWN ) ) {
387                 ++count;
388                 if ( count > 1 ) {
389                     return true;
390                 }
391             }
392         }
393         return false;
394     }
395 }