X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fevoinference%2Fparsimony%2FFitchParsimony.java;h=7c7c9f2df17656c73aba805ad6f6002b3e47006e;hb=5bae4861f59350b3158d6ebd5034ea7698d81b98;hp=2f94bed8469c252165150ee3f9387b1a6a8f4799;hpb=e5997cf0185e43b0864767193c3cd89bde2fbacd;p=jalview.git diff --git a/forester/java/src/org/forester/evoinference/parsimony/FitchParsimony.java b/forester/java/src/org/forester/evoinference/parsimony/FitchParsimony.java index 2f94bed..7c7c9f2 100644 --- a/forester/java/src/org/forester/evoinference/parsimony/FitchParsimony.java +++ b/forester/java/src/org/forester/evoinference/parsimony/FitchParsimony.java @@ -26,6 +26,7 @@ package org.forester.evoinference.parsimony; +import java.text.DecimalFormat; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; @@ -47,46 +48,43 @@ import org.forester.util.ForesterUtil; public class FitchParsimony { - final static private BinaryStates PRESENT = BinaryStates.PRESENT; final static private BinaryStates ABSENT = BinaryStates.ABSENT; - final static private GainLossStates LOSS = GainLossStates.LOSS; final static private GainLossStates GAIN = GainLossStates.GAIN; - final static private GainLossStates UNCHANGED_PRESENT = GainLossStates.UNCHANGED_PRESENT; - final static private GainLossStates UNCHANGED_ABSENT = GainLossStates.UNCHANGED_ABSENT; - private static final boolean RETURN_INTERNAL_STATES_DEFAULT = false; - private static final boolean RETURN_GAIN_LOSS_MATRIX_DEFAULT = false; - private static final boolean RANDOMIZE_DEFAULT = false; + final static private GainLossStates LOSS = GainLossStates.LOSS; + final static private BinaryStates PRESENT = BinaryStates.PRESENT; private static final long RANDOM_NUMBER_SEED_DEFAULT = 21; + private static final boolean RANDOMIZE_DEFAULT = false; + private static final boolean RETURN_GAIN_LOSS_MATRIX_DEFAULT = false; + private static final boolean RETURN_INTERNAL_STATES_DEFAULT = false; + final static private GainLossStates UNCHANGED_ABSENT = GainLossStates.UNCHANGED_ABSENT; + final static private GainLossStates UNCHANGED_PRESENT = GainLossStates.UNCHANGED_PRESENT; private static final boolean USE_LAST_DEFAULT = false; - private boolean _return_internal_states = false; + private int _cost; + private CharacterStateMatrix _gain_loss_matrix; + private CharacterStateMatrix _internal_states_matrix_after_traceback; + private CharacterStateMatrix> _internal_states_matrix_prior_to_traceback; + private Random _random_generator; + private long _random_number_seed; + private boolean _randomize; private boolean _return_gain_loss = false; + private boolean _return_internal_states = false; private int _total_gains; private int _total_losses; private int _total_unchanged; - private CharacterStateMatrix> _internal_states_matrix_prior_to_traceback; - private CharacterStateMatrix _internal_states_matrix_after_traceback; - private CharacterStateMatrix _gain_loss_matrix; - private boolean _randomize; private boolean _use_last; - private int _cost; - private long _random_number_seed; - private Random _random_generator; + private boolean _verbose = false; public FitchParsimony() { init(); } - private int determineIndex( final SortedSet current_node_states, int i ) { - if ( isRandomize() ) { - i = getRandomGenerator().nextInt( current_node_states.size() ); - } - else if ( isUseLast() ) { - i = current_node_states.size() - 1; - } - return i; + public void execute( final Phylogeny p, final CharacterStateMatrix external_node_states_matrix ) { + execute( p, external_node_states_matrix, false ); } - public void execute( final Phylogeny p, final CharacterStateMatrix external_node_states_matrix ) { + public void execute( final Phylogeny p, + final CharacterStateMatrix external_node_states_matrix, + final boolean verbose ) { if ( !p.isRooted() ) { throw new IllegalArgumentException( "attempt to execute Fitch parsimony on unroored phylogeny" ); } @@ -98,6 +96,7 @@ public class FitchParsimony { + p.getNumberOfExternalNodes() + "] and number of indentifiers [" + external_node_states_matrix.getNumberOfIdentifiers() + "] in matrix are not equal" ); } + setVerbose( verbose ); reset(); if ( isReturnInternalStates() ) { initializeInternalStates( p, external_node_states_matrix ); @@ -105,12 +104,22 @@ public class FitchParsimony { if ( isReturnGainLossMatrix() ) { initializeGainLossMatrix( p, external_node_states_matrix ); } + final DecimalFormat pf = new java.text.DecimalFormat( "000000" ); + if ( isVerbose() ) { + System.out.println( "Number of characters: " + external_node_states_matrix.getNumberOfCharacters() ); + } for( int character_index = 0; character_index < external_node_states_matrix.getNumberOfCharacters(); ++character_index ) { + if ( isVerbose() ) { + ForesterUtil.updateProgress( character_index, pf ); + } executeForOneCharacter( p, getStatesForCharacter( p, external_node_states_matrix, character_index ), getStatesForCharacterForTraceback( p, external_node_states_matrix, character_index ), character_index ); } + if ( isVerbose() ) { + System.out.println(); + } if ( external_node_states_matrix.getState( 0, 0 ) instanceof BinaryStates ) { if ( ( external_node_states_matrix.getNumberOfCharacters() * p.getNumberOfBranches() ) != ( getTotalGains() + getTotalLosses() + getTotalUnchanged() ) ) { @@ -119,14 +128,6 @@ public class FitchParsimony { } } - private void executeForOneCharacter( final Phylogeny p, - final Map> states, - final Map traceback_states, - final int character_state_column ) { - postOrderTraversal( p, states ); - preOrderTraversal( p, states, traceback_states, character_state_column ); - } - public int getCost() { return _cost; } @@ -147,7 +148,7 @@ public class FitchParsimony { /** * Returns a view of the internal states prior to trace-back. - * + * * @return */ public CharacterStateMatrix> getInternalStatesMatrixPriorToTraceback() { @@ -157,6 +158,86 @@ public class FitchParsimony { return _internal_states_matrix_prior_to_traceback; } + public int getTotalGains() { + return _total_gains; + } + + public int getTotalLosses() { + return _total_losses; + } + + public int getTotalUnchanged() { + return _total_unchanged; + } + + public boolean isVerbose() { + return _verbose; + } + + public void setRandomize( final boolean randomize ) { + if ( randomize && isUseLast() ) { + throw new IllegalArgumentException( "attempt to allways use last state (ordered) if more than one choices and randomization at the same time" ); + } + _randomize = randomize; + } + + public void setRandomNumberSeed( final long random_number_seed ) { + if ( !isRandomize() ) { + throw new IllegalArgumentException( "attempt to set random number generator seed without randomization enabled" ); + } + _random_number_seed = random_number_seed; + } + + public void setReturnGainLossMatrix( final boolean return_gain_loss ) { + _return_gain_loss = return_gain_loss; + } + + public void setReturnInternalStates( final boolean return_internal_states ) { + _return_internal_states = return_internal_states; + } + + /** + * This sets whether to use the first or last state in the sorted + * states at the undecided internal nodes. + * For randomized choices set randomize to true (and this to false). + * + * Note. It might be advisable to set this to false + * for BinaryStates if absence at the root is preferred + * (given the enum BinaryStates sorts in the following order: + * ABSENT, UNKNOWN, PRESENT). + * + * + * @param use_last + */ + public void setUseLast( final boolean use_last ) { + if ( use_last && isRandomize() ) { + throw new IllegalArgumentException( "attempt to allways use last state (ordered) if more than one choices and randomization at the same time" ); + } + _use_last = use_last; + } + + public void setVerbose( final boolean verbose ) { + _verbose = verbose; + } + + private int determineIndex( final SortedSet current_node_states, int i ) { + if ( isRandomize() ) { + i = getRandomGenerator().nextInt( current_node_states.size() ); + } + else if ( isUseLast() ) { + i = current_node_states.size() - 1; + } + return i; + } + + private void executeForOneCharacter( final Phylogeny p, + final Map> states, + final Map traceback_states, + final int character_state_column ) { + postOrderTraversal( p, states ); + preOrderTraversal( p, states, traceback_states, character_state_column ); + } + private SortedSet getIntersectionOfStatesOfChildNodes( final Map> states, final PhylogenyNode node ) throws AssertionError { final SortedSet states_in_child_nodes = new TreeSet(); @@ -164,7 +245,7 @@ public class FitchParsimony { final PhylogenyNode node_child = node.getChildNode( i ); if ( !states.containsKey( node_child ) ) { throw new AssertionError( "this should not have happened: node [" + node_child.getName() - + "] not found in node state map" ); + + "] not found in node state map" ); } if ( i == 0 ) { states_in_child_nodes.addAll( states.get( node_child ) ); @@ -201,7 +282,7 @@ public class FitchParsimony { final STATE_TYPE state = matrix.getState( indentifier_index, character_index ); if ( state == null ) { throw new IllegalArgumentException( "value at [" + indentifier_index + ", " + character_index - + "] is null" ); + + "] is null" ); } final SortedSet l = new TreeSet(); l.add( state ); @@ -218,25 +299,13 @@ public class FitchParsimony { final STATE_TYPE state = matrix.getState( indentifier_index, character_index ); if ( state == null ) { throw new IllegalArgumentException( "value at [" + indentifier_index + ", " + character_index - + "] is null" ); + + "] is null" ); } states.put( p.getNode( matrix.getIdentifier( indentifier_index ) ), state ); } return states; } - public int getTotalGains() { - return _total_gains; - } - - public int getTotalLosses() { - return _total_losses; - } - - public int getTotalUnchanged() { - return _total_unchanged; - } - private SortedSet getUnionOfStatesOfChildNodes( final Map> states, final PhylogenyNode node ) throws AssertionError { final SortedSet states_in_child_nodes = new TreeSet(); @@ -244,7 +313,7 @@ public class FitchParsimony { final PhylogenyNode node_child = node.getChildNode( i ); if ( !states.containsKey( node_child ) ) { throw new AssertionError( "this should not have happened: node [" + node_child.getName() - + "] not found in node state map" ); + + "] not found in node state map" ); } states_in_child_nodes.addAll( states.get( node_child ) ); } @@ -271,8 +340,8 @@ public class FitchParsimony { nodes.add( postorder.next() ); } setGainLossMatrix( new BasicCharacterStateMatrix( nodes.size(), - external_node_states_matrix - .getNumberOfCharacters() ) ); + external_node_states_matrix + .getNumberOfCharacters() ) ); int identifier_index = 0; for( final PhylogenyNode node : nodes ) { getGainLossMatrix().setIdentifier( identifier_index++, @@ -295,11 +364,11 @@ public class FitchParsimony { } } setInternalStatesMatrixPriorToTraceback( new BasicCharacterStateMatrix>( internal_nodes.size(), - external_node_states_matrix - .getNumberOfCharacters() ) ); + external_node_states_matrix + .getNumberOfCharacters() ) ); setInternalStatesMatrixTraceback( new BasicCharacterStateMatrix( internal_nodes.size(), - external_node_states_matrix - .getNumberOfCharacters() ) ); + external_node_states_matrix + .getNumberOfCharacters() ) ); int identifier_index = 0; for( final PhylogenyNode node : internal_nodes ) { getInternalStatesMatrix().setIdentifier( identifier_index, @@ -315,7 +384,7 @@ public class FitchParsimony { external_node_states_matrix.getCharacter( character_index ) ); getInternalStatesMatrixPriorToTraceback().setCharacter( character_index, external_node_states_matrix - .getCharacter( character_index ) ); + .getCharacter( character_index ) ); } } @@ -335,8 +404,7 @@ public class FitchParsimony { return _use_last; } - private void postOrderTraversal( final Phylogeny p, final Map> states ) - throws AssertionError { + private void postOrderTraversal( final Phylogeny p, final Map> states ) { for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) { final PhylogenyNode node = postorder.next(); if ( !node.isExternal() ) { @@ -352,7 +420,7 @@ public class FitchParsimony { private void preOrderTraversal( final Phylogeny p, final Map> states, final Map traceback_states, - final int character_state_column ) throws AssertionError { + final int character_state_column ) { for( final PhylogenyNodeIterator preorder = p.iteratorPreorder(); preorder.hasNext(); ) { final PhylogenyNode current_node = preorder.next(); final SortedSet current_node_states = states.get( current_node ); @@ -414,7 +482,7 @@ public class FitchParsimony { else if ( current_binary_state == ABSENT ) {//new setGainLossState( character_state_column, current_node, UNCHANGED_ABSENT );//new }//new - // setGainLossState( character_state_column, current_node, UNKNOWN_GAIN_LOSS ); + // setGainLossState( character_state_column, current_node, UNKNOWN_GAIN_LOSS ); } } } @@ -439,26 +507,26 @@ public class FitchParsimony { final PhylogenyNode node, final GainLossStates state ) { getGainLossMatrix().setState( ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node.getName(), - character_state_column, - state ); + character_state_column, + state ); } private void setInternalNodeState( final Map states, final int character_state_column, final PhylogenyNode node ) { getInternalStatesMatrix() - .setState( ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node.getName(), - character_state_column, - states.get( node ) ); + .setState( ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node.getName(), + character_state_column, + states.get( node ) ); } private void setInternalNodeStatePriorToTraceback( final Map> states, final int character_state_column, final PhylogenyNode node ) { getInternalStatesMatrixPriorToTraceback() - .setState( ForesterUtil.isEmpty( node.getName() ) ? String.valueOf( node.getId() ) : node.getName(), - character_state_column, - toListSorted( states.get( node ) ) ); + .setState( ForesterUtil.isEmpty( node.getName() ) ? String.valueOf( node.getId() ) : node.getName(), + character_state_column, + toListSorted( states.get( node ) ) ); } private void setInternalStatesMatrixPriorToTraceback( final CharacterStateMatrix> internal_states_matrix_prior_to_traceback ) { @@ -473,28 +541,6 @@ public class FitchParsimony { _random_generator = random_generator; } - public void setRandomize( final boolean randomize ) { - if ( randomize && isUseLast() ) { - throw new IllegalArgumentException( "attempt to allways use last state (ordered) if more than one choices and randomization at the same time" ); - } - _randomize = randomize; - } - - public void setRandomNumberSeed( final long random_number_seed ) { - if ( !isRandomize() ) { - throw new IllegalArgumentException( "attempt to set random number generator seed without randomization enabled" ); - } - _random_number_seed = random_number_seed; - } - - public void setReturnGainLossMatrix( final boolean return_gain_loss ) { - _return_gain_loss = return_gain_loss; - } - - public void setReturnInternalStates( final boolean return_internal_states ) { - _return_internal_states = return_internal_states; - } - private void setTotalGains( final int total_gains ) { _total_gains = total_gains; } @@ -507,26 +553,6 @@ public class FitchParsimony { _total_unchanged = total_unchanged; } - /** - * This sets whether to use the first or last state in the sorted - * states at the undecided internal nodes. - * For randomized choices set randomize to true (and this to false). - * - * Note. It might be advisable to set this to false - * for BinaryStates if absence at the root is preferred - * (given the enum BinaryStates sorts in the following order: - * ABSENT, UNKNOWN, PRESENT). - * - * - * @param use_last - */ - public void setUseLast( final boolean use_last ) { - if ( use_last && isRandomize() ) { - throw new IllegalArgumentException( "attempt to allways use last state (ordered) if more than one choices and randomization at the same time" ); - } - _use_last = use_last; - } - private List toListSorted( final SortedSet states ) { final List l = new ArrayList( states.size() ); for( final STATE_TYPE state : states ) {