3 // FORESTER -- software libraries and applications
4 // for evolutionary biology research and applications.
6 // Copyright (C) 2008-2009 Christian M. Zmasek
7 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 // Contact: phylosoft @ gmail . com
25 // WWW: www.phylosoft.org/forester
27 package org.forester.surfacing;
29 import java.util.HashMap;
30 import java.util.HashSet;
31 import java.util.Iterator;
32 import java.util.List;
34 import java.util.Map.Entry;
36 import java.util.SortedSet;
37 import java.util.TreeSet;
39 import org.forester.application.surfacing;
40 import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix;
41 import org.forester.evoinference.matrix.character.CharacterStateMatrix;
42 import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates;
43 import org.forester.evoinference.matrix.character.CharacterStateMatrix.GainLossStates;
44 import org.forester.evoinference.parsimony.DolloParsimony;
45 import org.forester.evoinference.parsimony.FitchParsimony;
46 import org.forester.phylogeny.Phylogeny;
47 import org.forester.phylogeny.PhylogenyNode;
48 import org.forester.phylogeny.data.BinaryCharacters;
49 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
50 import org.forester.surfacing.BinaryDomainCombination.DomainCombinationType;
51 import org.forester.util.ForesterUtil;
53 public final class DomainParsimonyCalculator {
55 private static final String TYPE_FORBINARY_CHARACTERS = "parsimony inferred";
56 private CharacterStateMatrix<GainLossStates> _gain_loss_matrix;
57 private CharacterStateMatrix<BinaryStates> _binary_internal_states_matrix;
58 private final List<GenomeWideCombinableDomains> _gwcd_list;
59 private final Phylogeny _phylogeny;
60 private int _total_losses;
61 private int _total_gains;
62 private int _total_unchanged;
64 private Map<DomainId, Set<String>> _domain_id_to_secondary_features_map;
65 private SortedSet<DomainId> _positive_filter;
67 private DomainParsimonyCalculator( final Phylogeny phylogeny ) {
69 _phylogeny = phylogeny;
73 private DomainParsimonyCalculator( final Phylogeny phylogeny, final List<GenomeWideCombinableDomains> gwcd_list ) {
75 _phylogeny = phylogeny;
76 _gwcd_list = gwcd_list;
79 private DomainParsimonyCalculator( final Phylogeny phylogeny,
80 final List<GenomeWideCombinableDomains> gwcd_list,
81 final Map<DomainId, Set<String>> domain_id_to_secondary_features_map ) {
83 _phylogeny = phylogeny;
84 _gwcd_list = gwcd_list;
85 setDomainIdToSecondaryFeaturesMap( domain_id_to_secondary_features_map );
88 int calculateNumberOfBinaryDomainCombination() {
89 if ( getGenomeWideCombinableDomainsList().isEmpty() ) {
90 throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
92 final Set<BinaryDomainCombination> all_binary_combinations = new HashSet<BinaryDomainCombination>();
93 for( final GenomeWideCombinableDomains gwcd : getGenomeWideCombinableDomainsList() ) {
94 for( final BinaryDomainCombination bc : gwcd.toBinaryDomainCombinations() ) {
95 all_binary_combinations.add( bc );
98 return all_binary_combinations.size();
101 CharacterStateMatrix<BinaryStates> createMatrixOfBinaryDomainCombinationPresenceOrAbsence() {
102 return createMatrixOfBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList() );
105 CharacterStateMatrix<BinaryStates> createMatrixOfDomainPresenceOrAbsence() {
106 return createMatrixOfDomainPresenceOrAbsence( getGenomeWideCombinableDomainsList(), getPositiveFilter() );
109 CharacterStateMatrix<BinaryStates> createMatrixOfSecondaryFeaturePresenceOrAbsence( final Map<Species, MappingResults> mapping_results_map ) {
110 return createMatrixOfSecondaryFeaturePresenceOrAbsence( getGenomeWideCombinableDomainsList(),
111 getDomainIdToSecondaryFeaturesMap(),
112 mapping_results_map );
115 Phylogeny decoratePhylogenyWithDomains( final Phylogeny phylogeny ) {
116 for( final PhylogenyNodeIterator it = phylogeny.iteratorPostorder(); it.hasNext(); ) {
117 final PhylogenyNode node = it.next();
118 final String node_identifier = node.getName();
119 final BinaryCharacters bc = new BinaryCharacters( getUnitsOnNode( node_identifier ),
120 getUnitsGainedOnNode( node_identifier ),
121 getUnitsLostOnNode( node_identifier ),
122 TYPE_FORBINARY_CHARACTERS,
123 getSumOfPresentOnNode( node_identifier ),
124 getSumOfGainsOnNode( node_identifier ),
125 getSumOfLossesOnNode( node_identifier ) );
126 node.getNodeData().setBinaryCharacters( bc );
131 private void executeDolloParsimony( final boolean on_domain_presence ) {
133 final DolloParsimony dollo = DolloParsimony.createInstance();
134 dollo.setReturnGainLossMatrix( true );
135 dollo.setReturnInternalStates( true );
136 CharacterStateMatrix<BinaryStates> states = null;
137 if ( on_domain_presence ) {
138 states = createMatrixOfDomainPresenceOrAbsence();
141 states = createMatrixOfBinaryDomainCombinationPresenceOrAbsence();
143 dollo.execute( getPhylogeny(), states );
144 setGainLossMatrix( dollo.getGainLossMatrix() );
145 setBinaryInternalStatesMatrix( dollo.getInternalStatesMatrix() );
146 setCost( dollo.getCost() );
147 setTotalGains( dollo.getTotalGains() );
148 setTotalLosses( dollo.getTotalLosses() );
149 setTotalUnchanged( dollo.getTotalUnchanged() );
152 public void executeDolloParsimonyOnBinaryDomainCombintionPresence() {
153 executeDolloParsimony( false );
156 public void executeDolloParsimonyOnDomainPresence() {
157 executeDolloParsimony( true );
160 public void executeDolloParsimonyOnDomainPresence( final SortedSet<DomainId> positive_filter ) {
161 setPositiveFilter( positive_filter );
162 executeDolloParsimony( true );
163 setPositiveFilter( null );
166 public void executeDolloParsimonyOnSecondaryFeatures( final Map<Species, MappingResults> mapping_results_map ) {
167 if ( getDomainIdToSecondaryFeaturesMap() == null ) {
168 throw new RuntimeException( "Domain id to secondary features map has apparently not been set" );
171 final DolloParsimony dollo = DolloParsimony.createInstance();
172 dollo.setReturnGainLossMatrix( true );
173 dollo.setReturnInternalStates( true );
174 final CharacterStateMatrix<BinaryStates> states = createMatrixOfSecondaryFeaturePresenceOrAbsence( mapping_results_map );
175 dollo.execute( getPhylogeny(), states );
176 setGainLossMatrix( dollo.getGainLossMatrix() );
177 setBinaryInternalStatesMatrix( dollo.getInternalStatesMatrix() );
178 setCost( dollo.getCost() );
179 setTotalGains( dollo.getTotalGains() );
180 setTotalLosses( dollo.getTotalLosses() );
181 setTotalUnchanged( dollo.getTotalUnchanged() );
184 private void executeFitchParsimony( final boolean on_domain_presence,
185 final boolean use_last,
186 final boolean randomize,
187 final long random_number_seed ) {
190 System.out.println( " Fitch parsimony: use_last = true" );
192 final FitchParsimony<BinaryStates> fitch = new FitchParsimony<BinaryStates>();
193 fitch.setRandomize( randomize );
195 fitch.setRandomNumberSeed( random_number_seed );
197 fitch.setUseLast( use_last );
198 fitch.setReturnGainLossMatrix( true );
199 fitch.setReturnInternalStates( true );
200 CharacterStateMatrix<BinaryStates> states = null;
201 if ( on_domain_presence ) {
202 states = createMatrixOfDomainPresenceOrAbsence( getGenomeWideCombinableDomainsList() );
205 states = createMatrixOfBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList() );
207 fitch.execute( getPhylogeny(), states );
208 setGainLossMatrix( fitch.getGainLossMatrix() );
209 setBinaryInternalStatesMatrix( fitch.getInternalStatesMatrix() );
210 setCost( fitch.getCost() );
211 setTotalGains( fitch.getTotalGains() );
212 setTotalLosses( fitch.getTotalLosses() );
213 setTotalUnchanged( fitch.getTotalUnchanged() );
216 private void executeFitchParsimonyOnSecondaryFeatures( final boolean use_last,
217 final boolean randomize,
218 final long random_number_seed ) {
221 System.out.println( " Fitch parsimony: use_last = true" );
223 final FitchParsimony<BinaryStates> fitch = new FitchParsimony<BinaryStates>();
224 fitch.setRandomize( randomize );
226 fitch.setRandomNumberSeed( random_number_seed );
228 fitch.setUseLast( use_last );
229 fitch.setReturnGainLossMatrix( true );
230 fitch.setReturnInternalStates( true );
231 final Map<DomainId, Set<String>> map = getDomainIdToSecondaryFeaturesMap();
232 final Map<DomainId, String> newmap = new HashMap<DomainId, String>();
233 final Iterator<Entry<DomainId, Set<String>>> it = map.entrySet().iterator();
234 while ( it.hasNext() ) {
235 final Map.Entry<DomainId, Set<String>> pair = it.next();
236 if ( pair.getValue().size() != 1 ) {
237 throw new IllegalArgumentException( pair.getKey().getId() + " mapps to " + pair.getValue().size()
240 newmap.put( pair.getKey(), ( String ) pair.getValue().toArray()[ 0 ] );
242 final CharacterStateMatrix<BinaryStates> states = createMatrixOfSecondaryFeatureBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList(),
244 fitch.execute( getPhylogeny(), states );
245 setGainLossMatrix( fitch.getGainLossMatrix() );
246 setBinaryInternalStatesMatrix( fitch.getInternalStatesMatrix() );
247 setCost( fitch.getCost() );
248 setTotalGains( fitch.getTotalGains() );
249 setTotalLosses( fitch.getTotalLosses() );
250 setTotalUnchanged( fitch.getTotalUnchanged() );
253 public void executeFitchParsimonyOnBinaryDomainCombintion( final boolean use_last ) {
254 executeFitchParsimony( false, use_last, false, 0 );
257 public void executeFitchParsimonyOnBinaryDomainCombintionOnSecondaryFeatures( final boolean use_last ) {
258 executeFitchParsimonyOnSecondaryFeatures( use_last, false, 0 );
261 public void executeFitchParsimonyOnBinaryDomainCombintion( final long random_number_seed ) {
262 executeFitchParsimony( false, false, true, random_number_seed );
265 public void executeFitchParsimonyOnDomainPresence( final boolean use_last ) {
266 executeFitchParsimony( true, use_last, false, 0 );
269 public void executeFitchParsimonyOnDomainPresence( final long random_number_seed ) {
270 executeFitchParsimony( true, false, true, random_number_seed );
273 public void executeOnGivenBinaryStatesMatrix( final CharacterStateMatrix<BinaryStates> binary_states_matrix,
274 final String[] character_labels ) {
276 if ( binary_states_matrix.getNumberOfCharacters() != character_labels.length ) {
277 throw new IllegalArgumentException( "binary states matrix number of characters is not equal to the number of character labels provided" );
279 if ( binary_states_matrix.getNumberOfIdentifiers() != getPhylogeny().getNumberOfBranches() ) {
280 throw new IllegalArgumentException( "binary states matrix number of identifiers is not equal to the number of tree nodes provided" );
282 final CharacterStateMatrix<GainLossStates> gl_matrix = new BasicCharacterStateMatrix<GainLossStates>( binary_states_matrix
283 .getNumberOfIdentifiers(),
285 .getNumberOfCharacters() );
287 int total_losses = 0;
288 int total_unchanged = 0;
290 for( final PhylogenyNodeIterator it = getPhylogeny().iteratorPostorder(); it.hasNext(); ) {
291 gl_matrix.setIdentifier( i++, it.next().getName() );
293 for( int c = 0; c < character_labels.length; ++c ) {
294 gl_matrix.setCharacter( c, character_labels[ c ] );
295 final PhylogenyNodeIterator it = getPhylogeny().iteratorPostorder();
296 while ( it.hasNext() ) {
297 final PhylogenyNode node = it.next();
298 final String name = node.getName();
299 final BinaryStates bin_state = binary_states_matrix.getState( binary_states_matrix
300 .getIdentifierIndex( name ), c );
301 final PhylogenyNode parent_node = getPhylogeny().getNode( name ).getParent();
302 GainLossStates gl_state = null;
303 if ( node.isRoot() ) {
305 if ( bin_state == BinaryStates.ABSENT ) {
306 gl_state = GainLossStates.UNCHANGED_ABSENT;
309 gl_state = GainLossStates.UNCHANGED_PRESENT;
313 final BinaryStates parent_bin_state = binary_states_matrix.getState( binary_states_matrix
314 .getIdentifierIndex( parent_node.getName() ), c );
315 if ( bin_state == BinaryStates.ABSENT ) {
316 if ( parent_bin_state == BinaryStates.ABSENT ) {
318 gl_state = GainLossStates.UNCHANGED_ABSENT;
322 gl_state = GainLossStates.LOSS;
326 if ( parent_bin_state == BinaryStates.ABSENT ) {
328 gl_state = GainLossStates.GAIN;
332 gl_state = GainLossStates.UNCHANGED_PRESENT;
336 gl_matrix.setState( name, c, gl_state );
339 setTotalGains( total_gains );
340 setTotalLosses( total_losses );
341 setTotalUnchanged( total_unchanged );
342 setCost( total_gains + total_losses );
343 setGainLossMatrix( gl_matrix );
346 public int getCost() {
350 private Map<DomainId, Set<String>> getDomainIdToSecondaryFeaturesMap() {
351 return _domain_id_to_secondary_features_map;
354 public CharacterStateMatrix<Integer> getGainLossCountsMatrix() {
355 final CharacterStateMatrix<Integer> matrix = new BasicCharacterStateMatrix<Integer>( getGainLossMatrix()
356 .getNumberOfIdentifiers(), 3 );
357 for( int i = 0; i < getGainLossMatrix().getNumberOfIdentifiers(); ++i ) {
358 matrix.setIdentifier( i, getGainLossMatrix().getIdentifier( i ) );
360 matrix.setCharacter( 0, "GAINS" );
361 matrix.setCharacter( 1, "LOSSES" );
362 matrix.setCharacter( 2, "NET" );
363 for( int i = 0; i < getGainLossMatrix().getNumberOfIdentifiers(); ++i ) {
366 for( int c = 0; c < getGainLossMatrix().getNumberOfCharacters(); ++c ) {
367 final GainLossStates s = getGainLossMatrix().getState( i, c );
368 if ( s == GainLossStates.GAIN ) {
371 else if ( s == GainLossStates.LOSS ) {
375 matrix.setState( i, 0, gains );
376 matrix.setState( i, 1, losses );
377 matrix.setState( i, 2, gains - losses );
382 public CharacterStateMatrix<GainLossStates> getGainLossMatrix() {
383 return _gain_loss_matrix;
386 private List<GenomeWideCombinableDomains> getGenomeWideCombinableDomainsList() {
390 public CharacterStateMatrix<BinaryStates> getInternalStatesMatrix() {
391 return _binary_internal_states_matrix;
394 public int getNetGainsOnNode( final String node_identifier ) {
395 if ( getGainLossMatrix() == null ) {
396 throw new RuntimeException( "no gain loss matrix has been calculated" );
399 final int id_index = getGainLossMatrix().getIdentifierIndex( node_identifier );
400 for( int c = 0; c < getGainLossMatrix().getNumberOfCharacters(); ++c ) {
401 if ( getGainLossMatrix().getState( id_index, c ) == GainLossStates.GAIN ) {
404 else if ( getGainLossMatrix().getState( id_index, c ) == GainLossStates.LOSS ) {
411 private Phylogeny getPhylogeny() {
415 private SortedSet<DomainId> getPositiveFilter() {
416 return _positive_filter;
419 public int getSumOfGainsOnNode( final String node_identifier ) {
420 return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.GAIN );
423 public int getSumOfLossesOnNode( final String node_identifier ) {
424 return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.LOSS );
427 public int getSumOfPresentOnNode( final String node_identifier ) {
428 return getSumOfGainsOnNode( node_identifier ) + getSumOfUnchangedPresentOnNode( node_identifier );
431 int getSumOfUnchangedAbsentOnNode( final String node_identifier ) {
432 return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_ABSENT );
435 int getSumOfUnchangedOnNode( final String node_identifier ) {
436 return getSumOfUnchangedPresentOnNode( node_identifier ) + getSumOfUnchangedAbsentOnNode( node_identifier );
439 int getSumOfUnchangedPresentOnNode( final String node_identifier ) {
440 return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_PRESENT );
443 public int getTotalGains() {
447 public int getTotalLosses() {
448 return _total_losses;
451 public int getTotalUnchanged() {
452 return _total_unchanged;
455 public SortedSet<String> getUnitsGainedOnNode( final String node_identifier ) {
456 return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.GAIN );
459 public SortedSet<String> getUnitsLostOnNode( final String node_identifier ) {
460 return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.LOSS );
463 public SortedSet<String> getUnitsOnNode( final String node_identifier ) {
464 final SortedSet<String> present = getUnitsGainedOnNode( node_identifier );
465 present.addAll( getUnitsUnchangedPresentOnNode( node_identifier ) );
469 SortedSet<String> getUnitsUnchangedAbsentOnNode( final String node_identifier ) {
470 return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_ABSENT );
473 SortedSet<String> getUnitsUnchangedPresentOnNode( final String node_identifier ) {
474 return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_PRESENT );
477 private void init() {
478 setDomainIdToSecondaryFeaturesMap( null );
479 setPositiveFilter( null );
483 private void reset() {
484 setGainLossMatrix( null );
485 setBinaryInternalStatesMatrix( null );
488 setTotalLosses( -1 );
489 setTotalUnchanged( -1 );
492 private void setBinaryInternalStatesMatrix( final CharacterStateMatrix<BinaryStates> binary_states_matrix ) {
493 _binary_internal_states_matrix = binary_states_matrix;
496 private void setCost( final int cost ) {
500 private void setDomainIdToSecondaryFeaturesMap( final Map<DomainId, Set<String>> domain_id_to_secondary_features_map ) {
501 _domain_id_to_secondary_features_map = domain_id_to_secondary_features_map;
504 private void setGainLossMatrix( final CharacterStateMatrix<GainLossStates> gain_loss_matrix ) {
505 _gain_loss_matrix = gain_loss_matrix;
508 private void setPositiveFilter( final SortedSet<DomainId> positive_filter ) {
509 _positive_filter = positive_filter;
512 private void setTotalGains( final int total_gains ) {
513 _total_gains = total_gains;
516 private void setTotalLosses( final int total_losses ) {
517 _total_losses = total_losses;
520 private void setTotalUnchanged( final int total_unchanged ) {
521 _total_unchanged = total_unchanged;
524 public static DomainParsimonyCalculator createInstance( final Phylogeny phylogeny ) {
525 return new DomainParsimonyCalculator( phylogeny );
528 public static DomainParsimonyCalculator createInstance( final Phylogeny phylogeny,
529 final List<GenomeWideCombinableDomains> gwcd_list ) {
530 if ( phylogeny.getNumberOfExternalNodes() != gwcd_list.size() ) {
531 throw new IllegalArgumentException( "number of external nodes [" + phylogeny.getNumberOfExternalNodes()
532 + "] does not equal size of genome wide combinable domains list [" + gwcd_list.size() + "]" );
534 return new DomainParsimonyCalculator( phylogeny, gwcd_list );
537 public static DomainParsimonyCalculator createInstance( final Phylogeny phylogeny,
538 final List<GenomeWideCombinableDomains> gwcd_list,
539 final Map<DomainId, Set<String>> domain_id_to_secondary_features_map ) {
540 if ( phylogeny.getNumberOfExternalNodes() != gwcd_list.size() ) {
541 throw new IllegalArgumentException( "size of external nodes does not equal size of genome wide combinable domains list" );
543 return new DomainParsimonyCalculator( phylogeny, gwcd_list, domain_id_to_secondary_features_map );
547 * For folds instead of Pfam-domains, for example
553 static CharacterStateMatrix<BinaryStates> createMatrixOfSecondaryFeaturePresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list,
554 final Map<DomainId, Set<String>> domain_id_to_second_features_map,
555 final Map<Species, MappingResults> mapping_results_map ) {
556 if ( gwcd_list.isEmpty() ) {
557 throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
559 if ( ( domain_id_to_second_features_map == null ) || domain_id_to_second_features_map.isEmpty() ) {
560 throw new IllegalArgumentException( "domain id to secondary features map is null or empty" );
562 final int number_of_identifiers = gwcd_list.size();
563 final SortedSet<String> all_secondary_features = new TreeSet<String>();
564 for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
567 for( final DomainId domain : gwcd.getAllDomainIds() ) {
568 if ( domain_id_to_second_features_map.containsKey( domain ) ) {
569 all_secondary_features.addAll( domain_id_to_second_features_map.get( domain ) );
576 if ( mapping_results_map != null ) {
577 final MappingResults mr = new MappingResults();
578 mr.setDescription( gwcd.getSpecies().getSpeciesId() );
579 mr.setSumOfSuccesses( mapped );
580 mr.setSumOfFailures( not_mapped );
581 mapping_results_map.put( gwcd.getSpecies(), mr );
584 final int number_of_characters = all_secondary_features.size();
585 final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
586 number_of_characters );
587 int character_index = 0;
588 for( final String second_id : all_secondary_features ) {
589 matrix.setCharacter( character_index++, second_id );
591 int identifier_index = 0;
592 final Set<String> all_identifiers = new HashSet<String>();
593 for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
594 final String species_id = gwcd.getSpecies().getSpeciesId();
595 if ( all_identifiers.contains( species_id ) ) {
596 throw new IllegalArgumentException( "species [" + species_id + "] is not unique" );
598 all_identifiers.add( species_id );
599 matrix.setIdentifier( identifier_index, species_id );
600 final Set<String> all_second_per_gwcd = new HashSet<String>();
601 for( final DomainId domain : gwcd.getAllDomainIds() ) {
602 if ( domain_id_to_second_features_map.containsKey( domain ) ) {
603 all_second_per_gwcd.addAll( domain_id_to_second_features_map.get( domain ) );
606 for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
607 if ( all_second_per_gwcd.contains( matrix.getCharacter( ci ) ) ) {
608 matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
611 matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.ABSENT );
619 public static CharacterStateMatrix<BinaryStates> createMatrixOfSecondaryFeatureBinaryDomainCombinationPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list,
620 final Map<DomainId, String> domain_id_to_second_features_map ) {
621 if ( gwcd_list.isEmpty() ) {
622 throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
624 if ( ( domain_id_to_second_features_map == null ) || domain_id_to_second_features_map.isEmpty() ) {
625 throw new IllegalArgumentException( "domain id to secondary features map is null or empty" );
627 final int number_of_identifiers = gwcd_list.size();
628 final SortedSet<BinaryDomainCombination> all_binary_combinations_mapped = new TreeSet<BinaryDomainCombination>();
629 final Set<BinaryDomainCombination>[] binary_combinations_per_genome_mapped = new HashSet[ number_of_identifiers ];
630 int identifier_index = 0;
631 final SortedSet<String> no_mappings = new TreeSet<String>();
632 for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
633 binary_combinations_per_genome_mapped[ identifier_index ] = new HashSet<BinaryDomainCombination>();
634 for( final BinaryDomainCombination bc : gwcd.toBinaryDomainCombinations() ) {
635 final BinaryDomainCombination mapped_bc = mapBinaryDomainCombination( domain_id_to_second_features_map,
638 all_binary_combinations_mapped.add( mapped_bc );
639 binary_combinations_per_genome_mapped[ identifier_index ].add( mapped_bc );
643 if ( !no_mappings.isEmpty() ) {
644 ForesterUtil.programMessage( surfacing.PRG_NAME, "No mappings for the following (" + no_mappings.size()
646 for( final String id : no_mappings ) {
647 ForesterUtil.programMessage( surfacing.PRG_NAME, id );
650 final int number_of_characters = all_binary_combinations_mapped.size();
651 final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
652 number_of_characters );
653 int character_index = 0;
654 for( final BinaryDomainCombination bc : all_binary_combinations_mapped ) {
655 matrix.setCharacter( character_index++, bc.toString() );
657 identifier_index = 0;
658 final Set<String> all_identifiers = new HashSet<String>();
659 for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
660 final String species_id = gwcd.getSpecies().getSpeciesId();
661 if ( all_identifiers.contains( species_id ) ) {
662 throw new AssertionError( "species [" + species_id + "] is not unique" );
664 all_identifiers.add( species_id );
665 matrix.setIdentifier( identifier_index, species_id );
666 for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
667 BinaryDomainCombination bc = null;
668 if ( gwcd.getDomainCombinationType() == DomainCombinationType.DIRECTED_ADJACTANT ) {
669 bc = AdjactantDirectedBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
671 else if ( gwcd.getDomainCombinationType() == DomainCombinationType.DIRECTED ) {
672 bc = DirectedBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
675 bc = BasicBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
677 if ( binary_combinations_per_genome_mapped[ identifier_index ].contains( bc ) ) {
678 matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
681 matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.ABSENT );
689 private static BinaryDomainCombination mapBinaryDomainCombination( final Map<DomainId, String> domain_id_to_second_features_map,
690 final BinaryDomainCombination bc,
691 final SortedSet<String> no_mappings ) {
694 if ( !domain_id_to_second_features_map.containsKey( bc.getId0() ) ) {
695 no_mappings.add( bc.getId0().getId() );
696 id0 = bc.getId0().getId();
699 id0 = domain_id_to_second_features_map.get( bc.getId0() );
701 if ( !domain_id_to_second_features_map.containsKey( bc.getId1() ) ) {
702 no_mappings.add( bc.getId1().getId() );
703 id1 = bc.getId1().getId();
706 id1 = domain_id_to_second_features_map.get( bc.getId1() );
708 return new BasicBinaryDomainCombination( id0, id1 );
711 public static CharacterStateMatrix<BinaryStates> createMatrixOfBinaryDomainCombinationPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list ) {
712 if ( gwcd_list.isEmpty() ) {
713 throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
715 final int number_of_identifiers = gwcd_list.size();
716 final SortedSet<BinaryDomainCombination> all_binary_combinations = new TreeSet<BinaryDomainCombination>();
717 final Set<BinaryDomainCombination>[] binary_combinations_per_genome = new HashSet[ number_of_identifiers ];
718 int identifier_index = 0;
719 for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
720 binary_combinations_per_genome[ identifier_index ] = new HashSet<BinaryDomainCombination>();
721 for( final BinaryDomainCombination bc : gwcd.toBinaryDomainCombinations() ) {
722 all_binary_combinations.add( bc );
723 binary_combinations_per_genome[ identifier_index ].add( bc );
727 final int number_of_characters = all_binary_combinations.size();
728 final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
729 number_of_characters );
730 int character_index = 0;
731 for( final BinaryDomainCombination bc : all_binary_combinations ) {
732 matrix.setCharacter( character_index++, bc.toString() );
734 identifier_index = 0;
735 final Set<String> all_identifiers = new HashSet<String>();
736 for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
737 final String species_id = gwcd.getSpecies().getSpeciesId();
738 if ( all_identifiers.contains( species_id ) ) {
739 throw new AssertionError( "species [" + species_id + "] is not unique" );
741 all_identifiers.add( species_id );
742 matrix.setIdentifier( identifier_index, species_id );
743 for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
744 BinaryDomainCombination bc = null;
745 if ( gwcd.getDomainCombinationType() == DomainCombinationType.DIRECTED_ADJACTANT ) {
746 bc = AdjactantDirectedBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
748 else if ( gwcd.getDomainCombinationType() == DomainCombinationType.DIRECTED ) {
749 bc = DirectedBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
752 bc = BasicBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
754 if ( binary_combinations_per_genome[ identifier_index ].contains( bc ) ) {
755 matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
758 matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.ABSENT );
766 static CharacterStateMatrix<BinaryStates> createMatrixOfDomainPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list ) {
767 return createMatrixOfDomainPresenceOrAbsence( gwcd_list, null );
770 public static CharacterStateMatrix<BinaryStates> createMatrixOfDomainPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list,
771 final SortedSet<DomainId> positive_filter ) {
772 if ( gwcd_list.isEmpty() ) {
773 throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
775 if ( ( positive_filter != null ) && ( positive_filter.size() < 1 ) ) {
776 throw new IllegalArgumentException( "positive filter is empty" );
778 final int number_of_identifiers = gwcd_list.size();
779 final SortedSet<DomainId> all_domain_ids = new TreeSet<DomainId>();
780 for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
781 for( final DomainId domain : gwcd.getAllDomainIds() ) {
782 all_domain_ids.add( domain );
785 int number_of_characters = all_domain_ids.size();
786 if ( positive_filter != null ) {
787 //number_of_characters = positive_filter.size(); -- bad if doms in filter but not in genomes
788 number_of_characters = 0;
789 for( final DomainId id : all_domain_ids ) {
790 if ( positive_filter.contains( id ) ) {
791 number_of_characters++;
795 final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
796 number_of_characters );
797 int character_index = 0;
798 for( final DomainId id : all_domain_ids ) {
799 if ( positive_filter == null ) {
800 matrix.setCharacter( character_index++, id.getId() );
803 if ( positive_filter.contains( id ) ) {
804 matrix.setCharacter( character_index++, id.getId() );
808 int identifier_index = 0;
809 final Set<String> all_identifiers = new HashSet<String>();
810 for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
811 final String species_id = gwcd.getSpecies().getSpeciesId();
812 if ( all_identifiers.contains( species_id ) ) {
813 throw new IllegalArgumentException( "species [" + species_id + "] is not unique" );
815 all_identifiers.add( species_id );
816 matrix.setIdentifier( identifier_index, species_id );
817 for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
818 if ( ForesterUtil.isEmpty( matrix.getCharacter( ci ) ) ) {
819 throw new RuntimeException( "this should not have happened: problem with character #" + ci );
821 final DomainId id = new DomainId( matrix.getCharacter( ci ) );
822 if ( gwcd.contains( id ) ) {
823 matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
826 matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.ABSENT );
834 private static int getStateSumDeltaOnNode( final String node_identifier,
835 final CharacterStateMatrix<GainLossStates> gain_loss_matrix,
836 final GainLossStates state ) {
837 if ( gain_loss_matrix == null ) {
838 throw new RuntimeException( "no gain loss matrix has been calculated" );
840 if ( ForesterUtil.isEmpty( node_identifier ) ) {
841 throw new IllegalArgumentException( "node identifier must not be empty" );
843 if ( gain_loss_matrix.isEmpty() ) {
844 throw new RuntimeException( "gain loss matrix is empty" );
847 final int id_index = gain_loss_matrix.getIdentifierIndex( node_identifier );
848 for( int c = 0; c < gain_loss_matrix.getNumberOfCharacters(); ++c ) {
849 if ( gain_loss_matrix.getState( id_index, c ) == state ) {
856 private static SortedSet<String> getUnitsDeltaOnNode( final String node_identifier,
857 final CharacterStateMatrix<GainLossStates> gain_loss_matrix,
858 final GainLossStates state ) {
859 if ( gain_loss_matrix == null ) {
860 throw new RuntimeException( "no gain loss matrix has been calculated" );
862 if ( ForesterUtil.isEmpty( node_identifier ) ) {
863 throw new IllegalArgumentException( "node identifier must not be empty" );
865 if ( gain_loss_matrix.isEmpty() ) {
866 throw new RuntimeException( "gain loss matrix is empty" );
868 final SortedSet<String> d = new TreeSet<String>();
869 final int id_index = gain_loss_matrix.getIdentifierIndex( node_identifier );
870 for( int c = 0; c < gain_loss_matrix.getNumberOfCharacters(); ++c ) {
871 if ( gain_loss_matrix.getState( id_index, c ) == state ) {
872 if ( d.contains( gain_loss_matrix.getCharacter( c ) ) ) {
873 throw new AssertionError( "this should not have happended: character ["
874 + gain_loss_matrix.getCharacter( c ) + "] already in set" );
876 d.add( gain_loss_matrix.getCharacter( c ) );