3 // FORESTER -- software libraries and applications
4 // for evolutionary biology research and applications.
6 // Copyright (C) 2008-2009 Christian M. Zmasek
7 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 // Contact: phylosoft @ gmail . com
25 // WWW: www.phylosoft.org/forester
27 package org.forester.surfacing;
29 import java.util.HashSet;
30 import java.util.List;
33 import java.util.SortedSet;
34 import java.util.TreeSet;
36 import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix;
37 import org.forester.evoinference.matrix.character.CharacterStateMatrix;
38 import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates;
39 import org.forester.evoinference.matrix.character.CharacterStateMatrix.GainLossStates;
40 import org.forester.evoinference.parsimony.DolloParsimony;
41 import org.forester.evoinference.parsimony.FitchParsimony;
42 import org.forester.phylogeny.Phylogeny;
43 import org.forester.phylogeny.PhylogenyNode;
44 import org.forester.phylogeny.data.BinaryCharacters;
45 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
46 import org.forester.surfacing.BinaryDomainCombination.DomainCombinationType;
47 import org.forester.util.ForesterUtil;
49 public final class DomainParsimonyCalculator {
51 private static final String TYPE_FORBINARY_CHARACTERS = "parsimony inferred";
52 private CharacterStateMatrix<GainLossStates> _gain_loss_matrix;
53 private CharacterStateMatrix<BinaryStates> _binary_internal_states_matrix;
54 private final List<GenomeWideCombinableDomains> _gwcd_list;
55 private final Phylogeny _phylogeny;
56 private int _total_losses;
57 private int _total_gains;
58 private int _total_unchanged;
60 private Map<DomainId, Set<String>> _domain_id_to_secondary_features_map;
61 private SortedSet<DomainId> _positive_filter;
63 private DomainParsimonyCalculator( final Phylogeny phylogeny ) {
65 _phylogeny = phylogeny;
69 private DomainParsimonyCalculator( final Phylogeny phylogeny, final List<GenomeWideCombinableDomains> gwcd_list ) {
71 _phylogeny = phylogeny;
72 _gwcd_list = gwcd_list;
75 private DomainParsimonyCalculator( final Phylogeny phylogeny,
76 final List<GenomeWideCombinableDomains> gwcd_list,
77 final Map<DomainId, Set<String>> domain_id_to_secondary_features_map ) {
79 _phylogeny = phylogeny;
80 _gwcd_list = gwcd_list;
81 setDomainIdToSecondaryFeaturesMap( domain_id_to_secondary_features_map );
84 int calculateNumberOfBinaryDomainCombination() {
85 if ( getGenomeWideCombinableDomainsList().isEmpty() ) {
86 throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
88 final Set<BinaryDomainCombination> all_binary_combinations = new HashSet<BinaryDomainCombination>();
89 for( final GenomeWideCombinableDomains gwcd : getGenomeWideCombinableDomainsList() ) {
90 for( final BinaryDomainCombination bc : gwcd.toBinaryDomainCombinations() ) {
91 all_binary_combinations.add( bc );
94 return all_binary_combinations.size();
97 CharacterStateMatrix<BinaryStates> createMatrixOfBinaryDomainCombinationPresenceOrAbsence() {
98 return createMatrixOfBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList() );
101 CharacterStateMatrix<BinaryStates> createMatrixOfDomainPresenceOrAbsence() {
102 return createMatrixOfDomainPresenceOrAbsence( getGenomeWideCombinableDomainsList(), getPositiveFilter() );
105 CharacterStateMatrix<BinaryStates> createMatrixOfSecondaryFeaturePresenceOrAbsence( final Map<Species, MappingResults> mapping_results_map ) {
106 return createMatrixOfSecondaryFeaturePresenceOrAbsence( getGenomeWideCombinableDomainsList(),
107 getDomainIdToSecondaryFeaturesMap(),
108 mapping_results_map );
111 Phylogeny decoratePhylogenyWithDomains( final Phylogeny phylogeny ) {
112 for( final PhylogenyNodeIterator it = phylogeny.iteratorPostorder(); it.hasNext(); ) {
113 final PhylogenyNode node = it.next();
114 final String node_identifier = node.getName();
115 final BinaryCharacters bc = new BinaryCharacters( getUnitsOnNode( node_identifier ),
116 getUnitsGainedOnNode( node_identifier ),
117 getUnitsLostOnNode( node_identifier ),
118 TYPE_FORBINARY_CHARACTERS,
119 getSumOfPresentOnNode( node_identifier ),
120 getSumOfGainsOnNode( node_identifier ),
121 getSumOfLossesOnNode( node_identifier ) );
122 node.getNodeData().setBinaryCharacters( bc );
127 private void executeDolloParsimony( final boolean on_domain_presence ) {
129 final DolloParsimony dollo = DolloParsimony.createInstance();
130 dollo.setReturnGainLossMatrix( true );
131 dollo.setReturnInternalStates( true );
132 CharacterStateMatrix<BinaryStates> states = null;
133 if ( on_domain_presence ) {
134 states = createMatrixOfDomainPresenceOrAbsence();
137 states = createMatrixOfBinaryDomainCombinationPresenceOrAbsence();
139 dollo.execute( getPhylogeny(), states );
140 setGainLossMatrix( dollo.getGainLossMatrix() );
141 setBinaryInternalStatesMatrix( dollo.getInternalStatesMatrix() );
142 setCost( dollo.getCost() );
143 setTotalGains( dollo.getTotalGains() );
144 setTotalLosses( dollo.getTotalLosses() );
145 setTotalUnchanged( dollo.getTotalUnchanged() );
148 public void executeDolloParsimonyOnBinaryDomainCombintionPresence() {
149 executeDolloParsimony( false );
152 public void executeDolloParsimonyOnDomainPresence() {
153 executeDolloParsimony( true );
156 public void executeDolloParsimonyOnDomainPresence( final SortedSet<DomainId> positive_filter ) {
157 setPositiveFilter( positive_filter );
158 executeDolloParsimony( true );
159 setPositiveFilter( null );
162 public void executeDolloParsimonyOnSecondaryFeatures( final Map<Species, MappingResults> mapping_results_map ) {
163 if ( getDomainIdToSecondaryFeaturesMap() == null ) {
164 throw new RuntimeException( "Domain id to secondary features map has apparently not been set" );
167 final DolloParsimony dollo = DolloParsimony.createInstance();
168 dollo.setReturnGainLossMatrix( true );
169 dollo.setReturnInternalStates( true );
170 final CharacterStateMatrix<BinaryStates> states = createMatrixOfSecondaryFeaturePresenceOrAbsence( mapping_results_map );
171 dollo.execute( getPhylogeny(), states );
172 setGainLossMatrix( dollo.getGainLossMatrix() );
173 setBinaryInternalStatesMatrix( dollo.getInternalStatesMatrix() );
174 setCost( dollo.getCost() );
175 setTotalGains( dollo.getTotalGains() );
176 setTotalLosses( dollo.getTotalLosses() );
177 setTotalUnchanged( dollo.getTotalUnchanged() );
180 private void executeFitchParsimony( final boolean on_domain_presence,
181 final boolean use_last,
182 final boolean randomize,
183 final long random_number_seed ) {
186 System.out.println( " Fitch parsimony: use_last = true" );
188 final FitchParsimony<BinaryStates> fitch = new FitchParsimony<BinaryStates>();
189 fitch.setRandomize( randomize );
191 fitch.setRandomNumberSeed( random_number_seed );
193 fitch.setUseLast( use_last );
194 fitch.setReturnGainLossMatrix( true );
195 fitch.setReturnInternalStates( true );
196 CharacterStateMatrix<BinaryStates> states = null;
197 if ( on_domain_presence ) {
198 states = createMatrixOfDomainPresenceOrAbsence( getGenomeWideCombinableDomainsList() );
201 states = createMatrixOfBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList() );
203 fitch.execute( getPhylogeny(), states );
204 setGainLossMatrix( fitch.getGainLossMatrix() );
205 setBinaryInternalStatesMatrix( fitch.getInternalStatesMatrix() );
206 setCost( fitch.getCost() );
207 setTotalGains( fitch.getTotalGains() );
208 setTotalLosses( fitch.getTotalLosses() );
209 setTotalUnchanged( fitch.getTotalUnchanged() );
212 public void executeFitchParsimonyOnBinaryDomainCombintion( final boolean use_last ) {
213 executeFitchParsimony( false, use_last, false, 0 );
216 public void executeFitchParsimonyOnBinaryDomainCombintion( final long random_number_seed ) {
217 executeFitchParsimony( false, false, true, random_number_seed );
220 public void executeFitchParsimonyOnDomainPresence( final boolean use_last ) {
221 executeFitchParsimony( true, use_last, false, 0 );
224 public void executeFitchParsimonyOnDomainPresence( final long random_number_seed ) {
225 executeFitchParsimony( true, false, true, random_number_seed );
228 public void executeOnGivenBinaryStatesMatrix( final CharacterStateMatrix<BinaryStates> binary_states_matrix,
229 final String[] character_labels ) {
231 if ( binary_states_matrix.getNumberOfCharacters() != character_labels.length ) {
232 throw new IllegalArgumentException( "binary states matrix number of characters is not equal to the number of character labels provided" );
234 if ( binary_states_matrix.getNumberOfIdentifiers() != getPhylogeny().getNumberOfBranches() ) {
235 throw new IllegalArgumentException( "binary states matrix number of identifiers is not equal to the number of tree nodes provided" );
237 final CharacterStateMatrix<GainLossStates> gl_matrix = new BasicCharacterStateMatrix<GainLossStates>( binary_states_matrix
238 .getNumberOfIdentifiers(),
240 .getNumberOfCharacters() );
242 int total_losses = 0;
243 int total_unchanged = 0;
245 for( final PhylogenyNodeIterator it = getPhylogeny().iteratorPostorder(); it.hasNext(); ) {
246 gl_matrix.setIdentifier( i++, it.next().getName() );
248 for( int c = 0; c < character_labels.length; ++c ) {
249 gl_matrix.setCharacter( c, character_labels[ c ] );
250 final PhylogenyNodeIterator it = getPhylogeny().iteratorPostorder();
251 while ( it.hasNext() ) {
252 final PhylogenyNode node = it.next();
253 final String name = node.getName();
254 final BinaryStates bin_state = binary_states_matrix.getState( binary_states_matrix
255 .getIdentifierIndex( name ), c );
256 final PhylogenyNode parent_node = getPhylogeny().getNode( name ).getParent();
257 GainLossStates gl_state = null;
258 if ( node.isRoot() ) {
260 if ( bin_state == BinaryStates.ABSENT ) {
261 gl_state = GainLossStates.UNCHANGED_ABSENT;
264 gl_state = GainLossStates.UNCHANGED_PRESENT;
268 final BinaryStates parent_bin_state = binary_states_matrix.getState( binary_states_matrix
269 .getIdentifierIndex( parent_node.getName() ), c );
270 if ( bin_state == BinaryStates.ABSENT ) {
271 if ( parent_bin_state == BinaryStates.ABSENT ) {
273 gl_state = GainLossStates.UNCHANGED_ABSENT;
277 gl_state = GainLossStates.LOSS;
281 if ( parent_bin_state == BinaryStates.ABSENT ) {
283 gl_state = GainLossStates.GAIN;
287 gl_state = GainLossStates.UNCHANGED_PRESENT;
291 gl_matrix.setState( name, c, gl_state );
294 setTotalGains( total_gains );
295 setTotalLosses( total_losses );
296 setTotalUnchanged( total_unchanged );
297 setCost( total_gains + total_losses );
298 setGainLossMatrix( gl_matrix );
301 public int getCost() {
305 private Map<DomainId, Set<String>> getDomainIdToSecondaryFeaturesMap() {
306 return _domain_id_to_secondary_features_map;
309 public CharacterStateMatrix<Integer> getGainLossCountsMatrix() {
310 final CharacterStateMatrix<Integer> matrix = new BasicCharacterStateMatrix<Integer>( getGainLossMatrix()
311 .getNumberOfIdentifiers(), 3 );
312 for( int i = 0; i < getGainLossMatrix().getNumberOfIdentifiers(); ++i ) {
313 matrix.setIdentifier( i, getGainLossMatrix().getIdentifier( i ) );
315 matrix.setCharacter( 0, "GAINS" );
316 matrix.setCharacter( 1, "LOSSES" );
317 matrix.setCharacter( 2, "NET" );
318 for( int i = 0; i < getGainLossMatrix().getNumberOfIdentifiers(); ++i ) {
321 for( int c = 0; c < getGainLossMatrix().getNumberOfCharacters(); ++c ) {
322 final GainLossStates s = getGainLossMatrix().getState( i, c );
323 if ( s == GainLossStates.GAIN ) {
326 else if ( s == GainLossStates.LOSS ) {
330 matrix.setState( i, 0, gains );
331 matrix.setState( i, 1, losses );
332 matrix.setState( i, 2, gains - losses );
337 public CharacterStateMatrix<GainLossStates> getGainLossMatrix() {
338 return _gain_loss_matrix;
341 private List<GenomeWideCombinableDomains> getGenomeWideCombinableDomainsList() {
345 public CharacterStateMatrix<BinaryStates> getInternalStatesMatrix() {
346 return _binary_internal_states_matrix;
349 public int getNetGainsOnNode( final String node_identifier ) {
350 if ( getGainLossMatrix() == null ) {
351 throw new RuntimeException( "no gain loss matrix has been calculated" );
354 final int id_index = getGainLossMatrix().getIdentifierIndex( node_identifier );
355 for( int c = 0; c < getGainLossMatrix().getNumberOfCharacters(); ++c ) {
356 if ( getGainLossMatrix().getState( id_index, c ) == GainLossStates.GAIN ) {
359 else if ( getGainLossMatrix().getState( id_index, c ) == GainLossStates.LOSS ) {
366 private Phylogeny getPhylogeny() {
370 private SortedSet<DomainId> getPositiveFilter() {
371 return _positive_filter;
374 public int getSumOfGainsOnNode( final String node_identifier ) {
375 return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.GAIN );
378 public int getSumOfLossesOnNode( final String node_identifier ) {
379 return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.LOSS );
382 public int getSumOfPresentOnNode( final String node_identifier ) {
383 return getSumOfGainsOnNode( node_identifier ) + getSumOfUnchangedPresentOnNode( node_identifier );
386 int getSumOfUnchangedAbsentOnNode( final String node_identifier ) {
387 return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_ABSENT );
390 int getSumOfUnchangedOnNode( final String node_identifier ) {
391 return getSumOfUnchangedPresentOnNode( node_identifier ) + getSumOfUnchangedAbsentOnNode( node_identifier );
394 int getSumOfUnchangedPresentOnNode( final String node_identifier ) {
395 return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_PRESENT );
398 public int getTotalGains() {
402 public int getTotalLosses() {
403 return _total_losses;
406 public int getTotalUnchanged() {
407 return _total_unchanged;
410 public SortedSet<String> getUnitsGainedOnNode( final String node_identifier ) {
411 return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.GAIN );
414 public SortedSet<String> getUnitsLostOnNode( final String node_identifier ) {
415 return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.LOSS );
418 public SortedSet<String> getUnitsOnNode( final String node_identifier ) {
419 final SortedSet<String> present = getUnitsGainedOnNode( node_identifier );
420 present.addAll( getUnitsUnchangedPresentOnNode( node_identifier ) );
424 SortedSet<String> getUnitsUnchangedAbsentOnNode( final String node_identifier ) {
425 return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_ABSENT );
428 SortedSet<String> getUnitsUnchangedPresentOnNode( final String node_identifier ) {
429 return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_PRESENT );
432 private void init() {
433 setDomainIdToSecondaryFeaturesMap( null );
434 setPositiveFilter( null );
438 private void reset() {
439 setGainLossMatrix( null );
440 setBinaryInternalStatesMatrix( null );
443 setTotalLosses( -1 );
444 setTotalUnchanged( -1 );
447 private void setBinaryInternalStatesMatrix( final CharacterStateMatrix<BinaryStates> binary_states_matrix ) {
448 _binary_internal_states_matrix = binary_states_matrix;
451 private void setCost( final int cost ) {
455 private void setDomainIdToSecondaryFeaturesMap( final Map<DomainId, Set<String>> domain_id_to_secondary_features_map ) {
456 _domain_id_to_secondary_features_map = domain_id_to_secondary_features_map;
459 private void setGainLossMatrix( final CharacterStateMatrix<GainLossStates> gain_loss_matrix ) {
460 _gain_loss_matrix = gain_loss_matrix;
463 private void setPositiveFilter( final SortedSet<DomainId> positive_filter ) {
464 _positive_filter = positive_filter;
467 private void setTotalGains( final int total_gains ) {
468 _total_gains = total_gains;
471 private void setTotalLosses( final int total_losses ) {
472 _total_losses = total_losses;
475 private void setTotalUnchanged( final int total_unchanged ) {
476 _total_unchanged = total_unchanged;
479 public static DomainParsimonyCalculator createInstance( final Phylogeny phylogeny ) {
480 return new DomainParsimonyCalculator( phylogeny );
483 public static DomainParsimonyCalculator createInstance( final Phylogeny phylogeny,
484 final List<GenomeWideCombinableDomains> gwcd_list ) {
485 if ( phylogeny.getNumberOfExternalNodes() != gwcd_list.size() ) {
486 throw new IllegalArgumentException( "number of external nodes [" + phylogeny.getNumberOfExternalNodes()
487 + "] does not equal size of genome wide combinable domains list [" + gwcd_list.size() + "]" );
489 return new DomainParsimonyCalculator( phylogeny, gwcd_list );
492 public static DomainParsimonyCalculator createInstance( final Phylogeny phylogeny,
493 final List<GenomeWideCombinableDomains> gwcd_list,
494 final Map<DomainId, Set<String>> domain_id_to_secondary_features_map ) {
495 if ( phylogeny.getNumberOfExternalNodes() != gwcd_list.size() ) {
496 throw new IllegalArgumentException( "size of external nodes does not equal size of genome wide combinable domains list" );
498 return new DomainParsimonyCalculator( phylogeny, gwcd_list, domain_id_to_secondary_features_map );
501 public static CharacterStateMatrix<BinaryStates> createMatrixOfBinaryDomainCombinationPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list ) {
502 if ( gwcd_list.isEmpty() ) {
503 throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
505 final int number_of_identifiers = gwcd_list.size();
506 final SortedSet<BinaryDomainCombination> all_binary_combinations = new TreeSet<BinaryDomainCombination>();
507 final Set<BinaryDomainCombination>[] binary_combinations_per_genome = new HashSet[ number_of_identifiers ];
508 int identifier_index = 0;
509 for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
510 binary_combinations_per_genome[ identifier_index ] = new HashSet<BinaryDomainCombination>();
511 for( final BinaryDomainCombination bc : gwcd.toBinaryDomainCombinations() ) {
512 all_binary_combinations.add( bc );
513 binary_combinations_per_genome[ identifier_index ].add( bc );
517 final int number_of_characters = all_binary_combinations.size();
518 final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
519 number_of_characters );
520 int character_index = 0;
521 for( final BinaryDomainCombination bc : all_binary_combinations ) {
522 matrix.setCharacter( character_index++, bc.toString() );
524 identifier_index = 0;
525 final Set<String> all_identifiers = new HashSet<String>();
526 for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
527 final String species_id = gwcd.getSpecies().getSpeciesId();
528 if ( all_identifiers.contains( species_id ) ) {
529 throw new AssertionError( "species [" + species_id + "] is not unique" );
531 all_identifiers.add( species_id );
532 matrix.setIdentifier( identifier_index, species_id );
533 for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
534 BinaryDomainCombination bc = null;
535 if ( gwcd.getDomainCombinationType() == DomainCombinationType.DIRECTED_ADJACTANT ) {
536 bc = AdjactantDirectedBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
538 else if ( gwcd.getDomainCombinationType() == DomainCombinationType.DIRECTED ) {
539 bc = DirectedBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
542 bc = BasicBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
544 if ( binary_combinations_per_genome[ identifier_index ].contains( bc ) ) {
545 matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
548 matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.ABSENT );
556 static CharacterStateMatrix<BinaryStates> createMatrixOfDomainPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list ) {
557 return createMatrixOfDomainPresenceOrAbsence( gwcd_list, null );
560 public static CharacterStateMatrix<BinaryStates> createMatrixOfDomainPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list,
561 final SortedSet<DomainId> positive_filter ) {
562 if ( gwcd_list.isEmpty() ) {
563 throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
565 if ( ( positive_filter != null ) && ( positive_filter.size() < 1 ) ) {
566 throw new IllegalArgumentException( "positive filter is empty" );
568 final int number_of_identifiers = gwcd_list.size();
569 final SortedSet<DomainId> all_domain_ids = new TreeSet<DomainId>();
570 for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
571 for( final DomainId domain : gwcd.getAllDomainIds() ) {
572 all_domain_ids.add( domain );
575 int number_of_characters = all_domain_ids.size();
576 if ( positive_filter != null ) {
577 //number_of_characters = positive_filter.size(); -- bad if doms in filter but not in genomes
578 number_of_characters = 0;
579 for( final DomainId id : all_domain_ids ) {
580 if ( positive_filter.contains( id ) ) {
581 number_of_characters++;
585 final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
586 number_of_characters );
587 int character_index = 0;
588 for( final DomainId id : all_domain_ids ) {
589 if ( positive_filter == null ) {
590 matrix.setCharacter( character_index++, id.getId() );
593 if ( positive_filter.contains( id ) ) {
594 matrix.setCharacter( character_index++, id.getId() );
598 int identifier_index = 0;
599 final Set<String> all_identifiers = new HashSet<String>();
600 for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
601 final String species_id = gwcd.getSpecies().getSpeciesId();
602 if ( all_identifiers.contains( species_id ) ) {
603 throw new IllegalArgumentException( "species [" + species_id + "] is not unique" );
605 all_identifiers.add( species_id );
606 matrix.setIdentifier( identifier_index, species_id );
607 for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
608 if ( ForesterUtil.isEmpty( matrix.getCharacter( ci ) ) ) {
609 throw new RuntimeException( "this should not have happened: problem with character #" + ci );
611 final DomainId id = new DomainId( matrix.getCharacter( ci ) );
612 if ( gwcd.contains( id ) ) {
613 matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
616 matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.ABSENT );
625 * For folds instead of Pfam-domains, for example
631 static CharacterStateMatrix<BinaryStates> createMatrixOfSecondaryFeaturePresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list,
632 final Map<DomainId, Set<String>> domain_id_to_second_features_map,
633 final Map<Species, MappingResults> mapping_results_map ) {
634 if ( gwcd_list.isEmpty() ) {
635 throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
637 if ( ( domain_id_to_second_features_map == null ) || domain_id_to_second_features_map.isEmpty() ) {
638 throw new IllegalArgumentException( "domain id to secondary features map is null or empty" );
640 final int number_of_identifiers = gwcd_list.size();
641 final SortedSet<String> all_secondary_features = new TreeSet<String>();
642 for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
645 for( final DomainId domain : gwcd.getAllDomainIds() ) {
646 if ( domain_id_to_second_features_map.containsKey( domain ) ) {
647 all_secondary_features.addAll( domain_id_to_second_features_map.get( domain ) );
654 if ( mapping_results_map != null ) {
655 final MappingResults mr = new MappingResults();
656 mr.setDescription( gwcd.getSpecies().getSpeciesId() );
657 mr.setSumOfSuccesses( mapped );
658 mr.setSumOfFailures( not_mapped );
659 mapping_results_map.put( gwcd.getSpecies(), mr );
662 final int number_of_characters = all_secondary_features.size();
663 final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
664 number_of_characters );
665 int character_index = 0;
666 for( final String second_id : all_secondary_features ) {
667 matrix.setCharacter( character_index++, second_id );
669 int identifier_index = 0;
670 final Set<String> all_identifiers = new HashSet<String>();
671 for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
672 final String species_id = gwcd.getSpecies().getSpeciesId();
673 if ( all_identifiers.contains( species_id ) ) {
674 throw new IllegalArgumentException( "species [" + species_id + "] is not unique" );
676 all_identifiers.add( species_id );
677 matrix.setIdentifier( identifier_index, species_id );
678 final Set<String> all_second_per_gwcd = new HashSet<String>();
679 for( final DomainId domain : gwcd.getAllDomainIds() ) {
680 if ( domain_id_to_second_features_map.containsKey( domain ) ) {
681 all_second_per_gwcd.addAll( domain_id_to_second_features_map.get( domain ) );
684 for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
685 if ( all_second_per_gwcd.contains( matrix.getCharacter( ci ) ) ) {
686 matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
689 matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.ABSENT );
697 private static int getStateSumDeltaOnNode( final String node_identifier,
698 final CharacterStateMatrix<GainLossStates> gain_loss_matrix,
699 final GainLossStates state ) {
700 if ( gain_loss_matrix == null ) {
701 throw new RuntimeException( "no gain loss matrix has been calculated" );
703 if ( ForesterUtil.isEmpty( node_identifier ) ) {
704 throw new IllegalArgumentException( "node identifier must not be empty" );
706 if ( gain_loss_matrix.isEmpty() ) {
707 throw new RuntimeException( "gain loss matrix is empty" );
710 final int id_index = gain_loss_matrix.getIdentifierIndex( node_identifier );
711 for( int c = 0; c < gain_loss_matrix.getNumberOfCharacters(); ++c ) {
712 if ( gain_loss_matrix.getState( id_index, c ) == state ) {
719 private static SortedSet<String> getUnitsDeltaOnNode( final String node_identifier,
720 final CharacterStateMatrix<GainLossStates> gain_loss_matrix,
721 final GainLossStates state ) {
722 if ( gain_loss_matrix == null ) {
723 throw new RuntimeException( "no gain loss matrix has been calculated" );
725 if ( ForesterUtil.isEmpty( node_identifier ) ) {
726 throw new IllegalArgumentException( "node identifier must not be empty" );
728 if ( gain_loss_matrix.isEmpty() ) {
729 throw new RuntimeException( "gain loss matrix is empty" );
731 final SortedSet<String> d = new TreeSet<String>();
732 final int id_index = gain_loss_matrix.getIdentifierIndex( node_identifier );
733 for( int c = 0; c < gain_loss_matrix.getNumberOfCharacters(); ++c ) {
734 if ( gain_loss_matrix.getState( id_index, c ) == state ) {
735 if ( d.contains( gain_loss_matrix.getCharacter( c ) ) ) {
736 throw new AssertionError( "this should not have happended: character ["
737 + gain_loss_matrix.getCharacter( c ) + "] already in set" );
739 d.add( gain_loss_matrix.getCharacter( c ) );