3 // FORESTER -- software libraries and applications
4 // for evolutionary biology research and applications.
6 // Copyright (C) 2008-2009 Christian M. Zmasek
7 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 // Contact: phylosoft @ gmail . com
25 // WWW: www.phylosoft.org/forester
27 package org.forester.surfacing;
29 import java.util.HashMap;
30 import java.util.HashSet;
31 import java.util.Iterator;
32 import java.util.List;
34 import java.util.Map.Entry;
36 import java.util.SortedSet;
37 import java.util.TreeSet;
39 import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix;
40 import org.forester.evoinference.matrix.character.CharacterStateMatrix;
41 import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates;
42 import org.forester.evoinference.matrix.character.CharacterStateMatrix.GainLossStates;
43 import org.forester.evoinference.parsimony.DolloParsimony;
44 import org.forester.evoinference.parsimony.FitchParsimony;
45 import org.forester.phylogeny.Phylogeny;
46 import org.forester.phylogeny.PhylogenyNode;
47 import org.forester.phylogeny.data.BinaryCharacters;
48 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
49 import org.forester.surfacing.BinaryDomainCombination.DomainCombinationType;
50 import org.forester.util.ForesterUtil;
52 public final class DomainParsimonyCalculator {
54 private static final String TYPE_FORBINARY_CHARACTERS = "parsimony inferred";
55 private CharacterStateMatrix<GainLossStates> _gain_loss_matrix;
56 private CharacterStateMatrix<BinaryStates> _binary_internal_states_matrix;
57 private final List<GenomeWideCombinableDomains> _gwcd_list;
58 private final Phylogeny _phylogeny;
59 private int _total_losses;
60 private int _total_gains;
61 private int _total_unchanged;
63 private Map<DomainId, Set<String>> _domain_id_to_secondary_features_map;
64 private SortedSet<DomainId> _positive_filter;
66 private DomainParsimonyCalculator( final Phylogeny phylogeny ) {
68 _phylogeny = phylogeny;
72 private DomainParsimonyCalculator( final Phylogeny phylogeny, final List<GenomeWideCombinableDomains> gwcd_list ) {
74 _phylogeny = phylogeny;
75 _gwcd_list = gwcd_list;
78 private DomainParsimonyCalculator( final Phylogeny phylogeny,
79 final List<GenomeWideCombinableDomains> gwcd_list,
80 final Map<DomainId, Set<String>> domain_id_to_secondary_features_map ) {
82 _phylogeny = phylogeny;
83 _gwcd_list = gwcd_list;
84 setDomainIdToSecondaryFeaturesMap( domain_id_to_secondary_features_map );
87 int calculateNumberOfBinaryDomainCombination() {
88 if ( getGenomeWideCombinableDomainsList().isEmpty() ) {
89 throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
91 final Set<BinaryDomainCombination> all_binary_combinations = new HashSet<BinaryDomainCombination>();
92 for( final GenomeWideCombinableDomains gwcd : getGenomeWideCombinableDomainsList() ) {
93 for( final BinaryDomainCombination bc : gwcd.toBinaryDomainCombinations() ) {
94 all_binary_combinations.add( bc );
97 return all_binary_combinations.size();
100 CharacterStateMatrix<BinaryStates> createMatrixOfBinaryDomainCombinationPresenceOrAbsence() {
101 return createMatrixOfBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList() );
104 CharacterStateMatrix<BinaryStates> createMatrixOfDomainPresenceOrAbsence() {
105 return createMatrixOfDomainPresenceOrAbsence( getGenomeWideCombinableDomainsList(), getPositiveFilter() );
108 CharacterStateMatrix<BinaryStates> createMatrixOfSecondaryFeaturePresenceOrAbsence( final Map<Species, MappingResults> mapping_results_map ) {
109 return createMatrixOfSecondaryFeaturePresenceOrAbsence( getGenomeWideCombinableDomainsList(),
110 getDomainIdToSecondaryFeaturesMap(),
111 mapping_results_map );
114 Phylogeny decoratePhylogenyWithDomains( final Phylogeny phylogeny ) {
115 for( final PhylogenyNodeIterator it = phylogeny.iteratorPostorder(); it.hasNext(); ) {
116 final PhylogenyNode node = it.next();
117 final String node_identifier = node.getName();
118 final BinaryCharacters bc = new BinaryCharacters( getUnitsOnNode( node_identifier ),
119 getUnitsGainedOnNode( node_identifier ),
120 getUnitsLostOnNode( node_identifier ),
121 TYPE_FORBINARY_CHARACTERS,
122 getSumOfPresentOnNode( node_identifier ),
123 getSumOfGainsOnNode( node_identifier ),
124 getSumOfLossesOnNode( node_identifier ) );
125 node.getNodeData().setBinaryCharacters( bc );
130 private void executeDolloParsimony( final boolean on_domain_presence ) {
132 final DolloParsimony dollo = DolloParsimony.createInstance();
133 dollo.setReturnGainLossMatrix( true );
134 dollo.setReturnInternalStates( true );
135 CharacterStateMatrix<BinaryStates> states = null;
136 if ( on_domain_presence ) {
137 states = createMatrixOfDomainPresenceOrAbsence();
140 states = createMatrixOfBinaryDomainCombinationPresenceOrAbsence();
142 dollo.execute( getPhylogeny(), states );
143 setGainLossMatrix( dollo.getGainLossMatrix() );
144 setBinaryInternalStatesMatrix( dollo.getInternalStatesMatrix() );
145 setCost( dollo.getCost() );
146 setTotalGains( dollo.getTotalGains() );
147 setTotalLosses( dollo.getTotalLosses() );
148 setTotalUnchanged( dollo.getTotalUnchanged() );
151 public void executeDolloParsimonyOnBinaryDomainCombintionPresence() {
152 executeDolloParsimony( false );
155 public void executeDolloParsimonyOnDomainPresence() {
156 executeDolloParsimony( true );
159 public void executeDolloParsimonyOnDomainPresence( final SortedSet<DomainId> positive_filter ) {
160 setPositiveFilter( positive_filter );
161 executeDolloParsimony( true );
162 setPositiveFilter( null );
165 public void executeDolloParsimonyOnSecondaryFeatures( final Map<Species, MappingResults> mapping_results_map ) {
166 if ( getDomainIdToSecondaryFeaturesMap() == null ) {
167 throw new RuntimeException( "Domain id to secondary features map has apparently not been set" );
170 final DolloParsimony dollo = DolloParsimony.createInstance();
171 dollo.setReturnGainLossMatrix( true );
172 dollo.setReturnInternalStates( true );
173 final CharacterStateMatrix<BinaryStates> states = createMatrixOfSecondaryFeaturePresenceOrAbsence( mapping_results_map );
174 dollo.execute( getPhylogeny(), states );
175 setGainLossMatrix( dollo.getGainLossMatrix() );
176 setBinaryInternalStatesMatrix( dollo.getInternalStatesMatrix() );
177 setCost( dollo.getCost() );
178 setTotalGains( dollo.getTotalGains() );
179 setTotalLosses( dollo.getTotalLosses() );
180 setTotalUnchanged( dollo.getTotalUnchanged() );
183 private void executeFitchParsimony( final boolean on_domain_presence,
184 final boolean use_last,
185 final boolean randomize,
186 final long random_number_seed ) {
189 System.out.println( " Fitch parsimony: use_last = true" );
191 final FitchParsimony<BinaryStates> fitch = new FitchParsimony<BinaryStates>();
192 fitch.setRandomize( randomize );
194 fitch.setRandomNumberSeed( random_number_seed );
196 fitch.setUseLast( use_last );
197 fitch.setReturnGainLossMatrix( true );
198 fitch.setReturnInternalStates( true );
199 CharacterStateMatrix<BinaryStates> states = null;
200 if ( on_domain_presence ) {
201 states = createMatrixOfDomainPresenceOrAbsence( getGenomeWideCombinableDomainsList() );
204 states = createMatrixOfBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList() );
206 fitch.execute( getPhylogeny(), states );
207 setGainLossMatrix( fitch.getGainLossMatrix() );
208 setBinaryInternalStatesMatrix( fitch.getInternalStatesMatrix() );
209 setCost( fitch.getCost() );
210 setTotalGains( fitch.getTotalGains() );
211 setTotalLosses( fitch.getTotalLosses() );
212 setTotalUnchanged( fitch.getTotalUnchanged() );
215 private void executeFitchParsimonyOnSecondaryFeatures(
216 final boolean use_last,
217 final boolean randomize,
218 final long random_number_seed ) {
221 System.out.println( " Fitch parsimony: use_last = true" );
223 final FitchParsimony<BinaryStates> fitch = new FitchParsimony<BinaryStates>();
224 fitch.setRandomize( randomize );
226 fitch.setRandomNumberSeed( random_number_seed );
228 fitch.setUseLast( use_last );
229 fitch.setReturnGainLossMatrix( true );
230 fitch.setReturnInternalStates( true );
232 final Map<DomainId, Set<String>> map = getDomainIdToSecondaryFeaturesMap();
233 final Map<DomainId, String> newmap = new HashMap<DomainId, String>();
234 final Iterator<Entry<DomainId, Set<String>>> it = map.entrySet().iterator();
235 while (it.hasNext()) {
236 final Map.Entry<DomainId, Set<String>> pair = (Map.Entry<DomainId, Set<String>>)it.next();
237 if ( pair.getValue().size() != 1 ) {
238 throw new IllegalArgumentException( pair.getKey().getId() + " mapps to " + pair.getValue().size() + " items" );
240 newmap.put( pair.getKey(), ( String ) pair.getValue().toArray()[ 0 ] );
243 CharacterStateMatrix<BinaryStates> states =createMatrixOfSecondaryFeatureBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList(),
246 fitch.execute( getPhylogeny(), states );
247 setGainLossMatrix( fitch.getGainLossMatrix() );
248 setBinaryInternalStatesMatrix( fitch.getInternalStatesMatrix() );
249 setCost( fitch.getCost() );
250 setTotalGains( fitch.getTotalGains() );
251 setTotalLosses( fitch.getTotalLosses() );
252 setTotalUnchanged( fitch.getTotalUnchanged() );
255 public void executeFitchParsimonyOnBinaryDomainCombintion( final boolean use_last ) {
256 executeFitchParsimony( false, use_last, false, 0 );
259 public void executeFitchParsimonyOnBinaryDomainCombintionOnSecondaryFeatures( final boolean use_last ) {
260 executeFitchParsimonyOnSecondaryFeatures( use_last, false, 0 );
263 public void executeFitchParsimonyOnBinaryDomainCombintion( final long random_number_seed ) {
264 executeFitchParsimony( false, false, true, random_number_seed );
267 public void executeFitchParsimonyOnDomainPresence( final boolean use_last ) {
268 executeFitchParsimony( true, use_last, false, 0 );
271 public void executeFitchParsimonyOnDomainPresence( final long random_number_seed ) {
272 executeFitchParsimony( true, false, true, random_number_seed );
275 public void executeOnGivenBinaryStatesMatrix( final CharacterStateMatrix<BinaryStates> binary_states_matrix,
276 final String[] character_labels ) {
278 if ( binary_states_matrix.getNumberOfCharacters() != character_labels.length ) {
279 throw new IllegalArgumentException( "binary states matrix number of characters is not equal to the number of character labels provided" );
281 if ( binary_states_matrix.getNumberOfIdentifiers() != getPhylogeny().getNumberOfBranches() ) {
282 throw new IllegalArgumentException( "binary states matrix number of identifiers is not equal to the number of tree nodes provided" );
284 final CharacterStateMatrix<GainLossStates> gl_matrix = new BasicCharacterStateMatrix<GainLossStates>( binary_states_matrix
285 .getNumberOfIdentifiers(),
287 .getNumberOfCharacters() );
289 int total_losses = 0;
290 int total_unchanged = 0;
292 for( final PhylogenyNodeIterator it = getPhylogeny().iteratorPostorder(); it.hasNext(); ) {
293 gl_matrix.setIdentifier( i++, it.next().getName() );
295 for( int c = 0; c < character_labels.length; ++c ) {
296 gl_matrix.setCharacter( c, character_labels[ c ] );
297 final PhylogenyNodeIterator it = getPhylogeny().iteratorPostorder();
298 while ( it.hasNext() ) {
299 final PhylogenyNode node = it.next();
300 final String name = node.getName();
301 final BinaryStates bin_state = binary_states_matrix.getState( binary_states_matrix
302 .getIdentifierIndex( name ), c );
303 final PhylogenyNode parent_node = getPhylogeny().getNode( name ).getParent();
304 GainLossStates gl_state = null;
305 if ( node.isRoot() ) {
307 if ( bin_state == BinaryStates.ABSENT ) {
308 gl_state = GainLossStates.UNCHANGED_ABSENT;
311 gl_state = GainLossStates.UNCHANGED_PRESENT;
315 final BinaryStates parent_bin_state = binary_states_matrix.getState( binary_states_matrix
316 .getIdentifierIndex( parent_node.getName() ), c );
317 if ( bin_state == BinaryStates.ABSENT ) {
318 if ( parent_bin_state == BinaryStates.ABSENT ) {
320 gl_state = GainLossStates.UNCHANGED_ABSENT;
324 gl_state = GainLossStates.LOSS;
328 if ( parent_bin_state == BinaryStates.ABSENT ) {
330 gl_state = GainLossStates.GAIN;
334 gl_state = GainLossStates.UNCHANGED_PRESENT;
338 gl_matrix.setState( name, c, gl_state );
341 setTotalGains( total_gains );
342 setTotalLosses( total_losses );
343 setTotalUnchanged( total_unchanged );
344 setCost( total_gains + total_losses );
345 setGainLossMatrix( gl_matrix );
348 public int getCost() {
352 private Map<DomainId, Set<String>> getDomainIdToSecondaryFeaturesMap() {
353 return _domain_id_to_secondary_features_map;
356 public CharacterStateMatrix<Integer> getGainLossCountsMatrix() {
357 final CharacterStateMatrix<Integer> matrix = new BasicCharacterStateMatrix<Integer>( getGainLossMatrix()
358 .getNumberOfIdentifiers(), 3 );
359 for( int i = 0; i < getGainLossMatrix().getNumberOfIdentifiers(); ++i ) {
360 matrix.setIdentifier( i, getGainLossMatrix().getIdentifier( i ) );
362 matrix.setCharacter( 0, "GAINS" );
363 matrix.setCharacter( 1, "LOSSES" );
364 matrix.setCharacter( 2, "NET" );
365 for( int i = 0; i < getGainLossMatrix().getNumberOfIdentifiers(); ++i ) {
368 for( int c = 0; c < getGainLossMatrix().getNumberOfCharacters(); ++c ) {
369 final GainLossStates s = getGainLossMatrix().getState( i, c );
370 if ( s == GainLossStates.GAIN ) {
373 else if ( s == GainLossStates.LOSS ) {
377 matrix.setState( i, 0, gains );
378 matrix.setState( i, 1, losses );
379 matrix.setState( i, 2, gains - losses );
384 public CharacterStateMatrix<GainLossStates> getGainLossMatrix() {
385 return _gain_loss_matrix;
388 private List<GenomeWideCombinableDomains> getGenomeWideCombinableDomainsList() {
392 public CharacterStateMatrix<BinaryStates> getInternalStatesMatrix() {
393 return _binary_internal_states_matrix;
396 public int getNetGainsOnNode( final String node_identifier ) {
397 if ( getGainLossMatrix() == null ) {
398 throw new RuntimeException( "no gain loss matrix has been calculated" );
401 final int id_index = getGainLossMatrix().getIdentifierIndex( node_identifier );
402 for( int c = 0; c < getGainLossMatrix().getNumberOfCharacters(); ++c ) {
403 if ( getGainLossMatrix().getState( id_index, c ) == GainLossStates.GAIN ) {
406 else if ( getGainLossMatrix().getState( id_index, c ) == GainLossStates.LOSS ) {
413 private Phylogeny getPhylogeny() {
417 private SortedSet<DomainId> getPositiveFilter() {
418 return _positive_filter;
421 public int getSumOfGainsOnNode( final String node_identifier ) {
422 return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.GAIN );
425 public int getSumOfLossesOnNode( final String node_identifier ) {
426 return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.LOSS );
429 public int getSumOfPresentOnNode( final String node_identifier ) {
430 return getSumOfGainsOnNode( node_identifier ) + getSumOfUnchangedPresentOnNode( node_identifier );
433 int getSumOfUnchangedAbsentOnNode( final String node_identifier ) {
434 return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_ABSENT );
437 int getSumOfUnchangedOnNode( final String node_identifier ) {
438 return getSumOfUnchangedPresentOnNode( node_identifier ) + getSumOfUnchangedAbsentOnNode( node_identifier );
441 int getSumOfUnchangedPresentOnNode( final String node_identifier ) {
442 return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_PRESENT );
445 public int getTotalGains() {
449 public int getTotalLosses() {
450 return _total_losses;
453 public int getTotalUnchanged() {
454 return _total_unchanged;
457 public SortedSet<String> getUnitsGainedOnNode( final String node_identifier ) {
458 return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.GAIN );
461 public SortedSet<String> getUnitsLostOnNode( final String node_identifier ) {
462 return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.LOSS );
465 public SortedSet<String> getUnitsOnNode( final String node_identifier ) {
466 final SortedSet<String> present = getUnitsGainedOnNode( node_identifier );
467 present.addAll( getUnitsUnchangedPresentOnNode( node_identifier ) );
471 SortedSet<String> getUnitsUnchangedAbsentOnNode( final String node_identifier ) {
472 return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_ABSENT );
475 SortedSet<String> getUnitsUnchangedPresentOnNode( final String node_identifier ) {
476 return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_PRESENT );
479 private void init() {
480 setDomainIdToSecondaryFeaturesMap( null );
481 setPositiveFilter( null );
485 private void reset() {
486 setGainLossMatrix( null );
487 setBinaryInternalStatesMatrix( null );
490 setTotalLosses( -1 );
491 setTotalUnchanged( -1 );
494 private void setBinaryInternalStatesMatrix( final CharacterStateMatrix<BinaryStates> binary_states_matrix ) {
495 _binary_internal_states_matrix = binary_states_matrix;
498 private void setCost( final int cost ) {
502 private void setDomainIdToSecondaryFeaturesMap( final Map<DomainId, Set<String>> domain_id_to_secondary_features_map ) {
503 _domain_id_to_secondary_features_map = domain_id_to_secondary_features_map;
506 private void setGainLossMatrix( final CharacterStateMatrix<GainLossStates> gain_loss_matrix ) {
507 _gain_loss_matrix = gain_loss_matrix;
510 private void setPositiveFilter( final SortedSet<DomainId> positive_filter ) {
511 _positive_filter = positive_filter;
514 private void setTotalGains( final int total_gains ) {
515 _total_gains = total_gains;
518 private void setTotalLosses( final int total_losses ) {
519 _total_losses = total_losses;
522 private void setTotalUnchanged( final int total_unchanged ) {
523 _total_unchanged = total_unchanged;
526 public static DomainParsimonyCalculator createInstance( final Phylogeny phylogeny ) {
527 return new DomainParsimonyCalculator( phylogeny );
530 public static DomainParsimonyCalculator createInstance( final Phylogeny phylogeny,
531 final List<GenomeWideCombinableDomains> gwcd_list ) {
532 if ( phylogeny.getNumberOfExternalNodes() != gwcd_list.size() ) {
533 throw new IllegalArgumentException( "number of external nodes [" + phylogeny.getNumberOfExternalNodes()
534 + "] does not equal size of genome wide combinable domains list [" + gwcd_list.size() + "]" );
536 return new DomainParsimonyCalculator( phylogeny, gwcd_list );
539 public static DomainParsimonyCalculator createInstance( final Phylogeny phylogeny,
540 final List<GenomeWideCombinableDomains> gwcd_list,
541 final Map<DomainId, Set<String>> domain_id_to_secondary_features_map ) {
542 if ( phylogeny.getNumberOfExternalNodes() != gwcd_list.size() ) {
543 throw new IllegalArgumentException( "size of external nodes does not equal size of genome wide combinable domains list" );
545 return new DomainParsimonyCalculator( phylogeny, gwcd_list, domain_id_to_secondary_features_map );
550 * For folds instead of Pfam-domains, for example
556 static CharacterStateMatrix<BinaryStates> createMatrixOfSecondaryFeaturePresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list,
557 final Map<DomainId, Set<String>> domain_id_to_second_features_map,
558 final Map<Species, MappingResults> mapping_results_map ) {
559 if ( gwcd_list.isEmpty() ) {
560 throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
562 if ( ( domain_id_to_second_features_map == null ) || domain_id_to_second_features_map.isEmpty() ) {
563 throw new IllegalArgumentException( "domain id to secondary features map is null or empty" );
565 final int number_of_identifiers = gwcd_list.size();
566 final SortedSet<String> all_secondary_features = new TreeSet<String>();
567 for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
570 for( final DomainId domain : gwcd.getAllDomainIds() ) {
571 if ( domain_id_to_second_features_map.containsKey( domain ) ) {
572 all_secondary_features.addAll( domain_id_to_second_features_map.get( domain ) );
579 if ( mapping_results_map != null ) {
580 final MappingResults mr = new MappingResults();
581 mr.setDescription( gwcd.getSpecies().getSpeciesId() );
582 mr.setSumOfSuccesses( mapped );
583 mr.setSumOfFailures( not_mapped );
584 mapping_results_map.put( gwcd.getSpecies(), mr );
587 final int number_of_characters = all_secondary_features.size();
588 final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
589 number_of_characters );
590 int character_index = 0;
591 for( final String second_id : all_secondary_features ) {
592 matrix.setCharacter( character_index++, second_id );
594 int identifier_index = 0;
595 final Set<String> all_identifiers = new HashSet<String>();
596 for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
597 final String species_id = gwcd.getSpecies().getSpeciesId();
598 if ( all_identifiers.contains( species_id ) ) {
599 throw new IllegalArgumentException( "species [" + species_id + "] is not unique" );
601 all_identifiers.add( species_id );
602 matrix.setIdentifier( identifier_index, species_id );
603 final Set<String> all_second_per_gwcd = new HashSet<String>();
604 for( final DomainId domain : gwcd.getAllDomainIds() ) {
605 if ( domain_id_to_second_features_map.containsKey( domain ) ) {
606 all_second_per_gwcd.addAll( domain_id_to_second_features_map.get( domain ) );
609 for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
610 if ( all_second_per_gwcd.contains( matrix.getCharacter( ci ) ) ) {
611 matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
614 matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.ABSENT );
623 public static CharacterStateMatrix<BinaryStates> createMatrixOfSecondaryFeatureBinaryDomainCombinationPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list,
624 final Map<DomainId, String> domain_id_to_second_features_map) {
625 if ( gwcd_list.isEmpty() ) {
626 throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
628 if ( ( domain_id_to_second_features_map == null ) || domain_id_to_second_features_map.isEmpty() ) {
629 throw new IllegalArgumentException( "domain id to secondary features map is null or empty" );
631 final int number_of_identifiers = gwcd_list.size();
632 final SortedSet<BinaryDomainCombination> all_binary_combinations_mapped = new TreeSet<BinaryDomainCombination>();
633 final Set<BinaryDomainCombination>[] binary_combinations_per_genome_mapped = new HashSet[ number_of_identifiers ];
634 int identifier_index = 0;
635 for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
636 binary_combinations_per_genome_mapped[ identifier_index ] = new HashSet<BinaryDomainCombination>();
637 for( final BinaryDomainCombination bc : gwcd.toBinaryDomainCombinations() ) {
639 if ( !domain_id_to_second_features_map.containsKey( bc.getId0() ) ) {
640 throw new IllegalArgumentException( "no mapping found for " + bc.getId0() );
642 if ( !domain_id_to_second_features_map.containsKey( bc.getId1() ) ) {
643 throw new IllegalArgumentException( "no mapping found for " + bc.getId1() );
647 final BinaryDomainCombination mapped_bc = new BasicBinaryDomainCombination( domain_id_to_second_features_map.get( bc.getId0()) ,
648 domain_id_to_second_features_map.get( bc.getId1()) );
649 all_binary_combinations_mapped.add( mapped_bc );
650 binary_combinations_per_genome_mapped[ identifier_index ].add( mapped_bc );
655 final int number_of_characters = all_binary_combinations_mapped.size();
656 final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
657 number_of_characters );
658 int character_index = 0;
659 for( final BinaryDomainCombination bc : all_binary_combinations_mapped ) {
660 matrix.setCharacter( character_index++, bc.toString() );
662 identifier_index = 0;
663 final Set<String> all_identifiers = new HashSet<String>();
664 for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
665 final String species_id = gwcd.getSpecies().getSpeciesId();
666 if ( all_identifiers.contains( species_id ) ) {
667 throw new AssertionError( "species [" + species_id + "] is not unique" );
669 all_identifiers.add( species_id );
670 matrix.setIdentifier( identifier_index, species_id );
671 for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
672 BinaryDomainCombination bc = null;
673 if ( gwcd.getDomainCombinationType() == DomainCombinationType.DIRECTED_ADJACTANT ) {
674 bc = AdjactantDirectedBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
676 else if ( gwcd.getDomainCombinationType() == DomainCombinationType.DIRECTED ) {
677 bc = DirectedBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
680 bc = BasicBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
682 if ( binary_combinations_per_genome_mapped[ identifier_index ].contains( bc ) ) {
683 matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
686 matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.ABSENT );
697 public static CharacterStateMatrix<BinaryStates> createMatrixOfBinaryDomainCombinationPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list ) {
698 if ( gwcd_list.isEmpty() ) {
699 throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
701 final int number_of_identifiers = gwcd_list.size();
702 final SortedSet<BinaryDomainCombination> all_binary_combinations = new TreeSet<BinaryDomainCombination>();
703 final Set<BinaryDomainCombination>[] binary_combinations_per_genome = new HashSet[ number_of_identifiers ];
704 int identifier_index = 0;
705 for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
706 binary_combinations_per_genome[ identifier_index ] = new HashSet<BinaryDomainCombination>();
707 for( final BinaryDomainCombination bc : gwcd.toBinaryDomainCombinations() ) {
708 all_binary_combinations.add( bc );
709 binary_combinations_per_genome[ identifier_index ].add( bc );
713 final int number_of_characters = all_binary_combinations.size();
714 final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
715 number_of_characters );
716 int character_index = 0;
717 for( final BinaryDomainCombination bc : all_binary_combinations ) {
718 matrix.setCharacter( character_index++, bc.toString() );
720 identifier_index = 0;
721 final Set<String> all_identifiers = new HashSet<String>();
722 for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
723 final String species_id = gwcd.getSpecies().getSpeciesId();
724 if ( all_identifiers.contains( species_id ) ) {
725 throw new AssertionError( "species [" + species_id + "] is not unique" );
727 all_identifiers.add( species_id );
728 matrix.setIdentifier( identifier_index, species_id );
729 for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
730 BinaryDomainCombination bc = null;
731 if ( gwcd.getDomainCombinationType() == DomainCombinationType.DIRECTED_ADJACTANT ) {
732 bc = AdjactantDirectedBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
734 else if ( gwcd.getDomainCombinationType() == DomainCombinationType.DIRECTED ) {
735 bc = DirectedBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
738 bc = BasicBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
740 if ( binary_combinations_per_genome[ identifier_index ].contains( bc ) ) {
741 matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
744 matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.ABSENT );
752 static CharacterStateMatrix<BinaryStates> createMatrixOfDomainPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list ) {
753 return createMatrixOfDomainPresenceOrAbsence( gwcd_list, null );
756 public static CharacterStateMatrix<BinaryStates> createMatrixOfDomainPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list,
757 final SortedSet<DomainId> positive_filter ) {
758 if ( gwcd_list.isEmpty() ) {
759 throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
761 if ( ( positive_filter != null ) && ( positive_filter.size() < 1 ) ) {
762 throw new IllegalArgumentException( "positive filter is empty" );
764 final int number_of_identifiers = gwcd_list.size();
765 final SortedSet<DomainId> all_domain_ids = new TreeSet<DomainId>();
766 for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
767 for( final DomainId domain : gwcd.getAllDomainIds() ) {
768 all_domain_ids.add( domain );
771 int number_of_characters = all_domain_ids.size();
772 if ( positive_filter != null ) {
773 //number_of_characters = positive_filter.size(); -- bad if doms in filter but not in genomes
774 number_of_characters = 0;
775 for( final DomainId id : all_domain_ids ) {
776 if ( positive_filter.contains( id ) ) {
777 number_of_characters++;
781 final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
782 number_of_characters );
783 int character_index = 0;
784 for( final DomainId id : all_domain_ids ) {
785 if ( positive_filter == null ) {
786 matrix.setCharacter( character_index++, id.getId() );
789 if ( positive_filter.contains( id ) ) {
790 matrix.setCharacter( character_index++, id.getId() );
794 int identifier_index = 0;
795 final Set<String> all_identifiers = new HashSet<String>();
796 for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
797 final String species_id = gwcd.getSpecies().getSpeciesId();
798 if ( all_identifiers.contains( species_id ) ) {
799 throw new IllegalArgumentException( "species [" + species_id + "] is not unique" );
801 all_identifiers.add( species_id );
802 matrix.setIdentifier( identifier_index, species_id );
803 for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
804 if ( ForesterUtil.isEmpty( matrix.getCharacter( ci ) ) ) {
805 throw new RuntimeException( "this should not have happened: problem with character #" + ci );
807 final DomainId id = new DomainId( matrix.getCharacter( ci ) );
808 if ( gwcd.contains( id ) ) {
809 matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
812 matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.ABSENT );
822 private static int getStateSumDeltaOnNode( final String node_identifier,
823 final CharacterStateMatrix<GainLossStates> gain_loss_matrix,
824 final GainLossStates state ) {
825 if ( gain_loss_matrix == null ) {
826 throw new RuntimeException( "no gain loss matrix has been calculated" );
828 if ( ForesterUtil.isEmpty( node_identifier ) ) {
829 throw new IllegalArgumentException( "node identifier must not be empty" );
831 if ( gain_loss_matrix.isEmpty() ) {
832 throw new RuntimeException( "gain loss matrix is empty" );
835 final int id_index = gain_loss_matrix.getIdentifierIndex( node_identifier );
836 for( int c = 0; c < gain_loss_matrix.getNumberOfCharacters(); ++c ) {
837 if ( gain_loss_matrix.getState( id_index, c ) == state ) {
844 private static SortedSet<String> getUnitsDeltaOnNode( final String node_identifier,
845 final CharacterStateMatrix<GainLossStates> gain_loss_matrix,
846 final GainLossStates state ) {
847 if ( gain_loss_matrix == null ) {
848 throw new RuntimeException( "no gain loss matrix has been calculated" );
850 if ( ForesterUtil.isEmpty( node_identifier ) ) {
851 throw new IllegalArgumentException( "node identifier must not be empty" );
853 if ( gain_loss_matrix.isEmpty() ) {
854 throw new RuntimeException( "gain loss matrix is empty" );
856 final SortedSet<String> d = new TreeSet<String>();
857 final int id_index = gain_loss_matrix.getIdentifierIndex( node_identifier );
858 for( int c = 0; c < gain_loss_matrix.getNumberOfCharacters(); ++c ) {
859 if ( gain_loss_matrix.getState( id_index, c ) == state ) {
860 if ( d.contains( gain_loss_matrix.getCharacter( c ) ) ) {
861 throw new AssertionError( "this should not have happended: character ["
862 + gain_loss_matrix.getCharacter( c ) + "] already in set" );
864 d.add( gain_loss_matrix.getCharacter( c ) );