3 // FORESTER -- software libraries and applications
4 // for evolutionary biology research and applications.
6 // Copyright (C) 2008-2009 Christian M. Zmasek
7 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 // Contact: phylosoft @ gmail . com
25 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
27 package org.forester.surfacing;
29 import java.util.HashMap;
30 import java.util.HashSet;
31 import java.util.Iterator;
32 import java.util.List;
34 import java.util.Map.Entry;
36 import java.util.SortedSet;
37 import java.util.TreeSet;
39 import org.forester.application.surfacing;
40 import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix;
41 import org.forester.evoinference.matrix.character.CharacterStateMatrix;
42 import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates;
43 import org.forester.evoinference.matrix.character.CharacterStateMatrix.GainLossStates;
44 import org.forester.evoinference.parsimony.DolloParsimony;
45 import org.forester.evoinference.parsimony.FitchParsimony;
46 import org.forester.phylogeny.Phylogeny;
47 import org.forester.phylogeny.PhylogenyNode;
48 import org.forester.phylogeny.data.BinaryCharacters;
49 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
50 import org.forester.protein.BinaryDomainCombination;
51 import org.forester.protein.BinaryDomainCombination.DomainCombinationType;
52 import org.forester.species.Species;
53 import org.forester.util.ForesterUtil;
55 public final class DomainParsimonyCalculator {
57 private static final String TYPE_FORBINARY_CHARACTERS = "parsimony inferred";
58 private CharacterStateMatrix<BinaryStates> _binary_internal_states_matrix;
60 private Map<String, Set<String>> _domain_id_to_secondary_features_map;
61 private CharacterStateMatrix<GainLossStates> _gain_loss_matrix;
62 private final List<GenomeWideCombinableDomains> _gwcd_list;
63 private final Phylogeny _phylogeny;
64 private SortedSet<String> _positive_filter;
65 private int _total_gains;
66 private int _total_losses;
67 private int _total_unchanged;
69 private DomainParsimonyCalculator( final Phylogeny phylogeny ) {
71 _phylogeny = phylogeny;
75 private DomainParsimonyCalculator( final Phylogeny phylogeny, final List<GenomeWideCombinableDomains> gwcd_list ) {
77 _phylogeny = phylogeny;
78 _gwcd_list = gwcd_list;
81 private DomainParsimonyCalculator( final Phylogeny phylogeny,
82 final List<GenomeWideCombinableDomains> gwcd_list,
83 final Map<String, Set<String>> domain_id_to_secondary_features_map ) {
85 _phylogeny = phylogeny;
86 _gwcd_list = gwcd_list;
87 setDomainIdToSecondaryFeaturesMap( domain_id_to_secondary_features_map );
90 public void executeDolloParsimonyOnBinaryDomainCombintionPresence() {
91 executeDolloParsimony( false );
94 public void executeDolloParsimonyOnDomainPresence() {
95 executeDolloParsimony( true );
98 public void executeDolloParsimonyOnDomainPresence( final SortedSet<String> positive_filter ) {
99 setPositiveFilter( positive_filter );
100 executeDolloParsimony( true );
101 setPositiveFilter( null );
104 public void executeDolloParsimonyOnSecondaryFeatures( final Map<Species, MappingResults> mapping_results_map ) {
105 if ( getDomainIdToSecondaryFeaturesMap() == null ) {
106 throw new RuntimeException( "Domain id to secondary features map has apparently not been set" );
109 final DolloParsimony dollo = DolloParsimony.createInstance();
110 dollo.setReturnGainLossMatrix( true );
111 dollo.setReturnInternalStates( true );
112 final CharacterStateMatrix<BinaryStates> states = createMatrixOfSecondaryFeaturePresenceOrAbsence( mapping_results_map );
113 dollo.execute( getPhylogeny(), states );
114 setGainLossMatrix( dollo.getGainLossMatrix() );
115 setBinaryInternalStatesMatrix( dollo.getInternalStatesMatrix() );
116 setCost( dollo.getCost() );
117 setTotalGains( dollo.getTotalGains() );
118 setTotalLosses( dollo.getTotalLosses() );
119 setTotalUnchanged( dollo.getTotalUnchanged() );
122 public void executeFitchParsimonyOnBinaryDomainCombintion( final boolean use_last ) {
123 executeFitchParsimony( false, use_last, false, 0 );
126 public void executeFitchParsimonyOnBinaryDomainCombintion( final long random_number_seed ) {
127 executeFitchParsimony( false, false, true, random_number_seed );
130 public void executeFitchParsimonyOnBinaryDomainCombintionOnSecondaryFeatures( final boolean use_last ) {
131 executeFitchParsimonyOnSecondaryFeatures( use_last, false, 0 );
134 public void executeFitchParsimonyOnDomainPresence( final boolean use_last ) {
135 executeFitchParsimony( true, use_last, false, 0 );
138 public void executeFitchParsimonyOnDomainPresence( final long random_number_seed ) {
139 executeFitchParsimony( true, false, true, random_number_seed );
142 public void executeOnGivenBinaryStatesMatrix( final CharacterStateMatrix<BinaryStates> binary_states_matrix,
143 final String[] character_labels ) {
145 if ( binary_states_matrix.getNumberOfCharacters() != character_labels.length ) {
146 throw new IllegalArgumentException( "binary states matrix number of characters is not equal to the number of character labels provided" );
148 if ( binary_states_matrix.getNumberOfIdentifiers() != getPhylogeny().getNumberOfBranches() ) {
149 throw new IllegalArgumentException( "binary states matrix number of identifiers is not equal to the number of tree nodes provided" );
151 final CharacterStateMatrix<GainLossStates> gl_matrix = new BasicCharacterStateMatrix<GainLossStates>( binary_states_matrix
152 .getNumberOfIdentifiers(),
154 .getNumberOfCharacters() );
156 int total_losses = 0;
157 int total_unchanged = 0;
159 for( final PhylogenyNodeIterator it = getPhylogeny().iteratorPostorder(); it.hasNext(); ) {
160 gl_matrix.setIdentifier( i++, it.next().getName() );
162 for( int c = 0; c < character_labels.length; ++c ) {
163 gl_matrix.setCharacter( c, character_labels[ c ] );
164 final PhylogenyNodeIterator it = getPhylogeny().iteratorPostorder();
165 while ( it.hasNext() ) {
166 final PhylogenyNode node = it.next();
167 final String name = node.getName();
168 final BinaryStates bin_state = binary_states_matrix.getState( binary_states_matrix
169 .getIdentifierIndex( name ), c );
170 final PhylogenyNode parent_node = getPhylogeny().getNode( name ).getParent();
171 GainLossStates gl_state = null;
172 if ( node.isRoot() ) {
174 if ( bin_state == BinaryStates.ABSENT ) {
175 gl_state = GainLossStates.UNCHANGED_ABSENT;
178 gl_state = GainLossStates.UNCHANGED_PRESENT;
182 final BinaryStates parent_bin_state = binary_states_matrix.getState( binary_states_matrix
183 .getIdentifierIndex( parent_node.getName() ), c );
184 if ( bin_state == BinaryStates.ABSENT ) {
185 if ( parent_bin_state == BinaryStates.ABSENT ) {
187 gl_state = GainLossStates.UNCHANGED_ABSENT;
191 gl_state = GainLossStates.LOSS;
195 if ( parent_bin_state == BinaryStates.ABSENT ) {
197 gl_state = GainLossStates.GAIN;
201 gl_state = GainLossStates.UNCHANGED_PRESENT;
205 gl_matrix.setState( name, c, gl_state );
208 setTotalGains( total_gains );
209 setTotalLosses( total_losses );
210 setTotalUnchanged( total_unchanged );
211 setCost( total_gains + total_losses );
212 setGainLossMatrix( gl_matrix );
215 public int getCost() {
219 public CharacterStateMatrix<Integer> getGainLossCountsMatrix() {
220 final CharacterStateMatrix<Integer> matrix = new BasicCharacterStateMatrix<Integer>( getGainLossMatrix()
221 .getNumberOfIdentifiers(), 3 );
222 for( int i = 0; i < getGainLossMatrix().getNumberOfIdentifiers(); ++i ) {
223 matrix.setIdentifier( i, getGainLossMatrix().getIdentifier( i ) );
225 matrix.setCharacter( 0, "GAINS" );
226 matrix.setCharacter( 1, "LOSSES" );
227 matrix.setCharacter( 2, "NET" );
228 for( int i = 0; i < getGainLossMatrix().getNumberOfIdentifiers(); ++i ) {
231 for( int c = 0; c < getGainLossMatrix().getNumberOfCharacters(); ++c ) {
232 final GainLossStates s = getGainLossMatrix().getState( i, c );
233 if ( s == GainLossStates.GAIN ) {
236 else if ( s == GainLossStates.LOSS ) {
240 matrix.setState( i, 0, gains );
241 matrix.setState( i, 1, losses );
242 matrix.setState( i, 2, gains - losses );
247 public CharacterStateMatrix<GainLossStates> getGainLossMatrix() {
248 return _gain_loss_matrix;
251 public CharacterStateMatrix<BinaryStates> getInternalStatesMatrix() {
252 return _binary_internal_states_matrix;
255 public int getNetGainsOnNode( final String node_identifier ) {
256 if ( getGainLossMatrix() == null ) {
257 throw new RuntimeException( "no gain loss matrix has been calculated" );
260 final int id_index = getGainLossMatrix().getIdentifierIndex( node_identifier );
261 for( int c = 0; c < getGainLossMatrix().getNumberOfCharacters(); ++c ) {
262 if ( getGainLossMatrix().getState( id_index, c ) == GainLossStates.GAIN ) {
265 else if ( getGainLossMatrix().getState( id_index, c ) == GainLossStates.LOSS ) {
272 public int getSumOfGainsOnNode( final String node_identifier ) {
273 return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.GAIN );
276 public int getSumOfLossesOnNode( final String node_identifier ) {
277 return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.LOSS );
280 public int getSumOfPresentOnNode( final String node_identifier ) {
281 return getSumOfGainsOnNode( node_identifier ) + getSumOfUnchangedPresentOnNode( node_identifier );
284 public int getTotalGains() {
288 public int getTotalLosses() {
289 return _total_losses;
292 public int getTotalUnchanged() {
293 return _total_unchanged;
296 public SortedSet<String> getUnitsGainedOnNode( final String node_identifier ) {
297 return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.GAIN );
300 public SortedSet<String> getUnitsLostOnNode( final String node_identifier ) {
301 return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.LOSS );
304 public SortedSet<String> getUnitsOnNode( final String node_identifier ) {
305 final SortedSet<String> present = getUnitsGainedOnNode( node_identifier );
306 present.addAll( getUnitsUnchangedPresentOnNode( node_identifier ) );
310 int calculateNumberOfBinaryDomainCombination() {
311 if ( getGenomeWideCombinableDomainsList().isEmpty() ) {
312 throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
314 final Set<BinaryDomainCombination> all_binary_combinations = new HashSet<BinaryDomainCombination>();
315 for( final GenomeWideCombinableDomains gwcd : getGenomeWideCombinableDomainsList() ) {
316 for( final BinaryDomainCombination bc : gwcd.toBinaryDomainCombinations() ) {
317 all_binary_combinations.add( bc );
320 return all_binary_combinations.size();
323 CharacterStateMatrix<BinaryStates> createMatrixOfBinaryDomainCombinationPresenceOrAbsence() {
324 return createMatrixOfBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList() );
327 CharacterStateMatrix<BinaryStates> createMatrixOfDomainPresenceOrAbsence() {
328 return createMatrixOfDomainPresenceOrAbsence( getGenomeWideCombinableDomainsList(), getPositiveFilter() );
331 CharacterStateMatrix<BinaryStates> createMatrixOfSecondaryFeaturePresenceOrAbsence( final Map<Species, MappingResults> mapping_results_map ) {
332 return createMatrixOfSecondaryFeaturePresenceOrAbsence( getGenomeWideCombinableDomainsList(),
333 getDomainIdToSecondaryFeaturesMap(),
334 mapping_results_map );
337 Phylogeny decoratePhylogenyWithDomains( final Phylogeny phylogeny ) {
338 for( final PhylogenyNodeIterator it = phylogeny.iteratorPostorder(); it.hasNext(); ) {
339 final PhylogenyNode node = it.next();
340 final String node_identifier = node.getName();
341 final BinaryCharacters bc = new BinaryCharacters( getUnitsOnNode( node_identifier ),
342 getUnitsGainedOnNode( node_identifier ),
343 getUnitsLostOnNode( node_identifier ),
344 TYPE_FORBINARY_CHARACTERS,
345 getSumOfPresentOnNode( node_identifier ),
346 getSumOfGainsOnNode( node_identifier ),
347 getSumOfLossesOnNode( node_identifier ) );
348 node.getNodeData().setBinaryCharacters( bc );
353 int getSumOfUnchangedAbsentOnNode( final String node_identifier ) {
354 return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_ABSENT );
357 int getSumOfUnchangedOnNode( final String node_identifier ) {
358 return getSumOfUnchangedPresentOnNode( node_identifier ) + getSumOfUnchangedAbsentOnNode( node_identifier );
361 int getSumOfUnchangedPresentOnNode( final String node_identifier ) {
362 return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_PRESENT );
365 SortedSet<String> getUnitsUnchangedAbsentOnNode( final String node_identifier ) {
366 return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_ABSENT );
369 SortedSet<String> getUnitsUnchangedPresentOnNode( final String node_identifier ) {
370 return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_PRESENT );
373 private void executeDolloParsimony( final boolean on_domain_presence ) {
375 final DolloParsimony dollo = DolloParsimony.createInstance();
376 dollo.setReturnGainLossMatrix( true );
377 dollo.setReturnInternalStates( true );
378 CharacterStateMatrix<BinaryStates> states = null;
379 if ( on_domain_presence ) {
380 states = createMatrixOfDomainPresenceOrAbsence();
383 states = createMatrixOfBinaryDomainCombinationPresenceOrAbsence();
385 dollo.execute( getPhylogeny(), states );
386 setGainLossMatrix( dollo.getGainLossMatrix() );
387 setBinaryInternalStatesMatrix( dollo.getInternalStatesMatrix() );
388 setCost( dollo.getCost() );
389 setTotalGains( dollo.getTotalGains() );
390 setTotalLosses( dollo.getTotalLosses() );
391 setTotalUnchanged( dollo.getTotalUnchanged() );
394 private void executeFitchParsimony( final boolean on_domain_presence,
395 final boolean use_last,
396 final boolean randomize,
397 final long random_number_seed ) {
400 System.out.println( " Fitch parsimony: use_last = true" );
402 final FitchParsimony<BinaryStates> fitch = new FitchParsimony<BinaryStates>();
403 fitch.setRandomize( randomize );
405 fitch.setRandomNumberSeed( random_number_seed );
407 fitch.setUseLast( use_last );
408 fitch.setReturnGainLossMatrix( true );
409 fitch.setReturnInternalStates( true );
410 CharacterStateMatrix<BinaryStates> states = null;
411 if ( on_domain_presence ) {
412 states = createMatrixOfDomainPresenceOrAbsence( getGenomeWideCombinableDomainsList() );
415 states = createMatrixOfBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList() );
417 fitch.execute( getPhylogeny(), states, true );
418 setGainLossMatrix( fitch.getGainLossMatrix() );
419 setBinaryInternalStatesMatrix( fitch.getInternalStatesMatrix() );
420 setCost( fitch.getCost() );
421 setTotalGains( fitch.getTotalGains() );
422 setTotalLosses( fitch.getTotalLosses() );
423 setTotalUnchanged( fitch.getTotalUnchanged() );
426 private void executeFitchParsimonyOnSecondaryFeatures( final boolean use_last,
427 final boolean randomize,
428 final long random_number_seed ) {
431 System.out.println( " Fitch parsimony: use_last = true" );
433 final FitchParsimony<BinaryStates> fitch = new FitchParsimony<BinaryStates>();
434 fitch.setRandomize( randomize );
436 fitch.setRandomNumberSeed( random_number_seed );
438 fitch.setUseLast( use_last );
439 fitch.setReturnGainLossMatrix( true );
440 fitch.setReturnInternalStates( true );
441 final Map<String, Set<String>> map = getDomainIdToSecondaryFeaturesMap();
442 final Map<String, String> newmap = new HashMap<String, String>();
443 final Iterator<Entry<String, Set<String>>> it = map.entrySet().iterator();
444 while ( it.hasNext() ) {
445 final Map.Entry<String, Set<String>> pair = it.next();
446 if ( pair.getValue().size() != 1 ) {
447 throw new IllegalArgumentException( pair.getKey() + " mapps to " + pair.getValue().size() + " items" );
449 newmap.put( pair.getKey(), ( String ) pair.getValue().toArray()[ 0 ] );
451 final CharacterStateMatrix<BinaryStates> states = createMatrixOfSecondaryFeatureBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList(),
453 fitch.execute( getPhylogeny(), states, true );
454 setGainLossMatrix( fitch.getGainLossMatrix() );
455 setBinaryInternalStatesMatrix( fitch.getInternalStatesMatrix() );
456 setCost( fitch.getCost() );
457 setTotalGains( fitch.getTotalGains() );
458 setTotalLosses( fitch.getTotalLosses() );
459 setTotalUnchanged( fitch.getTotalUnchanged() );
462 private Map<String, Set<String>> getDomainIdToSecondaryFeaturesMap() {
463 return _domain_id_to_secondary_features_map;
466 private List<GenomeWideCombinableDomains> getGenomeWideCombinableDomainsList() {
470 private Phylogeny getPhylogeny() {
474 private SortedSet<String> getPositiveFilter() {
475 return _positive_filter;
478 private void init() {
479 setDomainIdToSecondaryFeaturesMap( null );
480 setPositiveFilter( null );
484 private void reset() {
485 setGainLossMatrix( null );
486 setBinaryInternalStatesMatrix( null );
489 setTotalLosses( -1 );
490 setTotalUnchanged( -1 );
493 private void setBinaryInternalStatesMatrix( final CharacterStateMatrix<BinaryStates> binary_states_matrix ) {
494 _binary_internal_states_matrix = binary_states_matrix;
497 private void setCost( final int cost ) {
501 private void setDomainIdToSecondaryFeaturesMap( final Map<String, Set<String>> domain_id_to_secondary_features_map ) {
502 _domain_id_to_secondary_features_map = domain_id_to_secondary_features_map;
505 private void setGainLossMatrix( final CharacterStateMatrix<GainLossStates> gain_loss_matrix ) {
506 _gain_loss_matrix = gain_loss_matrix;
509 private void setPositiveFilter( final SortedSet<String> positive_filter ) {
510 _positive_filter = positive_filter;
513 private void setTotalGains( final int total_gains ) {
514 _total_gains = total_gains;
517 private void setTotalLosses( final int total_losses ) {
518 _total_losses = total_losses;
521 private void setTotalUnchanged( final int total_unchanged ) {
522 _total_unchanged = total_unchanged;
525 public static DomainParsimonyCalculator createInstance( final Phylogeny phylogeny ) {
526 return new DomainParsimonyCalculator( phylogeny );
529 public static DomainParsimonyCalculator createInstance( final Phylogeny phylogeny,
530 final List<GenomeWideCombinableDomains> gwcd_list ) {
531 if ( phylogeny.getNumberOfExternalNodes() != gwcd_list.size() ) {
532 throw new IllegalArgumentException( "number of external nodes [" + phylogeny.getNumberOfExternalNodes()
533 + "] does not equal size of genome wide combinable domains list [" + gwcd_list.size() + "]" );
535 return new DomainParsimonyCalculator( phylogeny, gwcd_list );
538 public static DomainParsimonyCalculator createInstance( final Phylogeny phylogeny,
539 final List<GenomeWideCombinableDomains> gwcd_list,
540 final Map<String, Set<String>> domain_id_to_secondary_features_map ) {
541 if ( phylogeny.getNumberOfExternalNodes() != gwcd_list.size() ) {
542 throw new IllegalArgumentException( "size of external nodes does not equal size of genome wide combinable domains list" );
544 return new DomainParsimonyCalculator( phylogeny, gwcd_list, domain_id_to_secondary_features_map );
547 public static CharacterStateMatrix<BinaryStates> createMatrixOfBinaryDomainCombinationPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list ) {
548 if ( gwcd_list.isEmpty() ) {
549 throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
551 final int number_of_identifiers = gwcd_list.size();
552 final SortedSet<BinaryDomainCombination> all_binary_combinations = new TreeSet<BinaryDomainCombination>();
553 final Set<BinaryDomainCombination>[] binary_combinations_per_genome = new HashSet[ number_of_identifiers ];
554 int identifier_index = 0;
555 for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
556 binary_combinations_per_genome[ identifier_index ] = new HashSet<BinaryDomainCombination>();
557 for( final BinaryDomainCombination bc : gwcd.toBinaryDomainCombinations() ) {
558 all_binary_combinations.add( bc );
559 binary_combinations_per_genome[ identifier_index ].add( bc );
563 final int number_of_characters = all_binary_combinations.size();
564 final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
565 number_of_characters );
566 int character_index = 0;
567 for( final BinaryDomainCombination bc : all_binary_combinations ) {
568 matrix.setCharacter( character_index++, bc.toString() );
570 identifier_index = 0;
571 final Set<String> all_identifiers = new HashSet<String>();
572 for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
573 final String species_id = gwcd.getSpecies().getSpeciesId();
574 if ( all_identifiers.contains( species_id ) ) {
575 throw new AssertionError( "species [" + species_id + "] is not unique" );
577 all_identifiers.add( species_id );
578 matrix.setIdentifier( identifier_index, species_id );
579 for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
580 BinaryDomainCombination bc = null;
581 if ( gwcd.getDomainCombinationType() == DomainCombinationType.DIRECTED_ADJACTANT ) {
582 bc = AdjactantDirectedBinaryDomainCombination.obtainInstance( matrix.getCharacter( ci ) );
584 else if ( gwcd.getDomainCombinationType() == DomainCombinationType.DIRECTED ) {
585 bc = DirectedBinaryDomainCombination.obtainInstance( matrix.getCharacter( ci ) );
588 bc = BasicBinaryDomainCombination.obtainInstance( matrix.getCharacter( ci ) );
590 if ( binary_combinations_per_genome[ identifier_index ].contains( bc ) ) {
591 matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
594 matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.ABSENT );
602 public static CharacterStateMatrix<BinaryStates> createMatrixOfDomainPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list,
603 final SortedSet<String> positive_filter ) {
604 if ( gwcd_list.isEmpty() ) {
605 throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
607 if ( ( positive_filter != null ) && ( positive_filter.size() < 1 ) ) {
608 throw new IllegalArgumentException( "positive filter is empty" );
610 final int number_of_identifiers = gwcd_list.size();
611 final SortedSet<String> all_domain_ids = new TreeSet<String>();
612 for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
613 for( final String domain : gwcd.getAllDomainIds() ) {
614 all_domain_ids.add( domain );
617 int number_of_characters = all_domain_ids.size();
618 if ( positive_filter != null ) {
619 //number_of_characters = positive_filter.size(); -- bad if doms in filter but not in genomes
620 number_of_characters = 0;
621 for( final String id : all_domain_ids ) {
622 if ( positive_filter.contains( id ) ) {
623 number_of_characters++;
627 final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
628 number_of_characters );
629 int character_index = 0;
630 for( final String id : all_domain_ids ) {
631 if ( positive_filter == null ) {
632 matrix.setCharacter( character_index++, id );
635 if ( positive_filter.contains( id ) ) {
636 matrix.setCharacter( character_index++, id );
640 int identifier_index = 0;
641 final Set<String> all_identifiers = new HashSet<String>();
642 for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
643 final String species_id = gwcd.getSpecies().getSpeciesId();
644 if ( all_identifiers.contains( species_id ) ) {
645 throw new IllegalArgumentException( "species [" + species_id + "] is not unique" );
647 all_identifiers.add( species_id );
648 matrix.setIdentifier( identifier_index, species_id );
649 for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
650 if ( ForesterUtil.isEmpty( matrix.getCharacter( ci ) ) ) {
651 throw new RuntimeException( "this should not have happened: problem with character #" + ci );
653 final String id = matrix.getCharacter( ci );
654 if ( gwcd.contains( id ) ) {
655 matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
658 matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.ABSENT );
666 public static CharacterStateMatrix<BinaryStates> createMatrixOfSecondaryFeatureBinaryDomainCombinationPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list,
667 final Map<String, String> domain_id_to_second_features_map ) {
668 if ( gwcd_list.isEmpty() ) {
669 throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
671 if ( ( domain_id_to_second_features_map == null ) || domain_id_to_second_features_map.isEmpty() ) {
672 throw new IllegalArgumentException( "domain id to secondary features map is null or empty" );
674 final int number_of_identifiers = gwcd_list.size();
675 final SortedSet<BinaryDomainCombination> all_binary_combinations_mapped = new TreeSet<BinaryDomainCombination>();
676 final Set<BinaryDomainCombination>[] binary_combinations_per_genome_mapped = new HashSet[ number_of_identifiers ];
677 int identifier_index = 0;
678 final SortedSet<String> no_mappings = new TreeSet<String>();
679 for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
680 binary_combinations_per_genome_mapped[ identifier_index ] = new HashSet<BinaryDomainCombination>();
681 for( final BinaryDomainCombination bc : gwcd.toBinaryDomainCombinations() ) {
682 final BinaryDomainCombination mapped_bc = mapBinaryDomainCombination( domain_id_to_second_features_map,
685 all_binary_combinations_mapped.add( mapped_bc );
686 binary_combinations_per_genome_mapped[ identifier_index ].add( mapped_bc );
690 if ( !no_mappings.isEmpty() ) {
691 ForesterUtil.programMessage( surfacing.PRG_NAME, "No mappings for the following (" + no_mappings.size()
693 for( final String id : no_mappings ) {
694 ForesterUtil.programMessage( surfacing.PRG_NAME, id );
697 final int number_of_characters = all_binary_combinations_mapped.size();
698 final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
699 number_of_characters );
700 int character_index = 0;
701 for( final BinaryDomainCombination bc : all_binary_combinations_mapped ) {
702 matrix.setCharacter( character_index++, bc.toString() );
704 identifier_index = 0;
705 final Set<String> all_identifiers = new HashSet<String>();
706 for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
707 final String species_id = gwcd.getSpecies().getSpeciesId();
708 if ( all_identifiers.contains( species_id ) ) {
709 throw new AssertionError( "species [" + species_id + "] is not unique" );
711 all_identifiers.add( species_id );
712 matrix.setIdentifier( identifier_index, species_id );
713 for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
714 BinaryDomainCombination bc = null;
715 if ( gwcd.getDomainCombinationType() == DomainCombinationType.DIRECTED_ADJACTANT ) {
716 bc = AdjactantDirectedBinaryDomainCombination.obtainInstance( matrix.getCharacter( ci ) );
718 else if ( gwcd.getDomainCombinationType() == DomainCombinationType.DIRECTED ) {
719 bc = DirectedBinaryDomainCombination.obtainInstance( matrix.getCharacter( ci ) );
722 bc = BasicBinaryDomainCombination.obtainInstance( matrix.getCharacter( ci ) );
724 if ( binary_combinations_per_genome_mapped[ identifier_index ].contains( bc ) ) {
725 matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
728 matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.ABSENT );
736 static CharacterStateMatrix<BinaryStates> createMatrixOfDomainPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list ) {
737 return createMatrixOfDomainPresenceOrAbsence( gwcd_list, null );
741 * For folds instead of Pfam-domains, for example
747 static CharacterStateMatrix<BinaryStates> createMatrixOfSecondaryFeaturePresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list,
748 final Map<String, Set<String>> domain_id_to_second_features_map,
749 final Map<Species, MappingResults> mapping_results_map ) {
750 if ( gwcd_list.isEmpty() ) {
751 throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
753 if ( ( domain_id_to_second_features_map == null ) || domain_id_to_second_features_map.isEmpty() ) {
754 throw new IllegalArgumentException( "domain id to secondary features map is null or empty" );
756 final int number_of_identifiers = gwcd_list.size();
757 final SortedSet<String> all_secondary_features = new TreeSet<String>();
758 for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
761 for( final String domain : gwcd.getAllDomainIds() ) {
762 if ( domain_id_to_second_features_map.containsKey( domain ) ) {
763 all_secondary_features.addAll( domain_id_to_second_features_map.get( domain ) );
770 if ( mapping_results_map != null ) {
771 final MappingResults mr = new MappingResults();
772 mr.setDescription( gwcd.getSpecies().getSpeciesId() );
773 mr.setSumOfSuccesses( mapped );
774 mr.setSumOfFailures( not_mapped );
775 mapping_results_map.put( gwcd.getSpecies(), mr );
778 final int number_of_characters = all_secondary_features.size();
779 final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
780 number_of_characters );
781 int character_index = 0;
782 for( final String second_id : all_secondary_features ) {
783 matrix.setCharacter( character_index++, second_id );
785 int identifier_index = 0;
786 final Set<String> all_identifiers = new HashSet<String>();
787 for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
788 final String species_id = gwcd.getSpecies().getSpeciesId();
789 if ( all_identifiers.contains( species_id ) ) {
790 throw new IllegalArgumentException( "species [" + species_id + "] is not unique" );
792 all_identifiers.add( species_id );
793 matrix.setIdentifier( identifier_index, species_id );
794 final Set<String> all_second_per_gwcd = new HashSet<String>();
795 for( final String domain : gwcd.getAllDomainIds() ) {
796 if ( domain_id_to_second_features_map.containsKey( domain ) ) {
797 all_second_per_gwcd.addAll( domain_id_to_second_features_map.get( domain ) );
800 for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
801 if ( all_second_per_gwcd.contains( matrix.getCharacter( ci ) ) ) {
802 matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
805 matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.ABSENT );
813 private static int getStateSumDeltaOnNode( final String node_identifier,
814 final CharacterStateMatrix<GainLossStates> gain_loss_matrix,
815 final GainLossStates state ) {
816 if ( gain_loss_matrix == null ) {
817 throw new RuntimeException( "no gain loss matrix has been calculated" );
819 if ( ForesterUtil.isEmpty( node_identifier ) ) {
820 throw new IllegalArgumentException( "node identifier must not be empty" );
822 if ( gain_loss_matrix.isEmpty() ) {
823 throw new RuntimeException( "gain loss matrix is empty" );
826 final int id_index = gain_loss_matrix.getIdentifierIndex( node_identifier );
827 for( int c = 0; c < gain_loss_matrix.getNumberOfCharacters(); ++c ) {
828 if ( gain_loss_matrix.getState( id_index, c ) == state ) {
835 private static SortedSet<String> getUnitsDeltaOnNode( final String node_identifier,
836 final CharacterStateMatrix<GainLossStates> gain_loss_matrix,
837 final GainLossStates state ) {
838 if ( gain_loss_matrix == null ) {
839 throw new RuntimeException( "no gain loss matrix has been calculated" );
841 if ( ForesterUtil.isEmpty( node_identifier ) ) {
842 throw new IllegalArgumentException( "node identifier must not be empty" );
844 if ( gain_loss_matrix.isEmpty() ) {
845 throw new RuntimeException( "gain loss matrix is empty" );
847 final SortedSet<String> d = new TreeSet<String>();
848 final int id_index = gain_loss_matrix.getIdentifierIndex( node_identifier );
849 for( int c = 0; c < gain_loss_matrix.getNumberOfCharacters(); ++c ) {
850 if ( gain_loss_matrix.getState( id_index, c ) == state ) {
851 if ( d.contains( gain_loss_matrix.getCharacter( c ) ) ) {
852 throw new AssertionError( "this should not have happended: character ["
853 + gain_loss_matrix.getCharacter( c ) + "] already in set" );
855 d.add( gain_loss_matrix.getCharacter( c ) );
861 private static BinaryDomainCombination mapBinaryDomainCombination( final Map<String, String> domain_id_to_second_features_map,
862 final BinaryDomainCombination bc,
863 final SortedSet<String> no_mappings ) {
866 if ( !domain_id_to_second_features_map.containsKey( bc.getId0() ) ) {
867 no_mappings.add( bc.getId0() );
871 id0 = domain_id_to_second_features_map.get( bc.getId0() );
873 if ( !domain_id_to_second_features_map.containsKey( bc.getId1() ) ) {
874 no_mappings.add( bc.getId1() );
878 id1 = domain_id_to_second_features_map.get( bc.getId1() );
880 // return new BasicBinaryDomainCombination( id0, id1 );
881 return BasicBinaryDomainCombination.obtainInstance( id0, id1 );