clean up
[jalview.git] / forester / java / src / org / forester / surfacing / DomainParsimonyCalculator.java
1 // $Id:
2 //
3 // FORESTER -- software libraries and applications
4 // for evolutionary biology research and applications.
5 //
6 // Copyright (C) 2008-2009 Christian M. Zmasek
7 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
8 // All rights reserved
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 //
24 // Contact: phylosoft @ gmail . com
25 // WWW: www.phylosoft.org/forester
26
27 package org.forester.surfacing;
28
29 import java.util.HashSet;
30 import java.util.List;
31 import java.util.Map;
32 import java.util.Set;
33 import java.util.SortedSet;
34 import java.util.TreeSet;
35
36 import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix;
37 import org.forester.evoinference.matrix.character.CharacterStateMatrix;
38 import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates;
39 import org.forester.evoinference.matrix.character.CharacterStateMatrix.GainLossStates;
40 import org.forester.evoinference.parsimony.DolloParsimony;
41 import org.forester.evoinference.parsimony.FitchParsimony;
42 import org.forester.phylogeny.Phylogeny;
43 import org.forester.phylogeny.PhylogenyNode;
44 import org.forester.phylogeny.data.BinaryCharacters;
45 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
46 import org.forester.surfacing.BinaryDomainCombination.DomainCombinationType;
47 import org.forester.util.ForesterUtil;
48
49 public final class DomainParsimonyCalculator {
50
51     private static final String                     TYPE_FORBINARY_CHARACTERS = "parsimony inferred";
52     private CharacterStateMatrix<GainLossStates>    _gain_loss_matrix;
53     private CharacterStateMatrix<BinaryStates>      _binary_internal_states_matrix;
54     private final List<GenomeWideCombinableDomains> _gwcd_list;
55     private final Phylogeny                         _phylogeny;
56     private int                                     _total_losses;
57     private int                                     _total_gains;
58     private int                                     _total_unchanged;
59     private int                                     _cost;
60     private Map<DomainId, Set<String>>              _domain_id_to_secondary_features_map;
61     private SortedSet<DomainId>                     _positive_filter;
62
63     private DomainParsimonyCalculator( final Phylogeny phylogeny ) {
64         init();
65         _phylogeny = phylogeny;
66         _gwcd_list = null;
67     }
68
69     private DomainParsimonyCalculator( final Phylogeny phylogeny, final List<GenomeWideCombinableDomains> gwcd_list ) {
70         init();
71         _phylogeny = phylogeny;
72         _gwcd_list = gwcd_list;
73     }
74
75     private DomainParsimonyCalculator( final Phylogeny phylogeny,
76                                        final List<GenomeWideCombinableDomains> gwcd_list,
77                                        final Map<DomainId, Set<String>> domain_id_to_secondary_features_map ) {
78         init();
79         _phylogeny = phylogeny;
80         _gwcd_list = gwcd_list;
81         setDomainIdToSecondaryFeaturesMap( domain_id_to_secondary_features_map );
82     }
83
84     int calculateNumberOfBinaryDomainCombination() {
85         if ( getGenomeWideCombinableDomainsList().isEmpty() ) {
86             throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
87         }
88         final Set<BinaryDomainCombination> all_binary_combinations = new HashSet<BinaryDomainCombination>();
89         for( final GenomeWideCombinableDomains gwcd : getGenomeWideCombinableDomainsList() ) {
90             for( final BinaryDomainCombination bc : gwcd.toBinaryDomainCombinations() ) {
91                 all_binary_combinations.add( bc );
92             }
93         }
94         return all_binary_combinations.size();
95     }
96
97     CharacterStateMatrix<BinaryStates> createMatrixOfBinaryDomainCombinationPresenceOrAbsence() {
98         return createMatrixOfBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList() );
99     }
100
101     CharacterStateMatrix<BinaryStates> createMatrixOfDomainPresenceOrAbsence() {
102         return createMatrixOfDomainPresenceOrAbsence( getGenomeWideCombinableDomainsList(), getPositiveFilter() );
103     }
104
105     CharacterStateMatrix<BinaryStates> createMatrixOfSecondaryFeaturePresenceOrAbsence( final Map<Species, MappingResults> mapping_results_map ) {
106         return createMatrixOfSecondaryFeaturePresenceOrAbsence( getGenomeWideCombinableDomainsList(),
107                                                                 getDomainIdToSecondaryFeaturesMap(),
108                                                                 mapping_results_map );
109     }
110
111     Phylogeny decoratePhylogenyWithDomains( final Phylogeny phylogeny ) {
112         for( final PhylogenyNodeIterator it = phylogeny.iteratorPostorder(); it.hasNext(); ) {
113             final PhylogenyNode node = it.next();
114             final String node_identifier = node.getName();
115             final BinaryCharacters bc = new BinaryCharacters( getUnitsOnNode( node_identifier ),
116                                                               getUnitsGainedOnNode( node_identifier ),
117                                                               getUnitsLostOnNode( node_identifier ),
118                                                               TYPE_FORBINARY_CHARACTERS,
119                                                               getSumOfPresentOnNode( node_identifier ),
120                                                               getSumOfGainsOnNode( node_identifier ),
121                                                               getSumOfLossesOnNode( node_identifier ) );
122             node.getNodeData().setBinaryCharacters( bc );
123         }
124         return phylogeny;
125     }
126
127     private void executeDolloParsimony( final boolean on_domain_presence ) {
128         reset();
129         final DolloParsimony dollo = DolloParsimony.createInstance();
130         dollo.setReturnGainLossMatrix( true );
131         dollo.setReturnInternalStates( true );
132         CharacterStateMatrix<BinaryStates> states = null;
133         if ( on_domain_presence ) {
134             states = createMatrixOfDomainPresenceOrAbsence();
135         }
136         else {
137             states = createMatrixOfBinaryDomainCombinationPresenceOrAbsence();
138         }
139         dollo.execute( getPhylogeny(), states );
140         setGainLossMatrix( dollo.getGainLossMatrix() );
141         setBinaryInternalStatesMatrix( dollo.getInternalStatesMatrix() );
142         setCost( dollo.getCost() );
143         setTotalGains( dollo.getTotalGains() );
144         setTotalLosses( dollo.getTotalLosses() );
145         setTotalUnchanged( dollo.getTotalUnchanged() );
146     }
147
148     public void executeDolloParsimonyOnBinaryDomainCombintionPresence() {
149         executeDolloParsimony( false );
150     }
151
152     public void executeDolloParsimonyOnDomainPresence() {
153         executeDolloParsimony( true );
154     }
155
156     public void executeDolloParsimonyOnDomainPresence( final SortedSet<DomainId> positive_filter ) {
157         setPositiveFilter( positive_filter );
158         executeDolloParsimony( true );
159         setPositiveFilter( null );
160     }
161
162     public void executeDolloParsimonyOnSecondaryFeatures( final Map<Species, MappingResults> mapping_results_map ) {
163         if ( getDomainIdToSecondaryFeaturesMap() == null ) {
164             throw new RuntimeException( "Domain id to secondary features map has apparently not been set" );
165         }
166         reset();
167         final DolloParsimony dollo = DolloParsimony.createInstance();
168         dollo.setReturnGainLossMatrix( true );
169         dollo.setReturnInternalStates( true );
170         final CharacterStateMatrix<BinaryStates> states = createMatrixOfSecondaryFeaturePresenceOrAbsence( mapping_results_map );
171         dollo.execute( getPhylogeny(), states );
172         setGainLossMatrix( dollo.getGainLossMatrix() );
173         setBinaryInternalStatesMatrix( dollo.getInternalStatesMatrix() );
174         setCost( dollo.getCost() );
175         setTotalGains( dollo.getTotalGains() );
176         setTotalLosses( dollo.getTotalLosses() );
177         setTotalUnchanged( dollo.getTotalUnchanged() );
178     }
179
180     private void executeFitchParsimony( final boolean on_domain_presence,
181                                         final boolean use_last,
182                                         final boolean randomize,
183                                         final long random_number_seed ) {
184         reset();
185         if ( use_last ) {
186             System.out.println( "   Fitch parsimony: use_last = true" );
187         }
188         final FitchParsimony<BinaryStates> fitch = new FitchParsimony<BinaryStates>();
189         fitch.setRandomize( randomize );
190         if ( randomize ) {
191             fitch.setRandomNumberSeed( random_number_seed );
192         }
193         fitch.setUseLast( use_last );
194         fitch.setReturnGainLossMatrix( true );
195         fitch.setReturnInternalStates( true );
196         CharacterStateMatrix<BinaryStates> states = null;
197         if ( on_domain_presence ) {
198             states = createMatrixOfDomainPresenceOrAbsence( getGenomeWideCombinableDomainsList() );
199         }
200         else {
201             states = createMatrixOfBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList() );
202         }
203         fitch.execute( getPhylogeny(), states );
204         setGainLossMatrix( fitch.getGainLossMatrix() );
205         setBinaryInternalStatesMatrix( fitch.getInternalStatesMatrix() );
206         setCost( fitch.getCost() );
207         setTotalGains( fitch.getTotalGains() );
208         setTotalLosses( fitch.getTotalLosses() );
209         setTotalUnchanged( fitch.getTotalUnchanged() );
210     }
211
212     public void executeFitchParsimonyOnBinaryDomainCombintion( final boolean use_last ) {
213         executeFitchParsimony( false, use_last, false, 0 );
214     }
215
216     public void executeFitchParsimonyOnBinaryDomainCombintion( final long random_number_seed ) {
217         executeFitchParsimony( false, false, true, random_number_seed );
218     }
219
220     public void executeFitchParsimonyOnDomainPresence( final boolean use_last ) {
221         executeFitchParsimony( true, use_last, false, 0 );
222     }
223
224     public void executeFitchParsimonyOnDomainPresence( final long random_number_seed ) {
225         executeFitchParsimony( true, false, true, random_number_seed );
226     }
227
228     public void executeOnGivenBinaryStatesMatrix( final CharacterStateMatrix<BinaryStates> binary_states_matrix,
229                                                   final String[] character_labels ) {
230         reset();
231         if ( binary_states_matrix.getNumberOfCharacters() != character_labels.length ) {
232             throw new IllegalArgumentException( "binary states matrix number of characters is not equal to the number of character labels provided" );
233         }
234         if ( binary_states_matrix.getNumberOfIdentifiers() != getPhylogeny().getNumberOfBranches() ) {
235             throw new IllegalArgumentException( "binary states matrix number of identifiers is not equal to the number of tree nodes provided" );
236         }
237         final CharacterStateMatrix<GainLossStates> gl_matrix = new BasicCharacterStateMatrix<GainLossStates>( binary_states_matrix
238                                                                                                                       .getNumberOfIdentifiers(),
239                                                                                                               binary_states_matrix
240                                                                                                                       .getNumberOfCharacters() );
241         int total_gains = 0;
242         int total_losses = 0;
243         int total_unchanged = 0;
244         int i = 0;
245         for( final PhylogenyNodeIterator it = getPhylogeny().iteratorPostorder(); it.hasNext(); ) {
246             gl_matrix.setIdentifier( i++, it.next().getName() );
247         }
248         for( int c = 0; c < character_labels.length; ++c ) {
249             gl_matrix.setCharacter( c, character_labels[ c ] );
250             final PhylogenyNodeIterator it = getPhylogeny().iteratorPostorder();
251             while ( it.hasNext() ) {
252                 final PhylogenyNode node = it.next();
253                 final String name = node.getName();
254                 final BinaryStates bin_state = binary_states_matrix.getState( binary_states_matrix
255                         .getIdentifierIndex( name ), c );
256                 final PhylogenyNode parent_node = getPhylogeny().getNode( name ).getParent();
257                 GainLossStates gl_state = null;
258                 if ( node.isRoot() ) {
259                     ++total_unchanged;
260                     if ( bin_state == BinaryStates.ABSENT ) {
261                         gl_state = GainLossStates.UNCHANGED_ABSENT;
262                     }
263                     else {
264                         gl_state = GainLossStates.UNCHANGED_PRESENT;
265                     }
266                 }
267                 else {
268                     final BinaryStates parent_bin_state = binary_states_matrix.getState( binary_states_matrix
269                             .getIdentifierIndex( parent_node.getName() ), c );
270                     if ( bin_state == BinaryStates.ABSENT ) {
271                         if ( parent_bin_state == BinaryStates.ABSENT ) {
272                             ++total_unchanged;
273                             gl_state = GainLossStates.UNCHANGED_ABSENT;
274                         }
275                         else {
276                             ++total_losses;
277                             gl_state = GainLossStates.LOSS;
278                         }
279                     }
280                     else {
281                         if ( parent_bin_state == BinaryStates.ABSENT ) {
282                             ++total_gains;
283                             gl_state = GainLossStates.GAIN;
284                         }
285                         else {
286                             ++total_unchanged;
287                             gl_state = GainLossStates.UNCHANGED_PRESENT;
288                         }
289                     }
290                 }
291                 gl_matrix.setState( name, c, gl_state );
292             }
293         }
294         setTotalGains( total_gains );
295         setTotalLosses( total_losses );
296         setTotalUnchanged( total_unchanged );
297         setCost( total_gains + total_losses );
298         setGainLossMatrix( gl_matrix );
299     }
300
301     public int getCost() {
302         return _cost;
303     }
304
305     private Map<DomainId, Set<String>> getDomainIdToSecondaryFeaturesMap() {
306         return _domain_id_to_secondary_features_map;
307     }
308
309     public CharacterStateMatrix<Integer> getGainLossCountsMatrix() {
310         final CharacterStateMatrix<Integer> matrix = new BasicCharacterStateMatrix<Integer>( getGainLossMatrix()
311                 .getNumberOfIdentifiers(), 3 );
312         for( int i = 0; i < getGainLossMatrix().getNumberOfIdentifiers(); ++i ) {
313             matrix.setIdentifier( i, getGainLossMatrix().getIdentifier( i ) );
314         }
315         matrix.setCharacter( 0, "GAINS" );
316         matrix.setCharacter( 1, "LOSSES" );
317         matrix.setCharacter( 2, "NET" );
318         for( int i = 0; i < getGainLossMatrix().getNumberOfIdentifiers(); ++i ) {
319             int gains = 0;
320             int losses = 0;
321             for( int c = 0; c < getGainLossMatrix().getNumberOfCharacters(); ++c ) {
322                 final GainLossStates s = getGainLossMatrix().getState( i, c );
323                 if ( s == GainLossStates.GAIN ) {
324                     ++gains;
325                 }
326                 else if ( s == GainLossStates.LOSS ) {
327                     ++losses;
328                 }
329             }
330             matrix.setState( i, 0, gains );
331             matrix.setState( i, 1, losses );
332             matrix.setState( i, 2, gains - losses );
333         }
334         return matrix;
335     }
336
337     public CharacterStateMatrix<GainLossStates> getGainLossMatrix() {
338         return _gain_loss_matrix;
339     }
340
341     private List<GenomeWideCombinableDomains> getGenomeWideCombinableDomainsList() {
342         return _gwcd_list;
343     }
344
345     public CharacterStateMatrix<BinaryStates> getInternalStatesMatrix() {
346         return _binary_internal_states_matrix;
347     }
348
349     public int getNetGainsOnNode( final String node_identifier ) {
350         if ( getGainLossMatrix() == null ) {
351             throw new RuntimeException( "no gain loss matrix has been calculated" );
352         }
353         int net = 0;
354         final int id_index = getGainLossMatrix().getIdentifierIndex( node_identifier );
355         for( int c = 0; c < getGainLossMatrix().getNumberOfCharacters(); ++c ) {
356             if ( getGainLossMatrix().getState( id_index, c ) == GainLossStates.GAIN ) {
357                 ++net;
358             }
359             else if ( getGainLossMatrix().getState( id_index, c ) == GainLossStates.LOSS ) {
360                 --net;
361             }
362         }
363         return net;
364     }
365
366     private Phylogeny getPhylogeny() {
367         return _phylogeny;
368     }
369
370     private SortedSet<DomainId> getPositiveFilter() {
371         return _positive_filter;
372     }
373
374     public int getSumOfGainsOnNode( final String node_identifier ) {
375         return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.GAIN );
376     }
377
378     public int getSumOfLossesOnNode( final String node_identifier ) {
379         return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.LOSS );
380     }
381
382     public int getSumOfPresentOnNode( final String node_identifier ) {
383         return getSumOfGainsOnNode( node_identifier ) + getSumOfUnchangedPresentOnNode( node_identifier );
384     }
385
386     int getSumOfUnchangedAbsentOnNode( final String node_identifier ) {
387         return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_ABSENT );
388     }
389
390     int getSumOfUnchangedOnNode( final String node_identifier ) {
391         return getSumOfUnchangedPresentOnNode( node_identifier ) + getSumOfUnchangedAbsentOnNode( node_identifier );
392     }
393
394     int getSumOfUnchangedPresentOnNode( final String node_identifier ) {
395         return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_PRESENT );
396     }
397
398     public int getTotalGains() {
399         return _total_gains;
400     }
401
402     public int getTotalLosses() {
403         return _total_losses;
404     }
405
406     public int getTotalUnchanged() {
407         return _total_unchanged;
408     }
409
410     public SortedSet<String> getUnitsGainedOnNode( final String node_identifier ) {
411         return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.GAIN );
412     }
413
414     public SortedSet<String> getUnitsLostOnNode( final String node_identifier ) {
415         return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.LOSS );
416     }
417
418     public SortedSet<String> getUnitsOnNode( final String node_identifier ) {
419         final SortedSet<String> present = getUnitsGainedOnNode( node_identifier );
420         present.addAll( getUnitsUnchangedPresentOnNode( node_identifier ) );
421         return present;
422     }
423
424     SortedSet<String> getUnitsUnchangedAbsentOnNode( final String node_identifier ) {
425         return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_ABSENT );
426     }
427
428     SortedSet<String> getUnitsUnchangedPresentOnNode( final String node_identifier ) {
429         return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_PRESENT );
430     }
431
432     private void init() {
433         setDomainIdToSecondaryFeaturesMap( null );
434         setPositiveFilter( null );
435         reset();
436     }
437
438     private void reset() {
439         setGainLossMatrix( null );
440         setBinaryInternalStatesMatrix( null );
441         setCost( -1 );
442         setTotalGains( -1 );
443         setTotalLosses( -1 );
444         setTotalUnchanged( -1 );
445     }
446
447     private void setBinaryInternalStatesMatrix( final CharacterStateMatrix<BinaryStates> binary_states_matrix ) {
448         _binary_internal_states_matrix = binary_states_matrix;
449     }
450
451     private void setCost( final int cost ) {
452         _cost = cost;
453     }
454
455     private void setDomainIdToSecondaryFeaturesMap( final Map<DomainId, Set<String>> domain_id_to_secondary_features_map ) {
456         _domain_id_to_secondary_features_map = domain_id_to_secondary_features_map;
457     }
458
459     private void setGainLossMatrix( final CharacterStateMatrix<GainLossStates> gain_loss_matrix ) {
460         _gain_loss_matrix = gain_loss_matrix;
461     }
462
463     private void setPositiveFilter( final SortedSet<DomainId> positive_filter ) {
464         _positive_filter = positive_filter;
465     }
466
467     private void setTotalGains( final int total_gains ) {
468         _total_gains = total_gains;
469     }
470
471     private void setTotalLosses( final int total_losses ) {
472         _total_losses = total_losses;
473     }
474
475     private void setTotalUnchanged( final int total_unchanged ) {
476         _total_unchanged = total_unchanged;
477     }
478
479     public static DomainParsimonyCalculator createInstance( final Phylogeny phylogeny ) {
480         return new DomainParsimonyCalculator( phylogeny );
481     }
482
483     public static DomainParsimonyCalculator createInstance( final Phylogeny phylogeny,
484                                                             final List<GenomeWideCombinableDomains> gwcd_list ) {
485         if ( phylogeny.getNumberOfExternalNodes() != gwcd_list.size() ) {
486             throw new IllegalArgumentException( "number of external nodes [" + phylogeny.getNumberOfExternalNodes()
487                     + "] does not equal size of genome wide combinable domains list [" + gwcd_list.size() + "]" );
488         }
489         return new DomainParsimonyCalculator( phylogeny, gwcd_list );
490     }
491
492     public static DomainParsimonyCalculator createInstance( final Phylogeny phylogeny,
493                                                             final List<GenomeWideCombinableDomains> gwcd_list,
494                                                             final Map<DomainId, Set<String>> domain_id_to_secondary_features_map ) {
495         if ( phylogeny.getNumberOfExternalNodes() != gwcd_list.size() ) {
496             throw new IllegalArgumentException( "size of external nodes does not equal size of genome wide combinable domains list" );
497         }
498         return new DomainParsimonyCalculator( phylogeny, gwcd_list, domain_id_to_secondary_features_map );
499     }
500
501     public static CharacterStateMatrix<BinaryStates> createMatrixOfBinaryDomainCombinationPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list ) {
502         if ( gwcd_list.isEmpty() ) {
503             throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
504         }
505         final int number_of_identifiers = gwcd_list.size();
506         final SortedSet<BinaryDomainCombination> all_binary_combinations = new TreeSet<BinaryDomainCombination>();
507         final Set<BinaryDomainCombination>[] binary_combinations_per_genome = new HashSet[ number_of_identifiers ];
508         int identifier_index = 0;
509         for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
510             binary_combinations_per_genome[ identifier_index ] = new HashSet<BinaryDomainCombination>();
511             for( final BinaryDomainCombination bc : gwcd.toBinaryDomainCombinations() ) {
512                 all_binary_combinations.add( bc );
513                 binary_combinations_per_genome[ identifier_index ].add( bc );
514             }
515             ++identifier_index;
516         }
517         final int number_of_characters = all_binary_combinations.size();
518         final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
519                                                                                                                                                  number_of_characters );
520         int character_index = 0;
521         for( final BinaryDomainCombination bc : all_binary_combinations ) {
522             matrix.setCharacter( character_index++, bc.toString() );
523         }
524         identifier_index = 0;
525         final Set<String> all_identifiers = new HashSet<String>();
526         for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
527             final String species_id = gwcd.getSpecies().getSpeciesId();
528             if ( all_identifiers.contains( species_id ) ) {
529                 throw new AssertionError( "species [" + species_id + "] is not unique" );
530             }
531             all_identifiers.add( species_id );
532             matrix.setIdentifier( identifier_index, species_id );
533             for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
534                 BinaryDomainCombination bc = null;
535                 if ( gwcd.getDomainCombinationType() == DomainCombinationType.DIRECTED_ADJACTANT ) {
536                     bc = AdjactantDirectedBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
537                 }
538                 else if ( gwcd.getDomainCombinationType() == DomainCombinationType.DIRECTED ) {
539                     bc = DirectedBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
540                 }
541                 else {
542                     bc = BasicBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
543                 }
544                 if ( binary_combinations_per_genome[ identifier_index ].contains( bc ) ) {
545                     matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
546                 }
547                 else {
548                     matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.ABSENT );
549                 }
550             }
551             ++identifier_index;
552         }
553         return matrix;
554     }
555
556     static CharacterStateMatrix<BinaryStates> createMatrixOfDomainPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list ) {
557         return createMatrixOfDomainPresenceOrAbsence( gwcd_list, null );
558     }
559
560     public static CharacterStateMatrix<BinaryStates> createMatrixOfDomainPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list,
561                                                                                             final SortedSet<DomainId> positive_filter ) {
562         if ( gwcd_list.isEmpty() ) {
563             throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
564         }
565         if ( ( positive_filter != null ) && ( positive_filter.size() < 1 ) ) {
566             throw new IllegalArgumentException( "positive filter is empty" );
567         }
568         final int number_of_identifiers = gwcd_list.size();
569         final SortedSet<DomainId> all_domain_ids = new TreeSet<DomainId>();
570         for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
571             for( final DomainId domain : gwcd.getAllDomainIds() ) {
572                 all_domain_ids.add( domain );
573             }
574         }
575         int number_of_characters = all_domain_ids.size();
576         if ( positive_filter != null ) {
577             //number_of_characters = positive_filter.size(); -- bad if doms in filter but not in genomes 
578             number_of_characters = 0;
579             for( final DomainId id : all_domain_ids ) {
580                 if ( positive_filter.contains( id ) ) {
581                     number_of_characters++;
582                 }
583             }
584         }
585         final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
586                                                                                                                                                  number_of_characters );
587         int character_index = 0;
588         for( final DomainId id : all_domain_ids ) {
589             if ( positive_filter == null ) {
590                 matrix.setCharacter( character_index++, id.getId() );
591             }
592             else {
593                 if ( positive_filter.contains( id ) ) {
594                     matrix.setCharacter( character_index++, id.getId() );
595                 }
596             }
597         }
598         int identifier_index = 0;
599         final Set<String> all_identifiers = new HashSet<String>();
600         for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
601             final String species_id = gwcd.getSpecies().getSpeciesId();
602             if ( all_identifiers.contains( species_id ) ) {
603                 throw new IllegalArgumentException( "species [" + species_id + "] is not unique" );
604             }
605             all_identifiers.add( species_id );
606             matrix.setIdentifier( identifier_index, species_id );
607             for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
608                 if ( ForesterUtil.isEmpty( matrix.getCharacter( ci ) ) ) {
609                     throw new RuntimeException( "this should not have happened: problem with character #" + ci );
610                 }
611                 final DomainId id = new DomainId( matrix.getCharacter( ci ) );
612                 if ( gwcd.contains( id ) ) {
613                     matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
614                 }
615                 else {
616                     matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.ABSENT );
617                 }
618             }
619             ++identifier_index;
620         }
621         return matrix;
622     }
623
624     /**
625      * For folds instead of Pfam-domains, for example
626      * 
627      * 
628      * @param gwcd_list
629      * @return
630      */
631     static CharacterStateMatrix<BinaryStates> createMatrixOfSecondaryFeaturePresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list,
632                                                                                                final Map<DomainId, Set<String>> domain_id_to_second_features_map,
633                                                                                                final Map<Species, MappingResults> mapping_results_map ) {
634         if ( gwcd_list.isEmpty() ) {
635             throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
636         }
637         if ( ( domain_id_to_second_features_map == null ) || domain_id_to_second_features_map.isEmpty() ) {
638             throw new IllegalArgumentException( "domain id to secondary features map is null or empty" );
639         }
640         final int number_of_identifiers = gwcd_list.size();
641         final SortedSet<String> all_secondary_features = new TreeSet<String>();
642         for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
643             int mapped = 0;
644             int not_mapped = 0;
645             for( final DomainId domain : gwcd.getAllDomainIds() ) {
646                 if ( domain_id_to_second_features_map.containsKey( domain ) ) {
647                     all_secondary_features.addAll( domain_id_to_second_features_map.get( domain ) );
648                     mapped++;
649                 }
650                 else {
651                     not_mapped++;
652                 }
653             }
654             if ( mapping_results_map != null ) {
655                 final MappingResults mr = new MappingResults();
656                 mr.setDescription( gwcd.getSpecies().getSpeciesId() );
657                 mr.setSumOfSuccesses( mapped );
658                 mr.setSumOfFailures( not_mapped );
659                 mapping_results_map.put( gwcd.getSpecies(), mr );
660             }
661         }
662         final int number_of_characters = all_secondary_features.size();
663         final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
664                                                                                                                                                  number_of_characters );
665         int character_index = 0;
666         for( final String second_id : all_secondary_features ) {
667             matrix.setCharacter( character_index++, second_id );
668         }
669         int identifier_index = 0;
670         final Set<String> all_identifiers = new HashSet<String>();
671         for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
672             final String species_id = gwcd.getSpecies().getSpeciesId();
673             if ( all_identifiers.contains( species_id ) ) {
674                 throw new IllegalArgumentException( "species [" + species_id + "] is not unique" );
675             }
676             all_identifiers.add( species_id );
677             matrix.setIdentifier( identifier_index, species_id );
678             final Set<String> all_second_per_gwcd = new HashSet<String>();
679             for( final DomainId domain : gwcd.getAllDomainIds() ) {
680                 if ( domain_id_to_second_features_map.containsKey( domain ) ) {
681                     all_second_per_gwcd.addAll( domain_id_to_second_features_map.get( domain ) );
682                 }
683             }
684             for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
685                 if ( all_second_per_gwcd.contains( matrix.getCharacter( ci ) ) ) {
686                     matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
687                 }
688                 else {
689                     matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.ABSENT );
690                 }
691             }
692             ++identifier_index;
693         }
694         return matrix;
695     }
696
697     private static int getStateSumDeltaOnNode( final String node_identifier,
698                                                final CharacterStateMatrix<GainLossStates> gain_loss_matrix,
699                                                final GainLossStates state ) {
700         if ( gain_loss_matrix == null ) {
701             throw new RuntimeException( "no gain loss matrix has been calculated" );
702         }
703         if ( ForesterUtil.isEmpty( node_identifier ) ) {
704             throw new IllegalArgumentException( "node identifier must not be empty" );
705         }
706         if ( gain_loss_matrix.isEmpty() ) {
707             throw new RuntimeException( "gain loss matrix is empty" );
708         }
709         int sum = 0;
710         final int id_index = gain_loss_matrix.getIdentifierIndex( node_identifier );
711         for( int c = 0; c < gain_loss_matrix.getNumberOfCharacters(); ++c ) {
712             if ( gain_loss_matrix.getState( id_index, c ) == state ) {
713                 ++sum;
714             }
715         }
716         return sum;
717     }
718
719     private static SortedSet<String> getUnitsDeltaOnNode( final String node_identifier,
720                                                           final CharacterStateMatrix<GainLossStates> gain_loss_matrix,
721                                                           final GainLossStates state ) {
722         if ( gain_loss_matrix == null ) {
723             throw new RuntimeException( "no gain loss matrix has been calculated" );
724         }
725         if ( ForesterUtil.isEmpty( node_identifier ) ) {
726             throw new IllegalArgumentException( "node identifier must not be empty" );
727         }
728         if ( gain_loss_matrix.isEmpty() ) {
729             throw new RuntimeException( "gain loss matrix is empty" );
730         }
731         final SortedSet<String> d = new TreeSet<String>();
732         final int id_index = gain_loss_matrix.getIdentifierIndex( node_identifier );
733         for( int c = 0; c < gain_loss_matrix.getNumberOfCharacters(); ++c ) {
734             if ( gain_loss_matrix.getState( id_index, c ) == state ) {
735                 if ( d.contains( gain_loss_matrix.getCharacter( c ) ) ) {
736                     throw new AssertionError( "this should not have happended: character ["
737                             + gain_loss_matrix.getCharacter( c ) + "] already in set" );
738                 }
739                 d.add( gain_loss_matrix.getCharacter( c ) );
740             }
741         }
742         return d;
743     }
744 }