in progress
[jalview.git] / forester / java / src / org / forester / surfacing / DomainParsimonyCalculator.java
index 4658371..4de0b35 100644 (file)
@@ -22,7 +22,7 @@
 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 //
 // Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org/forester
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
 
 package org.forester.surfacing;
 
@@ -47,7 +47,9 @@ import org.forester.phylogeny.Phylogeny;
 import org.forester.phylogeny.PhylogenyNode;
 import org.forester.phylogeny.data.BinaryCharacters;
 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
-import org.forester.surfacing.BinaryDomainCombination.DomainCombinationType;
+import org.forester.protein.BinaryDomainCombination;
+import org.forester.protein.BinaryDomainCombination.DomainCombinationType;
+import org.forester.species.Species;
 import org.forester.util.ForesterUtil;
 
 public final class DomainParsimonyCalculator {
@@ -61,8 +63,8 @@ public final class DomainParsimonyCalculator {
     private int                                     _total_gains;
     private int                                     _total_unchanged;
     private int                                     _cost;
-    private Map<DomainId, Set<String>>              _domain_id_to_secondary_features_map;
-    private SortedSet<DomainId>                     _positive_filter;
+    private Map<String, Set<String>>                _domain_id_to_secondary_features_map;
+    private SortedSet<String>                       _positive_filter;
 
     private DomainParsimonyCalculator( final Phylogeny phylogeny ) {
         init();
@@ -78,7 +80,7 @@ public final class DomainParsimonyCalculator {
 
     private DomainParsimonyCalculator( final Phylogeny phylogeny,
                                        final List<GenomeWideCombinableDomains> gwcd_list,
-                                       final Map<DomainId, Set<String>> domain_id_to_secondary_features_map ) {
+                                       final Map<String, Set<String>> domain_id_to_secondary_features_map ) {
         init();
         _phylogeny = phylogeny;
         _gwcd_list = gwcd_list;
@@ -157,7 +159,7 @@ public final class DomainParsimonyCalculator {
         executeDolloParsimony( true );
     }
 
-    public void executeDolloParsimonyOnDomainPresence( final SortedSet<DomainId> positive_filter ) {
+    public void executeDolloParsimonyOnDomainPresence( final SortedSet<String> positive_filter ) {
         setPositiveFilter( positive_filter );
         executeDolloParsimony( true );
         setPositiveFilter( null );
@@ -204,7 +206,7 @@ public final class DomainParsimonyCalculator {
         else {
             states = createMatrixOfBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList() );
         }
-        fitch.execute( getPhylogeny(), states );
+        fitch.execute( getPhylogeny(), states, true );
         setGainLossMatrix( fitch.getGainLossMatrix() );
         setBinaryInternalStatesMatrix( fitch.getInternalStatesMatrix() );
         setCost( fitch.getCost() );
@@ -212,11 +214,10 @@ public final class DomainParsimonyCalculator {
         setTotalLosses( fitch.getTotalLosses() );
         setTotalUnchanged( fitch.getTotalUnchanged() );
     }
-    
-    private void executeFitchParsimonyOnSecondaryFeatures( 
-                                        final boolean use_last,
-                                        final boolean randomize,
-                                        final long random_number_seed ) {
+
+    private void executeFitchParsimonyOnSecondaryFeatures( final boolean use_last,
+                                                           final boolean randomize,
+                                                           final long random_number_seed ) {
         reset();
         if ( use_last ) {
             System.out.println( "   Fitch parsimony: use_last = true" );
@@ -229,22 +230,19 @@ public final class DomainParsimonyCalculator {
         fitch.setUseLast( use_last );
         fitch.setReturnGainLossMatrix( true );
         fitch.setReturnInternalStates( true );
-        
-        final Map<DomainId, Set<String>> map = getDomainIdToSecondaryFeaturesMap();
-        final Map<DomainId, String> newmap = new HashMap<DomainId, String>();
-        final Iterator<Entry<DomainId, Set<String>>> it = map.entrySet().iterator();
-        while (it.hasNext()) {
-            final Map.Entry<DomainId, Set<String>> pair =  (Map.Entry<DomainId, Set<String>>)it.next();
+        final Map<String, Set<String>> map = getDomainIdToSecondaryFeaturesMap();
+        final Map<String, String> newmap = new HashMap<String, String>();
+        final Iterator<Entry<String, Set<String>>> it = map.entrySet().iterator();
+        while ( it.hasNext() ) {
+            final Map.Entry<String, Set<String>> pair = it.next();
             if ( pair.getValue().size() != 1 ) {
-                throw new IllegalArgumentException( pair.getKey().getId() + " mapps to " + pair.getValue().size() + " items" );
+                throw new IllegalArgumentException( pair.getKey() + " mapps to " + pair.getValue().size() + " items" );
             }
             newmap.put( pair.getKey(), ( String ) pair.getValue().toArray()[ 0 ] );
         }
-        
-        CharacterStateMatrix<BinaryStates> states  =createMatrixOfSecondaryFeatureBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList(),
-                                                                                newmap );
-       
-        fitch.execute( getPhylogeny(), states );
+        final CharacterStateMatrix<BinaryStates> states = createMatrixOfSecondaryFeatureBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList(),
+                                                                                                                                  newmap );
+        fitch.execute( getPhylogeny(), states, true );
         setGainLossMatrix( fitch.getGainLossMatrix() );
         setBinaryInternalStatesMatrix( fitch.getInternalStatesMatrix() );
         setCost( fitch.getCost() );
@@ -256,7 +254,7 @@ public final class DomainParsimonyCalculator {
     public void executeFitchParsimonyOnBinaryDomainCombintion( final boolean use_last ) {
         executeFitchParsimony( false, use_last, false, 0 );
     }
-    
+
     public void executeFitchParsimonyOnBinaryDomainCombintionOnSecondaryFeatures( final boolean use_last ) {
         executeFitchParsimonyOnSecondaryFeatures( use_last, false, 0 );
     }
@@ -350,7 +348,7 @@ public final class DomainParsimonyCalculator {
         return _cost;
     }
 
-    private Map<DomainId, Set<String>> getDomainIdToSecondaryFeaturesMap() {
+    private Map<String, Set<String>> getDomainIdToSecondaryFeaturesMap() {
         return _domain_id_to_secondary_features_map;
     }
 
@@ -415,7 +413,7 @@ public final class DomainParsimonyCalculator {
         return _phylogeny;
     }
 
-    private SortedSet<DomainId> getPositiveFilter() {
+    private SortedSet<String> getPositiveFilter() {
         return _positive_filter;
     }
 
@@ -500,7 +498,7 @@ public final class DomainParsimonyCalculator {
         _cost = cost;
     }
 
-    private void setDomainIdToSecondaryFeaturesMap( final Map<DomainId, Set<String>> domain_id_to_secondary_features_map ) {
+    private void setDomainIdToSecondaryFeaturesMap( final Map<String, Set<String>> domain_id_to_secondary_features_map ) {
         _domain_id_to_secondary_features_map = domain_id_to_secondary_features_map;
     }
 
@@ -508,7 +506,7 @@ public final class DomainParsimonyCalculator {
         _gain_loss_matrix = gain_loss_matrix;
     }
 
-    private void setPositiveFilter( final SortedSet<DomainId> positive_filter ) {
+    private void setPositiveFilter( final SortedSet<String> positive_filter ) {
         _positive_filter = positive_filter;
     }
 
@@ -539,14 +537,13 @@ public final class DomainParsimonyCalculator {
 
     public static DomainParsimonyCalculator createInstance( final Phylogeny phylogeny,
                                                             final List<GenomeWideCombinableDomains> gwcd_list,
-                                                            final Map<DomainId, Set<String>> domain_id_to_secondary_features_map ) {
+                                                            final Map<String, Set<String>> domain_id_to_secondary_features_map ) {
         if ( phylogeny.getNumberOfExternalNodes() != gwcd_list.size() ) {
             throw new IllegalArgumentException( "size of external nodes does not equal size of genome wide combinable domains list" );
         }
         return new DomainParsimonyCalculator( phylogeny, gwcd_list, domain_id_to_secondary_features_map );
     }
 
-    
     /**
      * For folds instead of Pfam-domains, for example
      * 
@@ -555,7 +552,7 @@ public final class DomainParsimonyCalculator {
      * @return
      */
     static CharacterStateMatrix<BinaryStates> createMatrixOfSecondaryFeaturePresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list,
-                                                                                               final Map<DomainId, Set<String>> domain_id_to_second_features_map,
+                                                                                               final Map<String, Set<String>> domain_id_to_second_features_map,
                                                                                                final Map<Species, MappingResults> mapping_results_map ) {
         if ( gwcd_list.isEmpty() ) {
             throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
@@ -568,7 +565,7 @@ public final class DomainParsimonyCalculator {
         for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
             int mapped = 0;
             int not_mapped = 0;
-            for( final DomainId domain : gwcd.getAllDomainIds() ) {
+            for( final String domain : gwcd.getAllDomainIds() ) {
                 if ( domain_id_to_second_features_map.containsKey( domain ) ) {
                     all_secondary_features.addAll( domain_id_to_second_features_map.get( domain ) );
                     mapped++;
@@ -602,7 +599,7 @@ public final class DomainParsimonyCalculator {
             all_identifiers.add( species_id );
             matrix.setIdentifier( identifier_index, species_id );
             final Set<String> all_second_per_gwcd = new HashSet<String>();
-            for( final DomainId domain : gwcd.getAllDomainIds() ) {
+            for( final String domain : gwcd.getAllDomainIds() ) {
                 if ( domain_id_to_second_features_map.containsKey( domain ) ) {
                     all_second_per_gwcd.addAll( domain_id_to_second_features_map.get( domain ) );
                 }
@@ -619,10 +616,9 @@ public final class DomainParsimonyCalculator {
         }
         return matrix;
     }
-    
-    
+
     public static CharacterStateMatrix<BinaryStates> createMatrixOfSecondaryFeatureBinaryDomainCombinationPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list,
-                                                                                                                             final Map<DomainId, String> domain_id_to_second_features_map) {
+                                                                                                                             final Map<String, String> domain_id_to_second_features_map ) {
         if ( gwcd_list.isEmpty() ) {
             throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
         }
@@ -633,12 +629,10 @@ public final class DomainParsimonyCalculator {
         final SortedSet<BinaryDomainCombination> all_binary_combinations_mapped = new TreeSet<BinaryDomainCombination>();
         final Set<BinaryDomainCombination>[] binary_combinations_per_genome_mapped = new HashSet[ number_of_identifiers ];
         int identifier_index = 0;
-        
         final SortedSet<String> no_mappings = new TreeSet<String>();
         for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
             binary_combinations_per_genome_mapped[ identifier_index ] = new HashSet<BinaryDomainCombination>();
             for( final BinaryDomainCombination bc : gwcd.toBinaryDomainCombinations() ) {
-                
                 final BinaryDomainCombination mapped_bc = mapBinaryDomainCombination( domain_id_to_second_features_map,
                                                                                       bc,
                                                                                       no_mappings );
@@ -647,14 +641,13 @@ public final class DomainParsimonyCalculator {
             }
             ++identifier_index;
         }
-       
         if ( !no_mappings.isEmpty() ) {
-            ForesterUtil.programMessage( surfacing.PRG_NAME, "No mappings for the following (" + no_mappings.size()  + "):" );
+            ForesterUtil.programMessage( surfacing.PRG_NAME, "No mappings for the following (" + no_mappings.size()
+                    + "):" );
             for( final String id : no_mappings ) {
-                ForesterUtil.programMessage( surfacing.PRG_NAME, id);
+                ForesterUtil.programMessage( surfacing.PRG_NAME, id );
             }
         }
-        
         final int number_of_characters = all_binary_combinations_mapped.size();
         final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
                                                                                                                                                  number_of_characters );
@@ -694,35 +687,28 @@ public final class DomainParsimonyCalculator {
         return matrix;
     }
 
-    private static BinaryDomainCombination mapBinaryDomainCombination( final Map<DomainId, String> domain_id_to_second_features_map,
+    private static BinaryDomainCombination mapBinaryDomainCombination( final Map<String, String> domain_id_to_second_features_map,
                                                                        final BinaryDomainCombination bc,
                                                                        final SortedSet<String> no_mappings ) {
         String id0 = "";
         String id1 = "";
-        
         if ( !domain_id_to_second_features_map.containsKey( bc.getId0() ) ) {
-           
-            no_mappings.add(bc.getId0().getId()  );
-            id0 = bc.getId0().getId();
+            no_mappings.add( bc.getId0() );
+            id0 = bc.getId0();
         }
         else {
-            id0 = domain_id_to_second_features_map.get(  bc.getId0());
+            id0 = domain_id_to_second_features_map.get( bc.getId0() );
         }
         if ( !domain_id_to_second_features_map.containsKey( bc.getId1() ) ) {
-           
-            no_mappings.add(bc.getId1().getId()  );
-            id1 = bc.getId1().getId();
+            no_mappings.add( bc.getId1() );
+            id1 = bc.getId1();
         }
         else {
-            id1 = domain_id_to_second_features_map.get(  bc.getId1());
+            id1 = domain_id_to_second_features_map.get( bc.getId1() );
         }
-        
         return new BasicBinaryDomainCombination( id0, id1 );
     }
-    
-    
-    
-    
+
     public static CharacterStateMatrix<BinaryStates> createMatrixOfBinaryDomainCombinationPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list ) {
         if ( gwcd_list.isEmpty() ) {
             throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
@@ -783,7 +769,7 @@ public final class DomainParsimonyCalculator {
     }
 
     public static CharacterStateMatrix<BinaryStates> createMatrixOfDomainPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list,
-                                                                                            final SortedSet<DomainId> positive_filter ) {
+                                                                                            final SortedSet<String> positive_filter ) {
         if ( gwcd_list.isEmpty() ) {
             throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
         }
@@ -791,9 +777,9 @@ public final class DomainParsimonyCalculator {
             throw new IllegalArgumentException( "positive filter is empty" );
         }
         final int number_of_identifiers = gwcd_list.size();
-        final SortedSet<DomainId> all_domain_ids = new TreeSet<DomainId>();
+        final SortedSet<String> all_domain_ids = new TreeSet<String>();
         for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
-            for( final DomainId domain : gwcd.getAllDomainIds() ) {
+            for( final String domain : gwcd.getAllDomainIds() ) {
                 all_domain_ids.add( domain );
             }
         }
@@ -801,7 +787,7 @@ public final class DomainParsimonyCalculator {
         if ( positive_filter != null ) {
             //number_of_characters = positive_filter.size(); -- bad if doms in filter but not in genomes 
             number_of_characters = 0;
-            for( final DomainId id : all_domain_ids ) {
+            for( final String id : all_domain_ids ) {
                 if ( positive_filter.contains( id ) ) {
                     number_of_characters++;
                 }
@@ -810,13 +796,13 @@ public final class DomainParsimonyCalculator {
         final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
                                                                                                                                                  number_of_characters );
         int character_index = 0;
-        for( final DomainId id : all_domain_ids ) {
+        for( final String id : all_domain_ids ) {
             if ( positive_filter == null ) {
-                matrix.setCharacter( character_index++, id.getId() );
+                matrix.setCharacter( character_index++, id );
             }
             else {
                 if ( positive_filter.contains( id ) ) {
-                    matrix.setCharacter( character_index++, id.getId() );
+                    matrix.setCharacter( character_index++, id );
                 }
             }
         }
@@ -833,7 +819,7 @@ public final class DomainParsimonyCalculator {
                 if ( ForesterUtil.isEmpty( matrix.getCharacter( ci ) ) ) {
                     throw new RuntimeException( "this should not have happened: problem with character #" + ci );
                 }
-                final DomainId id = new DomainId( matrix.getCharacter( ci ) );
+                final String id = matrix.getCharacter( ci );
                 if ( gwcd.contains( id ) ) {
                     matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
                 }
@@ -846,8 +832,6 @@ public final class DomainParsimonyCalculator {
         return matrix;
     }
 
-   
-
     private static int getStateSumDeltaOnNode( final String node_identifier,
                                                final CharacterStateMatrix<GainLossStates> gain_loss_matrix,
                                                final GainLossStates state ) {