X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fsurfacing%2FDomainParsimonyCalculator.java;h=4de0b35c6efd707ce2250847ebe5096ced528b49;hb=d4d61f9d2969283e821f650d031c169899fb3870;hp=4658371a9d0097ffddcb081f532bde07840424d9;hpb=4d86cc1858160830406c6428cea1c45128216646;p=jalview.git diff --git a/forester/java/src/org/forester/surfacing/DomainParsimonyCalculator.java b/forester/java/src/org/forester/surfacing/DomainParsimonyCalculator.java index 4658371..4de0b35 100644 --- a/forester/java/src/org/forester/surfacing/DomainParsimonyCalculator.java +++ b/forester/java/src/org/forester/surfacing/DomainParsimonyCalculator.java @@ -22,7 +22,7 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/forester +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.surfacing; @@ -47,7 +47,9 @@ import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.BinaryCharacters; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; -import org.forester.surfacing.BinaryDomainCombination.DomainCombinationType; +import org.forester.protein.BinaryDomainCombination; +import org.forester.protein.BinaryDomainCombination.DomainCombinationType; +import org.forester.species.Species; import org.forester.util.ForesterUtil; public final class DomainParsimonyCalculator { @@ -61,8 +63,8 @@ public final class DomainParsimonyCalculator { private int _total_gains; private int _total_unchanged; private int _cost; - private Map> _domain_id_to_secondary_features_map; - private SortedSet _positive_filter; + private Map> _domain_id_to_secondary_features_map; + private SortedSet _positive_filter; private DomainParsimonyCalculator( final Phylogeny phylogeny ) { init(); @@ -78,7 +80,7 @@ public final class DomainParsimonyCalculator { private DomainParsimonyCalculator( final Phylogeny phylogeny, final List gwcd_list, - final Map> domain_id_to_secondary_features_map ) { + final Map> domain_id_to_secondary_features_map ) { init(); _phylogeny = phylogeny; _gwcd_list = gwcd_list; @@ -157,7 +159,7 @@ public final class DomainParsimonyCalculator { executeDolloParsimony( true ); } - public void executeDolloParsimonyOnDomainPresence( final SortedSet positive_filter ) { + public void executeDolloParsimonyOnDomainPresence( final SortedSet positive_filter ) { setPositiveFilter( positive_filter ); executeDolloParsimony( true ); setPositiveFilter( null ); @@ -204,7 +206,7 @@ public final class DomainParsimonyCalculator { else { states = createMatrixOfBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList() ); } - fitch.execute( getPhylogeny(), states ); + fitch.execute( getPhylogeny(), states, true ); setGainLossMatrix( fitch.getGainLossMatrix() ); setBinaryInternalStatesMatrix( fitch.getInternalStatesMatrix() ); setCost( fitch.getCost() ); @@ -212,11 +214,10 @@ public final class DomainParsimonyCalculator { setTotalLosses( fitch.getTotalLosses() ); setTotalUnchanged( fitch.getTotalUnchanged() ); } - - private void executeFitchParsimonyOnSecondaryFeatures( - final boolean use_last, - final boolean randomize, - final long random_number_seed ) { + + private void executeFitchParsimonyOnSecondaryFeatures( final boolean use_last, + final boolean randomize, + final long random_number_seed ) { reset(); if ( use_last ) { System.out.println( " Fitch parsimony: use_last = true" ); @@ -229,22 +230,19 @@ public final class DomainParsimonyCalculator { fitch.setUseLast( use_last ); fitch.setReturnGainLossMatrix( true ); fitch.setReturnInternalStates( true ); - - final Map> map = getDomainIdToSecondaryFeaturesMap(); - final Map newmap = new HashMap(); - final Iterator>> it = map.entrySet().iterator(); - while (it.hasNext()) { - final Map.Entry> pair = (Map.Entry>)it.next(); + final Map> map = getDomainIdToSecondaryFeaturesMap(); + final Map newmap = new HashMap(); + final Iterator>> it = map.entrySet().iterator(); + while ( it.hasNext() ) { + final Map.Entry> pair = it.next(); if ( pair.getValue().size() != 1 ) { - throw new IllegalArgumentException( pair.getKey().getId() + " mapps to " + pair.getValue().size() + " items" ); + throw new IllegalArgumentException( pair.getKey() + " mapps to " + pair.getValue().size() + " items" ); } newmap.put( pair.getKey(), ( String ) pair.getValue().toArray()[ 0 ] ); } - - CharacterStateMatrix states =createMatrixOfSecondaryFeatureBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList(), - newmap ); - - fitch.execute( getPhylogeny(), states ); + final CharacterStateMatrix states = createMatrixOfSecondaryFeatureBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList(), + newmap ); + fitch.execute( getPhylogeny(), states, true ); setGainLossMatrix( fitch.getGainLossMatrix() ); setBinaryInternalStatesMatrix( fitch.getInternalStatesMatrix() ); setCost( fitch.getCost() ); @@ -256,7 +254,7 @@ public final class DomainParsimonyCalculator { public void executeFitchParsimonyOnBinaryDomainCombintion( final boolean use_last ) { executeFitchParsimony( false, use_last, false, 0 ); } - + public void executeFitchParsimonyOnBinaryDomainCombintionOnSecondaryFeatures( final boolean use_last ) { executeFitchParsimonyOnSecondaryFeatures( use_last, false, 0 ); } @@ -350,7 +348,7 @@ public final class DomainParsimonyCalculator { return _cost; } - private Map> getDomainIdToSecondaryFeaturesMap() { + private Map> getDomainIdToSecondaryFeaturesMap() { return _domain_id_to_secondary_features_map; } @@ -415,7 +413,7 @@ public final class DomainParsimonyCalculator { return _phylogeny; } - private SortedSet getPositiveFilter() { + private SortedSet getPositiveFilter() { return _positive_filter; } @@ -500,7 +498,7 @@ public final class DomainParsimonyCalculator { _cost = cost; } - private void setDomainIdToSecondaryFeaturesMap( final Map> domain_id_to_secondary_features_map ) { + private void setDomainIdToSecondaryFeaturesMap( final Map> domain_id_to_secondary_features_map ) { _domain_id_to_secondary_features_map = domain_id_to_secondary_features_map; } @@ -508,7 +506,7 @@ public final class DomainParsimonyCalculator { _gain_loss_matrix = gain_loss_matrix; } - private void setPositiveFilter( final SortedSet positive_filter ) { + private void setPositiveFilter( final SortedSet positive_filter ) { _positive_filter = positive_filter; } @@ -539,14 +537,13 @@ public final class DomainParsimonyCalculator { public static DomainParsimonyCalculator createInstance( final Phylogeny phylogeny, final List gwcd_list, - final Map> domain_id_to_secondary_features_map ) { + final Map> domain_id_to_secondary_features_map ) { if ( phylogeny.getNumberOfExternalNodes() != gwcd_list.size() ) { throw new IllegalArgumentException( "size of external nodes does not equal size of genome wide combinable domains list" ); } return new DomainParsimonyCalculator( phylogeny, gwcd_list, domain_id_to_secondary_features_map ); } - /** * For folds instead of Pfam-domains, for example * @@ -555,7 +552,7 @@ public final class DomainParsimonyCalculator { * @return */ static CharacterStateMatrix createMatrixOfSecondaryFeaturePresenceOrAbsence( final List gwcd_list, - final Map> domain_id_to_second_features_map, + final Map> domain_id_to_second_features_map, final Map mapping_results_map ) { if ( gwcd_list.isEmpty() ) { throw new IllegalArgumentException( "genome wide combinable domains list is empty" ); @@ -568,7 +565,7 @@ public final class DomainParsimonyCalculator { for( final GenomeWideCombinableDomains gwcd : gwcd_list ) { int mapped = 0; int not_mapped = 0; - for( final DomainId domain : gwcd.getAllDomainIds() ) { + for( final String domain : gwcd.getAllDomainIds() ) { if ( domain_id_to_second_features_map.containsKey( domain ) ) { all_secondary_features.addAll( domain_id_to_second_features_map.get( domain ) ); mapped++; @@ -602,7 +599,7 @@ public final class DomainParsimonyCalculator { all_identifiers.add( species_id ); matrix.setIdentifier( identifier_index, species_id ); final Set all_second_per_gwcd = new HashSet(); - for( final DomainId domain : gwcd.getAllDomainIds() ) { + for( final String domain : gwcd.getAllDomainIds() ) { if ( domain_id_to_second_features_map.containsKey( domain ) ) { all_second_per_gwcd.addAll( domain_id_to_second_features_map.get( domain ) ); } @@ -619,10 +616,9 @@ public final class DomainParsimonyCalculator { } return matrix; } - - + public static CharacterStateMatrix createMatrixOfSecondaryFeatureBinaryDomainCombinationPresenceOrAbsence( final List gwcd_list, - final Map domain_id_to_second_features_map) { + final Map domain_id_to_second_features_map ) { if ( gwcd_list.isEmpty() ) { throw new IllegalArgumentException( "genome wide combinable domains list is empty" ); } @@ -633,12 +629,10 @@ public final class DomainParsimonyCalculator { final SortedSet all_binary_combinations_mapped = new TreeSet(); final Set[] binary_combinations_per_genome_mapped = new HashSet[ number_of_identifiers ]; int identifier_index = 0; - final SortedSet no_mappings = new TreeSet(); for( final GenomeWideCombinableDomains gwcd : gwcd_list ) { binary_combinations_per_genome_mapped[ identifier_index ] = new HashSet(); for( final BinaryDomainCombination bc : gwcd.toBinaryDomainCombinations() ) { - final BinaryDomainCombination mapped_bc = mapBinaryDomainCombination( domain_id_to_second_features_map, bc, no_mappings ); @@ -647,14 +641,13 @@ public final class DomainParsimonyCalculator { } ++identifier_index; } - if ( !no_mappings.isEmpty() ) { - ForesterUtil.programMessage( surfacing.PRG_NAME, "No mappings for the following (" + no_mappings.size() + "):" ); + ForesterUtil.programMessage( surfacing.PRG_NAME, "No mappings for the following (" + no_mappings.size() + + "):" ); for( final String id : no_mappings ) { - ForesterUtil.programMessage( surfacing.PRG_NAME, id); + ForesterUtil.programMessage( surfacing.PRG_NAME, id ); } } - final int number_of_characters = all_binary_combinations_mapped.size(); final CharacterStateMatrix matrix = new BasicCharacterStateMatrix( number_of_identifiers, number_of_characters ); @@ -694,35 +687,28 @@ public final class DomainParsimonyCalculator { return matrix; } - private static BinaryDomainCombination mapBinaryDomainCombination( final Map domain_id_to_second_features_map, + private static BinaryDomainCombination mapBinaryDomainCombination( final Map domain_id_to_second_features_map, final BinaryDomainCombination bc, final SortedSet no_mappings ) { String id0 = ""; String id1 = ""; - if ( !domain_id_to_second_features_map.containsKey( bc.getId0() ) ) { - - no_mappings.add(bc.getId0().getId() ); - id0 = bc.getId0().getId(); + no_mappings.add( bc.getId0() ); + id0 = bc.getId0(); } else { - id0 = domain_id_to_second_features_map.get( bc.getId0()); + id0 = domain_id_to_second_features_map.get( bc.getId0() ); } if ( !domain_id_to_second_features_map.containsKey( bc.getId1() ) ) { - - no_mappings.add(bc.getId1().getId() ); - id1 = bc.getId1().getId(); + no_mappings.add( bc.getId1() ); + id1 = bc.getId1(); } else { - id1 = domain_id_to_second_features_map.get( bc.getId1()); + id1 = domain_id_to_second_features_map.get( bc.getId1() ); } - return new BasicBinaryDomainCombination( id0, id1 ); } - - - - + public static CharacterStateMatrix createMatrixOfBinaryDomainCombinationPresenceOrAbsence( final List gwcd_list ) { if ( gwcd_list.isEmpty() ) { throw new IllegalArgumentException( "genome wide combinable domains list is empty" ); @@ -783,7 +769,7 @@ public final class DomainParsimonyCalculator { } public static CharacterStateMatrix createMatrixOfDomainPresenceOrAbsence( final List gwcd_list, - final SortedSet positive_filter ) { + final SortedSet positive_filter ) { if ( gwcd_list.isEmpty() ) { throw new IllegalArgumentException( "genome wide combinable domains list is empty" ); } @@ -791,9 +777,9 @@ public final class DomainParsimonyCalculator { throw new IllegalArgumentException( "positive filter is empty" ); } final int number_of_identifiers = gwcd_list.size(); - final SortedSet all_domain_ids = new TreeSet(); + final SortedSet all_domain_ids = new TreeSet(); for( final GenomeWideCombinableDomains gwcd : gwcd_list ) { - for( final DomainId domain : gwcd.getAllDomainIds() ) { + for( final String domain : gwcd.getAllDomainIds() ) { all_domain_ids.add( domain ); } } @@ -801,7 +787,7 @@ public final class DomainParsimonyCalculator { if ( positive_filter != null ) { //number_of_characters = positive_filter.size(); -- bad if doms in filter but not in genomes number_of_characters = 0; - for( final DomainId id : all_domain_ids ) { + for( final String id : all_domain_ids ) { if ( positive_filter.contains( id ) ) { number_of_characters++; } @@ -810,13 +796,13 @@ public final class DomainParsimonyCalculator { final CharacterStateMatrix matrix = new BasicCharacterStateMatrix( number_of_identifiers, number_of_characters ); int character_index = 0; - for( final DomainId id : all_domain_ids ) { + for( final String id : all_domain_ids ) { if ( positive_filter == null ) { - matrix.setCharacter( character_index++, id.getId() ); + matrix.setCharacter( character_index++, id ); } else { if ( positive_filter.contains( id ) ) { - matrix.setCharacter( character_index++, id.getId() ); + matrix.setCharacter( character_index++, id ); } } } @@ -833,7 +819,7 @@ public final class DomainParsimonyCalculator { if ( ForesterUtil.isEmpty( matrix.getCharacter( ci ) ) ) { throw new RuntimeException( "this should not have happened: problem with character #" + ci ); } - final DomainId id = new DomainId( matrix.getCharacter( ci ) ); + final String id = matrix.getCharacter( ci ); if ( gwcd.contains( id ) ) { matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT ); } @@ -846,8 +832,6 @@ public final class DomainParsimonyCalculator { return matrix; } - - private static int getStateSumDeltaOnNode( final String node_identifier, final CharacterStateMatrix gain_loss_matrix, final GainLossStates state ) {