+ /**
+ * For folds instead of Pfam-domains, for example
+ *
+ *
+ * @param gwcd_list
+ * @return
+ */
+ static CharacterStateMatrix<BinaryStates> createMatrixOfSecondaryFeaturePresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list,
+ final Map<String, Set<String>> domain_id_to_second_features_map,
+ final Map<Species, MappingResults> mapping_results_map ) {
+ if ( gwcd_list.isEmpty() ) {
+ throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
+ }
+ if ( ( domain_id_to_second_features_map == null ) || domain_id_to_second_features_map.isEmpty() ) {
+ throw new IllegalArgumentException( "domain id to secondary features map is null or empty" );
+ }
+ final int number_of_identifiers = gwcd_list.size();
+ final SortedSet<String> all_secondary_features = new TreeSet<String>();
+ for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
+ int mapped = 0;
+ int not_mapped = 0;
+ for( final String domain : gwcd.getAllDomainIds() ) {
+ if ( domain_id_to_second_features_map.containsKey( domain ) ) {
+ all_secondary_features.addAll( domain_id_to_second_features_map.get( domain ) );
+ mapped++;
+ }
+ else {
+ not_mapped++;
+ }
+ }
+ if ( mapping_results_map != null ) {
+ final MappingResults mr = new MappingResults();
+ mr.setDescription( gwcd.getSpecies().getSpeciesId() );
+ mr.setSumOfSuccesses( mapped );
+ mr.setSumOfFailures( not_mapped );
+ mapping_results_map.put( gwcd.getSpecies(), mr );
+ }
+ }
+ final int number_of_characters = all_secondary_features.size();
+ final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
+ number_of_characters );
+ int character_index = 0;
+ for( final String second_id : all_secondary_features ) {
+ matrix.setCharacter( character_index++, second_id );
+ }
+ int identifier_index = 0;
+ final Set<String> all_identifiers = new HashSet<String>();
+ for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
+ final String species_id = gwcd.getSpecies().getSpeciesId();
+ if ( all_identifiers.contains( species_id ) ) {
+ throw new IllegalArgumentException( "species [" + species_id + "] is not unique" );
+ }
+ all_identifiers.add( species_id );
+ matrix.setIdentifier( identifier_index, species_id );
+ final Set<String> all_second_per_gwcd = new HashSet<String>();
+ for( final String domain : gwcd.getAllDomainIds() ) {
+ if ( domain_id_to_second_features_map.containsKey( domain ) ) {
+ all_second_per_gwcd.addAll( domain_id_to_second_features_map.get( domain ) );
+ }
+ }
+ for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
+ if ( all_second_per_gwcd.contains( matrix.getCharacter( ci ) ) ) {
+ matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
+ }
+ else {
+ matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.ABSENT );
+ }
+ }
+ ++identifier_index;
+ }
+ return matrix;
+ }
+
+ /**
+  * Builds a binary presence/absence matrix of binary domain combinations
+  * whose domain ids have been replaced by their secondary features.
+  * <p>
+  * Every binary domain combination of every genome is passed through
+  * {@code mapBinaryDomainCombination}; the sorted set of all resulting
+  * combinations forms the characters of the matrix (named by their
+  * {@code toString()}), and every entry of {@code gwcd_list} forms one
+  * identifier, labelled with its species id. Domain ids without a mapping
+  * are kept unchanged and are reported through
+  * {@code ForesterUtil.programMessage}.
+  *
+  * @param gwcd_list
+  *            the genomes to process, must not be empty
+  * @param domain_id_to_second_features_map
+  *            maps a domain id to its secondary feature, must be neither
+  *            null nor empty
+  * @return the presence/absence matrix (identifiers: species ids,
+  *         characters: mapped binary domain combinations)
+  * @throws IllegalArgumentException
+  *             if the list is empty, the map is null or empty, or a species
+  *             id occurs more than once in the list
+  */
+ public static CharacterStateMatrix<BinaryStates> createMatrixOfSecondaryFeatureBinaryDomainCombinationPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list,
+         final Map<String, String> domain_id_to_second_features_map ) {
+     if ( gwcd_list.isEmpty() ) {
+         throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
+     }
+     if ( ( domain_id_to_second_features_map == null ) || domain_id_to_second_features_map.isEmpty() ) {
+         throw new IllegalArgumentException( "domain id to secondary features map is null or empty" );
+     }
+     final int number_of_identifiers = gwcd_list.size();
+     final SortedSet<BinaryDomainCombination> all_binary_combinations_mapped = new TreeSet<BinaryDomainCombination>();
+     // Java does not allow "new HashSet<BinaryDomainCombination>[ n ]", hence
+     // the raw array creation. The array never leaves this method and only
+     // HashSet<BinaryDomainCombination> instances are stored in it, so the
+     // unchecked conversion is safe.
+     @SuppressWarnings( "unchecked")
+     final Set<BinaryDomainCombination>[] binary_combinations_per_genome_mapped = new HashSet[ number_of_identifiers ];
+     int identifier_index = 0;
+     // Collects every domain id for which no mapping exists (sorted, for reporting).
+     final SortedSet<String> no_mappings = new TreeSet<String>();
+     for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
+         final Set<BinaryDomainCombination> mapped_for_genome = new HashSet<BinaryDomainCombination>();
+         binary_combinations_per_genome_mapped[ identifier_index ] = mapped_for_genome;
+         for( final BinaryDomainCombination bc : gwcd.toBinaryDomainCombinations() ) {
+             final BinaryDomainCombination mapped_bc = mapBinaryDomainCombination( domain_id_to_second_features_map,
+                                                                                   bc,
+                                                                                   no_mappings );
+             all_binary_combinations_mapped.add( mapped_bc );
+             mapped_for_genome.add( mapped_bc );
+         }
+         ++identifier_index;
+     }
+     if ( !no_mappings.isEmpty() ) {
+         ForesterUtil.programMessage( surfacing.PRG_NAME, "No mappings for the following (" + no_mappings.size()
+                 + "):" );
+         for( final String id : no_mappings ) {
+             ForesterUtil.programMessage( surfacing.PRG_NAME, id );
+         }
+     }
+     final int number_of_characters = all_binary_combinations_mapped.size();
+     final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
+                                                                                                                                              number_of_characters );
+     int character_index = 0;
+     for( final BinaryDomainCombination bc : all_binary_combinations_mapped ) {
+         matrix.setCharacter( character_index++, bc.toString() );
+     }
+     identifier_index = 0;
+     final Set<String> all_identifiers = new HashSet<String>();
+     for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
+         final String species_id = gwcd.getSpecies().getSpeciesId();
+         // A duplicate species is a problem with the caller's input, so it is
+         // reported as IllegalArgumentException, exactly as
+         // createMatrixOfSecondaryFeaturePresenceOrAbsence does for the same
+         // condition. Set.add returns false if the id was already present.
+         if ( !all_identifiers.add( species_id ) ) {
+             throw new IllegalArgumentException( "species [" + species_id + "] is not unique" );
+         }
+         matrix.setIdentifier( identifier_index, species_id );
+         final Set<BinaryDomainCombination> mapped_for_genome = binary_combinations_per_genome_mapped[ identifier_index ];
+         for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
+             // The lookup key is rebuilt from the character name, using the
+             // combination class matching this genome's combination type.
+             // NOTE(review): mapBinaryDomainCombination always stores
+             // BasicBinaryDomainCombination instances; whether contains()
+             // matches a directed key against them depends on the
+             // equals/hashCode of those classes - confirm.
+             final String character = matrix.getCharacter( ci );
+             final BinaryDomainCombination bc;
+             if ( gwcd.getDomainCombinationType() == DomainCombinationType.DIRECTED_ADJACTANT ) {
+                 bc = AdjactantDirectedBinaryDomainCombination.createInstance( character );
+             }
+             else if ( gwcd.getDomainCombinationType() == DomainCombinationType.DIRECTED ) {
+                 bc = DirectedBinaryDomainCombination.createInstance( character );
+             }
+             else {
+                 bc = BasicBinaryDomainCombination.createInstance( character );
+             }
+             if ( mapped_for_genome.contains( bc ) ) {
+                 matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
+             }
+             else {
+                 matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.ABSENT );
+             }
+         }
+         ++identifier_index;
+     }
+     return matrix;
+ }
+
+ private static BinaryDomainCombination mapBinaryDomainCombination( final Map<String, String> domain_id_to_second_features_map,
+ final BinaryDomainCombination bc,
+ final SortedSet<String> no_mappings ) {
+ String id0 = "";
+ String id1 = "";
+ if ( !domain_id_to_second_features_map.containsKey( bc.getId0() ) ) {
+ no_mappings.add( bc.getId0() );
+ id0 = bc.getId0();
+ }
+ else {
+ id0 = domain_id_to_second_features_map.get( bc.getId0() );
+ }
+ if ( !domain_id_to_second_features_map.containsKey( bc.getId1() ) ) {
+ no_mappings.add( bc.getId1() );
+ id1 = bc.getId1();
+ }
+ else {
+ id1 = domain_id_to_second_features_map.get( bc.getId1() );
+ }
+ return new BasicBinaryDomainCombination( id0, id1 );
+ }
+