X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fsurfacing%2FBasicGenomeWideCombinableDomains.java;h=2813185b625a095310fa962f687cc2fbdfd224d6;hb=876ced97014fafe54ff51dcc17da8bf25913fec7;hp=ca8bff0a6cf3a990ebc7339c10a2f6ad9e258626;hpb=48f7a89be9d34f1930a1f863e608235cc27184c5;p=jalview.git diff --git a/forester/java/src/org/forester/surfacing/BasicGenomeWideCombinableDomains.java b/forester/java/src/org/forester/surfacing/BasicGenomeWideCombinableDomains.java index ca8bff0..2813185 100644 --- a/forester/java/src/org/forester/surfacing/BasicGenomeWideCombinableDomains.java +++ b/forester/java/src/org/forester/surfacing/BasicGenomeWideCombinableDomains.java @@ -1,8 +1,6 @@ package org.forester.surfacing; -import java.text.DecimalFormat; -import java.text.NumberFormat; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; @@ -17,119 +15,115 @@ import java.util.TreeMap; import java.util.TreeSet; import org.forester.go.GoId; -import org.forester.surfacing.BinaryDomainCombination.DomainCombinationType; +import org.forester.protein.BinaryDomainCombination; +import org.forester.protein.BinaryDomainCombination.DomainCombinationType; +import org.forester.protein.Domain; +import org.forester.protein.Protein; +import org.forester.species.Species; import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.DescriptiveStatistics; import org.forester.util.ForesterUtil; public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDomains { - private final static NumberFormat FORMATTER = new DecimalFormat( "0.0E0" ); - private static final Comparator DESCENDING_KEY_DOMAIN_COUNT_ORDER = new Comparator() { + private static final Comparator DESCENDING_COMBINATIONS_COUNT_ORDER = new Comparator() { - public int compare( final CombinableDomains d1, - final CombinableDomains d2 ) { - if ( d1 - .getKeyDomainCount() < d2 - .getKeyDomainCount() ) { - return 1; - } - else if ( d1 - .getKeyDomainCount() > d2 - .getKeyDomainCount() ) { - return -1; - } - else { - return d1 - .getKeyDomain() - .getId() - .compareTo( d2 - .getKeyDomain() - .getId() ); - } - } - }; - private static final Comparator DESCENDING_KEY_DOMAIN_PROTEINS_COUNT_ORDER = new Comparator() { + @Override + public int compare( final CombinableDomains d1, + final CombinableDomains d2 ) { + if ( d1.getNumberOfCombinableDomains() < d2 + .getNumberOfCombinableDomains() ) { + return 1; + } + else if ( d1 + .getNumberOfCombinableDomains() > d2 + .getNumberOfCombinableDomains() ) { + return -1; + } + else { + return d1 + .getKeyDomain() + .compareTo( d2 + .getKeyDomain() ); + } + } + }; + private static final Comparator DESCENDING_KEY_DOMAIN_COUNT_ORDER = new Comparator() { - public int compare( final CombinableDomains d1, - final CombinableDomains d2 ) { - if ( d1 - .getKeyDomainProteinsCount() < d2 - .getKeyDomainProteinsCount() ) { - return 1; - } - else if ( d1 - .getKeyDomainProteinsCount() > d2 - .getKeyDomainProteinsCount() ) { - return -1; - } - else { - return d1 - .getKeyDomain() - .getId() - .compareTo( d2 - .getKeyDomain() - .getId() ); - } - } - }; - private static final Comparator DESCENDING_COMBINATIONS_COUNT_ORDER = new Comparator() { + @Override + public int compare( final CombinableDomains d1, + final CombinableDomains d2 ) { + if ( d1.getKeyDomainCount() < d2 + .getKeyDomainCount() ) { + return 1; + } + else if ( d1 + .getKeyDomainCount() > d2 + .getKeyDomainCount() ) { + return -1; + } + else { + return d1 + .getKeyDomain() + .compareTo( d2 + .getKeyDomain() ); + } + } + }; + private static final Comparator DESCENDING_KEY_DOMAIN_PROTEINS_COUNT_ORDER = new Comparator() { - public int compare( final CombinableDomains d1, - final CombinableDomains d2 ) { - if ( d1 - .getNumberOfCombinableDomains() < d2 - .getNumberOfCombinableDomains() ) { - return 1; - } - else if ( d1 - .getNumberOfCombinableDomains() > d2 - .getNumberOfCombinableDomains() ) { - return -1; - } - else { - return d1 - .getKeyDomain() - .getId() - .compareTo( d2 - .getKeyDomain() - .getId() ); - } - } - }; - final private SortedMap _combinable_domains_map; - final private Species _species; - final private DomainCombinationType _dc_type; + @Override + public int compare( final CombinableDomains d1, + final CombinableDomains d2 ) { + if ( d1.getKeyDomainProteinsCount() < d2 + .getKeyDomainProteinsCount() ) { + return 1; + } + else if ( d1 + .getKeyDomainProteinsCount() > d2 + .getKeyDomainProteinsCount() ) { + return -1; + } + else { + return d1 + .getKeyDomain() + .compareTo( d2 + .getKeyDomain() ); + } + } + }; + final private SortedMap _combinable_domains_map; + final private DomainCombinationType _dc_type; + final private Species _species; private BasicGenomeWideCombinableDomains( final Species species, final DomainCombinationType dc_type ) { - _combinable_domains_map = new TreeMap(); + _combinable_domains_map = new TreeMap(); _species = species; _dc_type = dc_type; } - private void add( final DomainId key, final CombinableDomains cdc ) { - _combinable_domains_map.put( key, cdc ); - } - - public boolean contains( final DomainId key_id ) { + @Override + public boolean contains( final String key_id ) { return _combinable_domains_map.containsKey( key_id ); } - public CombinableDomains get( final DomainId key_id ) { + @Override + public CombinableDomains get( final String key_id ) { return _combinable_domains_map.get( key_id ); } - public SortedMap getAllCombinableDomainsIds() { + @Override + public SortedMap getAllCombinableDomainsIds() { return _combinable_domains_map; } @Override - public SortedSet getAllDomainIds() { - final SortedSet domains = new TreeSet(); - for( final DomainId key : getAllCombinableDomainsIds().keySet() ) { + public SortedSet getAllDomainIds() { + final SortedSet domains = new TreeSet(); + for( final String key : getAllCombinableDomainsIds().keySet() ) { final CombinableDomains cb = getAllCombinableDomainsIds().get( key ); - final List ds = cb.getAllDomains(); - for( final DomainId d : ds ) { + final List ds = cb.getAllDomains(); + for( final String d : ds ) { domains.add( d ); } } @@ -142,10 +136,10 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom } @Override - public SortedSet getMostPromiscuosDomain() { - final SortedSet doms = new TreeSet(); + public SortedSet getMostPromiscuosDomain() { + final SortedSet doms = new TreeSet(); final int max = ( int ) getPerGenomeDomainPromiscuityStatistics().getMax(); - for( final DomainId key : getAllCombinableDomainsIds().keySet() ) { + for( final String key : getAllCombinableDomainsIds().keySet() ) { final CombinableDomains cb = getAllCombinableDomainsIds().get( key ); if ( cb.getNumberOfCombinableDomains() == max ) { doms.add( key ); @@ -157,17 +151,19 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom @Override public DescriptiveStatistics getPerGenomeDomainPromiscuityStatistics() { final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); - for( final DomainId key : getAllCombinableDomainsIds().keySet() ) { + for( final String key : getAllCombinableDomainsIds().keySet() ) { final CombinableDomains cb = getAllCombinableDomainsIds().get( key ); stats.addValue( cb.getNumberOfCombinableDomains() ); } return stats; } + @Override public int getSize() { return _combinable_domains_map.size(); } + @Override public Species getSpecies() { return _species; } @@ -175,7 +171,7 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom @Override public SortedSet toBinaryDomainCombinations() { final SortedSet binary_combinations = new TreeSet(); - for( final DomainId key : getAllCombinableDomainsIds().keySet() ) { + for( final String key : getAllCombinableDomainsIds().keySet() ) { final CombinableDomains cb = getAllCombinableDomainsIds().get( key ); for( final BinaryDomainCombination b : cb.toBinaryDomainCombinations() ) { binary_combinations.add( b ); @@ -189,12 +185,13 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom return toStringBuilder( GenomeWideCombinableDomainsSortOrder.ALPHABETICAL_KEY_ID ).toString(); } - // Produces something like: - // 2-oxoacid_dh 5 5 2 4.8E-67 Biotin_lipoyl [4], E3_binding [3] - public StringBuilder toStringBuilder( final GenomeWideCombinableDomainsSortOrder sort_order ) { + // Produces something like: + // 2-oxoacid_dh 5 5 2 Biotin_lipoyl [4], E3_binding [3] + @Override + public StringBuilder toStringBuilder( final GenomeWideCombinableDomainsSortOrder sort_order ) { final StringBuilder sb = new StringBuilder(); final List combinable_domains = new ArrayList(); - for( final DomainId key : getAllCombinableDomainsIds().keySet() ) { + for( final String key : getAllCombinableDomainsIds().keySet() ) { final CombinableDomains cb = getAllCombinableDomainsIds().get( key ); combinable_domains.add( cb ); } @@ -213,39 +210,14 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom sb.append( ForesterUtil.pad( new StringBuffer( "" + cb.getKeyDomainCount() ), 8, ' ', false ) ); sb.append( ForesterUtil.pad( new StringBuffer( "" + cb.getKeyDomainProteinsCount() ), 8, ' ', false ) ); sb.append( ForesterUtil.pad( new StringBuffer( "" + cb.getNumberOfCombinableDomains() ), 8, ' ', false ) ); - sb - .append( ForesterUtil - .pad( new StringBuffer( "" - + FORMATTER - .format( cb.getKeyDomainConfidenceDescriptiveStatistics().median() ) ), - 10, - ' ', - false ) ); sb.append( cb.getCombiningDomainIdsAsStringBuilder() ); sb.append( ForesterUtil.getLineSeparator() ); } return sb; } - private static void countDomains( final Map domain_counts, - final Map domain_protein_counts, - final Map stats, - final Set saw_c, - final DomainId id_i, - final double support ) { - if ( domain_counts.containsKey( id_i ) ) { - domain_counts.put( id_i, 1 + domain_counts.get( ( id_i ) ) ); - if ( !saw_c.contains( id_i ) ) { - domain_protein_counts.put( id_i, 1 + domain_protein_counts.get( ( id_i ) ) ); - } - } - else { - stats.put( id_i, new BasicDescriptiveStatistics() ); - domain_counts.put( id_i, 1 ); - domain_protein_counts.put( id_i, 1 ); - } - stats.get( id_i ).addValue( support ); - saw_c.add( id_i ); + private void add( final String key, final CombinableDomains cdc ) { + _combinable_domains_map.put( key, cdc ); } public static BasicGenomeWideCombinableDomains createInstance( final List protein_list, @@ -255,42 +227,39 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom ignore_combination_with_same_domain, species, null, - DomainCombinationType.BASIC ); + DomainCombinationType.BASIC, + null, + null ); } public static BasicGenomeWideCombinableDomains createInstance( final List protein_list, final boolean ignore_combination_with_same_domain, final Species species, final DomainCombinationType dc_type ) { - return createInstance( protein_list, ignore_combination_with_same_domain, species, null, dc_type ); + return createInstance( protein_list, ignore_combination_with_same_domain, species, null, dc_type, null, null ); } public static BasicGenomeWideCombinableDomains createInstance( final List protein_list, final boolean ignore_combination_with_same_domain, final Species species, - final Map> domain_id_to_go_ids_map, - final DomainCombinationType dc_type ) { + final Map> domain_id_to_go_ids_map, + final DomainCombinationType dc_type, + final Map protein_length_stats_by_dc, + final Map domain_number_stats_by_dc ) { final BasicGenomeWideCombinableDomains instance = new BasicGenomeWideCombinableDomains( species, dc_type ); - final Map domain_counts = new HashMap(); - final Map domain_protein_counts = new HashMap(); - final Map stats = new HashMap(); + final Map domain_counts = new HashMap(); for( final Protein protein : protein_list ) { if ( !protein.getSpecies().equals( species ) ) { throw new IllegalArgumentException( "species (" + protein.getSpecies() - + ") does not match species of combinable domains collection (" + species + ")" ); + + ") does not match species of combinable domains collection (" + species + ")" ); } - final Set saw_i = new HashSet(); - final Set saw_c = new HashSet(); + final Set saw_i = new HashSet(); + final Set saw_c = new HashSet(); for( int i = 0; i < protein.getProteinDomains().size(); ++i ) { final Domain pd_i = protein.getProteinDomain( i ); - final DomainId id_i = pd_i.getDomainId(); + final String id_i = pd_i.getDomainId(); final int current_start = pd_i.getFrom(); - BasicGenomeWideCombinableDomains.countDomains( domain_counts, - domain_protein_counts, - stats, - saw_c, - id_i, - pd_i.getPerSequenceEvalue() ); + BasicGenomeWideCombinableDomains.countDomains( domain_counts, saw_c, id_i ); if ( !saw_i.contains( id_i ) ) { if ( dc_type == DomainCombinationType.BASIC ) { saw_i.add( id_i ); @@ -309,16 +278,10 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom else { domain_combination = new BasicCombinableDomains( pd_i.getDomainId(), species ); } - if ( ( domain_id_to_go_ids_map != null ) - && domain_id_to_go_ids_map.containsKey( pd_i.getDomainId() ) ) { - final List go_ids = domain_id_to_go_ids_map.get( pd_i.getDomainId() ); - for( final GoId go_id : go_ids ) { - domain_combination.getKeyDomain().addGoId( go_id ); - } - } instance.add( id_i, domain_combination ); } - final Set saw_j = new HashSet(); + domain_combination.addKeyDomainProtein( protein.getProteinId().getId() );//^^^^^^^^^^^^^^ + final Set saw_j = new HashSet(); if ( ignore_combination_with_same_domain ) { saw_j.add( id_i ); } @@ -329,12 +292,12 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom continue; } if ( i != j ) { - final DomainId id = protein.getProteinDomain( j ).getDomainId(); + final String id = protein.getProteinDomain( j ).getDomainId(); if ( !saw_j.contains( id ) ) { saw_j.add( id ); if ( dc_type != DomainCombinationType.DIRECTED_ADJACTANT ) { domain_combination - .addCombinableDomain( protein.getProteinDomain( j ).getDomainId() ); + .addCombinableDomain( protein.getProteinDomain( j ).getDomainId() ); } else { if ( closest == null ) { @@ -352,14 +315,44 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom if ( ( dc_type == DomainCombinationType.DIRECTED_ADJACTANT ) && ( closest != null ) ) { domain_combination.addCombinableDomain( closest.getDomainId() ); } + if ( protein_length_stats_by_dc != null ) { + final List dcs = domain_combination.toBinaryDomainCombinations(); + for( final BinaryDomainCombination dc : dcs ) { + final String dc_str = dc.toString(); + if ( !protein_length_stats_by_dc.containsKey( dc_str ) ) { + protein_length_stats_by_dc.put( dc_str, new BasicDescriptiveStatistics() ); + } + protein_length_stats_by_dc.get( dc_str ).addValue( protein.getLength() ); + } + } + if ( domain_number_stats_by_dc != null ) { + final List dcs = domain_combination.toBinaryDomainCombinations(); + for( final BinaryDomainCombination dc : dcs ) { + final String dc_str = dc.toString(); + if ( !domain_number_stats_by_dc.containsKey( dc_str ) ) { + domain_number_stats_by_dc.put( dc_str, new BasicDescriptiveStatistics() ); + } + domain_number_stats_by_dc.get( dc_str ).addValue( protein.getNumberOfProteinDomains() ); + } + } } } } - for( final DomainId key_id : domain_counts.keySet() ) { + for( final String key_id : domain_counts.keySet() ) { instance.get( key_id ).setKeyDomainCount( domain_counts.get( key_id ) ); - instance.get( key_id ).setKeyDomainProteinsCount( domain_protein_counts.get( key_id ) ); - instance.get( key_id ).setKeyDomainConfidenceDescriptiveStatistics( stats.get( key_id ) ); } return instance; } + + private static void countDomains( final Map domain_counts, + final Set saw_c, + final String id_i ) { + if ( domain_counts.containsKey( id_i ) ) { + domain_counts.put( id_i, 1 + domain_counts.get( ( id_i ) ) ); + } + else { + domain_counts.put( id_i, 1 ); + } + saw_c.add( id_i ); + } }