package org.forester.surfacing;
-import java.text.DecimalFormat;
-import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDomains {
- private final static NumberFormat FORMATTER = new DecimalFormat( "0.0E0" );
+ /**
+  * Orders CombinableDomains by their number of combinable domains, largest
+  * first. Entries with equal counts are ordered by comparing their key domains.
+  */
+ private static final Comparator<CombinableDomains> DESCENDING_COMBINATIONS_COUNT_ORDER = new Comparator<CombinableDomains>() {
+
+     @Override
+     public int compare( final CombinableDomains first, final CombinableDomains second ) {
+         // Descending order: the entry with the smaller count sorts after the other one.
+         if ( first.getNumberOfCombinableDomains() < second.getNumberOfCombinableDomains() ) {
+             return 1;
+         }
+         if ( first.getNumberOfCombinableDomains() > second.getNumberOfCombinableDomains() ) {
+             return -1;
+         }
+         // Equal counts: fall back to the key domain comparison.
+         return first.getKeyDomain().compareTo( second.getKeyDomain() );
+     }
+ };
private static final Comparator<CombinableDomains> DESCENDING_KEY_DOMAIN_COUNT_ORDER = new Comparator<CombinableDomains>() {
- @Override
- public int compare( final CombinableDomains d1,
- final CombinableDomains d2 ) {
- if ( d1.getKeyDomainCount() < d2
- .getKeyDomainCount() ) {
- return 1;
- }
- else if ( d1
- .getKeyDomainCount() > d2
- .getKeyDomainCount() ) {
- return -1;
- }
- else {
- return d1
- .getKeyDomain()
- .compareTo( d2
- .getKeyDomain() );
- }
- }
- };
+ @Override
+ public int compare( final CombinableDomains d1,
+ final CombinableDomains d2 ) {
+ if ( d1.getKeyDomainCount() < d2
+ .getKeyDomainCount() ) {
+ return 1;
+ }
+ else if ( d1
+ .getKeyDomainCount() > d2
+ .getKeyDomainCount() ) {
+ return -1;
+ }
+ else {
+ return d1
+ .getKeyDomain()
+ .compareTo( d2
+ .getKeyDomain() );
+ }
+ }
+ };
private static final Comparator<CombinableDomains> DESCENDING_KEY_DOMAIN_PROTEINS_COUNT_ORDER = new Comparator<CombinableDomains>() {
- @Override
- public int compare( final CombinableDomains d1,
- final CombinableDomains d2 ) {
- if ( d1.getKeyDomainProteinsCount() < d2
- .getKeyDomainProteinsCount() ) {
- return 1;
- }
- else if ( d1
- .getKeyDomainProteinsCount() > d2
- .getKeyDomainProteinsCount() ) {
- return -1;
- }
- else {
- return d1
- .getKeyDomain()
- .compareTo( d2
- .getKeyDomain() );
- }
- }
- };
- private static final Comparator<CombinableDomains> DESCENDING_COMBINATIONS_COUNT_ORDER = new Comparator<CombinableDomains>() {
-
- @Override
- public int compare( final CombinableDomains d1,
- final CombinableDomains d2 ) {
- if ( d1.getNumberOfCombinableDomains() < d2
- .getNumberOfCombinableDomains() ) {
- return 1;
- }
- else if ( d1
- .getNumberOfCombinableDomains() > d2
- .getNumberOfCombinableDomains() ) {
- return -1;
- }
- else {
- return d1
- .getKeyDomain()
- .compareTo( d2
- .getKeyDomain() );
- }
- }
- };
+ @Override
+ public int compare( final CombinableDomains d1,
+ final CombinableDomains d2 ) {
+ if ( d1.getKeyDomainProteinsCount() < d2
+ .getKeyDomainProteinsCount() ) {
+ return 1;
+ }
+ else if ( d1
+ .getKeyDomainProteinsCount() > d2
+ .getKeyDomainProteinsCount() ) {
+ return -1;
+ }
+ else {
+ return d1
+ .getKeyDomain()
+ .compareTo( d2
+ .getKeyDomain() );
+ }
+ }
+ };
final private SortedMap<String, CombinableDomains> _combinable_domains_map;
- final private Species _species;
final private DomainCombinationType _dc_type;
+ final private Species _species;
private BasicGenomeWideCombinableDomains( final Species species, final DomainCombinationType dc_type ) {
_combinable_domains_map = new TreeMap<String, CombinableDomains>();
_dc_type = dc_type;
}
- private void add( final String key, final CombinableDomains cdc ) {
- _combinable_domains_map.put( key, cdc );
- }
-
@Override
public boolean contains( final String key_id ) {
return _combinable_domains_map.containsKey( key_id );
return toStringBuilder( GenomeWideCombinableDomainsSortOrder.ALPHABETICAL_KEY_ID ).toString();
}
- // Produces something like:
- // 2-oxoacid_dh 5 5 2 4.8E-67 Biotin_lipoyl [4], E3_binding [3]
- @Override
- public StringBuilder toStringBuilder( final GenomeWideCombinableDomainsSortOrder sort_order ) {
+ // Produces something like:
+ // 2-oxoacid_dh 5 5 2 Biotin_lipoyl [4], E3_binding [3]
+ @Override
+ public StringBuilder toStringBuilder( final GenomeWideCombinableDomainsSortOrder sort_order ) {
final StringBuilder sb = new StringBuilder();
final List<CombinableDomains> combinable_domains = new ArrayList<CombinableDomains>();
for( final String key : getAllCombinableDomainsIds().keySet() ) {
sb.append( ForesterUtil.pad( new StringBuffer( "" + cb.getKeyDomainCount() ), 8, ' ', false ) );
sb.append( ForesterUtil.pad( new StringBuffer( "" + cb.getKeyDomainProteinsCount() ), 8, ' ', false ) );
sb.append( ForesterUtil.pad( new StringBuffer( "" + cb.getNumberOfCombinableDomains() ), 8, ' ', false ) );
- sb.append( ForesterUtil.pad( new StringBuffer( ""
- + FORMATTER.format( cb.getKeyDomainConfidenceDescriptiveStatistics()
- .median() ) ),
- 10,
- ' ',
- false ) );
sb.append( cb.getCombiningDomainIdsAsStringBuilder() );
sb.append( ForesterUtil.getLineSeparator() );
}
return sb;
}
- private static void countDomains( final Map<String, Integer> domain_counts,
- final Map<String, Integer> domain_protein_counts,
- final Map<String, DescriptiveStatistics> stats,
- final Set<String> saw_c,
- final String id_i,
- final double support ) {
- if ( domain_counts.containsKey( id_i ) ) {
- domain_counts.put( id_i, 1 + domain_counts.get( ( id_i ) ) );
- if ( !saw_c.contains( id_i ) ) {
- domain_protein_counts.put( id_i, 1 + domain_protein_counts.get( ( id_i ) ) );
- }
- }
- else {
- stats.put( id_i, new BasicDescriptiveStatistics() );
- domain_counts.put( id_i, 1 );
- domain_protein_counts.put( id_i, 1 );
- }
- stats.get( id_i ).addValue( support );
- saw_c.add( id_i );
+ private void add( final String key, final CombinableDomains cdc ) {
+ _combinable_domains_map.put( key, cdc );
}
public static BasicGenomeWideCombinableDomains createInstance( final List<Protein> protein_list,
final Map<String, DescriptiveStatistics> domain_number_stats_by_dc ) {
final BasicGenomeWideCombinableDomains instance = new BasicGenomeWideCombinableDomains( species, dc_type );
final Map<String, Integer> domain_counts = new HashMap<String, Integer>();
- final Map<String, Integer> domain_protein_counts = new HashMap<String, Integer>();
- final Map<String, DescriptiveStatistics> stats = new HashMap<String, DescriptiveStatistics>();
for( final Protein protein : protein_list ) {
if ( !protein.getSpecies().equals( species ) ) {
throw new IllegalArgumentException( "species (" + protein.getSpecies()
- + ") does not match species of combinable domains collection (" + species + ")" );
+ + ") does not match species of combinable domains collection (" + species + ")" );
}
final Set<String> saw_i = new HashSet<String>();
final Set<String> saw_c = new HashSet<String>();
final Domain pd_i = protein.getProteinDomain( i );
final String id_i = pd_i.getDomainId();
final int current_start = pd_i.getFrom();
- BasicGenomeWideCombinableDomains.countDomains( domain_counts,
- domain_protein_counts,
- stats,
- saw_c,
- id_i,
- pd_i.getPerSequenceEvalue() );
+ BasicGenomeWideCombinableDomains.countDomains( domain_counts, saw_c, id_i );
if ( !saw_i.contains( id_i ) ) {
if ( dc_type == DomainCombinationType.BASIC ) {
saw_i.add( id_i );
else {
domain_combination = new BasicCombinableDomains( pd_i.getDomainId(), species );
}
- // ^^ if ( ( domain_id_to_go_ids_map != null )
- // ^^ && domain_id_to_go_ids_map.containsKey( pd_i.getDomainId() ) ) {
- // ^^ final List<GoId> go_ids = domain_id_to_go_ids_map.get( pd_i.getDomainId() );
- // ^^ for( final GoId go_id : go_ids ) {
- // ^^ domain_combination.getKeyDomain().addGoId( go_id );
- // ^^ }
- // ^^ }
instance.add( id_i, domain_combination );
}
+ domain_combination.addKeyDomainProtein( protein.getProteinId().getId() );//^^^^^^^^^^^^^^
final Set<String> saw_j = new HashSet<String>();
if ( ignore_combination_with_same_domain ) {
saw_j.add( id_i );
saw_j.add( id );
if ( dc_type != DomainCombinationType.DIRECTED_ADJACTANT ) {
domain_combination
- .addCombinableDomain( protein.getProteinDomain( j ).getDomainId() );
+ .addCombinableDomain( protein.getProteinDomain( j ).getDomainId() );
}
else {
if ( closest == null ) {
domain_number_stats_by_dc.get( dc_str ).addValue( protein.getNumberOfProteinDomains() );
}
}
- //
}
}
}
for( final String key_id : domain_counts.keySet() ) {
instance.get( key_id ).setKeyDomainCount( domain_counts.get( key_id ) );
- instance.get( key_id ).setKeyDomainProteinsCount( domain_protein_counts.get( key_id ) );
- instance.get( key_id ).setKeyDomainConfidenceDescriptiveStatistics( stats.get( key_id ) );
}
return instance;
}
+
+ /**
+  * Increments the count stored for id_i in domain_counts, starting at 1 when
+  * the id is not yet present, and records id_i in saw_c.
+  *
+  * @param domain_counts map from domain id to its count; updated in place
+  * @param saw_c set of ids; id_i is added to it
+  * @param id_i the domain id being counted
+  */
+ private static void countDomains( final Map<String, Integer> domain_counts,
+                                   final Set<String> saw_c,
+                                   final String id_i ) {
+     final int updated_count = domain_counts.containsKey( id_i ) ? domain_counts.get( id_i ) + 1 : 1;
+     domain_counts.put( id_i, updated_count );
+     saw_c.add( id_i );
+ }
}