X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fsurfacing%2FBasicGenomeWideCombinableDomains.java;h=2813185b625a095310fa962f687cc2fbdfd224d6;hb=0b49b8e750b34d28a5989facdd8a7959870de996;hp=c6d205fe1e41b45a17374bd5a2d9e64b349ea2ae;hpb=87d34f343d0262cd0c009c6f1da058a5a217bc64;p=jalview.git diff --git a/forester/java/src/org/forester/surfacing/BasicGenomeWideCombinableDomains.java b/forester/java/src/org/forester/surfacing/BasicGenomeWideCombinableDomains.java index c6d205f..2813185 100644 --- a/forester/java/src/org/forester/surfacing/BasicGenomeWideCombinableDomains.java +++ b/forester/java/src/org/forester/surfacing/BasicGenomeWideCombinableDomains.java @@ -1,8 +1,6 @@ package org.forester.surfacing; -import java.text.DecimalFormat; -import java.text.NumberFormat; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; @@ -28,76 +26,75 @@ import org.forester.util.ForesterUtil; public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDomains { - private final static NumberFormat FORMATTER = new DecimalFormat( "0.0E0" ); + private static final Comparator DESCENDING_COMBINATIONS_COUNT_ORDER = new Comparator() { + + @Override + public int compare( final CombinableDomains d1, + final CombinableDomains d2 ) { + if ( d1.getNumberOfCombinableDomains() < d2 + .getNumberOfCombinableDomains() ) { + return 1; + } + else if ( d1 + .getNumberOfCombinableDomains() > d2 + .getNumberOfCombinableDomains() ) { + return -1; + } + else { + return d1 + .getKeyDomain() + .compareTo( d2 + .getKeyDomain() ); + } + } + }; private static final Comparator DESCENDING_KEY_DOMAIN_COUNT_ORDER = new Comparator() { - @Override - public int compare( final CombinableDomains d1, - final CombinableDomains d2 ) { - if ( d1.getKeyDomainCount() < d2 - .getKeyDomainCount() ) { - return 1; - } - else if ( d1 - .getKeyDomainCount() > d2 - .getKeyDomainCount() ) { - return -1; - } - else { - return d1 - .getKeyDomain() - .compareTo( d2 - .getKeyDomain() ); - } - } - }; + @Override + public int compare( final CombinableDomains d1, + final CombinableDomains d2 ) { + if ( d1.getKeyDomainCount() < d2 + .getKeyDomainCount() ) { + return 1; + } + else if ( d1 + .getKeyDomainCount() > d2 + .getKeyDomainCount() ) { + return -1; + } + else { + return d1 + .getKeyDomain() + .compareTo( d2 + .getKeyDomain() ); + } + } + }; private static final Comparator DESCENDING_KEY_DOMAIN_PROTEINS_COUNT_ORDER = new Comparator() { - @Override - public int compare( final CombinableDomains d1, - final CombinableDomains d2 ) { - if ( d1.getKeyDomainProteinsCount() < d2 - .getKeyDomainProteinsCount() ) { - return 1; - } - else if ( d1 - .getKeyDomainProteinsCount() > d2 - .getKeyDomainProteinsCount() ) { - return -1; - } - else { - return d1 - .getKeyDomain() - .compareTo( d2 - .getKeyDomain() ); - } - } - }; - private static final Comparator DESCENDING_COMBINATIONS_COUNT_ORDER = new Comparator() { - - @Override - public int compare( final CombinableDomains d1, - final CombinableDomains d2 ) { - if ( d1.getNumberOfCombinableDomains() < d2 - .getNumberOfCombinableDomains() ) { - return 1; - } - else if ( d1 - .getNumberOfCombinableDomains() > d2 - .getNumberOfCombinableDomains() ) { - return -1; - } - else { - return d1 - .getKeyDomain() - .compareTo( d2 - .getKeyDomain() ); - } - } - }; + @Override + public int compare( final CombinableDomains d1, + final CombinableDomains d2 ) { + if ( d1.getKeyDomainProteinsCount() < d2 + .getKeyDomainProteinsCount() ) { + return 1; + } + else if ( d1 + .getKeyDomainProteinsCount() > d2 + .getKeyDomainProteinsCount() ) { + return -1; + } + else { + return d1 + .getKeyDomain() + .compareTo( d2 + .getKeyDomain() ); + } + } + }; final private SortedMap _combinable_domains_map; - final private Species _species; final private DomainCombinationType _dc_type; + final private Species _species; private BasicGenomeWideCombinableDomains( final Species species, final DomainCombinationType dc_type ) { _combinable_domains_map = new TreeMap(); @@ -105,10 +102,6 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom _dc_type = dc_type; } - private void add( final String key, final CombinableDomains cdc ) { - _combinable_domains_map.put( key, cdc ); - } - @Override public boolean contains( final String key_id ) { return _combinable_domains_map.containsKey( key_id ); @@ -192,10 +185,10 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom return toStringBuilder( GenomeWideCombinableDomainsSortOrder.ALPHABETICAL_KEY_ID ).toString(); } - // Produces something like: - // 2-oxoacid_dh 5 5 2 4.8E-67 Biotin_lipoyl [4], E3_binding [3] - @Override - public StringBuilder toStringBuilder( final GenomeWideCombinableDomainsSortOrder sort_order ) { + // Produces something like: + // 2-oxoacid_dh 5 5 2 Biotin_lipoyl [4], E3_binding [3] + @Override + public StringBuilder toStringBuilder( final GenomeWideCombinableDomainsSortOrder sort_order ) { final StringBuilder sb = new StringBuilder(); final List combinable_domains = new ArrayList(); for( final String key : getAllCombinableDomainsIds().keySet() ) { @@ -217,37 +210,14 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom sb.append( ForesterUtil.pad( new StringBuffer( "" + cb.getKeyDomainCount() ), 8, ' ', false ) ); sb.append( ForesterUtil.pad( new StringBuffer( "" + cb.getKeyDomainProteinsCount() ), 8, ' ', false ) ); sb.append( ForesterUtil.pad( new StringBuffer( "" + cb.getNumberOfCombinableDomains() ), 8, ' ', false ) ); - sb.append( ForesterUtil.pad( new StringBuffer( "" - + FORMATTER.format( cb.getKeyDomainConfidenceDescriptiveStatistics() - .median() ) ), - 10, - ' ', - false ) ); sb.append( cb.getCombiningDomainIdsAsStringBuilder() ); sb.append( ForesterUtil.getLineSeparator() ); } return sb; } - private static void countDomains( final Map domain_counts, - final Map domain_protein_counts, - final Map stats, - final Set saw_c, - final String id_i, - final double support ) { - if ( domain_counts.containsKey( id_i ) ) { - domain_counts.put( id_i, 1 + domain_counts.get( ( id_i ) ) ); - if ( !saw_c.contains( id_i ) ) { - domain_protein_counts.put( id_i, 1 + domain_protein_counts.get( ( id_i ) ) ); - } - } - else { - stats.put( id_i, new BasicDescriptiveStatistics() ); - domain_counts.put( id_i, 1 ); - domain_protein_counts.put( id_i, 1 ); - } - stats.get( id_i ).addValue( support ); - saw_c.add( id_i ); + private void add( final String key, final CombinableDomains cdc ) { + _combinable_domains_map.put( key, cdc ); } public static BasicGenomeWideCombinableDomains createInstance( final List protein_list, @@ -278,12 +248,10 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom final Map domain_number_stats_by_dc ) { final BasicGenomeWideCombinableDomains instance = new BasicGenomeWideCombinableDomains( species, dc_type ); final Map domain_counts = new HashMap(); - final Map domain_protein_counts = new HashMap(); - final Map stats = new HashMap(); for( final Protein protein : protein_list ) { if ( !protein.getSpecies().equals( species ) ) { throw new IllegalArgumentException( "species (" + protein.getSpecies() - + ") does not match species of combinable domains collection (" + species + ")" ); + + ") does not match species of combinable domains collection (" + species + ")" ); } final Set saw_i = new HashSet(); final Set saw_c = new HashSet(); @@ -291,12 +259,7 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom final Domain pd_i = protein.getProteinDomain( i ); final String id_i = pd_i.getDomainId(); final int current_start = pd_i.getFrom(); - BasicGenomeWideCombinableDomains.countDomains( domain_counts, - domain_protein_counts, - stats, - saw_c, - id_i, - pd_i.getPerSequenceEvalue() ); + BasicGenomeWideCombinableDomains.countDomains( domain_counts, saw_c, id_i ); if ( !saw_i.contains( id_i ) ) { if ( dc_type == DomainCombinationType.BASIC ) { saw_i.add( id_i ); @@ -315,15 +278,9 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom else { domain_combination = new BasicCombinableDomains( pd_i.getDomainId(), species ); } - // ^^ if ( ( domain_id_to_go_ids_map != null ) - // ^^ && domain_id_to_go_ids_map.containsKey( pd_i.getDomainId() ) ) { - // ^^ final List go_ids = domain_id_to_go_ids_map.get( pd_i.getDomainId() ); - // ^^ for( final GoId go_id : go_ids ) { - // ^^ domain_combination.getKeyDomain().addGoId( go_id ); - // ^^ } - // ^^ } instance.add( id_i, domain_combination ); } + domain_combination.addKeyDomainProtein( protein.getProteinId().getId() );//^^^^^^^^^^^^^^ final Set saw_j = new HashSet(); if ( ignore_combination_with_same_domain ) { saw_j.add( id_i ); @@ -340,7 +297,7 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom saw_j.add( id ); if ( dc_type != DomainCombinationType.DIRECTED_ADJACTANT ) { domain_combination - .addCombinableDomain( protein.getProteinDomain( j ).getDomainId() ); + .addCombinableDomain( protein.getProteinDomain( j ).getDomainId() ); } else { if ( closest == null ) { @@ -378,15 +335,24 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom domain_number_stats_by_dc.get( dc_str ).addValue( protein.getNumberOfProteinDomains() ); } } - // } } } for( final String key_id : domain_counts.keySet() ) { instance.get( key_id ).setKeyDomainCount( domain_counts.get( key_id ) ); - instance.get( key_id ).setKeyDomainProteinsCount( domain_protein_counts.get( key_id ) ); - instance.get( key_id ).setKeyDomainConfidenceDescriptiveStatistics( stats.get( key_id ) ); } return instance; } + + private static void countDomains( final Map domain_counts, + final Set saw_c, + final String id_i ) { + if ( domain_counts.containsKey( id_i ) ) { + domain_counts.put( id_i, 1 + domain_counts.get( ( id_i ) ) ); + } + else { + domain_counts.put( id_i, 1 ); + } + saw_c.add( id_i ); + } }