inprogress
authorcmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Thu, 11 Jul 2013 17:23:19 +0000 (17:23 +0000)
committercmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Thu, 11 Jul 2013 17:23:19 +0000 (17:23 +0000)
forester/java/src/org/forester/surfacing/BasicCombinableDomains.java
forester/java/src/org/forester/surfacing/BasicDomainSimilarityCalculator.java
forester/java/src/org/forester/surfacing/BasicGenomeWideCombinableDomains.java
forester/java/src/org/forester/surfacing/CombinableDomains.java
forester/java/src/org/forester/surfacing/DomainCountsDifferenceUtil.java
forester/java/src/org/forester/surfacing/DomainSimilarity.java
forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java
forester/java/src/org/forester/surfacing/PrintableSpeciesSpecificDcData.java [moved from forester/java/src/org/forester/surfacing/PrintableSpeciesSpecificDomainSimilariyData.java with 71% similarity]
forester/java/src/org/forester/surfacing/SpeciesSpecificDcData.java [moved from forester/java/src/org/forester/surfacing/SpeciesSpecificDomainSimilariyData.java with 91% similarity]
forester/java/src/org/forester/surfacing/TestSurfacing.java
forester/java/src/org/forester/util/ForesterUtil.java

index 84408dc..c04333c 100644 (file)
 package org.forester.surfacing;
 
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Set;
 import java.util.SortedMap;
 import java.util.TreeMap;
 
 import org.forester.protein.BinaryDomainCombination;
 import org.forester.species.Species;
-import org.forester.util.DescriptiveStatistics;
+import org.forester.util.ForesterUtil;
 
 public class BasicCombinableDomains implements CombinableDomains {
 
     final private String                   _key_domain;
     private int                            _key_domain_count;
-    private int                            _key_domain_proteins_count;
     final private Species                  _species;
     final private TreeMap<String, Integer> _combining_domains;
-    private DescriptiveStatistics          _key_domain_confidence_statistics;
+    final private Set<String>              _proteins_with_key_domain;
 
     public BasicCombinableDomains( final String key_domain, final Species species ) {
         _key_domain = key_domain;
         _species = species;
         _combining_domains = new TreeMap<String, Integer>();
-        init();
+        _proteins_with_key_domain = new HashSet<String>();
+        _key_domain_count = 0;
     }
 
     @Override
@@ -63,6 +65,14 @@ public class BasicCombinableDomains implements CombinableDomains {
     }
 
     @Override
+    public void addKeyDomainProtein( final String protein ) {
+        // Store the id only when ForesterUtil.isEmpty accepts it; the backing
+        // collection is a Set, so adding an id that is already present does
+        // not change it.
+        if ( !ForesterUtil.isEmpty( protein ) ) {
+            getKeyDomainProteins().add( protein );
+            return;
+        }
+        throw new IllegalArgumentException( "attempt to add null or empty protein" );
+    }
+
+    @Override
     public List<String> getAllDomains() {
         final List<String> domains = getCombinableDomains();
         if ( !domains.contains( getKeyDomain() ) ) {
@@ -117,18 +127,13 @@ public class BasicCombinableDomains implements CombinableDomains {
     }
 
     @Override
-    public DescriptiveStatistics getKeyDomainConfidenceDescriptiveStatistics() {
-        return _key_domain_confidence_statistics;
-    }
-
-    @Override
     public int getKeyDomainCount() {
         return _key_domain_count;
     }
 
     @Override
     public int getKeyDomainProteinsCount() {
-        return _key_domain_proteins_count;
+        return getKeyDomainProteins().size();
     }
 
     @Override
@@ -151,33 +156,17 @@ public class BasicCombinableDomains implements CombinableDomains {
         return _species;
     }
 
-    private void init() {
-        _key_domain_count = 0;
-        _key_domain_proteins_count = 0;
-        _key_domain_confidence_statistics = null;
-    }
-
     @Override
     public boolean isCombinable( final String protein_domain ) {
         return getCombiningDomains().containsKey( protein_domain );
     }
 
     @Override
-    public void setKeyDomainConfidenceDescriptiveStatistics( final DescriptiveStatistics key_domain_confidence_statistics ) {
-        _key_domain_confidence_statistics = key_domain_confidence_statistics;
-    }
-
-    @Override
     public void setKeyDomainCount( final int key_domain_count ) {
         _key_domain_count = key_domain_count;
     }
 
     @Override
-    public void setKeyDomainProteinsCount( final int key_domain_proteins_count ) {
-        _key_domain_proteins_count = key_domain_proteins_count;
-    }
-
-    @Override
     public List<BinaryDomainCombination> toBinaryDomainCombinations() {
         final List<BinaryDomainCombination> binary_combinations = new ArrayList<BinaryDomainCombination>( getNumberOfCombinableDomains() );
         for( final String domain : getCombiningDomains().keySet() ) {
@@ -200,4 +189,9 @@ public class BasicCombinableDomains implements CombinableDomains {
         sb.append( getCombiningDomainIdsAsStringBuilder() );
         return sb.toString();
     }
+
+    /**
+     * Returns the identifiers of the proteins recorded for the key domain
+     * through addKeyDomainProtein.
+     * <p>
+     * NOTE(review): the internal set itself is returned (no copy, no
+     * unmodifiable view); addKeyDomainProtein and getKeyDomainProteinsCount
+     * both work through this accessor.
+     *
+     * @return the set of protein identifiers held by this instance
+     */
+    @Override
+    public Set<String> getKeyDomainProteins() {
+        return _proteins_with_key_domain;
+    }
 }
index 0e8406a..042b785 100644 (file)
@@ -125,7 +125,7 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat
             // ~~~OLD:
             //throw new IllegalArgumentException( "attempt to calculate multiple combinable domains similarity for less than two combinable domains" );
             // ~~~new: 
-            final SortedMap<Species, SpeciesSpecificDomainSimilariyData> species_data = new TreeMap<Species, SpeciesSpecificDomainSimilariyData>();
+            final SortedMap<Species, SpeciesSpecificDcData> species_data = new TreeMap<Species, SpeciesSpecificDcData>();
             species_data.put( domains_list.get( 0 ).getSpecies(),
                               createSpeciesSpecificDomainSimilariyData( domains_list.get( 0 ) ) );
             return new PrintableDomainSimilarity( domains_list.get( 0 ),
@@ -142,7 +142,7 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat
                                                   isTreatAsBinaryComparison() );
         }
         final DescriptiveStatistics stat = new BasicDescriptiveStatistics();
-        final SortedMap<Species, SpeciesSpecificDomainSimilariyData> species_data = new TreeMap<Species, SpeciesSpecificDomainSimilariyData>();
+        final SortedMap<Species, SpeciesSpecificDcData> species_data = new TreeMap<Species, SpeciesSpecificDcData>();
         species_data.put( domains_list.get( 0 ).getSpecies(),
                           createSpeciesSpecificDomainSimilariyData( domains_list.get( 0 ) ) );
         int max_difference_in_counts = 0;
@@ -225,10 +225,12 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat
         return _treat_as_binary_comparison;
     }
 
-    private static SpeciesSpecificDomainSimilariyData createSpeciesSpecificDomainSimilariyData( final CombinableDomains cd ) {
-        final SpeciesSpecificDomainSimilariyData sd = new PrintableSpeciesSpecificDomainSimilariyData( cd.getKeyDomainProteinsCount(),
-                                                                                                       cd.getKeyDomainCount(),
-                                                                                                       cd.getNumberOfCombinableDomains() );
+    private static SpeciesSpecificDcData createSpeciesSpecificDomainSimilariyData( final CombinableDomains cd ) {
+        final SpeciesSpecificDcData sd = new PrintableSpeciesSpecificDcData( cd.getKeyDomainCount(),
+                                                                             cd.getNumberOfCombinableDomains() );
+        for( final String prot : cd.getKeyDomainProteins() ) {
+            sd.addKeyDomainProtein( prot );
+        }
         for( final String domain : cd.getCombinableDomains() ) {
             sd.addProteinsExhibitingCombinationCount( domain, cd.getNumberOfProteinsExhibitingCombination( domain ) );
         }
index 6e782a3..3e59603 100644 (file)
@@ -1,8 +1,6 @@
 
 package org.forester.surfacing;
 
-import java.text.DecimalFormat;
-import java.text.NumberFormat;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
@@ -28,7 +26,6 @@ import org.forester.util.ForesterUtil;
 
 public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDomains {
 
-    private final static NumberFormat                  FORMATTER                                  = new DecimalFormat( "0.0E0" );
     private static final Comparator<CombinableDomains> DESCENDING_KEY_DOMAIN_COUNT_ORDER          = new Comparator<CombinableDomains>() {
 
                                                                                                       @Override
@@ -193,7 +190,7 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom
     }
 
     // Produces something like: 
-    // 2-oxoacid_dh      5       5       2       4.8E-67   Biotin_lipoyl [4], E3_binding [3]
+    // 2-oxoacid_dh      5       5       2      Biotin_lipoyl [4], E3_binding [3]
     @Override
     public StringBuilder toStringBuilder( final GenomeWideCombinableDomainsSortOrder sort_order ) {
         final StringBuilder sb = new StringBuilder();
@@ -217,12 +214,6 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom
             sb.append( ForesterUtil.pad( new StringBuffer( "" + cb.getKeyDomainCount() ), 8, ' ', false ) );
             sb.append( ForesterUtil.pad( new StringBuffer( "" + cb.getKeyDomainProteinsCount() ), 8, ' ', false ) );
             sb.append( ForesterUtil.pad( new StringBuffer( "" + cb.getNumberOfCombinableDomains() ), 8, ' ', false ) );
-            sb.append( ForesterUtil.pad( new StringBuffer( ""
-                                                 + FORMATTER.format( cb.getKeyDomainConfidenceDescriptiveStatistics()
-                                                         .median() ) ),
-                                         10,
-                                         ' ',
-                                         false ) );
             sb.append( cb.getCombiningDomainIdsAsStringBuilder() );
             sb.append( ForesterUtil.getLineSeparator() );
         }
@@ -230,23 +221,14 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom
     }
 
     private static void countDomains( final Map<String, Integer> domain_counts,
-                                      final Map<String, Integer> domain_protein_counts,
-                                      final Map<String, DescriptiveStatistics> stats,
                                       final Set<String> saw_c,
-                                      final String id_i,
-                                      final double support ) {
+                                      final String id_i ) {
         if ( domain_counts.containsKey( id_i ) ) {
             domain_counts.put( id_i, 1 + domain_counts.get( ( id_i ) ) );
-            if ( !saw_c.contains( id_i ) ) {
-                domain_protein_counts.put( id_i, 1 + domain_protein_counts.get( ( id_i ) ) );
-            }
         }
         else {
-            stats.put( id_i, new BasicDescriptiveStatistics() );
             domain_counts.put( id_i, 1 );
-            domain_protein_counts.put( id_i, 1 );
         }
-        stats.get( id_i ).addValue( support );
         saw_c.add( id_i );
     }
 
@@ -278,8 +260,6 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom
                                                                    final Map<String, DescriptiveStatistics> domain_number_stats_by_dc ) {
         final BasicGenomeWideCombinableDomains instance = new BasicGenomeWideCombinableDomains( species, dc_type );
         final Map<String, Integer> domain_counts = new HashMap<String, Integer>();
-        final Map<String, Integer> domain_protein_counts = new HashMap<String, Integer>();
-        final Map<String, DescriptiveStatistics> stats = new HashMap<String, DescriptiveStatistics>();
         for( final Protein protein : protein_list ) {
             if ( !protein.getSpecies().equals( species ) ) {
                 throw new IllegalArgumentException( "species (" + protein.getSpecies()
@@ -291,12 +271,7 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom
                 final Domain pd_i = protein.getProteinDomain( i );
                 final String id_i = pd_i.getDomainId();
                 final int current_start = pd_i.getFrom();
-                BasicGenomeWideCombinableDomains.countDomains( domain_counts,
-                                                               domain_protein_counts,
-                                                               stats,
-                                                               saw_c,
-                                                               id_i,
-                                                               pd_i.getPerSequenceEvalue() );
+                BasicGenomeWideCombinableDomains.countDomains( domain_counts, saw_c, id_i );
                 if ( !saw_i.contains( id_i ) ) {
                     if ( dc_type == DomainCombinationType.BASIC ) {
                         saw_i.add( id_i );
@@ -317,6 +292,7 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom
                         }
                         instance.add( id_i, domain_combination );
                     }
+                    domain_combination.addKeyDomainProtein( protein.getProteinId().getId() );//^^^^^^^^^^^^^^
                     final Set<String> saw_j = new HashSet<String>();
                     if ( ignore_combination_with_same_domain ) {
                         saw_j.add( id_i );
@@ -371,14 +347,11 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom
                             domain_number_stats_by_dc.get( dc_str ).addValue( protein.getNumberOfProteinDomains() );
                         }
                     }
-                    //
                 }
             }
         }
         for( final String key_id : domain_counts.keySet() ) {
             instance.get( key_id ).setKeyDomainCount( domain_counts.get( key_id ) );
-            instance.get( key_id ).setKeyDomainProteinsCount( domain_protein_counts.get( key_id ) );
-            instance.get( key_id ).setKeyDomainConfidenceDescriptiveStatistics( stats.get( key_id ) );
         }
         return instance;
     }
index 58b055d..028810e 100644 (file)
 package org.forester.surfacing;
 
 import java.util.List;
+import java.util.Set;
 import java.util.SortedMap;
 
 import org.forester.protein.BinaryDomainCombination;
 import org.forester.species.Species;
-import org.forester.util.DescriptiveStatistics;
 
 public interface CombinableDomains {
 
@@ -73,15 +73,6 @@ public interface CombinableDomains {
     public String getKeyDomain();
 
     /**
-     * Gets descriptive statistics for the confidence (i.e. E-values) of the key
-     * domain.
-     * 
-     * 
-     * @return descriptive statistics for the confidence of the key domain
-     */
-    public DescriptiveStatistics getKeyDomainConfidenceDescriptiveStatistics();
-
-    /**
      * Returns how many times the key domain is present in a given species
      * genome.
      * 
@@ -97,6 +88,8 @@ public interface CombinableDomains {
      */
     public int getKeyDomainProteinsCount();
 
+    /**
+     * Returns the proteins that have been registered as containing the key
+     * domain (see addKeyDomainProtein).
+     * 
+     * @return set of protein identifiers
+     */
+    public Set<String> getKeyDomainProteins();
+
     public int getNumberOfCombinableDomains();
 
     public int getNumberOfProteinsExhibitingCombination( final String protein_domain );
@@ -111,15 +104,6 @@ public interface CombinableDomains {
     public boolean isCombinable( final String protein_domain );
 
     /**
-     * This is to set descriptive statistics for the confidence (i.e. E-values)
-     * of the key domain.
-     * 
-     * 
-     * @param statistics
-     */
-    void setKeyDomainConfidenceDescriptiveStatistics( final DescriptiveStatistics statistics );
-
-    /**
      * Sets how many times the key domain is present in a given species genome.
      * 
      * @param key_domain_count
@@ -127,14 +111,7 @@ public interface CombinableDomains {
      */
     void setKeyDomainCount( final int key_domain_count );
 
-    /**
-     * Sets how many proteins with the key domain are present in a given species
-     * genome.
-     * 
-     * @param key_domain_proteins_count
-     *            key domain protein count in species
-     */
-    void setKeyDomainProteinsCount( final int key_domain_proteins_count );
-
     public List<BinaryDomainCombination> toBinaryDomainCombinations();
+
+    /**
+     * Registers a protein as containing the key domain.
+     * 
+     * @param protein
+     *            identifier of the protein to register
+     */
+    void addKeyDomainProtein( String protein );
 }
\ No newline at end of file
index 08f3989..2a18c9c 100644 (file)
@@ -32,8 +32,6 @@ import java.io.File;
 import java.io.FileWriter;
 import java.io.IOException;
 import java.io.Writer;
-import java.text.DecimalFormat;
-import java.text.NumberFormat;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -60,7 +58,6 @@ import org.forester.util.ForesterUtil;
  */
 public final class DomainCountsDifferenceUtil {
 
-    private final static NumberFormat          FORMATTER                                   = new DecimalFormat( "0.0E0" );
     private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES = COPY_CALCULATION_MODE.MIN;
     private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES   = COPY_CALCULATION_MODE.MIN;
     private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_LOW_COPY_SPECIES         = COPY_CALCULATION_MODE.MAX;
@@ -457,9 +454,6 @@ public final class DomainCountsDifferenceUtil {
     private static String combinableDomaindToString( final CombinableDomains cd ) {
         final StringBuilder sb = new StringBuilder();
         sb.append( cd.getKeyDomainProteinsCount() );
-        sb.append( "\t[" );
-        sb.append( FORMATTER.format( cd.getKeyDomainConfidenceDescriptiveStatistics().median() ) );
-        sb.append( "]" );
         return sb.toString();
     }
 
@@ -471,8 +465,6 @@ public final class DomainCountsDifferenceUtil {
         sb.append( cd.getKeyDomainProteinsCount() );
         sb.append( "</b>, " );
         sb.append( cd.getNumberOfCombinableDomains() );
-        sb.append( "]</td><td>[" );
-        sb.append( FORMATTER.format( cd.getKeyDomainConfidenceDescriptiveStatistics().median() ) );
         sb.append( "]</td><td>" );
         sb.append( cd.getCombiningDomainIdsAsStringBuilder() );
         return sb.toString();
index be7273e..bf9cef4 100644 (file)
@@ -97,7 +97,7 @@ public interface DomainSimilarity extends Comparable<DomainSimilarity> {
      * 
      * @return SortedMap<String, SpeciesSpecificDomainSimilariyData>
      */
-    public SortedMap<Species, SpeciesSpecificDomainSimilariyData> getSpeciesData();
+    public SortedMap<Species, SpeciesSpecificDcData> getSpeciesData();
 
     public double getStandardDeviationOfSimilarityScore();
 
index c785ff7..fc7d8b5 100644 (file)
@@ -39,22 +39,22 @@ import org.forester.util.ForesterUtil;
 
 public class PrintableDomainSimilarity implements DomainSimilarity {
 
-    final public static String                                           SPECIES_SEPARATOR = "  ";
-    final private static int                                             EQUAL             = 0;
-    final private static String                                          NO_SPECIES        = "     ";
-    private static final boolean                                         PRINT_MORE_INFO   = false;
-    final private double                                                 _min;
-    final private double                                                 _max;
-    final private double                                                 _mean;
-    final private double                                                 _sd;
-    final private int                                                    _n;
-    private final int                                                    _max_difference_in_counts;
-    private final int                                                    _max_difference;
-    final private CombinableDomains                                      _combinable_domains;
-    final private SortedMap<Species, SpeciesSpecificDomainSimilariyData> _species_data;
-    private List<Species>                                                _species_order;
-    private DomainSimilarityCalculator.Detailedness                      _detailedness;
-    private final boolean                                                _treat_as_binary_comparison;
+    final public static String                              SPECIES_SEPARATOR = "  ";
+    final private static int                                EQUAL             = 0;
+    final private static String                             NO_SPECIES        = "     ";
+    private static final boolean                            PRINT_MORE_INFO   = false;
+    final private double                                    _min;
+    final private double                                    _max;
+    final private double                                    _mean;
+    final private double                                    _sd;
+    final private int                                       _n;
+    private final int                                       _max_difference_in_counts;
+    private final int                                       _max_difference;
+    final private CombinableDomains                         _combinable_domains;
+    final private SortedMap<Species, SpeciesSpecificDcData> _species_data;
+    private List<Species>                                   _species_order;
+    private DomainSimilarityCalculator.Detailedness         _detailedness;
+    private final boolean                                   _treat_as_binary_comparison;
 
     public PrintableDomainSimilarity( final CombinableDomains combinable_domains,
                                       final double min,
@@ -65,7 +65,7 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
                                       final int n,
                                       final int max_difference_in_counts,
                                       final int max_difference,
-                                      final SortedMap<Species, SpeciesSpecificDomainSimilariyData> species_data,
+                                      final SortedMap<Species, SpeciesSpecificDcData> species_data,
                                       final boolean sort_by_species_count_first,
                                       final boolean treat_as_binary_comparison ) {
         if ( combinable_domains == null ) {
@@ -240,7 +240,7 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
     }
 
     @Override
-    public SortedMap<Species, SpeciesSpecificDomainSimilariyData> getSpeciesData() {
+    public SortedMap<Species, SpeciesSpecificDcData> getSpeciesData() {
         return _species_data;
     }
 
@@ -29,19 +29,21 @@ package org.forester.surfacing;
 
 import java.util.Set;
 import java.util.SortedMap;
+import java.util.SortedSet;
 import java.util.TreeMap;
+import java.util.TreeSet;
 
-class PrintableSpeciesSpecificDomainSimilariyData implements SpeciesSpecificDomainSimilariyData {
+import org.forester.util.ForesterUtil;
+
+class PrintableSpeciesSpecificDcData implements SpeciesSpecificDcData {
 
     final SortedMap<String, Integer> _combinable_domain_id_to_count_map;
-    final private int                _key_domain_proteins_count;
+    final SortedSet<String>          _key_domain_proteins;
     final private int                _key_domain_domains_count;
     final private int                _combinable_domains_count;
 
-    public PrintableSpeciesSpecificDomainSimilariyData( final int key_domain_proteins_count,
-                                                        final int key_domain_domains_count,
-                                                        final int combinable_domains ) {
-        _key_domain_proteins_count = key_domain_proteins_count;
+    public PrintableSpeciesSpecificDcData( final int key_domain_domains_count, final int combinable_domains ) {
+        _key_domain_proteins = new TreeSet<String>();
         _key_domain_domains_count = key_domain_domains_count;
         _combinable_domains_count = combinable_domains;
         _combinable_domain_id_to_count_map = new TreeMap<String, Integer>();
@@ -69,7 +71,7 @@ class PrintableSpeciesSpecificDomainSimilariyData implements SpeciesSpecificDoma
     }
 
     private int getKeyDomainProteinsCount() {
-        return _key_domain_proteins_count;
+        return _key_domain_proteins.size();
     }
 
     @Override
@@ -81,6 +83,22 @@ class PrintableSpeciesSpecificDomainSimilariyData implements SpeciesSpecificDoma
     }
 
     @Override
+    public void addKeyDomainProtein( final String protein ) {
+        if ( ForesterUtil.isEmpty( protein ) ) {
+            throw new IllegalArgumentException( "attempt to add null or empty protein" );
+        }
+        // Set.add returns false, leaving the set untouched, when the element
+        // is already present, so a single call both inserts the id and
+        // detects a duplicate.
+        final boolean newly_added = getKeyDomainProteins().add( protein );
+        if ( !newly_added ) {
+            throw new IllegalArgumentException( "protein \"" + protein + "\" is not unique" );
+        }
+    }
+
+    /**
+     * Returns the sorted set of key-domain protein identifiers collected via
+     * addKeyDomainProtein.
+     * <p>
+     * NOTE(review): this is the internal set itself, not a copy.
+     *
+     * @return the sorted set of protein identifiers held by this instance
+     */
+    @Override
+    public SortedSet<String> getKeyDomainProteins() {
+        return _key_domain_proteins;
+    }
+
+    @Override
     public String toString() {
         return toStringBuffer( DomainSimilarityCalculator.Detailedness.LIST_COMBINING_DOMAIN_FOR_EACH_SPECIES, false )
                 .toString();
@@ -117,6 +135,26 @@ class PrintableSpeciesSpecificDomainSimilariyData implements SpeciesSpecificDoma
                 sb.append( getCombinableDomainIdToCountsMap().get( domain_id ) );
             }
         }
+        // Append the key-domain protein ids as a comma-separated list
+        // enclosed in square brackets.
+        sb.append( " [" );
+        boolean first = true;
+        for( final String p : getKeyDomainProteins() ) {
+            // NOTE(review): 'link' is assigned in both branches below but is
+            // never read afterwards -- the plain id 'p' is what gets appended.
+            // Presumably the anchor is meant to be emitted for HTML output;
+            // confirm and wire it in (or drop the computation).
+            // NOTE(review): 'p' is concatenated into the search URL and the
+            // anchor text as-is, without URL or HTML escaping.
+            String link = null;
+            final String up_id = ForesterUtil.extractUniProtKbProteinSeqIdentifier( p );
+            if ( !ForesterUtil.isEmpty( up_id ) ) {
+                link = "<a href=\"" + ForesterUtil.UNIPROT_KB + up_id + "\" target=\"_up_window\">" + up_id + "</a>";
+            }
+            else {
+                link = "<a href=\"" + "http://www.google.com/search?q=" + p + "\" target=\"_g_window\">" + p + "</a>";
+            }
+            // Emit the ", " separator before every element except the first.
+            if ( first ) {
+                first = false;
+            }
+            else {
+                sb.append( ", " );
+            }
+            sb.append( p );
+        }
+        sb.append( "]" );
         return sb;
     }
 }
 package org.forester.surfacing;
 
 import java.util.SortedMap;
+import java.util.SortedSet;
 
 /*
  * A helper class for PrintableDomainSimilarity.
  */
-interface SpeciesSpecificDomainSimilariyData {
+interface SpeciesSpecificDcData {
 
     public void addProteinsExhibitingCombinationCount( final String domain_id, final int count );
 
@@ -47,4 +48,8 @@ interface SpeciesSpecificDomainSimilariyData {
     public int getNumberOfProteinsExhibitingCombinationWith( final String domain_id );
 
     public StringBuffer toStringBuffer( final DomainSimilarityCalculator.Detailedness detailedness, boolean html );
+
+    /**
+     * Returns the proteins registered for the key domain, as a sorted set of
+     * protein identifiers.
+     */
+    public SortedSet<String> getKeyDomainProteins();
+
+    /**
+     * Registers a protein (by identifier) as containing the key domain.
+     */
+    void addKeyDomainProtein( String protein );
 }
index 4ab0fb0..3a79d7b 100644 (file)
@@ -575,7 +575,7 @@ public class TestSurfacing {
             if ( !sa3.getDomainId().equals( "A" ) ) {
                 return false;
             }
-            final SpeciesSpecificDomainSimilariyData ssdsd = sa3.getSpeciesData().get( new BasicSpecies( "ciona" ) );
+            final SpeciesSpecificDcData ssdsd = sa3.getSpeciesData().get( new BasicSpecies( "ciona" ) );
             if ( ssdsd.getCombinableDomainIdToCountsMap().size() != 4 ) {
                 return false;
             }
@@ -618,7 +618,7 @@ public class TestSurfacing {
             if ( !sa4.getDomainId().equals( "A" ) ) {
                 return false;
             }
-            final SpeciesSpecificDomainSimilariyData ssdsd4 = sa4.getSpeciesData().get( new BasicSpecies( "ciona" ) );
+            final SpeciesSpecificDcData ssdsd4 = sa4.getSpeciesData().get( new BasicSpecies( "ciona" ) );
             if ( ssdsd4.getCombinableDomainIdToCountsMap().size() != 5 ) {
                 return false;
             }
@@ -726,7 +726,7 @@ public class TestSurfacing {
             if ( !sa5_d.getSpecies().last().equals( new BasicSpecies( "rabbit" ) ) ) {
                 return false;
             }
-            final SpeciesSpecificDomainSimilariyData ssdsd5 = sa5_d.getSpeciesData().get( new BasicSpecies( "ciona" ) );
+            final SpeciesSpecificDcData ssdsd5 = sa5_d.getSpeciesData().get( new BasicSpecies( "ciona" ) );
             if ( ssdsd5.getCombinableDomainIdToCountsMap().size() != 4 ) {
                 return false;
             }
@@ -861,7 +861,7 @@ public class TestSurfacing {
             if ( !sa6_d.getSpecies().last().equals( new BasicSpecies( "rabbit" ) ) ) {
                 return false;
             }
-            final SpeciesSpecificDomainSimilariyData ssdsd6 = sa6_d.getSpeciesData().get( new BasicSpecies( "ciona" ) );
+            final SpeciesSpecificDcData ssdsd6 = sa6_d.getSpeciesData().get( new BasicSpecies( "ciona" ) );
             if ( ssdsd6.getCombinableDomainIdToCountsMap().size() != 5 ) {
                 return false;
             }
index 47e6aa7..46eceac 100644 (file)
@@ -165,6 +165,21 @@ public final class ForesterUtil {
         return v;
     }
 
+    /**
+     * Searches a string for a UniProtKB protein sequence identifier.
+     * <p>
+     * UNIPROT_KB_PATTERN_1 is tried first and, on a hit, its first capturing
+     * group is returned; otherwise UNIPROT_KB_PATTERN_2 is tried and its
+     * whole match is returned.
+     * 
+     * @param str
+     *            the text to search, may be null
+     * @return the identifier found, or null if str is null or neither
+     *         pattern matches
+     */
+    public static String extractUniProtKbProteinSeqIdentifier( final String str ) {
+        if ( str == null ) {
+            // Matching against null would throw a NullPointerException; treat
+            // a missing input like any other text without an identifier.
+            return null;
+        }
+        final Matcher m1 = UNIPROT_KB_PATTERN_1.matcher( str );
+        if ( m1.find() ) {
+            return m1.group( 1 );
+        }
+        final Matcher m2 = UNIPROT_KB_PATTERN_2.matcher( str );
+        if ( m2.find() ) {
+            return m2.group();
+        }
+        return null;
+    }
+
     public static String extractUniProtKbProteinSeqIdentifier( final PhylogenyNode node ) {
         String upkb = null;
         if ( node.getNodeData().isHasSequence() ) {