From: cmzmasek@gmail.com Date: Thu, 11 Jul 2013 17:23:19 +0000 (+0000) Subject: inprogress X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=08a92c65e8b969a3ce2a4b511055cd6029357831;p=jalview.git inprogress --- diff --git a/forester/java/src/org/forester/surfacing/BasicCombinableDomains.java b/forester/java/src/org/forester/surfacing/BasicCombinableDomains.java index 84408dc..c04333c 100644 --- a/forester/java/src/org/forester/surfacing/BasicCombinableDomains.java +++ b/forester/java/src/org/forester/surfacing/BasicCombinableDomains.java @@ -27,29 +27,31 @@ package org.forester.surfacing; import java.util.ArrayList; +import java.util.HashSet; import java.util.Iterator; import java.util.List; +import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; import org.forester.protein.BinaryDomainCombination; import org.forester.species.Species; -import org.forester.util.DescriptiveStatistics; +import org.forester.util.ForesterUtil; public class BasicCombinableDomains implements CombinableDomains { final private String _key_domain; private int _key_domain_count; - private int _key_domain_proteins_count; final private Species _species; final private TreeMap _combining_domains; - private DescriptiveStatistics _key_domain_confidence_statistics; + final private Set _proteins_with_key_domain; public BasicCombinableDomains( final String key_domain, final Species species ) { _key_domain = key_domain; _species = species; _combining_domains = new TreeMap(); - init(); + _proteins_with_key_domain = new HashSet(); + _key_domain_count = 0; } @Override @@ -63,6 +65,14 @@ public class BasicCombinableDomains implements CombinableDomains { } @Override + public void addKeyDomainProtein( final String protein ) { + if ( ForesterUtil.isEmpty( protein ) ) { + throw new IllegalArgumentException( "attempt to add null or empty protein" ); + } + getKeyDomainProteins().add( protein ); + } + + @Override public List getAllDomains() { final List domains = getCombinableDomains(); if ( !domains.contains( getKeyDomain() ) ) { @@ -117,18 +127,13 @@ public class BasicCombinableDomains implements CombinableDomains { } @Override - public DescriptiveStatistics getKeyDomainConfidenceDescriptiveStatistics() { - return _key_domain_confidence_statistics; - } - - @Override public int getKeyDomainCount() { return _key_domain_count; } @Override public int getKeyDomainProteinsCount() { - return _key_domain_proteins_count; + return getKeyDomainProteins().size(); } @Override @@ -151,33 +156,17 @@ public class BasicCombinableDomains implements CombinableDomains { return _species; } - private void init() { - _key_domain_count = 0; - _key_domain_proteins_count = 0; - _key_domain_confidence_statistics = null; - } - @Override public boolean isCombinable( final String protein_domain ) { return getCombiningDomains().containsKey( protein_domain ); } @Override - public void setKeyDomainConfidenceDescriptiveStatistics( final DescriptiveStatistics key_domain_confidence_statistics ) { - _key_domain_confidence_statistics = key_domain_confidence_statistics; - } - - @Override public void setKeyDomainCount( final int key_domain_count ) { _key_domain_count = key_domain_count; } @Override - public void setKeyDomainProteinsCount( final int key_domain_proteins_count ) { - _key_domain_proteins_count = key_domain_proteins_count; - } - - @Override public List toBinaryDomainCombinations() { final List binary_combinations = new ArrayList( getNumberOfCombinableDomains() ); for( final String domain : getCombiningDomains().keySet() ) { @@ -200,4 +189,9 @@ public class BasicCombinableDomains implements CombinableDomains { sb.append( getCombiningDomainIdsAsStringBuilder() ); return sb.toString(); } + + @Override + public Set getKeyDomainProteins() { + return _proteins_with_key_domain; + } } diff --git a/forester/java/src/org/forester/surfacing/BasicDomainSimilarityCalculator.java b/forester/java/src/org/forester/surfacing/BasicDomainSimilarityCalculator.java index 0e8406a..042b785 100644 --- a/forester/java/src/org/forester/surfacing/BasicDomainSimilarityCalculator.java +++ b/forester/java/src/org/forester/surfacing/BasicDomainSimilarityCalculator.java @@ -125,7 +125,7 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat // ~~~OLD: //throw new IllegalArgumentException( "attempt to calculate multiple combinable domains similarity for less than two combinable domains" ); // ~~~new: - final SortedMap species_data = new TreeMap(); + final SortedMap species_data = new TreeMap(); species_data.put( domains_list.get( 0 ).getSpecies(), createSpeciesSpecificDomainSimilariyData( domains_list.get( 0 ) ) ); return new PrintableDomainSimilarity( domains_list.get( 0 ), @@ -142,7 +142,7 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat isTreatAsBinaryComparison() ); } final DescriptiveStatistics stat = new BasicDescriptiveStatistics(); - final SortedMap species_data = new TreeMap(); + final SortedMap species_data = new TreeMap(); species_data.put( domains_list.get( 0 ).getSpecies(), createSpeciesSpecificDomainSimilariyData( domains_list.get( 0 ) ) ); int max_difference_in_counts = 0; @@ -225,10 +225,12 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat return _treat_as_binary_comparison; } - private static SpeciesSpecificDomainSimilariyData createSpeciesSpecificDomainSimilariyData( final CombinableDomains cd ) { - final SpeciesSpecificDomainSimilariyData sd = new PrintableSpeciesSpecificDomainSimilariyData( cd.getKeyDomainProteinsCount(), - cd.getKeyDomainCount(), - cd.getNumberOfCombinableDomains() ); + private static SpeciesSpecificDcData createSpeciesSpecificDomainSimilariyData( final CombinableDomains cd ) { + final SpeciesSpecificDcData sd = new PrintableSpeciesSpecificDcData( cd.getKeyDomainCount(), + cd.getNumberOfCombinableDomains() ); + for( final String prot : cd.getKeyDomainProteins() ) { + sd.addKeyDomainProtein( prot ); + } for( final String domain : cd.getCombinableDomains() ) { sd.addProteinsExhibitingCombinationCount( domain, cd.getNumberOfProteinsExhibitingCombination( domain ) ); } diff --git a/forester/java/src/org/forester/surfacing/BasicGenomeWideCombinableDomains.java b/forester/java/src/org/forester/surfacing/BasicGenomeWideCombinableDomains.java index 6e782a3..3e59603 100644 --- a/forester/java/src/org/forester/surfacing/BasicGenomeWideCombinableDomains.java +++ b/forester/java/src/org/forester/surfacing/BasicGenomeWideCombinableDomains.java @@ -1,8 +1,6 @@ package org.forester.surfacing; -import java.text.DecimalFormat; -import java.text.NumberFormat; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; @@ -28,7 +26,6 @@ import org.forester.util.ForesterUtil; public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDomains { - private final static NumberFormat FORMATTER = new DecimalFormat( "0.0E0" ); private static final Comparator DESCENDING_KEY_DOMAIN_COUNT_ORDER = new Comparator() { @Override @@ -193,7 +190,7 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom } // Produces something like: - // 2-oxoacid_dh 5 5 2 4.8E-67 Biotin_lipoyl [4], E3_binding [3] + // 2-oxoacid_dh 5 5 2 Biotin_lipoyl [4], E3_binding [3] @Override public StringBuilder toStringBuilder( final GenomeWideCombinableDomainsSortOrder sort_order ) { final StringBuilder sb = new StringBuilder(); @@ -217,12 +214,6 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom sb.append( ForesterUtil.pad( new StringBuffer( "" + cb.getKeyDomainCount() ), 8, ' ', false ) ); sb.append( ForesterUtil.pad( new StringBuffer( "" + cb.getKeyDomainProteinsCount() ), 8, ' ', false ) ); sb.append( ForesterUtil.pad( new StringBuffer( "" + cb.getNumberOfCombinableDomains() ), 8, ' ', false ) ); - sb.append( ForesterUtil.pad( new StringBuffer( "" - + FORMATTER.format( cb.getKeyDomainConfidenceDescriptiveStatistics() - .median() ) ), - 10, - ' ', - false ) ); sb.append( cb.getCombiningDomainIdsAsStringBuilder() ); sb.append( ForesterUtil.getLineSeparator() ); } @@ -230,23 +221,14 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom } private static void countDomains( final Map domain_counts, - final Map domain_protein_counts, - final Map stats, final Set saw_c, - final String id_i, - final double support ) { + final String id_i ) { if ( domain_counts.containsKey( id_i ) ) { domain_counts.put( id_i, 1 + domain_counts.get( ( id_i ) ) ); - if ( !saw_c.contains( id_i ) ) { - domain_protein_counts.put( id_i, 1 + domain_protein_counts.get( ( id_i ) ) ); - } } else { - stats.put( id_i, new BasicDescriptiveStatistics() ); domain_counts.put( id_i, 1 ); - domain_protein_counts.put( id_i, 1 ); } - stats.get( id_i ).addValue( support ); saw_c.add( id_i ); } @@ -278,8 +260,6 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom final Map domain_number_stats_by_dc ) { final BasicGenomeWideCombinableDomains instance = new BasicGenomeWideCombinableDomains( species, dc_type ); final Map domain_counts = new HashMap(); - final Map domain_protein_counts = new HashMap(); - final Map stats = new HashMap(); for( final Protein protein : protein_list ) { if ( !protein.getSpecies().equals( species ) ) { throw new IllegalArgumentException( "species (" + protein.getSpecies() @@ -291,12 +271,7 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom final Domain pd_i = protein.getProteinDomain( i ); final String id_i = pd_i.getDomainId(); final int current_start = pd_i.getFrom(); - BasicGenomeWideCombinableDomains.countDomains( domain_counts, - domain_protein_counts, - stats, - saw_c, - id_i, - pd_i.getPerSequenceEvalue() ); + BasicGenomeWideCombinableDomains.countDomains( domain_counts, saw_c, id_i ); if ( !saw_i.contains( id_i ) ) { if ( dc_type == DomainCombinationType.BASIC ) { saw_i.add( id_i ); @@ -317,6 +292,7 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom } instance.add( id_i, domain_combination ); } + domain_combination.addKeyDomainProtein( protein.getProteinId().getId() );//^^^^^^^^^^^^^^ final Set saw_j = new HashSet(); if ( ignore_combination_with_same_domain ) { saw_j.add( id_i ); @@ -371,14 +347,11 @@ public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDom domain_number_stats_by_dc.get( dc_str ).addValue( protein.getNumberOfProteinDomains() ); } } - // } } } for( final String key_id : domain_counts.keySet() ) { instance.get( key_id ).setKeyDomainCount( domain_counts.get( key_id ) ); - instance.get( key_id ).setKeyDomainProteinsCount( domain_protein_counts.get( key_id ) ); - instance.get( key_id ).setKeyDomainConfidenceDescriptiveStatistics( stats.get( key_id ) ); } return instance; } diff --git a/forester/java/src/org/forester/surfacing/CombinableDomains.java b/forester/java/src/org/forester/surfacing/CombinableDomains.java index 58b055d..028810e 100644 --- a/forester/java/src/org/forester/surfacing/CombinableDomains.java +++ b/forester/java/src/org/forester/surfacing/CombinableDomains.java @@ -27,11 +27,11 @@ package org.forester.surfacing; import java.util.List; +import java.util.Set; import java.util.SortedMap; import org.forester.protein.BinaryDomainCombination; import org.forester.species.Species; -import org.forester.util.DescriptiveStatistics; public interface CombinableDomains { @@ -73,15 +73,6 @@ public interface CombinableDomains { public String getKeyDomain(); /** - * Gets descriptive statistics for the confidence (i.e. E-values) of the key - * domain. - * - * - * @return descriptive statistics for the confidence of the key domain - */ - public DescriptiveStatistics getKeyDomainConfidenceDescriptiveStatistics(); - - /** * Returns how many times the key domain is present in a given species * genome. * @@ -97,6 +88,8 @@ public interface CombinableDomains { */ public int getKeyDomainProteinsCount(); + public Set getKeyDomainProteins(); + public int getNumberOfCombinableDomains(); public int getNumberOfProteinsExhibitingCombination( final String protein_domain ); @@ -111,15 +104,6 @@ public interface CombinableDomains { public boolean isCombinable( final String protein_domain ); /** - * This is to set descriptive statistics for the confidence (i.e. E-values) - * of the key domain. - * - * - * @param statistics - */ - void setKeyDomainConfidenceDescriptiveStatistics( final DescriptiveStatistics statistics ); - - /** * Sets how many times the key domain is present in a given species genome. * * @param key_domain_count @@ -127,14 +111,7 @@ public interface CombinableDomains { */ void setKeyDomainCount( final int key_domain_count ); - /** - * Sets how many proteins with the key domain are present in a given species - * genome. - * - * @param key_domain_proteins_count - * key domain protein count in species - */ - void setKeyDomainProteinsCount( final int key_domain_proteins_count ); - public List toBinaryDomainCombinations(); + + void addKeyDomainProtein( String protein ); } \ No newline at end of file diff --git a/forester/java/src/org/forester/surfacing/DomainCountsDifferenceUtil.java b/forester/java/src/org/forester/surfacing/DomainCountsDifferenceUtil.java index 08f3989..2a18c9c 100644 --- a/forester/java/src/org/forester/surfacing/DomainCountsDifferenceUtil.java +++ b/forester/java/src/org/forester/surfacing/DomainCountsDifferenceUtil.java @@ -32,8 +32,6 @@ import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.Writer; -import java.text.DecimalFormat; -import java.text.NumberFormat; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -60,7 +58,6 @@ import org.forester.util.ForesterUtil; */ public final class DomainCountsDifferenceUtil { - private final static NumberFormat FORMATTER = new DecimalFormat( "0.0E0" ); private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES = COPY_CALCULATION_MODE.MIN; private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES = COPY_CALCULATION_MODE.MIN; private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_LOW_COPY_SPECIES = COPY_CALCULATION_MODE.MAX; @@ -457,9 +454,6 @@ public final class DomainCountsDifferenceUtil { private static String combinableDomaindToString( final CombinableDomains cd ) { final StringBuilder sb = new StringBuilder(); sb.append( cd.getKeyDomainProteinsCount() ); - sb.append( "\t[" ); - sb.append( FORMATTER.format( cd.getKeyDomainConfidenceDescriptiveStatistics().median() ) ); - sb.append( "]" ); return sb.toString(); } @@ -471,8 +465,6 @@ public final class DomainCountsDifferenceUtil { sb.append( cd.getKeyDomainProteinsCount() ); sb.append( ", " ); sb.append( cd.getNumberOfCombinableDomains() ); - sb.append( "][" ); - sb.append( FORMATTER.format( cd.getKeyDomainConfidenceDescriptiveStatistics().median() ) ); sb.append( "]" ); sb.append( cd.getCombiningDomainIdsAsStringBuilder() ); return sb.toString(); diff --git a/forester/java/src/org/forester/surfacing/DomainSimilarity.java b/forester/java/src/org/forester/surfacing/DomainSimilarity.java index be7273e..bf9cef4 100644 --- a/forester/java/src/org/forester/surfacing/DomainSimilarity.java +++ b/forester/java/src/org/forester/surfacing/DomainSimilarity.java @@ -97,7 +97,7 @@ public interface DomainSimilarity extends Comparable { * * @return SortedMap */ - public SortedMap getSpeciesData(); + public SortedMap getSpeciesData(); public double getStandardDeviationOfSimilarityScore(); diff --git a/forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java b/forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java index c785ff7..fc7d8b5 100644 --- a/forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java +++ b/forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java @@ -39,22 +39,22 @@ import org.forester.util.ForesterUtil; public class PrintableDomainSimilarity implements DomainSimilarity { - final public static String SPECIES_SEPARATOR = " "; - final private static int EQUAL = 0; - final private static String NO_SPECIES = " "; - private static final boolean PRINT_MORE_INFO = false; - final private double _min; - final private double _max; - final private double _mean; - final private double _sd; - final private int _n; - private final int _max_difference_in_counts; - private final int _max_difference; - final private CombinableDomains _combinable_domains; - final private SortedMap _species_data; - private List _species_order; - private DomainSimilarityCalculator.Detailedness _detailedness; - private final boolean _treat_as_binary_comparison; + final public static String SPECIES_SEPARATOR = " "; + final private static int EQUAL = 0; + final private static String NO_SPECIES = " "; + private static final boolean PRINT_MORE_INFO = false; + final private double _min; + final private double _max; + final private double _mean; + final private double _sd; + final private int _n; + private final int _max_difference_in_counts; + private final int _max_difference; + final private CombinableDomains _combinable_domains; + final private SortedMap _species_data; + private List _species_order; + private DomainSimilarityCalculator.Detailedness _detailedness; + private final boolean _treat_as_binary_comparison; public PrintableDomainSimilarity( final CombinableDomains combinable_domains, final double min, @@ -65,7 +65,7 @@ public class PrintableDomainSimilarity implements DomainSimilarity { final int n, final int max_difference_in_counts, final int max_difference, - final SortedMap species_data, + final SortedMap species_data, final boolean sort_by_species_count_first, final boolean treat_as_binary_comparison ) { if ( combinable_domains == null ) { @@ -240,7 +240,7 @@ public class PrintableDomainSimilarity implements DomainSimilarity { } @Override - public SortedMap getSpeciesData() { + public SortedMap getSpeciesData() { return _species_data; } diff --git a/forester/java/src/org/forester/surfacing/PrintableSpeciesSpecificDomainSimilariyData.java b/forester/java/src/org/forester/surfacing/PrintableSpeciesSpecificDcData.java similarity index 71% rename from forester/java/src/org/forester/surfacing/PrintableSpeciesSpecificDomainSimilariyData.java rename to forester/java/src/org/forester/surfacing/PrintableSpeciesSpecificDcData.java index 74e168d..79dcb37 100644 --- a/forester/java/src/org/forester/surfacing/PrintableSpeciesSpecificDomainSimilariyData.java +++ b/forester/java/src/org/forester/surfacing/PrintableSpeciesSpecificDcData.java @@ -29,19 +29,21 @@ package org.forester.surfacing; import java.util.Set; import java.util.SortedMap; +import java.util.SortedSet; import java.util.TreeMap; +import java.util.TreeSet; -class PrintableSpeciesSpecificDomainSimilariyData implements SpeciesSpecificDomainSimilariyData { +import org.forester.util.ForesterUtil; + +class PrintableSpeciesSpecificDcData implements SpeciesSpecificDcData { final SortedMap _combinable_domain_id_to_count_map; - final private int _key_domain_proteins_count; + final SortedSet _key_domain_proteins; final private int _key_domain_domains_count; final private int _combinable_domains_count; - public PrintableSpeciesSpecificDomainSimilariyData( final int key_domain_proteins_count, - final int key_domain_domains_count, - final int combinable_domains ) { - _key_domain_proteins_count = key_domain_proteins_count; + public PrintableSpeciesSpecificDcData( final int key_domain_domains_count, final int combinable_domains ) { + _key_domain_proteins = new TreeSet(); _key_domain_domains_count = key_domain_domains_count; _combinable_domains_count = combinable_domains; _combinable_domain_id_to_count_map = new TreeMap(); @@ -69,7 +71,7 @@ class PrintableSpeciesSpecificDomainSimilariyData implements SpeciesSpecificDoma } private int getKeyDomainProteinsCount() { - return _key_domain_proteins_count; + return _key_domain_proteins.size(); } @Override @@ -81,6 +83,22 @@ class PrintableSpeciesSpecificDomainSimilariyData implements SpeciesSpecificDoma } @Override + public void addKeyDomainProtein( final String protein ) { + if ( ForesterUtil.isEmpty( protein ) ) { + throw new IllegalArgumentException( "attempt to add null or empty protein" ); + } + if ( getKeyDomainProteins().contains( protein ) ) { + throw new IllegalArgumentException( "protein \"" + protein + "\" is not unique" ); + } + getKeyDomainProteins().add( protein ); + } + + @Override + public SortedSet getKeyDomainProteins() { + return _key_domain_proteins; + } + + @Override public String toString() { return toStringBuffer( DomainSimilarityCalculator.Detailedness.LIST_COMBINING_DOMAIN_FOR_EACH_SPECIES, false ) .toString(); @@ -117,6 +135,26 @@ class PrintableSpeciesSpecificDomainSimilariyData implements SpeciesSpecificDoma sb.append( getCombinableDomainIdToCountsMap().get( domain_id ) ); } } + sb.append( " [" ); + boolean first = true; + for( final String p : getKeyDomainProteins() ) { + String link = null; + final String up_id = ForesterUtil.extractUniProtKbProteinSeqIdentifier( p ); + if ( !ForesterUtil.isEmpty( up_id ) ) { + link = "" + up_id + ""; + } + else { + link = "" + p + ""; + } + if ( first ) { + first = false; + } + else { + sb.append( ", " ); + } + sb.append( p ); + } + sb.append( "]" ); return sb; } } diff --git a/forester/java/src/org/forester/surfacing/SpeciesSpecificDomainSimilariyData.java b/forester/java/src/org/forester/surfacing/SpeciesSpecificDcData.java similarity index 91% rename from forester/java/src/org/forester/surfacing/SpeciesSpecificDomainSimilariyData.java rename to forester/java/src/org/forester/surfacing/SpeciesSpecificDcData.java index 0e45491..eb75a9b 100644 --- a/forester/java/src/org/forester/surfacing/SpeciesSpecificDomainSimilariyData.java +++ b/forester/java/src/org/forester/surfacing/SpeciesSpecificDcData.java @@ -28,11 +28,12 @@ package org.forester.surfacing; import java.util.SortedMap; +import java.util.SortedSet; /* * A helper class for PrintableDomainSimilarity. */ -interface SpeciesSpecificDomainSimilariyData { +interface SpeciesSpecificDcData { public void addProteinsExhibitingCombinationCount( final String domain_id, final int count ); @@ -47,4 +48,8 @@ interface SpeciesSpecificDomainSimilariyData { public int getNumberOfProteinsExhibitingCombinationWith( final String domain_id ); public StringBuffer toStringBuffer( final DomainSimilarityCalculator.Detailedness detailedness, boolean html ); + + public SortedSet getKeyDomainProteins(); + + void addKeyDomainProtein( String protein ); } diff --git a/forester/java/src/org/forester/surfacing/TestSurfacing.java b/forester/java/src/org/forester/surfacing/TestSurfacing.java index 4ab0fb0..3a79d7b 100644 --- a/forester/java/src/org/forester/surfacing/TestSurfacing.java +++ b/forester/java/src/org/forester/surfacing/TestSurfacing.java @@ -575,7 +575,7 @@ public class TestSurfacing { if ( !sa3.getDomainId().equals( "A" ) ) { return false; } - final SpeciesSpecificDomainSimilariyData ssdsd = sa3.getSpeciesData().get( new BasicSpecies( "ciona" ) ); + final SpeciesSpecificDcData ssdsd = sa3.getSpeciesData().get( new BasicSpecies( "ciona" ) ); if ( ssdsd.getCombinableDomainIdToCountsMap().size() != 4 ) { return false; } @@ -618,7 +618,7 @@ public class TestSurfacing { if ( !sa4.getDomainId().equals( "A" ) ) { return false; } - final SpeciesSpecificDomainSimilariyData ssdsd4 = sa4.getSpeciesData().get( new BasicSpecies( "ciona" ) ); + final SpeciesSpecificDcData ssdsd4 = sa4.getSpeciesData().get( new BasicSpecies( "ciona" ) ); if ( ssdsd4.getCombinableDomainIdToCountsMap().size() != 5 ) { return false; } @@ -726,7 +726,7 @@ public class TestSurfacing { if ( !sa5_d.getSpecies().last().equals( new BasicSpecies( "rabbit" ) ) ) { return false; } - final SpeciesSpecificDomainSimilariyData ssdsd5 = sa5_d.getSpeciesData().get( new BasicSpecies( "ciona" ) ); + final SpeciesSpecificDcData ssdsd5 = sa5_d.getSpeciesData().get( new BasicSpecies( "ciona" ) ); if ( ssdsd5.getCombinableDomainIdToCountsMap().size() != 4 ) { return false; } @@ -861,7 +861,7 @@ public class TestSurfacing { if ( !sa6_d.getSpecies().last().equals( new BasicSpecies( "rabbit" ) ) ) { return false; } - final SpeciesSpecificDomainSimilariyData ssdsd6 = sa6_d.getSpeciesData().get( new BasicSpecies( "ciona" ) ); + final SpeciesSpecificDcData ssdsd6 = sa6_d.getSpeciesData().get( new BasicSpecies( "ciona" ) ); if ( ssdsd6.getCombinableDomainIdToCountsMap().size() != 5 ) { return false; } diff --git a/forester/java/src/org/forester/util/ForesterUtil.java b/forester/java/src/org/forester/util/ForesterUtil.java index 47e6aa7..46eceac 100644 --- a/forester/java/src/org/forester/util/ForesterUtil.java +++ b/forester/java/src/org/forester/util/ForesterUtil.java @@ -165,6 +165,21 @@ public final class ForesterUtil { return v; } + public static String extractUniProtKbProteinSeqIdentifier( final String str ) { + String upkb = null; + Matcher m = UNIPROT_KB_PATTERN_1.matcher( str ); + if ( m.find() ) { + upkb = m.group( 1 ); + } + else { + m = UNIPROT_KB_PATTERN_2.matcher( str ); + if ( m.find() ) { + upkb = m.group(); + } + } + return upkb; + } + public static String extractUniProtKbProteinSeqIdentifier( final PhylogenyNode node ) { String upkb = null; if ( node.getNodeData().isHasSequence() ) {