From fbb7c0a322111e5221773fed19591da29296efb5 Mon Sep 17 00:00:00 2001 From: cmzmasek Date: Wed, 16 Aug 2017 12:22:09 -0700 Subject: [PATCH] in progress... --- .../src/org/forester/clade_analysis/Analysis2.java | 2 +- .../forester/clade_analysis/CladeAnalysisTest.java | 50 ++++++-- .../src/org/forester/clade_analysis/Prefix.java | 49 ++++++-- .../src/org/forester/clade_analysis/Result2.java | 120 ++++++++++++++++---- .../java/src/org/forester/util/ForesterUtil.java | 2 +- 5 files changed, 179 insertions(+), 44 deletions(-) diff --git a/forester/java/src/org/forester/clade_analysis/Analysis2.java b/forester/java/src/org/forester/clade_analysis/Analysis2.java index 3d1561f..762b9b7 100644 --- a/forester/java/src/org/forester/clade_analysis/Analysis2.java +++ b/forester/java/src/org/forester/clade_analysis/Analysis2.java @@ -81,7 +81,7 @@ public final class Analysis2 { } else { // res.setGreatestCommonPrefix( greatest_common_prefix ); - res.addGreatestCommonPrefix( prefix, confidence ); + res.addGreatestCommonPrefix( prefix, confidence, separator ); } if ( qnode_pp.isRoot() ) { res.addWarning( "Least Encompassing Clade is entire tree" ); diff --git a/forester/java/src/org/forester/clade_analysis/CladeAnalysisTest.java b/forester/java/src/org/forester/clade_analysis/CladeAnalysisTest.java index d91e36e..bc832d5 100644 --- a/forester/java/src/org/forester/clade_analysis/CladeAnalysisTest.java +++ b/forester/java/src/org/forester/clade_analysis/CladeAnalysisTest.java @@ -605,30 +605,29 @@ public class CladeAnalysisTest { // System.out.println( x ); - res1.analyzeGreatestCommonPrefixes( "." ); + res1.analyzeGreatestCommonPrefixes( ); - final Result2 res2 = new Result2(); + final Result2 res2 = new Result2("."); res2.addGreatestCommonPrefix( "A.1.1.1", 0.1 ); res2.addGreatestCommonPrefix( "A.1", 0.7 ); res2.addGreatestCommonPrefix( "A.1.2", 0.1 ); res2.addGreatestCommonPrefix( "B.1", 0.1 ); - res2.analyzeGreatestCommonPrefixes( "." ); + res2.analyzeGreatestCommonPrefixes( ); - final Result2 res3 = new Result2(); + final Result2 res3 = new Result2("."); res3.addGreatestCommonPrefix( "A.1.1.1", 0.7 ); res3.addGreatestCommonPrefix( "A.1", 0.1 ); res3.addGreatestCommonPrefix( "A.1.2", 0.1 ); res3.addGreatestCommonPrefix( "B.1", 0.1 ); - res3.analyzeGreatestCommonPrefixes( "." ); - - + res3.analyzeGreatestCommonPrefixes( ); + final Result2 res4 = new Result2(); res4.addGreatestCommonPrefix( "A.1.1.1.1", 0.35); res4.addGreatestCommonPrefix( "A.1.1.1.2", 0.35 ); res4.addGreatestCommonPrefix( "A.1", 0.1 ); res4.addGreatestCommonPrefix( "A.1.2", 0.1 ); res4.addGreatestCommonPrefix( "B.1", 0.1 ); - res4.analyzeGreatestCommonPrefixes( "." ); + res4.analyzeGreatestCommonPrefixes( ); final Result2 res5 = new Result2(); res5.addGreatestCommonPrefix( "A.1.1.1.1", 0.2); @@ -638,7 +637,40 @@ public class CladeAnalysisTest { res5.addGreatestCommonPrefix( "B.1.1", 0.2 ); res5.addGreatestCommonPrefix( "B.1.2", 0.09 ); res5.addGreatestCommonPrefix( "D.1.1.1.1", 0.01 ); - res5.analyzeGreatestCommonPrefixes( "." ); + res5.analyzeGreatestCommonPrefixes( ); + + final Result2 res6 = new Result2(); + res6.addGreatestCommonPrefix( "A.1.1.1", 0.05 ); + res6.addGreatestCommonPrefix( "A.1.1.1.1", 0.65 ); + res6.addGreatestCommonPrefix( "A.1", 0.1 ); + res6.addGreatestCommonPrefix( "A.1.2", 0.1 ); + res6.addGreatestCommonPrefix( "B.1", 0.1 ); + res6.analyzeGreatestCommonPrefixes( ); + + final Result2 res7 = new Result2(); + res7.addGreatestCommonPrefix( "A.1.1.1", 0.07 ); + res7.addGreatestCommonPrefix( "A.1.1.1.1", 0.9 ); + res7.addGreatestCommonPrefix( "A.1", 0.01 ); + res7.addGreatestCommonPrefix( "A.1.2", 0.01 ); + res7.addGreatestCommonPrefix( "B.1", 0.01 ); + res7.analyzeGreatestCommonPrefixes( ); + + final Result2 res8 = new Result2("_/_"); + res8.addGreatestCommonPrefix( "AA_/_abc_/_def", 0.07 ); + res8.addGreatestCommonPrefix( "AA_/_abc_/_sfc", 0.9 ); + res8.addGreatestCommonPrefix( "AA_/_abc_/_xcd", 0.01 ); + res8.addGreatestCommonPrefix( "AA_/_abc_/_memr", 0.01 ); + res8.addGreatestCommonPrefix( "AA_/_abc_/_fkem_/_odem", 0.01 ); + res8.analyzeGreatestCommonPrefixes( ); + + final Result2 res9 = new Result2("_/_"); + res9.addGreatestCommonPrefix( "AA_/_abc_/_def", 0.07 ); + res9.addGreatestCommonPrefix( "AA_/_abc_/_sfc", 0.6 ); + res9.addGreatestCommonPrefix( "AA_/_abc_/_xcd", 0.01 ); + res9.addGreatestCommonPrefix( "AA_/_abc_/_memr", 0.01 ); + res9.addGreatestCommonPrefix( "AA_/_abc_/_fkem_/_odem", 0.01 ); + res9.addGreatestCommonPrefix( "BB_/_fke_/_dme_/_nx2", 0.3 ); + res9.analyzeGreatestCommonPrefixes( ); } catch ( final Exception e ) { e.printStackTrace( System.out ); diff --git a/forester/java/src/org/forester/clade_analysis/Prefix.java b/forester/java/src/org/forester/clade_analysis/Prefix.java index 8adb85b..6362a3e 100644 --- a/forester/java/src/org/forester/clade_analysis/Prefix.java +++ b/forester/java/src/org/forester/clade_analysis/Prefix.java @@ -1,25 +1,52 @@ + package org.forester.clade_analysis; +import java.math.BigDecimal; final class Prefix { - final String _prefix; - final double _confidence; + + private final String _prefix; + private final BigDecimal _confidence; + private final String _separator; + private final String _first; + + Prefix( final String prefix, final String confidence, final String separator ) { + _prefix = prefix; + _confidence = new BigDecimal( confidence); + _separator = separator ; + if ( _prefix.indexOf( _separator ) < 0) { + _first = _prefix; + } + else { + _first = _prefix.substring( 0, _prefix.indexOf(_separator ) ); + } + } - Prefix( final String prefix, final double confidence ) { + Prefix( final String prefix, final double confidence , final String separator) { _prefix = prefix; - _confidence = confidence; + _confidence = new BigDecimal( confidence); + _separator = separator ; + if ( _prefix.indexOf( _separator ) < 0) { + _first = _prefix; + } + else { + _first = _prefix.substring( 0, _prefix.indexOf(_separator ) ); + } } - - String getPrefix() { + String getPrefix() { return _prefix; } - + String getPrefixFirstElement() { + return _first; + } double getConfidence() { - return _confidence; + return _confidence.doubleValue(); + } + + @Override + public String toString() { + return getPrefix() + ": " + getConfidence(); } - - - } diff --git a/forester/java/src/org/forester/clade_analysis/Result2.java b/forester/java/src/org/forester/clade_analysis/Result2.java index a1a7bea..1dc9f0e 100644 --- a/forester/java/src/org/forester/clade_analysis/Result2.java +++ b/forester/java/src/org/forester/clade_analysis/Result2.java @@ -26,8 +26,12 @@ package org.forester.clade_analysis; import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashSet; import java.util.List; import java.util.Map.Entry; +import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; @@ -35,24 +39,35 @@ import org.forester.util.ForesterUtil; public final class Result2 { - private List _greatest_common_prefix = new ArrayList(); - private String _greatest_common_prefix_up = ""; - private String _greatest_common_prefix_down = ""; - private final List _warnings = new ArrayList<>(); - private int _lec_ext_nodes = 0; - private int _p_ext_nodes = 0; + private final String _separator; + private final List _greatest_common_prefixes = new ArrayList<>(); + private String _greatest_common_prefix_up = ""; + private String _greatest_common_prefix_down = ""; + private final List _warnings = new ArrayList<>(); + private int _lec_ext_nodes = 0; + private int _p_ext_nodes = 0; private String _greatest_common_clade_subtree_confidence = ""; private String _greatest_common_clade_subtree_confidence_up = ""; private String _greatest_common_clade_subtree_confidence_down = ""; + + public Result2(final String separator) { + _separator = separator; + } + + public Result2() { + _separator = ".";//TODO make const somewhere + } void addWarning( final String warning ) { _warnings.add( warning ); } void addGreatestCommonPrefix( final String prefix, final double confidence ) { - _greatest_common_prefix.add( new Prefix(prefix, confidence) ); + _greatest_common_prefixes.add( new Prefix( prefix, confidence, _separator ) ); } + + void setGreatestCommonPrefixUp( final String greatest_common_prefix_up ) { _greatest_common_prefix_up = greatest_common_prefix_up; } @@ -73,10 +88,9 @@ public final class Result2 { _greatest_common_clade_subtree_confidence_down = greatest_common_clade_confidence_down; } - // public String getGreatestCommonPrefix() { - // return _greatest_common_prefix; - // } - + // public String getGreatestCommonPrefix() { + // return _greatest_common_prefix; + // } public String getGreatestCommonPrefixUp() { return _greatest_common_prefix_up; } @@ -117,25 +131,87 @@ public final class Result2 { return _p_ext_nodes; } - public void analyzeGreatestCommonPrefixes(final String separator ) { - final SortedMap map = new TreeMap(); - for( final Prefix prefix : _greatest_common_prefix ) { - List prefixes = ForesterUtil.spliIntoPrefixes( prefix.getPrefix(), separator ); + public void analyzeGreatestCommonPrefixes( ) { + analyzeGreatestCommonPrefixes( _greatest_common_prefixes, _separator ); + } + + public final static void analyzeGreatestCommonPrefixes( List greatest_common_prefixes, final String separator ) { + final SortedMap map = new TreeMap<>(); + for( final Prefix prefix : greatest_common_prefixes ) { + final List prefixes = ForesterUtil.spliIntoPrefixes( prefix.getPrefix(), separator ); for( final String p : prefixes ) { map.put( p, 0.0 ); } } - // System.out.println( map ); - for (final String key : map.keySet()) { + // System.out.println( map ); + for( final String key : map.keySet() ) { //System.out.println(key); - for( final Prefix prefix : _greatest_common_prefix ) { + for( final Prefix prefix : greatest_common_prefixes ) { if ( prefix.getPrefix().startsWith( key ) ) { - map.put( key, map.get( key ) + prefix.getConfidence() ); + map.put( key, map.get( key ) + prefix.getConfidence() ); + } + } + } + //System.out.println( map ); + final List l = new ArrayList<>(); + for( final Entry entry : map.entrySet() ) { + // System.out.println( entry.getKey() + "->" + entry.getValue() ); + l.add( new Prefix( entry.getKey(), entry.getValue(), separator ) ); + } + Collections.sort( l, new Comparator() { + + @Override + public int compare( final Prefix x, final Prefix y ) { + final int start_comparison = compare( x.getConfidence(), y.getConfidence() ); + return start_comparison; + //return startComparison != 0 ? startComparison + // : compare(x.timeEnded, y.timeEnded); + } + + private int compare( final double a, final double b ) { + return a > b ? -1 : a > b ? 1 : 0; + } + } ); + System.out.println(); + for( final Prefix prefix : l ) { + // System.out.println( prefix ); + } + final List cleaned = new ArrayList<>(); + for( final Prefix o : l ) { + boolean ok = true; + for( final Prefix i : l ) { + if ( ( !o.getPrefix().equals( i.getPrefix() ) ) && ( i.getPrefix().startsWith( o.getPrefix() ) ) + && ForesterUtil.isEqual( i.getConfidence(), + o.getConfidence() ) ) { + ok = false; + break; } } + if ( ok ) { + cleaned.add( o ); + } + } + System.out.println(); + for( final Prefix prefix : cleaned ) { + System.out.println( prefix ); + } + final List collapsed = new ArrayList<>(); + final Set firsts = new HashSet<>(); + double confidence_sum = 0; + for( final Prefix prefix : cleaned ) { + final String f = prefix.getPrefixFirstElement(); + if ( !firsts.contains( f ) ) { + firsts.add( f ); + collapsed.add( prefix ); + confidence_sum += prefix.getConfidence(); + } + } + if ( !ForesterUtil.isEqual( confidence_sum, 1.0 ) ) { + throw new IllegalArgumentException( "Confidences add up to " + confidence_sum + " instead of 1.0" ); + } + System.out.println(); + for( final Prefix prefix : collapsed ) { + System.out.println( prefix ); } - System.out.println( map ); } - - } diff --git a/forester/java/src/org/forester/util/ForesterUtil.java b/forester/java/src/org/forester/util/ForesterUtil.java index d3d61d9..3d9888b 100644 --- a/forester/java/src/org/forester/util/ForesterUtil.java +++ b/forester/java/src/org/forester/util/ForesterUtil.java @@ -99,7 +99,7 @@ public final class ForesterUtil { public final static String OS_VERSION = System.getProperty( "os.version" ); public static final String PDB = "http://www.pdb.org/pdb/explore/explore.do?pdbId="; public final static String UNIPROT_KB = "http://www.uniprot.org/uniprot/"; - public final static double ZERO_DIFF = 1.0E-9; + public final static double ZERO_DIFF = 1.0E-12; private static final Pattern PARANTHESESABLE_NH_CHARS_PATTERN = Pattern.compile( "[(),;\\s:\\[\\]]" ); static { final DecimalFormatSymbols dfs = new DecimalFormatSymbols(); -- 1.7.10.2