From ce90114ba0f7379d302672dc96527095f9e27e15 Mon Sep 17 00:00:00 2001 From: cmzmasek Date: Thu, 17 Aug 2017 09:40:45 -0700 Subject: [PATCH] in progress... --- .../forester/clade_analysis/CladeAnalysisTest.java | 20 +- .../src/org/forester/clade_analysis/Result2.java | 191 ++++++++++++++------ 2 files changed, 148 insertions(+), 63 deletions(-) diff --git a/forester/java/src/org/forester/clade_analysis/CladeAnalysisTest.java b/forester/java/src/org/forester/clade_analysis/CladeAnalysisTest.java index bc832d5..7c1d38b 100644 --- a/forester/java/src/org/forester/clade_analysis/CladeAnalysisTest.java +++ b/forester/java/src/org/forester/clade_analysis/CladeAnalysisTest.java @@ -593,6 +593,7 @@ public class CladeAnalysisTest { private static boolean testCladeAnalysis3() { try { + /* final Result2 res1 = new Result2(); res1.addGreatestCommonPrefix( "A.1.1", 0.3 ); res1.addGreatestCommonPrefix( "A.1.2", 0.3 ); @@ -613,14 +614,25 @@ public class CladeAnalysisTest { res2.addGreatestCommonPrefix( "A.1.2", 0.1 ); res2.addGreatestCommonPrefix( "B.1", 0.1 ); res2.analyzeGreatestCommonPrefixes( ); - + */ final Result2 res3 = new Result2("."); - res3.addGreatestCommonPrefix( "A.1.1.1", 0.7 ); + res3.addGreatestCommonPrefix( "A.1.1.1", 0.1 ); + res3.addGreatestCommonPrefix( "A.1.1.1.1", 0.6 ); res3.addGreatestCommonPrefix( "A.1", 0.1 ); res3.addGreatestCommonPrefix( "A.1.2", 0.1 ); res3.addGreatestCommonPrefix( "B.1", 0.1 ); res3.analyzeGreatestCommonPrefixes( ); - + + final Result2 res33 = new Result2("."); + res33.addGreatestCommonPrefix( "A.1.1.1", 0.1 ); + res33.addGreatestCommonPrefix( "A.1.1.1.1", 0.3 ); + res33.addGreatestCommonPrefix( "A.1", 0.1 ); + res33.addGreatestCommonPrefix( "A.1.2", 0.1 ); + res33.addGreatestCommonPrefix( "B.1", 0.1 ); + res33.addGreatestCommonPrefix( "B.1.1.1", 0.3 ); + res33.analyzeGreatestCommonPrefixes( ); + + /* final Result2 res4 = new Result2(); res4.addGreatestCommonPrefix( "A.1.1.1.1", 0.35); res4.addGreatestCommonPrefix( "A.1.1.1.2", 0.35 ); @@ -670,7 +682,7 @@ public class CladeAnalysisTest { res9.addGreatestCommonPrefix( "AA_/_abc_/_memr", 0.01 ); res9.addGreatestCommonPrefix( "AA_/_abc_/_fkem_/_odem", 0.01 ); res9.addGreatestCommonPrefix( "BB_/_fke_/_dme_/_nx2", 0.3 ); - res9.analyzeGreatestCommonPrefixes( ); + res9.analyzeGreatestCommonPrefixes( );*/ } catch ( final Exception e ) { e.printStackTrace( System.out ); diff --git a/forester/java/src/org/forester/clade_analysis/Result2.java b/forester/java/src/org/forester/clade_analysis/Result2.java index 1dc9f0e..f59679b 100644 --- a/forester/java/src/org/forester/clade_analysis/Result2.java +++ b/forester/java/src/org/forester/clade_analysis/Result2.java @@ -39,8 +39,8 @@ import org.forester.util.ForesterUtil; public final class Result2 { - private final String _separator; - private final List _greatest_common_prefixes = new ArrayList<>(); + private final String _separator; + private final List _greatest_common_prefixes = new ArrayList<>(); private String _greatest_common_prefix_up = ""; private String _greatest_common_prefix_down = ""; private final List _warnings = new ArrayList<>(); @@ -49,11 +49,11 @@ public final class Result2 { private String _greatest_common_clade_subtree_confidence = ""; private String _greatest_common_clade_subtree_confidence_up = ""; private String _greatest_common_clade_subtree_confidence_down = ""; - - public Result2(final String separator) { - _separator = separator; + + public Result2( final String separator ) { + _separator = separator; } - + public Result2() { _separator = ".";//TODO make const somewhere } @@ -66,8 +66,6 @@ public final class Result2 { _greatest_common_prefixes.add( new Prefix( prefix, confidence, _separator ) ); } - - void setGreatestCommonPrefixUp( final String greatest_common_prefix_up ) { _greatest_common_prefix_up = greatest_common_prefix_up; } @@ -130,71 +128,76 @@ public final class Result2 { public int getTreeSize() { return _p_ext_nodes; } - - public void analyzeGreatestCommonPrefixes( ) { - analyzeGreatestCommonPrefixes( _greatest_common_prefixes, _separator ); + + public void analyzeGreatestCommonPrefixes() { + analyzeGreatestCommonPrefixes( _greatest_common_prefixes, _separator, 0.3 ); } - public final static void analyzeGreatestCommonPrefixes( List greatest_common_prefixes, final String separator ) { - final SortedMap map = new TreeMap<>(); - for( final Prefix prefix : greatest_common_prefixes ) { - final List prefixes = ForesterUtil.spliIntoPrefixes( prefix.getPrefix(), separator ); - for( final String p : prefixes ) { - map.put( p, 0.0 ); - } + public final static void analyzeGreatestCommonPrefixes( final List greatest_common_prefixes, + final String separator, + final double cutoff ) { + final List l = obtainAllPrefixes( greatest_common_prefixes, separator ); + sortPrefixesAccordingToConfidence( l ); + System.out.println(); + System.out.println( "All:" ); + for( final Prefix prefix : l ) { + System.out.println( prefix ); } - // System.out.println( map ); - for( final String key : map.keySet() ) { - //System.out.println(key); - for( final Prefix prefix : greatest_common_prefixes ) { - if ( prefix.getPrefix().startsWith( key ) ) { - map.put( key, map.get( key ) + prefix.getConfidence() ); - } - } + final List cleaned = removeLessSpecificPrefixes( l ); + System.out.println(); + System.out.println( "Cleaned:" ); + for( final Prefix prefix : cleaned ) { + System.out.println( prefix ); } - //System.out.println( map ); - final List l = new ArrayList<>(); - for( final Entry entry : map.entrySet() ) { - // System.out.println( entry.getKey() + "->" + entry.getValue() ); - l.add( new Prefix( entry.getKey(), entry.getValue(), separator ) ); + final List collapsed = collapse( cleaned ); + System.out.println(); + System.out.println( "Collapsed:" ); + for( final Prefix prefix : collapsed ) { + System.out.println( prefix ); } - Collections.sort( l, new Comparator() { - - @Override - public int compare( final Prefix x, final Prefix y ) { - final int start_comparison = compare( x.getConfidence(), y.getConfidence() ); - return start_comparison; - //return startComparison != 0 ? startComparison - // : compare(x.timeEnded, y.timeEnded); + if ( cutoff >= 0 ) { + System.out.println(); + System.out.println( "Specifics:" ); + final List cleaned_spec = obtainSpecifics( cutoff, cleaned, collapsed ); + for( final Prefix prefix : cleaned_spec ) { + System.out.println( prefix ); } + } + System.out.println( "------" ); + } - private int compare( final double a, final double b ) { - return a > b ? -1 : a > b ? 1 : 0; + private final static List obtainSpecifics( final double cutoff, + final List cleaned, + final List collapsed ) { + final List cleaned_spec = new ArrayList<>(); + final Set collapsed_set = new HashSet<>(); + for( final Prefix prefix : collapsed ) { + collapsed_set.add( prefix.getPrefix() ); + } + final List spec = new ArrayList<>(); + for( final Prefix prefix : cleaned ) { + if ( ( prefix.getConfidence() >= cutoff ) && !collapsed_set.contains( prefix.getPrefix() ) ) { + // System.out.println( prefix ); + spec.add( prefix ); } - } ); - System.out.println(); - for( final Prefix prefix : l ) { - // System.out.println( prefix ); } - final List cleaned = new ArrayList<>(); - for( final Prefix o : l ) { + for( final Prefix o : spec ) { boolean ok = true; - for( final Prefix i : l ) { - if ( ( !o.getPrefix().equals( i.getPrefix() ) ) && ( i.getPrefix().startsWith( o.getPrefix() ) ) - && ForesterUtil.isEqual( i.getConfidence(), - o.getConfidence() ) ) { + for( final Prefix i : spec ) { + if ( ( !o.getPrefix().equals( i.getPrefix() ) ) && ( i.getPrefix().startsWith( o.getPrefix() ) ) ) { ok = false; break; } } if ( ok ) { - cleaned.add( o ); + //System.out.println(">" + o ); + cleaned_spec.add( o ); } } - System.out.println(); - for( final Prefix prefix : cleaned ) { - System.out.println( prefix ); - } + return cleaned_spec; + } + + private final static List collapse( final List cleaned ) { final List collapsed = new ArrayList<>(); final Set firsts = new HashSet<>(); double confidence_sum = 0; @@ -209,9 +212,79 @@ public final class Result2 { if ( !ForesterUtil.isEqual( confidence_sum, 1.0 ) ) { throw new IllegalArgumentException( "Confidences add up to " + confidence_sum + " instead of 1.0" ); } - System.out.println(); - for( final Prefix prefix : collapsed ) { - System.out.println( prefix ); + return collapsed; + } + + /* + * This replaces (by way of example) + * A.1.1 0.9 + * A.1 0.9 + * with + * A.1.1 0.9 + * + * I.e. it removes less specific prefixes. + * + */ + private final static List removeLessSpecificPrefixes( final List l ) { + final List cleaned = new ArrayList<>(); + for( final Prefix o : l ) { + boolean ok = true; + for( final Prefix i : l ) { + if ( ( !o.getPrefix().equals( i.getPrefix() ) ) && ( i.getPrefix().startsWith( o.getPrefix() ) ) + && ForesterUtil.isEqual( i.getConfidence(), + o.getConfidence() ) ) { + ok = false; + break; + } + } + if ( ok ) { + cleaned.add( o ); + } + } + return cleaned; + } + + private static void sortPrefixesAccordingToConfidence( final List l ) { + Collections.sort( l, new Comparator() { + + @Override + public int compare( final Prefix x, final Prefix y ) { + final int start_comparison = compare( x.getConfidence(), y.getConfidence() ); + return start_comparison; + //return startComparison != 0 ? startComparison + // : compare(x.timeEnded, y.timeEnded); + } + + private int compare( final double a, final double b ) { + return a > b ? -1 : a > b ? 1 : 0; + } + } ); + } + + private final static List obtainAllPrefixes( final List greatest_common_prefixes, + final String separator ) { + final SortedMap map = new TreeMap<>(); + for( final Prefix prefix : greatest_common_prefixes ) { + final List prefixes = ForesterUtil.spliIntoPrefixes( prefix.getPrefix(), separator ); + for( final String p : prefixes ) { + map.put( p, 0.0 ); + } + } + // System.out.println( map ); + for( final String key : map.keySet() ) { + //System.out.println(key); + for( final Prefix prefix : greatest_common_prefixes ) { + if ( prefix.getPrefix().startsWith( key ) ) { + map.put( key, map.get( key ) + prefix.getConfidence() ); + } + } + } + //System.out.println( map ); + final List l = new ArrayList<>(); + for( final Entry entry : map.entrySet() ) { + // System.out.println( entry.getKey() + "->" + entry.getValue() ); + l.add( new Prefix( entry.getKey(), entry.getValue(), separator ) ); } + return l; } } -- 1.7.10.2