From a0d44ec0ea329861626d05136c2ac28ae3ee4ed9 Mon Sep 17 00:00:00 2001 From: cmzmasek Date: Thu, 17 Aug 2017 14:58:52 -0700 Subject: [PATCH] in progress... --- .../forester/clade_analysis/CladeAnalysisTest.java | 203 +++++++++++--------- .../src/org/forester/clade_analysis/Result2.java | 101 ++++++---- 2 files changed, 176 insertions(+), 128 deletions(-) diff --git a/forester/java/src/org/forester/clade_analysis/CladeAnalysisTest.java b/forester/java/src/org/forester/clade_analysis/CladeAnalysisTest.java index 7c1d38b..3d1f4b9 100644 --- a/forester/java/src/org/forester/clade_analysis/CladeAnalysisTest.java +++ b/forester/java/src/org/forester/clade_analysis/CladeAnalysisTest.java @@ -590,99 +590,120 @@ public class CladeAnalysisTest { } return true; } - + private static boolean testCladeAnalysis3() { try { - /* - final Result2 res1 = new Result2(); - res1.addGreatestCommonPrefix( "A.1.1", 0.3 ); - res1.addGreatestCommonPrefix( "A.1.2", 0.3 ); - res1.addGreatestCommonPrefix( "A.1.3", 0.3 ); - res1.addGreatestCommonPrefix( "B.1", 0.1 ); - - List x = ForesterUtil.spliIntoPrefixes( "A.1.2.3.4", "." ); - // System.out.println( x ); - x =ForesterUtil.spliIntoPrefixes( "A", "." ); - // System.out.println( x ); - - - res1.analyzeGreatestCommonPrefixes( ); - - final Result2 res2 = new Result2("."); - res2.addGreatestCommonPrefix( "A.1.1.1", 0.1 ); - res2.addGreatestCommonPrefix( "A.1", 0.7 ); - res2.addGreatestCommonPrefix( "A.1.2", 0.1 ); - res2.addGreatestCommonPrefix( "B.1", 0.1 ); - res2.analyzeGreatestCommonPrefixes( ); - */ - final Result2 res3 = new Result2("."); - res3.addGreatestCommonPrefix( "A.1.1.1", 0.1 ); - res3.addGreatestCommonPrefix( "A.1.1.1.1", 0.6 ); - res3.addGreatestCommonPrefix( "A.1", 0.1 ); - res3.addGreatestCommonPrefix( "A.1.2", 0.1 ); - res3.addGreatestCommonPrefix( "B.1", 0.1 ); - res3.analyzeGreatestCommonPrefixes( ); - - final Result2 res33 = new Result2("."); - res33.addGreatestCommonPrefix( "A.1.1.1", 0.1 ); - res33.addGreatestCommonPrefix( "A.1.1.1.1", 0.3 ); - res33.addGreatestCommonPrefix( "A.1", 0.1 ); - res33.addGreatestCommonPrefix( "A.1.2", 0.1 ); - res33.addGreatestCommonPrefix( "B.1", 0.1 ); - res33.addGreatestCommonPrefix( "B.1.1.1", 0.3 ); - res33.analyzeGreatestCommonPrefixes( ); - - /* - final Result2 res4 = new Result2(); - res4.addGreatestCommonPrefix( "A.1.1.1.1", 0.35); - res4.addGreatestCommonPrefix( "A.1.1.1.2", 0.35 ); - res4.addGreatestCommonPrefix( "A.1", 0.1 ); - res4.addGreatestCommonPrefix( "A.1.2", 0.1 ); - res4.addGreatestCommonPrefix( "B.1", 0.1 ); - res4.analyzeGreatestCommonPrefixes( ); - - final Result2 res5 = new Result2(); - res5.addGreatestCommonPrefix( "A.1.1.1.1", 0.2); - res5.addGreatestCommonPrefix( "C.2.3", 0.2 ); - res5.addGreatestCommonPrefix( "A.1.5", 0.1 ); - res5.addGreatestCommonPrefix( "A.3.1.4", 0.2 ); - res5.addGreatestCommonPrefix( "B.1.1", 0.2 ); - res5.addGreatestCommonPrefix( "B.1.2", 0.09 ); - res5.addGreatestCommonPrefix( "D.1.1.1.1", 0.01 ); - res5.analyzeGreatestCommonPrefixes( ); - - final Result2 res6 = new Result2(); - res6.addGreatestCommonPrefix( "A.1.1.1", 0.05 ); - res6.addGreatestCommonPrefix( "A.1.1.1.1", 0.65 ); - res6.addGreatestCommonPrefix( "A.1", 0.1 ); - res6.addGreatestCommonPrefix( "A.1.2", 0.1 ); - res6.addGreatestCommonPrefix( "B.1", 0.1 ); - res6.analyzeGreatestCommonPrefixes( ); - - final Result2 res7 = new Result2(); - res7.addGreatestCommonPrefix( "A.1.1.1", 0.07 ); - res7.addGreatestCommonPrefix( "A.1.1.1.1", 0.9 ); - res7.addGreatestCommonPrefix( "A.1", 0.01 ); - res7.addGreatestCommonPrefix( "A.1.2", 0.01 ); - res7.addGreatestCommonPrefix( "B.1", 0.01 ); - res7.analyzeGreatestCommonPrefixes( ); - - final Result2 res8 = new Result2("_/_"); - res8.addGreatestCommonPrefix( "AA_/_abc_/_def", 0.07 ); - res8.addGreatestCommonPrefix( "AA_/_abc_/_sfc", 0.9 ); - res8.addGreatestCommonPrefix( "AA_/_abc_/_xcd", 0.01 ); - res8.addGreatestCommonPrefix( "AA_/_abc_/_memr", 0.01 ); - res8.addGreatestCommonPrefix( "AA_/_abc_/_fkem_/_odem", 0.01 ); - res8.analyzeGreatestCommonPrefixes( ); - - final Result2 res9 = new Result2("_/_"); - res9.addGreatestCommonPrefix( "AA_/_abc_/_def", 0.07 ); - res9.addGreatestCommonPrefix( "AA_/_abc_/_sfc", 0.6 ); - res9.addGreatestCommonPrefix( "AA_/_abc_/_xcd", 0.01 ); - res9.addGreatestCommonPrefix( "AA_/_abc_/_memr", 0.01 ); - res9.addGreatestCommonPrefix( "AA_/_abc_/_fkem_/_odem", 0.01 ); - res9.addGreatestCommonPrefix( "BB_/_fke_/_dme_/_nx2", 0.3 ); - res9.analyzeGreatestCommonPrefixes( );*/ + final Result2 res1 = new Result2(); + res1.addGreatestCommonPrefix( "A.1.1", 0.3 ); + res1.addGreatestCommonPrefix( "A.1.2", 0.3 ); + res1.addGreatestCommonPrefix( "A.1.3", 0.3 ); + res1.addGreatestCommonPrefix( "B.1", 0.1 ); + res1.analyzeGreatestCommonPrefixes( 0.3 ); + System.out.print( res1.toString()); + System.out.println( "------------------------- "); + System.out.println(); + + final Result2 res2 = new Result2( "." ); + res2.addGreatestCommonPrefix( "A.1.1.1", 0.1 ); + res2.addGreatestCommonPrefix( "A.1", 0.7 ); + res2.addGreatestCommonPrefix( "A.1.2", 0.1 ); + res2.addGreatestCommonPrefix( "B.1", 0.1 ); + res2.analyzeGreatestCommonPrefixes( 0.3 ); + System.out.print( res2.toString()); + System.out.println( "------------------------- "); + System.out.println(); + + final Result2 res3 = new Result2( "." ); + res3.addGreatestCommonPrefix( "A.1.1.1", 0.1 ); + res3.addGreatestCommonPrefix( "A.1.1.1.1", 0.6 ); + res3.addGreatestCommonPrefix( "A.1", 0.1 ); + res3.addGreatestCommonPrefix( "A.1.2", 0.1 ); + res3.addGreatestCommonPrefix( "B.1", 0.1 ); + res3.analyzeGreatestCommonPrefixes( 0.3 ); + System.out.print( res3.toString()); + System.out.println( "------------------------- "); + System.out.println(); + + final Result2 res33 = new Result2( "." ); + res33.addGreatestCommonPrefix( "A.1.1.1", 0.1 ); + res33.addGreatestCommonPrefix( "A.1.1.1.1", 0.3 ); + res33.addGreatestCommonPrefix( "A.1", 0.1 ); + res33.addGreatestCommonPrefix( "A.1.2", 0.1 ); + res33.addGreatestCommonPrefix( "B.1", 0.1 ); + res33.addGreatestCommonPrefix( "B.1.1.1", 0.3 ); + res33.analyzeGreatestCommonPrefixes( 0.3 ); + System.out.print( res33.toString()); + System.out.println( "------------------------- "); + System.out.println(); + + final Result2 res4 = new Result2(); + res4.addGreatestCommonPrefix( "A.1.1.1.1", 0.35 ); + res4.addGreatestCommonPrefix( "A.1.1.1.2", 0.35 ); + res4.addGreatestCommonPrefix( "A.1", 0.1 ); + res4.addGreatestCommonPrefix( "A.1.2", 0.1 ); + res4.addGreatestCommonPrefix( "B.1", 0.1 ); + res4.analyzeGreatestCommonPrefixes( 0.3 ); + System.out.print( res4.toString()); + System.out.println( "------------------------- "); + System.out.println(); + + final Result2 res5 = new Result2(); + res5.addGreatestCommonPrefix( "A.1.1.1.1", 0.2 ); + res5.addGreatestCommonPrefix( "C.2.3", 0.2 ); + res5.addGreatestCommonPrefix( "A.1.5", 0.1 ); + res5.addGreatestCommonPrefix( "A.3.1.4", 0.2 ); + res5.addGreatestCommonPrefix( "B.1.1", 0.2 ); + res5.addGreatestCommonPrefix( "B.1.2", 0.09 ); + res5.addGreatestCommonPrefix( "D.1.1.1.1", 0.01 ); + res5.analyzeGreatestCommonPrefixes( 0.3 ); + System.out.print( res5.toString()); + System.out.println( "------------------------- "); + System.out.println(); + + final Result2 res6 = new Result2(); + res6.addGreatestCommonPrefix( "A.1.1.1", 0.05 ); + res6.addGreatestCommonPrefix( "A.1.1.1.1", 0.65 ); + res6.addGreatestCommonPrefix( "A.1", 0.1 ); + res6.addGreatestCommonPrefix( "A.1.2", 0.1 ); + res6.addGreatestCommonPrefix( "B.1", 0.1 ); + res6.analyzeGreatestCommonPrefixes( 0.3 ); + System.out.print( res6.toString()); + System.out.println( "------------------------- "); + System.out.println(); + + final Result2 res7 = new Result2(); + res7.addGreatestCommonPrefix( "A.1.1.1", 0.07 ); + res7.addGreatestCommonPrefix( "A.1.1.1.1", 0.9 ); + res7.addGreatestCommonPrefix( "A.1", 0.01 ); + res7.addGreatestCommonPrefix( "A.1.2", 0.01 ); + res7.addGreatestCommonPrefix( "B.1", 0.01 ); + res7.analyzeGreatestCommonPrefixes( 0.3 ); + System.out.print( res7.toString()); + System.out.println( "------------------------- "); + System.out.println(); + + final Result2 res8 = new Result2( "_/_" ); + res8.addGreatestCommonPrefix( "AA_/_abc_/_def", 0.07 ); + res8.addGreatestCommonPrefix( "AA_/_abc_/_sfc", 0.9 ); + res8.addGreatestCommonPrefix( "AA_/_abc_/_xcd", 0.01 ); + res8.addGreatestCommonPrefix( "AA_/_abc_/_memr", 0.01 ); + res8.addGreatestCommonPrefix( "AA_/_abc_/_fkem_/_odem", 0.01 ); + res8.analyzeGreatestCommonPrefixes( 0.3 ); + System.out.print( res8.toString()); + System.out.println( "------------------------- "); + System.out.println(); + + final Result2 res9 = new Result2( "_/_" ); + res9.addGreatestCommonPrefix( "AA_/_abc_/_def", 0.07 ); + res9.addGreatestCommonPrefix( "AA_/_abc_/_sfc", 0.6 ); + res9.addGreatestCommonPrefix( "AA_/_abc_/_xcd", 0.01 ); + res9.addGreatestCommonPrefix( "AA_/_abc_/_memr", 0.01 ); + res9.addGreatestCommonPrefix( "AA_/_abc_/_fkem_/_odem", 0.01 ); + res9.addGreatestCommonPrefix( "BB_/_fke_/_dme_/_nx2", 0.3 ); + res9.analyzeGreatestCommonPrefixes( 0.3 ); + System.out.print( res9.toString()); + System.out.println( "------------------------- "); + System.out.println(); } catch ( final Exception e ) { e.printStackTrace( System.out ); diff --git a/forester/java/src/org/forester/clade_analysis/Result2.java b/forester/java/src/org/forester/clade_analysis/Result2.java index f59679b..81353c3 100644 --- a/forester/java/src/org/forester/clade_analysis/Result2.java +++ b/forester/java/src/org/forester/clade_analysis/Result2.java @@ -49,6 +49,10 @@ public final class Result2 { private String _greatest_common_clade_subtree_confidence = ""; private String _greatest_common_clade_subtree_confidence_up = ""; private String _greatest_common_clade_subtree_confidence_down = ""; + private List _all = null; + private List _collapsed = null; + private List _cleaned_spec = null; + private boolean _has_specifics; public Result2( final String separator ) { _separator = separator; @@ -129,46 +133,36 @@ public final class Result2 { return _p_ext_nodes; } + public void analyzeGreatestCommonPrefixes( final double cutoff ) { + analyzeGreatestCommonPrefixes( _greatest_common_prefixes, _separator, cutoff ); + } + public void analyzeGreatestCommonPrefixes() { - analyzeGreatestCommonPrefixes( _greatest_common_prefixes, _separator, 0.3 ); + analyzeGreatestCommonPrefixes( _greatest_common_prefixes, _separator, -1 ); } - public final static void analyzeGreatestCommonPrefixes( final List greatest_common_prefixes, - final String separator, - final double cutoff ) { + private final void analyzeGreatestCommonPrefixes( final List greatest_common_prefixes, + final String separator, + final double cutoff ) { final List l = obtainAllPrefixes( greatest_common_prefixes, separator ); sortPrefixesAccordingToConfidence( l ); - System.out.println(); - System.out.println( "All:" ); - for( final Prefix prefix : l ) { - System.out.println( prefix ); - } - final List cleaned = removeLessSpecificPrefixes( l ); - System.out.println(); - System.out.println( "Cleaned:" ); - for( final Prefix prefix : cleaned ) { - System.out.println( prefix ); - } - final List collapsed = collapse( cleaned ); - System.out.println(); - System.out.println( "Collapsed:" ); - for( final Prefix prefix : collapsed ) { - System.out.println( prefix ); - } + _all = removeLessSpecificPrefixes( l ); + _collapsed = collapse( _all ); + _has_specifics = false; if ( cutoff >= 0 ) { - System.out.println(); - System.out.println( "Specifics:" ); - final List cleaned_spec = obtainSpecifics( cutoff, cleaned, collapsed ); - for( final Prefix prefix : cleaned_spec ) { - System.out.println( prefix ); + _cleaned_spec = obtainSpecifics( cutoff, _all, _collapsed ); + if ( _cleaned_spec != null && _cleaned_spec.size() > 0 ) { + _has_specifics = true; } } - System.out.println( "------" ); + else { + _cleaned_spec = null; + } } private final static List obtainSpecifics( final double cutoff, - final List cleaned, - final List collapsed ) { + final List cleaned, + final List collapsed ) { final List cleaned_spec = new ArrayList<>(); final Set collapsed_set = new HashSet<>(); for( final Prefix prefix : collapsed ) { @@ -177,7 +171,6 @@ public final class Result2 { final List spec = new ArrayList<>(); for( final Prefix prefix : cleaned ) { if ( ( prefix.getConfidence() >= cutoff ) && !collapsed_set.contains( prefix.getPrefix() ) ) { - // System.out.println( prefix ); spec.add( prefix ); } } @@ -190,7 +183,6 @@ public final class Result2 { } } if ( ok ) { - //System.out.println(">" + o ); cleaned_spec.add( o ); } } @@ -251,8 +243,6 @@ public final class Result2 { public int compare( final Prefix x, final Prefix y ) { final int start_comparison = compare( x.getConfidence(), y.getConfidence() ); return start_comparison; - //return startComparison != 0 ? startComparison - // : compare(x.timeEnded, y.timeEnded); } private int compare( final double a, final double b ) { @@ -270,21 +260,58 @@ public final class Result2 { map.put( p, 0.0 ); } } - // System.out.println( map ); for( final String key : map.keySet() ) { - //System.out.println(key); for( final Prefix prefix : greatest_common_prefixes ) { if ( prefix.getPrefix().startsWith( key ) ) { map.put( key, map.get( key ) + prefix.getConfidence() ); } } } - //System.out.println( map ); final List l = new ArrayList<>(); for( final Entry entry : map.entrySet() ) { - // System.out.println( entry.getKey() + "->" + entry.getValue() ); l.add( new Prefix( entry.getKey(), entry.getValue(), separator ) ); } return l; } + + public final String toString() { + final StringBuilder sb = new StringBuilder(); + //TODO add all other stuff + sb.append( "Cleaned:" ); + sb.append( ForesterUtil.LINE_SEPARATOR ); + for( final Prefix prefix : _all ) { + sb.append( prefix ); + sb.append( ForesterUtil.LINE_SEPARATOR ); + } + sb.append( ForesterUtil.LINE_SEPARATOR ); + sb.append( "Collapsed:" ); + sb.append( ForesterUtil.LINE_SEPARATOR ); + for( final Prefix prefix : _collapsed ) { + sb.append( prefix ); + sb.append( ForesterUtil.LINE_SEPARATOR ); + } + if ( _has_specifics ) { + sb.append( ForesterUtil.LINE_SEPARATOR ); + sb.append( "Specifics:" ); + sb.append( ForesterUtil.LINE_SEPARATOR ); + for( final Prefix prefix : _cleaned_spec ) { + sb.append( prefix ); + sb.append( ForesterUtil.LINE_SEPARATOR ); + } + sb.append( ForesterUtil.LINE_SEPARATOR ); + sb.append( "Collapsed with specifics:" ); + sb.append( ForesterUtil.LINE_SEPARATOR ); + for( final Prefix prefix : _collapsed ) { + sb.append( prefix ); + sb.append( ForesterUtil.LINE_SEPARATOR ); + for( final Prefix spec : _cleaned_spec ) { + if ( spec.getPrefix().startsWith( prefix.getPrefix() ) ) { + sb.append( " " + spec ); + sb.append( ForesterUtil.LINE_SEPARATOR ); + } + } + } + } + return sb.toString(); + } } -- 1.7.10.2