in progress...
author cmzmasek <chris.zma@outlook.com>
Thu, 17 Aug 2017 16:40:45 +0000 (09:40 -0700)
committer cmzmasek <chris.zma@outlook.com>
Thu, 17 Aug 2017 16:40:45 +0000 (09:40 -0700)
forester/java/src/org/forester/clade_analysis/CladeAnalysisTest.java
forester/java/src/org/forester/clade_analysis/Result2.java

index bc832d5..7c1d38b 100644 (file)
@@ -593,6 +593,7 @@ public class CladeAnalysisTest {
     
     private static boolean testCladeAnalysis3() {
         try {
+            /*
           final Result2 res1 = new Result2();
           res1.addGreatestCommonPrefix( "A.1.1", 0.3 );
           res1.addGreatestCommonPrefix( "A.1.2", 0.3 );
@@ -613,14 +614,25 @@ public class CladeAnalysisTest {
           res2.addGreatestCommonPrefix( "A.1.2", 0.1 );
           res2.addGreatestCommonPrefix( "B.1", 0.1 );
           res2.analyzeGreatestCommonPrefixes(  );
-          
+          */
           final Result2 res3 = new Result2(".");
-          res3.addGreatestCommonPrefix( "A.1.1.1", 0.7 );
+          res3.addGreatestCommonPrefix( "A.1.1.1", 0.1 );
+          res3.addGreatestCommonPrefix( "A.1.1.1.1", 0.6 );
           res3.addGreatestCommonPrefix( "A.1", 0.1 );
           res3.addGreatestCommonPrefix( "A.1.2", 0.1 );
           res3.addGreatestCommonPrefix( "B.1", 0.1 );
           res3.analyzeGreatestCommonPrefixes(  );
-                    
+          
+          final Result2 res33 = new Result2(".");
+          res33.addGreatestCommonPrefix( "A.1.1.1", 0.1 );
+          res33.addGreatestCommonPrefix( "A.1.1.1.1", 0.3 );
+          res33.addGreatestCommonPrefix( "A.1", 0.1 );
+          res33.addGreatestCommonPrefix( "A.1.2", 0.1 );
+          res33.addGreatestCommonPrefix( "B.1", 0.1 );
+          res33.addGreatestCommonPrefix( "B.1.1.1", 0.3 );
+          res33.analyzeGreatestCommonPrefixes(  );
+          
+                    /*
           final Result2 res4 = new Result2();
           res4.addGreatestCommonPrefix( "A.1.1.1.1", 0.35);
           res4.addGreatestCommonPrefix( "A.1.1.1.2", 0.35 );
@@ -670,7 +682,7 @@ public class CladeAnalysisTest {
           res9.addGreatestCommonPrefix( "AA_/_abc_/_memr", 0.01 );
           res9.addGreatestCommonPrefix( "AA_/_abc_/_fkem_/_odem", 0.01 );
           res9.addGreatestCommonPrefix( "BB_/_fke_/_dme_/_nx2", 0.3 );
-          res9.analyzeGreatestCommonPrefixes(  );
+          res9.analyzeGreatestCommonPrefixes(  );*/
         }
         catch ( final Exception e ) {
             e.printStackTrace( System.out );
index 1dc9f0e..f59679b 100644 (file)
@@ -39,8 +39,8 @@ import org.forester.util.ForesterUtil;
 
 public final class Result2 {
 
-    private final String _separator;
-    private final List<Prefix> _greatest_common_prefixes                        = new ArrayList<>();
+    private final String       _separator;
+    private final List<Prefix> _greatest_common_prefixes                      = new ArrayList<>();
     private String             _greatest_common_prefix_up                     = "";
     private String             _greatest_common_prefix_down                   = "";
     private final List<String> _warnings                                      = new ArrayList<>();
@@ -49,11 +49,11 @@ public final class Result2 {
     private String             _greatest_common_clade_subtree_confidence      = "";
     private String             _greatest_common_clade_subtree_confidence_up   = "";
     private String             _greatest_common_clade_subtree_confidence_down = "";
-    
-    public Result2(final String separator) {
-        _separator =  separator;
+
+    public Result2( final String separator ) {
+        _separator = separator;
     }
-    
+
     public Result2() {
         _separator = ".";//TODO make const somewhere
     }
@@ -66,8 +66,6 @@ public final class Result2 {
         _greatest_common_prefixes.add( new Prefix( prefix, confidence, _separator ) );
     }
 
-  
-
     void setGreatestCommonPrefixUp( final String greatest_common_prefix_up ) {
         _greatest_common_prefix_up = greatest_common_prefix_up;
     }
@@ -130,71 +128,76 @@ public final class Result2 {
     public int getTreeSize() {
         return _p_ext_nodes;
     }
-    
-    public void analyzeGreatestCommonPrefixes( ) {
-        analyzeGreatestCommonPrefixes( _greatest_common_prefixes, _separator );
+
+    public void analyzeGreatestCommonPrefixes() {
+        analyzeGreatestCommonPrefixes( _greatest_common_prefixes, _separator, 0.3 ); // 0.3 = hard-coded cutoff argument
     }
 
-    public final static void analyzeGreatestCommonPrefixes( List<Prefix> greatest_common_prefixes, final String separator ) {
-        final SortedMap<String, Double> map = new TreeMap<>();
-        for( final Prefix prefix : greatest_common_prefixes ) {
-            final List<String> prefixes = ForesterUtil.spliIntoPrefixes( prefix.getPrefix(), separator );
-            for( final String p : prefixes ) {
-                map.put( p, 0.0 );
-            }
+    public final static void analyzeGreatestCommonPrefixes( final List<Prefix> greatest_common_prefixes,
+                                                            final String separator,
+                                                            final double cutoff ) {
+        final List<Prefix> l = obtainAllPrefixes( greatest_common_prefixes, separator );
+        sortPrefixesAccordingToConfidence( l );
+        System.out.println();
+        System.out.println( "All:" );
+        for( final Prefix prefix : l ) {
+            System.out.println( prefix );
         }
-        // System.out.println( map );
-        for( final String key : map.keySet() ) {
-            //System.out.println(key);
-            for( final Prefix prefix : greatest_common_prefixes ) {
-                if ( prefix.getPrefix().startsWith( key ) ) {
-                    map.put( key, map.get( key ) + prefix.getConfidence() );
-                }
-            }
+        final List<Prefix> cleaned = removeLessSpecificPrefixes( l );
+        System.out.println();
+        System.out.println( "Cleaned:" );
+        for( final Prefix prefix : cleaned ) {
+            System.out.println( prefix );
         }
-        //System.out.println( map );
-        final List<Prefix> l = new ArrayList<>();
-        for( final Entry<String, Double> entry : map.entrySet() ) {
-            // System.out.println( entry.getKey() + "->" + entry.getValue() );
-            l.add( new Prefix( entry.getKey(), entry.getValue(), separator ) );
+        final List<Prefix> collapsed = collapse( cleaned );
+        System.out.println();
+        System.out.println( "Collapsed:" );
+        for( final Prefix prefix : collapsed ) {
+            System.out.println( prefix );
         }
-        Collections.sort( l, new Comparator<Prefix>() {
-
-            @Override
-            public int compare( final Prefix x, final Prefix y ) {
-                final int start_comparison = compare( x.getConfidence(), y.getConfidence() );
-                return start_comparison;
-                //return startComparison != 0 ? startComparison
-                //                            : compare(x.timeEnded, y.timeEnded);
+        if ( cutoff >= 0 ) {
+            System.out.println();
+            System.out.println( "Specifics:" );
+            final List<Prefix> cleaned_spec = obtainSpecifics( cutoff, cleaned, collapsed );
+            for( final Prefix prefix : cleaned_spec ) {
+                System.out.println( prefix );
             }
+        }
+        System.out.println( "------" );
+    }
 
-            private int compare( final double a, final double b ) {
-                return a > b ? -1 : a > b ? 1 : 0;
+    private final static List<Prefix> obtainSpecifics( final double cutoff,
+                                                 final List<Prefix> cleaned,
+                                                 final List<Prefix> collapsed ) {
+        final List<Prefix> cleaned_spec = new ArrayList<>();
+        final Set<String> collapsed_set = new HashSet<>();
+        for( final Prefix prefix : collapsed ) {
+            collapsed_set.add( prefix.getPrefix() );
+        }
+        final List<Prefix> spec = new ArrayList<>();
+        for( final Prefix prefix : cleaned ) {
+            if ( ( prefix.getConfidence() >= cutoff ) && !collapsed_set.contains( prefix.getPrefix() ) ) {
+                //  System.out.println( prefix );
+                spec.add( prefix );
             }
-        } );
-        System.out.println();
-        for( final Prefix prefix : l ) {
-            // System.out.println( prefix );
         }
-        final List<Prefix> cleaned = new ArrayList<>();
-        for( final Prefix o : l ) {
+        for( final Prefix o : spec ) {
             boolean ok = true;
-            for( final Prefix i : l ) {
-                if ( ( !o.getPrefix().equals( i.getPrefix() ) ) && ( i.getPrefix().startsWith( o.getPrefix() ) )
-                        && ForesterUtil.isEqual( i.getConfidence(),
-                                                 o.getConfidence() ) ) {
+            for( final Prefix i : spec ) {
+                if ( ( !o.getPrefix().equals( i.getPrefix() ) ) && ( i.getPrefix().startsWith( o.getPrefix() ) ) ) {
                     ok = false;
                     break;
                 }
             }
             if ( ok ) {
-                cleaned.add( o );
+                //System.out.println(">" + o );
+                cleaned_spec.add( o );
             }
         }
-        System.out.println();
-        for( final Prefix prefix : cleaned ) {
-            System.out.println( prefix );
-        }
+        return cleaned_spec;
+    }
+
+    private final static List<Prefix> collapse( final List<Prefix> cleaned ) {
         final List<Prefix> collapsed = new ArrayList<>();
         final Set<String> firsts = new HashSet<>();
         double confidence_sum = 0;
@@ -209,9 +212,79 @@ public final class Result2 {
         if ( !ForesterUtil.isEqual( confidence_sum, 1.0 ) ) {
             throw new IllegalArgumentException( "Confidences add up to " + confidence_sum + " instead of 1.0" );
         }
-        System.out.println();
-        for( final Prefix prefix : collapsed ) {
-            System.out.println( prefix );
+        return collapsed;
+    }
+
+    /**
+     * Returns a new list holding every element of l except those made redundant by
+     * a more specific one: an element is dropped when another element has a different,
+     * longer prefix string that starts with it and an equal confidence (as judged by
+     * ForesterUtil.isEqual). Example: given "A.1.1 0.9" and "A.1 0.9", only
+     * "A.1.1 0.9" is kept. The input list itself is not modified.
+     *
+     * NOTE(review): startsWith() is a plain string test and ignores the separator,
+     * so "A.1" would also count as less specific than "A.10" -- confirm intended.
+     */
+    private final static List<Prefix> removeLessSpecificPrefixes( final List<Prefix> l ) {
+        final List<Prefix> cleaned = new ArrayList<>();
+        for( final Prefix o : l ) {
+            boolean ok = true;
+            for( final Prefix i : l ) {
+                if ( ( !o.getPrefix().equals( i.getPrefix() ) ) && ( i.getPrefix().startsWith( o.getPrefix() ) )
+                        && ForesterUtil.isEqual( i.getConfidence(),
+                                                 o.getConfidence() ) ) {
+                    ok = false;
+                    break;
+                }
+            }
+            if ( ok ) {
+                cleaned.add( o );
+            }
+        }
+        return cleaned;
+    }
+
+    private static void sortPrefixesAccordingToConfidence( final List<Prefix> l ) {
+        // Sorts l in place by descending confidence (highest confidence first).
+        Collections.sort( l, new Comparator<Prefix>() {
+
+            @Override
+            public int compare( final Prefix x, final Prefix y ) {
+                return compare( x.getConfidence(), y.getConfidence() );
+            }
+
+            // -1 if a is larger, 1 if a is smaller, 0 otherwise; both tests are needed so
+            // that compare( x, y ) and compare( y, x ) always have opposite signs.
+            private int compare( final double a, final double b ) {
+                return a > b ? -1 : a < b ? 1 : 0;
+            }
+        } );
+    }
+
+    private final static List<Prefix> obtainAllPrefixes( final List<Prefix> greatest_common_prefixes,
+                                                         final String separator ) {
+        // Returns one Prefix per string from spliIntoPrefixes (presumably each leading sub-prefix).
+        final SortedMap<String, Double> map = new TreeMap<>();
+        for( final Prefix prefix : greatest_common_prefixes ) {
+            final List<String> prefixes = ForesterUtil.spliIntoPrefixes( prefix.getPrefix(), separator );
+            for( final String p : prefixes ) {
+                map.put( p, 0.0 );
+            }
+        }
+        // Each key gets the summed confidence of the inputs equal to it or continuing it past a
+        // separator; a bare startsWith( key ) would also let "A.1" absorb "A.10".
+        for( final String key : map.keySet() ) {
+            for( final Prefix prefix : greatest_common_prefixes ) {
+                final String full = prefix.getPrefix();
+                if ( full.equals( key ) || full.startsWith( key + separator ) ) {
+                    map.put( key, map.get( key ) + prefix.getConfidence() );
+                }
+            }
+        }
+        final List<Prefix> l = new ArrayList<>();
+        for( final Entry<String, Double> entry : map.entrySet() ) {
+            l.add( new Prefix( entry.getKey(), entry.getValue(), separator ) );
         }
+        return l;
     }
 }