in progress...
author cmzmasek <chris.zma@outlook.com>
Wed, 16 Aug 2017 19:22:09 +0000 (12:22 -0700)
committer cmzmasek <chris.zma@outlook.com>
Wed, 16 Aug 2017 19:22:09 +0000 (12:22 -0700)
forester/java/src/org/forester/clade_analysis/Analysis2.java
forester/java/src/org/forester/clade_analysis/CladeAnalysisTest.java
forester/java/src/org/forester/clade_analysis/Prefix.java
forester/java/src/org/forester/clade_analysis/Result2.java
forester/java/src/org/forester/util/ForesterUtil.java

index 3d1561f..762b9b7 100644 (file)
@@ -81,7 +81,7 @@ public final class Analysis2 {
         }
         else {
           //  res.setGreatestCommonPrefix( greatest_common_prefix );
-            res.addGreatestCommonPrefix( prefix, confidence );
+            res.addGreatestCommonPrefix( prefix, confidence, separator );
         }
         if ( qnode_pp.isRoot() ) {
             res.addWarning( "Least Encompassing Clade is entire tree" );
index d91e36e..bc832d5 100644 (file)
@@ -605,30 +605,29 @@ public class CladeAnalysisTest {
          // System.out.println( x );
           
           
-          res1.analyzeGreatestCommonPrefixes( "." );
+          res1.analyzeGreatestCommonPrefixes( );
           
-          final Result2 res2 = new Result2();
+          final Result2 res2 = new Result2(".");
           res2.addGreatestCommonPrefix( "A.1.1.1", 0.1 );
           res2.addGreatestCommonPrefix( "A.1", 0.7 );
           res2.addGreatestCommonPrefix( "A.1.2", 0.1 );
           res2.addGreatestCommonPrefix( "B.1", 0.1 );
-          res2.analyzeGreatestCommonPrefixes( "." );
+          res2.analyzeGreatestCommonPrefixes(  );
           
-          final Result2 res3 = new Result2();
+          final Result2 res3 = new Result2(".");
           res3.addGreatestCommonPrefix( "A.1.1.1", 0.7 );
           res3.addGreatestCommonPrefix( "A.1", 0.1 );
           res3.addGreatestCommonPrefix( "A.1.2", 0.1 );
           res3.addGreatestCommonPrefix( "B.1", 0.1 );
-          res3.analyzeGreatestCommonPrefixes( "." );
-          
-          
+          res3.analyzeGreatestCommonPrefixes(  );
+                    
           final Result2 res4 = new Result2();
           res4.addGreatestCommonPrefix( "A.1.1.1.1", 0.35);
           res4.addGreatestCommonPrefix( "A.1.1.1.2", 0.35 );
           res4.addGreatestCommonPrefix( "A.1", 0.1 );
           res4.addGreatestCommonPrefix( "A.1.2", 0.1 );
           res4.addGreatestCommonPrefix( "B.1", 0.1 );
-          res4.analyzeGreatestCommonPrefixes( "." );
+          res4.analyzeGreatestCommonPrefixes(  );
           
           final Result2 res5 = new Result2();
           res5.addGreatestCommonPrefix( "A.1.1.1.1", 0.2);
@@ -638,7 +637,40 @@ public class CladeAnalysisTest {
           res5.addGreatestCommonPrefix( "B.1.1", 0.2 );
           res5.addGreatestCommonPrefix( "B.1.2", 0.09 );
           res5.addGreatestCommonPrefix( "D.1.1.1.1", 0.01 );
-          res5.analyzeGreatestCommonPrefixes( "." );
+          res5.analyzeGreatestCommonPrefixes(  );
+          
+          final Result2 res6 = new Result2();
+          res6.addGreatestCommonPrefix( "A.1.1.1", 0.05 );
+          res6.addGreatestCommonPrefix( "A.1.1.1.1", 0.65 );
+          res6.addGreatestCommonPrefix( "A.1", 0.1 );
+          res6.addGreatestCommonPrefix( "A.1.2", 0.1 );
+          res6.addGreatestCommonPrefix( "B.1", 0.1 );
+          res6.analyzeGreatestCommonPrefixes(  );
+          
+          final Result2 res7 = new Result2();
+          res7.addGreatestCommonPrefix( "A.1.1.1", 0.07 );
+          res7.addGreatestCommonPrefix( "A.1.1.1.1", 0.9 );
+          res7.addGreatestCommonPrefix( "A.1", 0.01 );
+          res7.addGreatestCommonPrefix( "A.1.2", 0.01 );
+          res7.addGreatestCommonPrefix( "B.1", 0.01 );
+          res7.analyzeGreatestCommonPrefixes(  );
+          
+          final Result2 res8 = new Result2("_/_");
+          res8.addGreatestCommonPrefix( "AA_/_abc_/_def", 0.07 );
+          res8.addGreatestCommonPrefix( "AA_/_abc_/_sfc", 0.9 );
+          res8.addGreatestCommonPrefix( "AA_/_abc_/_xcd", 0.01 );
+          res8.addGreatestCommonPrefix( "AA_/_abc_/_memr", 0.01 );
+          res8.addGreatestCommonPrefix( "AA_/_abc_/_fkem_/_odem", 0.01 );
+          res8.analyzeGreatestCommonPrefixes(  );
+          
+          final Result2 res9 = new Result2("_/_");
+          res9.addGreatestCommonPrefix( "AA_/_abc_/_def", 0.07 );
+          res9.addGreatestCommonPrefix( "AA_/_abc_/_sfc", 0.6 );
+          res9.addGreatestCommonPrefix( "AA_/_abc_/_xcd", 0.01 );
+          res9.addGreatestCommonPrefix( "AA_/_abc_/_memr", 0.01 );
+          res9.addGreatestCommonPrefix( "AA_/_abc_/_fkem_/_odem", 0.01 );
+          res9.addGreatestCommonPrefix( "BB_/_fke_/_dme_/_nx2", 0.3 );
+          res9.analyzeGreatestCommonPrefixes(  );
         }
         catch ( final Exception e ) {
             e.printStackTrace( System.out );
index 8adb85b..6362a3e 100644 (file)
@@ -1,25 +1,52 @@
+
 package org.forester.clade_analysis;
 
+import java.math.BigDecimal;
 
 final class Prefix {
-    final String _prefix;
-    final double _confidence;
+
+    private final String _prefix;
+    private final BigDecimal _confidence;
+    private final String _separator;
+    private final String _first;
+
+    Prefix( final String prefix, final String confidence, final String separator ) {
+        _prefix = prefix;
+        _confidence = new BigDecimal( confidence);
+        _separator = separator ;
+        if ( _prefix.indexOf( _separator ) < 0) {
+            _first = _prefix;
+        }
+        else {
+        _first = _prefix.substring( 0, _prefix.indexOf(_separator ) );
+        }
+    }
     
-    Prefix( final String prefix, final double confidence ) {
+    Prefix( final String prefix, final double confidence , final String separator) {
         _prefix = prefix;
-        _confidence = confidence;
+        _confidence = new BigDecimal( confidence);
+        _separator = separator ;
+        if ( _prefix.indexOf( _separator ) < 0) {
+            _first = _prefix;
+        }
+        else {
+            _first = _prefix.substring( 0, _prefix.indexOf(_separator ) );
+            }
     }
 
-    
-   String getPrefix() {
+    String getPrefix() {
         return _prefix;
     }
-
     
+    String getPrefixFirstElement() {
+       return _first;
+    }
     double getConfidence() {
-        return _confidence;
+        return _confidence.doubleValue();
+    }
+
+    @Override
+    public String toString() {
+     return getPrefix() + ": " + getConfidence();
     }
-    
-    
-    
 }
index a1a7bea..1dc9f0e 100644 (file)
 package org.forester.clade_analysis;
 
 import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map.Entry;
+import java.util.Set;
 import java.util.SortedMap;
 import java.util.TreeMap;
 
@@ -35,24 +39,35 @@ import org.forester.util.ForesterUtil;
 
 public final class Result2 {
 
-    private List<Prefix>        _greatest_common_prefix                = new ArrayList<Prefix>();
-    private String             _greatest_common_prefix_up             = "";
-    private String             _greatest_common_prefix_down           = "";
-    private final List<String> _warnings                              = new ArrayList<>();
-    private int                _lec_ext_nodes                         = 0;
-    private int                _p_ext_nodes                           = 0;
+    private final String _separator;
+    private final List<Prefix> _greatest_common_prefixes                        = new ArrayList<>();
+    private String             _greatest_common_prefix_up                     = "";
+    private String             _greatest_common_prefix_down                   = "";
+    private final List<String> _warnings                                      = new ArrayList<>();
+    private int                _lec_ext_nodes                                 = 0;
+    private int                _p_ext_nodes                                   = 0;
     private String             _greatest_common_clade_subtree_confidence      = "";
     private String             _greatest_common_clade_subtree_confidence_up   = "";
     private String             _greatest_common_clade_subtree_confidence_down = "";
+    
+    public Result2(final String separator) {
+        _separator =  separator;
+    }
+    
+    public Result2() {
+        _separator = ".";//TODO make const somewhere
+    }
 
     void addWarning( final String warning ) {
         _warnings.add( warning );
     }
 
     void addGreatestCommonPrefix( final String prefix, final double confidence ) {
-        _greatest_common_prefix.add( new Prefix(prefix, confidence) );
+        _greatest_common_prefixes.add( new Prefix( prefix, confidence, _separator ) );
     }
 
+  
+
     void setGreatestCommonPrefixUp( final String greatest_common_prefix_up ) {
         _greatest_common_prefix_up = greatest_common_prefix_up;
     }
@@ -73,10 +88,9 @@ public final class Result2 {
         _greatest_common_clade_subtree_confidence_down = greatest_common_clade_confidence_down;
     }
 
-  //  public String getGreatestCommonPrefix() {
-  //      return _greatest_common_prefix;
-  //  }
-
+    //  public String getGreatestCommonPrefix() {
+    //      return _greatest_common_prefix;
+    //  }
     public String getGreatestCommonPrefixUp() {
         return _greatest_common_prefix_up;
     }
@@ -117,25 +131,87 @@ public final class Result2 {
         return _p_ext_nodes;
     }
     
-    public void analyzeGreatestCommonPrefixes(final String separator ) {
-        final SortedMap<String,Double> map = new TreeMap<String,Double>();
-        for( final Prefix prefix : _greatest_common_prefix ) {
-            List<String> prefixes = ForesterUtil.spliIntoPrefixes( prefix.getPrefix(), separator );
+    public void analyzeGreatestCommonPrefixes( ) {
+        analyzeGreatestCommonPrefixes( _greatest_common_prefixes, _separator );
+    }
+
+    public final static void analyzeGreatestCommonPrefixes( List<Prefix> greatest_common_prefixes, final String separator ) {
+        final SortedMap<String, Double> map = new TreeMap<>();
+        for( final Prefix prefix : greatest_common_prefixes ) {
+            final List<String> prefixes = ForesterUtil.spliIntoPrefixes( prefix.getPrefix(), separator );
             for( final String p : prefixes ) {
                 map.put( p, 0.0 );
             }
         }
-       // System.out.println( map );
-        for (final String key : map.keySet()) {
+        // System.out.println( map );
+        for( final String key : map.keySet() ) {
             //System.out.println(key);
-            for( final Prefix prefix : _greatest_common_prefix ) {
+            for( final Prefix prefix : greatest_common_prefixes ) {
                 if ( prefix.getPrefix().startsWith( key ) ) {
-                    map.put( key, map.get( key ) + prefix.getConfidence()  );
+                    map.put( key, map.get( key ) + prefix.getConfidence() );
+                }
+            }
+        }
+        //System.out.println( map );
+        final List<Prefix> l = new ArrayList<>();
+        for( final Entry<String, Double> entry : map.entrySet() ) {
+            // System.out.println( entry.getKey() + "->" + entry.getValue() );
+            l.add( new Prefix( entry.getKey(), entry.getValue(), separator ) );
+        }
+        Collections.sort( l, new Comparator<Prefix>() {
+
+            @Override
+            public int compare( final Prefix x, final Prefix y ) {
+                final int start_comparison = compare( x.getConfidence(), y.getConfidence() );
+                return start_comparison;
+                //return startComparison != 0 ? startComparison
+                //                            : compare(x.timeEnded, y.timeEnded);
+            }
+
+            private int compare( final double a, final double b ) { // orders larger confidences first (descending)
+                return a > b ? -1 : a < b ? 1 : 0; // second test must be a < b: keeps compare(x,y) == -compare(y,x) for the sort
+            }
+        } );
+        System.out.println();
+        for( final Prefix prefix : l ) {
+            // System.out.println( prefix );
+        }
+        final List<Prefix> cleaned = new ArrayList<>();
+        for( final Prefix o : l ) {
+            boolean ok = true;
+            for( final Prefix i : l ) {
+                if ( ( !o.getPrefix().equals( i.getPrefix() ) ) && ( i.getPrefix().startsWith( o.getPrefix() ) )
+                        && ForesterUtil.isEqual( i.getConfidence(),
+                                                 o.getConfidence() ) ) {
+                    ok = false;
+                    break;
                 }
             }
+            if ( ok ) {
+                cleaned.add( o );
+            }
+        }
+        System.out.println();
+        for( final Prefix prefix : cleaned ) {
+            System.out.println( prefix );
+        }
+        final List<Prefix> collapsed = new ArrayList<>();
+        final Set<String> firsts = new HashSet<>();
+        double confidence_sum = 0;
+        for( final Prefix prefix : cleaned ) {
+            final String f = prefix.getPrefixFirstElement();
+            if ( !firsts.contains( f ) ) {
+                firsts.add( f );
+                collapsed.add( prefix );
+                confidence_sum += prefix.getConfidence();
+            }
+        }
+        if ( !ForesterUtil.isEqual( confidence_sum, 1.0 ) ) {
+            throw new IllegalArgumentException( "Confidences add up to " + confidence_sum + " instead of 1.0" );
+        }
+        System.out.println();
+        for( final Prefix prefix : collapsed ) {
+            System.out.println( prefix );
         }
-        System.out.println( map );
     }
-    
-    
 }
index d3d61d9..3d9888b 100644 (file)
@@ -99,7 +99,7 @@ public final class ForesterUtil {
     public final static String       OS_VERSION                       = System.getProperty( "os.version" );
     public static final String       PDB                              = "http://www.pdb.org/pdb/explore/explore.do?pdbId=";
     public final static String       UNIPROT_KB                       = "http://www.uniprot.org/uniprot/";
-    public final static double       ZERO_DIFF                        = 1.0E-9;
+    public final static double       ZERO_DIFF                        = 1.0E-12;
     private static final Pattern     PARANTHESESABLE_NH_CHARS_PATTERN = Pattern.compile( "[(),;\\s:\\[\\]]" );
     static {
         final DecimalFormatSymbols dfs = new DecimalFormatSymbols();