JAL-4134 group columns using a tree cut - seems to work after more egregiousness
authorJames Procter <j.procter@dundee.ac.uk>
Thu, 23 Feb 2023 17:01:13 +0000 (17:01 +0000)
committerJames Procter <j.procter@dundee.ac.uk>
Thu, 23 Feb 2023 17:01:13 +0000 (17:01 +0000)
src/jalview/analysis/AverageDistanceEngine.java
test/jalview/analysis/AverageDistanceEngineTest.java

index a330170..90a96e0 100644 (file)
@@ -20,7 +20,9 @@
  */
 package jalview.analysis;
 
+import java.util.ArrayList;
 import java.util.BitSet;
+import java.util.List;
 import java.util.Vector;
 
 import jalview.datamodel.AlignmentAnnotation;
@@ -176,5 +178,148 @@ public class AverageDistanceEngine extends TreeEngine
     nodei.dist = ((dist / 2) - ih);
     nodej.dist = ((dist / 2) - jh);
   }
+  /***
+   * not the right place - OH WELL!
+   */
+
+  /**
+   * Makes a list of groups, where each group is represented by a node whose
+   * height (distance from the root node), as a fraction of the height of the
+   * whole tree, is greater than the given threshold. This corresponds to
+   * selecting the nodes immediately to the right of a vertical line
+   * partitioning the tree (if the tree is drawn with root to the left). Each
+   * such node represents a group that contains all of the sequences linked to
+   * the child leaf nodes.
+   * 
+   * @param threshold
+   * @see #getGroups()
+   */
+  public List<BinaryNode> groupNodes(float threshold)
+  {
+    List<BinaryNode> groups = new ArrayList<BinaryNode>();
+    _groupNodes(groups, getTopNode(), threshold);
+    return groups;
+  }
+
+  protected void _groupNodes(List<BinaryNode> groups, BinaryNode nd,
+          float threshold)
+  {
+    if (nd == null)
+    {
+      return;
+    }
+
+    if ((nd.height / maxheight) > threshold)
+    {
+      groups.add(nd);
+    }
+    else
+    {
+      _groupNodes(groups,  nd.left(), threshold);
+      _groupNodes(groups,  nd.right(), threshold);
+    }
+  }
+
+  /**
+   * DOCUMENT ME!
+   * 
+   * @param nd
+   *          DOCUMENT ME!
+   * 
+   * @return DOCUMENT ME!
+   */
+  public double findHeight(BinaryNode nd)
+  {
+    if (nd == null)
+    {
+      return maxheight;
+    }
+
+    if ((nd.left() == null) && (nd.right() == null))
+    {
+      nd.height = ((BinaryNode) nd.parent()).height + nd.dist;
+
+      if (nd.height > maxheight)
+      {
+        return nd.height;
+      }
+      else
+      {
+        return maxheight;
+      }
+    }
+    else
+    {
+      if (nd.parent() != null)
+      {
+        nd.height = ((BinaryNode) nd.parent()).height + nd.dist;
+      }
+      else
+      {
+        maxheight = 0;
+        nd.height = (float) 0.0;
+      }
+
+      maxheight = findHeight((BinaryNode) (nd.left()));
+      maxheight = findHeight((BinaryNode) (nd.right()));
+    }
+
+    return maxheight;
+  }
+
+
+  /**
+   * Search for leaf nodes below (or at) the given node
+   * 
+   * @param top2
+   *          root node to search from
+   * 
+   * @return
+   */
+  public Vector<BinaryNode> findLeaves(BinaryNode top2)
+  {
+    Vector<BinaryNode> leaves = new Vector<BinaryNode>();
+    findLeaves(top2, leaves);
+    return leaves;
+  }
+
+  /**
+   * Search for leaf nodes.
+   * 
+   * @param nd
+   *          root node to search from
+   * @param leaves
+   *          Vector of leaves to add leaf node objects too.
+   * 
+   * @return Vector of leaf nodes on binary tree
+   */
+  Vector<BinaryNode> findLeaves(BinaryNode nd,
+          Vector<BinaryNode> leaves)
+  {
+    if (nd == null)
+    {
+      return leaves;
+    }
+
+    if ((nd.left() == null) && (nd.right() == null)) // Interior node
+    // detection
+    {
+      leaves.addElement(nd);
+
+      return leaves;
+    }
+    else
+    {
+      /*
+       * TODO: Identify internal nodes... if (node.isSequenceLabel()) {
+       * leaves.addElement(node); }
+       */
+      findLeaves( nd.left(), leaves);
+      findLeaves( nd.right(), leaves);
+    }
+
+    return leaves;
+  }
+
 
 }
index 3697efd..7c068cb 100644 (file)
@@ -16,6 +16,7 @@ import jalview.bin.Cache;
 import jalview.bin.Console;
 import jalview.datamodel.AlignmentAnnotation;
 import jalview.datamodel.AlignmentI;
+import jalview.datamodel.BinaryNode;
 import jalview.datamodel.ContactMatrixI;
 import jalview.datamodel.SequenceI;
 import jalview.gui.AlignFrame;
@@ -63,6 +64,19 @@ public class AverageDistanceEngineTest
       StringBuffer sb = new StringBuffer(); 
       System.out.println("Newick string\n"+      new jalview.io.NewickFile(clusterer.getTopNode(),true,true).print());
       
+      clusterer.findHeight(clusterer.getTopNode());
+      List<BinaryNode> groups = clusterer.groupNodes(0.8f);
+      int n=1;
+      for (BinaryNode root:groups)
+      {
+        System.out.println("Cluster "+n++);
+        for (BinaryNode leaf:clusterer.findLeaves(root))
+        {
+          System.out.print(" "+leaf.getName());
+        }
+        System.out.println("\\");
+      }
+      
     }
 
 }