From 1fae3ccd07da6e8928e45d28cf04ba16f5ae39fb Mon Sep 17 00:00:00 2001 From: James Procter Date: Thu, 23 Feb 2023 17:01:13 +0000 Subject: [PATCH] JAL-4134 group columns using a tree cut - seems to work after more egregiousness --- src/jalview/analysis/AverageDistanceEngine.java | 145 ++++++++++++++++++++ .../analysis/AverageDistanceEngineTest.java | 14 ++ 2 files changed, 159 insertions(+) diff --git a/src/jalview/analysis/AverageDistanceEngine.java b/src/jalview/analysis/AverageDistanceEngine.java index a330170..90a96e0 100644 --- a/src/jalview/analysis/AverageDistanceEngine.java +++ b/src/jalview/analysis/AverageDistanceEngine.java @@ -20,7 +20,9 @@ */ package jalview.analysis; +import java.util.ArrayList; import java.util.BitSet; +import java.util.List; import java.util.Vector; import jalview.datamodel.AlignmentAnnotation; @@ -176,5 +178,148 @@ public class AverageDistanceEngine extends TreeEngine nodei.dist = ((dist / 2) - ih); nodej.dist = ((dist / 2) - jh); } + /*** + * not the right place - OH WELL! + */ + + /** + * Makes a list of groups, where each group is represented by a node whose + * height (distance from the root node), as a fraction of the height of the + * whole tree, is greater than the given threshold. This corresponds to + * selecting the nodes immediately to the right of a vertical line + * partitioning the tree (if the tree is drawn with root to the left). Each + * such node represents a group that contains all of the sequences linked to + * the child leaf nodes. + * + * @param threshold + * @see #getGroups() + */ + public List groupNodes(float threshold) + { + List groups = new ArrayList(); + _groupNodes(groups, getTopNode(), threshold); + return groups; + } + + protected void _groupNodes(List groups, BinaryNode nd, + float threshold) + { + if (nd == null) + { + return; + } + + if ((nd.height / maxheight) > threshold) + { + groups.add(nd); + } + else + { + _groupNodes(groups, nd.left(), threshold); + _groupNodes(groups, nd.right(), threshold); + } + } + + /** + * DOCUMENT ME! + * + * @param nd + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public double findHeight(BinaryNode nd) + { + if (nd == null) + { + return maxheight; + } + + if ((nd.left() == null) && (nd.right() == null)) + { + nd.height = ((BinaryNode) nd.parent()).height + nd.dist; + + if (nd.height > maxheight) + { + return nd.height; + } + else + { + return maxheight; + } + } + else + { + if (nd.parent() != null) + { + nd.height = ((BinaryNode) nd.parent()).height + nd.dist; + } + else + { + maxheight = 0; + nd.height = (float) 0.0; + } + + maxheight = findHeight((BinaryNode) (nd.left())); + maxheight = findHeight((BinaryNode) (nd.right())); + } + + return maxheight; + } + + + /** + * Search for leaf nodes below (or at) the given node + * + * @param top2 + * root node to search from + * + * @return + */ + public Vector findLeaves(BinaryNode top2) + { + Vector leaves = new Vector(); + findLeaves(top2, leaves); + return leaves; + } + + /** + * Search for leaf nodes. + * + * @param nd + * root node to search from + * @param leaves + * Vector of leaves to add leaf node objects too. + * + * @return Vector of leaf nodes on binary tree + */ + Vector findLeaves(BinaryNode nd, + Vector leaves) + { + if (nd == null) + { + return leaves; + } + + if ((nd.left() == null) && (nd.right() == null)) // Interior node + // detection + { + leaves.addElement(nd); + + return leaves; + } + else + { + /* + * TODO: Identify internal nodes... if (node.isSequenceLabel()) { + * leaves.addElement(node); } + */ + findLeaves( nd.left(), leaves); + findLeaves( nd.right(), leaves); + } + + return leaves; + } + } diff --git a/test/jalview/analysis/AverageDistanceEngineTest.java b/test/jalview/analysis/AverageDistanceEngineTest.java index 3697efd..7c068cb 100644 --- a/test/jalview/analysis/AverageDistanceEngineTest.java +++ b/test/jalview/analysis/AverageDistanceEngineTest.java @@ -16,6 +16,7 @@ import jalview.bin.Cache; import jalview.bin.Console; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; +import jalview.datamodel.BinaryNode; import jalview.datamodel.ContactMatrixI; import jalview.datamodel.SequenceI; import jalview.gui.AlignFrame; @@ -63,6 +64,19 @@ public class AverageDistanceEngineTest StringBuffer sb = new StringBuffer(); System.out.println("Newick string\n"+ new jalview.io.NewickFile(clusterer.getTopNode(),true,true).print()); + clusterer.findHeight(clusterer.getTopNode()); + List groups = clusterer.groupNodes(0.8f); + int n=1; + for (BinaryNode root:groups) + { + System.out.println("Cluster "+n++); + for (BinaryNode leaf:clusterer.findLeaves(root)) + { + System.out.print(" "+leaf.getName()); + } + System.out.println("\\"); + } + } } -- 1.7.10.2