+++ /dev/null
-// BEWARE: BETA VERSION\r
-// --------------------\r
-//\r
-// A k-d tree that vastly speeds up an iteration of k-means (in any number of dimensions). The main\r
-// idea for this data structure is from Kanungo/Mount. This is used internally by Kmeans.cpp, and\r
-// will most likely not need to be used directly.\r
-//\r
-// The stucture works as follows:\r
-// - All data points are placed into a tree where we choose child nodes by partitioning all data\r
-// points along a plane parallel to the axis.\r
-// - We maintain for each node, the bounding box of all data points stored at that node.\r
-// - To do a k-means iteration, we need to assign points to clusters and calculate the sum and\r
-// the number of points assigned to each cluster. For each node in the tree, we can rule out\r
-// some cluster centers as being too far away from every single point in that bounding box.\r
-// Once only one cluster is left, all points in the node can be assigned to that cluster in\r
-// batch.\r
-//\r
-// Author: David Arthur (darthur@gmail.com), 2009\r
-\r
-#ifndef KM_TREE_H__\r
-#define KM_TREE_H__\r
-\r
-// Includes\r
-#include "KmUtils.h"\r
-\r
-// KmTree class definition\r
-class KmTree {\r
- public:\r
- // Constructs a tree out of the given n data points living in R^d.\r
- KmTree(int n, int d, Scalar *points);\r
- ~KmTree();\r
-\r
- // Given k cluster centers, this runs a full k-means iterations, choosing the next set of\r
- // centers and returning the cost function for this set of centers. If assignment is not null,\r
- // it should be an array of size n that will be filled with the index of the cluster (0 - k-1)\r
- // that each data point is assigned to. The new center values will overwrite the old ones.\r
- Scalar DoKMeansStep(int k, Scalar *centers, int *assignment) const;\r
-\r
- // Choose k initial centers for k-means using the kmeans++ seeding procedure. The resulting\r
- // centers are returned via the centers variable, which should be pre-allocated to size k*d.\r
- // The cost of the initial clustering is returned.\r
- Scalar SeedKMeansPlusPlus(int k, Scalar *centers) const;\r
-\r
- private:\r
- struct Node {\r
- int num_points; // Number of points stored in this node\r
- int first_point_index; // The smallest point index stored in this node\r
- Scalar *median, *radius; // Bounding box center and half side-lengths\r
- Scalar *sum; // Sum of the points stored in this node\r
- Scalar opt_cost; // Min cost for putting all points in this node in 1 cluster\r
- Node *lower_node, *upper_node; // Child nodes\r
- mutable int kmpp_cluster_index; // The cluster these points are assigned to or -1 if variable\r
- };\r
-\r
- // Helper functions for constructor\r
- Node *BuildNodes(Scalar *points, int first_index, int last_index, char **next_node_data);\r
- Scalar GetNodeCost(const Node *node, Scalar *center) const;\r
-\r
- // Helper functions for DoKMeans step\r
- Scalar DoKMeansStepAtNode(const Node *node, int k, int *candidates, Scalar *centers,\r
- Scalar *sums, int *counts, int *assignment) const;\r
- bool ShouldBePruned(Scalar *box_median, Scalar *box_radius, Scalar *centers, int best_index,\r
- int test_index) const;\r
-\r
- // Helper functions for SeedKMeansPlusPlus\r
- void SeedKmppSetClusterIndex(const Node *node, int index) const;\r
- Scalar SeedKmppUpdateAssignment(const Node *node, int new_cluster, Scalar *centers,\r
- Scalar *dist_sq) const;\r
-\r
- int n_, d_;\r
- Scalar *points_;\r
- Node *top_node_;\r
- char *node_data_;\r
- int *point_indices_;\r
-};\r
-\r
-#endif\r