2 // BEWARE: BETA VERSION
\r
3 // --------------------
\r
5 // The main set of utilities for running k-means and k-means++ on arbitrary data sets.
\r
7 // Author: David Arthur (darthur@gmail.com), 2009
\r
15 #include "KmUtils.h"
\r
18 // Logging preferences: control how much is logged, and where it is written, while k-means is run.
\r
19 void ClearKMeansLogging();  // NOTE(review): presumably removes every previously added logging stream -- confirm in the implementation
\r
20 void AddKMeansLogging(std::ostream *out, bool verbose);  // out: stream that log output is written to; verbose: presumably enables more detailed output -- confirm
\r
22 // Runs k-means on the given set of points.
\r
23 // - n: The number of points in the data set.
\r
24 // - k: The number of clusters to look for.
\r
25 // - d: The number of dimensions that the data set lives in.
\r
26 // - points: An array of size n*d where points[d*i + j] gives coordinate j of point i.
\r
27 // - attempts: The number of times to independently run k-means with different starting centers.
\r
28 //   The best result is always returned (as measured by the cost function).
\r
29 // - centers: Either null or an array of size k*d. In the latter case, it will be
\r
30 //   filled with the locations of all final cluster centers. Specifically,
\r
31 //   centers[d*i + j] will give coordinate j of center i. If the cluster is unused, it
\r
32 //   will contain NaN instead.
\r
33 // - assignments: Either null or an array of size n. In the latter case, it will be
\r
34 //   filled with the cluster that each point is assigned to (an integer between 0
\r
35 //   and k-1 inclusive).
\r
36 // Returns: the final cost of the clustering, i.e. the cost of the best result among
\r
37 // the `attempts` independent runs described above.
\r
38 Scalar RunKMeans(int n, int k, int d, Scalar *points, int attempts,
\r
39 Scalar *centers, int *assignments);
\r
41 // Runs k-means++ on the given set of points. See RunKMeans for info on the parameters.
\r
42 Scalar RunKMeansPlusPlus(int n, int k, int d, Scalar *points, int attempts,
\r
43 Scalar *centers, int *assignments);
\r
48 * Same as above, with one addition: if use_lloyds_method is false, k-means++ (kmpp) will be used;
\r
49 * otherwise the 'classical' method, i.e. Lloyd's method, will be used.
\r
52 KMeans(int n, int k, int d, double *points, int attempts, int use_lloyds_method,  // use_lloyds_method: zero selects k-means++, nonzero selects Lloyd's method
\r
53 double *centers, int *assignments);  // NOTE(review): presumably optional outputs as in RunKMeans (either may be null) -- confirm
\r