#ifndef CLUSTALO
// BEWARE: BETA VERSION
// --------------------
//
// The main set of utilities for running k-means and k-means++ on arbitrary data sets.
//
// Author: David Arthur (darthur@gmail.com), 2009
#endif

#ifndef KMEANS_H__
#define KMEANS_H__

#ifndef CLUSTALO

// Includes
#include "KmUtils.h"
#include <iostream>  // std::ostream, needed by AddKMeansLogging below

// Sets preferences for how much logging is done and where it is output when k-means is run.
void ClearKMeansLogging();
void AddKMeansLogging(std::ostream *out, bool verbose);

// Runs k-means on the given set of points.
//  - n: The number of points in the data set
//  - k: The number of clusters to look for
//  - d: The number of dimensions that the data set lives in
//  - points: An array of size n*d where points[d*i + j] gives coordinate j of point i
//  - attempts: The number of times to independently run k-means with different starting centers.
//              The best result is always returned (as measured by the cost function).
//  - centers: This can either be null or an array of size k*d. In the latter case, it will be
//             filled with the locations of all final cluster centers. Specifically
//             centers[d*i + j] will give coordinate j of center i. If the cluster is unused, it
//             will contain NaN instead.
//  - assignments: This can either be null or an array of size n. In the latter case, it will be
//                 filled with the cluster that each point is assigned to (an integer between 0
//                 and k-1 inclusive).
// The final cost of the clustering is also returned.
Scalar RunKMeans(int n, int k, int d, Scalar *points, int attempts,
                 Scalar *centers, int *assignments);

// Runs k-means++ on the given set of points. See RunKMeans for info on the parameters.
Scalar RunKMeansPlusPlus(int n, int k, int d, Scalar *points, int attempts,
                         Scalar *centers, int *assignments);

#else

/* CLUSTALO PATCH:
 * Same parameters and return value as RunKMeans (documented above), with one
 * addition: if use_lloyds_method is false (0), k-means++ (kmpp) will be used,
 * otherwise the 'classical' method, i.e. Lloyd's method, will be used.
 *
 * NOTE(review): this branch uses plain `double` and `extern` instead of
 * Scalar, presumably so the header can be consumed without KmUtils.h;
 * confirm the linkage expected by the callers before changing it.
 */
extern double KMeans(int n, int k, int d, double *points, int attempts,
                     int use_lloyds_method, double *centers, int *assignments);

#endif  // CLUSTALO

#endif  // KMEANS_H__