diff --git a/contrib/kmeans/INFO b/contrib/kmeans/INFO new file mode 100644 index 000000000..124c79442 --- /dev/null +++ b/contrib/kmeans/INFO @@ -0,0 +1,19 @@ +NOTE + +The sources in this sub-directory including the LICENSE and README files +are imported with permission granted by the author (Greg Hamerly). + +The original sources are available from Github here: +https://github.com/ghamerly/fast-kmeans + +Whilst the original source implements multiple algorithms we only kept +the hamerly variant. + +The source files have not been adapted to use Qt containers or to refactor +any of the class inheritance. So updating to newer versions should be +very straightforward (although the base functionality and performance is +good enough). + +Usage in Qt applications will likely use the Kmeans wrapper class + +28/09/2021 diff --git a/contrib/kmeans/LICENSE b/contrib/kmeans/LICENSE new file mode 100644 index 000000000..4a2918983 --- /dev/null +++ b/contrib/kmeans/LICENSE @@ -0,0 +1,22 @@ +The MIT License (MIT) + +Copyright (c) 2014 Greg Hamerly + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/contrib/kmeans/Makefile-testing b/contrib/kmeans/Makefile-testing new file mode 100644 index 000000000..cd9609214 --- /dev/null +++ b/contrib/kmeans/Makefile-testing @@ -0,0 +1,15 @@ +# doesn't work now since the source has been placed in a subdir +# left for info and fairly trivial to fixup if you want to work +# directly with the original sources + +OBJECTS=kmeans_dataset.o \ +general_functions.o \ +hamerly_kmeans.o \ +kmeans.o \ +original_space_kmeans.o \ +triangle_inequality_base_kmeans.o \ +driver-standalone.o + +driver-standalone: $(OBJECTS) + gcc -o $@ $(OBJECTS) -lstdc++ -lm + ./driver-standalone hamerly smallDataset.txt 4 centers diff --git a/contrib/kmeans/README b/contrib/kmeans/README new file mode 100644 index 000000000..2a00ea95d --- /dev/null +++ b/contrib/kmeans/README @@ -0,0 +1,106 @@ +=============================== +Fast K-means Clustering Toolkit +=============================== + +---------------------- +Version 0.1 (Sat May 17 17:41:11 CDT 2014) + - Initial release. + +---------------------- +WHAT: +This software is a testbed for comparing variants of Lloyd's k-means clustering +algorithm. It includes implementations of several algorithms that accelerate +the algorithm by avoiding unnecessary distance calculations. + +---------------------- +WHO: +Greg Hamerly (hamerly@cs.baylor.edu, primary contact) and Jonathan Drake +(drakej@hp.com). + +---------------------- +HOW TO BUILD THE SOFTWARE: +type "make" (and hope for the best) + +---------------------- +HOW TO RUN THE SOFTWARE: +The driver is designed to take commands from standard input, usually a file +that's been redirected as input: + + ./kmeans < commands.txt + +You can read the source to find all the possible commands, but here is a +summary: + - threads T -- use T threads for clustering + - maxiterations I -- use at most I iterations; default (or negative) + indicates an unlimited number + - dataset D -- use the given path name to a file as the dataset for + clustering. The dataset should have a first line with the number of points + n and dimension d. The next (nd) tokens are taken as the n vectors + to cluster. + - initialize k {kpp|random} -- use the given method (k-means++ or a random + sample of the points) to initialize k centers + - lloyd, hamerly, annulus, elkan, compare, sort, heap, adaptive -- perform + k-means clustering with the given algorithm (requires first having + initialized the centers). The adaptive algorithm is Drake's algorithm with + a heuristic for choosing an initial B + - drake B -- use Drake's algorithm with B lower bounds + - kernel [gaussian T | linear | polynomial P] -- use kernelized k-means with + the given kernel + - elkan_kernel [gaussian T | linear | polynomial P] -- use kernelized + k-means with the given kernel, and Elkan's accelerations + - center -- give the previously-loaded dataset a mean of 0. + - quit -- quit the program + +Note that when a set of centers is initialized, that same set of centers is used +from then on (until a new initialization occurs). So running a clustering +algorithm multiple times will use the same initialization each time. + +Here is an example of a simple set of commands: + + dataset smallDataset.txt + initialize 10 kpp + + annulus + hamerly + adaptive + heap + elkan + sort + compare + + +---------------------- +CAVEATS: +- This software has been developed and tested on Linux. Other platforms may not + work. Please let us know if you have difficulties, and if possible fixes for + the code. + +- This software uses a non-standard pthreads function called + pthread_barrier_wait(), which is implemented on Linux but not on OSX. + Therefore, multithreading doesn't currently work on OSX. To turn it off, + comment out the lines in the Makefile that say: + + CPPFLAGS += -DUSE_THREADS + LDFLAGS += -lpthread + + +---------------------- +REFERENCES: + +Phillips, Steven J. "Acceleration of k-means and related clustering algorithms." +In Algorithm Engineering and Experiments, pp. 166-177. Springer Berlin +Heidelberg, 2002. + +Elkan, Charles. "Using the triangle inequality to accelerate k-means." In ICML, +vol. 3, pp. 147-153. 2003. + +Hamerly, Greg. "Making k-means Even Faster." In SDM, pp. 130-140. 2010. + +Drake, Jonathan, and Greg Hamerly. "Accelerated k-means with adaptive distance +bounds." In 5th NIPS Workshop on Optimization for Machine Learning. 2012. + +Drake, Jonathan. "Faster k-means clustering." MS thesis, 2013. + +Hamerly, Greg, and Jonathan Drake. "Accelerating Lloyd's algorithm for k-means +clustering." To appear in Partitional Clustering Algorithms, Springer, 2014. + diff --git a/contrib/kmeans/driver-standalone.cpp b/contrib/kmeans/driver-standalone.cpp new file mode 100644 index 000000000..cf23cc0a4 --- /dev/null +++ b/contrib/kmeans/driver-standalone.cpp @@ -0,0 +1,70 @@ +#include +#include +#include +#include + +#include "kmeans/general_functions.h" +#include "kmeans/kmeans.h" + + +#include "dataset.h" +#include "hamerly_kmeans.h" + +Dataset *load_dataset(std::string const &filename) { + std::ifstream input(filename.c_str()); + + int n, d; + input >> n >> d; + Dataset *x = new Dataset(n, d); + + for (int i = 0; i < n * d; ++i) input >> x->data[i]; + + return x; +} + +Kmeans *get_algorithm(std::string const &name) { + if (name == "hamerly") return new HamerlyKmeans(); + return NULL; +} + +int main(int argc, char **argv) { + if (argc != 5) { + std::cout << "usage: " << argv[0] << " algorithm dataset k [centers|assignment]\n"; + return 1; + } + + std::string algorithm_name(argv[1]); + std::string filename(argv[2]); + int k = std::stoi(argv[3]); + std::string output(argv[4]); + + Dataset *x = load_dataset(filename); + Kmeans *algorithm = get_algorithm(algorithm_name); + + Dataset *initialCenters = init_centers_kmeanspp_v2(*x, k); + + unsigned short *assignment = new unsigned short[x->n]; + + assign(*x, *initialCenters, assignment); + + algorithm->initialize(x, k, assignment, 1); + + algorithm->run(10000); + + Dataset const *finalCenters = algorithm->getCenters(); + if (output == "centers") { + finalCenters->print(); + } else { + assign(*x, *finalCenters, assignment); + for (int i = 0; i < x->n; ++i) { + std::cout << assignment[i] << "\n"; + } + } + + delete x; + delete algorithm; + delete initialCenters; + delete [] assignment; + + return 0; +} diff --git a/contrib/kmeans/hamerly_kmeans.cpp b/contrib/kmeans/hamerly_kmeans.cpp new file mode 100644 index 000000000..3eef87980 --- /dev/null +++ b/contrib/kmeans/hamerly_kmeans.cpp @@ -0,0 +1,178 @@ +/* Authors: Greg Hamerly and Jonathan Drake + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2014 + */ + +#include "hamerly_kmeans.h" +#include "kmeans_general_functions.h" +#include + +/* Hamerly's algorithm that is a 'simplification' of Elkan's, in that it keeps + * the following bounds: + * - One upper bound per clustered record on the distance between the record + * and its closest center. It is always greater than or equal to the true + * distance between the record and its closest center. This is the same as in + * Elkan's algorithm. + * - *One* lower bound per clustered record on the distance between the record + * and its *second*-closest center. It is always less than or equal to the + * true distance between the record and its second closest center. This is + * different information than Elkan's algorithm -- his algorithm keeps k + * lower bounds for each record, for a total of (n*k) lower bounds. + * + * The basic ideas are: + * - when lower(x) <= upper(x), we need to recalculate the closest centers for + * the record x, and reset lower(x) and upper(x) to their boundary values + * - whenever a center moves + * - calculate the distance it moves 'd' + * - for each record x assigned to that center, update its upper bound + * - upper(x) = upper(x) + d + * - after each iteration + * - find the center that has moved the most (with distance 'd') + * - update the lower bound for all (?) records: + * - lower(x) = lower(x) - d + * + * Parameters: + * - threadId: the index of the thread that is running + * - maxIterations: a bound on the number of iterations to perform + * + * Return value: the number of iterations performed (always at least 1) + */ +// this version only updates center locations when necessary +int HamerlyKmeans::runThread(int threadId, int maxIterations) { + int iterations = 0; + + int startNdx = start(threadId); + int endNdx = end(threadId); + + while ((iterations < maxIterations) && ! converged) { + ++iterations; + + // compute the inter-center distances, keeping only the closest distances + update_s(threadId); + synchronizeAllThreads(); + + // loop over all records + for (int i = startNdx; i < endNdx; ++i) { + unsigned short closest = assignment[i]; + + // if upper[i] is less than the greater of these two, then we can + // ignore record i + double upper_comparison_bound = std::max(s[closest], lower[i]); + + // first check: if u(x) <= s(c(x)) or u(x) <= lower(x), then ignore + // x, because its closest center must still be closest + if (upper[i] <= upper_comparison_bound) { + continue; + } + + // otherwise, compute the real distance between this record and its + // closest center, and update upper + double u2 = pointCenterDist2(i, closest); + upper[i] = sqrt(u2); + + // if (u(x) <= s(c(x))) or (u(x) <= lower(x)), then ignore x + if (upper[i] <= upper_comparison_bound) { + continue; + } + + // now update the lower bound by looking at all other centers + double l2 = std::numeric_limits::max(); // the squared lower bound + for (int j = 0; j < k; ++j) { + if (j == closest) { continue; } + + double dist2 = pointCenterDist2(i, j); + + if (dist2 < u2) { + // another center is closer than the current assignment + + // change the lower bound to be the current upper bound + // (since the current upper bound is the distance to the + // now-second-closest known center) + l2 = u2; + + // adjust the upper bound and the current assignment + u2 = dist2; + closest = j; + } else if (dist2 < l2) { + // we must reduce the lower bound on the distance to the + // *second* closest center to x[i] + l2 = dist2; + } + } + + // we have been dealing in squared distances; need to convert + lower[i] = sqrt(l2); + + // if the assignment for i has changed, then adjust the counts and + // locations of each center's accumulated mass + if (assignment[i] != closest) { + upper[i] = sqrt(u2); + changeAssignment(i, closest, threadId); + } + } + + verifyAssignment(iterations, startNdx, endNdx); + + // ELKAN 4, 5, AND 6 + // calculate the new center locations + synchronizeAllThreads(); + if (threadId == 0) { + int furthestMovingCenter = move_centers(); + converged = (0.0 == centerMovement[furthestMovingCenter]); + } + + synchronizeAllThreads(); + + if (! converged) { + update_bounds(startNdx, endNdx); + } + + synchronizeAllThreads(); + } + + return iterations; +} + + +/* This method does the following: + * - finds the furthest-moving center + * - finds the distances moved by the two furthest-moving centers + * - updates the upper/lower bounds for each record + * + * Parameters: + * - startNdx: the first index of the dataset this thread is responsible for + * - endNdx: one past the last index of the dataset this thread is responsible for + */ +void HamerlyKmeans::update_bounds(int startNdx, int endNdx) { + double longest = centerMovement[0], secondLongest = (1 < k) ? centerMovement[1] : centerMovement[0]; + int furthestMovingCenter = 0; + + if (longest < secondLongest) { + furthestMovingCenter = 1; + std::swap(longest, secondLongest); + } + + for (int j = 2; j < k; ++j) { + if (longest < centerMovement[j]) { + secondLongest = longest; + longest = centerMovement[j]; + furthestMovingCenter = j; + } else if (secondLongest < centerMovement[j]) { + secondLongest = centerMovement[j]; + } + } + + // update upper/lower bounds + for (int i = startNdx; i < endNdx; ++i) { + // the upper bound increases by the amount that its center moved + upper[i] += centerMovement[assignment[i]]; + + // The lower bound decreases by the maximum amount that any center + // moved, unless the furthest-moving center is the one it's assigned + // to. In the latter case, the lower bound decreases by the amount + // of the second-furthest-moving center. + lower[i] -= (assignment[i] == furthestMovingCenter) ? secondLongest : longest; + } +} + diff --git a/contrib/kmeans/hamerly_kmeans.h b/contrib/kmeans/hamerly_kmeans.h new file mode 100644 index 000000000..22b4b214a --- /dev/null +++ b/contrib/kmeans/hamerly_kmeans.h @@ -0,0 +1,29 @@ +#ifndef HAMERLY_KMEANS_H +#define HAMERLY_KMEANS_H + +/* Authors: Greg Hamerly and Jonathan Drake + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2014 + * + * HamerlyKmeans implements Hamerly's k-means algorithm that uses one lower + * bound per point. + */ + +#include "triangle_inequality_base_kmeans.h" + +class HamerlyKmeans : public TriangleInequalityBaseKmeans { + public: + HamerlyKmeans() { numLowerBounds = 1; } + virtual ~HamerlyKmeans() { free(); } + virtual std::string getName() const { return "hamerly"; } + + protected: + // Update the upper and lower bounds for the given range of points. + void update_bounds(int startNdx, int endNdx); + + virtual int runThread(int threadId, int maxIterations); +}; + +#endif + diff --git a/contrib/kmeans/kmeans.cpp b/contrib/kmeans/kmeans.cpp new file mode 100644 index 000000000..368d0428c --- /dev/null +++ b/contrib/kmeans/kmeans.cpp @@ -0,0 +1,165 @@ +/* Authors: Greg Hamerly and Jonathan Drake + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2014 + */ + +#include "kmeans.h" +#include "kmeans_general_functions.h" +#include +#include + +Kmeans::Kmeans() : x(NULL), n(0), k(0), d(0), numThreads(0), converged(false), + clusterSize(NULL), centerMovement(NULL), assignment(NULL) { + #ifdef COUNT_DISTANCES + numDistances = 0; + #endif +} + +void Kmeans::free() { + delete [] centerMovement; + for (int t = 0; t < numThreads; ++t) { + delete [] clusterSize[t]; + } + delete [] clusterSize; + centerMovement = NULL; + clusterSize = NULL; + assignment = NULL; + n = k = d = numThreads = 0; +} + + + +void Kmeans::initialize(Dataset const *aX, unsigned short aK, unsigned short *initialAssignment, int aNumThreads) { + free(); + + converged = false; + x = aX; + n = x->n; + d = x->d; + k = aK; + + #ifdef USE_THREADS + numThreads = aNumThreads; + pthread_barrier_init(&barrier, NULL, numThreads); + #else + numThreads = 1; + #endif + + assignment = initialAssignment; + centerMovement = new double[k]; + clusterSize = new int *[numThreads]; + for (int t = 0; t < numThreads; ++t) { + clusterSize[t] = new int[k]; + std::fill(clusterSize[t], clusterSize[t] + k, 0); + for (int i = start(t); i < end(t); ++i) { + assert(assignment[i] < k); + ++clusterSize[t][assignment[i]]; + } + } + + + #ifdef COUNT_DISTANCES + numDistances = 0; + #endif +} + +void Kmeans::changeAssignment(int xIndex, int closestCluster, int threadId) { + --clusterSize[threadId][assignment[xIndex]]; + ++clusterSize[threadId][closestCluster]; + assignment[xIndex] = closestCluster; +} + +#ifdef USE_THREADS +struct ThreadInfo { + public: + ThreadInfo() : km(NULL), threadId(0), pthread_id(0) {} + Kmeans *km; + int threadId; + pthread_t pthread_id; + int numIterations; + int maxIterations; +}; +#endif + +void *Kmeans::runner(void *args) { + #ifdef USE_THREADS + ThreadInfo *ti = (ThreadInfo *)args; + ti->numIterations = ti->km->runThread(ti->threadId, ti->maxIterations); + #endif + return NULL; +} + +int Kmeans::run(int maxIterations) { + int iterations = 0; + #ifdef USE_THREADS + { + ThreadInfo *info = new ThreadInfo[numThreads]; + for (int t = 0; t < numThreads; ++t) { + info[t].km = this; + info[t].threadId = t; + info[t].maxIterations = maxIterations; + pthread_create(&info[t].pthread_id, NULL, Kmeans::runner, &info[t]); + } + // wait for everything to finish... + for (int t = 0; t < numThreads; ++t) { + pthread_join(info[t].pthread_id, NULL); + } + iterations = info[0].numIterations; + delete [] info; + } + #else + { + iterations = runThread(0, maxIterations); + } + #endif + + return iterations; +} + +double Kmeans::getSSE() const { + double sse = 0.0; + for (int i = 0; i < n; ++i) { + sse += pointCenterDist2(i, assignment[i]); + } + return sse; +} + +void Kmeans::verifyAssignment(int iteration, int startNdx, int endNdx) const { + #ifdef VERIFY_ASSIGNMENTS + for (int i = startNdx; i < endNdx; ++i) { + // keep track of the squared distance and identity of the closest-seen + // cluster (so far) + int closest = assignment[i]; + double closest_dist2 = pointCenterDist2(i, closest); + double original_closest_dist2 = closest_dist2; + // look at all centers + for (int j = 0; j < k; ++j) { + if (j == closest) { + continue; + } + double d2 = pointCenterDist2(i, j); + + // determine if we found a closer center + if (d2 < closest_dist2) { + closest = j; + closest_dist2 = d2; + } + } + + // if we have found a discrepancy, then print out information and crash + // the program + if (closest != assignment[i]) { + std::cerr << "assignment error:" << std::endl; + std::cerr << "iteration = " << iteration << std::endl; + std::cerr << "point index = " << i << std::endl; + std::cerr << "closest center = " << closest << std::endl; + std::cerr << "closest center dist2 = " << closest_dist2 << std::endl; + std::cerr << "assigned center = " << assignment[i] << std::endl; + std::cerr << "assigned center dist2 = " << original_closest_dist2 << std::endl; + assert(false); + } + } + #endif +} + diff --git a/contrib/kmeans/kmeans.h b/contrib/kmeans/kmeans.h new file mode 100644 index 000000000..4bab46255 --- /dev/null +++ b/contrib/kmeans/kmeans.h @@ -0,0 +1,147 @@ +#ifndef KMEANS_H +#define KMEANS_H + +/* Authors: Greg Hamerly and Jonathan Drake + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2014 + * + * Kmeans is an abstract base class for algorithms which implement Lloyd's + * k-means algorithm. Subclasses provide functionality in the "runThread()" + * method. + */ + +#include "kmeans_dataset.h" +#include +#include +#ifdef USE_THREADS + #include +#endif + +class Kmeans { + public: + // Construct a K-means object to operate on the given dataset + Kmeans(); + virtual ~Kmeans() { free(); } + + // This method kicks off the threads that do the clustering and run + // until convergence (or until reaching maxIterations). It returns the + // number of iterations performed. + int run(int aMaxIterations = std::numeric_limits::max()); + + // Get the cluster assignment for the given point index. + int getAssignment(int xIndex) const { return assignment[xIndex]; } + + // Initialize the algorithm at the beginning of the run(), with the + // given data and initial assignment. The parameter initialAssignment + // will be modified by this algorithm and will at the end contain the + // final assignment of clusters. + virtual void initialize(Dataset const *aX, unsigned short aK, unsigned short *initialAssignment, int aNumThreads); + + // Free all memory being used by the object. + virtual void free(); + + // This method verifies that the current assignment is correct, by + // checking every point-center distance. For debugging. + virtual void verifyAssignment(int iteration, int startNdx, int endNdx) const; + + // Compute the sum of squared errors for the data on the centers (not + // designed to be fast). + virtual double getSSE() const; + + // Get the name of this clustering algorithm (to be overridden by + // subclasses). + virtual std::string getName() const = 0; + + // Virtual methods for computing inner products (depending on the kernel + // being used, e.g.). For vanilla k-means these will be the standard dot + // product; for more exotic applications these will be other kernel + // functions. + virtual double pointPointInnerProduct(int x1, int x2) const = 0; + virtual double pointCenterInnerProduct(int xndx, unsigned short cndx) const = 0; + virtual double centerCenterInnerProduct(unsigned short c1, unsigned short c2) const = 0; + + // Use the inner products to compute squared distances between a point + // and center. + virtual double pointCenterDist2(int x1, unsigned short cndx) const { + #ifdef COUNT_DISTANCES + ++numDistances; + #endif + return pointPointInnerProduct(x1, x1) - 2 * pointCenterInnerProduct(x1, cndx) + centerCenterInnerProduct(cndx, cndx); + } + + // Use the inner products to compute squared distances between two + // centers. + virtual double centerCenterDist2(unsigned short c1, unsigned short c2) const { + #ifdef COUNT_DISTANCES + ++numDistances; + #endif + return centerCenterInnerProduct(c1, c1) - 2 * centerCenterInnerProduct(c1, c2) + centerCenterInnerProduct(c2, c2); + } + + #ifdef COUNT_DISTANCES + #ifdef USE_THREADS + // Note: numDistances is NOT thread-safe, but it is not meant to be + // enabled in performant code. + #error Counting distances and using multiple threads is not supported. + #endif + mutable long long numDistances; + #endif + + virtual Dataset const *getCenters() const { return NULL; } + + protected: + // The dataset to cluster. + const Dataset *x; + + // Local copies for convenience. + int n, k, d; + + // Pthread primitives for multithreading. + int numThreads; + #ifdef USE_THREADS + pthread_barrier_t barrier; + #endif + + // To communicate (to all threads) that we have converged. + bool converged; + + // Keep track of how many points are in each cluster, divided over each + // thread. + int **clusterSize; + + // centerMovement is computed in move_centers() and used to detect + // convergence (if max(centerMovement) == 0.0) and update point-center + // distance bounds (in subclasses that use them). + double *centerMovement; + + // For each point in x, keep which cluster it is assigned to. By using a + // short, we assume a limited number of clusters (fewer than 2^16). + unsigned short *assignment; + + + // This is where each thread does its work. + virtual int runThread(int threadId, int maxIterations) = 0; + + // Static entry method for pthread_create(). + static void *runner(void *args); + + // Assign point at xIndex to cluster newCluster, working within thread threadId. + virtual void changeAssignment(int xIndex, int newCluster, int threadId); + + // Over what range in [0, n) does this thread have ownership of the + // points? end() returns one past the last owned point. + int start(int threadId) const { return n * threadId / numThreads; } + int end(int threadId) const { return start(threadId + 1); } + int whichThread(int index) const { return index * numThreads / n; } + + // Convenience method for causing all threads to synchronize. + void synchronizeAllThreads() { + #ifdef USE_THREADS + pthread_barrier_wait(&barrier); + #endif + } +}; + +#endif + diff --git a/contrib/kmeans/kmeans_dataset.cpp b/contrib/kmeans/kmeans_dataset.cpp new file mode 100644 index 000000000..ca71ddf73 --- /dev/null +++ b/contrib/kmeans/kmeans_dataset.cpp @@ -0,0 +1,95 @@ +/* Authors: Greg Hamerly and Jonathan Drake + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2014 + */ + +#include "kmeans_dataset.h" +// #include +#include +#include +#include + +// print the dataset to standard output (cout), using formatting to keep the +// data in matrix format +void Dataset::print(std::ostream &out) const { + //std::ostream &out = std::cout; + out.precision(6); + int ndx = 0; + for (int i = 0; i < n; ++i) { + for (int j = 0; j < d; ++j) { + out << std::setw(13) << data[ndx++] << " "; + } + out << std::endl; + } +} + +// returns a (modifiable) reference to the value in dimension "dim" from record +// "ndx" +double &Dataset::operator()(int ndx, int dim) { +# ifdef DEBUG + assert(ndx < n); + assert(dim < d); +# endif + return data[ndx * d + dim]; +} + +// returns a (const) reference to the value in dimension "dim" from record "ndx" +const double &Dataset::operator()(int ndx, int dim) const { +# ifdef DEBUG + assert(ndx < n); + assert(dim < d); +# endif + return data[ndx * d + dim]; +} + +// fill the entire dataset with value. Does NOT update sumDataSquared. +void Dataset::fill(double value) { + for (int i = 0; i < nd; ++i) { + data[i] = value; + } +} + +// copy constructor -- makes a deep copy of everything in x +Dataset::Dataset(Dataset const &x) { + n = d = nd = 0; + data = sumDataSquared = NULL; + *this = x; +} + +// operator= is the standard deep-copy assignment operator, which +// returns a const reference to *this. +Dataset const &Dataset::operator=(Dataset const &x) { + if (this != &x) { + + // reallocate sumDataSquared and data as necessary + if (n != x.n) { + delete [] sumDataSquared; + sumDataSquared = x.sumDataSquared ? new double[x.n] : NULL; + } + + if (nd != x.nd) { + delete [] data; + data = x.data ? new double[x.nd] : NULL; + } + + // reflect the new sizes + n = x.n; + d = x.d; + nd = x.nd; + + // copy data as appropriate + if (x.sumDataSquared) { + memcpy(sumDataSquared, x.sumDataSquared, x.n * sizeof(double)); + } + + if (x.data) { + memcpy(data, x.data, x.nd * sizeof(double)); + } + + } + + // return a reference for chaining assignments + return *this; +} + diff --git a/contrib/kmeans/kmeans_dataset.h b/contrib/kmeans/kmeans_dataset.h new file mode 100644 index 000000000..7bff5d9d0 --- /dev/null +++ b/contrib/kmeans/kmeans_dataset.h @@ -0,0 +1,85 @@ +#ifndef DATASET_H +#define DATASET_H + +/* Authors: Greg Hamerly and Jonathan Drake + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2014 + * + * A Dataset class represents a collection of multidimensional records, as is + * typical in metric machine learning. Every record has the same number of + * dimensions (values), and every value must be numeric. Undefined values are + * not allowed. + * + * This particular implementation keeps all the data in a 1-dimensional array, + * and also optionally keeps extra storage for the sum of the squared values of + * each record. However, the Dataset class does NOT automatically populate or + * update the sumDataSquared values. + */ + +#include +#include + +class Dataset { + public: + // default constructor -- constructs a completely empty dataset with no + // records + Dataset() : n(0), d(0), nd(0), data(NULL), sumDataSquared(NULL) {} + + // construct a dataset of a particular size, and determine whether to + // keep the sumDataSquared + Dataset(int aN, int aD, bool keepSDS = false) : n(aN), d(aD), nd(n * d), + data(new double[nd]), + sumDataSquared(keepSDS ? new double[n] : NULL) {} + + // copy constructor -- makes a deep copy of everything in x + Dataset(Dataset const &x); + + // destroys the dataset safely + ~Dataset() { + n = d = nd = 0; + double *dp = data, *sdsp = sumDataSquared; + data = sumDataSquared = NULL; + delete [] dp; + delete [] sdsp; + } + + // operator= is the standard deep-copy assignment operator, which + // returns a const reference to *this. + Dataset const &operator=(Dataset const &x); + + // allows modification of the record ndx and dimension dim + double &operator()(int ndx, int dim); + + // allows const access to record ndx and dimension dim + const double &operator()(int ndx, int dim) const; + + // fill the entire dataset with value. Does NOT update sumDataSquared. + void fill(double value); + + // print the dataset to standard output (cout), using formatting to keep the + // data in matrix format + void print(std::ostream &out = std::cout) const; + + // n represents the number of records + // d represents the dimension + // nd is a shortcut for the value n * d + int n, d, nd; + + // data is an array of length n*d that stores all of the records in + // record-major (row-major) order. Thus data[0]...data[d-1] are the + // values associated with the first record. + double *data; + + // sumDataSquared is an (optional) sum of squared values for every + // record. Thus, + // sumDataSquared[0] = data[0]^2 + data[1]^2 + ... + data[d-1]^2 + // sumDataSquared[1] = data[d]^2 + data[d+1]^2 + ... + data[2*d-1]^2 + // and so on. Note that this is the *intended* use of the sumDataSquared + // field, but that the Dataset class does NOT automatically populate or + // update the values in sumDataSquared. + double *sumDataSquared; +}; + +#endif + diff --git a/contrib/kmeans/kmeans_general_functions.cpp b/contrib/kmeans/kmeans_general_functions.cpp new file mode 100644 index 000000000..a3459b3bc --- /dev/null +++ b/contrib/kmeans/kmeans_general_functions.cpp @@ -0,0 +1,330 @@ +/* Authors: Greg Hamerly and Jonathan Drake + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2014 + */ + +#include "kmeans_dataset.h" +#include "kmeans.h" +#include "kmeans_general_functions.h" +#include +#include +#include +#include +#include +#include +#include + +void addVectors(double *a, double const *b, int d) { + double const *end = a + d; + while (a < end) { + *(a++) += *(b++); + } +} + +void subVectors(double *a, double const *b, int d) { + double const *end = a + d; + while (a < end) { + *(a++) -= *(b++); + } +} + +double distance2silent(double const *a, double const *b, int d) { + double d2 = 0.0, diff; + double const *end = a + d; // one past the last valid entry in a + while (a < end) { + diff = *(a++) - *(b++); + d2 += diff * diff; + } + return d2; +} + + +void centerDataset(Dataset *x) { + double *xCentroid = new double[x->d]; + + for (int d = 0; d < x->d; ++d) { + xCentroid[d] = 0.0; + } + + for (int i = 0; i < x->n; ++i) { + addVectors(xCentroid, x->data + i * x->d, x->d); + } + + // compute average (divide by n) + for (int d = 0; d < x->d; ++d) { + xCentroid[d] /= x->n; + } + + // re-center the dataset + const double *xEnd = x->data + x->n * x->d; + for (double *xp = x->data; xp != xEnd; xp += x->d) { + subVectors(xp, xCentroid, x->d); + } + + delete [] xCentroid; +} + +Dataset *init_centers(Dataset const &x, unsigned short k) { + int *chosen_pts = new int[k]; + Dataset *c = new Dataset(k, x.d); + for (int i = 0; i < k; ++i) { + bool acceptable = true; + do { + acceptable = true; + chosen_pts[i] = rand() % x.n; + for (int j = 0; j < i; ++j) { + if (chosen_pts[i] == chosen_pts[j]) { + acceptable = false; + break; + } + } + } while (! acceptable); + double *cdp = c->data + i * x.d; + memcpy(cdp, x.data + chosen_pts[i] * x.d, sizeof(double) * x.d); + if (c->sumDataSquared) { + c->sumDataSquared[i] = std::inner_product(cdp, cdp + x.d, cdp, 0.0); + } + } + + delete [] chosen_pts; + + return c; +} + + +Dataset *init_centers_kmeanspp(Dataset const &x, unsigned short k) { + int *chosen_pts = new int[k]; + std::pair *dist2 = new std::pair[x.n]; + double *distribution = new double[x.n]; + + // initialize dist2 + for (int i = 0; i < x.n; ++i) { + dist2[i].first = std::numeric_limits::max(); + dist2[i].second = i; + } + + // choose the first point randomly + int ndx = 1; + chosen_pts[ndx - 1] = rand() % x.n; + + while (ndx < k) { + double sum_distribution = 0.0; + // look for the point that is furthest from any center + for (int i = 0; i < x.n; ++i) { + int example = dist2[i].second; + double d2 = 0.0, diff; + for (int j = 0; j < x.d; ++j) { + diff = x(example,j) - x(chosen_pts[ndx - 1],j); + d2 += diff * diff; + } + if (d2 < dist2[i].first) { + dist2[i].first = d2; + } + + sum_distribution += dist2[i].first; + } + + // sort the examples by their distance from centers + sort(dist2, dist2 + x.n); + + // turn distribution into a CDF + distribution[0] = dist2[0].first / sum_distribution; + for (int i = 1; i < x.n; ++i) { + distribution[i] = distribution[i - 1] + dist2[i].first / sum_distribution; + } + + // choose a random interval according to the new distribution + double r = (double)rand() / (double)RAND_MAX; + double *new_center_ptr = std::lower_bound(distribution, distribution + x.n, r); + int distribution_ndx = (int)(new_center_ptr - distribution); + chosen_pts[ndx] = dist2[distribution_ndx].second; + /* + cout << "chose " << distribution_ndx << " which is actually " + << chosen_pts[ndx] << " with distance " + << dist2[distribution_ndx].first << std::endl; + */ + + ++ndx; + } + + Dataset *c = new Dataset(k, x.d); + + for (int i = 0; i < k; ++i) { + double *cdp = c->data + i * x.d; + memcpy(cdp, x.data + chosen_pts[i] * x.d, sizeof(double) * x.d); + if (c->sumDataSquared) { + c->sumDataSquared[i] = std::inner_product(cdp, cdp + x.d, cdp, 0.0); + } + } + + delete [] chosen_pts; + delete [] dist2; + delete [] distribution; + + return c; +} + + +Dataset *init_centers_kmeanspp_v2(Dataset const &x, unsigned short k) { + int *chosen_pts = new int[k]; + std::pair *dist2 = new std::pair[x.n]; + + // initialize dist2 + for (int i = 0; i < x.n; ++i) { + dist2[i].first = std::numeric_limits::max(); + dist2[i].second = i; + } + + // choose the first point randomly + int ndx = 1; + chosen_pts[ndx - 1] = rand() % x.n; + + while (ndx < k) { + double sum_distribution = 0.0; + // look for the point that is furthest from any center + double max_dist = 0.0; + for (int i = 0; i < x.n; ++i) { + int example = dist2[i].second; + double d2 = 0.0, diff; + for (int j = 0; j < x.d; ++j) { + diff = x(example,j) - x(chosen_pts[ndx - 1],j); + d2 += diff * diff; + } + if (d2 < dist2[i].first) { + dist2[i].first = d2; + } + + if (dist2[i].first > max_dist) { + max_dist = dist2[i].first; + } + + sum_distribution += dist2[i].first; + } + + bool unique = true; + + do { + // choose a random interval according to the new distribution + double r = sum_distribution * (double)rand() / (double)RAND_MAX; + double sum_cdf = dist2[0].first; + int cdf_ndx = 0; + while (sum_cdf < r) { + sum_cdf += dist2[++cdf_ndx].first; + } + chosen_pts[ndx] = cdf_ndx; + + for (int i = 0; i < ndx; ++i) { + unique = unique && (chosen_pts[ndx] != chosen_pts[i]); + } + } while (! unique); + + ++ndx; + } + + Dataset *c = new Dataset(k, x.d); + for (int i = 0; i < c->n; ++i) { + double *cdp = c->data + i * x.d; + memcpy(cdp, x.data + chosen_pts[i] * x.d, sizeof(double) * x.d); + if (c->sumDataSquared) { + c->sumDataSquared[i] = std::inner_product(cdp, cdp + x.d, cdp, 0.0); + } + } + + delete [] chosen_pts; + delete [] dist2; + + return c; +} + + +/** + * in MB + */ +double getMemoryUsage() { + char buf[30]; + snprintf(buf, 30, "/proc/%u/statm", (unsigned)getpid()); + FILE* pf = fopen(buf, "r"); + unsigned int totalProgramSizeInPages = 0; + unsigned int residentSetSizeInPages = 0; + if (pf) { + int numScanned = fscanf(pf, "%u %u" /* %u %u %u %u %u"*/, &totalProgramSizeInPages, &residentSetSizeInPages); + if (numScanned != 2) { + return 0.0; + } + } + + fclose(pf); + pf = NULL; + + double sizeInKilobytes = residentSetSizeInPages * 4.0; // assumes 4096 byte page + // getconf PAGESIZE + + return sizeInKilobytes; +} + + +void assign(Dataset const &x, Dataset const &c, unsigned short *assignment) { + for (int i = 0; i < x.n; ++i) { + double shortestDist2 = std::numeric_limits::max(); + int closest = 0; + for (int j = 0; j < c.n; ++j) { + double d2 = 0.0, *a = x.data + i * x.d, *b = c.data + j * x.d; + for (; a != x.data + (i + 1) * x.d; ++a, ++b) { + d2 += (*a - *b) * (*a - *b); + } + if (d2 < shortestDist2) { + shortestDist2 = d2; + closest = j; + } + } + assignment[i] = closest; + } +} + +rusage get_time() { + rusage now; + getrusage(RUSAGE_SELF, &now); + return now; +} + + +double get_wall_time(){ + struct timeval time; + if (gettimeofday(&time,NULL)){ + // Handle error + return 0; + } + return (double)time.tv_sec + (double)time.tv_usec * .000001; +} + +int timeval_subtract(timeval *result, timeval *x, timeval *y) { + /* Perform the carry for the later subtraction by updating y. */ + if (x->tv_usec < y->tv_usec) { + int nsec = (y->tv_usec - x->tv_usec) / 1000000 + 1; + y->tv_usec -= 1000000 * nsec; + y->tv_sec += nsec; + } + if (x->tv_usec - y->tv_usec > 1000000) { + int nsec = (x->tv_usec - y->tv_usec) / 1000000; + y->tv_usec += 1000000 * nsec; + y->tv_sec -= nsec; + } + + /* Compute the time remaining to wait. tv_usec is certainly positive. */ + result->tv_sec = x->tv_sec - y->tv_sec; + result->tv_usec = x->tv_usec - y->tv_usec; + + /* Return 1 if result + * is negative. */ + return x->tv_sec < y->tv_sec; +} + +double elapsed_time(rusage *start) { + rusage now; + timeval diff; + getrusage(RUSAGE_SELF, &now); + timeval_subtract(&diff, &now.ru_utime, &start->ru_utime); + + return (double)diff.tv_sec + (double)diff.tv_usec / 1e6; +} diff --git a/contrib/kmeans/kmeans_general_functions.h b/contrib/kmeans/kmeans_general_functions.h new file mode 100644 index 000000000..d4debca05 --- /dev/null +++ b/contrib/kmeans/kmeans_general_functions.h @@ -0,0 +1,91 @@ +#ifndef GENERAL_KMEANS_FUNCTIONS_H +#define GENERAL_KMEANS_FUNCTIONS_H + +/* Authors: Greg Hamerly and Jonathan Drake + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2014 + * + * Generally useful functions. + */ + + +#include +#include +#include +#include +#include "kmeans_dataset.h" + +/* Add together two vectors, and put the result in the first argument. + * Calculates a = a + b + * + * Parameters: + * a -- vector to add, and the result of the operation + * b -- vector to add to a + * d -- the dimension + * Return value: none + */ +void addVectors(double *a, double const *b, int d); + +/* Subtract two vectors, and put the result in the first argument. Calculates + * a = a - b + * + * Parameters: + * a -- vector to subtract from, and the result of the operation + * b -- vector to subtract + * d -- the dimension + * Return value: none + */ +void subVectors(double *a, double const *b, int d); + +/* Initialize the centers randomly. Choose random records from x as the initial + * values for the centers. Assumes that c uses the sumDataSquared field. + * + * Parameters: + * x -- records that are being clustered (n * d) + * c -- centers to be initialized. Should be pre-allocated with the number of + * centers desired, and dimension. + * Return value: none + */ +Dataset *init_centers(Dataset const &x, unsigned short k); + +/* Initialize the centers randomly using K-means++. + * + * Parameters: + * x -- records that are being clustered (n * d) + * c -- centers to be initialized. Should be pre-allocated with the number of + * centers desired, and dimension. + * Return value: none + */ +Dataset *init_centers_kmeanspp(Dataset const &x, unsigned short k); +Dataset *init_centers_kmeanspp_v2(Dataset const &x, unsigned short k); + +/* Print an array (templated). Convenience function. + * + * Parameters: + * arr -- the array to print + * length -- the length of the array + * separator -- the string to put between each pair of printed elements + * Return value: none + */ +template +void printArray(T const *arr, int length, std::string separator) { + for (int i = 0; i < length; ++i) { + if (i > 0) { + std::cout << separator; + } + std::cout << arr[i]; + } +} + +double getMemoryUsage(); +rusage get_time(); +double get_wall_time(); +int timeval_subtract(timeval *result, timeval *x, timeval *y); +double elapsed_time(rusage *start); + +void centerDataset(Dataset *x); + +void assign(Dataset const &x, Dataset const &c, unsigned short *assignment); + +#endif diff --git a/contrib/kmeans/original_space_kmeans.cpp b/contrib/kmeans/original_space_kmeans.cpp new file mode 100644 index 000000000..591ff92a8 --- /dev/null +++ b/contrib/kmeans/original_space_kmeans.cpp @@ -0,0 +1,106 @@ +/* Authors: Greg Hamerly and Jonathan Drake + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2014 + */ + +#include "original_space_kmeans.h" +#include "kmeans_general_functions.h" +#include +#include +#include + +OriginalSpaceKmeans::OriginalSpaceKmeans() : centers(NULL), sumNewCenters(NULL) { } + +void OriginalSpaceKmeans::free() { + for (int t = 0; t < numThreads; ++t) { + delete sumNewCenters[t]; + } + Kmeans::free(); + delete centers; + delete [] sumNewCenters; + centers = NULL; + sumNewCenters = NULL; +} + +/* This method moves the newCenters to their new locations, based on the + * sufficient statistics in sumNewCenters. It also computes the centerMovement + * and the center that moved the furthest. + * + * Parameters: none + * + * Return value: index of the furthest-moving centers + */ +int OriginalSpaceKmeans::move_centers() { + int furthestMovingCenter = 0; + for (int j = 0; j < k; ++j) { + centerMovement[j] = 0.0; + int totalClusterSize = 0; + for (int t = 0; t < numThreads; ++t) { + totalClusterSize += clusterSize[t][j]; + } + if (totalClusterSize > 0) { + for (int dim = 0; dim < d; ++dim) { + double z = 0.0; + for (int t = 0; t < numThreads; ++t) { + z += (*sumNewCenters[t])(j,dim); + } + z /= totalClusterSize; + centerMovement[j] += (z - (*centers)(j, dim)) * (z - (*centers)(j, dim)); + (*centers)(j, dim) = z; + } + } + centerMovement[j] = sqrt(centerMovement[j]); + + if (centerMovement[furthestMovingCenter] < centerMovement[j]) { + furthestMovingCenter = j; + } + } + + #ifdef COUNT_DISTANCES + numDistances += k; + #endif + + return furthestMovingCenter; +} + +void OriginalSpaceKmeans::initialize(Dataset const *aX, unsigned short aK, unsigned short *initialAssignment, int aNumThreads) { + Kmeans::initialize(aX, aK, initialAssignment, aNumThreads); + + centers = new Dataset(k, d); + sumNewCenters = new Dataset *[numThreads]; + centers->fill(0.0); + + for (int t = 0; t < numThreads; ++t) { + sumNewCenters[t] = new Dataset(k, d, false); + sumNewCenters[t]->fill(0.0); + for (int i = start(t); i < end(t); ++i) { + addVectors(sumNewCenters[t]->data + assignment[i] * d, x->data + i * d, d); + } + } + + // put the centers at their initial locations, based on clusterSize and + // sumNewCenters + move_centers(); +} + +void OriginalSpaceKmeans::changeAssignment(int xIndex, int closestCluster, int threadId) { + unsigned short oldAssignment = assignment[xIndex]; + Kmeans::changeAssignment(xIndex, closestCluster, threadId); + double *xp = x->data + xIndex * d; + subVectors(sumNewCenters[threadId]->data + oldAssignment * d, xp, d); + addVectors(sumNewCenters[threadId]->data + closestCluster * d, xp, d); +} + +double OriginalSpaceKmeans::pointPointInnerProduct(int x1, int x2) const { + return std::inner_product(x->data + x1 * d, x->data + (x1 + 1) * d, x->data + x2 * d, 0.0); +} + +double OriginalSpaceKmeans::pointCenterInnerProduct(int xndx, unsigned short cndx) const { + return std::inner_product(x->data + xndx * d, x->data + (xndx + 1) * d, centers->data + cndx * d, 0.0); +} + +double OriginalSpaceKmeans::centerCenterInnerProduct(unsigned short c1, unsigned short c2) const { + return std::inner_product(centers->data + c1 * d, centers->data + (c1 + 1) * d, centers->data + c2 * d, 0.0); +} + diff --git a/contrib/kmeans/original_space_kmeans.h b/contrib/kmeans/original_space_kmeans.h new file mode 100644 index 000000000..0f3969f30 --- /dev/null +++ b/contrib/kmeans/original_space_kmeans.h @@ -0,0 +1,54 @@ +#ifndef ORIGINAL_SPACE_KMEANS_H +#define ORIGINAL_SPACE_KMEANS_H + +/* Authors: Greg Hamerly and Jonathan Drake + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2014 + * + * OriginalSpaceKmeans is a base class for other algorithms that operate in the + * same space as the data being clustered (as opposed to kernelized k-means + * algorithms, which operate in kernel space). + */ + +#include "kmeans.h" + +/* Cluster with the cluster centers living in the original space (with the + * data). This is as opposed to a kernelized version of k-means, where the + * center points might not be explicitly represented. This is also an abstract + * class. + */ +class OriginalSpaceKmeans : public Kmeans { + public: + OriginalSpaceKmeans(); + virtual ~OriginalSpaceKmeans() { free(); } + virtual void free(); + virtual void initialize(Dataset const *aX, unsigned short aK, unsigned short *initialAssignment, int aNumThreads); + + virtual double pointPointInnerProduct(int x1ndx, int x2ndx) const; + virtual double pointCenterInnerProduct(int xndx, unsigned short cndx) const; + virtual double centerCenterInnerProduct(unsigned short c1ndx, unsigned short c2ndx) const; + + virtual Dataset const *getCenters() const { return centers; } + + protected: + // Move the centers to the average of their current assigned points, + // compute the distance moved by each center, and return the index of + // the furthest-moving center. + int move_centers(); + + virtual void changeAssignment(int xIndex, int closestCluster, int threadId); + + // The set of centers we are operating on. + Dataset *centers; + + // sumNewCenters and centerCount provide sufficient statistics to + // quickly calculate the changing locations of the centers. Whenever a + // point changes cluster membership, we subtract (add) it from (to) the + // row in sumNewCenters associated with its old (new) cluster. We also + // decrement (increment) centerCount for the old (new) cluster. + Dataset **sumNewCenters; + +}; + +#endif diff --git a/contrib/kmeans/smallDataset.txt b/contrib/kmeans/smallDataset.txt new file mode 100644 index 000000000..6f3bd2116 --- /dev/null +++ b/contrib/kmeans/smallDataset.txt @@ -0,0 +1,1001 @@ +1000 3 +99.749 88.368 12.296 +55.674 34.162 14.085 +20.700 17.002 16.700 +71.968 66.137 25.960 +11.744 98.792 86.863 +87.796 14.483 89.589 +23.633 51.168 6.900 +98.807 70.957 2.526 +33.152 76.346 99.284 +29.342 46.505 65.139 +48.578 19.939 8.881 +64.947 12.189 0.498 +56.131 50.452 20.642 +47.582 17.697 95.263 +17.861 13.619 49.201 +66.425 90.936 83.160 +34.521 48.830 20.665 +68.009 14.405 23.286 +48.914 75.858 7.853 +95.443 93.971 59.748 +80.813 75.945 7.306 +15.207 10.142 42.906 +75.601 4.438 75.554 +8.374 55.277 3.334 +88.235 5.355 3.670 +61.706 79.811 81.527 +61.877 19.559 72.218 +52.987 71.516 65.167 +47.658 26.012 26.667 +57.895 81.983 83.397 +26.553 63.574 68.545 +25.829 59.678 68.079 +65.461 2.978 6.435 +56.371 97.556 55.490 +73.441 74.302 28.943 +22.612 11.696 60.480 +28.023 74.060 88.441 +12.547 24.935 77.018 +49.741 42.952 48.399 +14.472 82.929 14.501 +1.215 5.521 91.122 +39.966 9.303 64.598 +37.307 27.098 88.421 +31.110 49.171 57.053 +99.040 72.312 50.981 +23.426 26.631 29.350 +16.265 39.310 27.838 +58.083 80.230 30.834 +4.511 11.183 74.311 +45.894 51.774 59.508 +28.440 72.708 32.989 +59.932 7.106 93.764 +77.888 18.562 29.991 +66.108 14.293 12.700 +9.199 56.099 18.745 +23.331 4.455 79.880 +42.615 29.243 61.314 +92.885 85.595 5.585 +50.503 53.305 16.821 +75.097 70.244 62.379 +97.531 16.739 66.348 +39.328 46.573 45.321 +5.373 33.893 90.990 +68.662 60.422 80.330 +65.419 1.527 96.046 +5.474 56.072 57.188 +44.435 45.748 40.052 +49.523 27.897 14.458 +52.473 39.473 47.180 +31.377 77.363 25.684 +28.821 13.764 8.841 +5.168 22.259 24.504 +60.942 89.377 83.086 +84.213 22.241 82.507 +70.461 73.421 85.054 +78.165 29.451 90.797 +68.022 25.872 99.333 +92.671 40.408 44.093 +10.652 23.079 95.323 +72.332 68.938 36.002 +57.986 71.796 63.594 +19.769 15.516 23.748 +60.771 44.595 27.740 +11.333 19.958 39.426 +71.587 72.723 99.379 +71.151 28.564 18.870 +15.123 98.159 99.933 +16.533 18.060 95.804 +16.959 25.486 51.691 +70.034 14.252 2.409 +65.163 44.260 61.787 +56.614 48.375 13.184 +69.528 2.294 69.294 +92.150 66.512 29.415 +16.659 83.274 70.100 +85.216 62.963 90.317 +74.876 82.486 62.967 +44.400 45.570 4.424 +36.896 31.385 98.671 +24.755 66.065 62.874 +58.062 14.974 51.951 +89.352 0.767 43.377 +37.601 70.409 51.770 +74.599 43.240 59.896 +35.554 86.102 91.754 +64.327 50.757 4.933 +31.021 18.555 31.408 +19.920 58.098 67.022 +21.756 59.925 85.575 +18.510 55.805 60.216 +81.448 69.259 69.517 +93.821 3.732 51.353 +21.024 90.833 18.790 +94.359 75.575 17.455 +12.471 35.944 74.881 +99.704 74.957 64.261 +98.109 81.963 21.291 +10.543 30.134 18.421 +21.351 21.915 61.571 +33.416 22.086 2.956 +25.168 77.013 55.935 +62.719 71.809 77.211 +96.151 6.568 20.661 +55.287 57.223 79.341 +11.747 27.872 52.908 +45.663 31.147 38.642 +28.620 1.680 70.140 +51.008 9.955 15.462 +35.602 75.846 59.564 +15.620 21.373 93.390 +66.312 84.784 69.192 +87.140 70.801 58.794 +6.747 20.351 53.084 +55.160 72.578 0.014 +51.035 60.478 12.440 +12.109 17.911 87.664 +14.814 25.560 69.288 +74.449 19.323 30.089 +47.474 34.971 27.244 +76.645 95.290 57.259 +30.597 32.793 35.010 +82.513 83.383 71.926 +75.688 14.306 82.797 +48.923 99.151 20.004 +56.581 4.366 53.634 +89.947 77.071 0.728 +24.151 52.365 83.529 +79.047 33.053 40.496 +48.318 23.106 83.752 +60.429 1.885 48.504 +36.163 81.183 61.235 +25.033 12.081 14.735 +58.671 10.945 69.333 +12.532 32.165 85.678 +31.654 52.987 0.348 +88.107 31.580 96.085 +90.258 9.775 26.393 +85.486 54.016 58.085 +48.782 11.049 28.924 +18.876 66.852 26.168 +77.857 88.137 65.854 +22.915 75.941 7.884 +65.236 84.619 84.192 +47.864 36.710 67.850 +62.073 98.377 35.116 +96.999 39.457 44.928 +87.711 72.061 15.890 +63.881 45.919 39.518 +58.250 10.567 91.820 +92.126 35.374 26.237 +79.845 35.797 23.157 +11.878 19.307 55.004 +70.416 91.731 82.368 +91.757 74.538 45.445 +33.676 82.755 31.366 +8.176 53.716 81.856 +48.966 81.889 84.958 +65.124 70.471 31.479 +77.749 44.742 4.083 +75.370 42.947 42.033 +42.842 41.910 92.250 +31.274 65.706 6.205 +56.060 4.505 52.191 +81.743 75.509 93.686 +52.150 12.360 52.206 +87.099 73.524 54.378 +6.292 35.219 47.300 +30.618 65.210 50.348 +45.448 27.307 57.259 +23.185 12.388 15.990 +13.145 33.754 69.628 +83.917 50.520 14.666 +67.367 72.445 71.638 +16.627 81.546 20.969 +13.063 31.251 21.879 +21.026 90.149 27.710 +26.774 62.955 14.547 +83.446 63.092 49.152 +80.267 58.455 20.006 +29.121 89.386 61.989 +23.378 43.158 72.828 +94.810 38.234 14.603 +30.733 27.646 29.258 +94.250 14.520 16.435 +41.454 77.313 0.814 +10.952 73.556 42.549 +40.647 68.180 49.576 +13.721 91.045 4.933 +13.313 8.837 77.315 +64.548 66.772 29.589 +88.817 18.307 68.168 +79.758 35.333 66.422 +32.167 13.711 29.986 +31.239 40.789 53.326 +98.666 32.354 88.963 +91.898 39.115 64.101 +74.260 63.066 5.784 +59.077 39.812 26.170 +74.652 9.250 18.714 +41.236 11.334 83.988 +73.372 39.664 62.825 +90.898 11.824 69.798 +0.342 86.057 37.605 +73.428 82.854 88.863 +77.291 23.437 83.628 +3.916 57.949 21.634 +90.824 68.307 8.529 +79.211 95.960 32.546 +5.365 72.265 62.596 +9.342 91.006 72.703 +70.792 87.863 83.588 +81.254 32.768 91.528 +18.923 75.856 1.515 +1.365 73.040 77.284 +81.345 88.455 48.872 +84.294 84.372 26.139 +12.166 52.835 55.948 +19.570 94.656 69.101 +3.425 39.148 9.240 +31.747 25.960 41.736 +30.051 94.109 12.610 +91.694 17.670 66.069 +23.808 66.730 53.574 +66.436 42.135 81.832 +21.740 29.067 43.133 +54.563 26.935 46.171 +60.641 79.344 57.894 +25.543 41.284 49.613 +43.354 5.369 73.088 +10.162 27.107 30.618 +9.822 25.673 78.716 +10.570 8.120 22.002 +32.096 27.836 26.724 +60.964 75.720 17.177 +70.803 96.570 62.206 +61.850 69.386 91.190 +85.199 97.373 57.578 +84.012 65.076 66.456 +60.964 29.266 45.932 +34.050 89.207 42.517 +91.746 26.904 52.951 +46.669 52.351 56.868 +31.246 18.110 59.417 +14.115 67.722 54.602 +83.045 87.943 6.187 +38.570 89.671 90.160 +52.533 70.309 37.227 +46.268 89.375 87.833 +88.067 58.702 84.810 +93.977 24.050 48.587 +52.086 70.178 43.947 +29.931 55.609 10.587 +4.382 2.585 84.726 +6.608 20.739 77.223 +42.356 31.194 74.199 +35.834 40.746 99.023 +22.061 4.403 51.083 +94.942 12.849 90.333 +0.594 21.437 32.718 +43.099 29.409 35.798 +68.204 30.844 64.804 +60.092 95.069 79.394 +26.926 45.025 19.557 +35.729 29.169 25.757 +73.049 61.495 40.405 +90.557 59.860 32.063 +30.914 49.709 43.593 +10.707 2.905 42.448 +95.041 57.132 55.411 +8.468 20.694 56.255 +72.130 9.039 63.829 +41.793 31.920 18.230 +81.794 93.037 34.523 +20.429 78.714 41.493 +8.863 96.346 12.323 +13.136 2.968 92.792 +86.558 86.728 42.634 +32.624 66.668 80.803 +90.829 27.279 85.146 +84.966 10.093 40.087 +5.585 91.826 99.886 +14.561 46.986 20.652 +66.613 97.697 98.386 +79.880 75.573 1.978 +50.748 12.647 19.157 +48.667 49.607 3.807 +64.567 84.482 94.989 +82.660 23.263 68.323 +23.784 17.657 81.708 +83.904 9.447 20.477 +46.990 8.147 55.126 +29.061 82.252 57.768 +13.722 94.102 10.171 +20.652 3.355 86.698 +31.663 78.119 7.803 +19.648 59.005 71.207 +97.368 90.891 69.641 +91.310 91.971 25.694 +22.174 54.151 64.556 +33.547 92.062 10.657 +84.120 42.230 27.348 +66.417 43.085 37.760 +88.957 60.550 97.356 +33.868 94.828 80.222 +10.964 50.535 29.205 +9.523 49.914 7.259 +96.565 83.752 62.245 +51.545 99.151 12.891 +61.155 33.817 7.509 +25.648 60.166 11.388 +79.479 68.252 27.736 +7.539 35.132 97.046 +50.905 10.022 18.110 +97.857 99.801 64.631 +28.563 0.998 2.654 +84.570 20.980 9.918 +2.309 98.698 17.634 +84.322 9.129 55.187 +63.301 40.058 71.880 +88.959 90.733 1.593 +97.815 68.921 31.608 +54.192 8.152 5.126 +31.058 99.749 40.327 +50.685 57.307 12.770 +91.688 51.562 32.849 +74.675 73.057 14.660 +93.834 51.374 55.528 +95.396 93.563 52.120 +37.248 63.752 46.027 +23.945 36.267 32.639 +35.360 36.293 27.330 +85.888 14.992 4.902 +75.069 63.837 19.036 +94.824 18.723 30.265 +46.850 16.323 89.485 +72.455 64.101 93.427 +16.886 66.123 54.622 +72.630 64.804 42.977 +28.369 5.249 52.307 +11.550 28.706 62.002 +52.632 96.384 76.703 +17.189 77.816 86.603 +74.479 34.764 78.083 +10.308 74.129 14.156 +3.817 61.400 14.867 +53.681 46.977 48.090 +97.723 8.195 71.576 +30.792 32.620 88.284 +45.410 38.681 27.755 +79.863 15.567 98.501 +81.458 86.146 16.739 +42.183 72.383 19.208 +20.407 66.090 51.368 +28.142 64.199 76.448 +44.707 29.488 7.622 +41.680 63.166 97.787 +76.466 25.219 51.806 +80.049 73.117 43.752 +45.888 97.148 16.503 +75.500 65.677 8.297 +62.287 67.168 83.491 +17.902 83.005 21.842 +49.683 58.708 15.410 +80.151 97.921 94.185 +32.817 24.798 3.681 +13.737 94.950 77.293 +31.775 35.056 18.465 +31.047 73.083 76.652 +26.962 91.525 71.882 +99.086 37.073 37.868 +99.801 24.205 41.877 +30.536 60.720 27.634 +99.105 23.438 68.729 +30.727 77.712 86.561 +65.610 3.828 85.941 +90.582 95.737 78.881 +6.564 38.759 3.000 +67.256 83.183 0.358 +13.768 27.183 29.023 +66.589 48.060 32.162 +79.058 46.598 37.613 +99.165 1.763 72.819 +74.262 44.817 87.309 +78.259 69.862 22.025 +23.260 35.502 55.813 +31.058 4.930 85.038 +23.433 90.781 35.965 +10.903 56.878 72.130 +45.734 91.414 7.195 +98.721 31.763 68.202 +6.448 69.072 7.388 +26.256 95.570 78.798 +10.834 94.497 19.791 +15.970 63.031 32.652 +96.951 63.599 80.370 +25.612 18.780 51.268 +86.566 64.653 39.173 +86.549 97.652 23.251 +85.037 73.209 50.263 +0.018 59.038 37.046 +70.964 86.607 58.337 +84.763 16.044 98.846 +13.304 42.141 24.636 +72.252 70.796 62.802 +34.238 47.096 33.099 +5.174 92.318 24.703 +62.471 15.773 21.426 +83.474 90.618 26.429 +57.691 58.984 36.811 +6.342 60.331 11.062 +22.928 1.593 57.225 +3.699 17.751 84.200 +99.395 30.033 95.427 +26.446 47.314 28.303 +76.548 56.066 2.142 +49.650 26.479 88.356 +68.618 57.587 59.259 +29.599 63.100 79.704 +54.227 49.428 44.530 +76.046 66.316 45.041 +46.198 94.605 11.230 +50.696 72.926 16.399 +28.850 82.377 53.925 +67.048 66.338 80.161 +46.341 37.251 83.022 +43.913 87.338 35.857 +50.727 41.264 53.677 +93.334 35.821 21.521 +76.762 29.658 50.215 +5.242 73.933 15.096 +33.071 81.585 16.007 +17.190 44.881 29.576 +18.633 67.521 5.332 +32.945 32.479 92.811 +48.647 23.438 71.645 +23.006 75.917 36.123 +15.466 8.847 63.699 +11.383 77.390 30.720 +5.520 27.070 83.703 +23.717 59.428 80.476 +30.890 67.440 41.064 +53.365 39.630 53.663 +29.642 91.748 71.090 +44.998 46.149 65.956 +9.165 72.667 54.169 +14.021 33.752 90.367 +95.045 86.899 80.550 +18.428 21.442 95.940 +49.367 27.506 35.768 +95.149 35.177 12.254 +8.490 80.820 98.279 +22.329 37.538 88.288 +84.323 86.133 24.067 +93.048 77.669 96.306 +74.677 79.233 62.127 +94.662 24.444 9.627 +38.704 1.995 83.122 +5.100 11.608 19.723 +40.492 23.545 83.509 +6.113 41.231 16.774 +27.071 67.800 35.454 +0.649 59.793 28.990 +42.224 26.912 14.426 +23.399 45.921 41.500 +42.273 79.853 40.960 +80.214 69.375 13.926 +58.338 45.045 99.409 +12.855 94.307 6.914 +77.213 10.827 37.176 +93.648 85.907 58.450 +2.101 88.166 34.681 +32.481 25.806 18.885 +18.849 67.366 64.996 +62.544 68.626 33.989 +73.219 80.624 17.520 +36.938 57.821 34.241 +52.194 41.799 41.192 +55.052 24.714 48.837 +25.664 65.410 7.541 +74.335 89.095 68.919 +14.130 40.803 4.402 +92.202 64.639 67.279 +99.939 32.141 38.530 +86.192 7.151 73.761 +42.051 87.996 54.685 +23.919 70.616 65.869 +12.298 19.074 14.473 +32.606 5.200 73.364 +70.614 52.238 28.373 +0.134 25.770 1.029 +90.801 2.287 96.569 +53.420 39.739 34.159 +41.345 48.821 99.608 +77.703 72.803 21.963 +98.539 38.423 39.291 +43.062 1.597 1.835 +4.353 96.761 77.816 +47.321 57.768 75.905 +88.032 71.187 62.946 +61.793 30.298 25.291 +9.117 92.087 76.309 +36.210 66.991 18.801 +87.487 46.317 57.704 +29.189 74.362 64.432 +2.101 74.267 61.310 +42.024 83.266 63.775 +29.502 4.802 99.725 +46.993 88.565 62.472 +72.858 20.868 52.863 +81.307 71.122 72.113 +95.780 39.866 23.760 +77.854 63.157 49.808 +17.371 4.042 39.478 +91.738 50.279 16.940 +67.518 9.580 38.518 +63.986 99.675 36.074 +17.523 81.614 86.761 +11.524 31.371 28.430 +40.398 93.934 91.093 +5.424 83.261 39.686 +68.906 75.334 66.905 +71.482 68.546 61.097 +85.447 72.799 84.905 +31.339 52.236 99.961 +25.601 9.235 10.598 +75.693 29.780 8.572 +12.546 79.439 31.859 +89.741 32.218 15.836 +70.101 42.128 85.751 +15.070 97.534 29.301 +21.942 53.067 73.854 +50.951 84.189 57.264 +59.256 29.861 38.394 +54.759 89.838 37.649 +10.910 73.420 41.242 +68.335 13.667 0.525 +42.869 95.314 90.188 +61.000 34.938 59.194 +37.525 10.431 26.543 +66.094 1.519 47.014 +0.557 66.970 74.186 +85.909 41.357 46.327 +31.521 62.791 40.434 +28.270 45.959 96.467 +94.484 70.455 5.819 +9.603 79.506 0.751 +83.312 90.536 83.967 +45.664 49.905 60.159 +90.806 21.602 29.551 +48.155 30.953 53.133 +36.513 68.779 15.968 +38.012 48.111 17.279 +60.906 74.367 53.201 +73.100 67.006 6.833 +56.621 2.306 17.723 +7.513 44.385 38.467 +34.990 22.350 81.673 +53.517 89.463 62.564 +49.040 43.955 58.311 +96.714 20.697 24.464 +45.107 38.700 89.625 +25.194 3.187 3.788 +53.248 53.580 43.373 +18.779 87.111 34.836 +62.365 65.837 28.616 +12.941 92.641 38.758 +46.598 18.842 87.581 +1.298 64.239 11.932 +3.916 33.734 48.291 +81.848 42.529 64.393 +51.173 34.178 86.754 +36.079 28.273 38.534 +59.186 93.377 23.436 +71.095 76.437 39.501 +50.047 0.043 79.309 +56.741 2.685 4.075 +61.899 0.590 19.012 +69.889 86.640 30.367 +85.451 60.824 78.652 +73.473 50.275 93.866 +14.610 74.539 93.768 +29.187 93.210 20.336 +30.573 46.839 28.115 +79.936 4.550 58.420 +55.775 85.055 39.601 +65.068 1.115 56.759 +99.067 23.408 78.207 +11.080 80.236 65.808 +45.772 93.284 78.539 +99.938 20.190 43.532 +81.871 78.214 12.196 +72.535 19.296 88.860 +37.912 81.231 92.065 +6.236 74.683 75.447 +79.509 97.016 50.456 +82.242 60.893 31.103 +93.062 57.424 38.437 +63.859 56.516 51.685 +29.811 87.256 46.516 +63.160 71.532 50.499 +92.391 18.205 22.763 +17.258 69.470 0.028 +15.843 9.190 48.398 +2.125 34.988 98.343 +1.613 77.060 21.787 +20.967 70.188 90.714 +76.842 64.899 37.597 +44.364 80.607 52.105 +42.172 47.947 30.659 +93.310 36.221 19.366 +36.025 76.884 74.970 +25.646 23.223 85.558 +30.017 82.734 92.464 +57.078 81.258 92.022 +3.425 59.938 77.780 +54.747 98.183 87.204 +50.860 17.638 92.804 +33.213 93.286 52.051 +9.249 52.540 25.331 +47.620 0.837 8.813 +52.287 78.988 51.492 +68.835 92.573 48.576 +73.727 27.297 41.869 +81.607 81.360 99.817 +47.614 34.633 21.211 +24.428 93.217 59.049 +23.074 43.765 34.833 +83.013 2.919 60.346 +44.070 54.028 8.966 +16.579 4.428 44.690 +17.872 41.472 74.043 +84.520 43.926 25.873 +28.678 57.254 70.237 +12.353 25.791 36.801 +70.105 96.052 83.088 +90.197 13.505 79.748 +69.143 91.203 87.937 +99.243 74.426 24.256 +87.997 77.136 77.912 +65.237 53.326 76.291 +14.090 69.631 38.630 +10.582 31.302 38.036 +3.991 52.988 29.502 +25.092 18.549 46.712 +7.198 53.001 66.902 +70.376 8.894 7.208 +10.380 83.828 47.629 +34.299 65.911 52.763 +63.045 23.776 35.375 +33.743 22.928 92.569 +49.508 33.422 80.679 +39.092 27.849 48.853 +88.928 33.262 92.543 +39.456 97.173 88.967 +80.920 6.700 2.940 +49.745 18.054 12.256 +76.461 57.114 3.477 +59.566 42.028 25.345 +76.486 90.031 15.079 +58.660 96.000 3.135 +29.439 98.312 19.344 +27.631 25.118 82.650 +76.210 30.267 16.048 +28.315 15.713 21.229 +40.242 18.648 20.834 +51.829 95.173 93.992 +72.359 38.737 15.333 +9.484 97.459 34.397 +75.999 62.728 57.742 +72.019 29.127 88.791 +77.490 47.116 43.212 +54.532 90.010 65.604 +87.763 61.991 22.321 +0.805 46.756 93.160 +73.085 73.443 83.582 +73.087 35.785 39.513 +11.137 70.425 37.445 +97.722 77.031 2.356 +29.741 78.519 98.943 +38.065 11.415 7.493 +85.090 88.442 12.246 +62.870 66.001 97.899 +37.221 5.524 40.259 +88.778 92.158 90.689 +77.113 76.778 17.550 +1.679 87.177 91.802 +83.171 98.918 35.705 +94.742 80.106 4.693 +67.451 48.326 61.303 +5.895 52.196 43.347 +72.901 92.531 31.678 +38.842 43.229 56.422 +12.771 52.474 0.120 +17.516 22.028 9.964 +89.386 41.219 17.000 +51.632 35.086 33.986 +80.478 33.789 7.173 +92.076 86.991 62.998 +52.500 45.572 96.846 +79.846 3.390 43.626 +18.790 54.813 28.844 +55.514 17.608 21.515 +49.480 84.864 96.745 +1.557 49.628 84.227 +4.725 21.268 2.904 +54.838 87.574 32.801 +74.733 29.806 68.863 +7.967 71.553 89.455 +92.358 37.133 79.424 +36.852 35.121 56.781 +36.338 53.615 42.803 +57.450 5.646 10.270 +66.985 46.817 82.222 +51.446 59.565 93.381 +58.964 97.516 11.322 +71.222 87.492 56.322 +97.261 92.098 73.343 +66.257 46.073 26.667 +38.395 75.027 67.594 +81.937 64.541 63.718 +66.928 22.508 35.908 +97.984 73.448 47.616 +81.492 18.208 15.093 +66.495 27.848 8.642 +60.439 49.512 71.632 +12.210 87.223 58.468 +97.136 78.539 53.644 +92.527 77.127 41.535 +72.560 27.110 18.371 +18.312 13.375 55.864 +88.575 95.189 5.868 +84.249 96.864 74.811 +10.986 74.407 30.294 +88.831 46.697 37.320 +35.586 34.151 86.986 +67.821 0.196 18.522 +63.029 29.237 28.582 +63.677 72.165 76.088 +45.958 83.083 37.881 +88.820 91.932 22.938 +2.640 36.062 24.025 +58.605 67.578 63.065 +0.068 58.642 36.288 +5.114 12.316 54.679 +22.298 26.183 34.485 +84.586 91.328 21.150 +9.669 45.732 74.177 +47.175 49.028 3.837 +85.759 43.692 69.856 +0.078 80.326 57.297 +62.485 41.267 40.614 +4.778 46.422 93.461 +12.180 6.633 35.043 +61.604 69.952 30.543 +27.860 18.935 78.828 +44.029 98.111 39.807 +19.812 69.496 7.382 +9.664 0.325 80.341 +55.400 65.901 93.689 +81.839 81.797 91.960 +78.283 70.468 79.049 +26.708 91.253 78.822 +8.443 67.483 71.479 +46.212 83.576 62.710 +42.387 4.759 17.191 +31.704 51.914 42.948 +37.173 31.579 37.580 +30.107 63.009 49.008 +67.349 63.854 84.844 +31.840 40.656 67.998 +12.292 50.939 19.626 +4.176 29.818 29.863 +35.747 19.729 84.165 +35.923 5.209 22.465 +9.426 98.381 3.662 +89.282 38.381 22.233 +59.518 12.821 30.094 +46.299 14.202 76.168 +56.787 37.619 69.006 +3.534 57.010 71.253 +56.941 22.963 37.256 +86.415 37.054 18.236 +34.100 10.619 63.784 +90.396 35.585 50.070 +44.037 63.550 5.748 +89.610 41.121 59.640 +39.447 31.223 37.498 +4.516 77.150 46.608 +22.478 40.833 22.419 +71.312 52.431 6.018 +22.361 74.276 79.502 +80.010 16.922 19.760 +91.920 62.541 53.333 +31.518 69.405 30.824 +16.435 99.572 22.918 +32.243 25.472 15.815 +63.599 62.926 90.302 +87.261 27.646 76.159 +92.638 90.659 62.282 +48.532 81.880 47.331 +4.071 34.896 29.718 +15.740 88.200 59.146 +95.688 19.908 0.105 +5.696 7.291 51.232 +93.148 24.003 5.368 +97.857 61.635 65.321 +20.043 73.202 82.565 +13.064 9.533 60.598 +38.407 43.903 68.494 +89.118 85.849 67.260 +29.446 80.202 7.682 +18.281 65.362 45.372 +50.055 18.258 8.070 +10.183 46.705 16.497 +24.449 99.394 90.581 +30.162 58.381 1.183 +44.470 40.129 32.103 +42.819 67.417 19.820 +15.942 0.927 49.894 +7.883 97.831 92.508 +12.465 66.544 86.717 +38.574 95.042 27.100 +69.480 74.765 36.797 +38.898 66.183 86.572 +59.059 33.217 73.862 +97.210 80.514 27.077 +45.016 66.750 63.898 +79.288 81.855 47.698 +95.508 82.302 40.695 +49.767 82.318 69.885 +69.696 10.894 4.901 +59.614 17.381 98.235 +41.397 44.611 54.026 +87.065 23.265 10.566 +47.426 82.595 27.909 +66.868 82.973 91.746 +74.735 6.871 41.652 +8.571 94.719 96.470 +28.217 44.393 95.210 +98.771 70.689 86.814 +51.347 24.387 79.018 +28.728 33.234 3.149 +32.614 93.536 10.585 +50.430 55.299 55.622 +1.238 9.894 24.313 +68.192 90.729 58.220 +52.072 48.812 21.381 +35.261 64.414 10.396 +99.185 40.405 57.982 +75.888 81.145 80.684 +24.696 32.956 87.048 +62.102 62.626 82.844 +99.466 75.526 63.553 +74.357 13.990 4.261 +17.529 16.429 8.328 +88.595 42.102 46.894 +20.459 72.997 95.788 +95.594 18.215 47.579 +31.446 88.446 71.920 +47.164 75.506 33.357 +63.748 20.134 19.368 +77.388 18.575 41.392 +82.664 11.866 4.240 +94.344 66.082 46.539 +54.440 73.362 66.634 +21.160 50.945 23.498 +6.028 99.887 44.780 +66.698 24.397 57.401 +94.555 12.209 56.398 +71.345 92.991 65.655 +69.452 24.495 44.568 +37.230 11.476 15.517 +29.685 68.910 37.389 +63.594 42.177 3.492 +86.254 29.164 68.421 +54.965 13.941 92.909 +40.492 90.177 69.623 +4.398 94.257 0.338 +62.634 71.582 30.898 +17.252 58.275 46.742 +85.224 23.727 34.811 +88.192 62.260 88.809 +21.127 65.976 25.304 +69.382 28.468 64.922 +37.057 98.826 72.884 +29.564 28.362 0.164 +97.628 29.120 12.584 +49.242 24.034 38.807 +80.894 87.381 75.783 +87.948 39.149 10.654 +19.435 59.183 22.595 +96.005 83.790 90.566 +58.177 90.039 31.877 +89.939 38.454 6.440 +34.746 92.308 84.037 +21.245 4.782 69.107 +75.320 90.265 4.712 +10.403 81.256 94.174 +80.163 88.157 41.177 +98.397 5.367 18.087 +21.524 58.552 48.237 +57.578 87.077 62.932 +41.993 92.632 35.562 +62.103 16.721 12.164 +75.082 68.686 7.228 +19.590 71.900 34.915 +89.491 13.575 29.032 +22.782 37.929 53.463 +13.649 8.418 26.381 +70.164 43.777 27.231 +33.702 23.487 20.629 +93.641 56.286 19.160 +83.563 27.895 94.316 +99.706 59.165 85.026 +94.135 72.119 64.379 +97.361 16.623 53.115 +25.927 70.909 79.732 +34.594 14.424 37.365 +37.279 51.316 27.471 +67.850 21.152 99.700 +3.001 76.269 76.040 +32.150 90.009 71.487 +85.809 4.531 38.955 +88.704 54.774 86.225 +17.348 19.529 57.964 +91.637 73.562 18.445 +85.319 14.197 67.906 +14.445 59.758 71.821 +68.265 88.843 68.984 +2.584 62.437 93.601 +56.562 31.474 36.843 +3.077 28.322 95.186 +45.155 52.863 1.922 +60.794 86.118 39.086 +55.358 34.843 21.259 +49.199 41.941 71.254 +15.863 83.399 6.553 +19.632 94.581 72.045 +9.766 16.660 9.220 +45.629 62.033 7.687 +29.351 40.956 57.849 +9.504 29.808 52.387 +4.532 99.964 96.291 +18.442 68.235 35.966 +16.705 17.310 46.280 +51.881 34.512 46.714 +36.507 81.692 70.432 +75.297 47.545 78.961 +42.094 55.929 7.556 +15.847 54.553 81.226 +71.402 82.952 99.013 +80.108 60.435 6.988 +26.128 38.350 93.280 +14.807 59.596 45.658 +92.165 62.065 65.773 +18.352 48.338 67.700 +37.984 32.524 96.188 +18.862 40.249 32.636 +42.218 11.828 19.280 +46.736 0.994 11.552 +29.706 47.661 83.981 +64.285 39.251 72.348 +32.924 18.806 86.370 +28.440 71.700 74.166 +66.278 29.302 64.214 +75.645 39.203 51.036 +3.878 9.415 74.061 +40.795 13.826 19.552 +88.362 58.094 99.286 +78.300 59.905 83.550 +96.999 45.171 31.557 +6.050 12.773 94.001 +31.020 86.177 15.290 +69.346 14.576 45.069 +30.858 57.688 70.269 +37.746 4.112 89.332 +11.284 56.118 1.299 +33.525 36.000 55.079 +40.333 32.879 44.130 +15.260 0.745 66.261 diff --git a/contrib/kmeans/triangle_inequality_base_kmeans.cpp b/contrib/kmeans/triangle_inequality_base_kmeans.cpp new file mode 100644 index 000000000..8bc15d20a --- /dev/null +++ b/contrib/kmeans/triangle_inequality_base_kmeans.cpp @@ -0,0 +1,79 @@ +/* Authors: Greg Hamerly and Jonathan Drake + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2014 + */ + +#include "triangle_inequality_base_kmeans.h" +#include "kmeans_general_functions.h" +#include +#include +#include + +void TriangleInequalityBaseKmeans::free() { + OriginalSpaceKmeans::free(); + delete [] s; + delete [] upper; + delete [] lower; + s = NULL; + upper = NULL; + lower = NULL; +} + +/* This function computes the inter-center distances, keeping only the closest + * distances, and updates "s". After this, s[j] will contain the distance + * between center j and its closest other center, divided by two. The division + * here saves repeated work later, since we always will need the distance / 2. + * + * Parameters: none + * + * Return value: none + */ +// TODO: parallelize this +void TriangleInequalityBaseKmeans::update_s(int threadId) { + // initialize + for (int c1 = 0; c1 < k; ++c1) { + if (c1 % numThreads == threadId) { + s[c1] = std::numeric_limits::max(); + } + } + // compute inter-center squared distances between all pairs + for (int c1 = 0; c1 < k; ++c1) { + if (c1 % numThreads == threadId) { + for (int c2 = 0; c2 < k; ++c2) { + if (c2 == c1) { + continue; + } + double d2 = centerCenterDist2(c1, c2); + if (d2 < s[c1]) { s[c1] = d2; } + } + // take the root and divide by two + s[c1] = sqrt(s[c1]) / 2.0; + } + } +} + + +/* This function initializes the upper/lower bounds, assignment, centerCounts, + * and sumNewCenters. It sets the bounds to invalid values which will force the + * first iteration of k-means to set them correctly. NB: subclasses should set + * numLowerBounds appropriately before entering this function. + * + * Parameters: none + * + * Return value: none + */ +void TriangleInequalityBaseKmeans::initialize(Dataset const *aX, unsigned short aK, unsigned short *initialAssignment, int aNumThreads) { + OriginalSpaceKmeans::initialize(aX, aK, initialAssignment, aNumThreads); + + s = new double[k]; + upper = new double[n]; + lower = new double[n * numLowerBounds]; + + // start with invalid bounds and assignments which will force the first + // iteration of k-means to do all its standard work + std::fill(s, s + k, 0.0); + std::fill(upper, upper + n, std::numeric_limits::max()); + std::fill(lower, lower + n * numLowerBounds, 0.0); +} + diff --git a/contrib/kmeans/triangle_inequality_base_kmeans.h b/contrib/kmeans/triangle_inequality_base_kmeans.h new file mode 100644 index 000000000..f45d7a103 --- /dev/null +++ b/contrib/kmeans/triangle_inequality_base_kmeans.h @@ -0,0 +1,43 @@ +#ifndef TRIANGLE_INEQUALITY_BASE_KMEANS_H +#define TRIANGLE_INEQUALITY_BASE_KMEANS_H + +/* Authors: Greg Hamerly and Jonathan Drake + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2014 + * + * This class is an abstract base class for several other algorithms that use + * upper & lower bounds to avoid distance calculations in k-means. + */ + +#include "original_space_kmeans.h" + +class TriangleInequalityBaseKmeans : public OriginalSpaceKmeans { + public: + TriangleInequalityBaseKmeans() : numLowerBounds(0), s(NULL), upper(NULL), lower(NULL) {} + virtual ~TriangleInequalityBaseKmeans() { free(); } + + virtual void initialize(Dataset const *aX, unsigned short aK, unsigned short *initialAssignment, int aNumThreads); + virtual void free(); + + protected: + void update_s(int threadId); + + // The number of lower bounds being used by this algorithm. + int numLowerBounds; + + // Half the distance between each center and its closest other center. + double *s; + + // One upper bound for each point on the distance between that point and + // its assigned (closest) center. + double *upper; + + // Lower bound(s) for each point on the distance between that point and + // the centers being tracked for lower bounds, which may be 1 to k. + // Actual size is n * numLowerBounds. + double *lower; +}; + +#endif + diff --git a/src/src.pro b/src/src.pro index 66ce81e7d..e274bef20 100644 --- a/src/src.pro +++ b/src/src.pro @@ -62,7 +62,8 @@ INCLUDEPATH += ../qwt/src \ ../contrib/qtsolutions/qwtcurve \ ../contrib/lmfit \ ../contrib/levmar \ - ../contrib/boost + ../contrib/boost \ + ../contrib/kmeans DEFINES += QXT_STATIC @@ -752,7 +753,10 @@ HEADERS += ../contrib/qtsolutions/codeeditor/codeeditor.h ../contrib/qtsolutions ../contrib/qzip/zipwriter.h ../contrib/lmfit/lmcurve.h ../contrib/lmfit/lmcurve_tyd.h \ ../contrib/lmfit/lmmin.h ../contrib/lmfit/lmstruct.h \ ../contrib/levmar/compiler.h ../contrib/levmar/levmar.h ../contrib/levmar/lm.h ../contrib/levmar/misc.h \ - ../contrib/boost/GeometricTools_BSplineCurve.h + ../contrib/boost/GeometricTools_BSplineCurve.h \ + ../contrib/kmeans/kmeans_dataset.h ../contrib/kmeans/kmeans_general_functions.h ../contrib/kmeans/hamerly_kmeans.h \ + ../contrib/kmeans/kmeans.h ../contrib/kmeans/original_space_kmeans.h ../contrib/kmeans/triangle_inequality_base_kmeans.h + # Train View HEADERS += Train/AddDeviceWizard.h Train/CalibrationData.h Train/ComputrainerController.h Train/Computrainer.h Train/DeviceConfiguration.h \ @@ -859,7 +863,10 @@ SOURCES += ../contrib/qtsolutions/codeeditor/codeeditor.cpp ../contrib/qtsolutio ../contrib/levmar/Axb.c ../contrib/levmar/lm_core.c ../contrib/levmar/lmbc_core.c \ ../contrib/levmar/lmblec_core.c ../contrib/levmar/lmbleic_core.c ../contrib/levmar/lmlec.c ../contrib/levmar/misc.c \ ../contrib/levmar/Axb_core.c ../contrib/levmar/lm.c ../contrib/levmar/lmbc.c ../contrib/levmar/lmblec.c ../contrib/levmar/lmbleic.c \ - ../contrib/levmar/lmlec_core.c ../contrib/levmar/misc_core.c + ../contrib/levmar/lmlec_core.c ../contrib/levmar/misc_core.c \ + ../contrib/kmeans/kmeans_dataset.cpp ../contrib/kmeans/kmeans_general_functions.cpp ../contrib/kmeans/hamerly_kmeans.cpp \ + ../contrib/kmeans/kmeans.cpp ../contrib/kmeans/original_space_kmeans.cpp ../contrib/kmeans/triangle_inequality_base_kmeans.cpp + ## Train View Components SOURCES += Train/AddDeviceWizard.cpp Train/CalibrationData.cpp Train/ComputrainerController.cpp Train/Computrainer.cpp Train/DeviceConfiguration.cpp \