diff --git a/contrib/kmeans/kmeans_general_functions.cpp b/contrib/kmeans/kmeans_general_functions.cpp index 5784c87b9..21a2974c3 100644 --- a/contrib/kmeans/kmeans_general_functions.cpp +++ b/contrib/kmeans/kmeans_general_functions.cpp @@ -237,7 +237,7 @@ Dataset *init_centers_kmeanspp_v2(Dataset const &x, unsigned short k) { } -void assign(Dataset const &x, Dataset const &c, unsigned short *assignment) { +void kmeans_assign(Dataset const &x, Dataset const &c, unsigned short *assignment) { for (int i = 0; i < x.n; ++i) { double shortestDist2 = std::numeric_limits::max(); int closest = 0; diff --git a/contrib/kmeans/kmeans_general_functions.h b/contrib/kmeans/kmeans_general_functions.h index 9aef9c32a..c7e4c404d 100644 --- a/contrib/kmeans/kmeans_general_functions.h +++ b/contrib/kmeans/kmeans_general_functions.h @@ -78,6 +78,6 @@ void printArray(T const *arr, int length, std::string separator) { void centerDataset(Dataset *x); -void assign(Dataset const &x, Dataset const &c, unsigned short *assignment); +void kmeans_assign(Dataset const &x, Dataset const &c, unsigned short *assignment); #endif diff --git a/src/Core/DataFilter.cpp b/src/Core/DataFilter.cpp index 9e3c2202d..f2c59bb71 100644 --- a/src/Core/DataFilter.cpp +++ b/src/Core/DataFilter.cpp @@ -37,6 +37,7 @@ #include "lmcurve.h" #include "LTMTrend.h" // for LR when copying CP chart filtering mechanism #include "WPrime.h" // for LR when copying CP chart filtering mechanism +#include "FastKmeans.h" // for kmeans(...) #ifdef GC_HAVE_SAMPLERATE // we have libsamplerate @@ -379,6 +380,9 @@ static struct { { "pdfgamma", 3 }, // pdfgamma(a,b, x) as above for the gamma distribution { "cdfgamma", 3 }, // cdfgamma(a,b, x) as above for the gamma distribution + { "kmeans", 0 }, // kmeans(centers|assignments, k, dim1, dim2, dim3 .. 
dimn) - return the centers or cluster assignment + // from a k means cluster of the data with n dimensions (but commonly just 2- x and y) + // add new ones above this line { "", -1 } @@ -2023,6 +2027,21 @@ void Leaf::validateFilter(Context *context, DataFilterRuntime *df, Leaf *leaf) } } } + } else if (leaf->function == "kmeans") { + + if (leaf->fparms.count() < 4 || leaf->fparms[0]->type != Leaf::Symbol) { // need the symbol, k and at least two dimensions + leaf->inerror = true; + DataFiltererrors << QString(tr("kmeans(centers|assignments, k, dim1, dim2, dimn)")); + } else { + QString symbol=*(leaf->fparms[0]->lvalue.n); + if (symbol != "centers" && symbol != "assignments") { + leaf->inerror = true; + DataFiltererrors << QString(tr("kmeans(centers|assignments, k, dim1, dim2, dimn) - %1 unknown")).arg(symbol); // QString::arg() only substitutes %1..%99 markers, never %s + } else { + for(int i=1; i<leaf->fparms.count(); i++) validateFilter(context, df, leaf->fparms[i]); // parameter 0 is the symbol checked above, so validate from 1 onwards + } + } + } else if (leaf->function == "metrics" || leaf->function == "metricstrings" || leaf->function == "aggmetrics" || leaf->function == "aggmetricstrings") { @@ -4613,6 +4632,33 @@ Result Leaf::eval(DataFilterRuntime *df, Leaf *leaf, const Result &x, long it, R return returning; } + if (leaf->function == "kmeans") { + // kmeans(centers|assignments, k, dim1, dim2, dim3) - the first parameter is a symbol selecting which result is returned + + Result returning(0); + + QString symbol = *(leaf->fparms[0]->lvalue.n); + bool wantcenters=false; + if (symbol == "centers") wantcenters=true; + + // get k, the requested number of clusters, from the second parameter + int k = eval(df, leaf->fparms[1],x, it, m, p, c, s, d).number(); + + FastKmeans kmeans; // automatic storage, so the wrapper and its buffers are released on return instead of leaking + + // loop through the dimensions, parameters 2..n each supply one + for(int i=2; i<leaf->fparms.count(); i++) + kmeans.addDimension(eval(df, leaf->fparms[i],x, it, m, p, c, s, d).asNumeric()); + + // calculate, if run() reports failure the initial Result(0) is returned unchanged + if (kmeans.run(k)) { + if (wantcenters) returning = kmeans.centers(); + else returning = kmeans.assignments(); + } + + return returning; + } + if (leaf->function == "metrics" || leaf->function == "metricstrings" || leaf->function == "aggmetrics" || leaf->function == "aggmetricstrings") { diff --git 
a/src/Metrics/FastKmeans.cpp b/src/Metrics/FastKmeans.cpp new file mode 100644 index 000000000..b31f78f2d --- /dev/null +++ b/src/Metrics/FastKmeans.cpp @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2021 Mark Liversedge (liversedge@gmail.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "FastKmeans.h" + +#include "kmeans_general_functions.h" + +FastKmeans::FastKmeans() : kmeans(NULL), data(NULL), assignments_(NULL), centers_(NULL), length_(-1), k_(-1) {} +FastKmeans::~FastKmeans() +{ + if (data) delete data; + if (kmeans) delete kmeans; + if (centers_) delete centers_; + if (assignments_) delete [] assignments_; +} + +// all dimensions are resized to the largest +// and filled with zeroes, but really the caller +// should make sure they match +void +FastKmeans::addDimension(QVector &data) +{ + // take a copy, and init for first dimension + dimension.append(data); + int index = dimension.count() - 1; + + // first init length, no resizing needed + if (dimension.count() == 1) length_=data.length(); + else { + if (data.length() > length_) { + + // if longer, we need to resize everyone else + for(int i=0; idata[index++] = dimension[j][i]; + + // initialise centers + centers_ = init_centers_kmeanspp_v2(*data, k_); + + // initialise assignments + assignments_ = new unsigned 
short[length()]; + + // setup + kmeans_assign(*data, *centers_, assignments_); + kmeans->initialize(data, k, assignments_, 1); + + // run the algorithm, max out at 10,000 iterations + // it returns true or false if it succeeded + return kmeans->run(10000); +} + +// get centers (k x dimensions) +QVector +FastKmeans::centers() +{ + QVector returning; + + if (kmeans == NULL) return returning; + + Dataset const *finalcenters = kmeans->getCenters(); + + // lets reorganise them to d1,d1,d1,d2,d2,d2,d2,d3,d3,d3 + // from d1,d2,d3,d1,d2,d3,d1,d2,d3 + for(int d=0; ddata[(n * dim()) + d]; + + return returning; +} + +// get assignments - n indexes +QVector +FastKmeans::assignments() +{ + QVector returning; + + if (kmeans == NULL) return returning; + + Dataset const *finalcenters = kmeans->getCenters(); + kmeans_assign(*data, *finalcenters, assignments_); + + // lets reorganise and convert to doubles (datafilter likes these) + for (int i = 0; i < data->n; ++i) returning << assignments_[i]; + + return returning; +} + diff --git a/src/Metrics/FastKmeans.h b/src/Metrics/FastKmeans.h new file mode 100644 index 000000000..af705a403 --- /dev/null +++ b/src/Metrics/FastKmeans.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2021 Mark Liversedge (liversedge@gmail.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "hamerly_kmeans.h" +#include "kmeans_dataset.h" +#include +#include + +#ifndef _GC_FastKmeans_h +#define _GC_FastKmeans_h 1 + +class FastKmeans +{ + public: + + // since we are a wrapper around algorithms we need + // to initialise and cleanup subordinates, especially + // since they typically use non-QT containers + FastKmeans(); + ~FastKmeans(); + + // all dimensions are resized to the largest + // and filled with zeroes, but really the caller + // should make sure they match + void addDimension(QVector &data); + + // find centers and assignments for k clusters + bool run(int k); + + // get centers (k x dimensions) + QVector centers(); + + // get assignments - n indexes in datafilter order (use in overview tables etc) + QVector assignments(); + + int length() const { return length_; } ; // number of points + int dim() const { return dimension.count(); } // number of dimensions to a point + int k() const { return k_; } // number of clusters used + + private: + + HamerlyKmeans *kmeans; // the algorithm we use + Dataset *data; + unsigned short *assignments_; // updated with the cluster assignments + Dataset *centers_; + + QList > dimension; + + int length_; // updated as we add dimensions, but really should be the same + int k_; // updated when we run +}; + +#endif diff --git a/src/src.pro b/src/src.pro index e274bef20..888ec1b58 100644 --- a/src/src.pro +++ b/src/src.pro @@ -741,7 +741,7 @@ HEADERS += Gui/AboutDialog.h Gui/AddIntervalDialog.h Gui/AnalysisSidebar.h Gui/C HEADERS += Metrics/Banister.h Metrics/CPSolver.h Metrics/Estimator.h Metrics/ExtendedCriticalPower.h Metrics/HrZones.h Metrics/PaceZones.h \ Metrics/PDModel.h Metrics/PMCData.h Metrics/PowerProfile.h Metrics/RideMetadata.h Metrics/RideMetric.h Metrics/SpecialFields.h 
\ Metrics/Statistic.h Metrics/UserMetricParser.h Metrics/UserMetricSettings.h Metrics/VDOTCalculator.h Metrics/WPrime.h Metrics/Zones.h \ - Metrics/BlinnSolver.h + Metrics/BlinnSolver.h Metrics/FastKmeans.h ## Planning and Compliance HEADERS += Planning/PlanningWindow.h @@ -850,7 +850,7 @@ SOURCES += Metrics/aBikeScore.cpp Metrics/aCoggan.cpp Metrics/AerobicDecoupling. Metrics/SwimMetrics.cpp Metrics/SpecialFields.cpp Metrics/Statistic.cpp Metrics/SustainMetric.cpp Metrics/SwimScore.cpp \ Metrics/TimeInZone.cpp Metrics/TRIMPPoints.cpp Metrics/UserMetric.cpp Metrics/UserMetricParser.cpp Metrics/VDOTCalculator.cpp \ Metrics/VDOT.cpp Metrics/WattsPerKilogram.cpp Metrics/WPrime.cpp Metrics/Zones.cpp Metrics/HrvMetrics.cpp Metrics/BlinnSolver.cpp \ - Metrics/RowMetrics.cpp + Metrics/RowMetrics.cpp Metrics/FastKmeans.cpp ## Planning and Compliance SOURCES += Planning/PlanningWindow.cpp