mirror of
https://github.com/GoldenCheetah/GoldenCheetah.git
synced 2026-04-15 05:32:21 +00:00
DataFilter - kmeans()
.. kmeans(centers|assignments, k, dim1, dim2 .. dimn)
Performs k-means clustering on data with multiple dimensions
and returns either the cluster centers or the per-point assignments.
The return values are ordered so they can be displayed
easily in an overview table, e.g.
values {
kmeans(centers, 3, metrics(TSS), metrics(IF));
}
.. will look at how we might plot these in charts with either
color coding of points or perhaps voronoi diagrams.
This commit is contained in:
@@ -237,7 +237,7 @@ Dataset *init_centers_kmeanspp_v2(Dataset const &x, unsigned short k) {
|
||||
}
|
||||
|
||||
|
||||
void assign(Dataset const &x, Dataset const &c, unsigned short *assignment) {
|
||||
void kmeans_assign(Dataset const &x, Dataset const &c, unsigned short *assignment) {
|
||||
for (int i = 0; i < x.n; ++i) {
|
||||
double shortestDist2 = std::numeric_limits<double>::max();
|
||||
int closest = 0;
|
||||
|
||||
@@ -78,6 +78,6 @@ void printArray(T const *arr, int length, std::string separator) {
|
||||
|
||||
void centerDataset(Dataset *x);
|
||||
|
||||
void assign(Dataset const &x, Dataset const &c, unsigned short *assignment);
|
||||
void kmeans_assign(Dataset const &x, Dataset const &c, unsigned short *assignment);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
#include "lmcurve.h"
|
||||
#include "LTMTrend.h" // for LR when copying CP chart filtering mechanism
|
||||
#include "WPrime.h" // for LR when copying CP chart filtering mechanism
|
||||
#include "FastKmeans.h" // for kmeans(...)
|
||||
|
||||
#ifdef GC_HAVE_SAMPLERATE
|
||||
// we have libsamplerate
|
||||
@@ -379,6 +380,9 @@ static struct {
|
||||
{ "pdfgamma", 3 }, // pdfgamma(a,b, x) as above for the gamma distribution
|
||||
{ "cdfgamma", 3 }, // cdfgamma(a,b, x) as above for the gamma distribution
|
||||
|
||||
{ "kmeans", 0 }, // kmeans(centers|assignments, k, dim1, dim2, dim3 .. dimn) - return the centers or cluster assignment
|
||||
// from a k means cluster of the data with n dimensions (but commonly just 2 - x and y)
|
||||
|
||||
|
||||
// add new ones above this line
|
||||
{ "", -1 }
|
||||
@@ -2023,6 +2027,21 @@ void Leaf::validateFilter(Context *context, DataFilterRuntime *df, Leaf *leaf)
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (leaf->function == "kmeans") {
|
||||
|
||||
// kmeans(centers|assignments, k, dim1, dim2 .. dimn)
// needs at least four parameters, the first of which must be a symbol
// naming what to return
if (leaf->fparms.count() < 4 || leaf->fparms[0]->type != Leaf::Symbol) {

    leaf->inerror = true;
    DataFiltererrors << QString(tr("kmeans(centers|assignments, k, dim1, dim2, dimn)"));

} else {

    QString symbol=*(leaf->fparms[0]->lvalue.n);

    if (symbol != "centers" && symbol != "assignments") {

        leaf->inerror = true;

        // QString::arg() substitutes numbered markers (%1), not printf-style %s,
        // so %1 is needed for the offending symbol to appear in the message
        DataFiltererrors << QString(tr("kmeans(centers|assignments, k, dim1, dim2, dimn) - %1 unknown")).arg(symbol);

    } else {

        // validate k and each of the dimension expressions
        for(int i=1; i<leaf->fparms.count(); i++) validateFilter(context, df, leaf->fparms[i]);
    }
}
|
||||
|
||||
} else if (leaf->function == "metrics" || leaf->function == "metricstrings" ||
|
||||
leaf->function == "aggmetrics" || leaf->function == "aggmetricstrings") {
|
||||
|
||||
@@ -4613,6 +4632,33 @@ Result Leaf::eval(DataFilterRuntime *df, Leaf *leaf, const Result &x, long it, R
|
||||
return returning;
|
||||
}
|
||||
|
||||
if (leaf->function == "kmeans") {

    // kmeans(centers|assignments, k, dim1, dim2, dim3)
    // returns the cluster centers when the first parameter is "centers",
    // otherwise the cluster assignments; returns Result(0) if the
    // clustering does not run successfully

    Result returning(0);

    QString symbol = *(leaf->fparms[0]->lvalue.n);
    bool wantcenters = (symbol == "centers");

    // get k
    int k = eval(df, leaf->fparms[1],x, it, m, p, c, s, d).number();

    // automatic storage so the wrapper (and everything it allocates) is
    // released on leaving this block; a heap allocation here was never freed
    FastKmeans kmeans;

    // loop through the dimensions
    for(int i=2; i<leaf->fparms.count(); i++)
        kmeans.addDimension(eval(df, leaf->fparms[i],x, it, m, p, c, s, d).asNumeric());

    // calculate
    if (kmeans.run(k)) {
        if (wantcenters) returning = kmeans.centers();
        else returning = kmeans.assignments();
    }

    return returning;
}
|
||||
|
||||
if (leaf->function == "metrics" || leaf->function == "metricstrings" ||
|
||||
leaf->function == "aggmetrics" || leaf->function == "aggmetricstrings") {
|
||||
|
||||
|
||||
141
src/Metrics/FastKmeans.cpp
Normal file
141
src/Metrics/FastKmeans.cpp
Normal file
@@ -0,0 +1,141 @@
|
||||
/*
|
||||
* Copyright (c) 2021 Mark Liversedge (liversedge@gmail.com)
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, write to the Free Software Foundation, Inc., 51
|
||||
* Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "FastKmeans.h"
|
||||
|
||||
#include "kmeans_general_functions.h"
|
||||
|
||||
// Nothing is allocated up front: the algorithm, dataset and result buffers
// stay NULL, and length_ / k_ stay at -1, until dimensions are added and
// run() is called.
FastKmeans::FastKmeans()
    : kmeans(NULL),
      data(NULL),
      assignments_(NULL),
      centers_(NULL),
      length_(-1),
      k_(-1)
{
}
|
||||
// Release whatever run() allocated. delete / delete[] on a null pointer is
// a no-op, so members that were never allocated need no guarding.
FastKmeans::~FastKmeans()
{
    delete data;
    delete kmeans;
    delete centers_;
    delete [] assignments_;
}
|
||||
|
||||
// all dimensions are resized to the largest
|
||||
// and filled with zeroes, but really the caller
|
||||
// should make sure they match
|
||||
void
|
||||
FastKmeans::addDimension(QVector<double> &data)
|
||||
{
|
||||
// take a copy, and init for first dimension
|
||||
dimension.append(data);
|
||||
int index = dimension.count() - 1;
|
||||
|
||||
// first init length, no resizing needed
|
||||
if (dimension.count() == 1) length_=data.length();
|
||||
else {
|
||||
if (data.length() > length_) {
|
||||
|
||||
// if longer, we need to resize everyone else
|
||||
for(int i=0; i<index; i++) {
|
||||
dimension[i].resize(data.length());
|
||||
for(int j=length_; j<data.length(); j++)
|
||||
dimension[i][j]=0;
|
||||
}
|
||||
length_ = data.length();
|
||||
|
||||
} else if (data.length() < length_) {
|
||||
|
||||
// if shorter we need to resize ours
|
||||
dimension[index].resize(length_);
|
||||
for(int j=length_; j<data.length(); j++)
|
||||
dimension[index][j]=0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// find centers and assignments for k clusters
|
||||
bool
|
||||
FastKmeans::run(int k)
|
||||
{
|
||||
// no data, or dimensions
|
||||
if (k <2 || length_ <= 0 || dimension.count() <= 0) return false;
|
||||
|
||||
// set number if clusters we looked for
|
||||
k_ = k;
|
||||
|
||||
// if we have old data, delete it
|
||||
if (data) delete data;
|
||||
if (kmeans) delete kmeans;
|
||||
if (centers_) delete centers_;
|
||||
if (assignments_) delete [] assignments_;
|
||||
|
||||
// lets get a new one
|
||||
kmeans = new HamerlyKmeans();
|
||||
data = new Dataset(length(), dim());
|
||||
|
||||
// now fill the data set with our data
|
||||
int index=0;
|
||||
for(int i=0; i<length(); i++)
|
||||
for(int j=0; j<dim(); j++)
|
||||
data->data[index++] = dimension[j][i];
|
||||
|
||||
// initialise centers
|
||||
centers_ = init_centers_kmeanspp_v2(*data, k_);
|
||||
|
||||
// initialise assignments
|
||||
assignments_ = new unsigned short[length()];
|
||||
|
||||
// setup
|
||||
kmeans_assign(*data, *centers_, assignments_);
|
||||
kmeans->initialize(data, k, assignments_, 1);
|
||||
|
||||
// run the algorithm, max out at 10,000 iterations
|
||||
// it returns true or false if it succeeded
|
||||
return kmeans->run(10000);
|
||||
}
|
||||
|
||||
// get centers (k x dimensions)
|
||||
QVector<double>
|
||||
FastKmeans::centers()
|
||||
{
|
||||
QVector<double> returning;
|
||||
|
||||
if (kmeans == NULL) return returning;
|
||||
|
||||
Dataset const *finalcenters = kmeans->getCenters();
|
||||
|
||||
// lets reorganise them to d1,d1,d1,d2,d2,d2,d2,d3,d3,d3
|
||||
// from d1,d2,d3,d1,d2,d3,d1,d2,d3
|
||||
for(int d=0; d<dim(); d++)
|
||||
for(int n=0; n<k(); n++)
|
||||
returning << finalcenters->data[(n * dim()) + d];
|
||||
|
||||
return returning;
|
||||
}
|
||||
|
||||
// get assignments - n indexes
|
||||
QVector<double>
|
||||
FastKmeans::assignments()
|
||||
{
|
||||
QVector<double> returning;
|
||||
|
||||
if (kmeans == NULL) return returning;
|
||||
|
||||
Dataset const *finalcenters = kmeans->getCenters();
|
||||
kmeans_assign(*data, *finalcenters, assignments_);
|
||||
|
||||
// lets reorganise and convert to doubles (datafilter likes these)
|
||||
for (int i = 0; i < data->n; ++i) returning << assignments_[i];
|
||||
|
||||
return returning;
|
||||
}
|
||||
|
||||
68
src/Metrics/FastKmeans.h
Normal file
68
src/Metrics/FastKmeans.h
Normal file
@@ -0,0 +1,68 @@
|
||||
/*
|
||||
* Copyright (c) 2021 Mark Liversedge (liversedge@gmail.com)
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, write to the Free Software Foundation, Inc., 51
|
||||
* Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "hamerly_kmeans.h"
|
||||
#include "kmeans_dataset.h"
|
||||
#include <QVector>
|
||||
#include <QList>
|
||||
|
||||
#ifndef _GC_FastKmeans_h
|
||||
#define _GC_FastKmeans_h 1
|
||||
|
||||
class FastKmeans
|
||||
{
|
||||
public:
|
||||
|
||||
// since we are a wrapper around algorithms we need
|
||||
// to initialise and cleanup subordinates, especially
|
||||
// since they typically use non-QT containers
|
||||
FastKmeans();
|
||||
~FastKmeans();
|
||||
|
||||
// all dimensions are resized to the largest
|
||||
// and filled with zeroes, but really the caller
|
||||
// should make sure they match
|
||||
void addDimension(QVector<double> &data);
|
||||
|
||||
// find centers and assignments for k clusters
|
||||
bool run(int k);
|
||||
|
||||
// get centers (k x dimensions)
|
||||
QVector<double> centers();
|
||||
|
||||
// get assignments - n indexes in datafilter order (use in overview tables etc)
|
||||
QVector<double> assignments();
|
||||
|
||||
int length() const { return length_; } ; // number of points
|
||||
int dim() const { return dimension.count(); } // number of dimensions to a point
|
||||
int k() const { return k_; } // number of clusters used
|
||||
|
||||
private:
|
||||
|
||||
HamerlyKmeans *kmeans; // the algorithm we use
|
||||
Dataset *data;
|
||||
unsigned short *assignments_; // updated with the cluster assignments
|
||||
Dataset *centers_;
|
||||
|
||||
QList<QVector<double> > dimension;
|
||||
|
||||
int length_; // updated as we add dimensions, but really should be the same
|
||||
int k_; // updated when we run
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -741,7 +741,7 @@ HEADERS += Gui/AboutDialog.h Gui/AddIntervalDialog.h Gui/AnalysisSidebar.h Gui/C
|
||||
HEADERS += Metrics/Banister.h Metrics/CPSolver.h Metrics/Estimator.h Metrics/ExtendedCriticalPower.h Metrics/HrZones.h Metrics/PaceZones.h \
|
||||
Metrics/PDModel.h Metrics/PMCData.h Metrics/PowerProfile.h Metrics/RideMetadata.h Metrics/RideMetric.h Metrics/SpecialFields.h \
|
||||
Metrics/Statistic.h Metrics/UserMetricParser.h Metrics/UserMetricSettings.h Metrics/VDOTCalculator.h Metrics/WPrime.h Metrics/Zones.h \
|
||||
Metrics/BlinnSolver.h
|
||||
Metrics/BlinnSolver.h Metrics/FastKmeans.h
|
||||
|
||||
## Planning and Compliance
|
||||
HEADERS += Planning/PlanningWindow.h
|
||||
@@ -850,7 +850,7 @@ SOURCES += Metrics/aBikeScore.cpp Metrics/aCoggan.cpp Metrics/AerobicDecoupling.
|
||||
Metrics/SwimMetrics.cpp Metrics/SpecialFields.cpp Metrics/Statistic.cpp Metrics/SustainMetric.cpp Metrics/SwimScore.cpp \
|
||||
Metrics/TimeInZone.cpp Metrics/TRIMPPoints.cpp Metrics/UserMetric.cpp Metrics/UserMetricParser.cpp Metrics/VDOTCalculator.cpp \
|
||||
Metrics/VDOT.cpp Metrics/WattsPerKilogram.cpp Metrics/WPrime.cpp Metrics/Zones.cpp Metrics/HrvMetrics.cpp Metrics/BlinnSolver.cpp \
|
||||
Metrics/RowMetrics.cpp
|
||||
Metrics/RowMetrics.cpp Metrics/FastKmeans.cpp
|
||||
|
||||
## Planning and Compliance
|
||||
SOURCES += Planning/PlanningWindow.cpp
|
||||
|
||||
Reference in New Issue
Block a user