kmeans¶
ソースコード¶
1#include <vector>
2#include <set>
3#include "titan_cpplib/algorithm/random.cpp"
4using namespace std;
5
6// Kmeans
7namespace titan23 {
8
9 template <class DistType,
10 class ElmType,
11 DistType (*dist)(const ElmType&, const ElmType&),
12 ElmType (*mean)(const vector<ElmType>&)>
13 class Kmeans {
14 private:
15 int k, max_iter;
16 titan23::Random my_random;
17
18 public:
19 Kmeans() : k(0), max_iter(0) {}
20 Kmeans(const int k, const int max_iter) : k(k), max_iter(max_iter) {}
21
22 pair<vector<int>, vector<ElmType>> fit(const vector<ElmType> &X) {
23 int n = (int)X.size();
24 assert(k <= n);
25 vector<ElmType> first_cluster = {my_random.choice(X)};
26 set<ElmType> cluster_set;
27 cluster_set.insert(first_cluster[0]);
28
29 while (first_cluster.size() < k) {
30 vector<int> p_f(n, 0);
31 bool flag = false;
32 for (int i = 0; i < n; ++i) {
33 if (cluster_set.find(X[i]) == cluster_set.end()) {
34 DistType min_d = dist(X[i], first_cluster[0]);
35 for (int i = 1; i < first_cluster.size(); ++i) {
36 min_d = min(min_d, dist(X[i], first_cluster[i]));
37 }
38 flag = true;
39 p_f[i] = min_d+1;
40 }
41 }
42 assert(flag);
43 ElmType tmpk = my_random.choice(X, p_f, false);
44 assert(cluster_set.find(tmpk) == cluster_set.end());
45 first_cluster.emplace_back(tmpk);
46 cluster_set.insert(tmpk); // iranai
47 }
48
49 vector<ElmType> cluster_centers = first_cluster;
50 vector<int> labels(n, -1);
51 for (int i = 0; i < n; ++i) {
52 DistType d = dist(X[i], first_cluster[0]);
53 for (int j = 0; j < k; ++j) {
54 DistType tmp = dist(X[i], first_cluster[j]);
55 if (tmp <= d) {
56 d = tmp;
57 labels[i] = j;
58 }
59 }
60 }
61
62 for (int _ = 0; _ < max_iter; ++_) {
63 vector<vector<ElmType>> syuukei(k);
64 for (int i = 0; i < n; ++i) {
65 syuukei[labels[i]].emplace_back(X[i]);
66 }
67 cluster_centers.clear();
68 for (int i = 0; i < k; ++i) {
69 cluster_centers.emplace_back(mean(syuukei[i]));
70 }
71 for (int i = 0; i < n; ++i) {
72 DistType d = dist(X[i], first_cluster[0]);
73 for (int j = 0; j < k; ++j) {
74 DistType tmp = dist(X[i], first_cluster[j]);
75 if (tmp <= d) {
76 d = tmp;
77 labels[i] = j;
78 }
79 }
80 }
81 }
82 return {labels, cluster_centers};
83 }
84 };
85}
86
87
88/*
89using DistType = int;
90using ElmType = pair<int, int>;
91DistType dist(const ElmType &s, const ElmType &t) {
92 return abs(s.first-t.first) + abs(s.second-t.second);
93}
94ElmType mean(const vector<ElmType> &L) {
95 return {0, 0};
96}
97*/
仕様¶
Warning
doxygenfile: Cannot find file “titan_cpplib/ahc/kmeans.cpp