kmeans

ソースコード

 1#include <vector>
 2#include <set>
 3#include "titan_cpplib/algorithm/random.cpp"
 4using namespace std;
 5
 6// Kmeans
 7namespace titan23 {
 8
 9    template <class DistType,
10              class ElmType,
11              DistType (*dist)(const ElmType&, const ElmType&),
12              ElmType (*mean)(const vector<ElmType>&)>
13    class Kmeans {
14      private:
15        int k, max_iter;
16        titan23::Random my_random;
17
18      public:
19        Kmeans() : k(0), max_iter(0) {}
20        Kmeans(const int k, const int max_iter) : k(k), max_iter(max_iter) {}
21
22        pair<vector<int>, vector<ElmType>> fit(const vector<ElmType> &X) {
23            int n = (int)X.size();
24            assert(k <= n);
25            vector<ElmType> first_cluster = {my_random.choice(X)};
26            set<ElmType> cluster_set;
27            cluster_set.insert(first_cluster[0]);
28
29            while (first_cluster.size() < k) {
30                vector<int> p_f(n, 0);
31                bool flag = false;
32                for (int i = 0; i < n; ++i) {
33                    if (cluster_set.find(X[i]) == cluster_set.end()) {
34                        DistType min_d = dist(X[i], first_cluster[0]);
35                        for (int i = 1; i < first_cluster.size(); ++i) {
36                            min_d = min(min_d, dist(X[i], first_cluster[i]));
37                        }
38                        flag = true;
39                        p_f[i] = min_d+1;
40                    }
41                }
42                assert(flag);
43                ElmType tmpk = my_random.choice(X, p_f, false);
44                assert(cluster_set.find(tmpk) == cluster_set.end());
45                first_cluster.emplace_back(tmpk);
46                cluster_set.insert(tmpk); // iranai
47            }
48
49            vector<ElmType> cluster_centers = first_cluster;
50            vector<int> labels(n, -1);
51            for (int i = 0; i < n; ++i) {
52                DistType d = dist(X[i], first_cluster[0]);
53                for (int j = 0; j < k; ++j) {
54                    DistType tmp = dist(X[i], first_cluster[j]);
55                    if (tmp <= d) {
56                        d = tmp;
57                        labels[i] = j;
58                    }
59                }
60            }
61
62            for (int _ = 0; _ < max_iter; ++_) {
63                vector<vector<ElmType>> syuukei(k);
64                for (int i = 0; i < n; ++i) {
65                    syuukei[labels[i]].emplace_back(X[i]);
66                }
67                cluster_centers.clear();
68                for (int i = 0; i < k; ++i) {
69                    cluster_centers.emplace_back(mean(syuukei[i]));
70                }
71                for (int i = 0; i < n; ++i) {
72                    DistType d = dist(X[i], first_cluster[0]);
73                    for (int j = 0; j < k; ++j) {
74                        DistType tmp = dist(X[i], first_cluster[j]);
75                        if (tmp <= d) {
76                            d = tmp;
77                            labels[i] = j;
78                        }
79                    }
80                }
81            }
82            return {labels, cluster_centers};
83        }
84    };
85}
86
87
88/*
89using DistType = int;
90using ElmType = pair<int, int>;
91DistType dist(const ElmType &s, const ElmType &t) {
92    return abs(s.first-t.first) + abs(s.second-t.second);
93}
94ElmType mean(const vector<ElmType> &L) {
95    return {0, 0};
96}
97*/

仕様

Warning

doxygenfile: Cannot find file “titan_cpplib/ahc/kmeans.cpp