hash string

ソースコード

  1#include <vector>
  2#include <random>
  3#include <cassert>
  4
  5#include "titan_cpplib/data_structures/segment_tree.cpp"
  6using namespace std;
  7
  8// HashString
  9namespace titan23 {
 10
 11    class HashStringBase {
 12        // ref: https://qiita.com/keymoon/items/11fac5627672a6d6a9f6
 13      public:
 14        using u64 = unsigned long long;
 15        static constexpr const u64 MASK30 = (1ULL << 30) - 1;
 16        static constexpr const u64 MASK31 = (1ULL << 31) - 1;
 17        static constexpr const u64 MOD = (1ULL << 61) - 1;
 18        static constexpr const u64 MASK61 = (1ULL << 61) - 1;
 19        int n;
 20        u64 base, invpow;
 21        vector<u64> powb, invb;
 22
 23        static u64 get_mul(u64 a, u64 b) {
 24            u64 au = a >> 31;
 25            u64 ad = a & MASK31;
 26            u64 bu = b >> 31;
 27            u64 bd = b & MASK31;
 28            u64 mid = ad * bu + au * bd;
 29            u64 midu = mid >> 30;
 30            u64 midd = mid & MASK30;
 31            return get_mod(au * bu * 2 + midu + (midd << 31) + ad * bd);
 32        }
 33
 34        static u64 get_mod(u64 x) {
 35            u64 xu = x >> 61;
 36            u64 xd = x & MASK61;
 37            u64 res = xu + xd;
 38            if (res >= MOD) res -= MOD;
 39            return res;
 40        }
 41
 42        u64 pow_mod(u64 a, u64 b, const u64 mod) {
 43            u64 res = 1ULL;
 44            while (b) {
 45                if (b & 1ULL) {
 46                res = get_mul(res, a);
 47                }
 48                a = get_mul(a, a);
 49                b >>= 1ULL;
 50            }
 51            return res;
 52        }
 53
 54        HashStringBase(int n = 0) : n(n) {
 55            std::mt19937 rand(std::random_device{}());
 56            u64 base = std::uniform_int_distribution<>(414123123, 1000000000)(rand);
 57            // u64 base = 414123123;
 58            this->base = base;
 59            this->invpow = pow_mod(base, HashStringBase::MOD-2, HashStringBase::MOD);
 60            powb.resize(n + 1, 1ULL);
 61            invb.resize(n + 1, 1ULL);
 62            for (int i = 1; i <= n; ++i) {
 63                powb[i] = get_mul(powb[i-1], base);
 64                invb[i] = get_mul(invb[i-1], this->invpow);
 65            }
 66        }
 67
 68        void extend(const int cap) {
 69            assert(cap >= 0);
 70            int pre_cap = powb.size();
 71            powb.resize(pre_cap + cap, 0);
 72            invb.resize(pre_cap + cap, 0);
 73            for (int i = pre_cap; i < pre_cap + cap; ++i) {
 74                powb[i] = get_mul(powb[i-1], base);
 75                invb[i] = get_mul(invb[i-1], invpow);
 76            }
 77        }
 78
 79        int get_cap() const {
 80            return powb.size();
 81        }
 82
 83        u64 unite(u64 h1, u64 h2, int k) {
 84            if (k >= get_cap()) extend(k - get_cap() + 1);
 85            return get_mod(get_mul(h1, powb[k]) + h2);
 86        }
 87    };
 88
 89    class HashString {
 90      public:
 91        using u64 = unsigned long long;
 92
 93      private:
 94        HashStringBase* hsb;
 95        int n;
 96        bool used_seg;
 97        vector<u64> data, acc;
 98
 99        static u64 op(u64 s, u64 t) { return (s+t) % HashStringBase::MOD; }
100        static u64 e() { return 0; }
101        titan23::SegmentTree<u64, op, e> seg;
102
103      public:
104        HashString() {}
105        HashString(HashStringBase* hsb, const string &s) : n(s.size()), used_seg(false), hsb(hsb) {
106            data.resize(n);
107            acc.resize(n+1, 0);
108            if (n > this->hsb->get_cap()) this->hsb->extend(n - this->hsb->get_cap() + 1);
109            for (int i = 0 ; i < n; ++i) {
110                assert(0 <= n-i-1 && n-i-1 < hsb->powb.size());
111                data[i] = this->hsb->get_mul(this->hsb->powb[n-i-1], s[i]-'a'+1);
112                acc[i+1] = this->hsb->get_mod(acc[i] + data[i]);
113            }
114            seg = titan23::SegmentTree<u64, HashString::op, HashString::e>(data);
115        }
116
117        u64 get(int l, int r) const {
118            assert(0 <= l && l <= r && r <= n);
119            if (used_seg) {
120                return hsb->get_mul(seg.prod(l, r), hsb->invb[n - r]);
121            }
122            return hsb->get_mul(hsb->get_mod(4 * HashStringBase::MOD + acc[r] - acc[l]), hsb->invb[n - r]);
123        }
124
125        u64 get(int k) const {
126            assert(0 <= k && k < n);
127            return get(k, k+1);
128        }
129
130        void set(int k, char c) {
131            assert(0 <= k && k < n);
132            used_seg = true;
133            seg.set(k, hsb->get_mul(hsb->powb[n-k-1], c-'a'+1));
134        }
135
136        vector<int> get_lcp() const {
137            vector<int> a(n);
138            for (int i = 0; i < n; ++i) {
139                int ok = 0, ng = n - i + 1;
140                    while (ng - ok > 1) {
141                    int mid = (ok + ng) / 2;
142                    (get(0, mid) == get(i, i + mid) ? ok : ng) = mid;
143                }
144                a[i] = ok;
145            }
146            return a;
147        }
148    };
149} // namespace titan23

仕様

Warning

doxygenfile: Cannot find file “titan_cpplib/string/hash_string.cpp