Tifa's CP Library

:heavy_check_mark: suffix_array (src/code/str/suffix_array.hpp)

Depends on

Required by

Verified with

Code

#ifndef TIFALIBS_STR_SUFFIX_ARRAY
#define TIFALIBS_STR_SUFFIX_ARRAY

#include "../util/traits.hpp"

namespace tifa_libs::str {

// Suffix array of a 1-indexed sequence, built by prefix doubling with counting sort.
//
// Layout: index 0 of `s` is a placeholder; the text is s[1..n] with n = s.size() - 1.
//   sa[i] (1 <= i <= n): start index of the suffix with rank i (rank 1 is the smallest)
//   rk[j] (1 <= j <= n): rank of the suffix starting at j
//   height             : empty until get_height() is called
// sa[0] and rk[0] are never written and stay 0.
//
// Requirements on the input:
//   - s.size() >= 1 (the placeholder must be present);
//   - every element of s[1..n], converted with u32(x), must be >= 1, because key 0
//     is what the construction sees for positions past the end of the text;
//   - the counting array has max(u32(x)) + 1 entries, so keys must be small. A
//     negative element (e.g. a signed `char` above 127) converts to a huge key.
// Patterns passed to the query functions use the same 1-indexed layout.
template <iterable_c T = strn>
class suffixarray {
  T s;

 public:
  vecu sa, rk, height;

  // Builds sa and rk for s_[1..s_.size() - 1]; s_[0] is a placeholder.
  CEXPE suffixarray(cT_(T) s_) : s(s_), sa(s.size()), rk(s.size()) {
    assert(s.size() >= 1);  // without the placeholder, n below would wrap around
    const u32 n = u32(s.size() - 1);
    // m: largest key currently in use, p: number of distinct ranks found in a round
    u32 m = 0, p = 0;
    for (auto x : s_) m = max(m, u32(x));
    vecu oldrk(n + n + 1), id(n + 1), cnt(m + 1, 0);
    // Initial pass: counting sort of all suffixes by their first element.
    fle_ (u32, i, 1, n) ++cnt[rk[i] = u32(s[i])];
    fle_ (u32, i, 1, m) cnt[i] += cnt[i - 1];
    for (u32 i = n; i >= 1; --i) sa[cnt[rk[i]]--] = i;
    // Each round refines the order from the first w elements to the first 2w.
    for (u32 w = 1;; w *= 2, m = p) {
      // id := suffixes ordered by their second half (positions i + w). Suffixes
      // whose second half lies past the end come first.
      p = 0;
      for (u32 i = n; i > n - w; --i) id[++p] = i;
      fle_ (u32, i, 1, n)
        if (sa[i] > w) id[++p] = sa[i] - w;
      // Stable counting sort of id by the first-half rank; reuse the buffer
      // instead of allocating a fresh vector every round.
      cnt.assign(m + 1, 0);
      fle_ (u32, i, 1, n) ++cnt[rk[id[i]]];
      fle_ (u32, i, 1, m) cnt[i] += cnt[i - 1];
      for (u32 i = n; i >= 1; --i) sa[cnt[rk[id[i]]]--] = id[i];
      // Re-rank: neighbours in sa share a rank iff both halves had equal ranks.
      // oldrk is zero beyond index n, which acts as the "past the end" key.
      std::ranges::copy(rk, oldrk.begin());
      p = 0;
      fle_ (u32, i, 1, n) {
        const u32 x = sa[i], y = sa[i - 1];
        rk[x] = oldrk[x] == oldrk[y] && oldrk[x + w] == oldrk[y + w] ? p : ++p;
      }
      if (p == n) break;  // all ranks distinct: the order is final
    }
  }

  // Fills `height` (size n + 1): for 2 <= r <= n, height[r] is the length of the
  // longest common prefix of the suffixes ranked r - 1 and r. height[0] and
  // height[1] are 0.
  CEXP void get_height() {
    const u32 n = u32(s.size() - 1);
    height = vecu(n + 1);
    for (u32 i = 1, k = 0; i <= n; ++i) {
      if (rk[i] == 1) continue;  // the smallest suffix has no predecessor
      if (k) --k;                // the LCP drops by at most one when moving from i - 1 to i
      const u32 j = sa[rk[i] - 1];
      // Both positions are bounded so that containers other than std::string are
      // never read at index s.size().
      while (i + k <= n && j + k <= n && s[i + k] == s[j + k]) ++k;
      height[rk[i]] = k;
    }
  }

  // Compares t[begt..t.size() - 1] with s[begs..s.size() - 1].
  // -1:  t > s.substr
  // 0 :  t == s.substr
  // 1 :  t < s.substr, t is a prefix of s.substr
  // 2 :  t < s.substr, t isn't prefix of s.substr
  CEXP i32 compare_substr(cT_(T) t, u32 begs = 1, u32 begt = 1) const {
    while (begs < s.size() && begt < t.size()) {
      if (t[begt] > s[begs]) return -1;
      if (t[begt] < s[begs]) return 2;
      ++begs, ++begt;
    }
    if (begs == s.size() && begt == t.size()) return 0;
    // Exactly one side is exhausted: t ran out first -> t is a proper prefix.
    return begt >= t.size() ? 1 : -1;
  }

  // The smallest rank of a suffix that is greater than or equal to t;
  // s.size() when there is no such suffix.
  CEXP u32 lower_bound(cT_(T) t) const {
    u32 l = 1, r = u32(s.size() - 1), ret = u32(s.size());
    while (r >= l) {
      const u32 m = l + (r - l) / 2;
      if (compare_substr(t, sa[m]) < 0) {
        l = m + 1;
      } else {
        r = m - 1;
        ret = m;
      }
    }
    return ret;
  }

  // The smallest rank of a suffix that is greater than t and does not have t as
  // a prefix; s.size() when there is no such suffix.
  CEXP u32 upper_bound(cT_(T) t) const {
    u32 l = 1, r = u32(s.size() - 1), ret = u32(s.size());
    while (r >= l) {
      const u32 m = l + (r - l) / 2;
      if (compare_substr(t, sa[m]) <= 1) {
        l = m + 1;
      } else {
        r = m - 1;
        ret = m;
      }
    }
    return ret;
  }

  // Number of suffixes that start with t, i.e. the number of occurrences of t.
  CEXP u32 frequency(cT_(T) t) const { return upper_bound(t) - lower_bound(t); }
};

}  // namespace tifa_libs::str

#endif
#line 1 "src/code/str/suffix_array.hpp"



#line 1 "src/code/util/traits.hpp"



#line 1 "src/code/util/util.hpp"



#include <bits/extc++.h>

// Keyword shorthands used throughout the library.
#define CEXP constexpr
#define CEXPE constexpr explicit
#define TPN typename
#define CR const&

// cT_(T): parameter type for read-only arguments. Expands to T itself when
// sizeof(T) <= sizeof(size_t), and to `T const&` otherwise.
#define cT_(...) std::conditional_t<sizeof(__VA_ARGS__) <= sizeof(size_t), __VA_ARGS__, __VA_ARGS__ CR>
// fle_(T, i, l, r, ...): for-loop over i = l, l + 1, ..., r (r inclusive). The
// bound is evaluated once into a variable named `i##e`; extra declarators of
// type T may be appended through the variadic part.
#define fle_(T, i, l, r, ...) for (T i = (l), i##e = (r)__VA_OPT__(, ) __VA_ARGS__; i <= i##e; ++i)
// flt_(T, i, l, r, ...): same as fle_ but with r exclusive (i < r).
#define flt_(T, i, l, r, ...) for (T i = (l), i##e = (r)__VA_OPT__(, ) __VA_ARGS__; i < i##e; ++i)

// When ONLINE_JUDGE is defined, assert(x) is replaced by the constant 42, so
// the asserted expression is not evaluated at all.
#ifdef ONLINE_JUDGE
#undef assert
#define assert(x) 42
#endif

// Short names for the fixed-width integer types, the GCC/Clang 128-bit
// integers, pointer-sized integers, floating-point types and strings.
using i8 = int8_t;
using i16 = int16_t;
using i32 = int32_t;
using i64 = int64_t;
using i128 = __int128_t;
using isz = ptrdiff_t;
using u8 = uint8_t;
using u16 = uint16_t;
using u32 = uint32_t;
using u64 = uint64_t;
using u128 = __uint128_t;
using usz = size_t;
using f32 = float;
using f64 = double;
// NOTE: `long double`; its actual width is platform-dependent despite the name.
using f128 = long double;
using strn = std::string;
using strnv = std::string_view;

// clang-format off
// ic<T, v>: shorthand for std::integral_constant.
template <class T, T v> using ic = std::integral_constant<T, v>;
// ptt<T>: homogeneous pair.
template <class T> using ptt = std::pair<T, T>;
// edge_t<T>: edge with weight w and endpoints u, v. The defaulted comparison
// follows member order, so the weight is compared first.
template <class T> struct edge_t {
  T w; u32 u, v;
  CEXP auto operator<=>(edge_t CR) const = default;
};
// pt3<T> / pt4<T>: homogeneous 3- and 4-tuples with member-wise comparison.
template <class T> struct pt3 {
  T _0, _1, _2;
  CEXP auto operator<=>(pt3 CR) const = default;
};
template <class T> struct pt4 {
  T _0, _1, _2, _3;
  CEXP auto operator<=>(pt4 CR) const = default;
};
// Container shorthands: vec = vector, vvec / v3ec = 2- and 3-level nested
// vectors, *pt = elements are ptt<T>, pt* = a pair of containers.
template <class E> using itl = std::initializer_list<E>;
template <class T> using vec = std::vector<T>;
template <class T> using vvec = vec<vec<T>>;
template <class T> using v3ec = vec<vvec<T>>;
template <class T> using vecpt = vec<ptt<T>>;
template <class T> using vvecpt = vvec<ptt<T>>;
template <class T> using ptvec = ptt<vec<T>>;
template <class T> using ptvvec = ptt<vvec<T>>;

// spn<T>: span over const elements (read-only view).
template <class T, usz ext = std::dynamic_extent> using spn = std::span<T const, ext>;
template <class T, usz N> using arr = std::array<T, N>;
// vecp / vvecp: vectors of heterogeneous pairs.
template <class U, class T> using vecp = vec<std::pair<U, T>>;
template <class U, class T> using vvecp = vvec<std::pair<U, T>>;

// pq: priority queue with a configurable comparator (std::less by default);
// pqg: priority queue ordered by std::greater.
template <class T, class C = std::less<T>> using pq = std::priority_queue<T, vec<T>, C>;
template <class T> using pqg = std::priority_queue<T, vec<T>, std::greater<T>>;
// clang-format on

// mk_(V, A, T) declares the alias `V##A` for V<T>; mk(A, T) does so for every
// template listed in its body. The invocations below therefore create names
// such as vecb, veci, vecu, vecii, vecuu (and likewise pttu, vvecu, spnu, ...)
// for bool, i32, u32, i64 and u64. Both helper macros are removed afterwards.
#define mk_(V, A, T) using V##A = V<T>;
#define mk(A, T) mk_(edge_t, A, T) mk_(ptt, A, T) mk_(pt3, A, T) mk_(pt4, A, T) mk_(vec, A, T) mk_(vvec, A, T) mk_(v3ec, A, T) mk_(vecpt, A, T) mk_(vvecpt, A, T) mk_(ptvec, A, T) mk_(ptvvec, A, T) mk_(spn, A, T) mk_(itl, A, T)
mk(b, bool) mk(i, i32) mk(u, u32) mk(ii, i64) mk(uu, u64);
#undef mk
#undef mk_

using namespace std::literals;
// Integer literal suffixes: `42_u32`, `7_iz`, ... yield the literal converted
// to the corresponding fixed-width / pointer-sized alias.
CEXP i8 operator""_i8(unsigned long long v) { return static_cast<i8>(v); }
CEXP i16 operator""_i16(unsigned long long v) { return static_cast<i16>(v); }
CEXP i32 operator""_i32(unsigned long long v) { return static_cast<i32>(v); }
CEXP i64 operator""_i64(unsigned long long v) { return static_cast<i64>(v); }
CEXP isz operator""_iz(unsigned long long v) { return static_cast<isz>(v); }
CEXP u8 operator""_u8(unsigned long long v) { return static_cast<u8>(v); }
CEXP u16 operator""_u16(unsigned long long v) { return static_cast<u16>(v); }
CEXP u32 operator""_u32(unsigned long long v) { return static_cast<u32>(v); }
CEXP u64 operator""_u64(unsigned long long v) { return static_cast<u64>(v); }
CEXP usz operator""_uz(unsigned long long v) { return static_cast<usz>(v); }

// Makes pi_v<FP> available without the std::numbers:: prefix.
using std::numbers::pi_v;
// Per-floating-point-type tolerance, initialised to sqrt(machine epsilon).
template <std::floating_point FP>
inline FP eps_v = std::sqrt(std::numeric_limits<FP>::epsilon());
// Overwrites the tolerance for FP.
// NOTE(review): declared constexpr although it writes a non-constexpr
// variable, so it presumably cannot be used in a constant expression.
template <std::floating_point FP>
CEXP void set_eps(FP v) { eps_v<FP> = v; }

// fn_0: callable that accepts any arguments and does nothing.
inline const auto fn_0 = [](auto&&...) {};
// fn_is0: returns the result of comparing its argument with 0.
inline const auto fn_is0 = [](auto x) { return x == 0; };

namespace tifa_libs {
using std::min, std::max, std::swap;
// Absolute value for any type that supports `< 0` and unary minus; the result
// is converted back to T.
template <class T>
constexpr T abs(T value) {
  if (value < 0) return -value;
  return value;
}
}  // namespace tifa_libs


#line 5 "src/code/util/traits.hpp"

namespace tifa_libs {

// Satisfied when begin() and end(), called on a non-const value of type T,
// both return exactly T::iterator.
template <class T>
concept iterable_c = requires(T v) {
  { v.begin() } -> std::same_as<TPN T::iterator>;
  { v.end() } -> std::same_as<TPN T::iterator>;
};

// An iterable_c type that is neither strn nor strnv (cv/ref qualifiers are
// stripped before the string comparison).
template <class T>
concept container_c = iterable_c<T> && !std::same_as<std::remove_cvref_t<T>, strn> && !std::same_as<std::remove_cvref_t<T>, strnv>;

// True for char, signed char and unsigned char (exact types, no cv stripping).
template <class T>
CEXP bool is_char_v = std::is_same_v<T, char> || std::is_same_v<T, signed char> || std::is_same_v<T, unsigned char>;
template <class T>
concept char_c = is_char_v<T>;

// True for the signed 128-bit integer type (either spelling).
template <class T>
CEXP bool is_s128_v = std::is_same_v<T, __int128_t> || std::is_same_v<T, __int128>;
template <class T>
concept s128_c = is_s128_v<T>;

// True for the unsigned 128-bit integer type (either spelling).
template <class T>
CEXP bool is_u128_v = std::is_same_v<T, __uint128_t> || std::is_same_v<T, unsigned __int128>;
template <class T>
concept u128_c = is_u128_v<T>;

// True for either 128-bit integer type, signed or unsigned.
template <class T>
CEXP bool is_i128_v = is_s128_v<T> || is_u128_v<T>;
// Concept form of is_i128_v. It previously tested is_u128_v, which rejected
// the signed 128-bit type and broke the `X_c = is_X_v<T>` pairing used by
// every other trait in this file.
template <class T>
concept i128_c = is_i128_v<T>;

// Integral in the standard sense, or one of the 128-bit integer types.
template <class T>
CEXP bool is_int_v = std::is_integral_v<T> || is_i128_v<T>;
template <class T>
concept int_c = is_int_v<T>;

// Signed integers; the 128-bit case is listed explicitly rather than relying
// on std::is_signed_v.
template <class T>
CEXP bool is_sint_v = is_s128_v<T> || (is_int_v<T> && std::is_signed_v<T>);
template <class T>
concept sint_c = is_sint_v<T>;

// Unsigned integers; the 128-bit case is listed explicitly rather than relying
// on std::is_unsigned_v.
template <class T>
CEXP bool is_uint_v = is_u128_v<T> || (is_int_v<T> && std::is_unsigned_v<T>);
template <class T>
concept uint_c = is_uint_v<T>;

// Modular-integer-like type: mod() and val() both return an unsigned integer.
template <class T>
concept mint_c = requires(T x) {
  { x.mod() } -> uint_c;
  { x.val() } -> uint_c;
};

// Transform-like type: exposes a nested data_t, a size() returning exactly
// u32, and the members bzr(n), dif(v, n) and dit(v, n) taking a
// vec<data_t> and a u32.
template <class T>
concept dft_c = requires(T x, vec<TPN T::data_t> v, u32 n) {
  { x.size() } -> std::same_as<u32>;
  x.bzr(n);
  x.dif(v, n);
  x.dit(v, n);
};

// A dft_c type that additionally names the static members max_size and G.
template <class T>
concept ntt_c = dft_c<T> && requires(T x) {
  T::max_size;
  T::G;
};

// Arithmetic in the standard sense, or one of the integer types accepted by
// is_int_v (which adds the 128-bit integers).
template <class T>
CEXP bool is_arithm_v = std::is_arithmetic_v<T> || is_int_v<T>;
template <class T>
concept arithm_c = is_arithm_v<T>;

// Maps an integer type to its signed counterpart. std::make_signed handles the
// standard integer types; the 128-bit types are specialised explicitly so the
// mapping does not depend on library support for them.
template <class T>
struct to_sint : std::make_signed<T> {};
// Both 128-bit types map to the SIGNED 128-bit type. The specialisations
// previously yielded u128, identical to to_uint.
template <>
struct to_sint<u128> {
  using type = i128;
};
template <>
struct to_sint<i128> {
  using type = i128;
};
template <class T>
using to_sint_t = TPN to_sint<T>::type;

// Maps an integer type to its unsigned counterpart. std::make_unsigned handles
// the general case; both 128-bit types are specialised to u128.
template <class T>
struct to_uint : std::make_unsigned<T> {};
template <>
struct to_uint<u128> {
  using type = u128;
};
template <>
struct to_uint<i128> {
  using type = u128;
};
template <class T>
using to_uint_t = TPN to_uint<T>::type;

}  // namespace tifa_libs


#line 5 "src/code/str/suffix_array.hpp"

namespace tifa_libs::str {

// Suffix array of a 1-indexed sequence, built by prefix doubling with counting sort.
//
// Layout: index 0 of `s` is a placeholder; the text is s[1..n] with n = s.size() - 1.
//   sa[i] (1 <= i <= n): start index of the suffix with rank i (rank 1 is the smallest)
//   rk[j] (1 <= j <= n): rank of the suffix starting at j
//   height             : empty until get_height() is called
// sa[0] and rk[0] are never written and stay 0.
//
// Requirements on the input:
//   - s.size() >= 1 (the placeholder must be present);
//   - every element of s[1..n], converted with u32(x), must be >= 1, because key 0
//     is what the construction sees for positions past the end of the text;
//   - the counting array has max(u32(x)) + 1 entries, so keys must be small. A
//     negative element (e.g. a signed `char` above 127) converts to a huge key.
// Patterns passed to the query functions use the same 1-indexed layout.
template <iterable_c T = strn>
class suffixarray {
  T s;

 public:
  vecu sa, rk, height;

  // Builds sa and rk for s_[1..s_.size() - 1]; s_[0] is a placeholder.
  CEXPE suffixarray(cT_(T) s_) : s(s_), sa(s.size()), rk(s.size()) {
    assert(s.size() >= 1);  // without the placeholder, n below would wrap around
    const u32 n = u32(s.size() - 1);
    // m: largest key currently in use, p: number of distinct ranks found in a round
    u32 m = 0, p = 0;
    for (auto x : s_) m = max(m, u32(x));
    vecu oldrk(n + n + 1), id(n + 1), cnt(m + 1, 0);
    // Initial pass: counting sort of all suffixes by their first element.
    fle_ (u32, i, 1, n) ++cnt[rk[i] = u32(s[i])];
    fle_ (u32, i, 1, m) cnt[i] += cnt[i - 1];
    for (u32 i = n; i >= 1; --i) sa[cnt[rk[i]]--] = i;
    // Each round refines the order from the first w elements to the first 2w.
    for (u32 w = 1;; w *= 2, m = p) {
      // id := suffixes ordered by their second half (positions i + w). Suffixes
      // whose second half lies past the end come first.
      p = 0;
      for (u32 i = n; i > n - w; --i) id[++p] = i;
      fle_ (u32, i, 1, n)
        if (sa[i] > w) id[++p] = sa[i] - w;
      // Stable counting sort of id by the first-half rank; reuse the buffer
      // instead of allocating a fresh vector every round.
      cnt.assign(m + 1, 0);
      fle_ (u32, i, 1, n) ++cnt[rk[id[i]]];
      fle_ (u32, i, 1, m) cnt[i] += cnt[i - 1];
      for (u32 i = n; i >= 1; --i) sa[cnt[rk[id[i]]]--] = id[i];
      // Re-rank: neighbours in sa share a rank iff both halves had equal ranks.
      // oldrk is zero beyond index n, which acts as the "past the end" key.
      std::ranges::copy(rk, oldrk.begin());
      p = 0;
      fle_ (u32, i, 1, n) {
        const u32 x = sa[i], y = sa[i - 1];
        rk[x] = oldrk[x] == oldrk[y] && oldrk[x + w] == oldrk[y + w] ? p : ++p;
      }
      if (p == n) break;  // all ranks distinct: the order is final
    }
  }

  // Fills `height` (size n + 1): for 2 <= r <= n, height[r] is the length of the
  // longest common prefix of the suffixes ranked r - 1 and r. height[0] and
  // height[1] are 0.
  CEXP void get_height() {
    const u32 n = u32(s.size() - 1);
    height = vecu(n + 1);
    for (u32 i = 1, k = 0; i <= n; ++i) {
      if (rk[i] == 1) continue;  // the smallest suffix has no predecessor
      if (k) --k;                // the LCP drops by at most one when moving from i - 1 to i
      const u32 j = sa[rk[i] - 1];
      // Both positions are bounded so that containers other than std::string are
      // never read at index s.size().
      while (i + k <= n && j + k <= n && s[i + k] == s[j + k]) ++k;
      height[rk[i]] = k;
    }
  }

  // Compares t[begt..t.size() - 1] with s[begs..s.size() - 1].
  // -1:  t > s.substr
  // 0 :  t == s.substr
  // 1 :  t < s.substr, t is a prefix of s.substr
  // 2 :  t < s.substr, t isn't prefix of s.substr
  CEXP i32 compare_substr(cT_(T) t, u32 begs = 1, u32 begt = 1) const {
    while (begs < s.size() && begt < t.size()) {
      if (t[begt] > s[begs]) return -1;
      if (t[begt] < s[begs]) return 2;
      ++begs, ++begt;
    }
    if (begs == s.size() && begt == t.size()) return 0;
    // Exactly one side is exhausted: t ran out first -> t is a proper prefix.
    return begt >= t.size() ? 1 : -1;
  }

  // The smallest rank of a suffix that is greater than or equal to t;
  // s.size() when there is no such suffix.
  CEXP u32 lower_bound(cT_(T) t) const {
    u32 l = 1, r = u32(s.size() - 1), ret = u32(s.size());
    while (r >= l) {
      const u32 m = l + (r - l) / 2;
      if (compare_substr(t, sa[m]) < 0) {
        l = m + 1;
      } else {
        r = m - 1;
        ret = m;
      }
    }
    return ret;
  }

  // The smallest rank of a suffix that is greater than t and does not have t as
  // a prefix; s.size() when there is no such suffix.
  CEXP u32 upper_bound(cT_(T) t) const {
    u32 l = 1, r = u32(s.size() - 1), ret = u32(s.size());
    while (r >= l) {
      const u32 m = l + (r - l) / 2;
      if (compare_substr(t, sa[m]) <= 1) {
        l = m + 1;
      } else {
        r = m - 1;
        ret = m;
      }
    }
    return ret;
  }

  // Number of suffixes that start with t, i.e. the number of occurrences of t.
  CEXP u32 frequency(cT_(T) t) const { return upper_bound(t) - lower_bound(t); }
};

}  // namespace tifa_libs::str


Back to top page