Tifa's CP Library

:heavy_check_mark: ntt (src/code/conv/ntt.hpp)

Depends on

Required by

Verified with

Code

#ifndef TIFALIBS_CONV_NTT
#define TIFALIBS_CONV_NTT

#include "../bit/lowbit.hpp"
#include "../math/qpow.hpp"
#include "../nt/proot_u64.hpp"

namespace tifa_libs::math {

// Number-theoretic transform over the modular-integer type `mint`.
//
// Typical use: call bzr() once to precompute the table of roots of unity, then
// dif() for the forward transform and dit() for the inverse one. Neither
// transform performs an explicit reordering pass; dit() is written to consume
// the element order that dif() produces, so the two are meant to be used as a
// pair.
//
// `mint` must expose a static mod(), arithmetic operators and inv(), as used
// below.
template <class mint>
struct NTT {
  using data_t = mint;

  static_assert(is_prime(mint::mod()) && (mint::mod() & 3) == 1, "MOD must be prime with 4k+1");
  // Largest power of two dividing mod - 1; bzr() asserts the table never exceeds it.
  static constexpr u64 max_size = bit::lowbit(mint::mod() - 1);

  // Primitive root of the modulus, from which the roots of unity are derived.
  const mint G = proot(mint::mod());

  explicit constexpr NTT() : root() {}

  // Number of precomputed roots; 0 until bzr() has been called.
  constexpr u32 size() const { return (u32)root.size(); }

  // Precomputes root[i] = w^i for i in [0, n), where n = bit_ceil(len) and
  // w = G^((mod - 1) / n). Does nothing if the table already has size n.
  constexpr void bzr(u32 len = max_size) {
    u32 n = std::bit_ceil(len);
    assert(n <= max_size);
    if (n == size()) return;
    root.resize(n);
    root[0] = 1;
    mint w = qpow(G, (mint::mod() - 1) / n);
    for (u32 i = 1; i < n; ++i) root[i] = root[i - 1] * w;
  }

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wsign-conversion"
  // Forward transform of the first n entries of f, in place.
  //
  // n == 0 means "use size()". f is grown to n entries if it is shorter.
  // n must be a power of two not exceeding size().
  constexpr void dif(vec<mint> &f, u32 n = 0) const {
    assert(size());
    if (!n) n = size();
    if (f.size() < n) f.resize(n);
    assert(std::has_single_bit(n) && n <= size());
    if (n < 2) return;  // nothing to combine; also keeps size() / n well defined
    // root[] stores powers of a primitive size()-th root of unity. A butterfly
    // layer of half-width i needs powers of a primitive (2 * i)-th root, i.e.
    // a stride of size() / (2 * i) through the table. Deriving the stride from
    // size() rather than n keeps the transform correct when n < size().
    for (u32 i = n / 2, d = size() / n; i; i /= 2, d *= 2)
      for (u32 j = 0; j < n; j += i * 2) {
        auto w = root.begin();
        mint u, t;
        for (u32 k = 0; k < i; ++k, w += d) {
          f[j | k] = (u = f[j | k]) + (t = f[i | j | k]);
          f[i | j | k] = (u - t) * (*w);
        }
      }
  }

  // Inverse transform of the first n entries of f, in place.
  //
  // Same conventions for n and f as dif(). Entries of f beyond index n - 1
  // are left untouched.
  constexpr void dit(vec<mint> &f, u32 n = 0) const {
    assert(size());
    if (!n) n = size();
    if (f.size() < n) f.resize(n);
    assert(std::has_single_bit(n) && n <= size());
    // Same table stride as in dif(): size() / (2 * i) for half-width i.
    for (u32 i = 1, d = size() / 2; i < n; i *= 2, d /= 2)
      for (u32 j = 0; j < n; j += i * 2) {
        auto w = root.begin();
        mint t;
        for (u32 k = 0; k < i; ++k, w += d) {
          f[i | j | k] = f[j | k] - (t = f[i | j | k] * (*w));
          f[j | k] += t;
        }
      }
    // The butterflies above used the forward roots; reversing indices 1..n-1
    // and scaling by 1/n turns the result into the inverse transform. Only the
    // first n entries take part, even when f is longer than n.
    if (n > 1) std::reverse(f.begin() + 1, f.begin() + n);
    mint t = mint(n).inv();
    for (u32 i = 0; i < n; ++i) f[i] *= t;
  }
#pragma GCC diagnostic pop

 private:
  // root[i] = w^i for a primitive size()-th root of unity w; filled by bzr().
  vec<mint> root;
};

}  // namespace tifa_libs::math

#endif
#line 1 "src/code/conv/ntt.hpp"



#line 1 "src/code/bit/lowbit.hpp"



#line 1 "src/code/util/util.hpp"



#include <bits/stdc++.h>

// Generic absolute value: yields -x when x compares below zero, x otherwise.
template <class T>
constexpr T abs(T x) {
  if (x < 0) return -x;
  return x;
}

// Short names for the fixed-width signed integers; i128 uses the
// compiler-provided __int128_t and isz is the signed pointer-difference type.
using i8 = int8_t;
using i16 = int16_t;
using i32 = int32_t;
using i64 = int64_t;
using i128 = __int128_t;
using isz = ptrdiff_t;

// Unsigned counterparts; usz is size_t.
using u8 = uint8_t;
using u16 = uint16_t;
using u32 = uint32_t;
using u64 = uint64_t;
using u128 = __uint128_t;
using usz = size_t;

// Floating-point types (f128 is long double, whatever width the platform gives it).
using f32 = float;
using f64 = double;
using f128 = long double;

// Homogeneous pair / 3-tuple / 4-tuple.
template <class T>
using ptt = std::pair<T, T>;
template <class T>
using pt3 = std::tuple<T, T, T>;
template <class T>
using pt4 = std::tuple<T, T, T, T>;

// Container shorthands: fixed array, vector, and nested vectors (2-D, 3-D),
// plus vectors of pairs.
template <class T, usz N>
using arr = std::array<T, N>;
template <class T>
using vec = std::vector<T>;
template <class T>
using vvec = vec<vec<T>>;
template <class T>
using v3ec = vec<vvec<T>>;
template <class U, class T>
using vecp = vec<std::pair<U, T>>;
template <class U, class T>
using vvecp = vvec<std::pair<U, T>>;
template <class T>
using vecpt = vec<ptt<T>>;
template <class T>
using vvecpt = vvec<ptt<T>>;

// Priority queues backed by vec: pq uses comparator C (std::less by default),
// pqg always uses std::greater.
template <class T, class C = std::less<T>>
using pq = std::priority_queue<T, vec<T>, C>;
template <class T>
using pqg = std::priority_queue<T, vec<T>, std::greater<T>>;

// String and string-view shorthands.
using strn = std::string;
using strnv = std::string_view;

// Frequently used concrete vector types.
using vecu = vec<u32>;
using vvecu = vvec<u32>;
using v3ecu = v3ec<u32>;
using vecu64 = vec<u64>;
using vecb = vec<bool>;
using vvecb = vvec<bool>;

// When ONLINE_JUDGE is defined, assert(x) is redefined to the constant 42:
// the checked expression is discarded by the preprocessor and never evaluated,
// so code must not rely on side effects inside assert(...).
#ifdef ONLINE_JUDGE
#undef assert
#define assert(x) 42
#endif

using namespace std::literals;

// User-defined integer literal suffixes for the signed aliases, e.g. 5_i64.
// Each one converts the literal's value to the target type.
constexpr i8 operator""_i8(unsigned long long x) { return static_cast<i8>(x); }
constexpr i16 operator""_i16(unsigned long long x) { return static_cast<i16>(x); }
constexpr i32 operator""_i32(unsigned long long x) { return static_cast<i32>(x); }
constexpr i64 operator""_i64(unsigned long long x) { return static_cast<i64>(x); }
constexpr isz operator""_iz(unsigned long long x) { return static_cast<isz>(x); }

// The same for the unsigned aliases, e.g. 1_u32.
constexpr u8 operator""_u8(unsigned long long x) { return static_cast<u8>(x); }
constexpr u16 operator""_u16(unsigned long long x) { return static_cast<u16>(x); }
constexpr u32 operator""_u32(unsigned long long x) { return static_cast<u32>(x); }
constexpr u64 operator""_u64(unsigned long long x) { return static_cast<u64>(x); }
constexpr usz operator""_uz(unsigned long long x) { return static_cast<usz>(x); }

// No-op callable: accepts any number of arguments of any type and does nothing.
inline const auto fn_0 = [](auto&&...) {};


#line 5 "src/code/bit/lowbit.hpp"

namespace tifa_libs::bit {

// Returns the value of the lowest set bit of x (e.g. 12 -> 4), or 0 when x is 0.
//
// Intended for unsigned integer types. The previous shift-based form
// (1 << countr_zero(x)) shifted by the full bit width for x == 0, which is
// undefined behaviour; the mask form below is well defined for every
// unsigned value.
template <class T>
constexpr T lowbit(T x) {
  // ~x + 1 is the two's-complement negation of x; and-ing it with x keeps
  // only the lowest set bit. The cast undoes integer promotion for narrow T.
  return static_cast<T>(x & (~x + T(1)));
}

}  // namespace tifa_libs::bit


#line 1 "src/code/math/qpow.hpp"



#line 5 "src/code/math/qpow.hpp"

namespace tifa_libs::math {

// Exponentiation by squaring using only operator*.
// Starts from init_v and multiplies in the powers a^(2^i) selected by the set
// bits of b, so the result is init_v * a^b.
template <class T>
constexpr T qpow(T a, u64 b, T const& init_v = T{1}) {
  T acc = init_v;
  while (b) {
    if (b & 1) acc = acc * a;
    b >>= 1;
    a = a * a;
  }
  return acc;
}

}  // namespace tifa_libs::math


#line 1 "src/code/nt/proot_u64.hpp"



#line 1 "src/code/nt/pfactors.hpp"



#line 1 "src/code/math/mul_mod_u.hpp"



#line 5 "src/code/math/mul_mod_u.hpp"

namespace tifa_libs::math {

// Computes a * b % mod for unsigned 64-bit operands.
// When the bit widths of a and b sum to at most 64 the product cannot
// overflow u64, so the plain multiplication is used; otherwise the product is
// formed in 128 bits before reducing.
constexpr u64 mul_mod_u(u64 a, u64 b, u64 mod) {
  const bool product_fits_u64 = std::bit_width(a) + std::bit_width(b) <= 64;
  return product_fits_u64 ? a * b % mod
                          : static_cast<u64>(static_cast<u128>(a) * b % mod);
}

}  // namespace tifa_libs::math


#line 1 "src/code/rand/gen.hpp"



#line 5 "src/code/rand/gen.hpp"

namespace tifa_libs::rand {

// Bundles a Mersenne Twister engine with a distribution of type Distri.
// The engine is std::mt19937 when the distribution's result type is at most
// four bytes wide and std::mt19937_64 otherwise; it is seeded from
// std::random_device on construction.
template <class Distri>
class Gen {
 public:
  using distribution = Distri;
  using result_type = typename Distri::result_type;
  using random_engine = std::conditional_t<sizeof(result_type) <= 4, std::mt19937, std::mt19937_64>;

 private:
  random_engine re;
  Distri dist;

 public:
  // Default-constructed distribution.
  constexpr Gen() : re(std::random_device{}()), dist() {}
  // Distribution constructed from the two parameters (a, b).
  constexpr Gen(result_type a, result_type b) : re(std::random_device{}()), dist(a, b) {}

  // Replaces the distribution with a fresh Distri(a, b).
  constexpr void set_range(result_type a, result_type b) { dist = Distri(a, b); }
  // Direct access to the underlying engine and distribution.
  constexpr random_engine& rand_eng() { return re; }
  constexpr Distri& distrib() { return dist; }

  // Re-seeds the engine from the high-resolution clock: seconds since the
  // clock's epoch for the 32-bit engine, nanoseconds for the 64-bit one.
  void reset_seed() {
    using tick = std::conditional_t<sizeof(result_type) <= 4, std::chrono::seconds, std::chrono::nanoseconds>;
    const auto since_epoch = std::chrono::high_resolution_clock::now().time_since_epoch();
    re.seed((result_type)std::chrono::duration_cast<tick>(since_epoch).count());
  }

  // Draws the next value from the distribution; next() is a named alias.
  constexpr result_type operator()() { return dist(re); }
  result_type next() { return (*this)(); }
};

}  // namespace tifa_libs::rand


#line 1 "src/code/nt/is_prime.hpp"



#line 1 "src/code/math/qpow_mod.hpp"



#line 5 "src/code/math/qpow_mod.hpp"

namespace tifa_libs::math {

// Modular exponentiation by squaring: a^b reduced modulo mod, with every
// product taken through mul_mod_u. The accumulator starts at 1, so the result
// for b == 0 is 1.
constexpr u64 qpow_mod(u64 a, u64 b, u64 mod) {
  u64 acc = 1;
  a %= mod;
  while (b) {
    if (b & 1) acc = mul_mod_u(acc, a, mod);
    b >>= 1;
    a = mul_mod_u(a, a, mod);
  }
  return acc;
}

}  // namespace tifa_libs::math


#line 6 "src/code/nt/is_prime.hpp"

namespace tifa_libs::math {

// Primality test for 64-bit unsigned integers.
// Small and even inputs are decided directly; everything else goes through a
// Miller-Rabin style strong-pseudoprime check against a fixed list of bases
// (three bases below 2^30, seven bases otherwise).
constexpr bool is_prime(u64 n) {
  if (n <= 2) return n == 2;
  if (!(n & 1)) return false;
  if (n < 8 || n == 61) return true;

  // n - 1 = odd_part * 2^s
  const u64 odd_part = (n - 1) >> std::countr_zero(n - 1);

  auto passes_all = [n, odd_part](auto const& witnesses) -> bool {
    for (u64 a : witnesses) {
      // A base that is a multiple of n carries no information.
      if (a % n == 0) continue;
      u64 e = odd_part;
      u64 y = qpow_mod(a, e, n);
      // Keep squaring until the exponent reaches n - 1 or y hits 1 or n - 1.
      for (; e != n - 1 && y != 1 && y != n - 1; e *= 2) y = mul_mod_u(y, y, n);
      // Composite if we stopped on something other than n - 1 after at least
      // one squaring (the exponent is then even).
      if (y != n - 1 && !(e & 1)) return false;
    }
    return true;
  };

  if (n < (1 << 30)) {
    constexpr u64 small_bases[3] = {2, 7, 61};
    return passes_all(small_bases);
  }
  constexpr u64 large_bases[7] = {2, 325, 9375, 28178, 450775, 9780504, 1795265022};
  return passes_all(large_bases);
}

}  // namespace tifa_libs::math


#line 7 "src/code/nt/pfactors.hpp"

namespace tifa_libs::math {
namespace pfactors_impl_ {
// Recursive integer factoriser built on Pollard's rho method.
// Calling the object with (n, ans) adds the prime factors of n, with
// multiplicity, to the counts stored in `ans`.
class PollardRho {
  // Random source for the polynomial's constant term and the starting point.
  rand::Gen<std::uniform_int_distribution<u64>> e;

  // Finds a divisor g of n with g != 1 and g != n; retries with fresh random
  // parameters (via recursion) whenever a round only produces g == n.
  // The only caller in this class passes a composite n >= 2.
  // NOTE(review): the doubling window `r` with gcds batched every LIM steps
  // looks like Brent's cycle-finding variant -- confirm before relying on it.
  constexpr u64 rho(u64 n) {
    e.set_range(2, n - 1);
    // Iteration map x -> x^2 + r (mod n) with a random constant r.
    auto f = [n, r = e()](u64 x) { return (mul_mod_u(x, x, n) + r) % n; };
    u64 g = 1, x = 0, y = e(), yy = 0;
    const u32 LIM = 128;
    for (u64 r = 1, q = 1; g == 1; r *= 2) {
      x = y;
      for (u64 i = 0; i < r; ++i) y = f(y);
      for (u64 k = 0; g == 1 && k < r; k += LIM) {
        // Remember where this batch started so it can be replayed below.
        yy = y;
        // Accumulate the product of differences (x - y) mod n; one gcd per batch.
        for (u64 i = 0; i < LIM && i < r - k; ++i) q = mul_mod_u(q, (x + (n - (y = f(y)))) % n, n);
        g = std::gcd(q, n);
      }
    }
    // The batched product collapsed to a multiple of n: replay the last batch
    // one step at a time from yy to look for a proper divisor.
    if (g == n) do {
        g = std::gcd((x + (n - (yy = f(yy)))) % n, n);
      } while (g == 1);
    return g == n ? rho(n) : g;
  }

 public:
  explicit constexpr PollardRho() : e() {}

  // Factorises n into `ans` (prime -> exponent). Values below 2 contribute
  // nothing; primes are counted directly; composites are split with rho() and
  // both parts are factorised recursively.
  constexpr void operator()(u64 n, std::map<u64, u32> &ans) {
    if (n < 2) return;
    if (is_prime(n)) {
      ++ans[n];
      return;
    }
    auto g = rho(n);
    (*this)(n / g, ans);
    (*this)(g, ans);
  }
};
}  // namespace pfactors_impl_

// Prime factorisation of n as a map prime -> exponent (empty for n < 2).
// The power of two is stripped with a bit count first; the remaining odd part
// is handed to PollardRho.
inline std::map<u64, u32> pfactors(u64 n) {
  std::map<u64, u32> ans;
  if (n >= 2) {
    if ((n & 1) == 0) {
      const u32 twos = (u32)std::countr_zero(n);
      ans[2] = twos;
      n >>= twos;
    }
    pfactors_impl_::PollardRho()(n, ans);
  }
  return ans;
}

}  // namespace tifa_libs::math


#line 1 "src/code/nt/proot_u32.hpp"



#line 1 "src/code/math/isqrt.hpp"



#line 5 "src/code/math/isqrt.hpp"

namespace tifa_libs::math {

// Integer square root of a 64-bit value (intended result: floor(sqrt(x))).
// Returns 0 for x == 0. Otherwise x is shifted left by an even amount so its
// top set bit lands in bit 62 or 63, an estimate of the root is built from a
// lookup table plus two division-based refinement steps, and the estimate is
// scaled back and corrected downwards by one if it overshoots.
constexpr u32 isqrt(u64 x) {
  if (!x) return 0;
  // c = index of the highest bit pair of x; sh = number of bit pairs to shift
  // so that pair becomes the topmost one.
  int c = i32(std::bit_width(x) - 1) / 2, sh = 31 - c;
  u32 u = [](u64 x) {
    // Initial 8-bit estimate indexed by the top byte of the normalised input,
    // which lies in [64, 255] (hence the -64 offset and 192 entries).
    constexpr u8 TAB[192] = {128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 144, 145, 146, 147, 148, 149, 150, 151, 151, 152, 153, 154, 155, 156, 156, 157, 158, 159, 160, 160, 161, 162, 163, 164, 164, 165, 166, 167, 167, 168, 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 179, 179, 180, 181, 181, 182, 183, 183, 184, 185, 186, 186, 187, 188, 188, 189, 190, 190, 191, 192, 192, 193, 194, 194, 195, 196, 196, 197, 198, 198, 199, 200, 200, 201, 201, 202, 203, 203, 204, 205, 205, 206, 206, 207, 208, 208, 209, 210, 210, 211, 211, 212, 213, 213, 214, 214, 215, 216, 216, 217, 217, 218, 219, 219, 220, 220, 221, 221, 222, 223, 223, 224, 224, 225, 225, 226, 227, 227, 228, 228, 229, 229, 230, 230, 231, 232, 232, 233, 233, 234, 234, 235, 235, 236, 237, 237, 238, 238, 239, 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 246, 246, 247, 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255, 255, 255};
    u32 u = TAB[(x >> 56) - 64];
    // Two refinement steps, each widening the estimate (8 -> 16 -> 32 bits).
    u = (u << 7) + (u32)(x >> 41) / u;
    return (u << 15) + (u32)((x >> 17) / u);
  }(x << 2 * sh);
  // Undo the normalisation: shifting x by 2*sh bits scaled the root by sh bits.
  u >>= sh;
  // Step down by one if the estimate squared exceeds x.
  u -= (u64)u * u > x;
  return u;
}

}  // namespace tifa_libs::math


#line 1 "src/code/nt/is_proot.hpp"



#line 5 "src/code/nt/is_proot.hpp"

namespace tifa_libs::math {

// Tests whether g passes the primitive-root check modulo m, given the range
// [pf_begin, pf_end) of divisors to test against (the callers in this file
// pass the distinct prime factors of m - 1).
// g == 0 is rejected outright; otherwise g passes iff g^((m - 1) / p) != 1
// (mod m) for every p in the range.
template <std::unsigned_integral T, class It>
constexpr bool is_proot(T g, T m, It pf_begin, It pf_end) {
  if (g == 0) return false;
  return std::none_of(pf_begin, pf_end, [g, m](auto const& p) {
    return qpow_mod(g, (m - 1) / p, m) == 1;
  });
}

}  // namespace tifa_libs::math


#line 6 "src/code/nt/proot_u32.hpp"

namespace tifa_libs::math {

// Smallest g >= 2 accepted by is_proot for the 32-bit modulus m, with a table
// of precomputed answers for a handful of common moduli.
// NOTE(review): callers in this file pass a prime m -- the factorisation below
// works on m - 1, which is only the right group order for prime m.
constexpr u32 proot(u32 m) {
  // Hard-coded answers.
  switch (m) {
    case 2: return 1;
    case 3:
    case 5: return 2;
    case 104857601:
    case 167772161:
    case 469762049:
    case 998244353:
    case 1004535809: return 3;
    case 754974721: return 11;
    default: break;
  }

  // Collect the distinct prime factors of m - 1 by trial division; 2 is
  // recorded up front and stripped from the remainder.
  u32 divs[20] = {2};
  u32 cnt = 1;
  u32 rest = (m - 1) / 2;
  rest >>= std::countr_zero(rest);
  const u32 bound = isqrt(rest);
  for (u32 p = 3; p <= bound; p += 2) {
    if (rest % p != 0) continue;
    divs[cnt++] = p;
    do rest /= p;
    while (rest % p == 0);
  }
  // Whatever is left above 1 is one more prime factor.
  if (rest > 1) divs[cnt++] = rest;

  u32 g = 2;
  while (!is_proot(g, m, divs, divs + cnt)) ++g;
  return g;
}

}  // namespace tifa_libs::math


#line 6 "src/code/nt/proot_u64.hpp"

namespace tifa_libs::math {

// 64-bit counterpart of proot(u32): moduli that fit in 32 bits are forwarded
// to the u32 overload; larger ones have m - 1 factorised with pfactors() and
// the smallest g >= 2 accepted by is_proot is returned.
inline u64 proot(u64 m) {
  if ((m >> 32) == 0) return proot(static_cast<u32>(m));

  // Distinct prime factors of m - 1 (the map's keys).
  const auto factors = pfactors(m - 1);
  vecu64 pf;
  pf.reserve(factors.size());
  for (const auto& entry : factors) pf.push_back(entry.first);

  u64 g = 2;
  while (!is_proot(g, m, pf.begin(), pf.end())) ++g;
  return g;
}

}  // namespace tifa_libs::math


#line 7 "src/code/conv/ntt.hpp"

namespace tifa_libs::math {

// Number-theoretic transform over the modular-integer type `mint`.
//
// Typical use: call bzr() once to precompute the table of roots of unity, then
// dif() for the forward transform and dit() for the inverse one. Neither
// transform performs an explicit reordering pass; dit() is written to consume
// the element order that dif() produces, so the two are meant to be used as a
// pair.
//
// `mint` must expose a static mod(), arithmetic operators and inv(), as used
// below.
template <class mint>
struct NTT {
  using data_t = mint;

  static_assert(is_prime(mint::mod()) && (mint::mod() & 3) == 1, "MOD must be prime with 4k+1");
  // Largest power of two dividing mod - 1; bzr() asserts the table never exceeds it.
  static constexpr u64 max_size = bit::lowbit(mint::mod() - 1);

  // Primitive root of the modulus, from which the roots of unity are derived.
  const mint G = proot(mint::mod());

  explicit constexpr NTT() : root() {}

  // Number of precomputed roots; 0 until bzr() has been called.
  constexpr u32 size() const { return (u32)root.size(); }

  // Precomputes root[i] = w^i for i in [0, n), where n = bit_ceil(len) and
  // w = G^((mod - 1) / n). Does nothing if the table already has size n.
  constexpr void bzr(u32 len = max_size) {
    u32 n = std::bit_ceil(len);
    assert(n <= max_size);
    if (n == size()) return;
    root.resize(n);
    root[0] = 1;
    mint w = qpow(G, (mint::mod() - 1) / n);
    for (u32 i = 1; i < n; ++i) root[i] = root[i - 1] * w;
  }

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wsign-conversion"
  // Forward transform of the first n entries of f, in place.
  //
  // n == 0 means "use size()". f is grown to n entries if it is shorter.
  // n must be a power of two not exceeding size().
  constexpr void dif(vec<mint> &f, u32 n = 0) const {
    assert(size());
    if (!n) n = size();
    if (f.size() < n) f.resize(n);
    assert(std::has_single_bit(n) && n <= size());
    if (n < 2) return;  // nothing to combine; also keeps size() / n well defined
    // root[] stores powers of a primitive size()-th root of unity. A butterfly
    // layer of half-width i needs powers of a primitive (2 * i)-th root, i.e.
    // a stride of size() / (2 * i) through the table. Deriving the stride from
    // size() rather than n keeps the transform correct when n < size().
    for (u32 i = n / 2, d = size() / n; i; i /= 2, d *= 2)
      for (u32 j = 0; j < n; j += i * 2) {
        auto w = root.begin();
        mint u, t;
        for (u32 k = 0; k < i; ++k, w += d) {
          f[j | k] = (u = f[j | k]) + (t = f[i | j | k]);
          f[i | j | k] = (u - t) * (*w);
        }
      }
  }

  // Inverse transform of the first n entries of f, in place.
  //
  // Same conventions for n and f as dif(). Entries of f beyond index n - 1
  // are left untouched.
  constexpr void dit(vec<mint> &f, u32 n = 0) const {
    assert(size());
    if (!n) n = size();
    if (f.size() < n) f.resize(n);
    assert(std::has_single_bit(n) && n <= size());
    // Same table stride as in dif(): size() / (2 * i) for half-width i.
    for (u32 i = 1, d = size() / 2; i < n; i *= 2, d /= 2)
      for (u32 j = 0; j < n; j += i * 2) {
        auto w = root.begin();
        mint t;
        for (u32 k = 0; k < i; ++k, w += d) {
          f[i | j | k] = f[j | k] - (t = f[i | j | k] * (*w));
          f[j | k] += t;
        }
      }
    // The butterflies above used the forward roots; reversing indices 1..n-1
    // and scaling by 1/n turns the result into the inverse transform. Only the
    // first n entries take part, even when f is longer than n.
    if (n > 1) std::reverse(f.begin() + 1, f.begin() + n);
    mint t = mint(n).inv();
    for (u32 i = 0; i < n; ++i) f[i] *= t;
  }
#pragma GCC diagnostic pop

 private:
  // root[i] = w^i for a primitive size()-th root of unity w; filled by bzr().
  vec<mint> root;
};

}  // namespace tifa_libs::math


Back to top page