Tifa's CP Library

:heavy_check_mark: conv_czt (src/code/conv/conv_czt.hpp)

Depends on

Verified with

Code

#ifndef TIFALIBS_CONV_CONV_CZT
#define TIFALIBS_CONV_CONV_CZT

#include "../math/qpow.hpp"
#include "../nt/proot_u32.hpp"
#include "../poly/czt_fps.hpp"

namespace tifa_libs::math {

// Multiplies the polynomials `l` and `r` using chirp Z-transforms (czt_fps):
// both operands are evaluated at the powers of c = g^((m-1)/s), where
// g = proot(m), m = mint::mod() and s = bit_ceil(l.size() + r.size() - 1),
// multiplied pointwise, transformed back with c^{-1} and scaled by 1/s.
//
// @param l, r     operands (taken by value; they are resized and overwritten)
// @param ans_size number of coefficients to return; 0 selects
//                 l.size() + r.size() - 1
// @return a copy of the resulting coefficient container, ans_size entries long
//
// Asserts that the modulus fits in 32 bits and that s divides m - 1.
// If either operand has no coefficients, the result is all zeros and no
// transform is run.
template <class poly>
constexpr auto conv_czt(poly l, poly r, u32 ans_size = 0) {
  using mint = typename poly::value_type;
  // Same type the final `return ... .data()` deduces to, so both returns agree.
  using data_t = std::decay_t<decltype(l.data())>;
  const auto total = l.size() + r.size();
  // `total - 1` would wrap around when both operands are empty.
  if (!ans_size) ans_size = total ? u32(total - 1) : 0;
  // A product with an empty polynomial is zero; returning here also keeps
  // bit_ceil below away from the wrapped-around value.
  if (!l.size() || !r.size()) return data_t(ans_size);
  assert(mint::mod() <= u64(-1_u32));
  const u32 m = (u32)mint::mod();
  const u32 s = (u32)std::bit_ceil(total - 1);
  assert((m - 1) % s == 0);
  mint c = qpow(mint(proot(m)), (m - 1) / s);
  l.resize(s);
  r.resize(s);
  l = czt_fps(l, c);
  r = czt_fps(r, c);
  for (u32 i = 0; i < s; ++i) l[i] *= r[i];
  l = czt_fps<mint, typename poly::ccore_type>(l, c.inv());
  l.resize(ans_size);
  return (l *= mint(s).inv()).data();
}

}  // namespace tifa_libs::math

#endif
#line 1 "src/code/conv/conv_czt.hpp"



#line 1 "src/code/math/qpow.hpp"



#line 1 "src/code/util/util.hpp"



#include <bits/stdc++.h>

// Absolute value for any type that supports `x < 0` and unary minus.
// Values that are not below zero are returned unchanged.
template <class T>
constexpr T abs(T x) {
  if (x < 0) return -x;
  return x;
}

// Signed integers.
using i8 = std::int8_t;
using i16 = std::int16_t;
using i32 = std::int32_t;
using i64 = std::int64_t;
using i128 = __int128_t;
using isz = std::ptrdiff_t;

// Unsigned integers.
using u8 = std::uint8_t;
using u16 = std::uint16_t;
using u32 = std::uint32_t;
using u64 = std::uint64_t;
using u128 = __uint128_t;
using usz = std::size_t;

// Floating point.
using f32 = float;
using f64 = double;
using f128 = long double;

// Homogeneous pair / triple / quadruple.
template <class T>
using ptt = std::pair<T, T>;
template <class T>
using pt3 = std::tuple<T, T, T>;
template <class T>
using pt4 = std::tuple<T, T, T, T>;

// Containers; the v-prefixed names add one level of vector nesting.
template <class T, usz N>
using arr = std::array<T, N>;
template <class T>
using vec = std::vector<T>;
template <class T>
using vvec = vec<vec<T>>;
template <class T>
using v3ec = vvec<vec<T>>;
template <class U, class T>
using vecp = vec<std::pair<U, T>>;
template <class U, class T>
using vvecp = vec<vecp<U, T>>;
template <class T>
using vecpt = vecp<T, T>;
template <class T>
using vvecpt = vec<vecpt<T>>;

// Priority queues backed by vec<T>; pqg orders with std::greater.
template <class T, class C = std::less<T>>
using pq = std::priority_queue<T, vec<T>, C>;
template <class T>
using pqg = pq<T, std::greater<T>>;

using strn = std::string;
using strnv = std::string_view;

// Frequently used concrete vectors.
using vecu = vec<u32>;
using vvecu = vec<vecu>;
using v3ecu = vec<vvecu>;
using vecu64 = vec<u64>;
using vecb = vec<bool>;
using vvecb = vec<vecb>;

// With ONLINE_JUDGE defined, assert(x) expands to the constant 42, so the
// asserted expression is not evaluated.
#ifdef ONLINE_JUDGE
#undef assert
#define assert(x) 42
#endif

using namespace std::literals;

// Literal suffixes for the integer aliases, e.g. `1_u32` or `-1_u32`.
// Each one converts the unsigned long long literal with a plain cast, so a
// value that does not fit the target type is narrowed, not rejected.
constexpr auto operator""_i8(unsigned long long v) -> i8 { return static_cast<i8>(v); }
constexpr auto operator""_i16(unsigned long long v) -> i16 { return static_cast<i16>(v); }
constexpr auto operator""_i32(unsigned long long v) -> i32 { return static_cast<i32>(v); }
constexpr auto operator""_i64(unsigned long long v) -> i64 { return static_cast<i64>(v); }
constexpr auto operator""_iz(unsigned long long v) -> isz { return static_cast<isz>(v); }

constexpr auto operator""_u8(unsigned long long v) -> u8 { return static_cast<u8>(v); }
constexpr auto operator""_u16(unsigned long long v) -> u16 { return static_cast<u16>(v); }
constexpr auto operator""_u32(unsigned long long v) -> u32 { return static_cast<u32>(v); }
constexpr auto operator""_u64(unsigned long long v) -> u64 { return static_cast<u64>(v); }
constexpr auto operator""_uz(unsigned long long v) -> usz { return static_cast<usz>(v); }

// No-op callable: accepts any number of arguments of any type and does nothing.
inline const auto fn_0 = [](auto&&...) -> void {};


#line 5 "src/code/math/qpow.hpp"

namespace tifa_libs::math {

// Exponentiation by squaring for any type with `operator*`.
//
// @param a      base
// @param b      exponent
// @param init_v value the accumulator starts from (T{1} by default)
// @return init_v multiplied by a^b
template <class T>
constexpr T qpow(T a, u64 b, T const& init_v = T{1}) {
  T acc = init_v;
  while (b) {
    // Fold in the current power of `a` when the lowest remaining bit is set.
    if (b & 1) acc = acc * a;
    b >>= 1;
    a = a * a;
  }
  return acc;
}

}  // namespace tifa_libs::math


#line 1 "src/code/nt/proot_u32.hpp"



#line 1 "src/code/math/isqrt.hpp"



#line 5 "src/code/math/isqrt.hpp"

namespace tifa_libs::math {

// Integer square root of a 64-bit value, computed from a lookup table plus
// two division steps and one final correction.
// NOTE(review): the result is presumably floor(sqrt(x)); the last step only
// shows that the estimate is lowered by one when its square exceeds x.
//
// @param x input value; 0 is handled up front
// @return the 32-bit root estimate after correction
constexpr u32 isqrt(u64 x) {
  if (!x) return 0;
  // c = (index of the highest set bit) / 2. Shifting x left by 2 * sh moves
  // that bit to position 62 or 63, so the top byte seen by the lambda is >= 64.
  int c = i32(std::bit_width(x) - 1) / 2, sh = 31 - c;
  u32 u = [](u64 x) {
    // 192-entry table indexed by the top byte of the shifted input minus 64.
    constexpr u8 TAB[192] = {128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 144, 145, 146, 147, 148, 149, 150, 151, 151, 152, 153, 154, 155, 156, 156, 157, 158, 159, 160, 160, 161, 162, 163, 164, 164, 165, 166, 167, 167, 168, 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 179, 179, 180, 181, 181, 182, 183, 183, 184, 185, 186, 186, 187, 188, 188, 189, 190, 190, 191, 192, 192, 193, 194, 194, 195, 196, 196, 197, 198, 198, 199, 200, 200, 201, 201, 202, 203, 203, 204, 205, 205, 206, 206, 207, 208, 208, 209, 210, 210, 211, 211, 212, 213, 213, 214, 214, 215, 216, 216, 217, 217, 218, 219, 219, 220, 220, 221, 221, 222, 223, 223, 224, 224, 225, 225, 226, 227, 227, 228, 228, 229, 229, 230, 230, 231, 232, 232, 233, 233, 234, 234, 235, 235, 236, 237, 237, 238, 238, 239, 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 246, 246, 247, 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255, 255, 255};
    u32 u = TAB[(x >> 56) - 64];
    // Widen the 8-bit table value in two steps, each adding a quotient of the
    // shifted input by the current estimate (looks like a Newton-style
    // refinement; not verified here).
    u = (u << 7) + (u32)(x >> 41) / u;
    return (u << 15) + (u32)((x >> 17) / u);
  }(x << 2 * sh);
  // Undo the scaling: the input was shifted by 2 * sh bits, the root by sh.
  u >>= sh;
  // Lower the estimate by one if its square overshoots x.
  u -= (u64)u * u > x;
  return u;
}

}  // namespace tifa_libs::math


#line 1 "src/code/nt/is_proot.hpp"



#line 1 "src/code/math/qpow_mod.hpp"



#line 1 "src/code/math/mul_mod_u.hpp"



#line 5 "src/code/math/mul_mod_u.hpp"

namespace tifa_libs::math {

// Returns (a * b) % mod for unsigned 64-bit operands.
// The product is formed in 64 bits when the operand bit widths add up to at
// most 64 (so it cannot overflow), and in 128 bits otherwise.
constexpr u64 mul_mod_u(u64 a, u64 b, u64 mod) {
  const bool fits_in_u64 = std::bit_width(a) + std::bit_width(b) <= 64;
  return fits_in_u64 ? a * b % mod : static_cast<u64>(static_cast<u128>(a) * b % mod);
}

}  // namespace tifa_libs::math


#line 5 "src/code/math/qpow_mod.hpp"

namespace tifa_libs::math {

// Modular exponentiation by squaring, built on mul_mod_u.
//
// @param a   base (reduced modulo `mod` before use)
// @param b   exponent
// @param mod modulus
// @return a^b mod `mod`; the accumulator starts at 1, so b == 0 yields 1
constexpr u64 qpow_mod(u64 a, u64 b, u64 mod) {
  u64 acc = 1;
  a %= mod;
  while (b) {
    if (b & 1) acc = mul_mod_u(acc, a, mod);
    b >>= 1;
    a = mul_mod_u(a, a, mod);
  }
  return acc;
}

}  // namespace tifa_libs::math


#line 5 "src/code/nt/is_proot.hpp"

namespace tifa_libs::math {

// Tests `g` against every value p in [pf_begin, pf_end): returns false if g
// is zero or if g^((m-1)/p) is congruent to 1 modulo m for some p, and true
// otherwise.
// NOTE(review): presumably the range holds the distinct prime factors of
// m - 1 for a prime m, which makes this a primitive-root test; confirm
// against callers.
template <std::unsigned_integral T, class It>
constexpr bool is_proot(T g, T m, It pf_begin, It pf_end) {
  if (g == 0) return false;
  while (pf_begin != pf_end) {
    const auto exponent = (m - 1) / *pf_begin;
    if (qpow_mod(g, exponent, m) == 1) return false;
    ++pf_begin;
  }
  return true;
}

}  // namespace tifa_libs::math


#line 6 "src/code/nt/proot_u32.hpp"

namespace tifa_libs::math {

// Returns the smallest g >= 2 accepted by is_proot for modulus `m`, with
// hard-coded answers for a few small and commonly used moduli.
// NOTE(review): presumably `m` is expected to be prime; nothing here checks it.
constexpr u32 proot(u32 m) {
  // Precomputed answers.
  switch (m) {
    case 2:
      return 1;
    case 3:
    case 5:
      return 2;
    case 104857601:
    case 167772161:
    case 469762049:
    case 998244353:
    case 1004535809:
      return 3;
    case 754974721:
      return 11;
    default:
      break;
  }
  // Collect the distinct prime factors of m - 1; 2 is always listed first.
  u32 factors[20] = {2};
  u32 n_factors = 1;
  u32 rest = (m - 1) / 2;
  rest >>= std::countr_zero(rest);
  // The trial-division bound is computed once, before `rest` shrinks.
  const u32 bound = isqrt(rest);
  for (u32 p = 3; p <= bound; p += 2) {
    if (rest % p != 0) continue;
    factors[n_factors++] = p;
    do {
      rest /= p;
    } while (rest % p == 0);
  }
  // Whatever is left above 1 is recorded as one more factor.
  if (rest > 1) factors[n_factors++] = rest;
  u32 g = 2;
  while (!is_proot(g, m, factors, factors + n_factors)) ++g;
  return g;
}

}  // namespace tifa_libs::math


#line 1 "src/code/poly/czt_fps.hpp"



#line 1 "src/code/math/rpow.hpp"



#line 5 "src/code/math/rpow.hpp"

namespace tifa_libs::math {

class rpow {
  vecu64 b0, b1;
  u64 b_, m_;

 public:
  explicit constexpr rpow() : b0(65536), b1(65536), b_(), m_() {}
  constexpr rpow(u64 base_, u32 mod_) : rpow() { reset(base_, mod_); }

  constexpr void reset(u64 base, u32 mod) {
    if (b_ == base % mod && m_ == mod) return;
    b_ = base % mod;
    m_ = mod;
    b0[0] = b1[0] = 1;
    for (u32 i = 1; i < 65536; ++i) b0[i] = mul_mod_u(b0[i - 1], b_, m_);
    u64 _(mul_mod_u(b0.back(), b_, m_));
    for (u32 i = 1; i < 65536; ++i) b1[i] = mul_mod_u(b1[i - 1], _, m_);
  }

  constexpr void swap(rpow& r) {
    b0.swap(r.b0), b1.swap(r.b1);
    std::swap(b_, r.b_), std::swap(m_, r.m_);
  }

  constexpr u64 base() const { return b_; }
  constexpr u64 mod() const { return m_; }

  constexpr u64 operator()(u32 x) const { return mul_mod_u(b0[x & 65535], b1[x >> 16], m_); }
};

}  // namespace tifa_libs::math


#line 1 "src/code/poly/poly.hpp"



#line 5 "src/code/poly/poly.hpp"

namespace tifa_libs::math {

// clang-format off
// Tag naming a convolution backend; poly requires its `ccore` parameter to
// expose one of these as `ct_cat`.
enum ccore_t {
  ct_FFT = 0,
  ct_3NTT = 1,
  ct_NTT = 2,
  ct_CNTT = 3
};
// clang-format on

// Dense polynomial over `mint`: index i of the coefficient vector holds the
// coefficient of x^i (see operator()).
// `ccore` is the convolution backend. It must expose a `ct_cat` member of
// type `ccore_t const` and `conv(l, r)` / `conv(l, r, sz)`, which conv() and
// the polynomial operator*= forward to through the shared `conv_core` object.
template <class mint, class ccore>
requires requires(ccore cc, vec<mint> l, vec<mint> const &r, u32 sz) {
  { ccore::ct_cat } -> std::same_as<ccore_t const &>;
  cc.conv(l, r);
  cc.conv(l, r, sz);
}
class poly {
  vec<mint> d;

 public:
  using value_type = mint;
  using data_type = vec<value_type>;
  using ccore_type = ccore;
  static inline ccore_type conv_core;

  // `sz` coefficients, all equal to `val`; a single zero-initialised
  // coefficient by default.
  explicit constexpr poly(u32 sz = 1, value_type const &val = value_type{}) : d(sz, val) {}
  constexpr poly(typename data_type::const_iterator begin, typename data_type::const_iterator end) : d(begin, end) {}
  constexpr poly(std::initializer_list<value_type> v) : d(v) {}
  // Copies element by element, so T only has to be convertible to
  // value_type (a plain `d(v)` compiles for T == value_type only).
  template <class T>
  explicit constexpr poly(vec<T> const &v) : d(v.begin(), v.end()) {}

  // Reads exactly size() coefficients; the polynomial is not resized.
  friend constexpr std::istream &operator>>(std::istream &is, poly &p) {
    for (auto &val : p.d) is >> val;
    return is;
  }
  // Writes the coefficients separated by single spaces (nothing when empty).
  friend constexpr std::ostream &operator<<(std::ostream &os, poly const &p) {
    if (!p.size()) return os;
    for (u32 i = 1; i < p.size(); ++i) os << p[i - 1] << ' ';
    return os << p.d.back();
  }

  constexpr u32 size() const { return (u32)d.size(); }
  // True when every stored coefficient compares equal to 0. This is "is the
  // zero polynomial", not "has no coefficients".
  constexpr bool empty() const {
    for (auto &&i : d)
      if (i != 0) return 0;
    return 1;
  }
  constexpr data_type &data() { return d; }
  constexpr data_type const &data() const { return d; }

  // Unchecked coefficient access.
  constexpr value_type &operator[](u32 x) { return d[x]; }
  constexpr value_type const &operator[](u32 x) const { return d[x]; }
  // Evaluates the polynomial at `x` by Horner's rule.
  constexpr value_type operator()(value_type x) const {
    value_type ans = 0;
    // `~i` becomes 0 once i wraps past zero, which ends the loop; it also
    // skips the loop entirely when size() == 0.
    for (u32 i = size() - 1; ~i; --i) ans = ans * x + d[i];
    return ans;
  }

  // Calls f(index, coefficient&) for every index in [l, r).
  // The range must be non-empty and lie inside the polynomial (asserted).
  template <class F>
  requires requires(F f, u32 idx, mint &val) {
    f(idx, val);
  }
  constexpr void apply_range(u32 l, u32 r, F &&f) {
    assert(l < r && r <= size());
    for (u32 i = l; i < r; ++i) f(i, d[i]);
  }
  // Calls f(index, coefficient&) for every coefficient. A polynomial without
  // coefficients is a no-op instead of tripping apply_range's non-empty
  // assertion, so unary minus and scalar *= stay usable on it.
  template <class F>
  constexpr void apply(F &&f) {
    if (size()) apply_range(0, size(), std::forward<F>(f));
  }
  constexpr void resize(u32 size) { d.resize(size); }
  // Copy resized to `size` coefficients.
  constexpr poly pre(u32 size) const {
    poly _ = *this;
    _.resize(size);
    return _;
  }
  // Drops trailing zero coefficients but always leaves at least one.
  constexpr void strip() {
    auto it = std::find_if(d.rbegin(), d.rend(), [](auto const &x) { return x != 0; });
    d.resize(usz(d.rend() - it));
    if (d.empty()) d.push_back(value_type(0));
  }
  // Stripped copy; constexpr because the constexpr comparisons below call it.
  friend constexpr poly stripped(poly p) {
    p.strip();
    return p;
  }
  // Reverses the first `n` coefficients (all of them when n == 0).
  constexpr void reverse(u32 n = 0) { std::reverse(d.begin(), d.begin() + (n ? n : size())); }
  // In-place convolution with `r`, delegated to the backend.
  constexpr void conv(poly const &r, u32 ans_size = 0) { conv_core.conv(d, r.d, ans_size); }

  constexpr poly operator-() const {
    poly ret = *this;
    ret.apply([](u32, auto &v) { v = -v; });
    return ret;
  }

  // Scalar addition / subtraction act on the constant term only and index
  // coefficient 0 unchecked.
  friend constexpr poly operator+(poly p, value_type c) {
    p[0] += c;
    return p;
  }
  friend constexpr poly operator+(value_type c, poly const &p) { return p + c; }
  friend constexpr poly operator-(poly p, value_type c) {
    p[0] -= c;
    return p;
  }
  // c - p: negate every coefficient, then add c to the constant term
  // (subtraction is not commutative, so `p - c` would have the wrong sign).
  friend constexpr poly operator-(value_type c, poly const &p) { return -p + c; }

  constexpr poly &operator*=(value_type c) {
    apply([&c](u32, auto &v) { v *= c; });
    return *this;
  }
  friend constexpr poly operator*(poly p, value_type c) { return p *= c; }
  friend constexpr poly operator*(value_type c, poly p) { return p *= c; }

  // Coefficient-wise addition; the result has max(size(), r.size()) terms.
  constexpr poly &operator+=(poly const &r) {
    if (!r.size()) return *this;
    resize(std::max(size(), r.size()));
    apply_range(0, r.size(), [&r](u32 i, auto &v) { v += r[i]; });
    return *this;
  }
  friend constexpr poly operator+(poly l, poly const &r) { return l += r; }

  // Coefficient-wise subtraction; the result has max(size(), r.size()) terms.
  constexpr poly &operator-=(poly const &r) {
    if (!r.size()) return *this;
    resize(std::max(size(), r.size()));
    apply_range(0, r.size(), [&r](u32 i, auto &v) { v -= r[i]; });
    return *this;
  }
  friend constexpr poly operator-(poly l, poly const &r) { return l -= r; }

  // Polynomial product via conv(). If `r` has no coefficients the result
  // becomes the single coefficient 0.
  constexpr poly &operator*=(poly const &r) {
    if (!r.size()) {
      resize(1);
      d[0] = 0;
      return *this;
    }
    conv(r);
    return *this;
  }
  friend constexpr poly operator*(poly l, poly const &r) { return l *= r; }

  // Comparisons ignore trailing zero coefficients.
  constexpr auto operator<=>(poly const &r) const { return stripped(*this).d <=> stripped(r).d; }
  constexpr bool operator==(poly const &r) const { return stripped(*this).d == stripped(r).d; }
};

}  // namespace tifa_libs::math


#line 6 "src/code/poly/czt_fps.hpp"

namespace tifa_libs::math {

// @brief Chirp Z-Transform
// @param f polynomial to evaluate (taken by value and modified internally)
// @param c ratio between consecutive evaluation points
// @param m number of evaluation points; -1_u32 selects f.size()
// @param a first evaluation point
// @return {f(a*c^0), f(a*c^1), ..., f(a*c^{m-1})}
//
// Returns a default-constructed poly when f has no coefficients or m == 0.
// The power tables are function-local statics that persist between calls.
template <class mint, class ccore>
poly<mint, ccore> czt_fps(poly<mint, ccore> f, mint c, u32 m = -1_u32, mint a = 1) {
  using poly_t = poly<mint, ccore>;
  static rpow rp, irp;
  if (m == -1_u32) m = f.size();
  if (f.data().empty() || !m) return poly_t{};
  const u32 n = f.size();
  // Fold the starting point into the coefficients: f[i] *= a^i.
  if (a != 1) {
    mint a_pow = 1;
    for (u32 i = 0; i < n; ++i) {
      f[i] *= a_pow;
      a_pow *= a;
    }
  }
  // c == 0: the first point sees the sum of all coefficients, every other
  // point only the constant term.
  if (c == 0) {
    poly_t res(m, f[0]);
    res[0] = std::reduce(f.data().begin(), f.data().end(), mint{});
    return res;
  }
  const u32 mod = (u32)mint::mod();
  // If the inverse table was already built for this c (as after a call with
  // c^{-1}), swap the tables so reset() below can reuse them.
  if (c.val() == irp.base()) rp.swap(irp);
  rp.reset(c.val(), mod);
  irp.reset(c.inv().val(), mod);
  const u32 fwd_len = m + n;
  const u32 inv_len = std::max(m, n);
  // Exponent i * (i - 1) / 2, reduced modulo mod - 1.
  const auto tri = [mod](u32 i) { return (i * (i - 1_u64) / 2) % (mod - 1); };
  poly_t cc(fwd_len), icc(inv_len);
  cc[0] = icc[0] = 1;
  for (u32 i = 1; i < fwd_len; ++i) cc[i] = rp(tri(i));
  for (u32 i = 1; i < inv_len; ++i) icc[i] = irp(tri(i));
  for (u32 i = 1; i < n; ++i) f[i] *= icc[i];
  std::reverse(f.data().begin(), f.data().end());
  f.conv(cc, n + m);
  // The m requested values sit at indices [n - 1, n + m - 1) of the product.
  poly_t ans(f.data().begin() + ((isz)n - 1), f.data().begin() + (isz(n + m) - 1));
  for (u32 i = 1; i < m; ++i) ans[i] *= icc[i];
  return ans;
}

}  // namespace tifa_libs::math


#line 7 "src/code/conv/conv_czt.hpp"

namespace tifa_libs::math {

// Multiplies the polynomials `l` and `r` using chirp Z-transforms (czt_fps):
// both operands are evaluated at the powers of c = g^((m-1)/s), where
// g = proot(m), m = mint::mod() and s = bit_ceil(l.size() + r.size() - 1),
// multiplied pointwise, transformed back with c^{-1} and scaled by 1/s.
//
// @param l, r     operands (taken by value; they are resized and overwritten)
// @param ans_size number of coefficients to return; 0 selects
//                 l.size() + r.size() - 1
// @return a copy of the resulting coefficient container, ans_size entries long
//
// Asserts that the modulus fits in 32 bits and that s divides m - 1.
// If either operand has no coefficients, the result is all zeros and no
// transform is run.
template <class poly>
constexpr auto conv_czt(poly l, poly r, u32 ans_size = 0) {
  using mint = typename poly::value_type;
  // Same type the final `return ... .data()` deduces to, so both returns agree.
  using data_t = std::decay_t<decltype(l.data())>;
  const auto total = l.size() + r.size();
  // `total - 1` would wrap around when both operands are empty.
  if (!ans_size) ans_size = total ? u32(total - 1) : 0;
  // A product with an empty polynomial is zero; returning here also keeps
  // bit_ceil below away from the wrapped-around value.
  if (!l.size() || !r.size()) return data_t(ans_size);
  assert(mint::mod() <= u64(-1_u32));
  const u32 m = (u32)mint::mod();
  const u32 s = (u32)std::bit_ceil(total - 1);
  assert((m - 1) % s == 0);
  mint c = qpow(mint(proot(m)), (m - 1) / s);
  l.resize(s);
  r.resize(s);
  l = czt_fps(l, c);
  r = czt_fps(r, c);
  for (u32 i = 0; i < s; ++i) l[i] *= r[i];
  l = czt_fps<mint, typename poly::ccore_type>(l, c.inv());
  l.resize(ans_size);
  return (l *= mint(s).inv()).data();
}

}  // namespace tifa_libs::math


Back to top page