Tifa's CP Library

:heavy_check_mark: conv_3ntt (src/code/conv/conv_3ntt.hpp)

Depends on

Required by

Verified with

Code

#ifndef TIFALIBS_CONV_CONV_3NTT
#define TIFALIBS_CONV_CONV_3NTT

#include "../math/mul_mod.hpp"
#include "conv_dft.hpp"
#include "ntt.hpp"

namespace tifa_libs::math {

// 167772161, 469762049, 754974721
template <class mint0, class mint1, class mint2>
CEXP vecuu conv_3ntt_u64(std::tuple<NTT<mint0>, NTT<mint1>, NTT<mint2>> &ntt3, vecuu CR l, vecuu CR r, u64 mod, u32 ans_size = 0) {
  CEXP u64 m0 = mint0::mod(), m1 = mint1::mod(), m2 = mint2::mod();
  const u64 r01 = mint1(m0).inv().val(), r02 = mint2(m0).inv().val(), r12 = mint2(m1).inv().val(), r02r12 = (u32)mul_mod_u(r02, r12, m2);
  const u64 w1 = m0 % mod, w2 = mul_mod_u(m0, m1, mod);
  if (!ans_size) ans_size = u32(l.size() + r.size() - 1);
  auto &[ntt0, ntt1, ntt2] = ntt3;
  const vec<mint0> d0 = conv_dft_u64<NTT<mint0>, mint0>(ntt0, l, r, ans_size);
  const vec<mint1> d1 = conv_dft_u64<NTT<mint1>, mint1>(ntt1, l, r, ans_size);
  const vec<mint2> d2 = conv_dft_u64<NTT<mint2>, mint2>(ntt2, l, r, ans_size);
  vecuu ret(ans_size);
  flt_ (u32, i, 0, ans_size) {
    const u64 n1 = d1[i].val(), n2 = d2[i].val(), a = d0[i].val();
    const u64 b = mul_mod_u((n1 + m1 - a), r01, m1), c = mul_mod_u(n2 + m2 - a, r02r12, m2) + mul_mod_u(m2 - b, r12, m2);
    ret[i] = (a + mul_mod_u(b, w1, mod) + mul_mod_u(c % m2, w2, mod)) % mod;
  }
  return ret;
}
template <class mint, class mint0, class mint1, class mint2>
CEXP vec<mint> conv_3ntt(std::tuple<NTT<mint0>, NTT<mint1>, NTT<mint2>> &ntt3, vec<mint> CR l, vec<mint> CR r, u32 ans_size = 0) {
  if (!ans_size) ans_size = u32(l.size() + r.size() - 1);
  if (ans_size < 32) return conv_naive(l, r, ans_size);
  vecuu l_(l.size()), r_(r.size());
  flt_ (u32, i, 0, (u32)l.size()) l_[i] = l[i].val();
  flt_ (u32, i, 0, (u32)r.size()) r_[i] = r[i].val();
  vecuu _ = conv_3ntt_u64(ntt3, l_, r_, mint::mod(), ans_size);
  vec<mint> res(_.size());
  flt_ (u32, i, 0, (u32)_.size()) res[i] = _[i];
  return res;
}

}  // namespace tifa_libs::math

#endif
#line 1 "src/code/conv/conv_3ntt.hpp"



#line 1 "src/code/math/mul_mod.hpp"



#line 1 "src/code/math/safe_mod.hpp"



#line 1 "src/code/util/traits.hpp"



#line 1 "src/code/util/util.hpp"



#include <bits/stdc++.h>

#define CEXP constexpr
#define TPN typename
#define CR const&

#define cT_(...) std::conditional_t<sizeof(__VA_ARGS__) <= sizeof(size_t), __VA_ARGS__, __VA_ARGS__ CR>
#define fle_(T, i, l, r, ...) for (T i = (l), i##e = (r)__VA_OPT__(, ) __VA_ARGS__; i <= i##e; ++i)
#define flt_(T, i, l, r, ...) for (T i = (l), i##e = (r)__VA_OPT__(, ) __VA_ARGS__; i < i##e; ++i)

#ifdef ONLINE_JUDGE
#undef assert
#define assert(x) 42
#endif

using i8 = int8_t;
using i16 = int16_t;
using i32 = int32_t;
using i64 = int64_t;
using i128 = __int128_t;
using isz = ptrdiff_t;

using u8 = uint8_t;
using u16 = uint16_t;
using u32 = uint32_t;
using u64 = uint64_t;
using u128 = __uint128_t;
using usz = size_t;

using f32 = float;
using f64 = double;
using f128 = long double;

template <class T>
using ptt = std::pair<T, T>;
template <class T>
using pt3 = std::tuple<T, T, T>;
template <class T>
using pt4 = std::tuple<T, T, T, T>;

template <class E>
using itl = std::initializer_list<E>;
template <class T, usz N>
using arr = std::array<T, N>;
template <class T>
using vec = std::vector<T>;
template <class T>
using vvec = vec<vec<T>>;
template <class T>
using v3ec = vec<vvec<T>>;
template <class U, class T>
using vecp = vec<std::pair<U, T>>;
template <class U, class T>
using vvecp = vvec<std::pair<U, T>>;
template <class T>
using vecpt = vec<ptt<T>>;
template <class T>
using vvecpt = vvec<ptt<T>>;
template <class T>
using ptvec = ptt<vec<T>>;
template <class T>
using ptvvec = ptt<vvec<T>>;

template <class T, class C = std::less<T>>
using pq = std::priority_queue<T, vec<T>, C>;
template <class T>
using pqg = std::priority_queue<T, vec<T>, std::greater<T>>;

using strn = std::string;
using strnv = std::string_view;
template <class T, usz ext = std::dynamic_extent>
using spn = std::span<T const, ext>;

#define mk_(V, A, T) using V##A = V<T>;
#define mk(A, T) mk_(ptt, A, T) mk_(pt3, A, T) mk_(pt4, A, T) mk_(vec, A, T) mk_(vvec, A, T) mk_(v3ec, A, T) mk_(vecpt, A, T) mk_(vvecpt, A, T) mk_(ptvec, A, T) mk_(ptvvec, A, T) mk_(spn, A, T) mk_(itl, A, T)
mk(b, bool) mk(i, i32) mk(u, u32) mk(ii, i64) mk(uu, u64);
#undef mk
#undef mk_

using namespace std::literals;
CEXP i8 operator""_i8(unsigned long long x) { return (i8)x; }
CEXP i16 operator""_i16(unsigned long long x) { return (i16)x; }
CEXP i32 operator""_i32(unsigned long long x) { return (i32)x; }
CEXP i64 operator""_i64(unsigned long long x) { return (i64)x; }
CEXP isz operator""_iz(unsigned long long x) { return (isz)x; }

CEXP u8 operator""_u8(unsigned long long x) { return (u8)x; }
CEXP u16 operator""_u16(unsigned long long x) { return (u16)x; }
CEXP u32 operator""_u32(unsigned long long x) { return (u32)x; }
CEXP u64 operator""_u64(unsigned long long x) { return (u64)x; }
CEXP usz operator""_uz(unsigned long long x) { return (usz)x; }

inline const auto fn_0 = [](auto&&...) {};
inline const auto fn_is0 = [](auto x) { return x == 0; };

template <std::floating_point FP>
inline FP eps_v = std::sqrt(std::numeric_limits<FP>::epsilon());
template <std::floating_point FP>
CEXP void set_eps(FP v) { eps_v<FP> = v; }
using std::numbers::pi_v;

namespace tifa_libs {
using std::min, std::max, std::swap;
template <class T>
constexpr T abs(T x) { return x < 0 ? -x : x; }
}  // namespace tifa_libs


#line 5 "src/code/util/traits.hpp"

namespace tifa_libs {

template <class T>
concept iterable_c = requires(T v) {
  { v.begin() } -> std::same_as<TPN T::iterator>;
  { v.end() } -> std::same_as<TPN T::iterator>;
};

template <class T>
concept container_c = iterable_c<T> && !std::derived_from<T, std::basic_string<TPN T::value_type>> && !std::derived_from<T, std::basic_string_view<TPN T::value_type>>;

template <class T>
CEXP bool is_char_v = std::is_same_v<T, char> || std::is_same_v<T, signed char> || std::is_same_v<T, unsigned char>;
template <class T>
concept char_c = is_char_v<T>;

template <class T>
CEXP bool is_s128_v = std::is_same_v<T, __int128_t> || std::is_same_v<T, __int128>;
template <class T>
concept s128_c = is_s128_v<T>;

template <class T>
CEXP bool is_u128_v = std::is_same_v<T, __uint128_t> || std::is_same_v<T, unsigned __int128>;
template <class T>
concept u128_c = is_u128_v<T>;

template <class T>
CEXP bool is_i128_v = is_s128_v<T> || is_u128_v<T>;
template <class T>
concept i128_c = is_u128_v<T>;

template <class T>
CEXP bool is_int_v = std::is_integral_v<T> || is_i128_v<T>;
template <class T>
concept int_c = is_int_v<T>;

template <class T>
CEXP bool is_sint_v = is_s128_v<T> || (is_int_v<T> && std::is_signed_v<T>);
template <class T>
concept sint_c = is_sint_v<T>;

template <class T>
CEXP bool is_uint_v = is_u128_v<T> || (is_int_v<T> && std::is_unsigned_v<T>);
template <class T>
concept uint_c = is_uint_v<T>;

template <class T>
concept mint_c = requires(T x) {
  { x.mod() } -> uint_c;
  { x.val() } -> uint_c;
};

template <class T>
concept dft_c = requires(T x, vec<TPN T::data_t> v, u32 n) {
  { x.size() } -> std::same_as<u32>;
  x.bzr(n);
  x.dif(v, n);
  x.dit(v, n);
};

template <class T>
concept ntt_c = dft_c<T> && requires(T x) {
  T::max_size;
  T::G;
};

template <class T>
CEXP bool is_arithm_v = std::is_arithmetic_v<T> || is_int_v<T>;
template <class T>
concept arithm_c = is_arithm_v<T>;

template <class T>
struct to_sint : std::make_signed<T> {};
template <>
struct to_sint<u128> {
  using type = u128;
};
template <>
struct to_sint<i128> {
  using type = u128;
};
template <class T>
using to_sint_t = TPN to_sint<T>::type;

template <class T>
struct to_uint : std::make_unsigned<T> {};
template <>
struct to_uint<u128> {
  using type = u128;
};
template <>
struct to_uint<i128> {
  using type = u128;
};
template <class T>
using to_uint_t = TPN to_uint<T>::type;

}  // namespace tifa_libs


#line 5 "src/code/math/safe_mod.hpp"

namespace tifa_libs::math {

template <sint_c T>
CEXP T safe_mod(T x, to_uint_t<T> mod) { return ((x %= (T)mod) < 0 ? x + (T)mod : x); }

}  // namespace tifa_libs::math


#line 5 "src/code/math/mul_mod.hpp"

namespace tifa_libs::math {

CEXP i64 mul_mod_s(i64 a, i64 b, u64 mod) {
  if (std::bit_width((u64)abs(a)) + std::bit_width((u64)abs(b)) < 64) return safe_mod(a * b % (i64)mod, mod);
  return safe_mod((i64)((i128)a * b % mod), mod);
}
CEXP u64 mul_mod_u(u64 a, u64 b, u64 mod) {
  if (std::bit_width(a) + std::bit_width(b) <= 64) return a * b % mod;
  return (u64)((u128)a * b % mod);
}

}  // namespace tifa_libs::math


#line 1 "src/code/conv/conv_dft.hpp"



#line 1 "src/code/conv/conv_naive.hpp"



#line 5 "src/code/conv/conv_naive.hpp"

namespace tifa_libs::math {

template <class U, class T = U>
requires(sizeof(U) <= sizeof(T))
CEXP vec<T> conv_naive(vec<U> CR l, vec<U> CR r, u32 ans_size = 0) {
  if (l.empty() || r.empty()) return {};
  if (!ans_size) ans_size = u32(l.size() + r.size() - 1);
  const u32 n = (u32)l.size(), m = (u32)r.size();
  vec<T> ans(ans_size);
  if (n < m)
    flt_ (u32, j, 0, m)
      flt_ (u32, i, 0, n) {
        if (i + j >= ans_size) break;
        ans[i + j] += (T)l[i] * (T)r[j];
      }
  else
    flt_ (u32, i, 0, n)
      flt_ (u32, j, 0, m) {
        if (i + j >= ans_size) break;
        ans[i + j] += (T)l[i] * (T)r[j];
      }
  return ans;
}

}  // namespace tifa_libs::math


#line 6 "src/code/conv/conv_dft.hpp"

namespace tifa_libs::math {

template <dft_c DFT_t, std::same_as<TPN DFT_t::data_t> DFT_data_t>
CEXP vec<DFT_data_t> conv_dft(DFT_t &dft, vec<DFT_data_t> l, vec<DFT_data_t> r, u32 ans_size = 0) {
  if (!ans_size) ans_size = u32(l.size() + r.size() - 1);
  if (ans_size < 32) return conv_naive(l, r, ans_size);
  dft.bzr(max({(u32)l.size(), (u32)r.size(), min(u32(l.size() + r.size() - 1), ans_size)}));
  dft.dif(l), dft.dif(r);
  flt_ (u32, i, 0, dft.size()) l[i] *= r[i];
  return dft.dit(l), l.resize(ans_size), l;
}
template <class DFT_t, class mint, class T = u64>
CEXP vec<mint> conv_dft_u64(DFT_t &dft, vec<T> CR l, vec<T> CR r, u32 ans_size = 0) {
  if (!ans_size) ans_size = u32(l.size() + r.size() - 1);
  vec<mint> l_, r_;
  for (l_.reserve(l.size()); auto CR i : l) l_.push_back(i);
  for (r_.reserve(r.size()); auto CR i : r) r_.push_back(i);
  return conv_dft(dft, l_, r_, ans_size);
}

}  // namespace tifa_libs::math


#line 1 "src/code/conv/ntt.hpp"



#line 1 "src/code/bit/lowbit.hpp"



#line 5 "src/code/bit/lowbit.hpp"

namespace tifa_libs::bit {

template <class T>
CEXP T lowbit(T x) { return T(1) << std::countr_zero(x); }

}  // namespace tifa_libs::bit


#line 1 "src/code/math/qpow.hpp"



#line 5 "src/code/math/qpow.hpp"

namespace tifa_libs::math {

template <class T>
CEXP T qpow(T a, u64 b, cT_(T) init_v = T{1}) {
  T res = init_v;
  for (; b; b >>= 1, a = a * a)
    if (b & 1) res = res * a;
  return res;
}

}  // namespace tifa_libs::math


#line 1 "src/code/nt/proot.hpp"



#line 1 "src/code/nt/is_proot.hpp"



#line 1 "src/code/math/qpow_mod.hpp"



#line 5 "src/code/math/qpow_mod.hpp"

namespace tifa_libs::math {

CEXP u64 qpow_mod(u64 a, u64 b, u64 mod) {
  u64 res(1);
  for (a %= mod; b; b >>= 1, a = mul_mod_u(a, a, mod))
    if (b & 1) res = mul_mod_u(res, a, mod);
  return res;
}

}  // namespace tifa_libs::math


#line 5 "src/code/nt/is_proot.hpp"

namespace tifa_libs::math {

template <std::unsigned_integral T, class It>
CEXP bool is_proot(T g, T m, It pf_begin, It pf_end) {
  if (!g) return false;
  for (; pf_begin != pf_end; ++pf_begin)
    if (qpow_mod(g, (m - 1) / *pf_begin, m) == 1) return false;
  return true;
}

}  // namespace tifa_libs::math


#line 1 "src/code/nt/pfactors.hpp"



#line 1 "src/code/edh/discretization.hpp"



#line 5 "src/code/edh/discretization.hpp"

namespace tifa_libs {

template <iterable_c T = vec<int>>
CEXP T uniq(T v) {
  std::sort(v.begin(), v.end());
  auto [f, l] = std::ranges::unique(v);
  return v.erase(f, l), v;
}
template <iterable_c T = vec<int>>
CEXP std::pair<T, vecu> gen_id(T CR v) {
  const T _ = uniq(v);
  vecu _1;
  _1.reserve(v.size());
  flt_ (u32, i, 0, (u32)v.size()) _1.push_back(u32(std::ranges::lower_bound(_, v[i]) - _.begin()));
  return {_, _1};
}

}  // namespace tifa_libs


#line 1 "src/code/rand/gen.hpp"



#line 5 "src/code/rand/gen.hpp"

namespace tifa_libs::rand {

template <class Distri>
class Gen {
  std::conditional_t<sizeof(TPN Distri::result_type) <= 4, std::mt19937, std::mt19937_64> re;
  Distri dist;

 public:
  using random_engine = decltype(re);
  using distribution = Distri;
  using result_type = TPN Distri::result_type;

  CEXP Gen() : re(std::random_device{}()), dist() {}
  CEXP Gen(result_type a, result_type b) : re(std::random_device{}()), dist(a, b) {}

  CEXP void set_range(result_type a, result_type b) { dist = Distri(a, b); }
  CEXP random_engine& rand_eng() { return re; }
  CEXP Distri& distrib() { return dist; }
  void reset_seed() { re.seed((result_type)std::chrono::duration_cast<std::conditional_t<sizeof(TPN Distri::result_type) <= 4, std::chrono::seconds, std::chrono::nanoseconds>>(std::chrono::high_resolution_clock::now().time_since_epoch()).count()); }
  CEXP result_type operator()() { return dist(re); }
  result_type next() { return dist(re); }
};

}  // namespace tifa_libs::rand


#line 1 "src/code/nt/gcd.hpp"



#line 5 "src/code/nt/gcd.hpp"

namespace tifa_libs::math {

namespace gcd_impl_ {
template <uint_c T, uint_c U>
CEXP std::common_type_t<T, U> gcd__(T u, U v) {
  using W = std::common_type_t<T, U>;
  if (!u || !v) return u ^ v;
  const auto k = std::__countr_zero(u | v);
  u >>= k, v >>= k;
  do {
    if (W _ = v >> std::__countr_zero(v); u > _) v = u - _, u = _;
    else v = _ - u;
  } while (v);
  return u << k;
}
}  // namespace gcd_impl_

template <int_c T, int_c U>
CEXP auto gcd(T a, U b) { return gcd_impl_::gcd__((to_uint_t<T>)abs(a), (to_uint_t<U>)abs(b)); }

}  // namespace tifa_libs::math


#line 1 "src/code/nt/is_prime.hpp"



#line 6 "src/code/nt/is_prime.hpp"

namespace tifa_libs::math {

CEXP bool is_prime(u64 n) {
  if (n <= 2) return n == 2;
  if (~n & 1) return false;
  if (n < 8 || n == 61) return true;
  auto f = [n, d = (n - 1) >> std::countr_zero(n - 1)](auto&& bases) -> bool {
    for (u64 i : bases) {
      if (!(i % n)) continue;
      u64 t = d, y = qpow_mod(i, t, n);
      while (t != n - 1 && y != 1 && y != n - 1) y = mul_mod_u(y, y, n), t *= 2;
      if (y != n - 1 && (~t & 1)) return false;
    }
    return true;
  };
  if (n < (1 << 30)) {
    CEXP u64 bases[3] = {2, 7, 61};
    return f(bases);
  }
  CEXP u64 bases[7] = {2, 325, 9375, 28178, 450775, 9780504, 1795265022};
  return f(bases);
}

}  // namespace tifa_libs::math


#line 9 "src/code/nt/pfactors.hpp"

namespace tifa_libs::math {
namespace pfactors_impl_ {
static rand::Gen<std::uniform_int_distribution<u64>> e;
static auto __ = []() { e.reset_seed(); return 0; }();
CEXP u64 rho(u64 n) {
  e.set_range(1, n - 1);
  auto f = [n, r = e()](u64 x) { return (mul_mod_u(x, x, n) + r) % n; };
  u64 g = 1, x = 0, y = e(), yy = 0;
  const u32 LIM = 128;
  for (u64 r = 1, q = 1; g == 1; r *= 2) {
    x = y;
    flt_ (u64, i, 0, r) y = f(y);
    for (u64 k = 0; g == 1 && k < r; k += LIM) {
      yy = y;
      for (u64 i = 0; i < LIM && i < r - k; ++i) q = mul_mod_u(q, (n - (y = f(y)) + x) % n, n);
      g = gcd(q, n);
    }
  }
  if (g == n) do {
      g = gcd((x + (n - (yy = f(yy)))) % n, n);
    } while (g == 1);
  return g == n ? rho(n) : g;
}
CEXP void run(u64 n, vecuu &p) {
  if (n < 2) return;
  if (is_prime(n)) return p.push_back(n);
  const u64 g = rho(n);
  run(n / g, p), run(g, p);
}
}  // namespace pfactors_impl_

template <bool unique = true>
CEXP vecuu pfactors(u64 n) {
  vecuu p;
  if (u32 _ = (u32)std::countr_zero(n) & 63; _) {
    n >>= _;
    if CEXP (unique) p.push_back(2);
    else p.assign(_, 2);
  }
  if (n < 2) return p;
  pfactors_impl_::run(n, p);
  if CEXP (unique) return uniq(p);
  return std::ranges::sort(p), p;
}
CEXP vecp<u64, u32> pf_exp(u64 n) {
  auto p = pfactors<false>(n);
  vecp<u64, u32> ans;
  for (u64 lst = 0; u64 i : p)
    if (i != lst) ans.emplace_back(lst = i, 1);
    else ++ans.back().second;
  return ans;
}

}  // namespace tifa_libs::math


#line 6 "src/code/nt/proot.hpp"

namespace tifa_libs::math {

CEXP u64 proot(u64 m) {
  if (m == 2) return 1;
  if (m == 3 || m == 5) return 2;
  if (m == 104857601 || m == 167772161 || m == 469762049) return 3;
  if (m == 754974721) return 11;
  if (m == 998244353 || m == 1004535809) return 3;
  const auto pf = pfactors(m - 1);
  for (u64 g = 2;; ++g)
    if (is_proot(g, m, pf.begin(), pf.end())) return g;
}

}  // namespace tifa_libs::math


#line 7 "src/code/conv/ntt.hpp"

namespace tifa_libs::math {

template <class mint>
struct NTT {
  using data_t = mint;

  static_assert(is_prime(mint::mod()) && (mint::mod() & 3) == 1, "MOD must be prime with 4k+1");
  static CEXP u64 max_size = bit::lowbit(mint::mod() - 1);

  const mint G = proot(mint::mod());

  explicit CEXP NTT() : root() {}

  CEXP u32 size() const { return (u32)root.size(); }
  CEXP void bzr(u32 len = max_size) {
    const u32 n = std::bit_ceil(len);
    assert(n <= max_size);
    if (n == size()) return;
    root.resize(n), root[0] = 1;
    const mint w = qpow(G, (mint::mod() - 1) / n);
    flt_ (u32, i, 1, n) root[i] = root[i - 1] * w;
  }
#pragma GCC diagnostic ignored "-Wsign-conversion"
  CEXP void dif(vec<mint> &f, u32 n = 0) const {
    assert(size());
    if (!n) n = size();
    if (f.size() < n) f.resize(n);
    assert(std::has_single_bit(n) && n <= size());
    for (u32 i = n / 2, d = 1; i; i /= 2, d *= 2)
      for (u32 j = 0; j < n; j += i * 2) {
        auto w = root.begin();
        mint u, t;
        for (u32 k = 0; k < i; ++k, w += d) f[j | k] = (u = f[j | k]) + (t = f[i | j | k]), f[i | j | k] = (u - t) * (*w);
      }
  }
  CEXP void dit(vec<mint> &f, u32 n = 0) const {
    assert(size());
    if (!n) n = size();
    if (f.size() < n) f.resize(n);
    assert(std::has_single_bit(n) && n <= size());
    for (u32 i = 1, d = n / 2; d; i *= 2, d /= 2)
      for (u32 j = 0; j < n; j += i * 2) {
        auto w = root.begin();
        mint t;
        for (u32 k = 0; k < i; ++k, w += d) f[i | j | k] = f[j | k] - (t = f[i | j | k] * (*w)), f[j | k] += t;
      }
    std::reverse(f.begin() + 1, f.end());
    const mint t = mint(n).inv();
    flt_ (u32, i, 0, n) f[i] *= t;
  }
#pragma GCC diagnostic warning "-Wsign-conversion"

 private:
  vec<mint> root;
};

}  // namespace tifa_libs::math


#line 7 "src/code/conv/conv_3ntt.hpp"

namespace tifa_libs::math {

// 167772161, 469762049, 754974721
template <class mint0, class mint1, class mint2>
CEXP vecuu conv_3ntt_u64(std::tuple<NTT<mint0>, NTT<mint1>, NTT<mint2>> &ntt3, vecuu CR l, vecuu CR r, u64 mod, u32 ans_size = 0) {
  CEXP u64 m0 = mint0::mod(), m1 = mint1::mod(), m2 = mint2::mod();
  const u64 r01 = mint1(m0).inv().val(), r02 = mint2(m0).inv().val(), r12 = mint2(m1).inv().val(), r02r12 = (u32)mul_mod_u(r02, r12, m2);
  const u64 w1 = m0 % mod, w2 = mul_mod_u(m0, m1, mod);
  if (!ans_size) ans_size = u32(l.size() + r.size() - 1);
  auto &[ntt0, ntt1, ntt2] = ntt3;
  const vec<mint0> d0 = conv_dft_u64<NTT<mint0>, mint0>(ntt0, l, r, ans_size);
  const vec<mint1> d1 = conv_dft_u64<NTT<mint1>, mint1>(ntt1, l, r, ans_size);
  const vec<mint2> d2 = conv_dft_u64<NTT<mint2>, mint2>(ntt2, l, r, ans_size);
  vecuu ret(ans_size);
  flt_ (u32, i, 0, ans_size) {
    const u64 n1 = d1[i].val(), n2 = d2[i].val(), a = d0[i].val();
    const u64 b = mul_mod_u((n1 + m1 - a), r01, m1), c = mul_mod_u(n2 + m2 - a, r02r12, m2) + mul_mod_u(m2 - b, r12, m2);
    ret[i] = (a + mul_mod_u(b, w1, mod) + mul_mod_u(c % m2, w2, mod)) % mod;
  }
  return ret;
}
template <class mint, class mint0, class mint1, class mint2>
CEXP vec<mint> conv_3ntt(std::tuple<NTT<mint0>, NTT<mint1>, NTT<mint2>> &ntt3, vec<mint> CR l, vec<mint> CR r, u32 ans_size = 0) {
  if (!ans_size) ans_size = u32(l.size() + r.size() - 1);
  if (ans_size < 32) return conv_naive(l, r, ans_size);
  vecuu l_(l.size()), r_(r.size());
  flt_ (u32, i, 0, (u32)l.size()) l_[i] = l[i].val();
  flt_ (u32, i, 0, (u32)r.size()) r_[i] = r[i].val();
  vecuu _ = conv_3ntt_u64(ntt3, l_, r_, mint::mod(), ans_size);
  vec<mint> res(_.size());
  flt_ (u32, i, 0, (u32)_.size()) res[i] = _[i];
  return res;
}

}  // namespace tifa_libs::math


Back to top page