#pragma once
#include "../../trans/fft_r2/lib.hpp"
#include "../naive/lib.hpp"
namespace tifa_libs {
// Convolution of two sequences of modular integers through a floating-point FFT with
// coefficient splitting ("MTT"): every value is cut into a low part (OFS bits) and a
// high part, so the FFT only has to carry products of roughly half-width numbers.
//
// fft       transform engine; it is re-sized through fft.bzr(), so its cached tables may
//           change as a side effect of this call.
// l, r      input sequences; the element type must provide val(), mod() and smod().
// ans_size  number of leading coefficients wanted; 0 selects the full product length
//           l.size() + r.size() - 1.
// returns   a vector of exactly ans_size coefficients; positions past the full product
//           length keep their value-initialised content.
template <class mint, class FP>
CEXP vec<mint> conv_mtt(fft_r2<FP>& fft, vec<mint> CR l, vec<mint> CR r, u32 ans_size = 0) NE {
  using C = TPN fft_r2<FP>::data_t;
  if (!ans_size) ans_size = u32(l.size() + r.size() - 1);
  // Short inputs are cheaper (and exact) with the quadratic routine.
  if (min(l.size(), r.size()) < CONV_NAIVE_THRESHOLD) return conv_naive(l, r, ans_size);
  // The two scalar shortcuts below are only reachable when CONV_NAIVE_THRESHOLD <= 1.
  if (l.size() == 1) {
    vec<mint> ans = r;
    for (ans.resize(ans_size); auto& i : ans) i *= l[0];
    return ans;
  }
  if (r.size() == 1) {
    vec<mint> ans = l;
    for (ans.resize(ans_size); auto& i : ans) i *= r[0];
    return ans;
  }
  // Terms of index >= ans_size cannot contribute to the first ans_size outputs, so only
  // that prefix of each input is transformed. The transform must still cover the whole
  // linear product of the two prefixes; a shorter transform would wrap the high
  // coefficients around onto the low ones.
  cu32 len_l = min((u32)l.size(), ans_size), len_r = min((u32)r.size(), ans_size);
  cu32 prod_len = len_l + len_r - 1;
  fft.bzr(prod_len);
  cu32 n = fft.size();
  // OFS = half of the bit width of (mod - 1), rounded up; MSK selects the low OFS bits.
  csint OFS = ((int)sizeof(decltype(mint::mod())) * 8 - std::countl_zero(mint::mod() - 1) + 1) / 2;
  cu32 MSK = ((1u << OFS) - 1);
  vec<mint> ans(ans_size);
  vec<C> a(n), b(n);
  // Pack (low, high) halves of each value into the (real, imaginary) parts.
  flt_ (u32, i, 0, len_l) a[i] = {(FP)(l[i].val() & MSK), (FP)(l[i].val() >> OFS)};
  flt_ (u32, i, 0, len_r) b[i] = {(FP)(r[i].val() & MSK), (FP)(r[i].val() >> OFS)};
  fft.dif(a), fft.dif(b);
  {
    vec<C> p(n), q(n);
    for (u32 i = 0, j; i < n; ++i) {
      j = (n - i) & (n - 1);  // index of the conjugate-symmetric partner, (-i) mod n
      // da / db: spectra of the low / high halves of l; dc: spectrum of the low half of r;
      // dd: spectrum of the high half of r already multiplied by the imaginary unit.
      C da = (a[i] + std::conj(a[j])) * C(.5, 0), db = (a[i] - std::conj(a[j])) * C(0, -.5),
        dc = (b[i] + std::conj(b[j])) * C(.5, 0), dd = (b[i] - std::conj(b[j])) * C(.5, 0);
      // Stored at the mirrored index so that a second forward transform acts as the
      // (unnormalised) inverse.
      p[j] = da * dc + da * dd;
      q[j] = db * dc + db * dd;
    }
    a = p, b = q;
  }
  fft.dif(a), fft.dif(b);
  // Only indices below prod_len (<= n) carry data; stopping there also keeps the reads
  // inside a and b when the caller asks for more than the full product length.
  flt_ (u32, i, 0, min(ans_size, prod_len)) {
    // Divide by n to finish the inverse transform, round to nearest, reduce.
    ci64 da = (i64)(a[i].real() / (FP)n + .5) % mint::smod(),  // low * low
      db = (i64)(a[i].imag() / (FP)n + .5) % mint::smod(),     // low * high
      dc = (i64)(b[i].real() / (FP)n + .5) % mint::smod(),     // high * low
      dd = (i64)(b[i].imag() / (FP)n + .5) % mint::smod();     // high * high
    ans[i] = da + ((db + dc) << OFS) % mint::smod() + (dd << (OFS * 2)) % mint::smod();
  }
  return ans;
}
}  // namespace tifa_libs
#line 2 "src/conv/add/mtt/lib.hpp"
#line 2 "src/conv/trans/fft_r2/lib.hpp"
#line 2 "src/util/alias/others/lib.hpp"
#line 2 "src/util/consts/lib.hpp"
#line 2 "src/util/alias/num/lib.hpp"
#line 2 "src/util/util/lib.hpp"
// https://github.com/Tiphereth-A/CP-lib
#include <bits/extc++.h>
// clang-format off
namespace tifa_libs {
// Keyword / qualifier short-hands used throughout the library.
#define CEXP constexpr
#define CEXPE constexpr explicit
#define CR const&
#define CP const*
#define PC *const
#define CPC const*const
#define TPN typename
#define NE noexcept
#define CNE const noexcept
#define ND [[nodiscard]]
// cT_(T): T itself when sizeof(T) <= 2 * sizeof(size_t), otherwise `T const&`.
#define cT_(...) std::conditional_t<sizeof(__VA_ARGS__) <= sizeof(size_t) * 2, __VA_ARGS__, __VA_ARGS__ CR>
// flt_(T, i, l, r, extra...): `for` header counting i of type T from l while i < r.
// The bound r is evaluated once and stored in a variable named `i##e`; any extra
// arguments are appended as further declarators of type T in the init-statement.
// NOLINTNEXTLINE(misc-const-correctness)
#define flt_(T, i, l, r, ...) for (T i = (l), i##e = (r)__VA_OPT__(, ) __VA_ARGS__; i < i##e; ++i)
// retif_(cond, a[, b]): `if cond return a` and, when b is given, `; else return b`.
// `cond` is pasted directly after `if`, so it must bring its own parentheses
// (attributes such as [[unlikely]] may follow them).
#define retif_(cond, if_true, ...) if cond return if_true __VA_OPT__(; else return __VA_ARGS__)
// With ONLINE_JUDGE defined, assert(x) is replaced by the constant expression 42;
// its argument is then never evaluated.
#ifdef ONLINE_JUDGE
#undef assert
#define assert(x) 42
#endif
// Makes std::ranges algorithms and the standard literal suffixes visible unqualified
// inside this namespace.
using namespace std::ranges;
using namespace std::literals;
// Absolute value: returns -x when x < 0, otherwise x.
template <class T>
CEXP T abs(T x) NE { retif_((x < 0), -x, x); }
} // namespace tifa_libs
// clang-format on
#line 4 "src/util/alias/num/lib.hpp"
// clang-format off
namespace tifa_libs {
// mk0_(w, t): declares alias `w` for t and alias `cw` for `const t`.
#define mk0_(w, t) using w = t; using c##w = const t
// mk_(w, t): same as mk0_, plus a user-defined literal suffix `_w` that casts an
// unsigned long long literal to w (e.g. 1_u32).
#define mk_(w, t) mk0_(w, t); CEXP w operator""_##w(unsigned long long x) NE { return (w)x; }
// Integer and character aliases, each with a matching literal suffix.
mk_(i8, int8_t) mk_(u8, uint8_t) mk_(i16, int16_t) mk_(u16, uint16_t) mk_(i32, int32_t) mk_(u32, uint32_t) mk_(i64, int64_t) mk_(u64, uint64_t) mk_(isz, ssize_t) mk_(usz, size_t) mk_(chr, char) mk_(schr, signed char) mk_(uchr, unsigned char) mk_(sint, signed) mk_(uint, unsigned);
// 128-bit integer and floating-point aliases; these get no literal suffix.
mk0_(i128, __int128_t); mk0_(u128, __uint128_t); mk0_(f32, float); mk0_(f64, double); mk0_(f128, long double);
#undef mk0_
#undef mk_
} // namespace tifa_libs
// clang-format on
#line 4 "src/util/consts/lib.hpp"
// clang-format off
namespace tifa_libs {
using std::numbers::pi_v;

// Per-type comparison tolerance; starts out as sqrt(machine epsilon) of FP.
template <std::floating_point FP>
inline FP eps_v = std::sqrt(std::numeric_limits<FP>::epsilon());

// Overwrites the tolerance stored in eps_v<FP>.
template <std::floating_point FP>
CEXP void set_eps(FP v) NE { eps_v<FP> = v; }

// Compile-time value built from __TIME__ ("hh:mm:ss"): the low nibble of each of the
// six digit characters, packed most-significant first into 24 bits.
CEXP u32 TIME = [] {
  u32 packed = 0;
  for (const u32 pos : {0u, 1u, 3u, 4u, 6u, 7u}) packed = (packed << 4) | (u32)(__TIME__[pos] & 15);
  return packed;
}();

// Lookup table indexed by a 16-bit value made of two character codes. When both bytes
// are ASCII digits the entry is (low byte digit) * 10 + (high byte digit); every other
// entry holds u32(-1).
CEXP auto STR2U16 = [] {
  std::array<u32, 65536> lut{};
  lut.fill(-1_u32);
  for (u32 hi = 48; hi < 58; ++hi)
    for (u32 lo = 48; lo < 58; ++lo) lut[(hi << 8) | lo] = (lo & 15) * 10 + (hi & 15);
  return lut;
}();

// Callable that accepts any arguments and does nothing.
inline const auto fn_0 = [](auto&&...) NE {};
// Predicate: true when its argument compares equal to 0.
inline const auto fn_is0 = [](auto x) NE { return x == 0; };
}  // namespace tifa_libs
// clang-format on
#line 4 "src/util/alias/others/lib.hpp"
namespace tifa_libs {
// Hash functor for integer-like keys: xors the key with the build-time constant TIME,
// multiplies by an odd 64-bit constant and byte-swaps the result.
template <class T>
struct chash {
CEXP static u64 C = u64(pi_v<f128> * 2e18) | 71;
CEXP u64 operator()(T x) CNE { return __builtin_bswap64(((u64)x ^ TIME) * C); }
};
// clang-format off
// mk_(w, t): alias `w` for t and `cw` for `const t`.
#define mk_(w, t) using w = t; using c##w = const t;
mk_(strn, std::string) mk_(strnv, std::string_view)
#undef mk_
// Small aggregates with defaulted three-way comparison, each with a const alias.
template <class T> struct edge_t { T w; u32 u, v; CEXP auto operator<=>(edge_t CR) const = default; }; template <class T> using cedge_t = const edge_t<T>;
template <class T> struct pt3 { T _0, _1, _2; CEXP auto operator<=>(pt3 CR) const = default; }; template <class T> using cpt3 = const pt3<T>;
template <class T> struct pt4 { T _0, _1, _2, _3; CEXP auto operator<=>(pt4 CR) const = default; }; template <class T> using cpt4 = const pt4<T>;
// mkT_(w, t): alias template `w<T>` for t and `cw<T>` for `const t`.
#define mkT_(w, t, ...) template <class T> using w = t __VA_OPT__(, ) __VA_ARGS__; template <class T> using c##w = const t __VA_OPT__(, ) __VA_ARGS__;
mkT_(ptt, std::pair<T, T>) mkT_(alc, std::pmr::polymorphic_allocator<T>) mkT_(vec, std::vector<T>) mkT_(vvec, vec<vec<T>>) mkT_(v3ec, vvec<vec<T>>) mkT_(vecpt, vec<ptt<T>>) mkT_(vvecpt, vvec<ptt<T>>) mkT_(ptvec, ptt<vec<T>>) mkT_(ptvvec, ptt<vvec<T>>)
#undef mkT_
template <class T> using itl = std ::initializer_list<T>;
template <class T, usz ext = std::dynamic_extent> using spn = std::span<T const, ext>;
template <class T, usz N> using arr = std::array<T, N>; template <class T, usz N> using carr = std::array<const T, N>;
template <class U, class T> using vecp = vec<std::pair<U, T>>; template <class U, class T> using vvecp = vvec<std::pair<U, T>>;
// vvecp is declared once only (on the line above); a second identical declaration
// used to sit here.
template <class U, class T> using vvvecp = vvec<vvec<std::pair<U, T>>>;
// Ordered / hashed containers: pb_ds versions when its headers were included,
// standard containers otherwise.
#ifdef PB_DS_ASSOC_CNTNR_HPP
template <class T, class C = std::less<T>> using set = __gnu_pbds::tree<T, __gnu_pbds::null_type, C>;
template <class K, class V, class C = std::less<K>> using map = __gnu_pbds::tree<K, V, C>;
// hset<u64> s({}, {}, {}, {}, {1<<16});
template <class T, class HF = chash<T>> using hset = __gnu_pbds::gp_hash_table<T, __gnu_pbds::null_type, HF>;
// hmap<u64, int> s({}, {}, {}, {}, {1<<16});
template <class K, class V, class HF = chash<K>> using hmap = __gnu_pbds::gp_hash_table<K, V, HF>;
#else
using std::set, std::map;
template <class T, class HF = chash<T>> using hset = std::unordered_set<T, HF>;
template <class K, class V, class HF = chash<K>> using hmap = std::unordered_map<K, V, HF>;
#endif
// Priority queue: pb_ds version when available, std::priority_queue otherwise.
#ifdef PB_DS_PRIORITY_QUEUE_HPP
template <class T, class C = std::less<T>> using pq = __gnu_pbds::priority_queue<T, C>;
#else
template <class T, class C = std::less<T>> using pq = std::priority_queue<T, vec<T>, C>;
#endif
// Priority queue ordered by std::greater.
template <class T> using pqg = pq<T, std::greater<T>>;
// clang-format on
// mk(A, T): for every alias template V above, declares `V##A` = V<T> (and the const
// variant `cV##A`), e.g. mk(u, u32) yields vecu = vec<u32> and cvecu = cvec<u32>.
#define mk1_(V, A, T) using V##A = V<T>;
#define mk_(V, A, T) mk1_(V, A, T) mk1_(c##V, A, T)
#define mk(A, T) mk_(edge_t, A, T) mk_(ptt, A, T) mk_(pt3, A, T) mk_(pt4, A, T) mk_(vec, A, T) mk_(vvec, A, T) mk_(v3ec, A, T) mk_(vecpt, A, T) mk_(vvecpt, A, T) mk_(ptvec, A, T) mk_(ptvvec, A, T) mk1_(spn, A, T) mk1_(itl, A, T)
mk(b, bool) mk(c, chr) mk(i, i32) mk(u, u32) mk(ii, i64) mk(uu, u64) mk(t, isz) mk(z, usz) mk(f, f32) mk(d, f64) mk(s, strn);
#undef mk
#undef mk_
#undef mk1_
} // namespace tifa_libs
#line 4 "src/conv/trans/fft_r2/lib.hpp"
namespace tifa_libs {
// Radix-2 complex FFT that caches its bit-reversal permutation and twiddle factors.
// Call bzr() to (re)build the tables before transforming.
template <std::floating_point FP>
class fft_r2 {
  using C = std::complex<FP>;
  const FP TAU = std::acos((FP)-1.) * 2;
  vecu rev;  // rev[i]: i with its log2(size()) low bits reversed
  vec<C> w;  // w[i] = exp(+TAU * i / size() * I)

 public:
  using data_t = C;
  // Current table length (0 before the first bzr(), afterwards a power of two >= 2).
  ND CEXP u32 size() CNE { return (u32)rev.size(); }
  // Rebuilds the tables for the smallest power of two that is >= max(len, 2).
  // Does nothing when the tables already have that length.
  CEXP void bzr(u32 len) NE {
    cu32 n = max(std::bit_ceil(len), 2_u32);
    if (n == size()) return;
    rev.resize(n, 0);
    cu32 k = (u32)(std::bit_width(n) - 1);
    // rev[i] is derived from the already recomputed rev[i / 2].
    flt_ (u32, i, 0, n) rev[i] = (rev[i / 2] / 2) | ((i & 1) << (k - 1));
    w.resize(n), w[0].real(1);
    flt_ (u32, i, 1, n) w[i] = {std::cos(TAU * (FP)i / (FP)n), std::sin(TAU * (FP)i / (FP)n)};
  }
  // In-place unnormalised transform of the first n entries of f, using the kernel
  // exp(+TAU * I * j * k / n). n == 0 selects size(); f is grown to n entries if shorter.
  // n must be a power of two not exceeding size().
  CEXP void dif(vec<data_t>& f, u32 n = 0) CNE {
    if (!n) n = size();
    if (!n) return;  // tables never built and nothing requested
    if (f.size() < n) f.resize(n);
    assert(n <= size());
    assert(std::has_single_bit(n));
    cu32 full = size();
    // The tables are built for `full` points. Reversing log2(n) bits is the full-width
    // reversal shifted right by the surplus bits, and a stage of length `len` needs
    // every (full / len)-th twiddle.
    cu32 surplus = (u32)(std::countr_zero(full) - std::countr_zero(n));
    flt_ (u32, i, 0, n)
      if (cu32 ri = rev[i] >> surplus; i < ri) swap(f[ri], f[i]);
    // Index-based butterflies: no iterator arithmetic, hence no sign conversions and no
    // need to touch the diagnostic state of the including translation unit.
    for (u32 len = 2; len <= n; len *= 2) {
      cu32 half = len / 2, step = full / len;
      for (u32 j = 0; j < n; j += len)
        flt_ (u32, k, 0, half) {
          const data_t t = f[j + k + half] * w[k * step];
          f[j + k + half] = f[j + k] - t, f[j + k] = f[j + k] + t;
        }
    }
  }
  // Runs dif() and divides the first n entries by n.
  // NOTE(review): dif() uses the same kernel sign in both directions, so dit(dif(x))
  // yields x with indices mirrored (k -> (n - k) mod n), not x itself — confirm callers
  // account for this.
  CEXP void dit(vec<data_t>& f, u32 n = 0) CNE {
    if (!n) n = size();
    dif(f, n);
    flt_ (u32, i, 0, n) f[i] /= (FP)n;
  }
};
}  // namespace tifa_libs
#line 2 "src/conv/add/naive/lib.hpp"
#line 4 "src/conv/add/naive/lib.hpp"
namespace tifa_libs {
// Inputs whose shorter side is below this length are multiplied with conv_naive.
CEXP inline u32 CONV_NAIVE_THRESHOLD = 16;
// Quadratic-time convolution.
//
// U         element type of the inputs.
// T         accumulator / result element type, at least as large as U.
// l, r      input sequences; an empty input yields an empty result.
// ans_size  number of leading coefficients wanted; 0 selects l.size() + r.size() - 1.
// returns   vector of ans_size coefficients; positions past the full product length
//           keep their value-initialised content.
template <class U, class T = U>
requires(sizeof(U) <= sizeof(T))
CEXP vec<T> conv_naive(vec<U> CR l, vec<U> CR r, u32 ans_size = 0) NE {
  retif_((l.empty() || r.empty()) [[unlikely]], {});
  if (!ans_size) ans_size = u32(l.size() + r.size() - 1);
  vec<T> ans(ans_size);
  u32 n = (u32)l.size(), m = (u32)r.size();
  // Iterate the longer sequence in the outer loop.
  auto &&l_ = n < m ? r : l, &&r_ = n < m ? l : r;
  if (n < m) swap(n, m);
  // Rows with i >= ans_size contribute nothing. Stopping there also keeps the unsigned
  // `ans_size - i` from wrapping around, which would let the inner loop write past the
  // end of ans.
  flt_ (u32, i, 0, min(n, ans_size))
    flt_ (u32, j, 0, min(m, ans_size - i)) ans[i + j] += (T)l_[i] * (T)r_[j];
  return ans;
}
}  // namespace tifa_libs
#line 5 "src/conv/add/mtt/lib.hpp"
namespace tifa_libs {
// Convolution of two sequences of modular integers through a floating-point FFT with
// coefficient splitting ("MTT"): every value is cut into a low part (OFS bits) and a
// high part, so the FFT only has to carry products of roughly half-width numbers.
//
// fft       transform engine; it is re-sized through fft.bzr(), so its cached tables may
//           change as a side effect of this call.
// l, r      input sequences; the element type must provide val(), mod() and smod().
// ans_size  number of leading coefficients wanted; 0 selects the full product length
//           l.size() + r.size() - 1.
// returns   a vector of exactly ans_size coefficients; positions past the full product
//           length keep their value-initialised content.
template <class mint, class FP>
CEXP vec<mint> conv_mtt(fft_r2<FP>& fft, vec<mint> CR l, vec<mint> CR r, u32 ans_size = 0) NE {
  using C = TPN fft_r2<FP>::data_t;
  if (!ans_size) ans_size = u32(l.size() + r.size() - 1);
  // Short inputs are cheaper (and exact) with the quadratic routine.
  if (min(l.size(), r.size()) < CONV_NAIVE_THRESHOLD) return conv_naive(l, r, ans_size);
  // The two scalar shortcuts below are only reachable when CONV_NAIVE_THRESHOLD <= 1.
  if (l.size() == 1) {
    vec<mint> ans = r;
    for (ans.resize(ans_size); auto& i : ans) i *= l[0];
    return ans;
  }
  if (r.size() == 1) {
    vec<mint> ans = l;
    for (ans.resize(ans_size); auto& i : ans) i *= r[0];
    return ans;
  }
  // Terms of index >= ans_size cannot contribute to the first ans_size outputs, so only
  // that prefix of each input is transformed. The transform must still cover the whole
  // linear product of the two prefixes; a shorter transform would wrap the high
  // coefficients around onto the low ones.
  cu32 len_l = min((u32)l.size(), ans_size), len_r = min((u32)r.size(), ans_size);
  cu32 prod_len = len_l + len_r - 1;
  fft.bzr(prod_len);
  cu32 n = fft.size();
  // OFS = half of the bit width of (mod - 1), rounded up; MSK selects the low OFS bits.
  csint OFS = ((int)sizeof(decltype(mint::mod())) * 8 - std::countl_zero(mint::mod() - 1) + 1) / 2;
  cu32 MSK = ((1u << OFS) - 1);
  vec<mint> ans(ans_size);
  vec<C> a(n), b(n);
  // Pack (low, high) halves of each value into the (real, imaginary) parts.
  flt_ (u32, i, 0, len_l) a[i] = {(FP)(l[i].val() & MSK), (FP)(l[i].val() >> OFS)};
  flt_ (u32, i, 0, len_r) b[i] = {(FP)(r[i].val() & MSK), (FP)(r[i].val() >> OFS)};
  fft.dif(a), fft.dif(b);
  {
    vec<C> p(n), q(n);
    for (u32 i = 0, j; i < n; ++i) {
      j = (n - i) & (n - 1);  // index of the conjugate-symmetric partner, (-i) mod n
      // da / db: spectra of the low / high halves of l; dc: spectrum of the low half of r;
      // dd: spectrum of the high half of r already multiplied by the imaginary unit.
      C da = (a[i] + std::conj(a[j])) * C(.5, 0), db = (a[i] - std::conj(a[j])) * C(0, -.5),
        dc = (b[i] + std::conj(b[j])) * C(.5, 0), dd = (b[i] - std::conj(b[j])) * C(.5, 0);
      // Stored at the mirrored index so that a second forward transform acts as the
      // (unnormalised) inverse.
      p[j] = da * dc + da * dd;
      q[j] = db * dc + db * dd;
    }
    a = p, b = q;
  }
  fft.dif(a), fft.dif(b);
  // Only indices below prod_len (<= n) carry data; stopping there also keeps the reads
  // inside a and b when the caller asks for more than the full product length.
  flt_ (u32, i, 0, min(ans_size, prod_len)) {
    // Divide by n to finish the inverse transform, round to nearest, reduce.
    ci64 da = (i64)(a[i].real() / (FP)n + .5) % mint::smod(),  // low * low
      db = (i64)(a[i].imag() / (FP)n + .5) % mint::smod(),     // low * high
      dc = (i64)(b[i].real() / (FP)n + .5) % mint::smod(),     // high * low
      dd = (i64)(b[i].imag() / (FP)n + .5) % mint::smod();     // high * high
    ans[i] = da + ((db + dc) << OFS) % mint::smod() + (dd << (OFS * 2)) % mint::smod();
  }
  return ans;
}
}  // namespace tifa_libs