#ifndef TIFALIBS_POLY_POLYMTT
#define TIFALIBS_POLY_POLYMTT
#include "../conv/conv_mtt.hpp"
#include "poly.hpp"
namespace tifa_libs::math {
namespace polymtt_impl_ {
// Convolution core that routes poly multiplication through conv_mtt.
// It derives from FFT<FP>, so the core object itself is the transform engine
// (and table cache) that conv_mtt receives.
template <class FP = f64>
struct cconv_mtt : public FFT<FP> {
  // Category tag required by poly's constraint on its core type.
  static CEXP ccore_t ct_cat = ct_FFT;
  // Replaces `l` with conv_mtt(l, r, sz); `sz` is forwarded untouched.
  template <class mint>
  CEXP void conv(vec<mint>& l, vec<mint> CR r, u32 sz = 0) {
    FFT<FP>& engine = *this;
    l = conv_mtt<mint, FP>(engine, l, r, sz);
  }
};
}  // namespace polymtt_impl_
// poly instantiated with the cconv_mtt core.
template <class mint, class FP = f64>
using polymtt = poly<mint, polymtt_impl_::cconv_mtt<FP>>;
}  // namespace tifa_libs::math
#endif
#line 1 "src/code/poly/polymtt.hpp"
#line 1 "src/code/conv/conv_mtt.hpp"
#line 1 "src/code/conv/conv_naive.hpp"
#line 1 "src/code/util/util.hpp"
#include <bits/stdc++.h>
// Shorthand for frequently used keywords / qualifiers.
#define CEXP constexpr
#define TPN typename
#define CR const&
// Parameter-type helper: the type itself when sizeof(type) <= sizeof(size_t),
// otherwise a const reference to it.
#define cT_(...) std::conditional_t<sizeof(__VA_ARGS__) <= sizeof(size_t), __VA_ARGS__, __VA_ARGS__ CR>
// Counting loops over a variable `i` of type T. The bound `r` is evaluated once into
// `i##e`; any extra arguments are appended as further declarators of type T in the
// loop's init-statement.
// fle_ iterates the closed range [l, r]; flt_ iterates the half-open range [l, r).
#define fle_(T, i, l, r, ...) for (T i = (l), i##e = (r)__VA_OPT__(, ) __VA_ARGS__; i <= i##e; ++i)
#define flt_(T, i, l, r, ...) for (T i = (l), i##e = (r)__VA_OPT__(, ) __VA_ARGS__; i < i##e; ++i)
// When ONLINE_JUDGE is defined, assert(x) expands to the literal 42, so `x` is never evaluated.
#ifdef ONLINE_JUDGE
#undef assert
#define assert(x) 42
#endif
// Short names for primitive types: iN / uN are signed / unsigned N-bit integers,
// isz / usz are ptrdiff_t / size_t, and f32 / f64 / f128 are float / double / long double.
using i8 = int8_t;
using i16 = int16_t;
using i32 = int32_t;
using i64 = int64_t;
// 128-bit integers rely on the compiler-provided __int128_t / __uint128_t types.
using i128 = __int128_t;
using isz = ptrdiff_t;
using u8 = uint8_t;
using u16 = uint16_t;
using u32 = uint32_t;
using u64 = uint64_t;
using u128 = __uint128_t;
using usz = size_t;
using f32 = float;
using f64 = double;
// f128 is `long double`; its actual width is whatever the platform gives that type.
using f128 = long double;
// Short names for standard containers and tuples.
// ptt<T>: pair of two T; pt3<T> / pt4<T>: tuples of three / four T.
template <class T>
using ptt = std::pair<T, T>;
template <class T>
using pt3 = std::tuple<T, T, T>;
template <class T>
using pt4 = std::tuple<T, T, T, T>;
// itl<E>: initializer_list; arr<T, N>: std::array.
template <class E>
using itl = std::initializer_list<E>;
template <class T, usz N>
using arr = std::array<T, N>;
// vec / vvec / v3ec: vector nested one, two and three levels deep.
template <class T>
using vec = std::vector<T>;
template <class T>
using vvec = vec<vec<T>>;
template <class T>
using v3ec = vec<vvec<T>>;
// vecp / vvecp: (nested) vectors of pair<U, T>; vecpt / vvecpt: (nested) vectors of ptt<T>.
template <class U, class T>
using vecp = vec<std::pair<U, T>>;
template <class U, class T>
using vvecp = vvec<std::pair<U, T>>;
template <class T>
using vecpt = vec<ptt<T>>;
template <class T>
using vvecpt = vvec<ptt<T>>;
// ptvec / ptvvec: a pair of vectors / a pair of nested vectors.
template <class T>
using ptvec = ptt<vec<T>>;
template <class T>
using ptvvec = ptt<vvec<T>>;
// pq: priority_queue backed by vec<T> with comparator C (std::less by default);
// pqg: the same with std::greater.
template <class T, class C = std::less<T>>
using pq = std::priority_queue<T, vec<T>, C>;
template <class T>
using pqg = std::priority_queue<T, vec<T>, std::greater<T>>;
using strn = std::string;
using strnv = std::string_view;
// spn<T>: span over *const* T (read-only view), dynamic extent by default.
template <class T, usz ext = std::dynamic_extent>
using spn = std::span<T const, ext>;
// mk_(V, A, T) declares the alias `V##A = V<T>`; mk(A, T) does so for every single-type
// alias template listed below. The invocations therefore create names such as
// vecu = vec<u32> or vvecii = vvec<i64> for the suffixes b, i, u, ii and uu.
// Both helper macros are undefined again right after use.
#define mk_(V, A, T) using V##A = V<T>;
#define mk(A, T) mk_(ptt, A, T) mk_(pt3, A, T) mk_(pt4, A, T) mk_(vec, A, T) mk_(vvec, A, T) mk_(v3ec, A, T) mk_(vecpt, A, T) mk_(vvecpt, A, T) mk_(ptvec, A, T) mk_(ptvvec, A, T) mk_(spn, A, T) mk_(itl, A, T)
mk(b, bool) mk(i, i32) mk(u, u32) mk(ii, i64) mk(uu, u64);
#undef mk
#undef mk_
using namespace std::literals;
// Integer literal suffixes. Each operator converts the literal's unsigned long long
// value to the named alias with a plain conversion; no range check is performed.
CEXP i8 operator""_i8(unsigned long long v) { return static_cast<i8>(v); }
CEXP i16 operator""_i16(unsigned long long v) { return static_cast<i16>(v); }
CEXP i32 operator""_i32(unsigned long long v) { return static_cast<i32>(v); }
CEXP i64 operator""_i64(unsigned long long v) { return static_cast<i64>(v); }
CEXP isz operator""_iz(unsigned long long v) { return static_cast<isz>(v); }
CEXP u8 operator""_u8(unsigned long long v) { return static_cast<u8>(v); }
CEXP u16 operator""_u16(unsigned long long v) { return static_cast<u16>(v); }
CEXP u32 operator""_u32(unsigned long long v) { return static_cast<u32>(v); }
CEXP u64 operator""_u64(unsigned long long v) { return static_cast<u64>(v); }
CEXP usz operator""_uz(unsigned long long v) { return static_cast<usz>(v); }
// fn_0: callable that accepts any arguments and does nothing.
inline const auto fn_0 = [](auto&&...) {};
// fn_is0: callable returning whether its argument compares equal to 0.
inline const auto fn_is0 = [](auto x) { return x == 0; };
// Per-floating-point-type tolerance; initialised to sqrt(machine epsilon of FP).
template <std::floating_point FP>
inline FP eps_v = std::sqrt(std::numeric_limits<FP>::epsilon());
// Overwrites the tolerance stored in eps_v<FP>.
template <std::floating_point FP>
CEXP void set_eps(FP v) { eps_v<FP> = v; }
// Makes std::numbers::pi_v usable unqualified.
using std::numbers::pi_v;
namespace tifa_libs {
using std::min, std::max, std::swap;
// Magnitude of `x`: values below zero are negated, everything else is returned as is.
template <class T>
constexpr T abs(T x) {
  if (x < 0) return -x;
  return x;
}
}  // namespace tifa_libs
#line 5 "src/code/conv/conv_naive.hpp"
namespace tifa_libs::math {
// Schoolbook convolution of `l` and `r`, accumulated in type T.
// Returns an empty vector if either input is empty. Otherwise the result has `ans_size`
// entries (|l| + |r| - 1 when `ans_size` is 0); products whose index falls at or beyond
// `ans_size` are skipped.
template <class U, class T = U>
requires(sizeof(U) <= sizeof(T))
CEXP vec<T> conv_naive(vec<U> CR l, vec<U> CR r, u32 ans_size = 0) {
  if (l.empty() || r.empty()) return {};
  const u32 n = (u32)l.size(), m = (u32)r.size();
  if (ans_size == 0) ans_size = u32(l.size() + r.size() - 1);
  vec<T> out(ans_size);
  // The longer operand always drives the outer loop.
  if (n < m) {
    for (u32 j = 0; j < m; ++j)
      for (u32 i = 0; i < n && i + j < ans_size; ++i) out[i + j] += (T)l[i] * (T)r[j];
  } else {
    for (u32 i = 0; i < n; ++i)
      for (u32 j = 0; j < m && i + j < ans_size; ++j) out[i + j] += (T)l[i] * (T)r[j];
  }
  return out;
}
}  // namespace tifa_libs::math
#line 1 "src/code/conv/fft.hpp"
#line 5 "src/code/conv/fft.hpp"
namespace tifa_libs::math {
// Radix-2 complex transform with cached tables.
//
// bzr(len) sizes the tables for a power-of-two length N = size():
//   rev[i] - bit-reversal of i within log2(N) bits,
//   w[i]   - (cos(TAU * i / N), sin(TAU * i / N)) with TAU = 2 * acos(-1).
// dif()/dit() then transform a vector in place using those tables.
template <std::floating_point FP>
struct FFT {
  using C = std::complex<FP>;
  using data_t = C;
  explicit CEXP FFT() : rev(), w() {}
  // Current table length; 0 until bzr() has been called.
  CEXP u32 size() const { return (u32)rev.size(); }
  // Rebuilds the tables for length max(bit_ceil(len), 2); no-op if that length is already active.
  CEXP void bzr(u32 len) {
    const u32 n = max<u32>(std::bit_ceil(len), 2);
    if (n == size()) return;
    rev.resize(n, 0);
    const u32 k = (u32)(std::bit_width(n) - 1);
    // rev[i] is derived from rev[i / 2], so the table is filled in increasing order.
    flt_ (u32, i, 0, n) rev[i] = (rev[i / 2] / 2) | ((i & 1) << (k - 1));
    w.resize(n), w[0].real(1);
    flt_ (u32, i, 1, n) w[i] = {std::cos(TAU * (FP)i / (FP)n), std::sin(TAU * (FP)i / (FP)n)};
  }
  // In-place transform of the first `n` entries of `f` (n == 0 means size()).
  // `f` is grown to `n` entries if shorter. `n` must be a power of two not exceeding
  // size(). For n < size() the cached tables are rescaled:
  //   - the n-point bit-reversal of i equals rev[i] >> log2(size() / n),
  //   - the twiddle for a butterfly group of width i is w[k * size() / i].
  CEXP void dif(vec<C> &f, u32 n = 0) const {
    const u32 tot = size();
    if (!n) n = tot;
    if (f.size() < n) f.resize(n);
    assert(n <= tot);
    if (!n) return;  // tables never built: nothing to transform
    assert(std::has_single_bit(n));
    const u32 sh = (u32)(std::countr_zero(tot) - std::countr_zero(n));
    flt_ (u32, i, 0, n)
      if (const u32 j = rev[i] >> sh; i < j) swap(f[j], f[i]);
    // push/pop keeps the suppression local; the previous "diagnostic warning" form
    // switched -Wsign-conversion on for everything that followed in the translation unit.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wsign-conversion"
    for (u32 i = 2, d = tot / 2; i <= n; i *= 2, d /= 2)
      for (u32 j = 0; j < n; j += i) {
        auto l = f.begin() + j, r = f.begin() + j + i / 2;
        auto p = w.begin();
        for (u32 k = 0; k < i / 2; ++k, ++l, ++r, p += d) {
          const C _ = *r * *p;
          *r = *l - _, *l = *l + _;
        }
      }
#pragma GCC diagnostic pop
  }
  // Runs dif() and then divides the first `n` entries by n. The twiddles are the same as
  // in dif(); no conjugation or index reversal is applied here.
  CEXP void dit(vec<C> &f, u32 n = 0) const {
    if (!n) n = size();
    dif(f, n);
    flt_ (u32, i, 0, n) f[i] /= (FP)n;
  }

 private:
  const FP TAU = std::acos((FP)-1.) * 2;
  vecu rev;
  vec<C> w;
};
}  // namespace tifa_libs::math
#line 6 "src/code/conv/conv_mtt.hpp"
namespace tifa_libs::math {
// Convolution of `l` and `r` over `mint` using floating-point transforms with split
// coefficients.
//
// Returns the first `ans_size` coefficients of the linear convolution
// (|l| + |r| - 1 coefficients when `ans_size` is 0). Like conv_naive, an empty operand
// yields an empty result. `fft` is resized via bzr() as needed.
//
// Method: every value is split into its low OFS bits (stored as real part) and the
// remaining high bits (imaginary part). After one transform per operand the low/high
// spectra are separated through conjugate symmetry, the four cross products are formed,
// and two more transforms bring them back; the pieces are rounded and recombined with
// shifts by OFS and 2 * OFS.
template <class mint, class FP>
CEXP vec<mint> conv_mtt(FFT<FP> &fft, vec<mint> CR l, vec<mint> CR r, u32 ans_size = 0) {
  using C = TPN FFT<FP>::C;
  // Matches conv_naive and keeps `l.size() + r.size() - 1` below from wrapping around.
  if (l.empty() || r.empty()) return {};
  if (!ans_size) ans_size = u32(l.size() + r.size() - 1);
  if (ans_size < 32) return conv_naive(l, r, ans_size);
  // A single-coefficient operand reduces the product to a scalar multiple.
  if (l.size() == 1) {
    vec<mint> ans = r;
    for (ans.resize(ans_size); auto &i : ans) i *= l[0];
    return ans;
  }
  if (r.size() == 1) {
    vec<mint> ans = l;
    for (ans.resize(ans_size); auto &i : ans) i *= r[0];
    return ans;
  }
  // Input coefficients at index >= ans_size cannot reach the requested prefix, so only
  // the first ln / rn are used. The transform must then cover ln + rn - 1 points:
  // a shorter length makes the cyclic product wrap onto the low coefficients.
  const u32 ln = min((u32)l.size(), ans_size), rn = min((u32)r.size(), ans_size);
  const u32 full = ln + rn - 1;
  fft.bzr(full);
  const u32 n = fft.size();
  // OFS = ceil(bit width of (mod - 1) / 2): the number of low bits kept in the real part.
  const int OFS = ((int)sizeof(decltype(mint::mod())) * 8 - std::countl_zero(mint::mod() - 1) + 1) / 2;
  const u32 MSK = ((1u << OFS) - 1);
  vec<mint> ans(ans_size);
  vec<C> a(n), b(n);
  flt_ (u32, i, 0, ln) a[i] = {(FP)(l[i].val() & MSK), (FP)(l[i].val() >> OFS)};
  flt_ (u32, i, 0, rn) b[i] = {(FP)(r[i].val() & MSK), (FP)(r[i].val() >> OFS)};
  fft.dif(a), fft.dif(b);
  {
    vec<C> p(n), q(n);
    for (u32 i = 0, j; i < n; ++i) {
      j = (n - i) & (n - 1);  // mirrored index (-i mod n)
      // da / db: spectra of the low / high parts of l; dc: spectrum of the low part of r.
      // dd is the spectrum of the high part of r already multiplied by the imaginary
      // unit, so p = low_l * (low_r + i * high_r) and q = high_l * (low_r + i * high_r).
      C da = (a[i] + std::conj(a[j])) * C(.5, 0), db = (a[i] - std::conj(a[j])) * C(0, -.5), dc = (b[i] + std::conj(b[j])) * C(.5, 0), dd = (b[i] - std::conj(b[j])) * C(.5, 0);
      // Writing to the mirrored slot lets the next forward transform act as the inverse
      // transform scaled by n (divided out below).
      p[j] = da * dc + da * dd, q[j] = db * dc + db * dd;
    }
    a = p, b = q;
  }
  fft.dif(a), fft.dif(b);
  // Only `full` coefficients can be non-zero; stopping there also keeps a[i] / b[i] in
  // range when the caller asked for more entries than the transform length.
  flt_ (u32, i, 0, min(ans_size, full)) {
    const i64 da = (i64)(a[i].real() / (FP)n + .5) % mint::smod(), db = (i64)(a[i].imag() / (FP)n + .5) % mint::smod(), dc = (i64)(b[i].real() / (FP)n + .5) % mint::smod(), dd = (i64)(b[i].imag() / (FP)n + .5) % mint::smod();
    ans[i] = da + ((db + dc) << OFS) % mint::smod() + (dd << (OFS * 2)) % mint::smod();
  }
  return ans;
}
}  // namespace tifa_libs::math
#line 1 "src/code/poly/poly.hpp"
#line 5 "src/code/poly/poly.hpp"
namespace tifa_libs::math {
// Category tag exposed by a convolution core through its static member `ct_cat`.
// clang-format off
enum ccore_t { ct_FFT, ct_3NTT, ct_NTT };
// clang-format on
// Dense polynomial over `mint`; coefficient i is stored at index i.
// `ccore` is the convolution core: it must expose a static `ct_cat` of type ccore_t and
// member functions `conv(l, r)` and `conv(l, r, sz)` callable on vec<mint> arguments.
template <class mint, class ccore>
requires requires(ccore cc, vec<mint> l, vec<mint> r, u32 sz) {
  { ccore::ct_cat } -> std::same_as<ccore_t CR>;
  cc.conv(l, r);
  cc.conv(l, r, sz);
}
class poly {
  vec<mint> d;

 public:
  using value_type = mint;
  using data_type = vec<value_type>;
  using ccore_type = ccore;
  // One core instance shared by all polynomials of this type.
  static inline ccore_type conv_core;
  // `sz` coefficients, each equal to `val` (one zero-initialised coefficient by default).
  explicit CEXP poly(u32 sz = 1, cT_(value_type) val = value_type{}) : d(sz, val) {}
  CEXP poly(TPN data_type::const_iterator begin, TPN data_type::const_iterator end) : d(begin, end) {}
  CEXP poly(itl<value_type> v) : d(v) {}
  CEXP poly(spn<value_type> v) : d(v.begin(), v.end()) {}
  // Reads exactly size() coefficients; the size must be set beforehand.
  friend CEXP std::istream &operator>>(std::istream &is, poly &poly) {
    for (auto &val : poly.d) is >> val;
    return is;
  }
  // Writes the coefficients separated by single spaces, without a trailing separator.
  friend CEXP std::ostream &operator<<(std::ostream &os, poly CR poly) {
    if (!poly.size()) return os;
    flt_ (u32, i, 1, (u32)poly.size()) os << poly[i - 1] << ' ';
    return os << poly.d.back();
  }
  CEXP u32 size() const { return (u32)d.size(); }
  // True when every stored coefficient equals 0 (including when none are stored).
  CEXP bool empty() const {
    for (auto &&i : d)
      if (i != 0) return 0;
    return 1;
  }
  CEXP data_type &data() { return d; }
  CEXP data_type CR data() const { return d; }
  CEXP value_type &operator[](u32 x) { return d[x]; }
  CEXP value_type CR operator[](u32 x) const { return d[x]; }
  // Evaluates the polynomial at `x` with Horner's scheme.
  CEXP value_type operator()(value_type x) const {
    value_type ans = 0;
    // `~i` becomes 0 once `i` wraps past zero, which ends the loop.
    for (u32 i = size() - 1; ~i; --i) ans = ans * x + d[i];
    return ans;
  }
  // Calls f(i, d[i]) for every i in [l, r). An empty range (l == r) is a no-op, so
  // apply() and the operators built on it also work on a polynomial without coefficients.
  template <class F>
  requires requires(F f, u32 idx, mint &val) { f(idx, val); }
  CEXP void apply_range(u32 l, u32 r, F &&f) {
    assert(l <= r && r <= size());
    flt_ (u32, i, l, r) f(i, d[i]);
  }
  template <class F>
  CEXP void apply(F &&f) { apply_range(0, size(), std::forward<F>(f)); }
  CEXP void resize(u32 size) { d.resize(size); }
  // Copy resized to `size` coefficients (truncated or zero-extended).
  CEXP poly pre(u32 size) const {
    poly _ = *this;
    return _.resize(size), _;
  }
  // Drops trailing coefficients whose val() is 0, always leaving at least one coefficient.
  CEXP void strip() {
    auto it = std::find_if(d.rbegin(), d.rend(), [](cT_(mint) x) { return x.val() != 0; });
    if (d.resize(usz(d.rend() - it)); d.empty()) d.push_back(value_type(0));
  }
  friend CEXP poly stripped(poly p) { return p.strip(), p; }
  // Reverses the first `n` coefficients (all of them when n == 0).
  CEXP void reverse(u32 n = 0) { std::reverse(d.begin(), d.begin() + (n ? n : size())); }
  // Replaces *this by its product with `r` as computed by conv_core; `ans_size` is forwarded.
  CEXP void conv(poly CR r, u32 ans_size = 0) { conv_core.conv(d, r.d, ans_size); }
  CEXP poly operator-() const {
    poly ret = *this;
    return ret.apply([](u32, auto &v) { v = -v; }), ret;
  }
  // Scalar addition / subtraction only touches the constant term.
  friend CEXP poly operator+(poly p, value_type c) { return p[0] += c, p; }
  friend CEXP poly operator+(value_type c, poly CR p) { return p + c; }
  friend CEXP poly operator-(poly p, value_type c) { return p[0] -= c, p; }
  // c - p: negate every coefficient, then add c to the constant term
  // (subtraction is not commutative, so this cannot reuse `p - c`).
  friend CEXP poly operator-(value_type c, poly CR p) {
    poly q = -p;
    q[0] += c;
    return q;
  }
  CEXP poly &operator*=(value_type c) {
    return apply([&c](u32, auto &v) { v *= c; }), *this;
  }
  friend CEXP poly operator*(poly p, value_type c) { return p *= c; }
  friend CEXP poly operator*(value_type c, poly p) { return p *= c; }
  // Coefficient-wise sum; *this grows to the longer of the two sizes.
  CEXP poly &operator+=(poly CR r) {
    if (!r.size()) return *this;
    resize(max(size(), r.size())), apply_range(0, r.size(), [&r](u32 i, auto &v) { v += r[i]; });
    return *this;
  }
  friend CEXP poly operator+(poly l, poly CR r) { return l += r; }
  // Coefficient-wise difference; *this grows to the longer of the two sizes.
  CEXP poly &operator-=(poly CR r) {
    if (!r.size()) return *this;
    return resize(max(size(), r.size())), apply_range(0, r.size(), [&r](u32 i, auto &v) { v -= r[i]; }), *this;
  }
  friend CEXP poly operator-(poly l, poly CR r) { return l -= r; }
  // Polynomial product; multiplying by a polynomial without coefficients gives the single
  // coefficient 0.
  CEXP poly &operator*=(poly CR r) {
    if (!r.size()) return resize(1), d[0] = 0, *this;
    return conv(r), *this;
  }
  friend CEXP poly operator*(poly l, poly CR r) { return l *= r; }
  // Comparisons ignore trailing zero coefficients: both sides are stripped first.
  CEXP auto operator<=>(poly CR r) const { return stripped(*this).d <=> stripped(r).d; }
  CEXP bool operator==(poly CR r) const { return stripped(*this).d == stripped(r).d; }
};
}  // namespace tifa_libs::math
#line 6 "src/code/poly/polymtt.hpp"
namespace tifa_libs::math {
namespace polymtt_impl_ {
// Convolution core that routes poly multiplication through conv_mtt.
// It derives from FFT<FP>, so the core object itself is the transform engine
// (and table cache) that conv_mtt receives.
template <class FP = f64>
struct cconv_mtt : public FFT<FP> {
  // Category tag required by poly's constraint on its core type.
  static CEXP ccore_t ct_cat = ct_FFT;
  // Replaces `l` with conv_mtt(l, r, sz); `sz` is forwarded untouched.
  template <class mint>
  CEXP void conv(vec<mint>& l, vec<mint> CR r, u32 sz = 0) {
    FFT<FP>& engine = *this;
    l = conv_mtt<mint, FP>(engine, l, r, sz);
  }
};
}  // namespace polymtt_impl_
// poly instantiated with the cconv_mtt core.
template <class mint, class FP = f64>
using polymtt = poly<mint, polymtt_impl_::cconv_mtt<FP>>;
}  // namespace tifa_libs::math