Tifa's CP Library

:heavy_check_mark: src/conv/add/u64/lib.hpp

Depends on

Verified with

Code

#pragma once

#include "../../trans/fft_r3/lib.hpp"
#include "../naive/lib.hpp"

namespace tifa_libs {

// Convolution of two coefficient sequences, returned as u64 values.
//
// If min(|a|, |b|) < CONV_NAIVE_THRESHOLD the work is delegated to
// conv_naive<T, u64>. Otherwise the product is computed with the radix-3
// transform of fft_r3<T>; on that path all arithmetic is carried out in T.
// NOTE(review): for T narrower than u64 the two paths therefore differ in
// where wrap-around happens -- confirm this is intended.
//
// The fft_r3 instance is a function-local static, so its size and scratch
// buffer are shared between calls with the same T.
//
// @param a, b      input sequences
// @param ans_size  number of coefficients to return; 0 selects |a| + |b| - 1
// @return the first ans_size coefficients of a * b; empty when both inputs are empty
template <class T>
vecuu conv_u64(vec<T> CR a, vec<T> CR b, u32 ans_size = 0) NE {
  cu32 n = (u32)a.size(), m = (u32)b.size();
  if (!ans_size) ans_size = n + m - 1;
  retif_((a.empty() && b.empty()) [[unlikely]], {});
  if (min(n, m) < CONV_NAIVE_THRESHOLD) return conv_naive<T, u64>(a, b, ans_size);
  static fft_r3<T> fft;
  using EI = fft_r3<T>::data_t;
  // For unsigned 64-bit T this is 0xAAAA'AAAA'AAAA'AAAB, i.e. 3 * inv_3 == 1 (mod 2^64).
  CEXP static EI inv_3{-T(1) / 3 * 2 + 1, 0};
  fft.bzr(n + m - 1);
  u32 s = fft.size();
  // Pack each input into s ring elements: coefficients [0, s) go to the real
  // parts, coefficients [s, 2s) go to the omega parts.
  vec<EI> pa(s), pb(s);
  for (u32 i = 0; i < min(s, n); ++i) pa[i].real(a[i]);
  for (u32 i = s; i < min(2 * s, n); ++i) pa[i - s].imag(a[i]);
  for (u32 i = 0; i < min(s, m); ++i) pb[i].real(b[i]);
  for (u32 i = s; i < min(2 * s, m); ++i) pb[i - s].imag(b[i]);
  // Output plus scratch space for the recursive multiplication below.
  vec<EI> pc(4 * s);
  // Multiplies the length-n blocks p and q with wrap-around x^n = omega and
  // stores the n result coefficients in to[0, n). `to` is also used as scratch
  // beyond index n, and both p and q are overwritten in the recursive case.
  auto mul = [](auto&& mul, EI* p, EI* q, EI* to, u32 n) {
    if (n <= 27) {
      // Schoolbook product; terms that overflow index n wrap with a factor omega.
      fill_n(to, n, 0);
      flt_ (u32, i, 0, n) {
        flt_ (u32, j, 0, n - i) to[i + j] += p[i] * q[j];
        flt_ (u32, j, n - i, n) to[i + j - n] += p[i] * q[j] * EI::w;
      }
      return;
    }
    // Split into r blocks of length m, m being the smallest power of 3 with m * m >= n.
    u32 m = 1;
    for (; m * m < n; m *= 3);
    u32 r = n / m;
    // inv = inv_3 ^ log3(r): scaling applied after each inverse transform.
    EI inv{1};
    for (u32 i = 1; i < r; i *= 3) inv *= inv_3;
    // First pass: twist, forward transform, blockwise recursive products,
    // inverse transform, scale, untwist. Result lands in to[n, 2n).
    flt_ (u32, i, 0, r) {
      fft.twiddle(p + m * i, m, m / r * i, to + m * i);
      fft.twiddle(q + m * i, m, m / r * i, to + n + m * i);
    }
    fft.dif(to, m, r), fft.dif(to + n, m, r);
    flt_ (u32, i, 0, r) mul(mul, to + m * i, to + n + m * i, to + 2 * n + m * i, m);
    fft.dit(to + 2 * n, m, r);
    flt_ (u32, i, 0, n) to[2 * n + i] *= inv;
    flt_ (u32, i, 0, r) fft.twiddle(to + 2 * n + m * i, m, 3 * m - m / r * i, to + n + m * i);
    // Second pass on the conjugated inputs with doubled twist; result lands in q.
    flt_ (u32, i, 0, r) {
      flt_ (u32, j, 0, m) p[m * i + j] = conj(p[m * i + j]), q[m * i + j] = conj(q[m * i + j]);
      fft.twiddle(p + m * i, m, 2 * m / r * i, to + m * i);
      fft.twiddle(q + m * i, m, 2 * m / r * i, p + m * i);
    }
    fft.dif(to, m, r), fft.dif(p, m, r);
    flt_ (u32, i, 0, r) mul(mul, to + m * i, p + m * i, to + 2 * n + m * i, m);
    fft.dit(to + 2 * n, m, r);
    flt_ (u32, i, 0, n) to[2 * n + i] *= inv;
    flt_ (u32, i, 0, r) fft.twiddle(to + 2 * n + m * i, m, 3 * m - 2 * m / r * i, q + m * i);
    // Combine both partial results into to[0, n) and divide by 3.
    fill_n(to, n, 0);
    flt_ (u32, i, 0, n) {
      to[i] += (1 - EI::w) * to[n + i] + (1 - EI::w2) * conj(q[i]);
      if (i + m < n) to[i + m] += (EI::w2 - EI::w) * (to[n + i] - conj(q[i]));
      else to[i + m - n] += (1 - EI::w2) * (to[n + i] - conj(q[i]));
    }
    flt_ (u32, i, 0, n) to[i] *= inv_3;
  };
  mul(mul, pa.data(), pb.data(), pc.data(), s);
  // Unpack: real parts hold coefficients [0, s), omega parts hold [s, 2s).
  // The result container must be vecuu to match the declared return type;
  // a vec<T> only converts when T is exactly u64.
  vecuu ans(ans_size);
  flt_ (u32, i, 0, min(s, ans_size)) ans[i] = (u64)pc[i].real();
  flt_ (u32, i, s, min(2 * s, ans_size)) ans[i] = (u64)pc[i - s].imag();
  return ans;
}

}  // namespace tifa_libs
#line 2 "src/conv/add/u64/lib.hpp"

#line 2 "src/conv/trans/fft_r3/lib.hpp"

#line 2 "src/math/ds/eint/lib.hpp"

#line 2 "src/util/traits/others/lib.hpp"
// clang-format off
#line 2 "src/util/alias/others/lib.hpp"

#line 2 "src/util/consts/lib.hpp"

#line 2 "src/util/alias/num/lib.hpp"

#line 2 "src/util/util/lib.hpp"
// https://github.com/Tiphereth-A/CP-lib
#include <bits/extc++.h>
// clang-format off
namespace tifa_libs {

// Short spellings for common specifiers and qualifiers used throughout the library.
#define CEXP constexpr
#define CEXPE constexpr explicit
#define CR const&
#define CP const*
#define PC *const
#define CPC const*const
#define TPN typename
#define NE noexcept
#define CNE const noexcept
#define ND [[nodiscard]]
// Parameter type selector: the type itself when it is at most two size_t wide,
// otherwise a const reference to it.
#define cT_(...) std::conditional_t<sizeof(__VA_ARGS__) <= sizeof(size_t) * 2, __VA_ARGS__, __VA_ARGS__ CR>
// Counting loop `for i in [l, r)`: the upper bound is evaluated once into `i##e`.
// Extra arguments are appended to the loop's init declaration.
// NOLINTNEXTLINE(misc-const-correctness)
#define flt_(T, i, l, r, ...) for (T i = (l), i##e = (r)__VA_OPT__(, ) __VA_ARGS__; i < i##e; ++i)
// `if cond return if_true`, optionally followed by `; else return <rest>`.
// `cond` must carry its own parentheses (attributes may follow them).
#define retif_(cond, if_true, ...) if cond return if_true __VA_OPT__(; else return __VA_ARGS__)
// With ONLINE_JUDGE defined, assert(x) expands to the constant 42, so its
// argument is never evaluated.
#ifdef ONLINE_JUDGE
#undef assert
#define assert(x) 42
#endif

using namespace std::ranges;
using namespace std::literals;

// Absolute value: yields -x for negative x and x otherwise.
template <class T>
CEXP T abs(T x) NE {
  if (x < 0) return -x;
  return x;
}

}  // namespace tifa_libs
// clang-format on
#line 4 "src/util/alias/num/lib.hpp"
// clang-format off
namespace tifa_libs {

// mk0_(w, t): declares alias `w` for type t and `cw` for `const t`.
#define mk0_(w, t) using w = t; using c##w = const t
// mk_(w, t): same as mk0_, plus a user-defined literal suffix `_w` that casts
// an unsigned integer literal to w.
#define mk_(w, t) mk0_(w, t); CEXP w operator""_##w(unsigned long long x) NE { return (w)x; }
// Fixed-width and builtin integer/character aliases (with literal suffixes).
mk_(i8, int8_t) mk_(u8, uint8_t) mk_(i16, int16_t) mk_(u16, uint16_t) mk_(i32, int32_t) mk_(u32, uint32_t) mk_(i64, int64_t) mk_(u64, uint64_t) mk_(isz, ssize_t) mk_(usz, size_t) mk_(chr, char) mk_(schr, signed char) mk_(uchr, unsigned char) mk_(sint, signed) mk_(uint, unsigned);
// 128-bit integer and floating-point aliases (no literal suffixes).
mk0_(i128, __int128_t); mk0_(u128, __uint128_t); mk0_(f32, float); mk0_(f64, double); mk0_(f128, long double);
#undef mk0_
#undef mk_

}  // namespace tifa_libs
// clang-format on
#line 4 "src/util/consts/lib.hpp"
// clang-format off
namespace tifa_libs {
using std::numbers::pi_v;
// Per-type comparison tolerance, initialised to the square root of the
// machine epsilon of FP; can be changed through set_eps.
template <std::floating_point FP>
inline FP eps_v = std::sqrt(std::numeric_limits<FP>::epsilon());
// Overwrites the tolerance eps_v<FP> with v.
template <std::floating_point FP>
CEXP void set_eps(FP v) NE { eps_v<FP> = v; }
// Compile-time value built from the six digit characters of __TIME__,
// one digit per 4-bit nibble (positions 0, 1, 3, 4, 6, 7 of the string).
CEXP u32 TIME = ((__TIME__[0] & 15) << 20) | ((__TIME__[1] & 15) << 16) | ((__TIME__[3] & 15) << 12) | ((__TIME__[4] & 15) << 8) | ((__TIME__[6] & 15) << 4) | (__TIME__[7] & 15);
// Lookup table indexed by a 16-bit value `i << 8 | j`: when both bytes are
// ASCII digits ('0'..'9') the entry is (j & 15) * 10 + (i & 15); every other
// entry is -1_u32 (all bits set).
CEXP auto STR2U16 = [] { std::array<u32, 65536> table{}; table.fill(-1_u32); flt_ (u32, i, 48, 58) flt_ (u32, j, 48, 58) table[i << 8 | j] = (j & 15) * 10 + (i & 15); return table; }();

// Callable that accepts any arguments and does nothing.
inline const auto fn_0 = [](auto&&...) NE {};
// Predicate returning whether its argument compares equal to 0.
inline const auto fn_is0 = [](auto x) NE { return x == 0; };
}  // namespace tifa_libs
// clang-format on
#line 4 "src/util/alias/others/lib.hpp"

namespace tifa_libs {

// Hash functor: casts the key to u64, XORs it with the build-time constant
// TIME, multiplies by the odd constant C and byte-swaps the product.
template <class T>
struct chash {
  CEXP static u64 C = u64(pi_v<f128> * 2e18) | 71;
  CEXP u64 operator()(T x) CNE {
    const u64 mixed = ((u64)x ^ TIME) * C;
    return __builtin_bswap64(mixed);
  }
};
// clang-format off
// mk_(w, t): declares alias `w` for t and `cw` for `const t`.
#define mk_(w, t) using w = t; using c##w = const t;
// strn / cstrn = std::string, strnv / cstrnv = std::string_view.
mk_(strn, std::string) mk_(strnv, std::string_view)
#undef mk_
// Weighted edge: weight w and endpoints u, v; compared member-wise in declaration order (w, u, v).
template <class T> struct edge_t { T w; u32 u, v; CEXP auto operator<=>(edge_t CR) const = default; }; template <class T> using cedge_t = const edge_t<T>;
// Homogeneous triple with member-wise comparison.
template <class T> struct pt3 { T _0, _1, _2; CEXP auto operator<=>(pt3 CR) const = default; }; template <class T> using cpt3 = const pt3<T>;
// Homogeneous quadruple with member-wise comparison.
template <class T> struct pt4 { T _0, _1, _2, _3; CEXP auto operator<=>(pt4 CR) const = default; }; template <class T> using cpt4 = const pt4<T>;
// mkT_(w, t...): declares alias template `w<T>` for the given type and `cw<T>`
// for its const version. The variadic tail re-joins template argument lists
// that contain commas (e.g. std::pair<T, T>).
#define mkT_(w, t, ...) template <class T> using w = t __VA_OPT__(, ) __VA_ARGS__; template <class T> using c##w = const t __VA_OPT__(, ) __VA_ARGS__;
// Pair / allocator / (nested) vector shorthands.
mkT_(ptt, std::pair<T, T>) mkT_(alc, std::pmr::polymorphic_allocator<T>) mkT_(vec, std::vector<T>) mkT_(vvec, vec<vec<T>>) mkT_(v3ec, vvec<vec<T>>) mkT_(vecpt, vec<ptt<T>>) mkT_(vvecpt, vvec<ptt<T>>) mkT_(ptvec, ptt<vec<T>>) mkT_(ptvvec, ptt<vvec<T>>)
#undef mkT_
// Shorthand for std::initializer_list.
template <class T> using itl = std ::initializer_list<T>;
// Read-only span (element type is `T const`), dynamic extent by default.
template <class T, usz ext = std::dynamic_extent> using spn = std::span<T const, ext>;
// Fixed-size arrays of T and of const T.
template <class T, usz N> using arr = std::array<T, N>; template <class T, usz N> using carr = std::array<const T, N>;
// Vectors of heterogeneous pairs. vvecp is declared exactly once here; the
// repeated declaration that used to follow was redundant and is not portable.
template <class U, class T> using vecp = vec<std::pair<U, T>>; template <class U, class T> using vvecp = vvec<std::pair<U, T>>;
// NOTE(review): vvec<vvec<...>> expands to four nested vectors, not three as
// the name suggests -- confirm this is intended before relying on it.
template <class U, class T> using vvvecp = vvec<vvec<std::pair<U, T>>>;
// Associative containers: if the pb_ds associative-container header guard is
// defined, set/map/hset/hmap alias the __gnu_pbds tree and gp_hash_table;
// otherwise they alias the standard containers. Hash containers default to chash.
#ifdef PB_DS_ASSOC_CNTNR_HPP
template <class T, class C = std::less<T>> using set = __gnu_pbds::tree<T, __gnu_pbds::null_type, C>;
template <class K, class V, class C = std::less<K>> using map = __gnu_pbds::tree<K, V, C>;
// hset<u64> s({}, {}, {}, {}, {1<<16});
template <class T, class HF = chash<T>> using hset = __gnu_pbds::gp_hash_table<T, __gnu_pbds::null_type, HF>;
// hmap<u64, int> s({}, {}, {}, {}, {1<<16});
template <class K, class V, class HF = chash<K>> using hmap = __gnu_pbds::gp_hash_table<K, V, HF>;
#else
using std::set, std::map;
template <class T, class HF = chash<T>> using hset = std::unordered_set<T, HF>;
template <class K, class V, class HF = chash<K>> using hmap = std::unordered_map<K, V, HF>;
#endif
// Priority queue: pb_ds implementation when its header guard is defined,
// std::priority_queue over vec<T> otherwise.
#ifdef PB_DS_PRIORITY_QUEUE_HPP
template <class T, class C = std::less<T>> using pq = __gnu_pbds::priority_queue<T, C>;
#else
template <class T, class C = std::less<T>> using pq = std::priority_queue<T, vec<T>, C>;
#endif
// Priority queue ordered with std::greater.
template <class T> using pqg = pq<T, std::greater<T>>;
// clang-format on
// mk1_(V, A, T): declares the suffixed alias `VA` = V<T> (e.g. vecu = vec<u32>).
#define mk1_(V, A, T) using V##A = V<T>;
// mk_(V, A, T): declares both `VA` and its const counterpart `cVA`.
#define mk_(V, A, T) mk1_(V, A, T) mk1_(c##V, A, T)
// mk(A, T): generates the suffixed aliases for every container/tuple shorthand;
// spn and itl get only the non-const form.
#define mk(A, T) mk_(edge_t, A, T) mk_(ptt, A, T) mk_(pt3, A, T) mk_(pt4, A, T) mk_(vec, A, T) mk_(vvec, A, T) mk_(v3ec, A, T) mk_(vecpt, A, T) mk_(vvecpt, A, T) mk_(ptvec, A, T) mk_(ptvvec, A, T) mk1_(spn, A, T) mk1_(itl, A, T)
// Suffixes: b=bool, c=chr, i=i32, u=u32, ii=i64, uu=u64, t=isz, z=usz, f=f32, d=f64, s=strn.
mk(b, bool) mk(c, chr) mk(i, i32) mk(u, u32) mk(ii, i64) mk(uu, u64) mk(t, isz) mk(z, usz) mk(f, f32) mk(d, f64) mk(s, strn);
#undef mk
#undef mk_
#undef mk1_

}  // namespace tifa_libs
#line 4 "src/util/traits/others/lib.hpp"

namespace tifa_libs {

//! only for template without non-type argument
// specialized_from_v<X, Tpl>: true when X is Tpl<Args...> for some type arguments.
//! only for template without non-type argument
template <class, template <class...> class> CEXP bool specialized_from_v = false;
template <template <class...> class T, class... Args> CEXP bool specialized_from_v<T<Args...>, T> = true;
static_assert(specialized_from_v<vecu, std::vector>);
// A common_range that is neither a builtin array nor strn / strnv (after removing cv-ref).
template <class T> concept container_c = common_range<T> && !std::is_array_v<std::remove_cvref_t<T>> && !std::same_as<std::remove_cvref_t<T>, strn> && !std::same_as<std::remove_cvref_t<T>, strnv>;
// Input-stream-like: derives from std::istream / std::wistream, or has a peek() member.
template <class T> concept istream_c = std::derived_from<T, std::istream> || std::derived_from<T, std::wistream> || requires(T is) { is.peek(); };
// Output-stream-like: derives from std::ostream / std::wostream, or has a flush() member.
template <class T> concept ostream_c = std::derived_from<T, std::ostream> || std::derived_from<T, std::wostream> || requires(T os) { os.flush(); };

}  // namespace tifa_libs
// clang-format on
#line 4 "src/math/ds/eint/lib.hpp"

namespace tifa_libs {

// Element a + b*omega of the ring T[omega] with omega^2 + omega + 1 = 0.
// real() is the coefficient a, imag() is the coefficient b of omega.
template <class T>
class eint {
  T r_, i_;

 public:
  // $\omega$, $\omega^2$
  // $1+\omega+\omega^2=0$
  const static inline eint<T> w{0, 1}, w2{-T(1), -T(1)};

  CEXP eint(cT_(T) real = T{}, cT_(T) imag = T{}) NE : r_(real), i_(imag) {}

  CEXP T CR real() CNE { return r_; }
  CEXP T CR imag() CNE { return i_; }
  // a^2 - a*b + b^2
  CEXP T norm() CNE { return r_ * (r_ - i_) + i_ * i_; }
  CEXP T& real() NE { return r_; }
  CEXP T& imag() NE { return i_; }
  CEXP void real(cT_(T) x) NE { r_ = x; }
  CEXP void imag(cT_(T) x) NE { i_ = x; }
  // Scalar operations: +/- affect only the real part, * scales both parts.
  CEXP eint& operator+=(cT_(T) x) NE {
    r_ += x;
    return *this;
  }
  CEXP eint& operator-=(cT_(T) x) NE {
    r_ -= x;
    return *this;
  }
  CEXP eint& operator*=(cT_(T) x) NE {
    r_ *= x, i_ *= x;
    return *this;
  }
  CEXP eint& operator+=(eint CR x) NE {
    r_ += x.real(), i_ += x.imag();
    return *this;
  }
  CEXP eint& operator-=(eint CR x) NE {
    r_ -= x.real(), i_ -= x.imag();
    return *this;
  }
  // (a + b w)(c + d w) = (ac - bd) + (ad + bc - bd) w, using w^2 = -1 - w.
  CEXP eint& operator*=(eint CR x) NE {
    // Snapshot every input before writing: x may alias *this (`a *= a`), in
    // which case reading x.real() after assigning r_ would see the new value.
    const T xr = x.real(), xi = x.imag(), r0 = r_;
    r_ = r0 * xr - i_ * xi, i_ = i_ * xr + r0 * xi - i_ * xi;
    return *this;
  }
  friend CEXP eint operator+(eint x, cT_(T) y) NE { return x += y; }
  friend CEXP eint operator-(eint x, cT_(T) y) NE { return x -= y; }
  friend CEXP eint operator*(eint x, cT_(T) y) NE { return x *= y; }
  friend CEXP eint operator+(eint x, eint CR y) NE { return x += y; }
  friend CEXP eint operator-(eint x, eint CR y) NE { return x -= y; }
  friend CEXP eint operator*(eint x, eint CR y) NE { return x *= y; }
  CEXP eint operator+() CNE { return *this; }
  CEXP eint operator-() CNE { return eint(-r_, -i_); }
  friend CEXP T norm(eint CR x) NE { return x.norm(); }
  // Maps a + b w to a + b w^2 = (a - b) - b w.
  friend CEXP eint conj(eint CR x) NE { return eint{x.r_ - x.i_, -x.i_}; }
  friend CEXP bool operator==(eint CR x, eint CR y) NE { return x.real() == y.real() && x.imag() == y.imag(); }
  // Stream I/O: the two coefficients, real part first, separated by a space on output.
  friend auto& operator>>(istream_c auto& is, eint& x) NE { return is >> x.r_ >> x.i_; }
  friend auto& operator<<(ostream_c auto& os, eint CR x) NE { return os << x.real() << ' ' << x.imag(); }
};

}  // namespace tifa_libs
#line 4 "src/conv/trans/fft_r3/lib.hpp"

namespace tifa_libs {

// Radix-3 transform helper over eint<T> blocks. It keeps a size `s` (a power
// of 3, starting at 1) and a scratch buffer `tmp` of 3 * s elements that dif
// and dit use for the three butterfly outputs/inputs.
template <class T>
class fft_r3 {
  using EI = eint<T>;
  u32 s = 1;
  vec<EI> tmp;

 public:
  using data_t = eint<T>;

  // Current size s.
  ND CEXP u32 size() CNE { return s; }
  // Grows s by factors of 3 until 2 * s >= len and resizes the scratch buffer
  // to 3 * s. s is never reduced by this call.
  CEXP void bzr(u32 len) NE {
    for (; 2 * s < len; s *= 3);
    tmp.resize(s * 3);
  }

  // Calculate the product of polynomial {@code p} and $x^t$ in $T[x]/(x^m - \omega)$
  // result is in {@code to}
  // Requires t <= 3 * m (asserted). The loops read p while writing to, so the
  // two ranges are presumably expected not to overlap.
  CEXP void twiddle(data_t* p, u32 m, u32 t, data_t* to) CNE {
    assert(t <= 3 * m);
    // x^0 and x^{3m} = \omega^3 = 1 are both the identity.
    if (!t || t == 3 * m) {
      copy_n(p, m, to);
      return;
    }
    // Write t = q * m + n with q in {0, 1, 2}. _0 = \omega^q multiplies the
    // coefficients that stay in range, _1 = \omega^{q+1} those that wrap past x^m.
    u32 n;
    data_t _0, _1;
    if (t < m) n = t, _0 = 1, _1 = EI::w;
    else if (t < 2 * m) n = t - m, _0 = EI::w, _1 = EI::w2;
    else n = t - 2 * m, _0 = EI::w2, _1 = 1;
    flt_ (u32, j, 0, n) to[j] = p[m - n + j] * _1;
    flt_ (u32, j, n, m) to[j] = p[j - n] * _0;
  }
  // @param p A polynomial from $(T[x]/(x^m - \omega))[y]/(y^r - 1)$
  // result: Fourier transform (w.r.t. y) in 3-reversed order, inplace.
  CEXP void dif(data_t* p, u32 m, u32 r) NE {
    for (u32 rr = r / 3; rr >= 1; rr /= 3)
      for (u32 k = 0; k < r; k += rr * 3)
        flt_ (u32, i, k, k + rr) {
          // 3-point butterfly on blocks i, i + rr, i + 2 * rr; the first output is
          // stored back directly, the other two are staged in tmp.
          flt_ (u32, j, 0, m) {
            tmp[j] = p[i * m + j] + p[(i + rr) * m + j] + p[(i + 2 * rr) * m + j];
            tmp[m + j] = p[i * m + j] + EI::w * p[(i + rr) * m + j] + EI::w2 * p[(i + 2 * rr) * m + j];
            tmp[2 * m + j] = p[i * m + j] + EI::w2 * p[(i + rr) * m + j] + EI::w * p[(i + 2 * rr) * m + j];
            p[i * m + j] = tmp[j];
          }
          // Staged outputs are multiplied by a power of x (the twiddle factor)
          // while being copied to blocks i + rr and i + 2 * rr.
          twiddle(tmp.data() + m, m, 3 * (i - k) * m / (rr * 3), p + m * rr + i * m);
          twiddle(tmp.data() + 2 * m, m, 6 * (i - k) * m / (rr * 3), p + 2 * m * rr + i * m);
        }
  }
  // @param p A polynomial in $(T[x]/(x^m - \omega))[y]/(y^r - 1)$ with coefficients in 3-reversed order.
  // result: inverse Fourier transform in normal order, inplace.
  // No scaling is applied here; conv_u64 multiplies by a power of inv_3 after calling dit.
  CEXP void dit(data_t* p, u32 m, u32 r) NE {
    for (u32 rr = 1; rr < r; rr *= 3)
      for (u32 k = 0; k < r; k += rr * 3)
        flt_ (u32, i, k, k + rr) {
          // Undo the twiddles of dif (exponent 3 * m minus the forward exponent),
          // staging blocks i + rr and i + 2 * rr in tmp.
          twiddle(p + m * rr + i * m, m, 3 * m - 3 * (i - k) * m / (rr * 3), tmp.data() + m);
          twiddle(p + 2 * m * rr + i * m, m, 3 * m - 6 * (i - k) * m / (rr * 3), tmp.data() + 2 * m);
          // Inverse 3-point butterfly (roles of \omega and \omega^2 swapped w.r.t. dif).
          flt_ (u32, j, 0, m) {
            tmp[j] = p[i * m + j];
            p[i * m + j] = tmp[j] + tmp[m + j] + tmp[2 * m + j];
            p[(i + rr) * m + j] = tmp[j] + EI::w2 * tmp[m + j] + EI::w * tmp[2 * m + j];
            p[(i + 2 * rr) * m + j] = tmp[j] + EI::w * tmp[m + j] + EI::w2 * tmp[2 * m + j];
          }
        }
  }
};

}  // namespace tifa_libs
#line 2 "src/conv/add/naive/lib.hpp"

#line 4 "src/conv/add/naive/lib.hpp"

namespace tifa_libs {

// Input-size bound below which callers in this file (conv_u64) use conv_naive.
CEXP inline u32 CONV_NAIVE_THRESHOLD = 16;
// Schoolbook convolution of l and r, accumulated in T.
//
// @tparam U        element type of the inputs
// @tparam T        element type of the result; must be at least as wide as U
// @param l, r      input sequences
// @param ans_size  number of coefficients to return; 0 selects |l| + |r| - 1
// @return the first ans_size coefficients of l * r (zero-padded when ans_size
//         exceeds |l| + |r| - 1); empty when either input is empty
template <class U, class T = U>
requires(sizeof(U) <= sizeof(T))
CEXP vec<T> conv_naive(vec<U> CR l, vec<U> CR r, u32 ans_size = 0) NE {
  retif_((l.empty() || r.empty()) [[unlikely]], {});
  if (!ans_size) ans_size = u32(l.size() + r.size() - 1);
  vec<T> ans(ans_size);
  u32 n = (u32)l.size(), m = (u32)r.size();
  // Iterate the longer input in the outer loop.
  auto &&l_ = n < m ? r : l, &&r_ = n < m ? l : r;
  if (n < m) swap(n, m);
  // Outer bound is capped at ans_size: terms with i >= ans_size cannot
  // contribute, and `ans_size - i` would wrap around as u32 for them, which
  // made the inner loop write past the end of ans.
  flt_ (u32, i, 0, min(n, ans_size))
    flt_ (u32, j, 0, min(m, ans_size - i)) ans[i + j] += (T)l_[i] * (T)r_[j];
  return ans;
}

}  // namespace tifa_libs
#line 5 "src/conv/add/u64/lib.hpp"

namespace tifa_libs {

// Convolution of two coefficient sequences, returned as u64 values.
//
// If min(|a|, |b|) < CONV_NAIVE_THRESHOLD the work is delegated to
// conv_naive<T, u64>. Otherwise the product is computed with the radix-3
// transform of fft_r3<T>; on that path all arithmetic is carried out in T.
// NOTE(review): for T narrower than u64 the two paths therefore differ in
// where wrap-around happens -- confirm this is intended.
//
// The fft_r3 instance is a function-local static, so its size and scratch
// buffer are shared between calls with the same T.
//
// @param a, b      input sequences
// @param ans_size  number of coefficients to return; 0 selects |a| + |b| - 1
// @return the first ans_size coefficients of a * b; empty when both inputs are empty
template <class T>
vecuu conv_u64(vec<T> CR a, vec<T> CR b, u32 ans_size = 0) NE {
  cu32 n = (u32)a.size(), m = (u32)b.size();
  if (!ans_size) ans_size = n + m - 1;
  retif_((a.empty() && b.empty()) [[unlikely]], {});
  if (min(n, m) < CONV_NAIVE_THRESHOLD) return conv_naive<T, u64>(a, b, ans_size);
  static fft_r3<T> fft;
  using EI = fft_r3<T>::data_t;
  // For unsigned 64-bit T this is 0xAAAA'AAAA'AAAA'AAAB, i.e. 3 * inv_3 == 1 (mod 2^64).
  CEXP static EI inv_3{-T(1) / 3 * 2 + 1, 0};
  fft.bzr(n + m - 1);
  u32 s = fft.size();
  // Pack each input into s ring elements: coefficients [0, s) go to the real
  // parts, coefficients [s, 2s) go to the omega parts.
  vec<EI> pa(s), pb(s);
  for (u32 i = 0; i < min(s, n); ++i) pa[i].real(a[i]);
  for (u32 i = s; i < min(2 * s, n); ++i) pa[i - s].imag(a[i]);
  for (u32 i = 0; i < min(s, m); ++i) pb[i].real(b[i]);
  for (u32 i = s; i < min(2 * s, m); ++i) pb[i - s].imag(b[i]);
  // Output plus scratch space for the recursive multiplication below.
  vec<EI> pc(4 * s);
  // Multiplies the length-n blocks p and q with wrap-around x^n = omega and
  // stores the n result coefficients in to[0, n). `to` is also used as scratch
  // beyond index n, and both p and q are overwritten in the recursive case.
  auto mul = [](auto&& mul, EI* p, EI* q, EI* to, u32 n) {
    if (n <= 27) {
      // Schoolbook product; terms that overflow index n wrap with a factor omega.
      fill_n(to, n, 0);
      flt_ (u32, i, 0, n) {
        flt_ (u32, j, 0, n - i) to[i + j] += p[i] * q[j];
        flt_ (u32, j, n - i, n) to[i + j - n] += p[i] * q[j] * EI::w;
      }
      return;
    }
    // Split into r blocks of length m, m being the smallest power of 3 with m * m >= n.
    u32 m = 1;
    for (; m * m < n; m *= 3);
    u32 r = n / m;
    // inv = inv_3 ^ log3(r): scaling applied after each inverse transform.
    EI inv{1};
    for (u32 i = 1; i < r; i *= 3) inv *= inv_3;
    // First pass: twist, forward transform, blockwise recursive products,
    // inverse transform, scale, untwist. Result lands in to[n, 2n).
    flt_ (u32, i, 0, r) {
      fft.twiddle(p + m * i, m, m / r * i, to + m * i);
      fft.twiddle(q + m * i, m, m / r * i, to + n + m * i);
    }
    fft.dif(to, m, r), fft.dif(to + n, m, r);
    flt_ (u32, i, 0, r) mul(mul, to + m * i, to + n + m * i, to + 2 * n + m * i, m);
    fft.dit(to + 2 * n, m, r);
    flt_ (u32, i, 0, n) to[2 * n + i] *= inv;
    flt_ (u32, i, 0, r) fft.twiddle(to + 2 * n + m * i, m, 3 * m - m / r * i, to + n + m * i);
    // Second pass on the conjugated inputs with doubled twist; result lands in q.
    flt_ (u32, i, 0, r) {
      flt_ (u32, j, 0, m) p[m * i + j] = conj(p[m * i + j]), q[m * i + j] = conj(q[m * i + j]);
      fft.twiddle(p + m * i, m, 2 * m / r * i, to + m * i);
      fft.twiddle(q + m * i, m, 2 * m / r * i, p + m * i);
    }
    fft.dif(to, m, r), fft.dif(p, m, r);
    flt_ (u32, i, 0, r) mul(mul, to + m * i, p + m * i, to + 2 * n + m * i, m);
    fft.dit(to + 2 * n, m, r);
    flt_ (u32, i, 0, n) to[2 * n + i] *= inv;
    flt_ (u32, i, 0, r) fft.twiddle(to + 2 * n + m * i, m, 3 * m - 2 * m / r * i, q + m * i);
    // Combine both partial results into to[0, n) and divide by 3.
    fill_n(to, n, 0);
    flt_ (u32, i, 0, n) {
      to[i] += (1 - EI::w) * to[n + i] + (1 - EI::w2) * conj(q[i]);
      if (i + m < n) to[i + m] += (EI::w2 - EI::w) * (to[n + i] - conj(q[i]));
      else to[i + m - n] += (1 - EI::w2) * (to[n + i] - conj(q[i]));
    }
    flt_ (u32, i, 0, n) to[i] *= inv_3;
  };
  mul(mul, pa.data(), pb.data(), pc.data(), s);
  // Unpack: real parts hold coefficients [0, s), omega parts hold [s, 2s).
  // The result container must be vecuu to match the declared return type;
  // a vec<T> only converts when T is exactly u64.
  vecuu ans(ans_size);
  flt_ (u32, i, 0, min(s, ans_size)) ans[i] = (u64)pc[i].real();
  flt_ (u32, i, s, min(2 * s, ans_size)) ans[i] = (u64)pc[i - s].imag();
  return ans;
}

}  // namespace tifa_libs
Back to top page