QOJ.ac

QOJ

IDProblemSubmitterResultTimeMemoryLanguageFile sizeSubmit timeJudge time
#363156#618. 多项式乘法NOI_AK_ME100 ✓58ms39076kbC++2324.9kb2024-03-23 18:52:202024-03-23 18:52:22

Judging History

你现在查看的是最新测评结果

  • [2024-03-23 18:52:22]
  • 评测
  • 测评结果:100
  • 用时:58ms
  • 内存:39076kb
  • [2024-03-23 18:52:20]
  • 提交

answer

#include <iostream>
#include <algorithm>
#include <functional>
using i32 = int;
using u32 = unsigned;
using i64 = long long;
using u64 = unsigned long long;
#define IL __inline__ __attribute__((always_inline))
#define RC(T, x) reinterpret_cast<T>(x)
namespace Setting
{
	constexpr u32 mod = 998244353;
	constexpr int sta_l_MB = 64;
	constexpr int Detail = 1;
}
namespace __qed
{
	constexpr u32 primitive_root_constexpr(u32 M)
	{
		u32 ed = 0, n = M - 1, d[11] = {};
		for (int i = 2; i * i <= n; i++)
		{
			if (n % i == 0)
			{
				d[ed++] = i;
				do
				{
					n /= i;
				} while (n % i == 0);
			}
		}
		if (n > 1)
		{
			d[ed++] = n;
		}
		for (u32 g = 2, r = 0;; ++g)
		{
			for (u32 i = 0; i < ed; ++i)
			{
				u32 b = (M - 1) / d[i], a = g;
				for (r = 1; b; b >>= 1, a = u64(a) * a % M)
				{
					if (b & 1)
					{
						r = u64(r) * a % M;
					}
				}
				if (r == 1)
				{
					break;
				}
			}
			if (r != 1)
			{
				return g;
			}
		}
	}
	constexpr int bceil(int x) { return 1 << (32 - __builtin_clz(x - 1)); }
	constexpr int bit_up(int x) { return 1 << (32 - __builtin_clz(x)); }
	constexpr int cro_32(int x) { return __builtin_ctz(~x); }
	constexpr bool ispow2(u64 x) { return (x & (x - 1)) == 0; }
}
namespace Stat_Info
{
	using Setting::Detail;
	i64 ntt_size;
	i64 fill0_size, copy_size, rev_size;
	size_t max_cost_sta_l;
	constexpr const char *Author = "QedDust413 & killer_joke";
	constexpr const char *Thanks = "negiizhao, chaihf, rogeryoungh, Qdedawsd2233, bh1234666, yosupo, Pulsating_Dust, KKKKa, qdc, and more.";
	template <typename Tf = std::ostream>
	void report(Tf &outf = std::clog)
	{
		outf << '\n';
		if constexpr (Detail <= 0)
		{
			outf << "Statistics are turned off.\n";
		}
		if constexpr (Detail > 0)
		{
			outf << "ntt_size:" << ntt_size << "\n";
		}
		if constexpr (Detail > 1)
		{
			outf << "fill0_size:" << fill0_size << " copy_size:" << copy_size << " rev_size:" << rev_size << "\nmax_cost_sta:" << (double(max_cost_sta_l) / double(1 << 20)) << "\n";
		}
		outf << std::endl;
	}
}
namespace mem_helper
{
	using namespace __qed;
	template <size_t align>
	void *to_align(void *mem)
	{
		static_assert(ispow2(align) && (align != 0));
		return RC(void *, (RC(u64, mem) + (align - 1)) & (-align));
	}
	template <size_t align>
	bool is_align(const void *mem)
	{
		static_assert(ispow2(align) && (align != 0));
		return (RC(u64, mem) & (align - 1)) == 0;
	}
	using Setting::sta_l_MB;
	char _mem[sta_l_MB << 20];
	void *now = _mem;
	struct pre_aloc
	{
		void *t;
		pre_aloc() { t = now; }
		~pre_aloc() { now = t; }
	};
	template <typename T, size_t align>
	T *AlocS(size_t n)
	{
		T *r = RC(T *, to_align<align>(now));
		now = r + n;
		if constexpr (Stat_Info::Detail > 1)
		{
			Stat_Info::max_cost_sta_l = std::max<size_t>(RC(char *, now) - _mem, Stat_Info::max_cost_sta_l);
		}
		return r;
	}
	template <size_t align>
	void *_alocH(size_t n)
	{
		constexpr size_t ptr_size = sizeof(void *), Extra = (align + ptr_size);
		void *base = std::malloc(n + Extra), *p = to_align<align>(RC(char *, base) + ptr_size);
		return RC(void **, p)[-1] = base, p;
	}
	template <typename T, size_t align>
	T *AlocH(size_t n) { return RC(T *, _alocH<align>(n * sizeof(T))); }
	void Freed(void *p) { std::free(RC(void **, p)[-1]); }
	template <typename T, size_t align>
	struct Trivial_Aligned_Allocator
	{
		static_assert(std::is_trivial<T>::value);
		typedef T value_type;
		T *allocate(std::size_t n) { return AlocH<T, align>(n); }
		template <class T1>
		struct rebind
		{
			using other = Trivial_Aligned_Allocator<T1, align>;
		};
		void deallocate(T *p, std::size_t n) { Freed(p); }
	};
}
namespace Montgo
{
	struct Mont32
	{
		u32 Mod, Mod2, Inv, NInv, R2;
		constexpr Mont32(u32 n) : Mod(n), Mod2(n << 1), Inv(1), NInv(), R2((-u64(n)) % n)
		{
			for (int i = 0; i < 5; ++i)
			{
				Inv *= 2 - n * Inv;
			}
			NInv = -Inv;
		}
		constexpr IL u32 reduce(u64 x) const { return (x + u64(u32(x) * NInv) * Mod) >> 32; }
		constexpr IL u32 reduce_s(u64 x) const
		{
			u32 r = (x >> 32) - ((u64(u32(x) * Inv) * Mod) >> 32);
			return r >> 31 ? r + Mod : r;
		}
		constexpr IL u32 mul(u32 x, u32 y) const { return reduce(u64(x) * y); }
		constexpr IL u32 mul_s(u32 x, u32 y) const { return reduce_s(u64(x) * y); }
		constexpr IL u32 In(u32 x) const { return mul(x, R2); }
		constexpr IL u32 In_s(u32 x) const { return mul_s(x, R2); }
		constexpr IL u32 Out(u32 x) const
		{
			u32 r = (x + (u64(x * NInv) * Mod)) >> 32;
			return __builtin_expect(r < Mod, 1) ? r : r - Mod;
		}
	};
}
namespace field_Z
{
	using Setting::mod;
	constexpr Montgo::Mont32 Space(mod);
	constexpr u32 mod2 = Space.Mod2;
	constexpr IL u32 shrink(u32 x) { return x >= mod ? x - mod : x; }
	constexpr IL u32 dilate2(u32 x) { return x >> 31 ? x + mod2 : x; }
	using Z = u32;
	constexpr bool isgood(Z x) { return x < mod2; }
	constexpr IL Z InZ(u32 x) { return Space.In(x); }
	constexpr IL Z InZs(u32 x) { return Space.In_s(x); }
	constexpr Z zero_Z(0), one_Z(InZs(1)), not_exist_Z(-1);
	constexpr IL u32 OutZ(Z x) { return Space.Out(x); }
	constexpr bool equalZ(Z x, Z y) { return shrink(x) == shrink(y); }
	namespace calc
	{
		constexpr IL Z addZ(Z x, Z y) { return dilate2(x + y - mod2); }
		constexpr IL Z subZ(Z x, Z y) { return dilate2(x - y); }
		constexpr IL Z mulZ(Z x, Z y) { return Space.mul(x, y); }
		constexpr Z powZ(Z a, u32 b, Z r = one_Z)
		{
			for (; b; a = mulZ(a, a), b >>= 1)
			{
				if (b & 1)
				{
					r = mulZ(r, a);
				}
			}
			return r;
		}
		constexpr IL Z invZ(Z x) { return powZ(x, mod - 2); }
		constexpr IL Z divZ(Z x, Z y) { return powZ(y, mod - 2, x); }
		template <bool strict = true>
		constexpr IL Z mulZs(Z x, Z y)
		{
			if constexpr (strict)
			{
				return Space.mul_s(x, y);
			}
			return mulZ(x, y);
		}
		constexpr Z negZ(Z x) { return (!x - 1) & (mod2 - x); }
		namespace extend_
		{
			constexpr Z absZ(Z x)
			{
				u32 r = OutZ(x);
				return InZs(std::min(r, mod - r));
			}
			constexpr Z legendre(Z x) { return powZ(x, (mod - 1) >> 1); }
			constexpr bool isQR(Z x) { return equalZ(legendre(x), one_Z); }
			constexpr Z sqrtZ(Z x)
			{
				if (!isQR(x))
				{
					return not_exist_Z;
				}
				Z a(1), I_mul(0);
				while (isQR(I_mul = subZ(mulZ(a, a), x)))
				{
					++a;
				}
				struct dZ
				{
					Z r, i;
					constexpr void Mul(dZ d, Z I_) { *this = {addZ(mulZ(r, d.r), mulZ(mulZ(I_, i), d.i)), addZ(mulZ(r, d.i), mulZ(i, d.r))}; }
				};
				dZ s = {a, one_Z}, r = {one_Z, zero_Z};
				for (u32 b = (mod + 1) >> 1; b; b >>= 1, s.Mul(s, I_mul))
				{
					if (b & 1)
					{
						r.Mul(s, I_mul);
					}
				}
				return absZ(r.r);
			}
		}
	}
	template <int fixes, bool strict = true>
	constexpr Z trans(Z x)
	{
		constexpr Z o = fixes > 0 ? calc::powZ(Space.R2, fixes) : calc::powZ(1, -fixes);
		return calc::mulZs<strict>(x, o);
	}
	template <bool strict = true>
	constexpr Z trans(Z x, int fixes) { return calc::mulZs<strict>(x, fixes > 0 ? calc::powZ(Space.R2, fixes) : calc::powZ(1, -fixes)); }
	namespace Const
	{
		constexpr Z _half = InZs((mod + 1) >> 1), _neghalf = InZs((mod - 1) >> 1), neg_one = InZs(mod - 1);
		constexpr Z img = calc::extend_::sqrtZ(neg_one), imgI = mod - img, _imghalf = calc::mulZs(_half, img);
	}
}
#pragma GCC target("avx2")
#include <immintrin.h>
#define VEC(sz, T) __attribute((vector_size(sz))) T
namespace SIMD
{
	using i32x8 = VEC(32, i32);
	using u32x8 = VEC(32, u32);
	using i64x4 = VEC(32, i64);
	using u64x4 = VEC(32, u64);
	using I256 = __m256i;
	constexpr IL u32x8 setu32x8(u32 x) { return (u32x8){x, x, x, x, x, x, x, x}; }
	template <int typ>
	IL u32x8 shuffle(const u32x8 &x) { return RC(u32x8, _mm256_shuffle_epi32(RC(I256, x), typ)); }
	template <int typ>
	IL u32x8 blend(const u32x8 &x, const u32x8 &y) { return RC(u32x8, _mm256_blend_epi32(RC(I256, x), RC(I256, y), typ)); }
	IL I256 swaplohi128(const I256 &x) { return _mm256_permute2x128_si256(x, x, 1); }
	IL u32x8 &x8(u32 *data) { return *RC(u32x8 *, data); }
	IL const u32x8 &x8(const u32 *data) { return *RC(const u32x8 *, data); }
	IL I256 loadu(const void *data) { return _mm256_loadu_si256(RC(const __m256i_u *, data)); }
	IL void storeu(const I256 &x, void *data) { return _mm256_storeu_si256(RC(__m256i_u *, data), x); }
	IL u64x4 fus_mul(const u32x8 &x, const u32x8 &y) { return RC(u64x4, _mm256_mul_epu32(RC(I256, x), RC(I256, y))); }
}
namespace field_Z
{
	using SIMD::setu32x8;
	using SIMD::u32x8;
	using SIMD::x8;
	using Zx8 = u32x8;
	constexpr u32x8 modx8 = setu32x8(mod), mod2x8 = setu32x8(mod2), NInvx8 = setu32x8(Space.NInv);
	constexpr Zx8 one_Zx8 = setu32x8(one_Z), zerox8 = setu32x8(0u);
	IL Zx8 dilate2x8(const Zx8 &x) { return x + (RC(Zx8, RC(SIMD::i32x8, x) < RC(SIMD::i32x8, zerox8)) & mod2x8); }
	IL Zx8 shrinkx8(const Zx8 &x) { return x - ((x >= modx8) & modx8); }
	namespace calc
	{
		using namespace SIMD;
		IL Zx8 addZx8(const Zx8 &x, const Zx8 &y) { return dilate2x8(x + y - mod2x8); }
		IL Zx8 subZx8(const Zx8 &x, const Zx8 &y) { return dilate2x8(x - y); }
		IL Zx8 mulZx8(const Zx8 &x, const Zx8 &y)
		{
			u32x8 z = (NInvx8 * x * y);
			return blend<0xaa>(RC(u32x8, (fus_mul(x, y) + fus_mul(z, modx8)) >> 32), RC(u32x8, (fus_mul(u32x8(u64x4(x) >> 32), u32x8(u64x4(y) >> 32)) + fus_mul(shuffle<0xf5>(z), modx8))));
		}
		IL Zx8 powZx8(const Zx8 &y, u32 b, const Zx8 &_r = one_Zx8)
		{
			Zx8 x = y, r = _r;
			for (; b; x = mulZx8(x, x), b >>= 1)
			{
				if (b & 1)
				{
					r = mulZx8(r, x);
				}
			}
			return r;
		}
		IL Zx8 invZx8(const Zx8 &x) { return powZx8(x, mod - 2); }
		IL Zx8 divZx8(const Zx8 &x, const Zx8 &y) { return powZx8(y, mod - 2, x); }
		template <bool strict = true>
		IL Zx8 mulZsx8(const Zx8 &x, const Zx8 &y)
		{
			if constexpr (strict)
			{
				u32x8 z = (NInvx8 * x * y);
				z = blend<0xaa>(RC(u32x8, (fus_mul(x, y) + fus_mul(z, modx8)) >> 32), RC(u32x8, (fus_mul(u32x8(u64x4(x) >> 32), u32x8(u64x4(y) >> 32)) + fus_mul(shuffle<0xf5>(z), modx8)))) - modx8;
				return z + (RC(Zx8, RC(i32x8, z) < RC(i32x8, zerox8)) & modx8);
			}
			return mulZx8(x, y);
		}
		IL Zx8 negZx8(const Zx8 &x) { return (x != zerox8) & (mod2x8 - x); }
		IL Zx8 _AmulZx8(const Zx8 &a, const Zx8 &b, const Zx8 &c) { return mulZx8(a + b, c); }
		IL Zx8 _SmulZx8(const Zx8 &a, const Zx8 &b, const Zx8 &c) { return mulZx8(a - b + mod2x8, c); }
	}
}
#undef VEC
#define STATI(ifo, dt, num)               \
	if constexpr (Stat_Info::Detail > dt) \
	{                                     \
		Stat_Info::ifo += (num);          \
	}
namespace poly
{
	namespace poly_base
	{
		using namespace field_Z;
		using __qed::bit_up;
		void fl0(Z *f, int l) { STATI(fill0_size, 1, l)
								std::fill_n(f, l, zero_Z); }
		void fl0(Z *bg, Z *ed) { STATI(fill0_size, 1, ed - bg)
								 std::fill(bg, ed, zero_Z); }
		void Cpy(const Z *f, int l, Z *g) { STATI(copy_size, 1, l)
											std::copy_n(f, l, g); }
		void Cpy(const Z *bg, const Z *ed, Z *g) { STATI(copy_size, 1, ed - bg)
												   std::copy(bg, ed, g); }
		void rev(Z *bg, Z *ed) { STATI(rev_size, 1, ed - bg)
								 std::reverse(bg, ed); }
		void rev(Z *f, int l) { STATI(rev_size, 1, l)
								std::reverse(f, f + l); }
		void Cpy_fl0(const Z *f, int n, Z *g, int lim) { Cpy(f, n, g), fl0(g + n, g + lim); }
		void Cpy_rev(const Z *f, int l, Z *g) { STATI(rev_size, 1, l)
												STATI(copy_size, 1, l) std::reverse_copy(f, f + l, g); }
		void mul_t_s(const Z *A, int l, Z *B, Z t)
		{
			int i = 0;
			if (l > 16)
			{
				Zx8 tx8 = setu32x8(t);
				for (; i + 7 < l; i += 8)
				{
					x8(B + i) = calc::mulZx8(x8(A + i), tx8);
				}
			}
			for (; i < l; ++i)
			{
				B[i] = calc::mulZ(A[i], t);
			}
		}
#define flx(nam, opt)                                         \
	void nam(Z *A, int l, const Z *B)                         \
	{                                                         \
		int i = 0;                                            \
		for (; i + 7 < l; i += 8)                             \
		{                                                     \
			x8(A + i) = calc::opt##Zx8(x8(A + i), x8(B + i)); \
		}                                                     \
		for (; i < l; ++i)                                    \
		{                                                     \
			A[i] = calc::opt##Z(A[i], B[i]);                  \
		}                                                     \
	}                                                         \
	void nam(const Z *A, const Z *B, int l, Z *C)             \
	{                                                         \
		int i = 0;                                            \
		for (; i + 7 < l; i += 8)                             \
		{                                                     \
			x8(C + i) = calc::opt##Zx8(x8(A + i), x8(B + i)); \
		}                                                     \
		for (; i < l; ++i)                                    \
		{                                                     \
			C[i] = calc::opt##Z(A[i], B[i]);                  \
		}                                                     \
	}
		flx(dot, mul) flx(addP, add) flx(subP, sub)
#undef flx
	}
	namespace poly_base
	{
		using mem_helper::pre_aloc;
		const std::function<Z *(size_t)> alocS = mem_helper::AlocS<Z, 32>;
		const std::function<Z *(size_t)> alocH = mem_helper::AlocH<Z, 32>;
		const std::function<void(void *)> freed = mem_helper::Freed;
	}
	using namespace poly_base;
}
namespace poly
{
	namespace f_n_t_t_base
	{
		using namespace calc;
		using __qed::cro_32;
		constexpr int base4thre = 16;
		template <bool strict = false>
		void mul_t(Z *A, int l, Z t)
		{
			for (int j = 0; j < l; ++j)
			{
				A[j] = mulZs<strict>(A[j], t);
			}
		}
		constexpr int mp2 = __builtin_ctz(mod - 1);
		constexpr Z _g(InZ(__qed::primitive_root_constexpr(mod)));
		struct P_R_Tab
		{
			Z t[mp2 + 1];
			constexpr P_R_Tab(Z G) : t()
			{
				t[mp2] = shrink(powZ(G, (mod - 1) >> mp2));
				for (int i = mp2 - 1; ~i; --i)
				{
					t[i] = mulZs(t[i + 1], t[i + 1]);
				}
			}
			constexpr Z operator[](int i) const { return t[i]; }
		};
		constexpr P_R_Tab rt1(_g), rt1_I(invZ(_g));
		template <int lim>
		struct omega_info_base2
		{
			Z o[lim >> 1];
			constexpr omega_info_base2(const P_R_Tab &Tb) : o()
			{
				o[0] = one_Z;
				for (int i = 0; (1 << i) < (lim >> 1); ++i)
				{
					o[1 << i] = Tb[i + 2];
				}
				for (int i = 1, l = lim >> 1; i < l; ++i)
				{
					o[i] = mulZ(o[i & (i - 1)], o[i & -i]);
				}
			}
			constexpr Z operator[](int i) const { return o[i]; }
		};
		const omega_info_base2<base4thre> w(rt1), wI(rt1_I);
		constexpr Zx8 powXx8(Z X)
		{
			Z X2 = mulZs(X, X), X3 = mulZs(X2, X), X4 = mulZs(X3, X), X5 = mulZs(X4, X), X6 = mulZs(X5, X), X7 = mulZs(X6, X);
			return (Zx8){one_Z, X, X2, X3, X4, X5, X6, X7};
		}
		struct ntt_info_base4x8
		{
			Z rt3[mp2 - 2], rt3_I[mp2 - 2];
			Zx8 rt4ix8[mp2 - 3], rt4ix8_I[mp2 - 3];
			constexpr ntt_info_base4x8() : rt3(), rt3_I(), rt4ix8(), rt4ix8_I()
			{
				Z pr = one_Z, pr_I = one_Z;
				for (int i = 0; i < mp2 - 2; ++i)
				{
					rt3[i] = mulZs(pr, rt1[i + 3]), rt3_I[i] = mulZs(pr_I, rt1_I[i + 3]);
					pr = mulZs(pr, rt1_I[i + 3]), pr_I = mulZs(pr_I, rt1[i + 3]);
				}
				pr = one_Z, pr_I = one_Z;
				for (int i = 0; i < mp2 - 3; ++i)
				{
					rt4ix8[i] = powXx8(mulZs(pr, rt1[i + 4])), rt4ix8_I[i] = powXx8(mulZs(pr_I, rt1_I[i + 4]));
					pr = mulZs(pr, rt1_I[i + 4]), pr_I = mulZs(pr_I, rt1[i + 4]);
				}
			}
		};
		template <int typ>
		IL u32x8 Neg(const u32x8 &x) { return blend<typ>(x, mod2x8 - x); }
	}
	namespace f_n_t_t
	{
		using namespace f_n_t_t_base;
		template <bool strict = false, int fixes = 0>
		void dif_base2(Z *A, int lim)
		{
			STATI(ntt_size, 0, lim);
			for (int L = lim >> 1, R = lim; L; L >>= 1, R >>= 1)
			{
				for (int i = 0, k = 0; i < lim; i += R, ++k)
				{
					for (int j = 0; j < L; ++j)
					{
						Z x = dilate2(A[i + j] - mod2), y = mulZ(w[k], A[i + j + L]);
						A[i + j] = x + y, A[i + j + L] = x - y + mod2;
					}
				}
			}
			if constexpr (fixes)
			{
				mul_t<strict>(A, lim, trans<fixes>(one_Z));
			}
			else
			{
				for (int j = 0; j < lim; ++j)
				{
					A[j] = dilate2(A[j] - mod2);
					if constexpr (strict)
					{
						A[j] = shrink(A[j]);
					}
				}
			}
		}
		template <bool strict = false, int fixes = 0>
		void dit_base2(Z *A, int lim)
		{
			STATI(ntt_size, 0, lim);
			for (int L = 1, R = 2; L < lim; L <<= 1, R <<= 1)
			{
				for (int i = 0, k = 0; i < lim; i += R, ++k)
				{
					for (int j = 0; j < L; ++j)
					{
						Z x = A[i + j], y = A[i + j + L];
						A[i + j] = addZ(x, y), A[i + j + L] = mulZ(x - y + mod2, wI[k]);
					}
				}
			}
			mul_t<strict>(A, lim, trans<fixes + 1>(mod - ((mod - 1) / lim)));
		}
		constexpr Zx8 imagx8 = setu32x8(rt1[2]), imag_Ix8 = setu32x8(rt1_I[2]);
		constexpr Z w38 = mulZs(rt1[2], rt1[3]), w38I = mulZs(rt1_I[2], rt1_I[3]);
		constexpr ntt_info_base4x8 iab4;
		template <bool strict = false, int fixes = 0>
		void dif_base4x8(Z *A, int lim)
		{
			STATI(ntt_size, 0, lim);
			int n = lim >> 3, L = n >> 1;
			Zx8 *f = RC(Zx8 *, A);
			if (__builtin_ctz(n) & 1)
			{
				for (int j = 0; j < L; ++j)
				{
					Zx8 x = f[j], y = f[j + L];
					f[j] = x + y, f[j + L] = x - y + mod2x8;
				}
				L >>= 1;
			}
			L >>= 1;
			for (int R = L << 2; L; L >>= 2, R >>= 2)
			{
				Z _r = one_Z, _r2 = one_Z, _r3 = one_Z;
				for (int i = 0, k = 0; i < n; i += R, ++k)
				{
					Zx8 r = setu32x8(_r), r2 = setu32x8(_r2), r3 = setu32x8(_r3);
					for (int j = 0; j < L; ++j)
					{
#define F(c) f[i + j + c * L]
						Zx8 f0 = dilate2x8(F(0) - mod2x8), f1 = mulZx8(F(1), r), f2 = mulZx8(F(2), r2), f3 = mulZx8(F(3), r3);
						Zx8 f1f3 = _SmulZx8(f1, f3, imagx8), f02 = addZx8(f0, f2), f13 = addZx8(f1, f3), f_02 = subZx8(f0, f2);
						F(0) = f02 + f13, F(1) = f02 - f13 + mod2x8, F(2) = f_02 + f1f3, F(3) = f_02 - f1f3 + mod2x8;
#undef F
					}
					_r = mulZs(_r, iab4.rt3[cro_32(k)]), _r2 = mulZs(_r, _r), _r3 = mulZs(_r2, _r);
				}
			}
			{
				constexpr Zx8 _r = setu32x8(trans<fixes>(one_Z)), pr2 = {one_Z, one_Z, one_Z, rt1[2], one_Z, one_Z, one_Z, rt1[2]}, pr4 = {one_Z, one_Z, one_Z, one_Z, one_Z, rt1[3], rt1[2], w38};
				Zx8 r = _r;
				for (int i = 0; i < n; ++i)
				{
					Zx8 &fi = f[i];
					fi = mulZx8(fi, r);
					fi = _AmulZx8(Neg<0xf0>(fi), RC(Zx8, swaplohi128(RC(I256, fi))), pr4);
					fi = _AmulZx8(Neg<0xcc>(fi), shuffle<0x4e>(fi), pr2);
					fi = addZx8(Neg<0xaa>(fi), shuffle<0xb1>(fi));
					if constexpr (strict)
					{
						fi = shrinkx8(fi);
					}
					r = mulZsx8(r, iab4.rt4ix8[cro_32(i)]);
				}
			}
		}
		template <bool strict = false, int fixes = 0>
		void dit_base4x8(Z *A, int lim)
		{
			STATI(ntt_size, 0, lim);
			int n = lim >> 3, L = 1;
			Zx8 *f = RC(Zx8 *, A);
			{
				constexpr Zx8 pr2 = {one_Z, one_Z, one_Z, rt1_I[2], one_Z, one_Z, one_Z, rt1_I[2]}, pr4 = {one_Z, one_Z, one_Z, one_Z, one_Z, rt1_I[3], rt1_I[2], w38I};
				Zx8 r = setu32x8(trans<fixes + 1>(mod - ((mod - 1) / lim)));
				for (int i = 0; i < n; ++i)
				{
					Zx8 &fi = f[i];
					fi = _AmulZx8(Neg<0xaa>(fi), shuffle<0xb1>(fi), pr2);
					fi = _AmulZx8(Neg<0xcc>(fi), shuffle<0x4e>(fi), pr4);
					fi = _AmulZx8(Neg<0xf0>(fi), RC(Zx8, swaplohi128(RC(I256, fi))), r);
					r = mulZsx8(r, iab4.rt4ix8_I[cro_32(i)]);
				}
			}
			for (int R = L << 2; L < (n >> 1); L <<= 2, R <<= 2)
			{
				Z _r = one_Z, _r2 = one_Z, _r3 = one_Z;
				for (int i = 0, k = 0; i < n; i += R, ++k)
				{
					Zx8 r = setu32x8(_r), r2 = setu32x8(_r2), r3 = setu32x8(_r3);
					for (int j = 0; j < L; ++j)
					{
#define F(c) f[i + j + c * L]
						Zx8 f0 = F(0), f1 = F(1), f2 = F(2), f3 = F(3);
						Zx8 f2f3 = _SmulZx8(f2, f3, imag_Ix8), f01 = addZx8(f0, f1), f23 = addZx8(f2, f3), f_01 = subZx8(f0, f1);
						F(0) = addZx8(f01, f23), F(1) = _AmulZx8(f_01, f2f3, r), F(2) = _SmulZx8(f01, f23, r2), F(3) = _SmulZx8(f_01, f2f3, r3);
#undef F
					}
					_r = mulZs(_r, iab4.rt3_I[cro_32(k)]), _r2 = mulZs(_r, _r), _r3 = mulZs(_r2, _r);
				}
			}
			if (__builtin_ctz(n) & 1)
			{
				for (int j = 0; j < L; ++j)
				{
					Zx8 x = f[j], y = f[j + L];
					f[j] = addZx8(x, y), f[j + L] = subZx8(x, y);
				}
			}
			if constexpr (strict)
			{
				for (int i = 0; i < n; ++i)
				{
					f[i] = shrinkx8(f[i]);
				}
			}
		}
		template <bool strict = false, int fixes = 0>
		void dif(Z *A, int lim) { lim >= base4thre ? dif_base4x8<strict, fixes>(A, lim) : dif_base2<strict, fixes>(A, lim); }
		template <bool strict = false, int fixes = 0>
		void dit(Z *A, int lim) { lim >= base4thre ? dit_base4x8<strict, fixes>(A, lim) : dit_base2<strict, fixes>(A, lim); }
	}
	using f_n_t_t::dif;
	using f_n_t_t::dit;
}
#undef STATI
#include <sys/mman.h>
#include <sys/stat.h>
#include <cstring>
namespace QIO_base
{
	constexpr int O_buffer_default_size = 1 << 18;
	constexpr int O_buffer_default_flush_threshold = 40;
	struct _int_to_char_tab
	{
		char tab[40000];
		constexpr _int_to_char_tab() : tab()
		{
			for (int i = 0; i != 10000; ++i)
			{
				for (int j = 3, n = i; ~j; --j)
				{
					tab[i * 4 + j] = n % 10 + 48, n /= 10;
				}
			}
		}
	} constexpr _otab;
}
namespace QIO_I
{
	using namespace QIO_base;
	struct Qinf
	{
		FILE *f;
		char *bg, *ed, *p;
		struct stat Fl;
		Qinf(FILE *fi) : f(fi)
		{
			int fd = fileno(f);
			fstat(fd, &Fl);
			bg = (char *)mmap(0, Fl.st_size + 1, PROT_READ, MAP_PRIVATE, fd, 0);
			p = bg, ed = bg + Fl.st_size;
			madvise(p, Fl.st_size + 1, MADV_SEQUENTIAL);
		}
		~Qinf() { munmap(bg, Fl.st_size + 1); }
		void skip_space()
		{
			while (*p <= ' ')
			{
				++p;
			}
		}
		char get() { return *p++; }
		char seek() { return *p; }
		bool eof() { return p == ed; }
		Qinf &read(char *s, size_t count) { return memcpy(s, p, count), p += count, *this; }
		Qinf &operator>>(u32 &x)
		{
			skip_space(), x = 0;
			for (; *p > ' '; ++p)
			{
				x = x * 10 + (*p & 0xf);
			}
			return *this;
		}
		Qinf &operator>>(int &x)
		{
			skip_space();
			if (*p == '-')
			{
				for (++p, x = 48 - *p++; *p > ' '; ++p)
				{
					x = x * 10 - (*p ^ 48);
				}
			}
			else
			{
				for (x = *p++ ^ 48; *p > ' '; ++p)
				{
					x = x * 10 + (*p ^ 48);
				}
			}
			return *this;
		}
	} qin(stdin);
}
namespace QIO_O
{
	using namespace QIO_base;
	struct Qoutf
	{
		FILE *f;
		char *bg, *ed, *p;
		char *ed_thre;
		int fp;
		u64 _fpi;
		Qoutf(FILE *fo, size_t sz = O_buffer_default_size) : f(fo), bg(new char[sz]), ed(bg + sz), p(bg), ed_thre(ed - O_buffer_default_flush_threshold), fp(6), _fpi(1000000ull) {}
		void flush() { fwrite_unlocked(bg, 1, p - bg, f), p = bg; }
		void chk()
		{
			if (__builtin_expect(p > ed_thre, 0))
			{
				flush();
			}
		}
		~Qoutf()
		{
			flush();
			delete[] bg;
		}
		void put4(u32 x)
		{
			if (x > 99u)
			{
				if (x > 999u)
				{
					memcpy(p, _otab.tab + (x << 2) + 0, 4), p += 4;
				}
				else
				{
					memcpy(p, _otab.tab + (x << 2) + 1, 3), p += 3;
				}
			}
			else
			{
				if (x > 9u)
				{
					memcpy(p, _otab.tab + (x << 2) + 2, 2), p += 2;
				}
				else
				{
					*p++ = x ^ 48;
				}
			}
		}
		void put2(u32 x)
		{
			if (x > 9u)
			{
				memcpy(p, _otab.tab + (x << 2) + 2, 2), p += 2;
			}
			else
			{
				*p++ = x ^ 48;
			}
		}
		Qoutf &write(const char *s, size_t count)
		{
			if (count > 1024 || p + count > ed_thre)
			{
				flush(), fwrite_unlocked(s, 1, count, f);
			}
			else
			{
				memcpy(p, s, count), p += count, chk();
			}
			return *this;
		}
		Qoutf &operator<<(char ch) { return *p++ = ch, *this; }
		Qoutf &operator<<(u32 x)
		{
			if (x > 99999999u)
			{
				put2(x / 100000000u), x %= 100000000u;
				memcpy(p, _otab.tab + ((x / 10000u) << 2), 4), p += 4;
				memcpy(p, _otab.tab + ((x % 10000u) << 2), 4), p += 4;
			}
			else if (x > 9999u)
			{
				put4(x / 10000u);
				memcpy(p, _otab.tab + ((x % 10000u) << 2), 4), p += 4;
			}
			else
			{
				put4(x);
			}
			return chk(), *this;
		}
		Qoutf &operator<<(int x)
		{
			if (x < 0)
			{
				*p++ = '-', x = -x;
			}
			return *this << static_cast<u32>(x);
		}
	} qout(stdout);
}
namespace QIO
{
	using QIO_I::qin;
	using QIO_I::Qinf;
	using QIO_O::qout;
	using QIO_O::Qoutf;
}
using namespace QIO;
void solve()
{
	int n, m;
	qin >> n >> m;
++n,++m;
	int lim = __qed::bit_up(n + m - 2);
	auto F = poly::alocH(lim), G = poly::alocH(lim);
	for (int i = 0; i < n; ++i)
	{
		qin >> F[i];
	}
	std::fill(F + n, F + lim, 0);
	for (int i = 0; i < m; ++i)
	{
		qin >> G[i];
	}
	std::fill(G + m, G + lim, 0);
	poly::dif(F, lim), poly::dif(G, lim), poly::dot(F, lim, G), poly::dit<true, 1>(F, lim);
	for (int i = 0; i < n + m - 1; ++i)
	{
		qout << F[i] << ' ';
	}
	poly::freed(F), poly::freed(G);
}
int main()
{
	std::cin.tie(nullptr)->sync_with_stdio(false);
	solve();
	return 0;
}

Details

Tip: Click on the bar to expand more detailed information

Test #1:

score: 20
Accepted
time: 1ms
memory: 3696kb

input:

96 96
600395131 184265451 942971382 534262851 830366882 542271170 294355449 501371170 797809599 964826049 276651245 375755165 662619442 941920605 328216963 507795473 460271147 874920847 818231910 156789488 590591583 732194508 793983630 93566697 836155866 305319153 432040686 621119061 835023373 57138...

output:

683858396 5532883 499734624 910262414 221004044 924081841 392466229 64190174 260661815 939986106 283456690 260629512 990528995 704246427 991946815 236857583 903415172 900324859 938555797 225258152 874945420 516870315 74759441 769850097 353889928 300397164 63689540 115003940 872945378 407694641 91843...

result:

ok 193 numbers

Test #2:

score: 20
Accepted
time: 1ms
memory: 3868kb

input:

4992 4994
471194390 313639917 705341086 119536186 430124603 244978845 185588341 13731324 707132801 88167972 927324568 846658454 523684029 5133605 767200778 502476309 539772401 778154025 266136872 183516351 260704325 49303370 475056182 928574546 740424153 277920667 708439854 746983628 839491869 53579...

output:

700935456 677302967 772159864 479386810 109686665 263919131 29567167 960045078 636326916 585682137 409426717 14510019 441964472 92801447 551536199 216995135 59736203 790078879 55883568 796138076 265361608 66124731 150347029 93682849 205256362 672081205 86396898 573029352 541084997 293480941 90518071...

result:

ok 9987 numbers

Test #3:

score: 20
Accepted
time: 3ms
memory: 4440kb

input:

29995 29992
417238081 53580806 733071257 224121793 786137422 127072245 129083351 988357079 246853229 150935424 596994106 975796660 838029970 619117898 328485797 948485083 574261409 79312345 596346086 489787404 929520168 515647000 211731260 50868568 811515357 428215135 498099163 644485329 802849075 3...

output:

115270920 49832720 758693293 745763567 322999821 510579248 697424729 850661201 678364508 817667211 668544763 136619207 562899653 692811546 351397117 768369036 573254435 891143982 717302438 707939747 41743610 540709722 240732780 931265491 38731999 642520590 630812534 632188732 342954490 225414102 836...

result:

ok 59988 numbers

Test #4:

score: 20
Accepted
time: 3ms
memory: 7292kb

input:

100000 99993
812398607 947396010 797321381 883949986 56052416 586258761 193247973 611124334 773505112 142179482 565466227 140875825 79890768 893500101 553768089 648879319 480419657 915530184 799329430 494818755 793895824 851865180 459534006 259473419 610037701 472768430 868914058 887444584 588850309...

output:

821875273 646409297 701893040 744951544 891720486 338002304 134405948 686576985 653633849 704180950 763960458 160339533 773107048 630019221 467173934 675237413 824356289 394352126 870024535 473719536 246319541 372709664 656104889 677100818 890131281 374587639 160832628 144239351 450760970 646488586 ...

result:

ok 199994 numbers

Test #5:

score: 20
Accepted
time: 58ms
memory: 39076kb

input:

999993 999994
388529697 811245378 165909114 295553883 667981275 78502012 400874009 139394758 249494489 4636487 997712665 259780805 431039016 716944209 709300152 356513646 823185021 699568300 650937921 859190797 899514799 785648601 933470757 627225124 349752104 471458923 456404256 48134357 315599086 ...

output:

199012842 735467570 660520906 870291510 102406003 509914017 591503608 692425397 149848591 232605296 411728228 285507919 90090498 682749099 507720817 425946949 937188332 619041823 738654334 153862895 272311969 793838225 260785140 350903642 151151058 631242104 304026658 123734332 23714740 438936743 77...

result:

ok 1999988 numbers