QOJ.ac
QOJ
ID | 题目 | 提交者 | 结果 | 用时 | 内存 | 语言 | 文件大小 | 提交时间 | 测评时间 |
---|---|---|---|---|---|---|---|---|---|
#200533 | #622. 多项式多点求值 | NOI_AK_ME | 80 | 57ms | 29344kb | C++23 | 29.8kb | 2023-10-04 17:21:31 | 2023-10-04 17:21:32 |
Judging History
answer
#include<iostream>
#include<algorithm>
#include<functional>
using i32 = int;
using u32 = unsigned;
using i64 = long long;
using u64 = unsigned long long;
#define IL __inline__ __attribute__((always_inline))
#define RC(T, x) reinterpret_cast<T>(x)
/*
一个高性能的多项式实现
作者为 QedDust413 & killer_joke
*/
namespace Setting{
constexpr u32 mod = 998244353;
constexpr int sta_l_MB = 64;
constexpr int Detail = 1;
}
namespace __qed{
constexpr u32 primitive_root(u32 M)
{
u32 qed = 0, n = M - 1, d[11] = {};
for (int i = 2; i * i <= n; i++){
if (n % i == 0){
d[qed++] = i;
do{n /= i;}while (n % i == 0);
}
}
if (n > 1){d[qed++] = n;}
for (u32 g = 2, r = 0; ; ++g){
for (u32 i = 0; i < qed; ++i){
u32 b = (M - 1) / d[i], a = g;
for(r = 1; b; b >>= 1, a = u64(a) * a % M){
if(b & 1){r = u64(r) * a % M;}
}
if (r == 1){break;}
}
if(r != 1){return g;}
}
}
constexpr int bceil(int x){return 1 << (32 - __builtin_clz(x - 1));}
constexpr int bit_up(int x){return 1 << (32 - __builtin_clz(x));}
constexpr int cro_32(int x){return __builtin_ctz(~x);}
constexpr bool ispow2(u64 x){return (x & (x - 1)) == 0;}
}
namespace Stat_Info{
using Setting::Detail;
i64 ntt_size;
i64 fill0_size, copy_size, rev_size;
size_t max_cost_sta_l;
constexpr const char* Author = "QedDust413 & killer_joke";
constexpr const char* Thanks = "negiizhao, chaihf, rogeryoungh, EntropyIncreaser, Qdedawsd2233, bh1234666, yosupo, Pulsating_Dust, KKKKa, qdc, and more.";
template<typename Tf = std::ostream>void report(Tf &outf = std::clog){
outf << '\n';
if constexpr(Detail <= 0){outf << "Statistics are turned off.\n";}
if constexpr(Detail > 0){outf << "ntt_size:" << ntt_size << "\n";}
if constexpr(Detail > 1){outf << "fill0_size:" << fill0_size << " copy_size:" << copy_size << " rev_size:" << rev_size << "\nmax_cost_sta:" << (double(max_cost_sta_l) / double(1 << 20)) << "\n";}
outf.flush();
}
}
namespace mem_helper{
using namespace __qed;
template<size_t align>void *to_align(void *mem){
static_assert(ispow2(align) && (align != 0));
return RC(void*, (RC(u64, mem) + (align - 1)) & (-align));
}
template<size_t align>bool is_align(const void* mem){
static_assert(ispow2(align) && (align != 0));
return (RC(u64, mem) & (align - 1)) == 0;
}
using Setting::sta_l_MB;
char _mem[sta_l_MB << 20];
void *now = _mem;
struct pre_aloc{
void *t;
pre_aloc(){t = now;}
~pre_aloc(){now = t;}
};
template<typename T, size_t align>T *AlocS(size_t n){
T *r = RC(T*, to_align<align>(now));
now = r + n;
if constexpr(Stat_Info::Detail > 1){
Stat_Info::max_cost_sta_l = std::max<size_t>(RC(char*, now) - _mem, Stat_Info::max_cost_sta_l);
}
return r;
}
template<size_t align>void *_alocH(size_t n){
constexpr size_t ptr_size = sizeof(void*), Extra = (align + ptr_size);
void *base = std::malloc(n + Extra), *p = to_align<align>(RC(char*, base) + ptr_size);
return RC(void**, p)[-1] = base, p;
}
template<typename T, size_t align>T *AlocH(size_t n){
return RC(T*, _alocH<align>(n * sizeof(T)));
}
void Freed(void* p){
std::free(RC(void**, p)[-1]);
}
template<typename T, size_t align>struct Trivial_Aligned_Allocator{
static_assert(std::is_trivial<T>::value);
typedef T value_type;
T *allocate(std::size_t n) {return AlocH<T, align>(n);}
template <class T1>struct rebind {using other = Trivial_Aligned_Allocator<T1, align>;};
void deallocate(T *p, std::size_t n) {Freed(p);}
};
}
namespace Montgo{
//Author : QedDust413
struct Mont32{
u32 Mod, Mod2, Inv, NInv, R2;
constexpr Mont32(u32 n):Mod(n), Mod2(n << 1), Inv(1), NInv(), R2((-u64(n)) % n){
for (int i = 0; i < 5; ++i){Inv *= 2 - n * Inv;}NInv = -Inv;
}
constexpr IL u32 reduce (u64 x)const{return (x + u64(u32(x) * NInv) * Mod) >> 32;}
constexpr IL u32 reduce_s (u64 x)const{
u32 r = (x >> 32) - ((u64(u32(x) * Inv) * Mod) >> 32);
return r >> 31 ? r + Mod : r;
}
constexpr IL u32 mul(u32 x, u32 y)const{return reduce(u64(x) * y);}
constexpr IL u32 mul_s(u32 x, u32 y)const{return reduce_s(u64(x) * y);}
constexpr IL u32 In(u32 x)const{return mul(x, R2);}
constexpr IL u32 In_s(u32 x)const{return mul_s(x, R2);}
constexpr IL u32 Out(u32 x)const{
u32 r = (x + (u64(x * NInv) * Mod)) >> 32;
return __builtin_expect(r < Mod, 1) ? r : r - Mod;
}
};
}
namespace field_Z{
//Author : QedDust413
using Setting::mod;
constexpr Montgo::Mont32 Space(mod);
constexpr u32 mod2 = Space.Mod2;
constexpr IL u32 shrink(u32 x){return x >= mod ? x - mod : x;}
constexpr IL u32 dilate2(u32 x){return x >> 31 ? x + mod2 : x;}
using Z = u32;
constexpr bool isgood(Z x){return x < mod2;}
constexpr IL Z InZ(u32 x){return Space.In(x);}
constexpr IL Z InZs(u32 x){return Space.In_s(x);}
constexpr Z zero_Z(0), one_Z(InZs(1)), not_exist_Z(-1);
constexpr IL u32 OutZ(Z x){return Space.Out(x);}
constexpr bool equalZ(Z x, Z y){return shrink(x) == shrink(y);}
namespace calc{
constexpr IL Z addZ(Z x, Z y){return dilate2(x + y - mod2);}
constexpr IL Z subZ(Z x, Z y){return dilate2(x - y);}
constexpr IL Z mulZ(Z x, Z y){return Space.mul(x, y);}
constexpr Z powZ(Z a, u32 b, Z r = one_Z){
for(; b; a = mulZ(a, a), b >>= 1){if(b & 1){r = mulZ(r, a);}}
return r;
}
constexpr IL Z invZ(Z x){return powZ(x, mod - 2);}
constexpr IL Z divZ(Z x, Z y){return powZ(y, mod - 2, x);}
template<bool strict = true>constexpr IL Z mulZs(Z x, Z y){
if constexpr(strict){return Space.mul_s(x, y);}
return mulZ(x, y);
}
constexpr Z negZ(Z x){return (!x - 1) & (mod2 - x);}
namespace extend_{
constexpr Z absZ(Z x){
u32 r = OutZ(x);
return InZs(std::min(r, mod - r));
}
constexpr Z legendre(Z x){return powZ(x, (mod - 1) >> 1);}
constexpr bool isQR(Z x){return equalZ(legendre(x), one_Z);}
constexpr Z sqrtZ(Z x){
if(!isQR(x)){return not_exist_Z;}
Z a(1), I_mul(0);
while(isQR(I_mul = subZ(mulZ(a, a), x))){++a;}
struct dZ{
Z r, i;
constexpr void Mul(dZ d, Z I_){*this = {addZ(mulZ(r, d.r), mulZ(mulZ(I_, i), d.i)), addZ(mulZ(r, d.i), mulZ(i, d.r))};}
};
dZ s = {a, one_Z}, r = {one_Z, zero_Z};
for(u32 b = (mod + 1) >> 1; b; b >>= 1, s.Mul(s, I_mul)){if(b & 1){r.Mul(s, I_mul);}}
return absZ(r.r);
}
}
}
template<int fixes, bool strict = true>constexpr Z trans(Z x){
constexpr Z o = fixes > 0 ? calc::powZ(Space.R2, fixes) : calc::powZ(1, -fixes);
return calc::mulZs<strict>(x, o);
}
template<bool strict = true>constexpr Z trans(Z x, int fixes){
return calc::mulZs<strict>(x, fixes > 0 ? calc::powZ(Space.R2, fixes) : calc::powZ(1, -fixes));
}
namespace Const{
constexpr Z _half = InZs((mod + 1) >> 1), _neghalf = InZs((mod - 1) >> 1), neg_one = InZs(mod - 1);
constexpr Z img = calc::extend_::sqrtZ(neg_one), imgI = mod - img, _imghalf = calc::mulZs(_half, img);
}
}
#pragma GCC target("avx2")
#include <immintrin.h>
#define VEC(sz, T) __attribute((vector_size(sz))) T
namespace SIMD{
using i32x8 = VEC(32, i32);
using u32x8 = VEC(32, u32);
using i64x4 = VEC(32, i64);
using u64x4 = VEC(32, u64);
using I256 = __m256i;
constexpr IL u32x8 setu32x8(u32 x){return (u32x8){x, x, x, x, x, x, x, x};}
template<int typ>IL u32x8 shuffle(const u32x8 &x){return RC(u32x8, _mm256_shuffle_epi32(RC(I256, x), typ));}
template<int typ>IL u32x8 blend(const u32x8 &x, const u32x8 &y){return RC(u32x8, _mm256_blend_epi32(RC(I256, x), RC(I256, y), typ));}
IL I256 swaplohi128(const I256 &x){return _mm256_permute2x128_si256(x, x, 1);}
IL u32x8& x8(u32 *data){return *RC(u32x8* ,data);}
IL const u32x8& x8(const u32 *data){return *RC(const u32x8*, data);}
IL I256 loadu(const void* data){return _mm256_loadu_si256(RC(const __m256i_u*, data));}
IL void storeu(const I256 &x, void* data){return _mm256_storeu_si256(RC(__m256i_u*, data), x);}
IL u64x4 fus_mul(const u32x8 &x, const u32x8 &y){return RC(u64x4, _mm256_mul_epu32(RC(I256, x), RC(I256, y)));}
}
namespace field_Z{
using SIMD::x8;
using SIMD::u32x8;
using SIMD::setu32x8;
using Zx8 = u32x8;
constexpr u32x8 modx8 = setu32x8(mod), mod2x8 = setu32x8(mod2), NInvx8 = setu32x8(Space.NInv);
constexpr Zx8 one_Zx8 = setu32x8(one_Z), zerox8 = setu32x8(0u);
IL Zx8 dilate2x8(const Zx8 &x){return x + (RC(Zx8, RC(SIMD::i32x8, x) < RC(SIMD::i32x8, zerox8)) & mod2x8);}
IL Zx8 shrinkx8(const Zx8 &x){return x - ((x >= modx8) & modx8);}
namespace calc{
//Author : killer_joke
using namespace SIMD;
IL Zx8 addZx8(const Zx8 &x, const Zx8 &y){return dilate2x8(x + y - mod2x8);}
IL Zx8 subZx8(const Zx8 &x, const Zx8 &y){return dilate2x8(x - y);}
IL Zx8 mulZx8(const Zx8 &x, const Zx8 &y){
u32x8 z = (NInvx8 * x * y);
return blend<0xaa>(RC(u32x8, (fus_mul(x, y) + fus_mul(z, modx8)) >> 32), RC(u32x8, (fus_mul(u32x8(u64x4(x) >> 32), u32x8(u64x4(y) >> 32)) + fus_mul(shuffle<0xf5>(z), modx8))));
}
IL Zx8 powZx8(const Zx8 &y, u32 b, const Zx8 &_r = one_Zx8){
Zx8 x = y, r = _r;
for(; b; x = mulZx8(x, x), b >>= 1){if(b & 1){r = mulZx8(r, x);}}
return r;
}
IL Zx8 invZx8(const Zx8 &x){return powZx8(x, mod - 2);}
IL Zx8 divZx8(const Zx8 &x, const Zx8 &y){return powZx8(y, mod - 2, x);}
template<bool strict = true>IL Zx8 mulZsx8(const Zx8 &x, const Zx8 &y){
if constexpr (strict){
u32x8 z = (NInvx8 * x * y);
z = blend<0xaa>(RC(u32x8, (fus_mul(x, y) + fus_mul(z, modx8)) >> 32), RC(u32x8, (fus_mul(u32x8(u64x4(x) >> 32), u32x8(u64x4(y) >> 32)) + fus_mul(shuffle<0xf5>(z), modx8)))) - modx8;
return z + (RC(Zx8, RC(i32x8, z) < RC(i32x8, zerox8)) & modx8);
}
return mulZx8(x, y);
}
IL Zx8 negZx8(const Zx8 &x){return (x != zerox8) & (mod2x8 - x);}
IL Zx8 _AmulZx8(const Zx8 &a, const Zx8 &b, const Zx8 &c){return mulZx8(a + b, c);}
IL Zx8 _SmulZx8(const Zx8 &a, const Zx8 &b, const Zx8 &c){return mulZx8(a - b + mod2x8, c);}
}
}
#undef VEC
#define STATI(ifo, dt, num) if constexpr (Stat_Info::Detail > dt){Stat_Info::ifo += (num);}
namespace poly_base{
using namespace field_Z;
using __qed::bit_up;
void fl0(Z *f, int l){STATI(fill0_size, 1, l)std::fill_n(f, l, zero_Z);}
void fl0(Z *bg, Z *ed){STATI(fill0_size, 1, ed - bg)std::fill(bg, ed, zero_Z);}
void Cpy(const Z *f, int l, Z *g){STATI(copy_size, 1, l)std::copy_n(f, l, g);}
void Cpy(const Z *bg, const Z *ed, Z *g){STATI(copy_size, 1, ed - bg)std::copy(bg, ed, g);}
void rev(Z *bg, Z *ed){STATI(rev_size, 1, ed - bg)std::reverse(bg, ed);}
void rev(Z *f, int l){STATI(rev_size, 1, l)std::reverse(f, f + l);}
void Cpy_fl0(const Z *f, int n, Z *g, int lim){Cpy(f, n, g), fl0(g + n, g + lim);}
void Cpy_rev(const Z *f, int l, Z *g){STATI(rev_size, 1, l)STATI(copy_size, 1, l)std::reverse_copy(f, f + l, g);}
#define flx(nam, opt) void nam(const Z *A, int l, Z *B, Z t){int i=0;if(l>16){Zx8 tx8=setu32x8(t);for(;i+7<l;i+=8){x8(B+i)=calc::opt##Zx8(x8(A+i),tx8);}}for(;i<l;++i){B[i]=calc::opt##Z(A[i],t);}}
flx(mul_t_s, mul)
flx(add_t_s, add)
flx(sub_t_s, sub)
#undef flx
#define flx(nam, opt) void nam(Z *A,int l,const Z *B){int i=0;for(;i+7<l;i+=8){x8(A + i)=calc::opt##Zx8(x8(A+i),x8(B+i));}for(;i<l;++i){A[i]=calc::opt##Z(A[i],B[i]);}} void nam(const Z *A,const Z *B,int l,Z *C){int i=0;for(;i+7<l;i+=8){x8(C+i)=calc::opt##Zx8(x8(A+i),x8(B+i));}for(;i<l;++i){C[i]=calc::opt##Z(A[i],B[i]);}}
flx(dot, mul)
flx(addP, add)
flx(subP, sub)
#undef flx
void NegP(const Z *A, int l, int r, Z *B){
int i = l;
if(r - l >= 16){
for(; i & 7; ++i){B[i] = calc::negZ(A[i]);}
for(; i + 7 < r; i += 8){x8(B + i) = calc::negZx8(x8(A + i));}
}
for(; i < r; ++i){B[i] = calc::negZ(A[i]);}
}
}
namespace poly_base{
using mem_helper::pre_aloc;
using mem_helper::Freed;
const std::function<Z*(size_t)> alocS = mem_helper::AlocS<Z, 32>;
const std::function<Z*(size_t)> alocH = mem_helper::AlocH<Z, 32>;
template<typename T>struct pre_base{
std::function<void(T*, int, int)> jok;
T *p;
int sz;
pre_base(std::function<void(T*, int, int)> f) : jok(f), p(nullptr), sz(0){
}
void expand(int new_sz){
if(new_sz > sz){
new_sz = (new_sz + 7) & -8;
T *nxtp = alocH(new_sz);
if(p != nullptr){
Cpy(p, sz, nxtp), Freed(p);
}
jok(p = nxtp, sz, new_sz), sz = new_sz;
}
}
void clear(){
Freed(p), sz = 0;
}
const T * operator()(int len){
return expand(len), p;
}
const T * operator()(){
return p;
}
};
namespace cal_helper{
using namespace calc;
void _cal_i(Z *dus, int l, int r){
for(; l < 8; ++l){
dus[l] = InZ(l);
}
constexpr Zx8 eight_Zx8 = setu32x8(InZs(8));
for(int i = l; i < r; i += 8){
x8(dus + i) = addZx8(x8(dus + i - 8), eight_Zx8);
}
}
pre_base<Z> seqi(_cal_i);
void _cal_iv(Z *kil, int l, int r){
const Z* _i = seqi(r);
if(l < 8){
x8(kil) = invZx8(x8(_i)), l = 8;
if(r == 8){
return ;
}
}
x8(kil + l) = x8(_i + l);
for(int i = l + 8; i < r; i += 8){
x8(kil + i) = mulZx8(x8(_i + i), x8(kil + i - 8));
}
x8(kil + r - 8) = invZx8(x8(kil + r - 8));
for(int i = r - 8; i > l; i -= 8){
Zx8 ivix8 = x8(kil + i);
x8(kil + i) = mulZx8(ivix8, x8(kil + i - 8)), x8(kil + i - 8) = mulZx8(ivix8, x8(kil + i));
}
}
pre_base<Z> seqiv(_cal_iv);
void deriv(const Z *g, int n, Z *f){
const Z *_i = seqi(n + 1);
int i = 0;
if(n > 7){
for(; i < 7; ++i){f[i] = mulZ(g[i + 1], _i[i + 1]);}
for(; i + 7 < n; i += 8){storeu(RC(I256, mulZx8(x8(g + i + 1), x8(_i + i + 1))), f + i);}
}
for(; i < n; ++i){f[i] = mulZ(g[i + 1], _i[i + 1]);}
f[n] = zero_Z;
}
void integ(const Z *g, int n, Z *f){
const Z *_iv = seqiv(n + 1);
int i = n - 1;
if(i > 7){
for(; (i & 7) != 6; --i){f[i + 1] = mulZ(g[i], _iv[i + 1]);}
for(i -= 7; ~i; i -= 8){x8(f + i + 1) = mulZx8(RC(Zx8, loadu(g + i)), x8(_iv + i + 1));}
i += 7;
}
for(; ~i; --i){f[i + 1] = mulZ(g[i], _iv[i + 1]);}
f[0] = zero_Z;
}
}
using cal_helper::seqi;
using cal_helper::seqiv;
using cal_helper::deriv;
using cal_helper::integ;
}
namespace poly_base{
namespace Tools{
int compP(const Z *f, int l, const Z *g){
for(int i = 0; i < l; ++i){if(!equalZ(f[i], g[i])){return i;}}
return -1;
}
template<int fixes = 1, typename Tf = std::istream>void scanP(Z *f, int l, Tf &inf = std::cin){
for(int i = 0; i < l; ++i){inf >> f[i], f[i] = trans<fixes, false>(f[i]);}
}
template<int fixes = -1, typename Tf = std::ostream>void printP(const Z *f, int l, Tf &outf = std::cout){
if(l){
outf << trans<fixes>(f[0]);
for(int i = 1; i < l; ++i){outf << ' ' << trans<fixes>(f[i]);}
outf << '\n';
}
}
template<typename T = u32, typename lT = u64>T stoi_m(const std::string &s, const T m){
T r(0);
for(auto ch:s){r = ((lT)r * 10 + (ch - '0')) % m;}
return r;
}
int clzP(const Z *f, int n){
int i = 0;
while(i < n && equalZ(f[i], zero_Z)){++i;}return i;
}
int crzP(const Z *f, int n){
int i = n;
while(i && equalZ(f[i - 1], zero_Z)){--i;}return n - i;
}
}
}
namespace poly_base{
//Author : QedDust413
namespace f_n_t_t_base{
using namespace calc;
using __qed::cro_32;
constexpr int base4thre = 16;
template<bool strict = false>void mul_t(Z *A, int l, Z t){
for(int j = 0; j < l; ++j){A[j] = mulZs<strict>(A[j], t);}
}
constexpr int mp2 = __builtin_ctz(mod - 1);
constexpr Z _g(InZ(__qed::primitive_root(mod)));
struct P_R_Tab{
Z t[mp2 + 1];
constexpr P_R_Tab(Z G) : t(){
t[mp2] = shrink(powZ(G, (mod - 1) >> mp2));
for(int i = mp2 - 1; ~i; --i){t[i] = mulZs(t[i + 1], t[i + 1]);}
}
constexpr Z operator [] (int i) const {
return t[i];
}
};
constexpr P_R_Tab rt1(_g), rt1_I(invZ(_g));
template<int lim>struct omega_info_base2{
Z o[lim >> 1];
constexpr omega_info_base2(const P_R_Tab &Tb) : o(){
o[0] = one_Z;
for(int i = 0; (1 << i) < (lim >> 1); ++i){o[1 << i] = Tb[i + 2];}
for(int i = 1, l = lim >> 1; i < l; ++i){o[i] = mulZ(o[i & (i - 1)], o[i & -i]);}
}
constexpr Z operator [] (int i) const {
return o[i];
}
};
const omega_info_base2<base4thre> w(rt1), wI(rt1_I);
constexpr Zx8 powXx8(Z X){
Z X2 = mulZs(X, X), X3 = mulZs(X2, X), X4 = mulZs(X3, X), X5 = mulZs(X4, X), X6 = mulZs(X5, X), X7 = mulZs(X6, X);
return (Zx8){one_Z, X, X2, X3, X4, X5, X6, X7};
}
struct ntt_info_base4x8{
Z rt3[mp2 - 2], rt3_I[mp2 - 2];
Zx8 rt4ix8[mp2 - 3], rt4ix8_I[mp2 - 3];
constexpr ntt_info_base4x8() : rt3(), rt3_I(), rt4ix8(), rt4ix8_I()
{
Z pr = one_Z, pr_I = one_Z;
for(int i = 0; i < mp2 - 2; ++i){
rt3[i] = mulZs(pr, rt1[i + 3]), rt3_I[i] = mulZs(pr_I, rt1_I[i + 3]);
pr = mulZs(pr, rt1_I[i + 3]), pr_I = mulZs(pr_I, rt1[i + 3]);
}
pr = one_Z, pr_I = one_Z;
for(int i = 0; i < mp2 - 3; ++i){
rt4ix8[i] = powXx8(mulZs(pr, rt1[i + 4])), rt4ix8_I[i] = powXx8(mulZs(pr_I, rt1_I[i + 4]));
pr = mulZs(pr, rt1_I[i + 4]), pr_I = mulZs(pr_I, rt1[i + 4]);
}
}
};
template<int typ>IL u32x8 Neg(const u32x8 &x){return blend<typ>(x, mod2x8 - x);}
}
//fast_number_theoretic_transform
//Author : QedDust413
namespace f_n_t_t{
using namespace f_n_t_t_base;
template<bool strict = false, int fixes = 0>void dif_base2(Z *A, int lim){
STATI(ntt_size, 0, lim);
for(int L = lim >> 1, R = lim; L; L >>= 1, R >>= 1){
for(int i = 0, k = 0; i < lim; i += R, ++k){
for(int j = 0; j < L; ++j){
Z x = dilate2(A[i + j] - mod2) , y = mulZ(w[k], A[i + j + L]);
A[i + j] = x + y, A[i + j + L] = x - y + mod2;
}
}
}
if constexpr(fixes){mul_t<strict>(A, lim, trans<fixes>(one_Z));}
else{
for(int j = 0; j < lim; ++j){
A[j] = dilate2(A[j] - mod2);
if constexpr(strict){A[j] = shrink(A[j]);}
}
}
}
template<bool strict = false, int fixes = 0>void dit_base2(Z *A, int lim){
STATI(ntt_size, 0, lim);
for(int L = 1, R = 2; L < lim; L <<= 1, R <<= 1){
for(int i = 0, k = 0; i < lim; i += R, ++k){
for(int j = 0; j < L; ++j){
Z x = A[i + j], y = A[i + j + L];
A[i + j] = addZ(x, y), A[i + j + L] = mulZ(x - y + mod2, wI[k]);
}
}
}
mul_t<strict>(A, lim, trans<fixes + 1>(mod - ((mod - 1) / lim)));
}
constexpr Zx8 imagx8 = setu32x8(rt1[2]), imag_Ix8 = setu32x8(rt1_I[2]);
constexpr Z w38 = mulZs(rt1[2], rt1[3]), w38I = mulZs(rt1_I[2], rt1_I[3]);
constexpr ntt_info_base4x8 iab4;
template<bool strict = false, int fixes = 0>void dif_base4x8(Z *A, int lim){
STATI(ntt_size, 0, lim);
int n = lim >> 3, L = n >> 1;
Zx8 *f = RC(Zx8*, A);
if(__builtin_ctz(n) & 1){
for(int j = 0; j < L; ++j){
Zx8 x = f[j], y = f[j + L];
f[j] = x + y, f[j + L] = x - y + mod2x8;
}
L >>= 1;
}
L >>= 1;
for(int R = L << 2; L; L >>= 2, R >>= 2){
Z _r = one_Z, _r2 = one_Z, _r3 = one_Z;
for(int i = 0, k = 0; i < n; i += R, ++k){
Zx8 r = setu32x8(_r), r2 = setu32x8(_r2), r3 = setu32x8(_r3);
for(int j = 0; j < L; ++j){
#define F(c) f[i + j + c * L]
Zx8 f0 = dilate2x8(F(0) - mod2x8), f1 = mulZx8(F(1), r), f2 = mulZx8(F(2), r2), f3 = mulZx8(F(3), r3);
Zx8 f1f3 = _SmulZx8(f1, f3, imagx8), f02 = addZx8(f0, f2), f13 = addZx8(f1, f3), f_02 = subZx8(f0, f2);
F(0) = f02 + f13, F(1) = f02 - f13 + mod2x8, F(2) = f_02 + f1f3, F(3) = f_02 - f1f3 + mod2x8;
#undef F
}
_r = mulZs(_r, iab4.rt3[cro_32(k)]), _r2 = mulZs(_r, _r), _r3 = mulZs(_r2, _r);
}
}
{
constexpr Zx8 _r = setu32x8(trans<fixes>(one_Z)), pr2 = {one_Z, one_Z, one_Z, rt1[2], one_Z, one_Z, one_Z, rt1[2]}, pr4 = {one_Z, one_Z, one_Z, one_Z, one_Z, rt1[3], rt1[2], w38};
Zx8 r = _r;
for(int i = 0; i < n; ++i){
Zx8& fi = f[i];
fi = mulZx8(fi, r);
fi = _AmulZx8(Neg<0xf0>(fi), RC(Zx8, swaplohi128(RC(I256, fi))), pr4);
fi = _AmulZx8(Neg<0xcc>(fi), shuffle<0x4e>(fi), pr2);
fi = addZx8(Neg<0xaa>(fi), shuffle<0xb1>(fi));
if constexpr(strict){fi = shrinkx8(fi);}
r = mulZsx8(r, iab4.rt4ix8[cro_32(i)]);
}
}
}
template<bool strict = false, int fixes = 0>void dit_base4x8(Z *A, int lim){
STATI(ntt_size, 0, lim);
int n = lim >> 3, L = 1;
Zx8 *f = RC(Zx8*, A);
{
constexpr Zx8 pr2 = {one_Z, one_Z, one_Z, rt1_I[2], one_Z, one_Z, one_Z, rt1_I[2]}, pr4 = {one_Z, one_Z, one_Z, one_Z, one_Z, rt1_I[3], rt1_I[2], w38I};
Zx8 r = setu32x8(trans<fixes + 1>(mod - ((mod - 1) / lim)));
for(int i = 0; i < n; ++i){
Zx8& fi = f[i];
fi = _AmulZx8(Neg<0xaa>(fi), shuffle<0xb1>(fi), pr2);
fi = _AmulZx8(Neg<0xcc>(fi), shuffle<0x4e>(fi), pr4);
fi = _AmulZx8(Neg<0xf0>(fi), RC(Zx8, swaplohi128(RC(I256, fi))), r);
r = mulZsx8(r, iab4.rt4ix8_I[cro_32(i)]);
}
}
for (int R = L << 2; L < (n >> 1) ; L <<= 2, R <<= 2){
Z _r = one_Z, _r2 = one_Z, _r3 = one_Z;
for(int i = 0, k = 0; i < n; i += R, ++k){
Zx8 r = setu32x8(_r), r2 = setu32x8(_r2), r3 = setu32x8(_r3);
for(int j = 0; j < L; ++j){
#define F(c) f[i + j + c * L]
Zx8 f0 = F(0), f1 = F(1), f2 = F(2), f3 = F(3);
Zx8 f2f3 = _SmulZx8(f2, f3, imag_Ix8), f01 = addZx8(f0, f1), f23 = addZx8(f2, f3), f_01 = subZx8(f0, f1);
F(0) = addZx8(f01, f23), F(1) = _AmulZx8(f_01, f2f3 ,r), F(2) = _SmulZx8(f01, f23, r2), F(3) = _SmulZx8(f_01, f2f3, r3);
#undef F
}
_r = mulZs(_r, iab4.rt3_I[cro_32(k)]), _r2 = mulZs(_r, _r), _r3 = mulZs(_r2, _r);
}
}
if(__builtin_ctz(n) & 1){
for(int j = 0; j < L; ++j){
Zx8 x = f[j], y = f[j + L];
f[j] = addZx8(x, y), f[j + L] = subZx8(x, y);
}
}
if constexpr (strict){for(int i = 0; i < n; ++i){f[i] = shrinkx8(f[i]);}}
}
template<bool strict = false, int fixes = 0>void dif(Z *A, int lim){
lim >= base4thre ? dif_base4x8<strict, fixes>(A, lim) : dif_base2<strict, fixes>(A, lim);
}
template<bool strict = false, int fixes = 0>void dit(Z *A,int lim){
lim >= base4thre ? dit_base4x8<strict, fixes>(A, lim) : dit_base2<strict, fixes>(A, lim);
}
}
using f_n_t_t::dif;
using f_n_t_t::dit;
void Conv(Z *f, int lim, Z *g){
dif(f, lim), dif(g, lim), dot(f, lim, g), dit(f, lim);
}
}
namespace poly{
using namespace poly_base;
}
#undef STATI
namespace poly{
//Author : QedDust413
void Mul(const Z *f, int n, const Z *g, int m, Z *h){
int lim = bit_up(n + m);
pre_aloc mem;
Z *F = alocS(lim), *G = alocS(lim);
Cpy_fl0(f, n + 1, F, lim), Cpy_fl0(g, m + 1, G, lim), Conv(F, lim, G), Cpy(F, n + m + 1, h);
}
void Inv(const Z *g, int n, Z *f){
int lim = bit_up(n);
pre_aloc mem;
Z *o = alocS(lim), *h = alocS(lim);
f[0] = calc::invZ(g[0]);
for(int t = 2, mid = 1; t <= lim; t <<= 1, mid <<= 1){
int xl = std::min<int>(t, n + 1);
Cpy_fl0(g, xl, o, t), Cpy_fl0(f, mid, h, t), Conv(o, t, h), fl0(o, mid), dif(o, t), dot(o, t, h), dit(o, t), NegP(o, mid, xl, f);
}
}
}
namespace poly_base{
//Author : killer_joke
namespace makeseq{
using namespace calc;
void Ci(Z *f, int n, Z c){
Z fx = one_Z;
int i = 0;
for(; i < std::min(n, 8); ++i, fx = mulZ(fx, c)){
f[i] = fx;
}
if(n > 16){
Zx8 C8 = setu32x8(fx);
for(; i + 7 < n; i += 8){
x8(f + i) = mulZx8(x8(f + i - 8), C8);
}
}
for(; i < n; ++i){
f[i] = mulZ(f[i - 1], c);
}
}
}
}
namespace poly_base{
//Author : killer_joke
namespace f_n_t_t{
Z *wx2 = nullptr;
int wx2l = 0;
void predifx2(int lim){
if(lim > wx2l){
{
Z *nxtwx2 = alocH(lim << 1);
Cpy(wx2, wx2l << 1, nxtwx2);
if(wx2 != nullptr){Freed(wx2);}
wx2 = nxtwx2;
}
for(int i = std::max(1, wx2l); i <= lim; i <<= 1){
makeseq::Ci(wx2 + i, i, rt1[std::__lg(i) + 1]);
}
wx2l = lim;
}
}
template<bool strict = false>void difx2(Z *f, int lim){
Cpy(f, lim, f + lim), dit(f + lim, lim);
dot(f + lim, lim, wx2 + lim), dif<strict>(f + lim, lim);
}
template<bool strict = false>void difx2fxC(Z *f, int lim){
constexpr Z two_Z = InZs(2);
Cpy(f, lim, f + lim), dit(f + lim, lim);
dot(f + lim, lim, wx2 + lim), f[lim] = subZ(f[lim], two_Z), dif<strict>(f + lim, lim);
}
}
}
namespace poly{
namespace tech{
void multi_eva_2n(const Z *f, int n, const Z *a, int lim, Z *o){
pre_aloc mem;
int lgn = std::__lg(lim), lim2 = lim << 1;
f_n_t_t::predifx2(lim >> 1);
Z *G = alocS(lgn * lim2 + lim);
{
Z *g = G;
for(const Z *p = a, *const ed = p + lim; p != ed; ++p, g += 2){
g[0] = calc::subZ(one_Z, *p), g[1] = calc::subZ(Const::neg_one, *p);
}
Z *lg, *rg, *ed;
for(int dep = 1; dep < lgn; ++dep){
int t = 1 << dep, t2 = t << 1;
g = G + dep * lim2, lg = g - lim2, rg = lg + t, ed = g + lim2;
for(; g != ed; g += t2, lg += t2, rg += t2){
dot(lg, rg, t, g), f_n_t_t::difx2fxC(g, t);
}
}
{
g = G + lgn * lim2;
int Lim = bit_up(n + lim);
Z *h = alocS(Lim), *w = alocS(Lim);
dot(g - lim2, g - lim, lim, g), dit(g, lim);
g[0] = calc::subZ(g[0], one_Z), Cpy_rev(g, lim, h + 1), h[0] = one_Z;
fl0(h + lim + 1, h + Lim), Inv(h, n, w), rev(w, n + 1), fl0(w + n + 1, w + Lim);
Cpy_fl0(f, n + 1, h, Lim), Conv(h, Lim, w), Cpy(h + n, lim, g);
}
for(int dep = lgn; dep > 0; --dep){
int t = 1 << dep, t2 = t << 1, mid = t >> 1;
g = G + dep * lim2, lg = g - lim2, rg = lg + t, ed = g + lim2;
for(; g != ed; g += t2, lg += t2, rg += t2){
dif(g, t), dot(lg, t, g), dit(lg, t), dot(rg, t, g), dit(rg, t);
Cpy(rg + mid, mid, lg), Cpy(lg + mid, mid, rg);
}
}
g = G;
for(Z *p = o, *const ed = p + lim; p != ed; ++p, g += 2){
*p = *g;
}
}
}
}
}
namespace Command{
using Stat_Info::report;
void cut_string(){
_Exit(0);
}
}
using poly_base::alocS;
using poly_base::pre_aloc;
using namespace poly_base::Tools;
using namespace field_Z;
#include <sys/mman.h>
#include <sys/stat.h>
#include <cstring>
namespace QIO_base{
/*
verison 0.4.1
Author : QedDust413
Date : 2023 / 6 / 22
*/
constexpr int O_buffer_default_size = 1 << 18;
constexpr int O_buffer_default_flush_threshold = 40;
struct _int_to_char_tab{
char tab[40000];
constexpr _int_to_char_tab():tab(){
for(int i=0;i!=10000;++i){
for(int j=3,n=i;~j;--j){
tab[i*4+j]=n%10+48,n/=10;
}
}
}
}constexpr _otab;
}
namespace QIO_I {
using namespace QIO_base;
struct Qinf{
FILE* f;
char *bg,*ed,*p;
struct stat Fl;
Qinf(FILE *fi):f(fi)
{
int fd = fileno(f);
fstat(fd, &Fl);
bg = (char*)mmap(0, Fl.st_size + 1, PROT_READ,MAP_PRIVATE, fd, 0);
p = bg, ed = bg + Fl.st_size;
madvise(p,Fl.st_size + 1, MADV_SEQUENTIAL);
}
~Qinf(){
munmap(bg,Fl.st_size + 1);
}
void skip_space(){
while(*p <= ' '){
++p;
}
}
char get(){
return *p++;
}
char seek(){
return *p;
}
bool eof(){
return p == ed;
}
Qinf& read(char* s,size_t count){
return memcpy(s, p, count), p += count, *this;
}
Qinf& operator >> (u32 &x){
skip_space(),x=0;
for(; *p > ' '; ++p){
x = x * 10 + (*p & 0xf);
}
return *this;
}
Qinf& operator >> (int &x){
skip_space();
if(*p == '-'){
for(++p, x = 48 - *p++;*p > ' ';++p){
x = x * 10 - (*p ^ 48);
}
}
else{
for(x = *p++ ^ 48;*p > ' ';++p){
x = x * 10 + (*p ^ 48);
}
}
return *this;
}
}qin(stdin);
}
namespace QIO_O{
using namespace QIO_base;
struct Qoutf{
FILE *f;
char *bg,*ed,*p;
char *ed_thre;
int fp;
u64 _fpi;
Qoutf(FILE *fo,size_t sz = O_buffer_default_size):
f(fo),
bg(new char[sz]),ed(bg+sz),p(bg),
ed_thre(ed - O_buffer_default_flush_threshold),
fp(6),
_fpi(1000000ull){
}
void flush(){
fwrite_unlocked(bg,1,p - bg,f),p=bg;
}
void chk(){
if(__builtin_expect(p > ed_thre, 0)){
flush();
}
}
~Qoutf(){
flush();delete[] bg;
}
void put4(u32 x){
if(x > 99u){
if(x > 999u){
memcpy(p,_otab.tab + (x << 2) + 0,4),p += 4;
}
else{
memcpy(p,_otab.tab + (x << 2) + 1,3),p += 3;
}
}
else{
if(x > 9u){
memcpy(p,_otab.tab + (x << 2) + 2,2),p += 2;
}
else{
*p++ = x^48;
}
}
}
void put2(u32 x){
if(x > 9u){
memcpy(p,_otab.tab + (x << 2) + 2,2),p += 2;
}
else{
*p++ = x^48;
}
}
Qoutf& write(const char* s,size_t count){
if(count > 1024 || p + count > ed_thre){
flush(),fwrite_unlocked(s,1,count,f);
}
else{
memcpy(p,s,count),p+=count,chk();
}
return *this;
}
Qoutf& operator << (char ch){
return *p++ = ch, *this;
}
Qoutf& operator << (u32 x){
if(x > 99999999u){
put2(x / 100000000u),x %= 100000000u;
memcpy(p,_otab.tab + ((x / 10000u) << 2),4),p += 4;
memcpy(p,_otab.tab + ((x % 10000u) << 2),4),p += 4;
}
else if(x > 9999u){
put4(x / 10000u);
memcpy(p,_otab.tab + ((x % 10000u) << 2),4),p += 4;
}
else{
put4(x);
}
return chk() ,*this;
}
Qoutf& operator << (int x){
if(x < 0){
*p++ = '-', x = -x;
}
return *this << static_cast<u32>(x);
}
}qout(stdout);
}
namespace QIO{
using QIO_I::Qinf;
using QIO_I::qin;
using QIO_O::Qoutf;
using QIO_O::qout;
}
using namespace QIO;
void solve(){
int n, m, lim;
qin >> n >> m, lim = __qed::bit_up(std::max(3, m));
auto F = alocS(n), a = alocS(lim), o = alocS(lim);
for(int i = 0; i < n; ++i){
qin >> F[i];
}
scanP(a, m, qin);
poly::tech::multi_eva_2n(F, n - 1, a, lim, o);
for(int i = 0; i < m; ++i){
qout << shrink(o[i]) << '\n';
}
Command::report();
}
int main(){
std::cin.tie(nullptr) -> sync_with_stdio(false);
solve();
return 0;
}
詳細信息
Test #1:
score: 20
Accepted
time: 0ms
memory: 3580kb
input:
100 94 575336069 33153864 90977269 80162592 25195235 334936051 108161572 14050526 356949084 797375084 805865808 286113858 995555121 938794582 458465004 379862532 563357556 293989886 273730531 13531923 113366106 126368162 405344025 443053020 475686818 734878619 338356543 661401660 834651229 527993675...
output:
940122667 397187437 905033404 346709388 146347009 49596361 125616024 966474950 693596552 162411542 248699477 217639076 254290825 963991654 951375739 431661136 587466850 933820245 135676159 683994808 821695954 675479292 463904298 15085475 183389374 976945620 668527277 98940366 909505808 904450031 968...
result:
ok 94 numbers
Test #2:
score: 20
Accepted
time: 4ms
memory: 6116kb
input:
5000 4999 410683245 925831211 726803342 144364185 955318244 291646122 334752751 893945905 484134283 203760731 533867267 813509277 491860093 413174124 584582617 594704162 976489328 978500071 196938934 628117769 169796671 858963950 562124570 582491326 647830593 238623335 20782490 674939336 656529076 2...
output:
683038054 713408290 776843174 52275065 602085453 905088100 991748340 720305324 831850056 296147844 79380797 882313010 941965313 987314872 363655479 380838721 51243733 165728533 592641557 679475455 651115426 60492203 787012426 247557193 136399242 484592897 908383514 735275879 648228244 443933835 5504...
result:
ok 4999 numbers
Test #3:
score: 20
Accepted
time: 10ms
memory: 9548kb
input:
30000 29995 536696866 881441742 356233606 594487396 991820796 695996817 7219464 149265950 843761437 329761701 260625152 80366362 598729314 133794090 12808683 67477659 320740422 878134577 879383179 940923483 660160621 18082378 886078389 524050341 35092018 137623841 988429688 258507355 138475726 75726...
output:
319541931 71523627 374970852 25715597 36244629 300490421 920015579 97305810 949802809 507599156 733158280 569689405 234576135 266469534 141265915 989761030 221701009 895284489 707865101 547950933 844193939 688358883 642066256 113618699 877631874 804170817 455115375 47621629 66017800 747477619 281472...
result:
ok 29995 numbers
Test #4:
score: 20
Accepted
time: 57ms
memory: 29344kb
input:
100000 99989 703908936 826436271 431732352 607460686 960390248 897906950 506352459 662618885 172508812 713410533 704313866 156459539 879660919 98030681 46358006 400134234 121190289 498201666 616888945 210891377 39623412 687350951 269444705 980768130 381802923 553892268 644461704 287608268 554761733 ...
output:
135579851 646286631 74078131 432534100 405499800 291350098 736555983 833523488 132230969 377599489 208993791 503865639 149603681 279216057 477463117 247401241 643979698 478954375 436185030 471378650 234144621 390722547 788177217 69823556 516048238 562200936 507083023 201497639 482025143 173466674 95...
result:
ok 99989 numbers
Test #5:
score: 0
Runtime Error
input:
1000000 999998 326289172 459965021 432610030 381274775 890620650 133203219 755508578 820410129 100497878 978894337 34545975 484258543 341383383 556328539 705716773 985485996 201697555 806763870 456757110 445252781 501965590 655584951 516373423 475444481 554722275 106826011 433893131 385018453 687541...