QOJ.ac
QOJ
ID | Problem | Submitter | Result | Time | Memory | Language | File size | Submit time | Judge time |
---|---|---|---|---|---|---|---|---|---|
#420373 | #8721. 括号序列 | alpha1022 | AC ✓ | 196ms | 21552kb | C++14 | 47.7kb | 2024-05-24 16:53:11 | 2024-05-24 16:53:14 |
Judging History
answer
#include <bits/stdc++.h>
using ll = long long;
using namespace std;
const int mod = 998244353, K = 23, Root = 31;
int norm(int x) { return x >= mod ? x - mod : x; }
int reduce(int x) { return x < 0 ? x + mod : x; }
int neg(int x) { return x ? mod - x : 0; }
int quo2(int x) { return (x + (x & 1 ? mod : 0)) >> 1; }
void add(int &x, int y) { x += y, x = x >= mod ? x - mod : x; }
void sub(int &x, int y) { x -= y, x = x < 0 ? x + mod : x; }
void fam(int &x, int y, int z) { x = (x + (ll)y * z) % mod; }
int mpow(int a, int b) {
int ret = 1;
for (; b; b >>= 1) {
if (b & 1) ret = (ll)ret * a % mod;
a = (ll)a * a % mod;
}
return ret;
}
const int BRUTE_N2_LIMIT = 50;
struct NumberTheory {
typedef pair<int, int> P2_Field;
mt19937 rng;
NumberTheory(): rng(chrono::steady_clock::now().time_since_epoch().count()) {}
void exGcd(int a, int b, int &x, int &y) {
if (!b) {
x = 1, y = 0;
return;
}
exGcd(b, a % b, y, x), y -= a / b * x;
}
int inv(int a, int p = mod) {
int x, y;
exGcd(a, p, x, y);
if (x < 0) x += p;
return x;
}
template<class Integer>
bool quadRes(Integer a, Integer b) {
if (a <= 1) return 1;
while (a % 4 == 0) a /= 4;
if (a % 2 == 0) return (b % 8 == 1 || b % 8 == 7) == quadRes(a / 2, b);
return ((a - 1) % 4 == 0 || (b - 1) % 4 == 0) == quadRes(b % a, a);
}
int sqrt(int x, int p = mod) {
if (p == 2 || x <= 1) return x;
int w, v, k = (p + 1) / 2;
do w = rng() % p;
while (quadRes(v = ((ll)w * w - x + p) % p, p));
P2_Field res(1, 0), a(w, 1);
for (; k; k >>= 1) {
if (k & 1)
res = P2_Field(
((ll)res.first * a.first + (ll)res.second * a.second % p * v) % p,
((ll)res.first * a.second + (ll)res.second * a.first) % p
);
a = P2_Field(((ll)a.first * a.first + (ll)a.second * a.second % p * v) % p, 2LL * a.first * a.second % p);
}
return min(res.first, p - res.first);
}
} nt;
namespace Simple {
int n = 1;
vector<int> fac({1, 1}), ifac({1, 1}), inv({0, 1});
void build(int m) {
n = m;
fac.resize(n + 1), ifac.resize(n + 1), inv.resize(n + 1);
inv[1] = 1;
for (int i = 2; i <= n; ++i) inv[i] = (ll)(mod - mod / i) * inv[mod % i] % mod;
fac[0] = ifac[0] = 1;
for (int i = 1; i <= n; ++i) fac[i] = (ll)fac[i - 1] * i % mod, ifac[i] = (ll)ifac[i - 1] * inv[i] % mod;
}
void check(int k) {
int m = n;
if (k > m) {
while (k > m) m <<= 1;
build(m);
}
}
int gfac(int k) {
check(k);
return fac[k];
}
int gifac(int k) {
check(k);
return ifac[k];
}
int ginv(int k) {
check(k);
return inv[k];
}
}
struct SimpleSequence {
function<int(int)> func;
SimpleSequence(const function<int(int)> &func): func(func) {}
int operator[](int k) const { return func(k); }
} gfac(Simple::gfac), gifac(Simple::gifac), ginv(Simple::ginv);
int binom(int n, int m) {
if (m > n || m < 0) return 0;
return (ll)gfac[n] * gifac[m] % mod * gifac[n - m] % mod;
}
namespace NTT {
int L = -1;
vector<int> root;
void init(int l) {
L = l;
root.resize((1 << L) + 1);
int n = 1 << L, *w = root.data();
w[0] = 1, w[1 << L] = mpow(Root, 1 << (K - 2 - L));
for (int i = L; i; --i) w[1 << (i - 1)] = (ll)w[1 << i] * w[1 << i] % mod;
for (int i = 1; i < n; ++i) w[i] = (ll)w[i & (i - 1)] * w[i & -i] % mod;
}
void DIF(int *a, int l) {
int n = 1 << l;
for (int len = n >> 1; len; len >>= 1)
for (int *j = a, *o = root.data(); j != a + n; j += len << 1, ++o)
for (int *k = j; k != j + len; ++k) {
int r = (ll)*o * k[len] % mod;
k[len] = reduce(*k - r), add(*k, r);
}
}
void DIT(int *a, int l) {
int n = 1 << l;
for (int len = 1; len < n; len <<= 1)
for (int *j = a, *o = root.data(); j != a + n; j += len << 1, ++o)
for (int *k = j; k != j + len; ++k) {
int r = norm(*k + k[len]);
k[len] = (ll)*o * (*k - k[len] + mod) % mod, *k = r;
}
}
void fft(int *a, int lgn, int d = 1) {
if (L < lgn) init(lgn);
int n = 1 << lgn;
if (d == 1) DIF(a, lgn);
else {
DIT(a, lgn), reverse(a + 1, a + n);
int nInv = mod - (mod - 1) / n;
for (int i = 0; i < n; ++i) a[i] = (ll)a[i] * nInv % mod;
}
}
}
struct poly {
vector<int> a;
poly(ll v = 0): a(1) {
if ((v %= mod) < 0) v += mod;
a[0] = v;
}
poly(const poly &o): a(o.a) {}
poly(const vector<int> &o): a(o) {}
poly(initializer_list<int> o): a(o) {}
operator vector<int>() const { return a; }
int operator[](int k) const { return k < a.size() ? a[k] : 0; }
int &operator[](int k) {
if (k >= a.size()) a.resize(k + 1);
return a[k];
}
int deg() const { return (int)a.size() - 1; }
void redeg(int d) { a.resize(d + 1); }
int size() const { return a.size(); }
void resize(int s) { a.resize(s); }
bool empty() const { return a.empty(); }
int back() const { return a.back(); }
poly slice(int d) const {
if (d < a.size()) return vector<int>(a.begin(), a.begin() + d + 1);
vector<int> ret = a;
ret.resize(d + 1);
return ret;
}
poly shift(int k) const {
if (size() + k <= 0) return 0;
vector<int> ret(size() + k);
for (int i = max(0, k); i < ret.size(); ++i) ret[i] = a[i - k];
return ret;
}
int eval(int x) const {
int ret = 0;
for (int i = deg(); i >= 0; --i)
ret = ((ll)ret * x + a[i]) % mod;
return ret;
}
bool operator<(const poly &o) const {
for (int i = 0; i <= min(deg(), o.deg()); ++i)
if (a[i] != o[i])
return a[i] < o[i];
return deg() < o.deg();
}
bool operator==(const poly &o) const {
if (deg() != o.deg()) return 0;
for (int i = 0; i <= deg(); ++i)
if (a[i] != o[i])
return 0;
return 1;
}
int *base() { return a.data(); }
const int *base() const { return a.data(); }
poly println(FILE *fp = stdout) const {
for (int i = 0; i < a.size(); ++i) fprintf(fp, "%d%c", a[i], " \n"[i == a.size() - 1]);
return *this;
}
poly &operator+=(const poly &o) {
if (o.size() > a.size()) a.resize(o.size());
for (int i = 0; i < o.size(); ++i) add(a[i], o[i]);
return *this;
}
poly operator+(const poly &o) const {
poly ret(a);
ret += o;
return ret;
}
poly operator-() const {
poly ret = a;
for (int i = 0; i < a.size(); ++i) ret[i] = neg(ret[i]);
return ret;
}
poly &operator-=(const poly &o) { return operator+=(-o); }
poly operator-(const poly &o) const { return operator+(-o); }
poly operator*(const poly &) const;
poly operator/(const poly &) const;
poly operator%(const poly &) const;
poly &operator*=(const poly &o) {
*this = operator*(o);
return *this;
}
poly &operator/=(const poly &o) {
*this = operator/(o);
return *this;
}
poly &operator%=(const poly &o) {
*this = operator%(o);
return *this;
}
poly deriv() const;
poly integ() const;
poly inv() const;
poly sqrt() const;
poly ln() const;
poly exp() const;
poly srcExp() const;
pair<poly, poly> sqrti() const;
pair<poly, poly> expi() const;
poly quo(const poly &) const;
pair<poly, poly> iquo() const;
pair<poly, poly> div(const poly &) const;
poly taylor(int k) const;
poly pow(int k) const;
poly exp(int k) const;
poly comp(const poly &) const;
poly compInv() const;
poly compCompInv(const poly &) const;
};
void trim(vector<int> &a) { for (; !a.empty() && !a.back(); a.pop_back()); }
void trim(poly &a) { trim(a.a); }
poly zeroes(int d) { return vector<int>(d + 1); }
namespace NTT {
void fft(poly &a, int lgn, int d = 1) { fft(a.base(), lgn, d); }
void fft(vector<int> &a, int lgn, int d = 1) { fft(a.data(), lgn, d); }
}
using NTT::fft;
struct SemiRelaxedConvolution {
static const int LG = 19;
static const int B = 16;
void run(int *f, const int *g, int n, const function<void(int)> &relax) {
vector<vector<int>> save(LG + 1);
function<void(int, int)> divideConquer = [&](int l, int r) {
if (r - l <= BRUTE_N2_LIMIT) {
for (int i = l + 1; i <= r; ++i) {
for (int j = l; j < i; ++j) fam(f[i], f[j], g[i - j]);
relax(i);
}
return;
}
int d = (r - l) / B + 1, lgd = 0, lg;
while ((1 << lgd) <= d * 2) ++lgd;
d = (1 << (lgd - 1)) - 1, lg = (r - l + d - 1) / d;
vector<int> dft = save[lgd];
dft.resize(lg << (lgd + 1));
for (int i = lg << lgd; i < (lg << (lgd + 1)); ++i) dft[i] = 0;
int ef = lg;
for (int i = 0; i < lg; ++i) {
if ((i << lgd) < save[lgd].size()) continue;
if ((i + 2) * d >= l) --ef;
for (int j = 0; j <= min(d * 2, n - i * d); ++j) dft[(i << lgd) + j] = g[i * d + j];
fft(dft.data() + (i << lgd), lgd);
}
if (save[lgd].size() < (ef << lgd)) save[lgd] = vector<int>(dft.begin(), dft.begin() + (ef << lgd));
for (int i = 0; i < lg; ++i) {
if (i) fft(dft.data() + ((lg + i) << lgd), lgd, -1);
for (int j = 1; j <= min(d, r - l - i * d); ++j) add(f[l + i * d + j], dft[((lg + i) << lgd) + d + j]);
divideConquer(l + i * d, min(l + (i + 1) * d, r));
if (i + 1 < lg) {
for (int j = 0; j < d; ++j) dft[((lg + i) << lgd) + j] = f[l + i * d + j];
for (int j = d; j < (1 << lgd); ++j) dft[((lg + i) << lgd) + j] = 0;
fft(dft.data() + ((lg + i) << lgd), lgd);
}
for (int j = i + 1; j < lg; ++j)
for (int k = 0; k < (1 << lgd); ++k)
fam(dft[((lg + j) << lgd) + k], dft[((j - i - 1) << lgd) + k], dft[((lg + i) << lgd) + k]);
}
};
relax(0), divideConquer(0, n);
}
} src;
poly poly::srcExp() const {
int n = deg();
poly f, g;
f.redeg(n), g.redeg(n);
for (int i = 0; i <= n; ++i) g[i] = (ll)a[i] * i % mod;
src.run(f.base(), g.base(), n, [&](int i) {
if (i == 0) f[i] = 1;
else f[i] = (ll)f[i] * ginv[i] % mod;
});
return f;
}
struct Newton {
void inv(const poly &f, const poly &dftf, poly &g, const poly &dftg, int t) {
int n = 1 << t;
poly prod = dftf;
for (int i = 0; i < (n << 1); ++i) prod[i] = (ll)prod[i] * dftg[i] % mod;
fft(prod, t + 1, -1);
memset(prod.base(), 0, sizeof(int) << t);
fft(prod, t + 1);
for (int i = 0; i < (n << 1); ++i) prod[i] = (ll)prod[i] * dftg[i] % mod;
fft(prod, t + 1, -1);
for (int i = 0; i < n; ++i) prod[i] = 0;
g -= prod;
}
void inv(const poly &f, const poly &dftf, poly &g, int t) {
poly dftg(g);
dftg.resize(2 << t);
fft(dftg, t + 1);
inv(f, dftf, g, dftg, t);
}
void inv(const poly &f, poly &g, int t) {
poly dftf(f), dftg(g);
dftf.resize(2 << t), dftg.resize(2 << t);
fft(dftf, t + 1), fft(dftg, t + 1);
inv(f, dftf, g, dftg, t);
}
void sqrt(const poly &f, poly &g, poly &dftg, poly &h, int t) {
for (int i = 0; i < (1 << t); ++i) dftg[i] = (ll)dftg[i] * dftg[i] % mod;
fft(dftg, t, -1);
dftg -= f;
for (int i = 0; i < (1 << t); ++i) add(dftg[i | (1 << t)], dftg[i]), dftg[i] = 0;
fft(dftg, t + 1);
poly tmp = h;
tmp.resize(2 << t);
fft(tmp, t + 1);
for (int i = 0; i < (2 << t); ++i) tmp[i] = (ll)tmp[i] * dftg[i] % mod;
fft(tmp, t + 1, -1);
memset(tmp.base(), 0, sizeof(int) << t);
g -= tmp * ginv[2];
}
void exp(const poly &f, poly &g, poly &dftg, poly &h, int t) {
poly dfth = h;
fft(dfth, t);
poly dg = g.deriv().slice((1 << t) - 1);
fft(dg, t);
poly tmp = zeroes((1 << t) - 1);
for (int i = 0; i < (1 << t); ++i) tmp[i] = (ll)dftg[i] * dfth[i] % mod, dg[i] = (ll)dg[i] * dfth[i] % mod;
fft(tmp, t, -1), fft(dg, t, -1);
sub(tmp[0], 1);
dg.resize(2 << t);
poly df0 = f.deriv().slice((1 << t) - 1);
df0[(1 << t) - 1] = 0;
for (int i = 0; i < (1 << t); ++i) dg[i | (1 << t)] = reduce(dg[i] - df0[i]);
memcpy(dg.base(), df0.base(), sizeof(int) * ((1 << t) - 1));
tmp.resize(2 << t), fft(tmp, t + 1);
df0.resize(2 << t), fft(df0, t + 1);
for (int i = 0; i < (2 << t); ++i) df0[i] = (ll)df0[i] * tmp[i] % mod;
fft(df0, t + 1, -1);
for (int i = 0; i < (1 << t); ++i) df0[i | (1 << t)] = df0[i], df0[i] = 0;
dg = (dg - df0).integ().slice((2 << t) - 1) - f;
fft(dg, t + 1);
for (int i = 0; i < (2 << t); ++i) tmp[i] = (ll)dg[i] * dftg[i] % mod;
fft(tmp, t + 1, -1);
g.resize(2 << t);
for (int i = 1 << t; i < (2 << t); ++i) g[i] = neg(tmp[i]);
}
} nit;
struct Eval {
vector<int> ls, rs, pos;
virtual void _init(int n) {}
void _build(int n) {
ls.assign(n * 2 - 1, 0), rs.assign(n * 2 - 1, 0), pos.resize(n), _init(n);
int cnt = 0;
function<int(int, int)> dfs = [&](int l, int r) {
if (l == r) { pos[l] = cnt; return cnt++; }
int ret = cnt++;
int mid = (l + r) >> 1;
ls[ret] = dfs(l, mid), rs[ret] = dfs(mid + 1, r);
return ret;
};
dfs(0, n - 1);
}
};
struct Transposition : Eval {
vector<int> _mul(int l, vector<int> res, const poly &b) {
vector<int> tmp(1 << l);
memcpy(tmp.data(), b.base(), sizeof(int) * (b.deg() + 1));
reverse(tmp.begin() + 1, tmp.end());
fft(tmp.data(), l);
for (int i = 0; i < (1 << l); ++i) res[i] = (ll)res[i] * tmp[i] % mod;
fft(res.data(), l, -1);
return res;
}
poly bfMul(const poly &a, const poly &b) {
int n = a.deg(), m = b.deg();
poly ret = zeroes(n - m);
for (int i = 0; i <= n - m; ++i)
for (int j = 0; j <= m; ++j) ret[i] = (ret[i] + (ll)a[i + j] * b[j]) % mod;
return ret;
}
poly mul(const poly &a, const poly &b) {
if (a.deg() < b.deg()) return 0;
if (a.deg() <= BRUTE_N2_LIMIT) return bfMul(a, b);
int l = 0;
while ((1 << l) <= a.deg()) ++l;
vector<int> res(1 << l);
memcpy(res.data(), a.base(), sizeof(int) * (a.deg() + 1));
fft(res.data(), l);
res = _mul(l, res, b);
res.resize(a.deg() - b.deg() + 1);
return res;
}
pair<poly, poly> mul2(const poly &a, const poly &b1, const poly &b2) {
if (a.deg() <= BRUTE_N2_LIMIT) return make_pair(bfMul(a, b1), bfMul(a, b2));
int l = 0;
while ((1 << l) <= a.deg()) ++l;
vector<int> fa(1 << l);
memcpy(fa.data(), a.base(), sizeof(int) * (a.deg() + 1));
fft(fa.data(), l);
vector<int> res1 = _mul(l, fa, b1), res2 = _mul(l, fa, b2);
res1.resize(a.deg() - b1.deg() + 1), res2.resize(a.deg() - b2.deg() + 1);
return {res1, res2};
}
vector<poly> p, q;
void _init(int n) { p.assign(n * 2 - 1, 0), q.assign(n * 2 - 1, 0); }
vector<int> _eval(vector<int> f, const vector<int> &x) {
int n = f.size();
_build(n);
for (int i = 0; i < n; ++i) q[pos[i]] = {1, neg(x[i])};
for (int i = n * 2 - 2; i >= 0; --i)
if (ls[i]) q[i] = q[ls[i]] * q[rs[i]];
f.resize(n * 2);
p[0] = mul(f, q[0].inv());
for (int i = 0; i < n * 2 - 1; ++i)
if (ls[i]) tie(p[ls[i]], p[rs[i]]) = mul2(p[i], q[rs[i]], q[ls[i]]);
vector<int> ret(n);
for (int i = 0; i < n; ++i) ret[i] = p[pos[i]][0];
return ret;
}
vector<int> eval(const poly &f, const vector<int> &x) {
int n = f.deg() + 1, m = x.size();
vector<int> tmpf = f.a, tmpx = x;
tmpf.resize(max(n, m)), tmpx.resize(max(n, m));
vector<int> ret = _eval(tmpf, tmpx);
ret.resize(m);
return ret;
}
poly inter(const vector<int> &x, const vector<int> &y) {
int n = x.size();
_build(n);
for (int i = 0; i < n; ++i) q[pos[i]] = {1, norm(mod - x[i])};
for (int i = n * 2 - 2; i >= 0; --i)
if (ls[i]) q[i] = q[ls[i]] * q[rs[i]];
poly tmp = q[0];
reverse(tmp.a.begin(), tmp.a.end());
vector<int> f = tmp.deriv().a;
f.resize(n * 2);
p[0] = mul(f, q[0].inv());
for (int i = 0; i < n * 2 - 1; ++i)
if (ls[i]) tie(p[ls[i]], p[rs[i]]) = mul2(p[i], q[rs[i]], q[ls[i]]);
for (int i = 0; i < n; ++i) p[pos[i]] = nt.inv(p[pos[i]][0]) * (ll)y[i] % mod;
for (int i = 0; i < n * 2 - 1; ++i) reverse(q[i].a.begin(), q[i].a.end());
for (int i = n * 2 - 2; i >= 0; --i)
if (ls[i]) p[i] = p[ls[i]] * q[rs[i]] + p[rs[i]] * q[ls[i]];
return p[0];
}
vector<poly> mul(const vector<poly> &a, const vector<poly> &b);
} tp;
struct FactorialPolynomials : Eval {
pair<poly, poly> mul2(const poly &a, const poly &b1, const poly &b2) {
int n = a.deg() + max(b1.deg(), b2.deg());
if (n <= BRUTE_N2_LIMIT) return {a * b1, a * b2};
int l = 0;
while ((1 << l) <= n) ++l;
vector<int> fa(1 << l);
memcpy(fa.data(), a.base(), sizeof(int) * (a.deg() + 1));
fft(fa.data(), l);
vector<int> res1(1 << l), res2(1 << l);
memcpy(res1.data(), b1.base(), sizeof(int) * (b1.deg() + 1)), memcpy(res2.data(), b2.base(), sizeof(int) * (b2.deg() + 1));
fft(res1.data(), l), fft(res2.data(), l);
for (int i = 0; i < (1 << l); ++i) res1[i] = (ll)res1[i] * fa[i] % mod, res2[i] = (ll)res2[i] * fa[i] % mod;
fft(res1.data(), l, -1), fft(res2.data(), l, -1);
res1.resize(a.deg() + b1.deg() + 1), res2.resize(a.deg() + b2.deg() + 1);
return {res1, res2};
}
vector<poly> p, q;
void _init(int n) { p.assign(n * 2 - 1, 0), q.assign(n * 2 - 1, 0); }
vector<int> toPoly(vector<int> f) {
int n = f.size();
_build(n);
for (int i = 0; i < n; ++i) p[pos[i]] = f[i], q[pos[i]] = {neg(i), 1};
for (int i = n * 2 - 2; i >= 0; --i)
if (ls[i]) tie(p[i], q[i]) = mul2(q[ls[i]], p[rs[i]], q[rs[i]]), p[i] += p[ls[i]];
return p[0];
}
vector<int> lagrange(int x, vector<int> y) {
int m = y.size(), n = m * 2 - 1;
vector<int> tinv(n);
if (x == m)
for (int i = 0; i < n; ++i) tinv[i] = ginv[x - (m - 1 - i)];
else {
vector<int> tfac(n), tifac(n);
tfac[0] = reduce(x - (m - 1));
for (int i = 1; i < n; ++i) tfac[i] = (ll)tfac[i - 1] * (x - (m - 1 - i) + mod) % mod;
tifac[n - 1] = nt.inv(tfac[n - 1]);
for (int i = n - 1; i; --i) tifac[i - 1] = (ll)tifac[i] * (x - (m - 1 - i) + mod) % mod;
tinv[0] = tifac[0];
for (int i = 1; i < n; ++i) tinv[i] = (ll)tifac[i] * tfac[i - 1] % mod;
}
for (int i = 0; i < m; ++i)
y[i] = (ll)((m - i) & 1 ? y[i] : mod - y[i]) * gifac[i] % mod * gifac[m - 1 - i] % mod;
vector<int> ret(m);
if (n <= BRUTE_N2_LIMIT) {
for (int i = 0; i < m; ++i)
for (int j = 0; j <= m - 1 + i && j < m; ++j)
fam(ret[i], y[j], tinv[m - 1 + i - j]);
} else {
int l = 0;
while ((1 << l) < n) ++l;
auto tmp = tinv;
y.resize(1 << l), tmp.resize(1 << l);
fft(y.data(), l), fft(tmp.data(), l);
for (int i = 0; i < (1 << l); ++i) tmp[i] = (ll)tmp[i] * y[i] % mod;
fft(tmp.data(), l, -1);
memcpy(ret.data(), tmp.data() + m - 1, sizeof(int) * m);
}
int fac = 1;
for (int i = 0; i < m; ++i) fac = (ll)fac * (x - i) % mod;
for (int i = 0; i < m; ++i)
ret[i] = (ll)ret[i] * fac % mod,
fac = (ll)fac * (x + i + 1) % mod * tinv[i] % mod;
return ret;
}
pair<vector<int>, vector<int>> lagrange2(int x, vector<int> y1, vector<int> y2) {
assert(y1.size() == y2.size());
int m = y1.size(), n = m * 2 - 1;
vector<int> tinv(n);
if (x == m)
for (int i = 0; i < n; ++i) tinv[i] = ginv[x - (m - 1 - i)];
else {
vector<int> tfac(n), tifac(n);
tfac[0] = reduce(x - (m - 1));
for (int i = 1; i < n; ++i) tfac[i] = (ll)tfac[i - 1] * (x - (m - 1 - i) + mod) % mod;
tifac[n - 1] = nt.inv(tfac[n - 1]);
for (int i = n - 1; i; --i) tifac[i - 1] = (ll)tifac[i] * (x - (m - 1 - i) + mod) % mod;
tinv[0] = tifac[0];
for (int i = 1; i < n; ++i) tinv[i] = (ll)tifac[i] * tfac[i - 1] % mod;
}
for (int i = 0; i < m; ++i)
y1[i] = (ll)((m - i) & 1 ? y1[i] : mod - y1[i]) * gifac[i] % mod * gifac[m - 1 - i] % mod,
y2[i] = (ll)((m - i) & 1 ? y2[i] : mod - y2[i]) * gifac[i] % mod * gifac[m - 1 - i] % mod;
vector<int> ret1(m), ret2(m);
if (n <= BRUTE_N2_LIMIT) {
for (int i = 0; i < m; ++i)
for (int j = 0; j <= m - 1 + i && j < m; ++j)
fam(ret1[i], y1[j], tinv[m - 1 + i - j]),
fam(ret2[i], y2[j], tinv[m - 1 + i - j]);
} else {
int l = 0;
while ((1 << l) < n) ++l;
auto tmp = tinv;
y1.resize(1 << l), y2.resize(1 << l), tmp.resize(1 << l);
fft(y1.data(), l), fft(y2.data(), l), fft(tmp.data(), l);
for (int i = 0; i < (1 << l); ++i)
y1[i] = (ll)tmp[i] * y1[i] % mod, y2[i] = (ll)tmp[i] * y2[i] % mod;
fft(y1.data(), l, -1), fft(y2.data(), l, -1);
memcpy(ret1.data(), y1.data() + m - 1, sizeof(int) * m),
memcpy(ret2.data(), y2.data() + m - 1, sizeof(int) * m);
}
int fac = 1;
for (int i = 0; i < m; ++i) fac = (ll)fac * (x - i) % mod;
for (int i = 0; i < m; ++i)
ret1[i] = (ll)ret1[i] * fac % mod, ret2[i] = (ll)ret2[i] * fac % mod,
fac = (ll)fac * (x + i + 1) % mod * tinv[i] % mod;
return {ret1, ret2};
}
vector<int> lagrangeDoubling(vector<int> f) {
auto tmp = lagrange(f.size(), f);
f.insert(f.end(), tmp.begin(), tmp.end());
return f;
}
pair<vector<int>, vector<int>> lagrangeDoubling2(vector<int> f1, vector<int> f2) {
vector<int> tmp1, tmp2;
tie(tmp1, tmp2) = lagrange2(f1.size(), f1, f2);
f1.insert(f1.end(), tmp1.begin(), tmp1.end()),
f2.insert(f2.end(), tmp2.begin(), tmp2.end());
return {f1, f2};
}
vector<int> lagrangeExtending(vector<int> f) {
int n = f.size();
int res = 0;
for (int i = 0; i < n; ++i)
res = (res + (ll)((n - i) & 1 ? f[i] : mod - f[i]) * gifac[i] % mod * gifac[n - 1 - i] % mod * ginv[n - i]) % mod;
for (int i = 1; i <= n; ++i) res = (ll)res * i % mod;
f.push_back(res);
return f;
}
vector<int> evalFacPoly(vector<int> f) {
int n = f.size();
poly g = zeroes(n - 1);
for (int i = 0; i < n; ++i) g[i] = gifac[i];
g = (g * f).slice(n - 1);
for (int i = 0; i < n; ++i) g[i] = (ll)g[i] * gfac[i] % mod;
return g;
}
vector<int> evalPoly(vector<int> f) {
int n = f.size();
_build(n);
for (int i = 0; i < n; ++i) p[pos[i]] = f[i];
for (int i = n * 2 - 2; i >= 0; --i)
if (ls[i]) {
vector<int> l = p[ls[i]], r = p[rs[i]];
if (l.size() > r.size())
tie(l, r) = lagrangeDoubling2(l, lagrangeExtending(r)),
l.pop_back(), r.pop_back();
else tie(l, r) = lagrangeDoubling2(l, r);
p[i].resize(p[ls[i]].size() + p[rs[i]].size());
for (int j = 0; j < p[i].size(); ++j)
p[i][j] = (l[j] + (ll)r[j] * mpow(j, p[ls[i]].size())) % mod;
}
return p[0];
}
vector<int> interFacPoly(vector<int> y) {
int n = y.size();
poly g = zeroes(n - 1);
for (int i = 0; i < n; ++i)
g[i] = i & 1 ? mod - gifac[i] : gifac[i], y[i] = (ll)y[i] * gifac[i] % mod;
return (g * y).slice(n - 1);
}
vector<int> interPoly(vector<int> y) { return toPoly(interFacPoly(y)); }
vector<int> toFacPoly(vector<int> f) { return interFacPoly(evalPoly(f)); }
vector<int> mul(vector<int> a, vector<int> b) {
int n = a.size() + b.size() - 1;
a.resize(n), b.resize(n);
a = evalFacPoly(a), b = evalFacPoly(b);
for (int i = 0; i < n; ++i) a[i] = (ll)a[i] * b[i] % mod;
return interFacPoly(a);
}
} fp;
poly poly::operator*(const poly &o) const {
int n = deg(), m = o.deg();
if (n == -1 || m == -1) return vector<int>();
if (n <= 10 || m <= 10 || n + m <= BRUTE_N2_LIMIT) {
poly ret = zeroes(n + m);
for (int i = 0; i <= n; ++i)
for (int j = 0; j <= m; ++j) fam(ret[i + j], a[i], o[j]);
return ret;
}
n += m;
int l = 0;
while ((1 << l) <= n) ++l;
poly ret = a, tmp = o;
ret.resize(1 << l), tmp.resize(1 << l);
fft(ret, l), fft(tmp, l);
for (int i = 0; i < (1 << l); ++i) ret[i] = (ll)ret[i] * tmp[i] % mod;
fft(ret, l, -1);
return ret.slice(n);
}
poly poly::inv() const {
poly g = nt.inv(a[0]);
for (int t = 0; (1 << t) <= deg(); ++t) nit.inv(slice((2 << t) - 1), g, t);
return g.slice(deg());
}
poly poly::taylor(int k) const {
int n = deg();
poly f = zeroes(n), g = zeroes(n);
for (int i = 0, pw = 1; i <= n; ++i)
f[n - i] = (ll)a[i] * gfac[i] % mod, g[i] = (ll)pw * gifac[i] % mod, pw = (ll)pw * k % mod;
f *= g;
for (int i = 0; i <= n; ++i) g[i] = (ll)f[n - i] * gifac[i] % mod;
return g;
}
poly poly::pow(int k) const {
if (k == 0) return 1;
if (k == 1) return *this;
int n = deg() * k;
int l = 0;
while ((1 << l) <= n) ++l;
poly ret = a;
ret.resize(1 << l), fft(ret, l);
for (int i = 0; i < (1 << l); ++i) ret[i] = mpow(ret[i], k);
fft(ret, -1);
return ret.slice(n);
}
poly poly::deriv() const {
if (deg() == 0) return 0;
vector<int> ret(deg());
for (int i = 0; i < deg(); ++i) ret[i] = (ll)a[i + 1] * (i + 1) % mod;
return ret;
}
poly poly::integ() const {
vector<int> ret(size() + 1);
for (int i = 0; i <= deg(); ++i) ret[i + 1] = (ll)a[i] * ginv[i + 1] % mod;
return ret;
}
poly poly::quo(const poly &o) const {
if (o.deg() == 0) return (ll)a[0] * nt.inv(o[0]) % mod;
poly g = nt.inv(o[0]);
int t = 0, n;
for (n = 1; (n << 1) <= o.deg(); ++t, n <<= 1) nit.inv(o.slice((n << 1) - 1), g, t);
poly dftg = g;
dftg.resize(n << 1), fft(dftg, t + 1);
poly eps1 = o;
eps1.resize(n << 1), fft(eps1, t + 1);
for (int i = 0; i < (n << 1); ++i) eps1[i] = (ll)eps1[i] * dftg[i] % mod;
fft(eps1, t + 1, -1);
for (int i = 0; i < n; ++i) eps1[i] = eps1[i + n], eps1[i + n] = 0;
fft(eps1, t + 1);
poly h0 = slice(n - 1);
h0.resize(n << 1), fft(h0, t + 1);
poly h0g0 = zeroes((n << 1) - 1);
for (int i = 0; i < (n << 1); ++i) h0g0[i] = (ll)h0[i] * dftg[i] % mod;
fft(h0g0, t + 1, -1);
poly h0eps1 = zeroes((n << 1) - 1);
for (int i = 0; i < (n << 1); ++i) h0eps1[i] = (ll)h0[i] * eps1[i] % mod;
fft(h0eps1, t + 1, -1);
for (int i = 0; i < n; ++i) h0eps1[i] = reduce(operator[](i + n) - h0eps1[i]);
memset(h0eps1.base() + n, 0, sizeof(int) << t);
fft(h0eps1, t + 1);
for (int i = 0; i < (n << 1); ++i) h0eps1[i] = (ll)h0eps1[i] * dftg[i] % mod;
fft(h0eps1, t + 1, -1);
for (int i = 0; i < n; ++i) h0eps1[i + n] = h0eps1[i], h0eps1[i] = 0;
return (h0g0 + h0eps1).slice(o.deg());
}
poly poly::ln() const {
if (deg() == 0) return 0;
return deriv().quo(slice(deg() - 1)).integ();
}
pair<poly, poly> poly::sqrti() const {
poly g = nt.sqrt(a[0]), h = nt.inv(a[0]), dftg = g;
for (int t = 0; (1 << t) <= deg(); ++t) {
nit.sqrt(slice((2 << t) - 1), g, dftg, h, t);
dftg = g, fft(dftg, t + 1);
nit.inv(g, dftg, h, t);
}
return make_pair(g.slice(deg()), h.slice(deg()));
}
poly poly::sqrt() const {
poly g = nt.sqrt(a[0]), h = nt.inv(a[0]), dftg = g;
for (int t = 0; (1 << t) <= deg(); ++t) {
nit.sqrt(slice((2 << t) - 1), g, dftg, h, t);
if ((2 << t) <= deg()) {
dftg = g, fft(dftg, t + 1);
nit.inv(g, dftg, h, t);
}
}
return g.slice(deg());
}
pair<poly, poly> poly::expi() const {
poly g = 1, h = 1, dftg = {1, 1};
for (int t = 0; (1 << t) <= deg(); ++t) {
nit.exp(slice((2 << t) - 1), g, dftg, h, t);
dftg = g, dftg.resize(4 << t);
fft(dftg, t + 2);
poly f2n = dftg.slice((2 << t) - 1);
nit.inv(g, f2n, h, t);
}
return make_pair(g.slice(deg()), h.slice(deg()));
}
poly poly::exp() const {
poly g = 1, h = 1, dftg = {1, 1};
for (int t = 0; (1 << t) <= deg(); ++t) {
nit.exp(slice((2 << t) - 1), g, dftg, h, t);
if ((2 << t) <= deg()) {
dftg = g, dftg.resize(4 << t);
fft(dftg, t + 2);
poly f2n = dftg.slice((2 << t) - 1);
nit.inv(g, f2n, h, t);
}
}
return g.slice(deg());
}
poly poly::exp(int k) const {
int lead, lz = 0;
while (lz < deg() && !a[lz]) ++lz;
if (lz == deg() && !a[lz]) return *this;
lead = a[lz];
if ((ll)lz * k > deg()) return zeroes(deg());
poly part = poly(vector<int>(a.begin() + lz, a.begin() + deg() - lz * (k - 1) + 1)) * nt.inv(lead);
part = (part.ln() * k).exp() * mpow(lead, k);
vector<int> ret(deg() + 1);
memcpy(ret.data() + lz * k, part.base(), sizeof(int) * (deg() - lz * k + 1));
return ret;
}
poly poly::operator/(const poly &o) const {
int n = deg(), m = o.deg();
if (n < m) return 0;
poly ta(vector<int>(a.rbegin(), a.rend())), tb(vector<int>(o.a.rbegin(), o.a.rend()));
ta.redeg(n - m), tb.redeg(n - m);
poly q = ta.quo(tb);
return vector<int>(q.a.rbegin(), q.a.rend());
}
pair<poly, poly> poly::div(const poly &o) const {
if (deg() < o.deg()) return make_pair(0, *this);
int n = deg(), m = o.deg();
poly q = operator/(o), r;
int l = 0;
while ((1 << l) < o.deg()) ++l;
int t = (1 << l) - 1;
poly tmp = zeroes(t);
r = zeroes(t);
for (int i = 0; i <= m; ++i) add(r[i & t], o[i]);
for (int i = 0; i <= n - m; ++i) add(tmp[i & t], q[i]);
fft(r, l), fft(tmp, l);
for (int i = 0; i <= t; ++i) tmp[i] = (ll)tmp[i] * r[i] % mod;
fft(tmp, l, -1);
memset(r.base(), 0, sizeof(int) << l);
for (int i = 0; i <= n; ++i) add(r[i & t], a[i]);
for (int i = 0; i < m; ++i) sub(r[i], tmp[i]);
return make_pair(q, r.slice(m - 1));
}
poly poly::operator%(const poly &o) const { return div(o).second; }
vector<poly> operator*(vector<poly> a, vector<poly> b) {
int m0 = a.size() - 1, n0 = a[0].deg(), m1 = b.size() - 1, n1 = b[0].deg();
poly A = zeroes((n0 + n1 + 1) * m0), B = zeroes((n0 + n1 + 1) * m1);
for (int i = 0; i <= m0; ++i)
for (int j = 0; j <= n0; ++j)
A[i * (n0 + n1 + 1) + j] = a[i][j];
for (int i = 0; i <= m1; ++i)
for (int j = 0; j <= n1; ++j)
B[i * (n0 + n1 + 1) + j] = b[i][j];
A *= B;
vector<poly> ret(m0 + m1 + 1, zeroes(n0 + n1));
for (int i = 0; i <= m0 + m1; ++i)
for (int j = 0; j <= n0 + n1; ++j)
ret[i][j] = A[i * (n0 + n1 + 1) + j];
return ret;
}
vector<poly> Transposition::mul(const vector<poly> &a, const vector<poly> &b) {
int m0 = a.size() - 1, n0 = a[0].deg(), m1 = b.size() - 1, n1 = b[0].deg();
poly A = zeroes((n0 + 1) * m0), B = zeroes((n0 + 1) * m1);
for (int i = 0; i <= m0; ++i)
for (int j = 0; j <= n0; ++j)
A[i * (n0 + 1) + j] = a[i][j];
for (int i = 0; i <= m1; ++i)
for (int j = 0; j <= n1; ++j)
B[i * (n0 + 1) + j] = b[i][j];
A = tp.mul(A, B);
vector<poly> ret(m0 - m1 + 1, zeroes(n0 - n1));
for (int i = 0; i <= m0 - m1; ++i)
for (int j = 0; j <= n0 - n1; ++j)
ret[i][j] = A[i * (n0 + 1) + j];
return ret;
}
poly poly::comp(const poly &o) const {
if (o[0] != 0) return taylor(o[0]).comp(o - o[0]);
function<vector<poly>(int, int, vector<poly>)> recurse = [&](int n, int k, vector<poly> denom) {
if (!n) {
vector<poly> ret(k, 1);
for (int i = k - 1, j = 0; i >= 0 && j <= deg(); --i, ++j) ret[i] = a[j];
return ret;
}
int l = 0;
while ((1 << l) <= ((k << 1) + 1) * ((n + 1) << 1) - 2) ++l;
vector<int> dt(1 << l);
for (int i = 0; i <= k; ++i)
for (int j = 0; j <= n; ++j)
dt[i * ((n + 1) << 1) + j] = denom[i][j];
fft(dt, l);
vector<int> sq(1 << (l - 1));
for (int i = 0; i < 1 << l; i += 2) sq[i >> 1] = (ll)dt[i] * dt[i ^ 1] % mod;
fft(sq, l - 1, -1);
vector<poly> sub(k << 1 | 1, zeroes(n >> 1));
for (int i = 0; i <= k << 1; ++i)
for (int j = 0; j <= n >> 1; ++j)
sub[i][j] = sq[i * (n + 1) + j];
auto res = recurse(n >> 1, k << 1, sub);
vector<int> ing(1 << (l - 1));
for (int i = 0; i < k << 1; ++i)
for (int j = 0; j <= n >> 1; ++j)
ing[i * (n + 1) + j] = res[i][j];
sq.resize(1 << l);
fft(ing, l - 1);
for (int i = 0; i < 1 << l; ++i) sq[i] = (ll)ing[i >> 1] * dt[i ^ 1] % mod;
fft(sq, l, -1);
vector<poly> ret(k, zeroes(n));
for (int i = 0; i < k; ++i)
for (int j = 0; j <= n; ++j)
ret[i][j] = sq[(i + k) * ((n + 1) << 1) + j];
return ret;
};
int n = deg();
poly ret = recurse(n, 1, {poly(1).slice(n), -o})[0];
return ret;
}
poly recurse2d(int m, int n, int k, vector<poly> numer, vector<poly> denom) {
if (!n) {
poly ret = zeroes(k - 1);
for (int i = 0; i < k; ++i) ret[i] = numer[i][0];
return ret.slice(m);
}
int l = 0;
while ((1 << l) <= ((k << 1) + 1) * ((n + 1) << 1) - 2) ++l;
vector<int> nt(1 << l), dt(1 << l), sqn(1 << (l - 1)), sqd(1 << (l - 1));
for (int i = 0; i < k; ++i)
for (int j = 0; j <= n; ++j)
nt[i * ((n + 1) << 1) + j] = numer[i][j];
for (int i = 0; i <= k; ++i)
for (int j = 0; j <= n; ++j)
dt[i * ((n + 1) << 1) + j] = denom[i][j];
fft(nt, l), fft(dt, l);
if (n & 1) for (int i = 1; i < 1 << l; i += 2) nt[i] = neg(nt[i]);
for (int i = 0; i < 1 << l; i += 2)
sqd[i >> 1] = (ll)dt[i] * dt[i ^ 1] % mod,
sqn[i >> 1] = ((ll)nt[i] * dt[i ^ 1] + (ll)nt[i ^ 1] * dt[i]) % mod * ginv[2] % mod;
if (n & 1) for (int i = 1; i < 1 << (l - 1); ++i) sqn[i] = (ll)sqn[i] * NTT::root[i] % mod;
fft(sqn, l - 1, -1), fft(sqd, l - 1, -1);
vector<poly> subn(k << 1, zeroes(n >> 1)), subd(k << 1 | 1, zeroes(n >> 1));
for (int i = 0; i < k << 1; ++i)
for (int j = 0; j <= n >> 1; ++j)
subn[i][j] = sqn[i * (n + 1) + j + (n & 1)];
for (int i = 0; i <= k << 1; ++i)
for (int j = 0; j <= n >> 1; ++j)
subd[i][j] = sqd[i * (n + 1) + j];
return recurse2d(m, n >> 1, k << 1, subn, subd);
};
poly poly::compInv() const {
if (a[1] != 1) {
int nv = mpow(a[1], mod - 2);
poly ret = operator*(nv).compInv();
for (int i = 1, pw = 1; i <= deg(); ++i) ret[i] = (ll)(pw = (ll)pw * nv % mod) * ret[i] % mod;
return ret;
}
int n = deg();
poly ret = recurse2d(n - 1, n - 1, 1, {(shift(-1).exp(mod - n)).slice(n - 1)}, {poly(1).slice(n), operator-()});
reverse(ret.a.begin(), ret.a.end()), ret = ret.shift(1);
for (int i = 1; i <= n; ++i) ret[i] = (ll)ret[i] * ginv[i] % mod;
return ret;
}
poly poly::compCompInv(const poly &o) const {
if (o[1] != 1) {
int nv = mpow(o[1], mod - 2);
poly ret = compCompInv(o * nv);
for (int i = 1, pw = 1; i <= deg(); ++i) ret[i] = (ll)(pw = (ll)pw * nv % mod) * ret[i] % mod;
return ret;
}
int n = deg();
poly ret = recurse2d(n - 1, n - 1, 1, {(deriv() * o.shift(-1).exp(mod - n)).slice(n - 1)}, {poly(1).slice(n), -o});
reverse(ret.a.begin(), ret.a.end()), ret = ret.shift(1) + a[0];
for (int i = 1; i <= n; ++i) ret[i] = (ll)ret[i] * ginv[i] % mod;
return ret;
}
using polyMat = vector<vector<poly>>;
polyMat operator*(const polyMat &lhs, const polyMat &rhs) {
int x = lhs.size(), y = rhs.size(), z = rhs.back().size();
int m = 0;
for (int i = 0; i < x; ++i)
for (int j = 0; j < z; ++j)
for (int k = 0; k < y; ++k)
m = max(m, lhs[i][k].deg() + rhs[k][j].deg());
polyMat ret(x, vector<poly>(z));
if (m <= BRUTE_N2_LIMIT) {
for (int i = 0; i < x; ++i)
for (int j = 0; j < z; ++j)
for (int k = 0; k < y; ++k)
ret[i][j] += lhs[i][k] * rhs[k][j];
for (int i = 0; i < x; ++i)
for (int j = 0; j < z; ++j)
trim(ret[i][j]);
} else {
int l = 0;
for (; (1 << l) <= m; ++l);
vector<vector<vector<int>>> lDFT(x, vector<vector<int>>(y, vector<int>(1 << l))),
rDFT(y, vector<vector<int>>(z, vector<int>(1 << l)));
for (int i = 0; i < x; ++i)
for (int j = 0; j < y; ++j) {
memcpy(lDFT[i][j].begin().base(), lhs[i][j].a.begin().base(),
sizeof(int) * lhs[i][j].a.size());
fft(lDFT[i][j].begin().base(), l, 1);
}
for (int i = 0; i < y; ++i)
for (int j = 0; j < z; ++j) {
memcpy(rDFT[i][j].begin().base(), rhs[i][j].a.begin().base(),
sizeof(int) * rhs[i][j].a.size());
fft(rDFT[i][j].begin().base(), l, 1);
}
vector<int> res;
for (int i = 0; i < x; ++i)
for (int j = 0; j < z; ++j) {
res.assign(1 << l, 0);
for (int k = 0; k < y; ++k)
for (int t = 0; t < (1 << l); ++t)
res[t] = (res[t] + lDFT[i][k][t] * (ll)rDFT[k][j][t]) % mod;
fft(res.begin().base(), l, -1);
trim(ret[i][j] = res);
}
}
return ret;
}
polyMat &operator*=(polyMat &a, const polyMat &b) { a = a * b; }
tuple<poly, poly, polyMat> halfEuclid(poly a, poly b, int l) {
if (b.deg() < 1 << (l - 1)) return {a, b, {{1, 0}, {0, 1}}};
polyMat ret = get<2>(halfEuclid(a.shift(-(1 << (l - 1))), b.shift(-(1 << (l - 1))), l - 1));
auto t0 = ret * polyMat{{a}, {b}};
a = t0[0][0], b = t0[1][0];
if (b.deg() < 1 << (l - 1)) return {a, b, ret};
auto [q, r] = a.div(b); trim(r);
ret = polyMat{{0, 1}, {1, -q}} * ret, a = exchange(b, r);
if (b.deg() < 1 << (l - 1)) return {a, b, ret};
t0 = get<2>(halfEuclid(a.shift(-(1 << (l - 2))), b.shift(-(1 << (l - 2))), l - 1));
auto t1 = t0 * polyMat{{a}, {b}};
a = t1[0][0], b = t1[1][0], ret = t0 * ret;
return {a, b, ret};
}
pair<poly, polyMat> exEuclid(poly a, poly b) {
trim(a), trim(b);
if (b.empty()) return {a, {{1, 0}}};
polyMat ret = {{1, 0}, {0, 1}};
auto [q, r] = a.div(b); trim(r);
ret = polyMat{{0, 1}, {1, -q}} * ret, a = exchange(b, r);
if (b.empty()) return {a, {ret[0]}};
int l = 0;
for (; 1 << l <= a.deg(); ++l);
while (!b.empty()) {
auto res = halfEuclid(a, b, l);
a = get<0>(res), b = get<1>(res), ret = get<2>(res) * ret;
if (b.empty()) break;
auto [q, r] = a.div(b); trim(r);
ret = polyMat{{0, 1}, {1, -q}} * ret, a = exchange(b, r);
for (; l && 1 << (l - 1) > a.deg(); --l);
}
return {a, {ret[0]}};
}
pair<poly, bool> modInv(poly p, poly q) {
auto [a, mat] = exEuclid(p, q);
if (!a.empty()) {
int nv = nt.inv(a.back());
a *= nv, mat = polyMat{{nv, 0}, {0, nv}} * mat;
}
return {mat[0][0], a == 1};
}
const int img = mpow(3, (mod - 1) >> 2);
poly chebyshev(int n) {
poly ret = zeroes(n);
for (int k = 0; k * 2 <= n; ++k)
ret[n - k * 2] = k & 1 ? mod - binom(n - k, k) : binom(n - k, k);
return ret;
}
poly bostanMori(int n, poly m, poly p0, poly p1, poly q1, poly q2) {
for (; n; n >>= 1) {
if (n & 1) p0 = (p1 - p0 * q1) % m, p1 = p1 * q2 % m;
else p1 = (p0 * q2 - p1 * q1) % m;
q1 = (q2 * 2 - q1 * q1) % m,
q2 = q2 * q2 % m;
}
return p0;
}
int resultant(poly a, poly b) {
trim(a), trim(b);
if (a.empty() || b.empty()) return 0;
int ret = 1;
if (a.back() != 1)
ret = (ll)ret * mpow(a.back(), b.deg()) % mod, a *= nt.inv(a.back());
if (b.back() != 1)
ret = (ll)ret * mpow(b.back(), a.deg()) % mod, b *= nt.inv(b.back());
if (!b.deg()) return ret;
if (a.deg() & b.deg() & 1) ret = mod - ret;
a = exchange(b, a % b), trim(b);
if (b.empty()) return 0;
if (b.back() != 1)
ret = (ll)ret * mpow(b.back(), a.deg()) % mod, b *= nt.inv(b.back());
if (!b.deg()) return ret;
int l = 0;
for (; 1 << l <= a.deg(); ++l);
function<tuple<int, poly, poly, polyMat>(poly, poly, int, int)> halfEuclid =
[&](poly a, poly b, int offset, int l) -> tuple<int, poly, poly, polyMat> {
if (b.deg() < 1 << (l - 1)) return {1, a, b, {{1, 0}, {0, 1}}};
auto [res, _, __, ret] = halfEuclid(a.shift(-(1 << (l - 1))), b.shift(-(1 << (l - 1))), offset + (1 << (l - 1)), l - 1);
auto t0 = ret * polyMat{{a}, {b}};
a = t0[0][0], b = t0[1][0];
if (b.deg() < 1 << (l - 1)) return {res, a, b, ret};
if (b.back() != 1) {
res = (ll)res * mpow(b.back(), a.deg() + offset) % mod;
int nv = nt.inv(b.back());
b *= nv, ret = polyMat{{1, 0}, {0, nv}} * ret;
}
if ((a.deg() + offset) & (b.deg() + offset) & 1) res = mod - res;
auto [q, r] = a.div(b); trim(r);
ret = polyMat{{0, 1}, {1, -q}} * ret, a = exchange(b, r);
if (b.deg() < 1 << (l - 1)) return {res, a, b, ret};
if (b.back() != 1) {
res = (ll)res * mpow(b.back(), a.deg() + offset) % mod;
int nv = nt.inv(b.back());
b *= nv, ret = polyMat{{1, 0}, {0, nv}} * ret;
}
auto tmp = halfEuclid(a.shift(-(1 << (l - 2))), b.shift(-(1 << (l - 2))), offset + (1 << (l - 2)), l - 1);
t0 = get<3>(tmp) * polyMat{{a}, {b}},
res = (ll)res * get<0>(tmp) % mod, a = t0[0][0], b = t0[1][0], ret = get<3>(tmp) * ret;
return {res, a, b, ret};
};
while (b.deg()) {
auto [nres, na, nb, _] = halfEuclid(a, b, 0, l);
ret = (ll)ret * nres % mod, a = na, b = nb;
if (b.empty()) return 0;
if (b.back() != 1)
ret = (ll)ret * mpow(b.back(), a.deg()) % mod, b *= nt.inv(b.back());
if (!b.deg()) break;
if (a.deg() & b.deg() & 1) ret = mod - ret;
a = exchange(b, a % b), trim(b);
if (b.back() != 1)
ret = (ll)ret * mpow(b.back(), a.deg()) % mod,
b *= nt.inv(b.back());
for (; l && 1 << (l - 1) > a.deg(); --l);
}
return ret;
}
int bostanMori(int n, poly p, poly q) {
trim(p), trim(q);
if (p.deg() >= q.deg()) {
auto [k, r] = p.div(q);
int ret = bostanMori(n, r, q);
if (n <= k.deg()) add(ret, k[n]);
return ret;
}
int k = 0;
for (; 1 << k < q.size() << 1; ++k);
vector<int> root(1 << k), rev(1 << k);
for (int i = 0; i < 1 << k; ++i)
rev[i] = (rev[i >> 1] >> 1) | ((i & 1) << (k - 1));
int w = mpow(Root, 1 << (K - k));
root[0] = 1;
for (int i = 1; i < 1 << k; ++i) root[i] = (ll)root[i - 1] * w % mod;
p.resize(1 << k), q.resize(1 << k); vector<int> r(1 << k);
fft(p, k - 1), fft(q, k - 1);
for (; n; n >>= 1) {
auto extend = [&](poly &f) {
memcpy(f.base() + (1 << (k - 1)), f.base(), sizeof(int) << (k - 1));
fft(f.base() + (1 << (k - 1)), k - 1, -1);
for (int i = 1 << (k - 1); i < 1 << k; ++i) f[i] = (ll)f[i] * root[i ^ (1 << (k - 1))] % mod;
fft(f.base() + (1 << (k - 1)), k - 1);
};
extend(p), extend(q);
for (int i = 0; i < 1 << k; ++i) r[i] = q[i ^ 1];
for (int i = 0; i < 1 << k; ++i) p[i] = (ll)p[i] * r[i] % mod, q[i] = (ll)q[i] * r[i] % mod;
if (n & 1) for (int i = 1; i < 1 << k; ++i) p[i] = (ll)p[i] * root[(1 << k) - rev[i]] % mod;
for (int i = 0; i < 1 << (k - 1); ++i) p[i] = quo2(norm(p[i << 1] + p[i << 1 | 1])), q[i] = q[i << 1];
}
int p0 = 0, q0 = 0;
for (int i = 0; i < 1 << (k - 1); ++i) add(p0, p[i]), add(q0, q[i]);
return (ll)p0 * mpow(q0, mod - 2) % mod;
}
int bostanMori(int n, pair<vector<int>, vector<int>> f) {
return bostanMori(n, f.first, f.second);
}
pair<poly, poly> fracApprox(poly a, poly b, int k) {
trim(a), trim(b);
if (a.deg() < k) return {a, 1};
polyMat mat = {{1, 0}, {0, 1}};
auto [q, r] = a.div(b); trim(r);
mat = polyMat{{0, 1}, {1, -q}} * mat, a = exchange(b, r);
if (a.deg() < k) return {a, mat[0][0]};
int l = 0;
for (; 1 << l <= a.deg(); ++l);
function<tuple<poly, poly, polyMat>(poly, poly, int, int)> halfEuclid =
[&](poly a, poly b, int k, int l) -> tuple<poly, poly, polyMat> {
if (b.deg() < 1 << (l - 1)) return {a, b, {{1, 0}, {0, 1}}};
polyMat ret = get<2>(halfEuclid(a.shift(-(1 << (l - 1))), b.shift(-(1 << (l - 1))), k - (1 << (l - 1)), l - 1));
auto t0 = ret * polyMat{{a}, {b}};
a = t0[0][0], b = t0[1][0];
if (a.deg() < k || b.deg() < 1 << (l - 1)) return {a, b, ret};
auto [q, r] = a.div(b); trim(r);
ret = polyMat{{0, 1}, {1, -q}} * ret, a = exchange(b, r);
if (a.deg() < k || b.deg() < 1 << (l - 1)) return {a, b, ret};
t0 = get<2>(halfEuclid(a.shift(-(1 << (l - 2))), b.shift(-(1 << (l - 2))), k - (1 << (l - 2)), l - 1));
auto t1 = t0 * polyMat{{a}, {b}};
a = t1[0][0], b = t1[1][0], ret = t0 * ret;
return {a, b, ret};
};
while (a.deg() >= k) {
auto res = halfEuclid(a, b, k, l);
a = get<0>(res), b = get<1>(res), mat = get<2>(res) * mat;
if (a.deg() < k) break;
auto [q, r] = a.div(b); trim(r);
mat = polyMat{{0, 1}, {1, -q}} * mat, a = exchange(b, r);
for (; l && 1 << (l - 1) > a.deg(); --l);
}
return {a, mat[0][0]};
}
pair<poly, poly> berlekampMassey(poly f) {
auto [p, q] = fracApprox(f, poly{1}.shift(f.size()), f.size() >> 1);
if (q[0] != 1) {
int nv = nt.inv(q[0]);
p *= nv, q *= nv;
}
return {p, q};
}
struct RelaxedConvolution {
static const int LG = 19;
static const int B = 16;
void run(int *f, int *g, int n, const function<void(int)> &relax) {
vector<vector<int>> savef(LG + 1), saveg(LG + 1);
function<void(int, int)> divideConquer = [&](int l, int r) {
if (r - l <= BRUTE_N2_LIMIT) {
for (int i = l + 1; i <= r; ++i) {
for (int j = l; j < i; ++j)
fam(f[i], f[j], g[i - j]);
if (l > 0)
for (int j = l; j < i; ++j)
fam(f[i], g[j], f[i - j]);
relax(i);
}
return ;
}
if (l == 0) {
int mid = ((l + r) >> 1) + 5;
divideConquer(l, mid);
int lgd = 0;
while ((1 << lgd) <= r)
++lgd;
vector<int> tf(1 << lgd), tg(1 << lgd);
for (int i = 0; i < mid; ++i)
tf[i] = f[i], tg[i] = g[i];
fft(tf.data(), lgd), fft(tg.data(), lgd);
for (int i = 0; i < (1 << lgd); ++i)
tf[i] = (ll)tf[i] * tg[i] % mod;
fft(tf.data(), lgd, -1);
for (int i = mid + 1; i <= r; ++i)
add(f[i], tf[i]);
divideConquer(mid, r);
return ;
}
int d = (r - l) / B + 1, lgd = 0, lg;
while ((1 << lgd) <= d * 2)
++lgd;
d = (1 << (lgd - 1)) - 1, lg = (r - l + d - 1) / d;
vector<int> dftf = savef[lgd], dftg = saveg[lgd];
dftf.resize(lg << (lgd + 1)), dftg.resize(lg << (lgd + 1));
for (int i = lg << lgd; i < (lg << (lgd + 1)); ++i)
dftf[i] = dftg[i] = 0;
int ef = lg;
for (int i = 0; i < lg; ++i) {
if ((i << lgd) < savef[lgd].size())
continue;
if ((i + 2) * d >= l)
--ef;
for (int j = 0; j <= min(d * 2, n - i * d); ++j)
dftf[(i << lgd) + j] = f[i * d + j],
dftg[(i << lgd) + j] = g[i * d + j];
fft(dftf.data() + (i << lgd), lgd), fft(dftg.data() + (i << lgd), lgd);
}
if (savef[lgd].size() < (ef << lgd))
savef[lgd] = vector<int>(dftf.begin(), dftf.begin() + (ef << lgd)),
saveg[lgd] = vector<int>(dftg.begin(), dftg.begin() + (ef << lgd));
for (int i = 0; i < lg; ++i) {
if (i) {
for (int j = 0; j < (1 << lgd); ++j)
add(dftf[((lg + i) << lgd) + j], dftg[((lg + i) << lgd) + j]);
fft(dftf.data() + ((lg + i) << lgd), lgd, -1);
}
for (int j = 1; j <= min(d, r - l - i * d); ++j)
add(f[l + i * d + j], dftf[((lg + i) << lgd) + d + j]);
divideConquer(l + i * d, min(l + (i + 1) * d, r));
if (i + 1 < lg) {
for (int j = 0; j < d; ++j)
dftf[((lg + i) << lgd) + j] = f[l + i * d + j],
dftg[((lg + i) << lgd) + j] = g[l + i * d + j];
for (int j = d; j < (1 << lgd); ++j)
dftf[((lg + i) << lgd) + j] = dftg[((lg + i) << lgd) + j] = 0;
fft(dftf.data() + ((lg + i) << lgd), lgd), fft(dftg.data() + ((lg + i) << lgd), lgd);
}
for (int j = i + 1; j < lg; ++j)
for (int k = 0; k < (1 << lgd); ++k)
fam(dftf[((lg + j) << lgd) + k], dftg[((j - i - 1) << lgd) + k], dftf[((lg + i) << lgd) + k]),
fam(dftg[((lg + j) << lgd) + k], dftf[((j - i - 1) << lgd) + k], dftg[((lg + i) << lgd) + k]);
}
};
relax(0), divideConquer(0, n);
}
} rc;
int main() {
int n; scanf("%d", &n);
poly f = zeroes(n); vector<int> g(n + 1);
rc.run(f.base(), g.data(), n, [&](int k) {
if (!k) return ;
if (k == 1) f[k] = 1;
fam(f[k], k - 1, f[k - 1]), g[k] = exchange(f[k], (ll)f[k] * ginv[k] % mod);
});
int ans = (ll)(-f + 1).inv()[n] * gfac[n] % mod;
printf("%d\n", ans);
}
这程序好像有点Bug,我给组数据试试?
Details
Tip: Click on the bar to expand more detailed information
Test #1:
score: 100
Accepted
time: 0ms
memory: 3952kb
input:
3
output:
28
result:
ok 1 number(s): "28"
Test #2:
score: 0
Accepted
time: 0ms
memory: 3980kb
input:
1
output:
1
result:
ok 1 number(s): "1"
Test #3:
score: 0
Accepted
time: 0ms
memory: 3884kb
input:
2
output:
4
result:
ok 1 number(s): "4"
Test #4:
score: 0
Accepted
time: 0ms
memory: 4180kb
input:
4
output:
282
result:
ok 1 number(s): "282"
Test #5:
score: 0
Accepted
time: 0ms
memory: 3972kb
input:
5
output:
3718
result:
ok 1 number(s): "3718"
Test #6:
score: 0
Accepted
time: 0ms
memory: 4048kb
input:
6
output:
60694
result:
ok 1 number(s): "60694"
Test #7:
score: 0
Accepted
time: 0ms
memory: 3960kb
input:
7
output:
1182522
result:
ok 1 number(s): "1182522"
Test #8:
score: 0
Accepted
time: 0ms
memory: 3956kb
input:
8
output:
26791738
result:
ok 1 number(s): "26791738"
Test #9:
score: 0
Accepted
time: 0ms
memory: 4044kb
input:
9
output:
692310518
result:
ok 1 number(s): "692310518"
Test #10:
score: 0
Accepted
time: 0ms
memory: 3884kb
input:
10
output:
135061370
result:
ok 1 number(s): "135061370"
Test #11:
score: 0
Accepted
time: 0ms
memory: 3968kb
input:
100
output:
423669705
result:
ok 1 number(s): "423669705"
Test #12:
score: 0
Accepted
time: 1ms
memory: 4044kb
input:
1234
output:
878522960
result:
ok 1 number(s): "878522960"
Test #13:
score: 0
Accepted
time: 36ms
memory: 7812kb
input:
54321
output:
827950477
result:
ok 1 number(s): "827950477"
Test #14:
score: 0
Accepted
time: 54ms
memory: 9832kb
input:
65536
output:
380835743
result:
ok 1 number(s): "380835743"
Test #15:
score: 0
Accepted
time: 125ms
memory: 15016kb
input:
131072
output:
842796122
result:
ok 1 number(s): "842796122"
Test #16:
score: 0
Accepted
time: 102ms
memory: 12460kb
input:
131071
output:
981021531
result:
ok 1 number(s): "981021531"
Test #17:
score: 0
Accepted
time: 100ms
memory: 12532kb
input:
131070
output:
480197639
result:
ok 1 number(s): "480197639"
Test #18:
score: 0
Accepted
time: 129ms
memory: 17012kb
input:
131074
output:
383000585
result:
ok 1 number(s): "383000585"
Test #19:
score: 0
Accepted
time: 133ms
memory: 17036kb
input:
131073
output:
316664839
result:
ok 1 number(s): "316664839"
Test #20:
score: 0
Accepted
time: 196ms
memory: 21212kb
input:
250000
output:
119658643
result:
ok 1 number(s): "119658643"
Test #21:
score: 0
Accepted
time: 193ms
memory: 21552kb
input:
249999
output:
78110138
result:
ok 1 number(s): "78110138"
Test #22:
score: 0
Accepted
time: 177ms
memory: 21440kb
input:
249998
output:
297253469
result:
ok 1 number(s): "297253469"
Extra Test:
score: 0
Extra Test Passed