QOJ.ac
QOJ
ID | 题目 | 提交者 | 结果 | 用时 | 内存 | 语言 | 文件大小 | 提交时间 | 测评时间 |
---|---|---|---|---|---|---|---|---|---|
#300167 | #4822. Matrix Counting | hos_lyric | AC ✓ | 345ms | 33716kb | C++14 | 45.8kb | 2024-01-07 19:18:33 | 2024-01-07 19:18:33 |
Judging History
answer
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <algorithm>
#include <bitset>
#include <complex>
#include <deque>
#include <functional>
#include <iostream>
#include <map>
#include <numeric>
#include <queue>
#include <set>
#include <sstream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
using namespace std;
using Int = long long;
// Debug printer: writes a pair as "(first, second)".
template <class T1, class T2>
ostream &operator<<(ostream &os, const pair<T1, T2> &a) {
  os << "(";
  os << a.first;
  os << ", ";
  os << a.second;
  os << ")";
  return os;
}
// Debug printer: writes a vector as "[a, b, c]".
// At most the first 256 elements are printed; longer vectors end with ", ...".
template <class T> ostream &operator<<(ostream &os, const vector<T> &as) {
  constexpr int kMaxShown = 256;
  const int total = as.size();
  const int shown = (total < kMaxShown) ? total : kMaxShown;
  os << "[";
  for (int idx = 0; idx < shown; ++idx) {
    if (idx != 0) os << ", ";
    os << as[idx];
  }
  if (total > kMaxShown) os << ", ...";
  return os << "]";
}
// Debug helper: prints every element of the iterator range [a, b) to stderr,
// each followed by a space, then ends the line (and flushes).
template <class T> void pv(T a, T b) {
  T cur = a;
  while (cur != b) {
    cerr << *cur << " ";
    ++cur;
  }
  cerr << endl;
}
// Lowers t to f when f is strictly smaller. Returns true iff t was changed.
template <class T> bool chmin(T &t, const T &f) {
  if (!(t > f)) return false;
  t = f;
  return true;
}
// Raises t to f when f is strictly larger. Returns true iff t was changed.
template <class T> bool chmax(T &t, const T &f) {
  if (!(t < f)) return false;
  t = f;
  return true;
}
////////////////////////////////////////////////////////////////////////////////
// Integer modulo the compile-time constant M_.
// The residue lives in the public field x. Constructors and the arithmetic
// operators keep it in [0, M); code that writes x directly is responsible for
// its own range handling (operator*= reduces any unsigned value modulo M).
template <unsigned M_> struct ModInt {
  static constexpr unsigned M = M_;
  unsigned x;

  // Construction from the four integer types; signed values are mapped to
  // their non-negative residue.
  constexpr ModInt() : x(0U) {}
  constexpr ModInt(unsigned x_) : x(x_ % M) {}
  constexpr ModInt(unsigned long long x_) : x(x_ % M) {}
  constexpr ModInt(int x_) : x(0U) {
    x_ %= static_cast<int>(M);
    if (x_ < 0) x_ += static_cast<int>(M);
    x = x_;
  }
  constexpr ModInt(long long x_) : x(0U) {
    x_ %= static_cast<long long>(M);
    if (x_ < 0) x_ += static_cast<long long>(M);
    x = x_;
  }

  // Compound arithmetic. For -=, a negative difference wraps around as an
  // unsigned value >= M and is corrected by adding M back.
  ModInt &operator+=(const ModInt &a) {
    x += a.x;
    if (x >= M) x -= M;
    return *this;
  }
  ModInt &operator-=(const ModInt &a) {
    x -= a.x;
    if (x >= M) x += M;
    return *this;
  }
  ModInt &operator*=(const ModInt &a) {
    const unsigned long long wide = static_cast<unsigned long long>(x) * a.x;
    x = wide % M;
    return *this;
  }
  ModInt &operator/=(const ModInt &a) {
    *this *= a.inv();
    return *this;
  }

  // Binary exponentiation; a negative exponent raises the inverse instead.
  ModInt pow(long long e) const {
    if (e < 0) return inv().pow(-e);
    ModInt base = *this, acc = 1U;
    while (e) {
      if (e & 1) acc *= base;
      base *= base;
      e >>= 1;
    }
    return acc;
  }

  // Multiplicative inverse via the extended Euclidean algorithm.
  // Asserts that gcd(x, M) == 1.
  ModInt inv() const {
    unsigned r0 = M, r1 = x;
    int c0 = 0, c1 = 1;
    while (r1) {
      const unsigned q = r0 / r1;
      const unsigned r2 = r0 - q * r1;
      const int c2 = c0 - static_cast<int>(q) * c1;
      r0 = r1;
      r1 = r2;
      c0 = c1;
      c1 = c2;
    }
    assert(r0 == 1U);
    return ModInt(c0);
  }

  ModInt operator+() const { return *this; }
  ModInt operator-() const {
    ModInt neg;
    if (x) neg.x = M - x;
    return neg;
  }

  // ModInt (op) ModInt.
  ModInt operator+(const ModInt &a) const { ModInt r = *this; r += a; return r; }
  ModInt operator-(const ModInt &a) const { ModInt r = *this; r -= a; return r; }
  ModInt operator*(const ModInt &a) const { ModInt r = *this; r *= a; return r; }
  ModInt operator/(const ModInt &a) const { ModInt r = *this; r /= a; return r; }

  // scalar (op) ModInt: the left operand is converted to ModInt first.
  template <class T> friend ModInt operator+(T a, const ModInt &b) { ModInt r(a); r += b; return r; }
  template <class T> friend ModInt operator-(T a, const ModInt &b) { ModInt r(a); r -= b; return r; }
  template <class T> friend ModInt operator*(T a, const ModInt &b) { ModInt r(a); r *= b; return r; }
  template <class T> friend ModInt operator/(T a, const ModInt &b) { ModInt r(a); r /= b; return r; }

  // True iff the stored value is non-zero.
  explicit operator bool() const { return x != 0U; }
  bool operator==(const ModInt &a) const { return x == a.x; }
  bool operator!=(const ModInt &a) const { return !(x == a.x); }

  // Prints the raw residue.
  friend std::ostream &operator<<(std::ostream &os, const ModInt &a) {
    os << a.x;
    return os;
  }
};
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// Modulus used by every Mint in this file: 998244353 = 119 * 2^23 + 1.
constexpr unsigned MO = 998244353U;
// Twice the modulus; fft/invFft use it for lazy reduction of values that are temporarily kept below 4 MO.
constexpr unsigned MO2 = 2U * MO;
// Transforms of length up to 2^FFT_MAX are supported (asserted in fft/invFft).
constexpr int FFT_MAX = 23;
using Mint = ModInt<MO>;
// Precomputed constant tables for the transforms.
// NOTE(review): FFT_ROOTS[h] looks like a primitive 2^h-th root of unity (entry 0 is 1, entry 1 is MO - 1) and INV_FFT_ROOTS[h] its inverse — confirm against the generator used to build them.
constexpr Mint FFT_ROOTS[FFT_MAX + 1] = {1U, 998244352U, 911660635U, 372528824U, 929031873U, 452798380U, 922799308U, 781712469U, 476477967U, 166035806U, 258648936U, 584193783U, 63912897U, 350007156U, 666702199U, 968855178U, 629671588U, 24514907U, 996173970U, 363395222U, 565042129U, 733596141U, 267099868U, 15311432U};
constexpr Mint INV_FFT_ROOTS[FFT_MAX + 1] = {1U, 998244352U, 86583718U, 509520358U, 337190230U, 87557064U, 609441965U, 135236158U, 304459705U, 685443576U, 381598368U, 335559352U, 129292727U, 358024708U, 814576206U, 708402881U, 283043518U, 3707709U, 121392023U, 704923114U, 950391366U, 428961804U, 382752275U, 469870224U};
// Step factors for the running twiddle value: fft does prod *= FFT_RATIOS[__builtin_ctz(++h)] when moving to the next block, invFft uses INV_FFT_RATIOS the same way.
// Only 22 initializers are listed for the FFT_MAX (= 23) slots, so the last entry of each table is zero-initialized.
constexpr Mint FFT_RATIOS[FFT_MAX] = {911660635U, 509520358U, 369330050U, 332049552U, 983190778U, 123842337U, 238493703U, 975955924U, 603855026U, 856644456U, 131300601U, 842657263U, 730768835U, 942482514U, 806263778U, 151565301U, 510815449U, 503497456U, 743006876U, 741047443U, 56250497U, 867605899U};
constexpr Mint INV_FFT_RATIOS[FFT_MAX] = {86583718U, 372528824U, 373294451U, 645684063U, 112220581U, 692852209U, 155456985U, 797128860U, 90816748U, 860285882U, 927414960U, 354738543U, 109331171U, 293255632U, 535113200U, 308540755U, 121186627U, 608385704U, 438932459U, 359477183U, 824071951U, 103369235U};
// In-place forward transform of as[0..n) modulo MO.
// n must be a power of two with 1 <= n <= 2^FFT_MAX (asserted).
// The result is left in bit-reversed order, as stated by the formula below; no reordering pass is done here.
// Values are manipulated through the raw field .x and may temporarily exceed MO; the trailing "// < k MO" comments track the bound after each statement. The final loop brings every entry back below MO.
// as[rev(i)] <- \sum_j \zeta^(ij) as[j]
void fft(Mint *as, int n) {
assert(!(n & (n - 1))); assert(1 <= n); assert(n <= 1 << FFT_MAX);
int m = n;
// First pass (half-length m = n / 2): no twiddle multiplication, plain add/subtract.
if (m >>= 1) {
for (int i = 0; i < m; ++i) {
const unsigned x = as[i + m].x; // < MO
as[i + m].x = as[i].x + MO - x; // < 2 MO
as[i].x += x; // < 2 MO
}
}
// Second pass: each block of 2m entries uses its own twiddle value prod,
// advanced by the ratio table indexed by the trailing zero count of the block counter.
if (m >>= 1) {
Mint prod = 1U;
for (int h = 0, i0 = 0; i0 < n; i0 += (m << 1)) {
for (int i = i0; i < i0 + m; ++i) {
const unsigned x = (prod * as[i + m]).x; // < MO
as[i + m].x = as[i].x + MO - x; // < 3 MO
as[i].x += x; // < 3 MO
}
prod *= FFT_RATIOS[__builtin_ctz(++h)];
}
}
// Remaining passes, handled two per iteration. The first of each pair lets values grow to < 4 MO;
// the second first pulls as[i] back below 2 MO so the bound returns to < 3 MO.
for (; m; ) {
if (m >>= 1) {
Mint prod = 1U;
for (int h = 0, i0 = 0; i0 < n; i0 += (m << 1)) {
for (int i = i0; i < i0 + m; ++i) {
const unsigned x = (prod * as[i + m]).x; // < MO
as[i + m].x = as[i].x + MO - x; // < 4 MO
as[i].x += x; // < 4 MO
}
prod *= FFT_RATIOS[__builtin_ctz(++h)];
}
}
if (m >>= 1) {
Mint prod = 1U;
for (int h = 0, i0 = 0; i0 < n; i0 += (m << 1)) {
for (int i = i0; i < i0 + m; ++i) {
const unsigned x = (prod * as[i + m]).x; // < MO
as[i].x = (as[i].x >= MO2) ? (as[i].x - MO2) : as[i].x; // < 2 MO
as[i + m].x = as[i].x + MO - x; // < 3 MO
as[i].x += x; // < 3 MO
}
prod *= FFT_RATIOS[__builtin_ctz(++h)];
}
}
}
// Final normalization of every entry into [0, MO).
for (int i = 0; i < n; ++i) {
as[i].x = (as[i].x >= MO2) ? (as[i].x - MO2) : as[i].x; // < 2 MO
as[i].x = (as[i].x >= MO) ? (as[i].x - MO) : as[i].x; // < MO
}
}
// In-place inverse transform of as[0..n) modulo MO, including the 1/n scaling.
// n must be a power of two with 1 <= n <= 2^FFT_MAX (asserted).
// The input is expected in the bit-reversed order produced by fft (see the formula below).
// As in fft, raw .x values may temporarily exceed MO; the trailing "// < k MO" comments track the bounds.
// as[i] <- (1/n) \sum_j \zeta^(-ij) as[rev(j)]
void invFft(Mint *as, int n) {
assert(!(n & (n - 1))); assert(1 <= n); assert(n <= 1 << FFT_MAX);
int m = 1;
// First pass (m = 1); skipped when n <= 2 because the last pass below then covers everything.
if (m < n >> 1) {
Mint prod = 1U;
for (int h = 0, i0 = 0; i0 < n; i0 += (m << 1)) {
for (int i = i0; i < i0 + m; ++i) {
const unsigned long long y = as[i].x + MO - as[i + m].x; // < 2 MO
as[i].x += as[i + m].x; // < 2 MO
as[i + m].x = (prod.x * y) % MO; // < MO
}
prod *= INV_FFT_RATIOS[__builtin_ctz(++h)];
}
m <<= 1;
}
// Middle passes. Within each block the first m / 2 positions hold values < 2 MO from the previous pass
// and need the extra MO2 correction; the other m / 2 positions are already < MO.
for (; m < n >> 1; m <<= 1) {
Mint prod = 1U;
for (int h = 0, i0 = 0; i0 < n; i0 += (m << 1)) {
for (int i = i0; i < i0 + (m >> 1); ++i) {
const unsigned long long y = as[i].x + MO2 - as[i + m].x; // < 4 MO
as[i].x += as[i + m].x; // < 4 MO
as[i].x = (as[i].x >= MO2) ? (as[i].x - MO2) : as[i].x; // < 2 MO
as[i + m].x = (prod.x * y) % MO; // < MO
}
for (int i = i0 + (m >> 1); i < i0 + m; ++i) {
const unsigned long long y = as[i].x + MO - as[i + m].x; // < 2 MO
as[i].x += as[i + m].x; // < 2 MO
as[i + m].x = (prod.x * y) % MO; // < MO
}
prod *= INV_FFT_RATIOS[__builtin_ctz(++h)];
}
}
// Last pass (m = n / 2): no twiddle multiplication; results are left unreduced (< 4 MO).
if (m < n) {
for (int i = 0; i < m; ++i) {
const unsigned y = as[i].x + MO2 - as[i + m].x; // < 4 MO
as[i].x += as[i + m].x; // < 4 MO
as[i + m].x = y; // < 4 MO
}
}
// Scale by 1/n. Mint::operator*= takes the product modulo MO, so this also brings every entry back below MO.
const Mint invN = Mint(n).inv();
for (int i = 0; i < n; ++i) {
as[i] *= invN;
}
}
void fft(vector<Mint> &as) {
fft(as.data(), as.size());
}
void invFft(vector<Mint> &as) {
invFft(as.data(), as.size());
}
// Product of the polynomials as and bs (coefficient vectors), modulo MO.
// Returns an empty vector if either input is empty, otherwise
// as.size() + bs.size() - 1 coefficients.
vector<Mint> convolve(vector<Mint> as, vector<Mint> bs) {
  if (as.empty() || bs.empty()) return {};
  const int len = as.size() + bs.size() - 1;
  // Smallest power of two that can hold the full product.
  int n = 1;
  while (n < len) n <<= 1;
  as.resize(n);
  bs.resize(n);
  fft(as);
  fft(bs);
  for (int i = 0; i < n; ++i) as[i] *= bs[i];
  invFft(as);
  as.resize(len);
  return as;
}
// Square of the polynomial as (coefficient vector), modulo MO.
// Needs only one forward transform. Returns an empty vector for empty input,
// otherwise 2 * as.size() - 1 coefficients.
vector<Mint> square(vector<Mint> as) {
  if (as.empty()) return {};
  const int len = as.size() + as.size() - 1;
  // Smallest power of two that can hold the full product.
  int n = 1;
  while (n < len) n <<= 1;
  as.resize(n);
  fft(as);
  for (int i = 0; i < n; ++i) as[i] *= as[i];
  invFft(as);
  as.resize(len);
  return as;
}
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// Lookup tables and the Poly operations that read them:
//   inv:    log, exp, pow
//   fac:    shift
//   invFac: shift
constexpr int LIM_INV = 1 << 20; // @
Mint inv[LIM_INV], fac[LIM_INV], invFac[LIM_INV];
// Fills inv[i] = 1 / i (for i >= 1), fac[i] = i! and invFac[i] = 1 / i!
// for all i < LIM_INV. The global instance below runs this once at start-up.
struct ModIntPreparator {
  ModIntPreparator() {
    inv[1] = 1;
    fac[0] = 1;
    invFac[0] = 1;
    for (int i = 1; i < LIM_INV; ++i) {
      // M = (M / i) * i + (M % i)  ==>  1 / i = -(M / i) / (M % i)  (mod M)
      if (i >= 2) inv[i] = -((Mint::M / i) * inv[Mint::M % i]);
      fac[i] = fac[i - 1] * i;
      invFac[i] = invFac[i - 1] * inv[i];
    }
  }
} preparator;
// Shared scratch buffers for the Poly operations. The lists below name the operations that overwrite each buffer
// (directly or through a nested call), so a caller must not expect a buffer's contents to survive any listed operation.
// polyWork0: *, inv, div, divAt, log, exp, pow, sqrt, shift
// polyWork1: inv, div, divAt, log, exp, pow, sqrt, shift
// polyWork2: divAt, exp, pow, sqrt
// polyWork3: exp, pow, sqrt
// Capacity of each scratch buffer; the operations assert that their transform length fits.
static constexpr int LIM_POLY = 1 << 20; // @
static_assert(LIM_POLY <= 1 << FFT_MAX, "Poly: LIM_POLY <= 1 << FFT_MAX must hold.");
static Mint polyWork0[LIM_POLY], polyWork1[LIM_POLY], polyWork2[LIM_POLY], polyWork3[LIM_POLY];
struct Poly : public vector<Mint> {
// Empty polynomial (no coefficients).
Poly() {}
// Polynomial with n coefficients, all zero (Mint's default constructor stores 0).
explicit Poly(int n) : vector<Mint>(n) {}
// Implicit conversion from a coefficient vector (copies it); index i holds the coefficient of x^i.
Poly(const vector<Mint> &vec) : vector<Mint>(vec) {}
// Brace-list construction, e.g. {1U, a}.
Poly(std::initializer_list<Mint> il) : vector<Mint>(il) {}
// Number of stored coefficients as a signed int (hides vector::size(), which returns an unsigned type).
int size() const { return vector<Mint>::size(); }
// Coefficient of x^k, or 0 when k is outside [0, size()).
Mint at(long long k) const {
  if (k < 0 || k >= size()) return 0U;
  return (*this)[k];
}
// Index of the lowest non-zero coefficient, or -1 if all coefficients are zero.
int ord() const {
  const int n = size();
  int i = 0;
  while (i < n && !(*this)[i]) ++i;
  return (i < n) ? i : -1;
}
// Index of the highest non-zero coefficient, or -1 if all coefficients are zero.
int deg() const {
  int i = size() - 1;
  while (i >= 0 && !(*this)[i]) --i;
  return i;
}
// Copy truncated to the first min(n, size()) coefficients, i.e. *this mod x^n.
Poly mod(int n) const {
  const int keep = min(n, size());
  const Mint *const first = data();
  return Poly(vector<Mint>(first, first + keep));
}
// Prints all coefficients as "[c0, c1, ...]" (no truncation).
friend std::ostream &operator<<(std::ostream &os, const Poly &fs) {
  const int n = fs.size();
  os << "[";
  if (n > 0) os << fs[0];
  for (int i = 1; i < n; ++i) os << ", " << fs[i];
  os << "]";
  return os;
}
// Coefficient-wise addition; *this grows to fs.size() if it is shorter.
Poly &operator+=(const Poly &fs) {
  const int nf = fs.size();
  if (size() < nf) resize(nf);
  for (int i = 0; i < nf; ++i) {
    (*this)[i] += fs[i];
  }
  return *this;
}
// Coefficient-wise subtraction; *this grows to fs.size() if it is shorter.
Poly &operator-=(const Poly &fs) {
  const int nf = fs.size();
  if (size() < nf) resize(nf);
  for (int i = 0; i < nf; ++i) {
    (*this)[i] -= fs[i];
  }
  return *this;
}
// 3 E(|t| + |f|)
// Polynomial product, in place. The result has size() + fs.size() - 1
// coefficients, or is empty if either operand is empty.
// The transform length must fit in the scratch buffer (asserted).
// Overwrites polyWork0.
// Safe when fs refers to *this (p *= p squares p).
Poly &operator*=(const Poly &fs) {
if (empty() || fs.empty()) return *this = {};
const int nt = size(), nf = fs.size();
int n = 1;
for (; n < nt + nf - 1; n <<= 1) {}
assert(n <= LIM_POLY);
// Stage fs into the scratch buffer BEFORE resizing/transforming *this:
// if fs aliases *this, the steps below would otherwise replace the
// coefficients we still need to read with already-transformed data.
memcpy(polyWork0, fs.data(), nf * sizeof(Mint));
memset(polyWork0 + nf, 0, (n - nf) * sizeof(Mint));
fft(polyWork0, n); // 1 E(n)
resize(n);
fft(data(), n); // 1 E(n)
for (int i = 0; i < n; ++i) (*this)[i] *= polyWork0[i];
invFft(data(), n); // 1 E(n)
resize(nt + nf - 1);
return *this;
}
// 13 E(deg(t) - deg(f) + 1)
// Polynomial (Euclidean) quotient, in place. fs must be non-zero (asserted).
// Works on reversed coefficients:
// rev(t) = rev(f) rev(q) + x^(deg(t)-deg(f)+1) rev(r)
// so rev(q) is the power-series quotient rev(t) / rev(f) truncated to
// deg(t) - deg(f) + 1 terms.
Poly &operator/=(const Poly &fs) {
  const int m = deg(), n = fs.deg();
  assert(n != -1);
  if (m < n) return *this = {};
  const int qLen = m - n + 1;
  // Only the top qLen coefficients of either operand can influence the quotient.
  const int fLen = min(m - n, n) + 1;
  Poly tsRev(qLen), fsRev(fLen);
  for (int i = 0; i < qLen; ++i) tsRev[i] = (*this)[m - i];
  for (int i = 0; i < fLen; ++i) fsRev[i] = fs[n - i];
  const Poly qsRev = tsRev.div(fsRev, qLen); // 13 E(m - n + 1)
  resize(qLen);
  for (int i = 0; i < qLen; ++i) (*this)[qLen - 1 - i] = qsRev[i];
  return *this;
}
// 13 E(deg(t) - deg(f) + 1) + 3 E(|t|)
// Polynomial remainder, in place: t - f * (t / f), with trailing zero
// coefficients trimmed so that size() == deg() + 1 afterwards.
Poly &operator%=(const Poly &fs) {
  const Poly quotient = *this / fs; // 13 E(deg(t) - deg(f) + 1)
  const Poly product = fs * quotient; // 3 E(|t|)
  *this -= product;
  resize(deg() + 1);
  return *this;
}
// Multiplies every coefficient by the scalar a.
Poly &operator*=(const Mint &a) {
  for (Mint &coef : *this) coef *= a;
  return *this;
}
// Divides every coefficient by the scalar a (one inversion, then multiplications).
// a must be invertible; Mint::inv asserts this.
Poly &operator/=(const Mint &a) {
  const Mint b = a.inv();
  for (Mint &coef : *this) coef *= b;
  return *this;
}
// Unary plus: returns a copy.
Poly operator+() const {
  return *this;
}
// Unary minus: returns a copy with every coefficient negated.
Poly operator-() const {
  Poly neg(*this);
  for (Mint &coef : neg) coef = -coef;
  return neg;
}
// Value-returning binary operators; each copies the left operand and applies
// the matching compound assignment.
Poly operator+(const Poly &fs) const { Poly res(*this); res += fs; return res; }
Poly operator-(const Poly &fs) const { Poly res(*this); res -= fs; return res; }
Poly operator*(const Poly &fs) const { Poly res(*this); res *= fs; return res; }
Poly operator/(const Poly &fs) const { Poly res(*this); res /= fs; return res; }
Poly operator%(const Poly &fs) const { Poly res(*this); res %= fs; return res; }
Poly operator*(const Mint &a) const { Poly res(*this); res *= a; return res; }
Poly operator/(const Mint &a) const { Poly res(*this); res /= a; return res; }
// scalar * polynomial
friend Poly operator*(const Mint &a, const Poly &fs) { Poly res(fs); res *= a; return res; }
// 10 E(n)
// Power-series inverse: returns the first n coefficients of 1 / t, where t = *this.
// Requires a non-empty t with non-zero constant term and n >= 1 (asserted); the transform length must fit in LIM_POLY.
// Precision is doubled each round using the update
// f <- f - (t f - 1) f
// Overwrites polyWork0 and polyWork1.
Poly inv(int n) const {
assert(!empty()); assert((*this)[0]); assert(1 <= n);
assert(n == 1 || 1 << (32 - __builtin_clz(n - 1)) <= LIM_POLY);
Poly fs(n);
fs[0] = (*this)[0].inv();
// Invariant at the top of each round: fs[0..m) is correct and fs[m..) is still zero.
for (int m = 1; m < n; m <<= 1) {
// polyWork0 <- DFT of (t mod x^(2m)), zero-padded to length 2m.
memcpy(polyWork0, data(), min(m << 1, size()) * sizeof(Mint));
memset(polyWork0 + min(m << 1, size()), 0, ((m << 1) - min(m << 1, size())) * sizeof(Mint));
fft(polyWork0, m << 1); // 2 E(n)
// polyWork1 <- DFT of the current f, zero-padded to length 2m.
memcpy(polyWork1, fs.data(), min(m << 1, n) * sizeof(Mint));
memset(polyWork1 + min(m << 1, n), 0, ((m << 1) - min(m << 1, n)) * sizeof(Mint));
fft(polyWork1, m << 1); // 2 E(n)
// polyWork0 <- t * f (cyclic, length 2m); then drop the low m coefficients.
for (int i = 0; i < m << 1; ++i) polyWork0[i] *= polyWork1[i];
invFft(polyWork0, m << 1); // 2 E(n)
memset(polyWork0, 0, m * sizeof(Mint));
// Multiply what is left by f once more; the new coefficients are the negated entries m .. 2m-1.
fft(polyWork0, m << 1); // 2 E(n)
for (int i = 0; i < m << 1; ++i) polyWork0[i] *= polyWork1[i];
invFft(polyWork0, m << 1); // 2 E(n)
for (int i = m, i0 = min(m << 1, n); i < i0; ++i) fs[i] = -polyWork0[i];
}
return fs;
}
// 9 E(n)
// Need (4 m)-th roots of unity to lift from (mod x^m) to (mod x^(2m)).
// f <- f - (t f - 1) f
// (t f^2) mod ((x^(2m) - 1) (x^m - 1^(1/4)))
/*
Poly inv(int n) const {
assert(!empty()); assert((*this)[0]); assert(1 <= n);
assert(n == 1 || 3 << (31 - __builtin_clz(n - 1)) <= LIM_POLY);
assert(n <= 1 << (FFT_MAX - 1));
Poly fs(n);
fs[0] = (*this)[0].inv();
for (int h = 2, m = 1; m < n; ++h, m <<= 1) {
const Mint a = FFT_ROOTS[h], b = INV_FFT_ROOTS[h];
memcpy(polyWork0, data(), min(m << 1, size()) * sizeof(Mint));
memset(polyWork0 + min(m << 1, size()), 0, ((m << 1) - min(m << 1, size())) * sizeof(Mint));
{
Mint aa = 1;
for (int i = 0; i < m; ++i) { polyWork0[(m << 1) + i] = aa * polyWork0[i]; aa *= a; }
for (int i = 0; i < m; ++i) { polyWork0[(m << 1) + i] += aa * polyWork0[m + i]; aa *= a; }
}
fft(polyWork0, m << 1); // 2 E(n)
fft(polyWork0 + (m << 1), m); // 1 E(n)
memcpy(polyWork1, fs.data(), min(m << 1, n) * sizeof(Mint));
memset(polyWork1 + min(m << 1, n), 0, ((m << 1) - min(m << 1, n)) * sizeof(Mint));
{
Mint aa = 1;
for (int i = 0; i < m; ++i) { polyWork1[(m << 1) + i] = aa * polyWork1[i]; aa *= a; }
for (int i = 0; i < m; ++i) { polyWork1[(m << 1) + i] += aa * polyWork1[m + i]; aa *= a; }
}
fft(polyWork1, m << 1); // 2 E(n)
fft(polyWork1 + (m << 1), m); // 1 E(n)
for (int i = 0; i < (m << 1) + m; ++i) polyWork0[i] *= polyWork1[i] * polyWork1[i];
invFft(polyWork0, m << 1); // 2 E(n)
invFft(polyWork0 + (m << 1), m); // 1 E(n)
// 2 f0 + (-f2), (-f1) + (-f3), 1^(1/4) (-f1) - (-f2) - 1^(1/4) (-f3)
{
Mint bb = 1;
for (int i = 0, i0 = min(m, n - m); i < i0; ++i) {
unsigned x = polyWork0[i].x + (bb * polyWork0[(m << 1) + i]).x + MO2 - (fs[i].x << 1); // < 4 MO
fs[m + i] = Mint(static_cast<unsigned long long>(FFT_ROOTS[2].x) * x) - polyWork0[m + i];
fs[m + i].x = ((fs[m + i].x & 1) ? (fs[m + i].x + MO) : fs[m + i].x) >> 1;
bb *= b;
}
}
}
return fs;
}
*/
// 13 E(n)
// Power-series quotient: returns the first n coefficients of t / f, where t = *this and f = fs.
// Requires a non-empty fs with non-zero constant term and n >= 1 (asserted).
// With m the power of two satisfying m < n <= 2 m:
// g = (1 / f) mod x^m
// h <- h - (f h - t) g
// Overwrites polyWork0 and polyWork1 (also through the nested inv call).
Poly div(const Poly &fs, int n) const {
assert(!fs.empty()); assert(fs[0]); assert(1 <= n);
if (n == 1) return {at(0) / fs[0]};
// m < n <= 2 m
const int m = 1 << (31 - __builtin_clz(n - 1));
assert(m << 1 <= LIM_POLY);
// gs <- DFT (length 2m) of g = 1 / f mod x^m.
Poly gs = fs.inv(m); // 5 E(n)
gs.resize(m << 1);
fft(gs.data(), m << 1); // 1 E(n)
// Low half of the answer: (t mod x^m) * g, of which the first m coefficients are kept in hs.
memcpy(polyWork0, data(), min(m, size()) * sizeof(Mint));
memset(polyWork0 + min(m, size()), 0, ((m << 1) - min(m, size())) * sizeof(Mint));
fft(polyWork0, m << 1); // 1 E(n)
for (int i = 0; i < m << 1; ++i) polyWork0[i] *= gs[i];
invFft(polyWork0, m << 1); // 1 E(n)
Poly hs(n);
memcpy(hs.data(), polyWork0, m * sizeof(Mint));
// polyWork0 <- f * h (cyclic, length 2m), using only the low m coefficients of h.
memset(polyWork0 + m, 0, m * sizeof(Mint));
fft(polyWork0, m << 1); // 1 E(n)
memcpy(polyWork1, fs.data(), min(m << 1, fs.size()) * sizeof(Mint));
memset(polyWork1 + min(m << 1, fs.size()), 0, ((m << 1) - min(m << 1, fs.size())) * sizeof(Mint));
fft(polyWork1, m << 1); // 1 E(n)
for (int i = 0; i < m << 1; ++i) polyWork0[i] *= polyWork1[i];
invFft(polyWork0, m << 1); // 1 E(n)
// Keep only coefficients m .. 2m-1 of (f h - t), then multiply by g to get the correction.
memset(polyWork0, 0, m * sizeof(Mint));
for (int i = m, i0 = min(m << 1, size()); i < i0; ++i) polyWork0[i] -= (*this)[i];
fft(polyWork0, m << 1); // 1 E(n)
for (int i = 0; i < m << 1; ++i) polyWork0[i] *= gs[i];
invFft(polyWork0, m << 1); // 1 E(n)
for (int i = m; i < n; ++i) hs[i] = -polyWork0[i];
return hs;
}
// (4 (floor(log_2 k) - ceil(log_2 |f|)) + 16) E(|f|) for |t| < |f|
// Returns the single coefficient [x^k] (t(x) / f(x)) for k >= 0 (asserted), where t = *this and f = fs.
// k is halved each round via
// [x^k] (t(x) / f(x)) = [x^k] ((t(x) f(-x)) / (f(x) f(-x))
// and once k drops below m (the power of two >= |f|) the coefficient is read off a direct series inverse.
// Overwrites polyWork0, polyWork1 and polyWork2.
// polyWork0: half of (2 m)-th roots of unity, inversed, bit-reversed
Mint divAt(const Poly &fs, long long k) const {
assert(k >= 0);
// Numerator not shorter than the denominator: split off the polynomial quotient and recurse on the remainder.
if (size() >= fs.size()) {
const Poly qs = *this / fs; // 13 E(deg(t) - deg(f) + 1)
Poly rs = *this - fs * qs; // 3 E(|t|)
rs.resize(rs.deg() + 1);
return qs.at(k) + rs.divAt(fs, k);
}
// m = smallest power of two >= fs.size(), h = log2(m).
int h = 0, m = 1;
for (; m < fs.size(); ++h, m <<= 1) {}
// Small k: expand 1 / f to k + 1 terms and take one convolution entry.
if (k < m) {
const Poly gs = fs.inv(k + 1); // 10 E(|f|)
Mint sum;
for (int i = 0, i0 = min<int>(k + 1, size()); i < i0; ++i) sum += (*this)[i] * gs[k - i];
return sum;
}
assert(m << 1 <= LIM_POLY);
// Per-index multipliers for the odd-k branch below (see the polyWork0 note above); every entry carries the factor 1/2.
polyWork0[0] = Mint(2U).inv();
for (int hh = 0; hh < h; ++hh) for (int i = 0; i < 1 << hh; ++i) polyWork0[1 << hh | i] = polyWork0[i] * INV_FFT_ROOTS[hh + 2];
const Mint a = FFT_ROOTS[h + 1];
// polyWork2 <- DFT(t, 2m), polyWork1 <- DFT(f, 2m).
memcpy(polyWork2, data(), size() * sizeof(Mint));
memset(polyWork2 + size(), 0, ((m << 1) - size()) * sizeof(Mint));
fft(polyWork2, m << 1); // 2 E(|f|)
memcpy(polyWork1, fs.data(), fs.size() * sizeof(Mint));
memset(polyWork1 + fs.size(), 0, ((m << 1) - fs.size()) * sizeof(Mint));
fft(polyWork1, m << 1); // 2 E(|f|)
for (; ; ) {
// Combine the adjacent transform entries 2i and 2i+1 to obtain the length-m transform of the
// halved numerator (odd or even part, depending on the low bit of k) and of the new denominator.
if (k & 1) {
for (int i = 0; i < m; ++i) polyWork2[i] = polyWork0[i] * (polyWork2[i << 1 | 0] * polyWork1[i << 1 | 1] - polyWork2[i << 1 | 1] * polyWork1[i << 1 | 0]);
} else {
for (int i = 0; i < m; ++i) {
polyWork2[i] = polyWork2[i << 1 | 0] * polyWork1[i << 1 | 1] + polyWork2[i << 1 | 1] * polyWork1[i << 1 | 0];
// Exact halving modulo the odd modulus MO: add MO first when the value is odd.
polyWork2[i].x = ((polyWork2[i].x & 1) ? (polyWork2[i].x + MO) : polyWork2[i].x) >> 1;
}
}
for (int i = 0; i < m; ++i) polyWork1[i] = polyWork1[i << 1 | 0] * polyWork1[i << 1 | 1];
if ((k >>= 1) < m) {
// k is small enough: go back to coefficients and finish with a direct series inverse.
invFft(polyWork2, m); // 1 E(|f|)
invFft(polyWork1, m); // 1 E(|f|)
// Poly::inv does not use polyWork2
const Poly gs = Poly(vector<Mint>(polyWork1, polyWork1 + k + 1)).inv(k + 1); // 10 E(|f|)
Mint sum;
for (int i = 0; i <= k; ++i) sum += polyWork2[i] * gs[k - i];
return sum;
}
// Extend both length-m transforms back to length 2m: the upper half is the transform of the
// coefficients scaled by successive powers of a.
memcpy(polyWork2 + m, polyWork2, m * sizeof(Mint));
invFft(polyWork2 + m, m); // (floor(log_2 k) - ceil(log_2 |f|)) E(|f|)
memcpy(polyWork1 + m, polyWork1, m * sizeof(Mint));
invFft(polyWork1 + m, m); // (floor(log_2 k) - ceil(log_2 |f|)) E(|f|)
Mint aa = 1;
for (int i = m; i < m << 1; ++i) { polyWork2[i] *= aa; polyWork1[i] *= aa; aa *= a; }
fft(polyWork2 + m, m); // (floor(log_2 k) - ceil(log_2 |f|)) E(|f|)
fft(polyWork1 + m, m); // (floor(log_2 k) - ceil(log_2 |f|)) E(|f|)
}
}
// 13 E(n)
// Power-series logarithm: first n coefficients of log(t), t = *this.
// Requires a non-empty t with constant term exactly 1 and n <= LIM_INV
// (asserted). Uses the identity
// D log(t) = (D t) / t
// storing i * t[i] at index i (x * D t), so that the quotient only needs a
// per-index division by i to integrate.
Poly log(int n) const {
  assert(!empty());
  assert((*this)[0].x == 1U);
  assert(n <= LIM_INV);
  Poly deriv = mod(n);
  const int len = deriv.size();
  for (int i = 0; i < len; ++i) deriv[i] *= i;
  Poly res = deriv.div(*this, n);
  for (int i = 1; i < n; ++i) res[i] *= ::inv[i];
  return res;
}
// (16 + 1/2) E(n)
// Power-series exponential: returns the first n coefficients of exp(t), where t = *this.
// Requires a non-empty t with zero constant term and n >= 1 (asserted); the transform length must fit in both LIM_INV and LIM_POLY.
// f and g = 1 / f are refined together, doubling the precision m each round:
// f = exp(t) mod x^m ==> (D f) / f == D t (mod x^m)
// g = (1 / exp(t)) mod x^m
// f <- f - (log f - t) / (1 / f)
// = f - (I ((D f) / f) - t) f
// == f - (I ((D f) / f + (f g - 1) ((D f) / f - D (t mod x^m))) - t) f (mod x^(2m))
// = f - (I (g (D f - f D (t mod x^m)) + D (t mod x^m)) - t) f
// g <- g - (f g - 1) g
// Overwrites all four scratch buffers:
// polyWork1: DFT(f, 2 m), polyWork2: g, polyWork3: DFT(g, 2 m)
Poly exp(int n) const {
assert(!empty()); assert(!(*this)[0]); assert(1 <= n);
assert(n == 1 || 1 << (32 - __builtin_clz(n - 1)) <= min(LIM_INV, LIM_POLY));
if (n == 1) return {1U};
if (n == 2) return {1U, at(1)};
Poly fs(n);
// Seed for m = 1: f = 1, g = 1, and the length-2 transform of f (both entries 1).
fs[0].x = polyWork1[0].x = polyWork1[1].x = polyWork2[0].x = 1U;
int m;
// Full doubling rounds, run while 2 m < n. Each round extends f from m to 2 m coefficients and g from m to 2 m coefficients.
for (m = 1; m << 1 < n; m <<= 1) {
// polyWork0 <- (x D (t mod x^m)) * f (cyclic, length m) minus x D f.
for (int i = 0, i0 = min(m, size()); i < i0; ++i) polyWork0[i] = i * (*this)[i];
memset(polyWork0 + min(m, size()), 0, (m - min(m, size())) * sizeof(Mint));
fft(polyWork0, m); // (1/2) E(n)
for (int i = 0; i < m; ++i) polyWork0[i] *= polyWork1[i];
invFft(polyWork0, m); // (1/2) E(n)
for (int i = 0; i < m; ++i) polyWork0[i] -= i * fs[i];
// Multiply by g (length-2m transform kept in polyWork3 for reuse below).
memset(polyWork0 + m, 0, m * sizeof(Mint));
fft(polyWork0, m << 1); // 1 E(n)
memcpy(polyWork3, polyWork2, m * sizeof(Mint));
memset(polyWork3 + m, 0, m * sizeof(Mint));
fft(polyWork3, m << 1); // 1 E(n)
for (int i = 0; i < m << 1; ++i) polyWork0[i] *= polyWork3[i];
invFft(polyWork0, m << 1); // 1 E(n)
// Integrate (divide entry i by m + i) and add the matching coefficients of t.
for (int i = 0; i < m; ++i) polyWork0[i] *= ::inv[m + i];
for (int i = 0, i0 = min(m, size() - m); i < i0; ++i) polyWork0[i] += (*this)[m + i];
// Multiply by f; the low m entries of the product become f[m .. 2m).
memset(polyWork0 + m, 0, m * sizeof(Mint));
fft(polyWork0, m << 1); // 1 E(n)
for (int i = 0; i < m << 1; ++i) polyWork0[i] *= polyWork1[i];
invFft(polyWork0, m << 1); // 1 E(n)
memcpy(fs.data() + m, polyWork0, m * sizeof(Mint));
// Refresh polyWork1 with the length-4m transform of the extended f (needed by the next round).
memcpy(polyWork1, fs.data(), (m << 1) * sizeof(Mint));
memset(polyWork1 + (m << 1), 0, (m << 1) * sizeof(Mint));
fft(polyWork1, m << 2); // 2 E(n)
// Extend g: g <- g - (f g - 1) g, producing g[m .. 2m).
for (int i = 0; i < m << 1; ++i) polyWork0[i] = polyWork1[i] * polyWork3[i];
invFft(polyWork0, m << 1); // 1 E(n)
memset(polyWork0, 0, m * sizeof(Mint));
fft(polyWork0, m << 1); // 1 E(n)
for (int i = 0; i < m << 1; ++i) polyWork0[i] *= polyWork3[i];
invFft(polyWork0, m << 1); // 1 E(n)
for (int i = m; i < m << 1; ++i) polyWork2[i] = -polyWork0[i];
}
// Last round (here m < n <= 2 m): only f is extended, g is not refined further.
// The product with g is assembled from length-m transforms of the two halves of each operand instead of one length-2m transform.
for (int i = 0, i0 = min(m, size()); i < i0; ++i) polyWork0[i] = i * (*this)[i];
memset(polyWork0 + min(m, size()), 0, (m - min(m, size())) * sizeof(Mint));
fft(polyWork0, m); // (1/2) E(n)
for (int i = 0; i < m; ++i) polyWork0[i] *= polyWork1[i];
invFft(polyWork0, m); // (1/2) E(n)
for (int i = 0; i < m; ++i) polyWork0[i] -= i * fs[i];
// Split polyWork0 into low half (at offset 0) and high half (at offset m), each zero-padded to length m.
memcpy(polyWork0 + m, polyWork0 + (m >> 1), (m >> 1) * sizeof(Mint));
memset(polyWork0 + (m >> 1), 0, (m >> 1) * sizeof(Mint));
memset(polyWork0 + m + (m >> 1), 0, (m >> 1) * sizeof(Mint));
fft(polyWork0, m); // (1/2) E(n)
fft(polyWork0 + m, m); // (1/2) E(n)
// polyWork3[0..m) still holds the transform from the previous round; add the transform of g's upper half at offset m.
memcpy(polyWork3 + m, polyWork2 + (m >> 1), (m >> 1) * sizeof(Mint));
memset(polyWork3 + m + (m >> 1), 0, (m >> 1) * sizeof(Mint));
fft(polyWork3 + m, m); // (1/2) E(n)
for (int i = 0; i < m; ++i) polyWork0[m + i] = polyWork0[i] * polyWork3[m + i] + polyWork0[m + i] * polyWork3[i];
for (int i = 0; i < m; ++i) polyWork0[i] *= polyWork3[i];
invFft(polyWork0, m); // (1/2) E(n)
invFft(polyWork0 + m, m); // (1/2) E(n)
for (int i = 0; i < m >> 1; ++i) polyWork0[(m >> 1) + i] += polyWork0[m + i];
for (int i = 0; i < m; ++i) polyWork0[i] *= ::inv[m + i];
for (int i = 0, i0 = min(m, size() - m); i < i0; ++i) polyWork0[i] += (*this)[m + i];
memset(polyWork0 + m, 0, m * sizeof(Mint));
fft(polyWork0, m << 1); // 1 E(n)
for (int i = 0; i < m << 1; ++i) polyWork0[i] *= polyWork1[i];
invFft(polyWork0, m << 1); // 1 E(n)
// Only the n - m coefficients that were requested are copied out.
memcpy(fs.data() + m, polyWork0, (n - m) * sizeof(Mint));
return fs;
}
// (29 + 1/2) E(n)
// First n coefficients of t^a for a modular exponent a, computed as
// exp(a * log(t)). Requires a non-empty t with constant term exactly 1 and
// n >= 1 (asserted).
Poly pow(Mint a, int n) const {
  assert(!empty());
  assert((*this)[0].x == 1U);
  assert(1 <= n);
  Poly scaledLog = log(n); // 13 E(n)
  scaledLog *= a;
  return scaledLog.exp(n); // (16 + 1/2) E(n)
}
// (29 + 1/2) E(n - a ord(t))
// First n coefficients of t^a for an integer exponent a >= 0 and n >= 1
// (asserted); unlike pow(Mint, int) the constant term of t may be anything.
// Writes t = lead * x^o * u with u[0] = 1, so that
// t^a = lead^a * x^(a o) * u^a.
Poly pow(long long a, int n) const {
  assert(a >= 0);
  assert(1 <= n);
  if (a == 0) {
    // t^0 = 1
    Poly one(n);
    one[0].x = 1U;
    return one;
  }
  const int o = ord();
  // Zero polynomial, or the lowest term x^(a o) already lies beyond x^(n-1).
  if (o == -1 || o > (n - 1) / a) return Poly(n);
  const int offset = static_cast<int>(a * o);
  const int rest = n - offset;
  const Mint lead = (*this)[o];
  const Mint b = lead.inv(), c = lead.pow(a);
  const int ntt = min(rest, size() - o);
  Poly tts(ntt);
  for (int i = 0; i < ntt; ++i) tts[i] = b * (*this)[o + i];
  const Poly powered = tts.pow(Mint(a), rest); // (29 + 1/2) E(n - a ord(t))
  Poly gs(n);
  for (int i = 0; i < rest; ++i) gs[offset + i] = c * powered[i];
  return gs;
}
// (10 + 1/2) E(n)
// Power-series square root: returns the first n coefficients of the root of t = *this whose constant term is 1.
// Requires a non-empty t with constant term exactly 1 and n >= 1 (asserted); the transform length must fit in LIM_POLY.
// f and g are refined together, doubling the precision m each round (h in the update below is the input t):
// f = t^(1/2) mod x^m, g = 1 / t^(1/2) mod x^m
// f <- f - (f^2 - h) g / 2
// g <- g - (f g - 1) g
// Overwrites all four scratch buffers:
// polyWork1: DFT(f, m), polyWork2: g, polyWork3: DFT(g, 2 m)
Poly sqrt(int n) const {
assert(!empty()); assert((*this)[0].x == 1U); assert(1 <= n);
assert(n == 1 || 1 << (32 - __builtin_clz(n - 1)) <= LIM_POLY);
if (n == 1) return {1U};
if (n == 2) return {1U, at(1) / 2};
Poly fs(n);
// Seed for m = 1: f = 1, g = 1, and the length-1 transform of f.
fs[0].x = polyWork1[0].x = polyWork2[0].x = 1U;
int m;
// Full doubling rounds, run while 2 m < n.
for (m = 1; m << 1 < n; m <<= 1) {
// polyWork1 <- f^2 (cyclic, length m) minus the coefficients of t at i and at m + i.
for (int i = 0; i < m; ++i) polyWork1[i] *= polyWork1[i];
invFft(polyWork1, m); // (1/2) E(n)
for (int i = 0, i0 = min(m, size()); i < i0; ++i) polyWork1[i] -= (*this)[i];
for (int i = 0, i0 = min(m, size() - m); i < i0; ++i) polyWork1[i] -= (*this)[m + i];
// Multiply by g (length-2m transform kept in polyWork3 for reuse below).
memset(polyWork1 + m, 0, m * sizeof(Mint));
fft(polyWork1, m << 1); // 1 E(n)
memcpy(polyWork3, polyWork2, m * sizeof(Mint));
memset(polyWork3 + m, 0, m * sizeof(Mint));
fft(polyWork3, m << 1); // 1 E(n)
for (int i = 0; i < m << 1; ++i) polyWork1[i] *= polyWork3[i];
invFft(polyWork1, m << 1); // 1 E(n)
// f[m + i] = -polyWork1[i] / 2; the halving adds MO first when the value is odd (exact because MO is odd).
for (int i = 0; i < m; ++i) { polyWork1[i] = -polyWork1[i]; fs[m + i].x = ((polyWork1[i].x & 1) ? (polyWork1[i].x + MO) : polyWork1[i].x) >> 1; }
// Refresh polyWork1 with the length-2m transform of the extended f.
memcpy(polyWork1, fs.data(), (m << 1) * sizeof(Mint));
fft(polyWork1, m << 1); // 1 E(n)
// Extend g: g <- g - (f g - 1) g, producing g[m .. 2m).
for (int i = 0; i < m << 1; ++i) polyWork0[i] = polyWork1[i] * polyWork3[i];
invFft(polyWork0, m << 1); // 1 E(n)
memset(polyWork0, 0, m * sizeof(Mint));
fft(polyWork0, m << 1); // 1 E(n)
for (int i = 0; i < m << 1; ++i) polyWork0[i] *= polyWork3[i];
invFft(polyWork0, m << 1); // 1 E(n)
for (int i = m; i < m << 1; ++i) polyWork2[i] = -polyWork0[i];
}
// Last round (here m < n <= 2 m): only f is extended.
// The product with g is assembled from length-m transforms of the two halves of each operand instead of one length-2m transform.
for (int i = 0; i < m; ++i) polyWork1[i] *= polyWork1[i];
invFft(polyWork1, m); // (1/2) E(n)
for (int i = 0, i0 = min(m, size()); i < i0; ++i) polyWork1[i] -= (*this)[i];
for (int i = 0, i0 = min(m, size() - m); i < i0; ++i) polyWork1[i] -= (*this)[m + i];
// Split polyWork1 into low half (at offset 0) and high half (at offset m), each zero-padded to length m.
memcpy(polyWork1 + m, polyWork1 + (m >> 1), (m >> 1) * sizeof(Mint));
memset(polyWork1 + (m >> 1), 0, (m >> 1) * sizeof(Mint));
memset(polyWork1 + m + (m >> 1), 0, (m >> 1) * sizeof(Mint));
fft(polyWork1, m); // (1/2) E(n)
fft(polyWork1 + m, m); // (1/2) E(n)
// polyWork3[0..m) still holds the transform from the previous round; add the transform of g's upper half at offset m.
memcpy(polyWork3 + m, polyWork2 + (m >> 1), (m >> 1) * sizeof(Mint));
memset(polyWork3 + m + (m >> 1), 0, (m >> 1) * sizeof(Mint));
fft(polyWork3 + m, m); // (1/2) E(n)
// for (int i = 0; i < m << 1; ++i) polyWork1[i] *= polyWork3[i];
for (int i = 0; i < m; ++i) polyWork1[m + i] = polyWork1[i] * polyWork3[m + i] + polyWork1[m + i] * polyWork3[i];
for (int i = 0; i < m; ++i) polyWork1[i] *= polyWork3[i];
invFft(polyWork1, m); // (1/2) E(n)
invFft(polyWork1 + m, m); // (1/2) E(n)
for (int i = 0; i < m >> 1; ++i) polyWork1[(m >> 1) + i] += polyWork1[m + i];
// Only the n - m coefficients that were requested are written; same negate-and-halve step as in the loop.
for (int i = 0; i < n - m; ++i) { polyWork1[i] = -polyWork1[i]; fs[m + i].x = ((polyWork1[i].x & 1) ? (polyWork1[i].x + MO) : polyWork1[i].x) >> 1; }
return fs;
}
// (10 + 1/2) E(n)
// Square root of a general power series, first n coefficients (n >= 1, asserted).
// modSqrt must return a quadratic residue if exists, or anything otherwise.
// Return {} if *this does not have a square root.
// Writes t = lead * x^o * u with u[0] = 1; a root exists only if o is even and
// lead has a modular square root c, and then sqrt(t) = c * x^(o/2) * sqrt(u).
template <class F> Poly sqrt(int n, F modSqrt) const {
  assert(1 <= n);
  const int o = ord();
  if (o == -1) return Poly(n); // sqrt(0) = 0
  if (o & 1) return {};
  const Mint lead = (*this)[o];
  const Mint c = modSqrt(lead);
  if (c * c != lead) return {};
  const int half = o >> 1;
  if (half >= n) return Poly(n); // lowest term lies beyond x^(n-1)
  const int rest = n - half;
  const Mint b = lead.inv();
  const int ntt = min(rest, size() - o);
  Poly tts(ntt);
  for (int i = 0; i < ntt; ++i) tts[i] = b * (*this)[o + i];
  const Poly root = tts.sqrt(rest); // (10 + 1/2) E(n)
  Poly gs(n);
  for (int i = 0; i < rest; ++i) gs[half + i] = c * root[i];
  return gs;
}
// 6 E(|t|)
// Taylor shift: returns the coefficients of t(x + a), where t = *this.
// x -> x + a
// The result has the same number of coefficients as t; an empty t gives {}.
// Uses fac[i] * t[i] convolved with the reversed sequence a^i / i!, then
// divides entry i by i!.
// Preconditions (asserted): size() <= LIM_INV because fac/invFac are indexed
// up to size() - 1, and the transform length 2 m must fit in LIM_POLY.
// Overwrites polyWork0 and polyWork1.
Poly shift(const Mint &a) const {
if (empty()) return {};
const int n = size();
// Checked before anything is written: without it an oversized input would
// silently run past the static tables and scratch buffers.
assert(n <= LIM_INV);
int m = 1;
for (; m < n; m <<= 1) {}
assert(m << 1 <= LIM_POLY);
for (int i = 0; i < n; ++i) polyWork0[i] = fac[i] * (*this)[i];
memset(polyWork0 + n, 0, ((m << 1) - n) * sizeof(Mint));
fft(polyWork0, m << 1); // 2 E(|t|)
{
// polyWork1[n - 1 - i] = a^i / i!  (stored reversed so the product below is a correlation)
Mint aa = 1;
for (int i = 0; i < n; ++i) { polyWork1[n - 1 - i] = invFac[i] * aa; aa *= a; }
}
memset(polyWork1 + n, 0, ((m << 1) - n) * sizeof(Mint));
fft(polyWork1, m << 1); // 2 E(|t|)
for (int i = 0; i < m << 1; ++i) polyWork0[i] *= polyWork1[i];
invFft(polyWork0, m << 1); // 2 E(|t|)
Poly fs(n);
// Entry n - 1 + i of the product holds i! times the i-th shifted coefficient.
for (int i = 0; i < n; ++i) fs[i] = invFac[i] * polyWork0[n - 1 + i];
return fs;
}
};
// Returns [x^k] (p(x) / c(x)), where c = cs, d = cs.size() - 1 and
// p = (first d terms of as, as a polynomial) * c, truncated to d coefficients.
// Requires a non-empty cs with cs[0] != 0 and at least d entries in as
// (asserted).
Mint linearRecurrenceAt(const vector<Mint> &as, const vector<Mint> &cs, long long k) {
  assert(!cs.empty());
  assert(cs[0]);
  const int d = cs.size() - 1;
  assert(as.size() >= static_cast<size_t>(d));
  const Poly denom(cs);
  const vector<Mint> head(as.begin(), as.begin() + d);
  Poly numer(head);
  numer *= denom;
  return numer.mod(d).divAt(denom, k);
}
struct SubproductTree {
int logN, n, nn;
vector<Mint> xs;
// [DFT_4((X-xs[0])(X-xs[1])(X-xs[2])(X-xs[3]))] [(X-xs[0])(X-xs[1])(X-xs[2])(X-xs[3])mod X^4]
// [ DFT_4((X-xs[0])(X-xs[1])) ] [ DFT_4((X-xs[2])(X-xs[3])) ]
// [ DFT_2(X-xs[0]) ] [ DFT_2(X-xs[1]) ] [ DFT_2(X-xs[2]) ] [ DFT_2(X-xs[3]) ]
vector<Mint> buf;
vector<Mint *> gss;
// (1 - xs[0] X) ... (1 - xs[nn-1] X)
Poly all;
// (ceil(log_2 n) + O(1)) E(n)
SubproductTree(const vector<Mint> &xs_) {
n = xs_.size();
for (logN = 0, nn = 1; nn < n; ++logN, nn <<= 1) {}
xs.assign(nn, 0U);
memcpy(xs.data(), xs_.data(), n * sizeof(Mint));
buf.assign((logN + 1) * (nn << 1), 0U);
gss.assign(nn << 1, nullptr);
for (int h = 0; h <= logN; ++h) for (int u = 1 << h; u < 1 << (h + 1); ++u) {
gss[u] = buf.data() + (h * (nn << 1) + ((u - (1 << h)) << (logN - h + 1)));
}
for (int i = 0; i < nn; ++i) {
gss[nn + i][0] = -xs[i] + 1;
gss[nn + i][1] = -xs[i] - 1;
}
if (nn == 1) gss[1][1] += 2;
for (int h = logN; --h >= 0; ) {
const int m = 1 << (logN - h);
for (int u = 1 << (h + 1); --u >= 1 << h; ) {
for (int i = 0; i < m; ++i) gss[u][i] = gss[u << 1][i] * gss[u << 1 | 1][i];
memcpy(gss[u] + m, gss[u], m * sizeof(Mint));
invFft(gss[u] + m, m); // ((1/2) ceil(log_2 n) + O(1)) E(n)
if (h > 0) {
gss[u][m] -= 2;
const Mint a = FFT_ROOTS[logN - h + 1];
Mint aa = 1;
for (int i = m; i < m << 1; ++i) { gss[u][i] *= aa; aa *= a; };
fft(gss[u] + m, m); // ((1/2) ceil(log_2 n) + O(1)) E(n)
}
}
}
all.resize(nn + 1);
all[0] = 1;
for (int i = 1; i < nn; ++i) all[i] = gss[1][nn + nn - i];
all[nn] = gss[1][nn] - 1;
}
// ((3/2) ceil(log_2 n) + O(1)) E(n) + 10 E(|f|) + 3 E(|f| + 2^(ceil(log_2 n)))
vector<Mint> multiEval(const Poly &fs) const {
vector<Mint> work0(nn), work1(nn), work2(nn);
{
const int m = max(fs.size(), 1);
auto invAll = all.inv(m); // 10 E(|f|)
std::reverse(invAll.begin(), invAll.end());
int mm;
for (mm = 1; mm < m - 1 + nn; mm <<= 1) {}
invAll.resize(mm, 0U);
fft(invAll); // E(|f| + 2^(ceil(log_2 n)))
vector<Mint> ffs(mm, 0U);
memcpy(ffs.data(), fs.data(), fs.size() * sizeof(Mint));
fft(ffs); // E(|f| + 2^(ceil(log_2 n)))
for (int i = 0; i < mm; ++i) ffs[i] *= invAll[i];
invFft(ffs); // E(|f| + 2^(ceil(log_2 n)))
memcpy(((logN & 1) ? work1 : work0).data(), ffs.data() + m - 1, nn * sizeof(Mint));
}
for (int h = 0; h < logN; ++h) {
const int m = 1 << (logN - h);
for (int u = 1 << h; u < 1 << (h + 1); ++u) {
Mint *hs = (((logN - h) & 1) ? work1 : work0).data() + ((u - (1 << h)) << (logN - h));
Mint *hs0 = (((logN - h) & 1) ? work0 : work1).data() + ((u - (1 << h)) << (logN - h));
Mint *hs1 = hs0 + (m >> 1);
fft(hs, m); // ((1/2) ceil(log_2 n) + O(1)) E(n)
for (int i = 0; i < m; ++i) work2[i] = gss[u << 1 | 1][i] * hs[i];
invFft(work2.data(), m); // ((1/2) ceil(log_2 n) + O(1)) E(n)
memcpy(hs0, work2.data() + (m >> 1), (m >> 1) * sizeof(Mint));
for (int i = 0; i < m; ++i) work2[i] = gss[u << 1][i] * hs[i];
invFft(work2.data(), m); // ((1/2) ceil(log_2 n) + O(1)) E(n)
memcpy(hs1, work2.data() + (m >> 1), (m >> 1) * sizeof(Mint));
}
}
work0.resize(n);
return work0;
}
// Cost: ((5/2) ceil(log_2 n) + O(1)) E(n)
// Builds a polynomial from the n values ys by combining them bottom-up through
// the tree stored in gss. Requires ys.size() == n.
Poly interpolate(const vector<Mint> &ys) const {
  assert(static_cast<int>(ys.size()) == n);
  // gs[i] = (i + 1) * all[n - 1 - i]; its multi-point evaluation supplies the
  // per-point denominators.
  Poly gs(n);
  for (int i = 0; i < n; ++i) {
    const int k = i + 1;
    gs[i] = k * all[n - k];
  }
  const vector<Mint> denoms = multiEval(gs);  // ((3/2) ceil(log_2 n) + O(1)) E(n)
  vector<Mint> work(nn << 1, 0U);
  for (int i = 0; i < n; ++i) {
    // A zero denominator means xs[0], ..., xs[n - 1] are not distinct.
    assert(denoms[i]);
    const Mint weight = ys[i] / denoms[i];
    work[i << 1 | 1] = weight;
    work[i << 1] = weight;
  }
  for (int h = logN - 1; h >= 0; --h) {
    const int depth = logN - h;
    const int m = 1 << depth;
    const int firstNode = 1 << h;
    for (int u = (1 << (h + 1)) - 1; u >= firstNode; --u) {
      Mint *seg = work.data() + ((u - firstNode) << (depth + 1));
      // Merge the two halves of this node's segment using the children's
      // stored transforms.
      for (int i = 0; i < m; ++i) {
        seg[i] = gss[u << 1 | 1][i] * seg[i] + gss[u << 1][i] * seg[m + i];
      }
      if (h > 0) {
        // Refill the upper half: copy, inverse transform, scale by successive
        // powers of FFT_ROOTS[depth + 1], then transform again.
        memcpy(seg + m, seg, m * sizeof(Mint));
        invFft(seg + m, m);  // ((1/2) ceil(log_2 n) + O(1)) E(n)
        const Mint root = FFT_ROOTS[depth + 1];
        Mint power = 1;
        for (int i = m; i < m << 1; ++i) {
          seg[i] *= power;
          power *= root;
        }
        fft(seg + m, m);  // ((1/2) ceil(log_2 n) + O(1)) E(n)
      }
    }
  }
  invFft(work.data(), nn);  // E(n)
  // The result is the last n of the first nn entries.
  return Poly(vector<Mint>(work.data() + nn - n, work.data() + nn));
}
};
////////////////////////////////////////////////////////////////////////////////
/*
P := \sum_{i>=1} i! x^i
I := \sum_{i>=1} (# of indecomposable perm) x^i
S := \sum_{i>=1} (# of simple perm) x^i
P = I/(1-I)
I = P/(1+P)
P = x + 2 I^2/(1-I) + S(P(x))
P = x + 2 P^2/(1+P) + S(P(x))
Q := P^<-1>
y = Q + 2 y^2/(1+y) + S
(1 - x) P - x^2 P' = x
(1 - Q) y - Q^2 / Q' = Q
y Q' - (1 + y) Q Q' = Q^2
I https://oeis.org/A003319
Q https://oeis.org/A059372
S https://oeis.org/A111111
F := \sum_{1<=i<=K} i! x^i
G := F^<-1>
[x^N] S(F(x)) = (1/N) [y^(N-1)] S'(y) (y/G)^N
(1 - x) F - x^2 F' = x - (K+1)! x^(K+1)
(1 - G) y - G^2 / G' = y - (K+1)! G^(K+1)
y G' - (1 + y) G G' = G^2 - (K+1)! G^(K+1) G'
*/
// Entry point. Reads "N K" pairs from stdin and prints one residue per pair.
//
// Per query:
//   1. P = \sum_{1<=i<=N} i! x^i and I = P / (1 + P).
//   2. Q (via R[n] = Q[n+1]) from the recurrence quoted in the block below,
//      evaluated with a blockwise FFT scheme keyed on the lowest set bit of n.
//   3. S[n] = -Q[n] - (+-2) for n >= 4.
//   4. G (via H[n] = G[n+1]) together with J = H^(K+1), using the same scheme.
//   5. ans = [N == 1] + 2 * \sum_i fac[N-i] * (I_large^2)[i]
//            + (1/N) * \sum_i i * S[i] * (shifted G)^(-N)[N-i].
//
// Relies on Mint, Poly, fac, inv, fft, invFft and square defined elsewhere in
// this file.
int main() {
  int N, K;
  // Require both conversions to succeed. The previous `~scanf(...)` test only
  // stopped on EOF (-1); a failed or partial match returned 0 or 1 and the
  // loop kept running on stale or uninitialised values.
  while (scanf("%d%d", &N, &K) == 2) {
    // Scratch length: smallest power of two that is >= N + 5.
    int len;
    for (len = 1; len < N + 5; len <<= 1) {}

    // P[i] = i! for 1 <= i <= N, P[0] = 0.
    Poly P(N + 1);
    for (int i = 1; i <= N; ++i) P[i] = fac[i];

    // I = P / (1 + P)
    Poly I;
    {
      Poly P0 = P;
      P0[0] = 1;
      I = P.div(P0, N + 1);
    }
    // cerr<<"I = "<<I<<endl;

    Poly Q(N + 1);
    {
      /*
        y Q' - (1 + y) Q Q' = Q^2
        Q[0] = 0
        Q[1] = 1
        Q[n] = - \sum_{2<=i<=n-1} i Q[i] Q[n+1-i]
               - \sum_{1<=i<=n-1} (i+1) Q[i] Q[n-i]  (n >= 2)
        R[n] := Q[n+1]
        R[0] = 1
        R[n] = - \sum_{1<=i<=n-1} (i+1) R[i] R[n-i]
               - \sum_{0<=i<=n-1} (i+2) R[i] R[n-1-i]  (n >= 1)
      */
      // prod0 accumulates R * R and prod1 accumulates (i R[i]) * R. Each is
      // filled block by block so that entry n is complete (apart from the
      // terms involving R[n] itself) by the time R[n] is computed.
      Poly R(N);
      Poly prod0(len), prod1(len);
      Poly work(len);
      Poly workR0L(len), workR1L(len);
      Poly workR0R(len), workR1R(len);
      R[0] = 1;
      prod0[0] += R[0] * R[0];
      prod1[0] += 0 * R[0] * R[0];
      for (int n = 1; n < N; ++n) {
        // w = lowest set bit of n; it fixes the block size handled now.
        const int w = n & -n;
        if (n == w) {
          // [0, w), [0, w) -> [w, 2 w)
          memset(workR0R.data() + w, 0, w * sizeof(Mint));
          memset(workR1R.data() + w, 0, w * sizeof(Mint));
          for (int i = 0; i < w; ++i) workR0R[i] = R[i];
          for (int i = 0; i < w; ++i) workR1R[i] = i * R[i];
          fft(workR0R.data(), w << 1);
          fft(workR1R.data(), w << 1);
          for (int i = 0; i < w << 1; ++i) work[i] = workR0R[i] * workR0R[i];
          invFft(work.data(), w << 1);
          for (int i = w; i < w << 1; ++i) prod0[i] += work[i];
          for (int i = 0; i < w << 1; ++i) work[i] = workR1R[i] * workR0R[i];
          invFft(work.data(), w << 1);
          for (int i = w; i < w << 1; ++i) prod1[i] += work[i];
        } else {
          // [0, 2 w), [n - w, n) -> [n, n + w)
          memset(workR0R.data() + w, 0, w * sizeof(Mint));
          memset(workR1R.data() + w, 0, w * sizeof(Mint));
          for (int i = 0; i < w << 1; ++i) workR0L[i] = R[i];
          for (int i = 0; i < w << 1; ++i) workR1L[i] = i * R[i];
          for (int i = n - w; i < n; ++i) workR0R[i - (n - w)] = R[i];
          for (int i = n - w; i < n; ++i) workR1R[i - (n - w)] = i * R[i];
          fft(workR0L.data(), w << 1);
          fft(workR1L.data(), w << 1);
          fft(workR0R.data(), w << 1);
          fft(workR1R.data(), w << 1);
          for (int i = 0; i < w << 1; ++i) work[i] = workR0L[i] * workR0R[i] + workR0R[i] * workR0L[i];
          invFft(work.data(), w << 1);
          for (int i = n; i < n + w; ++i) prod0[i] += work[i - (n - w)];
          for (int i = 0; i < w << 1; ++i) work[i] = workR1L[i] * workR0R[i] + workR1R[i] * workR0L[i];
          invFft(work.data(), w << 1);
          for (int i = n; i < n + w; ++i) prod1[i] += work[i - (n - w)];
        }
        // R[n] determined
        R[n] = - (prod1[n] + prod0[n]) - (prod1[n - 1] + 2 * prod0[n - 1]);
        // 0, n -> n
        prod0[n] += R[0] * R[n] + R[n] * R[0];
        prod1[n] += 0 * R[0] * R[n] + n * R[n] * R[0];
      }
      for (int n = 1; n <= N; ++n) {
        Q[n] = R[n - 1];
      }
    }
    // cerr<<"Q = "<<Q<<endl;

    // S[n] = -Q[n] - 2 * (-1)^n for n >= 4; lower entries stay 0.
    Poly S(N + 1);
    for (int n = 4; n <= N; ++n) {
      S[n] = -Q[n] - ((n & 1) ? -2 : +2);
    }
    // cerr<<"S = "<<S<<endl;

    Poly G(N + 1);
    {
      /*
        y G' - (1 + y) G G' = G^2 - (K+1)! G^(K+1) G'
        G[0] = 0
        G[1] = 1
        G[n] = - \sum_{2<=i<=n-1} i G[i] G[n+1-i]
               - \sum_{1<=i<=n-1} (i+1) G[i] G[n-i]
               + (K+1)! \sum_{1<=i<=n-K} i G[i] G^(K+1)[n+1-i]  (n >= 2)
        H[n] := G[n+1]
        H[0] = 1
        H[n] = - \sum_{1<=i<=n-1} (i+1) H[i] H[n-i]
               - \sum_{0<=i<=n-1} (i+2) H[i] H[n-1-i]
               + (K+1)! \sum_{0<=i<=n-K} (i+1) H[i] H^(K+1)[n-K-i]  (n >= 1)
        J := H^(K+1)
        (y J') H = (K+1) J (y H')
        J[0] = 1
        J[n] = (1/n) \sum_{1<=i<=n} ((K+1+1)i - n) H[i] J[n-i]  (n >= 1)
      */
      // prod0 = H * H, prod1 = (i H[i]) * H, prod2 = H * J,
      // prod3 = (i H[i]) * J, all accumulated block by block as above.
      Poly H(N), J(N);
      Poly prod0(len), prod1(len), prod2(len), prod3(len);
      Poly work(len);
      Poly workH0L(len), workH1L(len), workJ0L(len);
      Poly workH0R(len), workH1R(len), workJ0R(len);
      H[0] = 1;
      J[0] = 1;
      prod0[0] += H[0] * H[0];
      prod1[0] += 0 * H[0] * H[0];
      prod2[0] += H[0] * J[0];
      prod3[0] += 0 * H[0] * J[0];
      for (int n = 1; n < N; ++n) {
        const int w = n & -n;
        if (n == w) {
          // [0, w), [0, w) -> [w, 2 w)
          memset(workH0R.data() + w, 0, w * sizeof(Mint));
          memset(workH1R.data() + w, 0, w * sizeof(Mint));
          memset(workJ0R.data() + w, 0, w * sizeof(Mint));
          for (int i = 0; i < w; ++i) workH0R[i] = H[i];
          for (int i = 0; i < w; ++i) workH1R[i] = i * H[i];
          for (int i = 0; i < w; ++i) workJ0R[i] = J[i];
          fft(workH0R.data(), w << 1);
          fft(workH1R.data(), w << 1);
          fft(workJ0R.data(), w << 1);
          for (int i = 0; i < w << 1; ++i) work[i] = workH0R[i] * workH0R[i];
          invFft(work.data(), w << 1);
          for (int i = w; i < w << 1; ++i) prod0[i] += work[i];
          for (int i = 0; i < w << 1; ++i) work[i] = workH1R[i] * workH0R[i];
          invFft(work.data(), w << 1);
          for (int i = w; i < w << 1; ++i) prod1[i] += work[i];
          for (int i = 0; i < w << 1; ++i) work[i] = workH0R[i] * workJ0R[i];
          invFft(work.data(), w << 1);
          for (int i = w; i < w << 1; ++i) prod2[i] += work[i];
          for (int i = 0; i < w << 1; ++i) work[i] = workH1R[i] * workJ0R[i];
          invFft(work.data(), w << 1);
          for (int i = w; i < w << 1; ++i) prod3[i] += work[i];
        } else {
          // [0, 2 w), [n - w, n) -> [n, n + w)
          memset(workH0R.data() + w, 0, w * sizeof(Mint));
          memset(workH1R.data() + w, 0, w * sizeof(Mint));
          memset(workJ0R.data() + w, 0, w * sizeof(Mint));
          for (int i = 0; i < w << 1; ++i) workH0L[i] = H[i];
          for (int i = 0; i < w << 1; ++i) workH1L[i] = i * H[i];
          for (int i = 0; i < w << 1; ++i) workJ0L[i] = J[i];
          for (int i = n - w; i < n; ++i) workH0R[i - (n - w)] = H[i];
          for (int i = n - w; i < n; ++i) workH1R[i - (n - w)] = i * H[i];
          for (int i = n - w; i < n; ++i) workJ0R[i - (n - w)] = J[i];
          fft(workH0L.data(), w << 1);
          fft(workH1L.data(), w << 1);
          fft(workJ0L.data(), w << 1);
          fft(workH0R.data(), w << 1);
          fft(workH1R.data(), w << 1);
          fft(workJ0R.data(), w << 1);
          for (int i = 0; i < w << 1; ++i) work[i] = workH0L[i] * workH0R[i] + workH0R[i] * workH0L[i];
          invFft(work.data(), w << 1);
          for (int i = n; i < n + w; ++i) prod0[i] += work[i - (n - w)];
          for (int i = 0; i < w << 1; ++i) work[i] = workH1L[i] * workH0R[i] + workH1R[i] * workH0L[i];
          invFft(work.data(), w << 1);
          for (int i = n; i < n + w; ++i) prod1[i] += work[i - (n - w)];
          for (int i = 0; i < w << 1; ++i) work[i] = workH0L[i] * workJ0R[i] + workH0R[i] * workJ0L[i];
          invFft(work.data(), w << 1);
          for (int i = n; i < n + w; ++i) prod2[i] += work[i - (n - w)];
          for (int i = 0; i < w << 1; ++i) work[i] = workH1L[i] * workJ0R[i] + workH1R[i] * workJ0L[i];
          invFft(work.data(), w << 1);
          for (int i = n; i < n + w; ++i) prod3[i] += work[i - (n - w)];
        }
        // H[n] determined
        H[n] = - (prod1[n] + prod0[n]) - (prod1[n - 1] + 2 * prod0[n - 1])
               + ((n >= K) ? (fac[K + 1] * (prod3[n - K] + prod2[n - K])) : 0);
        // 0, n -> n
        prod0[n] += H[0] * H[n] + H[n] * H[0];
        prod1[n] += 0 * H[0] * H[n] + n * H[n] * H[0];
        prod2[n] += H[n] * J[0];
        prod3[n] += n * H[n] * J[0];
        // J[n] determined
        J[n] = inv[n] * ((K + 1 + 1) * prod3[n] - n * prod2[n]);
        // 0, n -> n
        prod2[n] += H[0] * J[n];
        prod3[n] += 0 * H[0] * J[n];
      }
      // cerr<<"H = "<<H<<endl;
      // cerr<<"J = "<<J<<endl;
      for (int n = 1; n <= N; ++n) {
        G[n] = H[n - 1];
      }
    }
    // cerr<<"G = "<<G<<endl;

    Mint ans = 0;
    // x
    if (N == 1) {
      ans += 1;
    }
    // I_large (1 + P) I_large
    {
      Poly large(N + 1);
      // Clamp the start so that K > N cannot index below 0; for K <= N this
      // is the same range as before.
      for (int i = max(N - K, 0); i <= N; ++i) large[i] = I[i];
      Poly sq = square(large);
      Mint sum = 0;
      for (int i = 0; i <= N; ++i) {
        sum += fac[N - i] * sq[i];
      }
      ans += 2 * sum;
    }
    // [x^N] S(F(x)) = (1/N) [y^(N-1)] S'(y) (y/G)^N
    {
      // Drop the zero constant term so gs holds G / y, then raise to -N.
      Poly gs = G;
      gs.erase(gs.begin());
      gs = gs.pow(Mint(-N), N - 1);
      Mint sum = 0;
      for (int i = 1; i <= N; ++i) {
        sum += i * S[i] * gs[N - i];
      }
      ans += inv[N] * sum;
    }
    printf("%u\n", ans.x);
  }
  return 0;
}
詳細信息
Test #1:
score: 100
Accepted
time: 6ms
memory: 21692kb
input:
3 2
output:
6
result:
ok 1 number(s): "6"
Test #2:
score: 0
Accepted
time: 7ms
memory: 23472kb
input:
4 2
output:
4
result:
ok 1 number(s): "4"
Test #3:
score: 0
Accepted
time: 11ms
memory: 22676kb
input:
300 20
output:
368258992
result:
ok 1 number(s): "368258992"
Test #4:
score: 0
Accepted
time: 334ms
memory: 32524kb
input:
100000 1
output:
91844344
result:
ok 1 number(s): "91844344"
Test #5:
score: 0
Accepted
time: 15ms
memory: 21060kb
input:
2 1
output:
2
result:
ok 1 number(s): "2"
Test #6:
score: 0
Accepted
time: 11ms
memory: 22916kb
input:
282 4
output:
563176581
result:
ok 1 number(s): "563176581"
Test #7:
score: 0
Accepted
time: 16ms
memory: 22976kb
input:
359 159
output:
903685663
result:
ok 1 number(s): "903685663"
Test #8:
score: 0
Accepted
time: 15ms
memory: 23496kb
input:
359 254
output:
470520868
result:
ok 1 number(s): "470520868"
Test #9:
score: 0
Accepted
time: 8ms
memory: 22744kb
input:
58 27
output:
436559968
result:
ok 1 number(s): "436559968"
Test #10:
score: 0
Accepted
time: 15ms
memory: 23312kb
input:
202 194
output:
16749760
result:
ok 1 number(s): "16749760"
Test #11:
score: 0
Accepted
time: 11ms
memory: 23584kb
input:
45 12
output:
296797655
result:
ok 1 number(s): "296797655"
Test #12:
score: 0
Accepted
time: 13ms
memory: 23704kb
input:
363 242
output:
13005572
result:
ok 1 number(s): "13005572"
Test #13:
score: 0
Accepted
time: 11ms
memory: 23232kb
input:
342 65
output:
923824682
result:
ok 1 number(s): "923824682"
Test #14:
score: 0
Accepted
time: 7ms
memory: 24148kb
input:
207 190
output:
369707039
result:
ok 1 number(s): "369707039"
Test #15:
score: 0
Accepted
time: 11ms
memory: 22320kb
input:
367 37
output:
467362142
result:
ok 1 number(s): "467362142"
Test #16:
score: 0
Accepted
time: 15ms
memory: 23272kb
input:
130 18
output:
188514783
result:
ok 1 number(s): "188514783"
Test #17:
score: 0
Accepted
time: 15ms
memory: 23044kb
input:
362 6
output:
563842262
result:
ok 1 number(s): "563842262"
Test #18:
score: 0
Accepted
time: 12ms
memory: 23864kb
input:
296 93
output:
826385279
result:
ok 1 number(s): "826385279"
Test #19:
score: 0
Accepted
time: 11ms
memory: 24108kb
input:
66 56
output:
510485158
result:
ok 1 number(s): "510485158"
Test #20:
score: 0
Accepted
time: 126ms
memory: 24240kb
input:
35567 25153
output:
370961506
result:
ok 1 number(s): "370961506"
Test #21:
score: 0
Accepted
time: 296ms
memory: 32028kb
input:
87796 51661
output:
486383441
result:
ok 1 number(s): "486383441"
Test #22:
score: 0
Accepted
time: 150ms
memory: 24000kb
input:
46775 17243
output:
84820979
result:
ok 1 number(s): "84820979"
Test #23:
score: 0
Accepted
time: 80ms
memory: 23668kb
input:
23118 14043
output:
768845693
result:
ok 1 number(s): "768845693"
Test #24:
score: 0
Accepted
time: 125ms
memory: 25340kb
input:
34630 30168
output:
163030696
result:
ok 1 number(s): "163030696"
Test #25:
score: 0
Accepted
time: 25ms
memory: 23288kb
input:
7355 6036
output:
721628072
result:
ok 1 number(s): "721628072"
Test #26:
score: 0
Accepted
time: 280ms
memory: 28304kb
input:
76991 35145
output:
741290476
result:
ok 1 number(s): "741290476"
Test #27:
score: 0
Accepted
time: 263ms
memory: 28804kb
input:
65607 31472
output:
120005728
result:
ok 1 number(s): "120005728"
Test #28:
score: 0
Accepted
time: 192ms
memory: 25732kb
input:
63045 5458
output:
568942298
result:
ok 1 number(s): "568942298"
Test #29:
score: 0
Accepted
time: 263ms
memory: 29304kb
input:
70632 20910
output:
870416132
result:
ok 1 number(s): "870416132"
Test #30:
score: 0
Accepted
time: 38ms
memory: 23480kb
input:
12184 4852
output:
887796493
result:
ok 1 number(s): "887796493"
Test #31:
score: 0
Accepted
time: 310ms
memory: 29460kb
input:
90205 3600
output:
431001694
result:
ok 1 number(s): "431001694"
Test #32:
score: 0
Accepted
time: 319ms
memory: 32256kb
input:
90932 33329
output:
600549935
result:
ok 1 number(s): "600549935"
Test #33:
score: 0
Accepted
time: 164ms
memory: 25536kb
input:
53902 13766
output:
415161256
result:
ok 1 number(s): "415161256"
Test #34:
score: 0
Accepted
time: 47ms
memory: 24632kb
input:
14161 7105
output:
134694410
result:
ok 1 number(s): "134694410"
Test #35:
score: 0
Accepted
time: 152ms
memory: 24452kb
input:
46381 37047
output:
187944778
result:
ok 1 number(s): "187944778"
Test #36:
score: 0
Accepted
time: 127ms
memory: 25012kb
input:
33614 4898
output:
747492373
result:
ok 1 number(s): "747492373"
Test #37:
score: 0
Accepted
time: 316ms
memory: 28632kb
input:
91722 35254
output:
694638772
result:
ok 1 number(s): "694638772"
Test #38:
score: 0
Accepted
time: 150ms
memory: 24920kb
input:
43121 16137
output:
287812406
result:
ok 1 number(s): "287812406"
Test #39:
score: 0
Accepted
time: 132ms
memory: 25544kb
input:
43266 29411
output:
573388406
result:
ok 1 number(s): "573388406"
Test #40:
score: 0
Accepted
time: 14ms
memory: 23880kb
input:
4919 1469
output:
683001842
result:
ok 1 number(s): "683001842"
Test #41:
score: 0
Accepted
time: 35ms
memory: 24456kb
input:
10213 4043
output:
828882070
result:
ok 1 number(s): "828882070"
Test #42:
score: 0
Accepted
time: 170ms
memory: 24528kb
input:
57705 29766
output:
908462562
result:
ok 1 number(s): "908462562"
Test #43:
score: 0
Accepted
time: 162ms
memory: 25468kb
input:
50628 43693
output:
874935502
result:
ok 1 number(s): "874935502"
Test #44:
score: 0
Accepted
time: 273ms
memory: 28404kb
input:
71491 8618
output:
720976298
result:
ok 1 number(s): "720976298"
Test #45:
score: 0
Accepted
time: 190ms
memory: 26228kb
input:
61231 37699
output:
905540582
result:
ok 1 number(s): "905540582"
Test #46:
score: 0
Accepted
time: 187ms
memory: 25152kb
input:
60626 48375
output:
847650011
result:
ok 1 number(s): "847650011"
Test #47:
score: 0
Accepted
time: 94ms
memory: 24156kb
input:
29493 22917
output:
808093687
result:
ok 1 number(s): "808093687"
Test #48:
score: 0
Accepted
time: 145ms
memory: 24640kb
input:
42960 4829
output:
582999364
result:
ok 1 number(s): "582999364"
Test #49:
score: 0
Accepted
time: 310ms
memory: 29744kb
input:
92916 35720
output:
871538508
result:
ok 1 number(s): "871538508"
Test #50:
score: 0
Accepted
time: 323ms
memory: 29800kb
input:
95751 52590
output:
676583073
result:
ok 1 number(s): "676583073"
Test #51:
score: 0
Accepted
time: 132ms
memory: 23712kb
input:
38771 20287
output:
598673771
result:
ok 1 number(s): "598673771"
Test #52:
score: 0
Accepted
time: 16ms
memory: 24144kb
input:
711 592
output:
22681776
result:
ok 1 number(s): "22681776"
Test #53:
score: 0
Accepted
time: 249ms
memory: 28076kb
input:
68245 11512
output:
182749044
result:
ok 1 number(s): "182749044"
Test #54:
score: 0
Accepted
time: 256ms
memory: 28496kb
input:
68203 966
output:
488928778
result:
ok 1 number(s): "488928778"
Test #55:
score: 0
Accepted
time: 340ms
memory: 29412kb
input:
100000 111
output:
373450248
result:
ok 1 number(s): "373450248"
Test #56:
score: 0
Accepted
time: 340ms
memory: 30128kb
input:
100000 1231
output:
105765703
result:
ok 1 number(s): "105765703"
Test #57:
score: 0
Accepted
time: 336ms
memory: 29812kb
input:
100000 12333
output:
860654995
result:
ok 1 number(s): "860654995"
Test #58:
score: 0
Accepted
time: 345ms
memory: 29996kb
input:
100000 39198
output:
846441800
result:
ok 1 number(s): "846441800"
Test #59:
score: 0
Accepted
time: 342ms
memory: 29348kb
input:
100000 56721
output:
618984747
result:
ok 1 number(s): "618984747"
Test #60:
score: 0
Accepted
time: 338ms
memory: 33716kb
input:
100000 99823
output:
811855278
result:
ok 1 number(s): "811855278"
Test #61:
score: 0
Accepted
time: 334ms
memory: 29924kb
input:
100000 99998
output:
385349822
result:
ok 1 number(s): "385349822"