QOJ.ac

QOJ

ID | Problem | Submitter | Result | Time | Memory | Language | File size | Submit time | Judge time
#304625 | #8010. Hierarchies of Judges | ucup-team159# | AC ✓ | 1684ms | 64192kb | C++23 | 26.7kb | 2024-01-13 22:04:56 | 2024-01-13 22:04:56

Judging History

你现在查看的是最新测评结果

  • [2024-01-13 22:04:56]
  • 评测
  • 测评结果:AC
  • 用时:1684ms
  • 内存:64192kb
  • [2024-01-13 22:04:56]
  • 提交

answer

#line 1 "H.cpp"
// #pragma GCC target("avx2,avx512f,avx512vl,avx512bw,avx512dq,avx512cd,avx512vbmi,avx512vbmi2,avx512vpopcntdq,avx512bitalg,bmi,bmi2,lzcnt,popcnt")
// #pragma GCC optimize("Ofast")

#line 2 "/mnt/c/Users/tsigm/Documents/Cprogram/library/template.hpp"

#include <bits/stdc++.h>
using namespace std;
// Short names for the integer types used throughout the file.
using ll = long long;
using uint = unsigned int;
using ull = unsigned long long;
// Loop macros: rep/rep1 count upward over [0,n) and [1,n];
// per/per1 count downward over the same ranges.
#define rep(i,n) for(int i=0;i<int(n);i++)
#define rep1(i,n) for(int i=1;i<=int(n);i++)
#define per(i,n) for(int i=int(n)-1;i>=0;i--)
#define per1(i,n) for(int i=int(n);i>0;i--)
// all(c): the begin/end iterator pair of a container, for passing to algorithms.
#define all(c) c.begin(),c.end()
// si(x): container size converted to int.
#define si(x) int(x.size())
// Abbreviations for common member names.
#define pb push_back
#define eb emplace_back
#define fs first
#define sc second
// V<T> is a vector; VV<T> is a vector of vectors.
template<class T> using V = vector<T>;
template<class T> using VV = vector<vector<T>>;
// Raises x to y when y is strictly larger; returns whether x was changed.
template<class T,class U> bool chmax(T& x, U y){
	const bool improved = x < y;
	if(improved) x = y;
	return improved;
}
// Lowers x to y when y is strictly smaller; returns whether x was changed.
template<class T,class U> bool chmin(T& x, U y){
	const bool improved = y < x;
	if(improved) x = y;
	return improved;
}
// Sorts v and removes duplicate values in place.
template<class T> void mkuni(V<T>& v){
	std::sort(v.begin(), v.end());
	auto tail = std::unique(v.begin(), v.end());
	v.erase(tail, v.end());
}
// Index of the first element of v that is not less than a (lower_bound on v);
// equals v.size() when no such element exists.
template<class T> int lwb(const V<T>& v, const T& a){
	auto pos = std::lower_bound(v.begin(), v.end(), a);
	return pos - v.begin();
}
// Vec<T>(a): a vector of `a` value-initialized elements.
template<class T>
V<T> Vec(size_t a) {
	V<T> row(a);
	return row;
}
// Vec<T>(a, b, ...): nested vectors with the given extents, built by
// replicating the inner table `a` times.
template<class T, class... Ts>
auto Vec(size_t a, Ts... ts) {
	auto inner = Vec<T>(ts...);
	return V<decltype(inner)>(a, inner);
}
// Prints a pair as "(first,second)".
template<class S,class T> ostream& operator<<(ostream& o,const pair<S,T> &p){
	ostream& chained = o << "(" << p.first << "," << p.second << ")";
	return chained;
}
// Prints a vector as "{e0,e1,...,}" (every element is followed by a comma).
template<class T> std::ostream& operator<<(std::ostream& o,const std::vector<T> &vc){
	o << "{";
	for(auto it = vc.begin(); it != vc.end(); ++it){
		o << *it << ",";
	}
	o << "}";
	return o;
}
// 10^n as a long long, usable in constant expressions (recursion bottoms out at n == 0).
constexpr ll TEN(int n) {
	if (n == 0) return 1;
	return 10 * TEN(n-1);
}

// Debug helpers. When LOCAL is defined they print to cerr; otherwise
// show/dump/shows expand to void(0) and cost nothing.
#ifdef LOCAL
// show(x): prints the line number, the expression text and its value.
#define show(x) cerr << "LINE" << __LINE__ << " : " << #x << " = " << (x) << endl
// dmpr: writes its arguments separated by " ~ " and ends the line.
void dmpr(ostream& os){os<<endl;}
template<class T,class... Args>
void dmpr(ostream&os,const T&t,const Args&... args){
	os<<t<<" ~ ";
	dmpr(os,args...);
}
// shows(...): prints the line number followed by all arguments via dmpr.
#define shows(...) cerr << "LINE" << __LINE__ << " : ";dmpr(cerr,##__VA_ARGS__)
// dump(x): prints every element of the range x inside braces.
#define dump(x) cerr << "LINE" << __LINE__ << " : " << #x << " = {";  \
	for(auto v: x) cerr << v << ","; cerr << "}" << endl;
#else
#define show(x) void(0)
#define dump(x) void(0)
#define shows(...) void(0)
#endif

// Integer division rounded toward negative infinity.
// The truncated quotient is lowered by one when the operands have opposite
// signs and the division is inexact.
template<class D> D divFloor(D a, D b){
	D q = a / b;
	const bool opposite_signs = (a ^ b) < 0;
	if(opposite_signs && a % b != 0) q -= 1;
	return q;
}
// Integer division rounded toward positive infinity.
// The truncated quotient is raised by one when a ^ b is positive (same sign,
// unequal values) and the division is inexact.
template<class D> D divCeil(D a, D b) {
	D q = a / b;
	const bool same_sign = (a ^ b) > 0;
	if(same_sign && a % b != 0) q += 1;
	return q;
}

/*
x       0  1  2  3  4  5  6  7  8  9
bsr(x) -1  0  1  1  2  2  2  2  3  3
index of the most significant set bit (-1 for x == 0)
*/
int bsr(uint x){
	if(x == 0) return -1;
	return 31 - __builtin_clz(x);
}
int bsr(ull x){
	if(x == 0) return -1;
	return 63 - __builtin_clzll(x);
}

/*
x       0  1  2  3  4  5  6  7  8  9
bsl(x) -1  0  1  0  2  0  1  0  3  0
index of the least significant set bit (-1 for x == 0)
*/
int bsl(uint x){
	return x == 0 ? -1 : __builtin_ctz(x);
}
int bsl(ull x){
	return x == 0 ? -1 : __builtin_ctzll(x);
}


// Random integer drawn uniformly from the half-open range [l,r).
// The generator is a function-local mt19937 seeded once from random_device.
template<class T>
T rnd(T l,T r){	//[l,r)
	static std::random_device seed_source;
	static std::mt19937 engine(seed_source());
	std::uniform_int_distribution<T> dist(l, r-1);
	return dist(engine);
}
// Random integer drawn from [0,n); forwards to the two-argument rnd.
template<class T>
T rnd(T n){	//[0,n)
	const T lo = T(0);
	return rnd(lo, n);
}
#line 1 "/mnt/c/Users/tsigm/Documents/Cprogram/library/math/poly.cpp"
/*
	2021/04/14: major rewrite
	poly basics, MultipointEval, Interpolate
*/
// Integer modulo the compile-time constant mod_.
// The stored residue v is kept in [0, mod). Division and inv() use Fermat's
// little theorem (pow(mod-2)), so they are only meaningful for a prime modulus.
template<unsigned int mod_>
struct ModInt{
	using uint = unsigned int;
	using ll = long long;
	using ull = unsigned long long;

	constexpr static uint mod = mod_;

	uint v;

	ModInt() : v(0) {}
	// Accepts any signed value: _v % mod lies in (-mod, mod), so adding mod
	// gives a value in (0, 2*mod) that a single normS brings into range.
	ModInt(ll _v) : v(normS(_v % mod + mod)) {}
	explicit operator bool() const { return v != 0; }

	// [0 , 2*mod-1] -> [0 , mod-1]
	static uint normS(const uint &x){
		if(x < mod) return x;
		return x - mod;
	}
	// Wraps an already reduced value without touching it.
	static ModInt make(const uint &x){
		ModInt res;
		res.v = x;
		return res;
	}

	ModInt operator+(const ModInt& b) const { return make(normS(v + b.v)); }
	ModInt operator-(const ModInt& b) const { return make(normS(v + mod - b.v)); }
	ModInt operator-() const { return make(normS(mod - v)); }
	ModInt operator*(const ModInt& b) const {
		const ull prod = (ull)v * b.v;
		return make(prod % mod);
	}
	ModInt operator/(const ModInt& b) const { return *this * b.inv(); }

	ModInt& operator+=(const ModInt& b){ *this = *this + b; return *this; }
	ModInt& operator-=(const ModInt& b){ *this = *this - b; return *this; }
	ModInt& operator*=(const ModInt& b){ *this = *this * b; return *this; }
	ModInt& operator/=(const ModInt& b){ *this = *this / b; return *this; }
	// Note: these are the postfix forms but they return the updated value by reference.
	ModInt& operator++(int){ *this = *this + 1; return *this; }
	ModInt& operator--(int){ *this = *this - 1; return *this; }

	// Mixed arithmetic with a plain number on the left-hand side.
	template<class T> friend ModInt operator+(T a, const ModInt& b){ return (ModInt(a) += b); }
	template<class T> friend ModInt operator-(T a, const ModInt& b){ return (ModInt(a) -= b); }
	template<class T> friend ModInt operator*(T a, const ModInt& b){ return (ModInt(a) *= b); }
	template<class T> friend ModInt operator/(T a, const ModInt& b){ return (ModInt(a) /= b); }

	// Binary exponentiation; a negative exponent raises the inverse instead.
	ModInt pow(ll p) const {
		if(p < 0) return inv().pow(-p);
		ModInt acc = 1, base = *this;
		for(; p; p >>= 1){
			if(p & 1) acc *= base;
			base *= base;
		}
		return acc;
	}
	ModInt inv() const {		// should be prime
		return pow(mod-2);
	}
	// Alternative inverse via the extended Euclidean algorithm (kept disabled):
	// ll extgcd(ll a,ll b,ll &x,ll &y) const{
	// 	ll p[]={a,1,0},q[]={b,0,1};
	// 	while(*q){
	// 		ll t=*p/ *q;
	// 		rep(i,3) swap(p[i]-=t*q[i],q[i]);
	// 	}
	// 	if(p[0]<0) rep(i,3) p[i]=-p[i];
	// 	x=p[1],y=p[2];
	// 	return p[0];
	// }
	// ModInt inv() const {
	// 	ll x,y;
	// 	extgcd(v,mod,x,y);
	// 	return make(normS(x+mod));
	// }

	bool operator==(const ModInt& b) const { return v == b.v; }
	bool operator!=(const ModInt& b) const { return v != b.v; }
	bool operator<(const ModInt& b) const { return v < b.v; }

	// Reads a signed integer and reduces it.
	friend std::istream& operator>>(std::istream &o,ModInt& x){
		ll raw;
		o >> raw;
		x = ModInt(raw);
		return o;
	}
	// Writes the residue.
	friend std::ostream& operator<<(std::ostream &o,const ModInt& x){
		return o << x.v;
	}
};
// Default modular type for the whole file (998244353; the 1e9+7 variant is kept below, disabled).
using mint = ModInt<998244353>;
//using mint = ModInt<1000000007>;

// Tables filled by InitFact: fact[i] = i!, ifact[i] = 1/i!, invs[i] = 1/i.
V<mint> fact,ifact,invs;
// Binomial coefficient C(a,b) from the fact/ifact tables; 0 when b < 0 or b > a.
// Reads fact[a], ifact[b] and ifact[a-b], so the tables must cover index a.
mint Choose(int a,int b){
	const bool in_range = (0 <= b && b <= a);
	if(!in_range) return 0;
	return fact[a] * ifact[b] * ifact[a-b];
}
// Fills fact, ifact and invs for every index in [0,N].
// Only one modular inverse is computed (of N!); ifact is derived backwards
// from it and invs[i] = (i-1)! / i!. invs[0] is never assigned.
void InitFact(int N){	//[0,N]
	const int len = N + 1;
	fact.resize(len);
	ifact.resize(len);
	invs.resize(len);
	fact[0] = 1;
	for(int i=1;i<len;i++) fact[i] = fact[i-1] * i;
	ifact[len-1] = fact[len-1].inv();
	for(int i=len-1;i>0;i--) ifact[i-1] = ifact[i] * i;
	for(int i=1;i<len;i++) invs[i] = fact[i-1] * ifact[i];
}

// inplace_fmt (without bit rearranging)
// fft:
// 		a[rev(i)] <- \sum_j \zeta^{ij} a[j]
// invfft:
//		a[i] <- (1/n) \sum_j \zeta^{-ij} a[rev(j)]
// These two are inversions.


// !!! CHANGE IF MOD is unusual !!!
// Both constants describe mint::mod and are read by fft/invfft:
// the transform length is capped at 1 << ORDER_2_MOD_MINUS_1.
const int ORDER_2_MOD_MINUS_1 = 23;	// ord_2 (mod-1)
const mint PRIMITIVE_ROOT = 3; // primitive root of (Z/pZ)*

// In-place forward transform of `a` (size must be a power of two, at most 1<<H).
// No bit-reversal pass is done: a[rev(i)] <- \sum_j \zeta^{ij} a[j].
// The butterflies work on the raw `.v` fields and postpone reductions; the
// trailing "< kM" comments give the upper bound (in multiples of mod) that
// each value satisfies after the statement. All values end up in [0, mod).
void fft(V<mint>& a){
	static constexpr uint mod = mint::mod;
	static constexpr uint mod2 = mod + mod;
	static const int H = ORDER_2_MOD_MINUS_1;
	static const mint root = PRIMITIVE_ROOT;
	// Twiddle step table, filled on the first call.
	static mint magic[H-1];

	int n = si(a);
	assert(!(n & (n-1))); assert(n >= 1); assert(n <= 1<<H);	// n should be power of 2

	// magic[0] is still zero (default-constructed) until the table has been built.
	if(!magic[0]){		// precalc
		rep(i,H-1){
			mint w = -root.pow(((mod-1)>>(i+2))*3);
			magic[i] = w;
		}
	}
	int m = n;
	// First level: the twiddle factor is 1, so no multiplication is needed.
	if(m >>= 1){
		rep(i,m){
			uint v = a[i+m].v;					// < M
			a[i+m].v = a[i].v + mod - v;		// < 2M
			a[i].v += v;						// < 2M
		}
	}
	// Second level: one twiddle p per block of 2m entries, advanced by
	// magic[ctz(++h)] when moving to the next block.
	if(m >>= 1){
		mint p = 1;
		for(int h=0,s=0; s<n; s += m*2){
			for(int i=s;i<s+m;i++){
				uint v = (a[i+m] * p).v;		// < M
				a[i+m].v = a[i].v + mod - v;	// < 3M
				a[i].v += v;					// < 3M
			}
			p *= magic[__builtin_ctz(++h)];
		}
	}
	// Remaining levels, two per iteration: the first lets values grow to < 4M,
	// the second subtracts 2*mod from a[i] where needed before continuing.
	while(m){
		if(m >>= 1){
			mint p = 1;
			for(int h=0,s=0; s<n; s += m*2){
				for(int i=s;i<s+m;i++){
					uint v = (a[i+m] * p).v;		// < M
					a[i+m].v = a[i].v + mod - v;	// < 4M
					a[i].v += v;					// < 4M
				}
				p *= magic[__builtin_ctz(++h)];
			}
		}
		if(m >>= 1){
			mint p = 1;
			for(int h=0,s=0; s<n; s += m*2){
				for(int i=s;i<s+m;i++){
					uint v = (a[i+m] * p).v;								// < M
					a[i].v = (a[i].v >= mod2) ? a[i].v - mod2 : a[i].v;	// < 2M
					a[i+m].v = a[i].v + mod - v;							// < 3M
					a[i].v += v;											// < 3M
				}
				p *= magic[__builtin_ctz(++h)];
			}
		}
	}
	// Final normalization of every entry into [0, mod).
	rep(i,n){
		a[i].v = (a[i].v >= mod2) ? a[i].v - mod2 : a[i].v;		// < 2M
		a[i].v = (a[i].v >= mod) ? a[i].v - mod : a[i].v;		// < M
	}
	// finally < mod !!
}
// In-place inverse of fft (size must be a power of two, at most 1<<H):
// a[i] <- (1/n) \sum_j \zeta^{-ij} a[rev(j)].
// Like fft, it works on raw `.v` fields with postponed reductions; the
// "< kM" comments give the bound (in multiples of mod) after each statement.
void invfft(V<mint>& a){
	static constexpr uint mod = mint::mod;
	static constexpr uint mod2 = mod + mod;
	static const int H = ORDER_2_MOD_MINUS_1;
	static const mint root = PRIMITIVE_ROOT;
	// Inverse twiddle step table, filled on the first call.
	static mint magic[H-1];

	int n = si(a);
	assert(!(n & (n-1))); assert(n >= 1); assert(n <= 1<<H);	// n should be power of 2

	// magic[0] is still zero (default-constructed) until the table has been built.
	if(!magic[0]){		// precalc
		rep(i,H-1){
			mint w = -root.pow(((mod-1)>>(i+2))*3);
			magic[i] = w.inv();
		}
	}
	int m = 1;
	// Smallest level (block half-width 1), handled separately from the general loop.
	if(m < n>>1){
		mint p = 1;
		for(int h=0,s=0; s<n; s += m*2){
			for(int i=s;i<s+m;i++){
				ull x = a[i].v + mod - a[i+m].v;	// < 2M
				a[i].v += a[i+m].v;					// < 2M
				a[i+m].v = (p.v * x) % mod;			// < M
			}
			p *= magic[__builtin_ctz(++h)];
		}
		m <<= 1;
	}
	// Middle levels: the first half of each block gets an extra 2*mod
	// subtraction on a[i]; the second half does not.
	for(;m < n>>1; m <<= 1){
		mint p = 1;
		for(int h=0,s=0; s<n; s+= m*2){
			for(int i=s;i<s+(m>>1);i++){
				ull x = a[i].v + mod2 - a[i+m].v;	// < 4M
				a[i].v += a[i+m].v;					// < 4M
				a[i].v = (a[i].v >= mod2) ? a[i].v - mod2 : a[i].v;	// < 2M
				a[i+m].v = (p.v * x) % mod;		// < M
			}
			for(int i=s+(m>>1); i<s+m; i++){
				ull x = a[i].v + mod - a[i+m].v;	// < 2M
				a[i].v += a[i+m].v;	// < 2M
				a[i+m].v = (p.v * x) % mod;	// < M
			}
			p *= magic[__builtin_ctz(++h)];
		}
	}
	// Last level: no twiddle multiplication; values are left unreduced because
	// the scaling below reduces them.
	if(m < n){
		rep(i,m){
			uint x = a[i].v + mod2 - a[i+m].v;	// < 4M
			a[i].v += a[i+m].v;	// < 4M
			a[i+m].v = x;	// < 4M
		}
	}
	// Scale by 1/n; the mint multiplication also reduces every entry.
	const mint in = mint(n).inv();
	rep(i,n) a[i] *= in;	// < M
	// finally < mod !!
}

// Product of two coefficient vectors via fft/invfft.
// Returns an empty vector if either input is empty, otherwise A+B-1 coefficients.
// When both inputs are equal only one forward transform is performed.
// A,B = 500000 -> 70ms
// verify https://judge.yosupo.jp/submission/44937
V<mint> multiply(V<mint> a, V<mint> b) {
	const int A = si(a), B = si(b);
	if (A == 0 || B == 0) return {};
	const int n = A+B-1;
	int s = 1;
	while(s < n) s <<= 1;
	// Compare before resizing: squaring needs 2 transforms instead of 3.
	const bool same = (a == b);
	a.resize(s); fft(a);
	if(same){
		for(auto& x : a) x *= x;
	}else{
		b.resize(s); fft(b);
		for(int i=0;i<s;i++) a[i] *= b[i];
	}
	invfft(a);
	a.resize(n);
	return a;
}

/*
	Coefficient access
		f[i] is fine, but use at/set when the index may be out of range.
	
*/

// Polynomial / truncated power series stored as a coefficient vector
// (index i holds the coefficient of x^i). Products go through the global
// multiply(); several methods call the global fft/invfft directly.
template<class mint>
struct Poly: public V<mint>{
	using vector<mint>::vector;
	Poly() {}
	explicit Poly(int n) : V<mint>(n){}		// poly<mint> a; a = 2; shouldn't be [0,0]
	Poly(int n, mint c) : V<mint>(n,c){}
	Poly(const V<mint>& a) : V<mint>(a){}
	Poly(initializer_list<mint> li) : V<mint>(li){}

	// Size as int (hides the unsigned vector::size).
	int size() const { return V<mint>::size(); }
	// Coefficient i, or 0 when i is at or beyond the stored length.
	mint at(int i) const {
		return i<size() ? (*this)[i] : 0;
	}
	// Stores coefficient i, growing with zeros if needed; storing a zero
	// beyond the current length is a no-op.
	void set(int i, mint x){
		if(i>=size() && !x) return;
		while(i>=size()) this->pb(0);
		(*this)[i] = x;
		return;
	}
	mint operator()(mint x) const {		// eval
		mint res = 0;
		int n = size();
		mint a = 1;
		rep(i,n){
			res += a * (*this)[i];
			a *= x;
		}
		return res;
	}
	Poly low(int n) const {		// ignore x^n (take first n), but not empty
		return Poly(this->begin(), this->begin()+min(max(n,1),size()));
	}
	// Coefficients in reverse order.
	Poly rev() const {
		return Poly(this->rbegin(), this->rend());
	}
	// Prints as "[c0,c1,...]".
	friend ostream& operator<<(ostream &o,const Poly& f){
		o << "[";
		rep(i,f.size()){
			o << f[i];
			if(i != f.size()-1) o << ",";
		}
		o << "]";
		return o;
	}

	Poly operator-() const {
		Poly res = *this;
		for(auto& v: res) v = -v;
		return res;
	}
	// Scalar +/- act on the constant term only and index (*this)[0] directly,
	// so the polynomial must be non-empty.
	Poly& operator+=(const mint& c){
		(*this)[0] += c;
		return *this;
	}
	Poly& operator-=(const mint& c){
		(*this)[0] -= c;
		return *this;
	}
	Poly& operator*=(const mint& c){
		for(auto& v: *this) v *= c;
		return *this;
	}
	Poly& operator/=(const mint& c){
		return *this *= mint(1)/mint(c);
	}
	Poly& operator+=(const Poly& r){
		if(size() < r.size()) this->resize(r.size(),0);
		rep(i,r.size()) (*this)[i] += r[i];
		return *this;
	}
	Poly& operator-=(const Poly& r){
		if(size() < r.size()) this->resize(r.size(),0);
		rep(i,r.size()) (*this)[i] -= r[i];
		return *this;
	}
	Poly& operator*=(const Poly& r){
		return *this = multiply(*this,r);
	}

	// If you divide by the same r many times, recomputing rinv every time is wasteful,
	// so compute it once on the caller side and call this function directly.
	// Be careful about the length of the inverse you need.
	// For example, when doing various computations mod r, you basically divide a polynomial of length deg(r) * 2 by r.
	// That said, once rinv has been computed to a long length, shorter cases only look at a prefix, and passing an
	// unnecessarily long rinv is harmless, so size it for the largest degree of any dividend.

	// Quotient of *this by r; rinv is the series inverse of r.rev()
	// (operator/= below shows how it is obtained). The leading coefficient
	// of r must be nonzero.
	Poly quotient(const Poly& r, const Poly& rinv){
		int m = r.size(); assert(r[m-1].v);
		int n = size();
		int s = n-m+1;
		if(s <= 0) return {0};
		return (rev().low(s)*rinv.low(s)).low(s).rev();
	}
	Poly& operator/=(const Poly& r){
		return *this = quotient(r,r.rev().inv(max(size()-r.size(),0)+1));
	}
	// Remainder: subtract quotient * r and keep the low r.size()-1 coefficients
	// (low() never returns an empty polynomial).
	Poly& operator%=(const Poly& r){
		*this -= *this/r * r;
		return *this = low(r.size()-1);
	}

	Poly operator+(const mint& c) const {return Poly(*this) += c; }
	Poly operator-(const mint& c) const {return Poly(*this) -= c; }
	Poly operator*(const mint& c) const {return Poly(*this) *= c; }
	Poly operator/(const mint& c) const {return Poly(*this) /= c; }
	Poly operator+(const Poly& r) const {return Poly(*this) += r; }
	Poly operator-(const Poly& r) const {return Poly(*this) -= r; }
	Poly operator*(const Poly& r) const {return Poly(*this) *= r; }
	Poly operator/(const Poly& r) const {return Poly(*this) /= r; }
	Poly operator%(const Poly& r) const {return Poly(*this) %= r; }

	// Formal derivative.
	Poly diff() const {
		Poly g(max(size()-1,0));
		rep(i,g.size()) g[i] = (*this)[i+1] * (i+1);
		return g;
	}
	// Formal integral with zero constant term; needs the global invs table
	// to cover index size().
	Poly intg() const {
		assert(si(invs) > size());
		Poly g(size()+1);
		rep(i,size()) g[i+1] = (*this)[i] * invs[i+1];
		return g;
	}
	Poly square() const {
		return multiply(*this,*this);
	}

	// 1/f(x) mod x^s
	// N = s = 500000 -> 90ms
	// inv calls fft(2n) five times, whereas multiply calls fft(2n) three times,
	// so it takes roughly 5/3 of the time of multiply
	// Derivation: Newton
	// 		fg = 1 mod x^m
	// 		(fg-1)^2 = 0 mod x^2m
	// 		f(2g-fg^2) = 1 mod x^2m
	// verify: https://judge.yosupo.jp/submission/44938
	Poly inv(int s) const {
		Poly r(s);
		r[0] = mint(1)/at(0);
		for(int n=1;n<s;n*=2){			// 5 times fft : length 2n
			V<mint> f = low(2*n); f.resize(2*n);
			fft(f);
			V<mint> g = r.low(2*n); g.resize(2*n);
			fft(g);
			rep(i,2*n) f[i] *= g[i];
			invfft(f);
			rep(i,n) f[i] = 0;
			fft(f);
			rep(i,2*n) f[i] *= g[i];
			invfft(f);
			for(int i=n;i<min(2*n,s);i++) r[i] -= f[i];
		}
		return r;
	}

	// log f mod x^s
	// Derivation: D log(f) = (D f) / f
	// 500000: 180ms
	// 8/3 of the cost of mult
	// verify: https://judge.yosupo.jp/submission/44962
	Poly log(int s) const {
		assert(at(0) == 1);
		if(s == 1) return {0};
		return (low(s).diff() * inv(s-1)).low(s-1).intg();
	}

	// e^f mod x^s
	// f.log(s).exp(s) == [1,0,...,0]
	// 500000 : 440ms
	// TODO: speed up!
	// fast implementation example (hos): https://judge.yosupo.jp/submission/36732 150ms
	// Derivation (Newton):
	//		g = exp(f)
	//		log(g) - f = 0
	//		g == g0 mod x^m
	//		g == g0 - (log(g0) - f) / (1/g0) mod x^2m
	// verify: yosupo
	Poly exp(int s) const {
		assert(at(0) == 0);
		Poly f({1}),g({1});
		for(int n=1;n<s;n*=2){
			g = (g*2-g.square().low(n)*f).low(n);
			Poly q = low(n).diff();
			q = q + g * (f.diff() - f*q).low(2*n-1);
			f = (f + f * (low(2*n)-q.intg()) ).low(2*n);
		}
		return f.low(s);
	}

	// f^p mod x^s
	// 500000: 600ms
	// Derivation: f^p = e^(p log f)
	// one log, one exp
	// See also Exp.cpp (Mifafa technique)
	// 	c.f. (number of nonzero coefficients of f) * s
	// verify: https://judge.yosupo.jp/submission/44992
	Poly pow(ll p, int s) const {
		if(p == 0){
			return Poly(s) + 1;	// 0^0 is 1
		}
		// ord = index of the lowest nonzero coefficient (capped at s).
		int ord = 0;
		while(ord<s && !at(ord)) ord++;
		assert(!(p<0 and ord>0));	// could be supported with more effort
		if(p>0 and (s-1)/p < ord) return Poly(s);	// s <= p * ord
		int off = p*ord;
		int s_ = s-off;
		// Normalize to constant term 1, take exp(p*log), then restore the
		// leading factor a0^p and the shift by x^off.
		const mint a0 = at(ord), ia0 = a0.inv(), ap = a0.pow(p);
		Poly f(s_); rep(i,s_) f[i] = at(i+ord) * ia0;
		f = (f.log(s_) * p).exp(s_);
		Poly res(s);
		rep(i,s_) res[i+off] = f[i] * ap;
		return res;
	}

	// f^(1/2) mod x^s
	// f[0] should be 1
	// 11/6
	// verify: https://judge.yosupo.jp/submission/44997
	Poly sqrt(int s) const {
		assert(at(0) == 1);
		static const mint i2 = mint(2).inv();
		V<mint> f{1},g{1},z{1};
		for(int n=1;n<s;n*=2){
			rep(i,n) z[i] *= z[i];
			invfft(z);
			V<mint> d(2*n);
			rep(i,n) d[n+i] = z[i] - at(i) - at(n+i);
			fft(d);
			V<mint> g2(2*n);
			rep(i,n) g2[i] = g[i];
			fft(g2);
			rep(i,n*2) d[i] *= g2[i];
			invfft(d);
			f.resize(n*2);
			for(int i=n;i<n*2;i++) f[i] = -d[i] * i2;
			if(n*2 >= s) break;
			z = f;
			fft(z);
			V<mint> eps = g2;
			rep(i,n*2) eps[i] *= z[i];
			invfft(eps);
			rep(i,n) eps[i] = 0;
			fft(eps);
			rep(i,n*2) eps[i] *= g2[i];
			invfft(eps);
			g.resize(n*2);
			for(int i=n;i<n*2;i++) g[i] -= eps[i];
		}
		f.resize(s);
		return f;
	}

	// Taylor Shift
	// return f(x+c)
	// O(N logN)
	// verify: yosupo
	Poly shift(mint c){
		int n = size();
		assert(si(fact) >= n);	// please InitFact
		V<mint> f(n); rep(i,n) f[i] = (*this)[i] * fact[i];
		V<mint> g(n);
		mint cpow = 1;
		rep(i,n){g[i] = cpow * ifact[i]; cpow *= c;}
		reverse(all(g));
		V<mint> h = multiply(f,g);
		Poly res(n); rep(i,n) res[i] = h[n-1+i] * ifact[i];
		return res;
	}

	// Compositional inverse mod x^s
	// O(s^2 + s^1.5 log s)
	// Approach: Lagrange inversion, [x^i]g = (1/i [x^i-1](x/f)^i)
	// 		writing (x/f)^i = (x/f)^jL (x/f)^k, the precomputation is s^1.5 with FFT
	// 		only a single coefficient of each product of two is needed, so O(s) each
	// The parts that repeatedly multiply by z or z^L can avoid the FFT, so this can be sped up
	// verify: https://www.luogu.com.cn/problem/P5809
	Poly compositeInv(int s){
		assert(at(0) == 0);
		assert(at(1) != 0);
		int L = 0;
		while(L*L < s) L++;
		Poly z0(s); rep(i,s) z0[i] = at(i+1);
		Poly z = z0.inv(s);	// = x/f
		V<Poly> zi(L);	// z^i
		V<Poly>	ziL(L);	// z^iL
		zi[0] = {1};
		rep(i,L-1) zi[i+1] = (zi[i] * z).low(s);
		auto zL = (zi[L-1] * z).low(s);
		ziL[0] = {1};
		rep(i,L-1)  ziL[i+1] = (ziL[i] * zL).low(s);

		Poly res(s);
		rep1(k,s-1){
			int i = k/L, j = k%L;	// x^(iL+j)
			rep(_,k) res[k] += ziL[i].at(_) * zi[j].at(k-1-_);
			res[k] /= k;
		}
		return res;
	}
};

// Composition f(g(x)) mod x^s
// O(ns + sqrt(n)slogs)
// When s is not given, all n*m+1 coefficients are returned, O(n^2m)?
// \sum_k f_k g^k = \sum_k f_k g^iL+j = \sum_i g^iL * (\sum_j f_k g^j)
// verify: https://www.luogu.com.cn/problem/P5373
Poly<mint> composite(Poly<mint> f, Poly<mint> g, int s=-1){
	const int n = si(f)-1, m = si(g)-1;
	if(s == -1) s = n*m+1;
	// L is the smallest integer with L*L > n, so every k <= n is i*L+j with i,j < L.
	int L = 0;
	while(L*L <= n) L++;
	V<Poly<mint>> small(L);	// g^j
	V<Poly<mint>> big(L);	// g^(i*L)
	small[0] = {1};
	for(int j=1;j<L;j++) small[j] = (small[j-1] * g).low(s);
	const auto gL = (small[L-1] * g).low(s);
	big[0] = {1};
	for(int i=1;i<L;i++) big[i] = (big[i-1] * gL).low(s);

	Poly<mint> res(s);
	for(int i=0;i<L;i++){
		// inner = \sum_j f_{iL+j} g^j for this block of coefficients
		Poly<mint> inner;
		for(int j=0;j<L;j++){
			const int k = i*L+j;
			if(k > n) continue;
			inner += small[j] * f[k];
		}
		res += (inner * big[i]).low(s);
	}
	return res;
}

// Remainder of a modulo m shifted into [0, m) when a % m is negative.
ll norm_mod(ll a, ll m){
	const ll r = a % m;
	return r < 0 ? r + m : r;
}

// Jacobi symbol (a/p), computed recursively.
//p: odd (not necessarily prime)
ll jacobi(ll a,ll p){
	a = norm_mod(a,p);
	if(a == 0) return p == 1;
	// +1 for an even exponent, -1 for an odd one
	const auto parity_sign = [](ll e){ return (e & 1) ? -1 : 1; };
	if(a & 1){
		// odd a: swap the arguments; the sign comes from ((p-1)&(a-1))>>1
		return parity_sign(((p-1)&(a-1))>>1) * jacobi(p%a,a);
	}
	// even a: pull out a factor 2, whose sign is taken from (p^2-1)/8 using p mod 16
	return parity_sign(((p&15)*(p&15)-1)/8) * jacobi(a/2,p);
}

// Square root of a modulo p.
// p : prime
// 0 <= a < p
// return smaller solution
// if no solution, -1
ll sqrt_mod(ll a,ll p){
	if(a == 0) return 0;
	if(p == 2) return 1;
	if(jacobi(a,p) == -1) return -1;
	// Find the first b >= 0 for which c = b^2 - a has Jacobi symbol -1.
	ll b = 0;
	ll c = norm_mod(b*b-a,p);
	while(jacobi(c,p) != -1){
		b++;
		c = norm_mod(b*b-a,p);
	}
	// Pairs (u,v) stand for u + v*w with w^2 = c; mul is the product of two such values mod p.
	using Elem = pair<ll,ll>;
	auto mul = [&](Elem x, Elem y){
		const ll re = norm_mod(x.first*y.first+x.second*y.second%p*c,p);
		const ll im = norm_mod(x.first*y.second+x.second*y.first,p);
		return Elem(re,im);
	};
	// Raise (b + w) to the power (p+1)/2 by binary exponentiation.
	Elem base(b,1),res(1,0);
	for(ll e = (p+1)/2; e; e >>= 1){
		if(e&1) res = mul(res,base);
		base = mul(base,base);
	}
	assert(res.second == 0);
	return min(res.first, p-res.first);
}

// Square root of a polynomial, with as many coefficients as the tail f[ord..]
// placed after ord/2 leading zeros.
// lexicographically smallest
// no solution -> {}
Poly<mint> sqrt(Poly<mint> f){
	const int n = f.size();
	// ord = index of the lowest nonzero coefficient
	int ord = 0;
	while(ord<n && !f[ord]) ord++;
	if(ord == n) return {0};
	if(ord&1) return {};
	const ll c0 = sqrt_mod(f[ord].v,mint::mod);
	if(c0 == -1) return {};
	const int n_ = n-ord;
	// Normalize the tail to constant term 1, take its series square root,
	// then scale by the square root of the leading coefficient.
	Poly<mint> tail(f.begin()+ord,f.end());
	auto g = (tail/f[ord]).sqrt(n_) * mint(c0);
	Poly<mint> res(ord/2 + n_);
	for(int i=0;i<n_;i++) res[ord/2 + i] = g[i];
	return res;
}

// Evaluates f at every point of a using a product tree of (x - a_i)
// followed by remainders taken down the tree.
// It is O(Q log^2 Q), at least
// The fast versions apparently build the subproduct tree more cleverly
// maroon https://judge.yosupo.jp/submission/3240 160ms
// verify: https://judge.yosupo.jp/submission/45006 950ms (slow)
template<class mint>
V<mint> MultipointEval(const Poly<mint>& f, V<mint> a){
	const int Q = a.size();
	int s = 1;
	while(s < Q) s *= 2;
	// Heap layout: leaves at s..2s-1 (unused leaves stay {1}), node i = product of its children.
	V<Poly<mint>> g(s+s,{1});
	for(int i=0;i<Q;i++) g[s+i] = {-a[i],1};
	for(int i=s-1;i>=1;i--) g[i] = g[2*i] * g[2*i+1];
	// Reuse the tree to hold f reduced modulo each node.
	g[1] = f % g[1];
	for(int i=2;i<s+Q;i++) g[i] = g[i/2] % g[i];
	V<mint> res(Q);
	for(int i=0;i<Q;i++) res[i] = g[s+i][0];
	return res;
}

// Polynomial through the points (x[i], y[i]), built on the same product tree
// of (x - x_i) as MultipointEval.
// It is O(N log^2 N), at least
// The fast versions handle the subproduct tree more cleverly
template<class mint>
Poly<mint> interpolate(const V<mint>& x, const V<mint>& y){
	const int n = si(x);
	int s = 1;
	while(s < n) s *= 2;
	V<Poly<mint>> g(s+s,{1}), h(s+s);
	for(int i=0;i<n;i++) g[s+i] = {-x[i],1};
	for(int i=s-1;i>=1;i--) g[i] = g[2*i] * g[2*i+1];
	// Push the derivative of the full product down the tree; each leaf's
	// constant term is then used as the divisor for y[i].
	h[1] = g[1].diff();
	for(int i=2;i<s+n;i++) h[i] = h[i/2] % g[i];
	for(int i=0;i<n;i++) h[s+i] = {y[i] / h[s+i][0]};
	// Combine the leaf weights back up the tree.
	for(int i=s-1;i>=1;i--) h[i] = h[2*i]*g[2*i+1] + h[2*i+1]*g[2*i];
	return h[1];
}

// [x^p] f/g
// O(n logn logp)
// O(f logf + g logg logn) (somewhat OK even when f is large)
// verified: https://ac.nowcoder.com/acm/contest/11259/H
// hos,divAt : https://ac.nowcoder.com/acm/contest/view-submission?submissionId=48462458

template<class T>
T divAt(Poly<T> f, Poly<T> g, ll p){
	assert(g.at(0));
	for(; p; p /= 2){
		// gm(x) = g(-x): multiplying by it leaves only even powers in g*gm
		auto gm = g;
		for(int i=1;i<si(g);i+=2) gm[i] = -gm[i];
		const auto f2 = f*gm;
		const auto g2 = g*gm;
		f.clear(); g.clear();
		// keep the coefficients of f2 whose parity matches p, and the even ones of g2
		for(int i=p&1;i<si(f2);i+=2) f.set(i/2,f2[i]);
		for(int i=0;i<si(g2);i+=2) g.set(i/2,g2[i]);
	}
	return f.at(0)/g.at(0);
}

/*
	input:
		the first d terms: a_0, a_1, .., a_{d-1}
		(d+1)-term recurrence: c_0 * a_{i+d} + .. + c_d * a_i = 0
		extra terms of a may be supplied (too few terms is an error)
		ll k
	output:
		a_k
	O(d logd logk)
	verified: https://judge.yosupo.jp/problem/find_linear_recurrence
*/
template<class T>
T linearRecurrenceAt(V<T> a, V<T> c, ll k){
	assert(!c.empty() && c[0]);
	const int d = si(c) - 1;
	assert(si(a) >= d);
	// numerator = (first d terms * c) truncated to d coefficients; a_k = [x^k] numerator / c
	Poly<T> head(a.begin(),a.begin()+d);
	const Poly<T> den(c);
	Poly<T> num = (head * den).low(d);
	return divAt(num, den, k);
}

// Returns f[0..K] (K+1 values)
// f[k] = 0^k + .. + n^k
// \sum_{k>=0} f[k] x^k/k! = e^0x + .. + e^nx = 1-e^(n+1)x / 1-e^x
// O(KlogK)
// 0^0 = 1
// keyword: Faulhaber
//
// Precondition: the global tables must cover fact[K] and ifact[K+1],
// i.e. InitFact must have been called with N >= K+1.

vector<mint> SumOfPower(mint n, int K){
	// The loop below reads ifact[K+1] and the last loop reads fact[K];
	// both bounds are checked (the ifact one is the stricter).
	assert(si(fact) > K);
	assert(si(ifact) > K+1);
	// a = (e^x - 1)/x and b = (e^((n+1)x) - 1)/x, each truncated to K+1 coefficients.
	Poly<mint> a(K+1),b(K+1);
	mint pw = 1;
	rep1(i,K+1){
		pw *= n+1;
		a[i-1] = ifact[i];
		b[i-1] = ifact[i] * pw;
	}
	auto f = b*a.inv(K+1);
	// Convert exponential-generating-function coefficients back to the sums.
	V<mint> res(K+1);
	rep(k,K+1) res[k] = f[k] * fact[k];
	return res;
}
#line 6 "H.cpp"
// Shorthand for polynomials over the file's default modular type.
using poly = Poly<mint>;

// Online product h = f * g: the coefficients of f and g are supplied one index
// at a time through query(), which returns h[i] as soon as f_i and g_i are known.
// Work is organized in levels k with block length 2^k; levels k <= thresh use
// a direct double loop, larger levels use fft/invfft.
template<class mint>
struct Online_Convolution{
	const int thresh = 3;
	V<mint> f,g,h;
	// Per level k: the block captured the first time calc(k) ran
	// (already transformed when k > thresh, raw otherwise).
	VV<mint> fft_f,fft_g;

	pair<V<mint>,V<mint>> calc_fft(int k){
		// Returns the length-2^k suffixes of f and g, zero-padded to length 2^(k+1)
		// and transformed with fft (the transform is skipped when k <= thresh).
		int L = 1<<k;
		V<mint> f_suf(2*L), g_suf(2*L);
		rep(i,L){
			f_suf[i] = f[si(f)-L+i];
			g_suf[i] = g[si(g)-L+i];
		}
		if(k > thresh){
			fft(f_suf); fft(g_suf);
		}
		return {f_suf, g_suf};
	}
	// Adds level k's contribution for the current suffix into h, starting at
	// index si(f)-1 and growing h as needed.
	void calc(int k){
		int L = 1<<k;
		auto [zf,zg] = calc_fft(k);
		V<mint> zh(L*2);
		// First visit of this level: remember the blocks; the contribution is
		// the product of the suffixes with each other.
		bool fst = (k >= si(fft_f));
		if(fst){
			fft_f.eb(zf);
			fft_g.eb(zg);
		}
		if(k > thresh){
			if(fst){
				rep(i,L*2) zh[i] += zf[i] * zg[i];
				invfft(zh);
			}else{
				// Later visits: cross terms between the current suffix and the stored block.
				rep(i,L*2){
					zh[i] += zf[i] * fft_g[k][i];
					zh[i] += zg[i] * fft_f[k][i];
				}
				invfft(zh);
			}
		}else{
			// Small levels: same products computed by a direct double loop.
			if(fst){
				rep(i,L) rep(j,L) zh[i+j] += zf[i] * zg[j];
			}else{
				rep(i,L) rep(j,L) zh[i+j] += zf[i] * fft_g[k][j];
				rep(i,L) rep(j,L) zh[i+j] += zg[i] * fft_f[k][j];
			}
		}
		int off = si(f)-1;
		rep(i,L*2-1){
			if(si(h) <= off+i) h.eb(0);
			h[off + i] += zh[i];
		}
	}

	// Must be called with i = 0,1,2,... in order. Appends f_i and g_i, runs the
	// levels selected by the binary representation of i+2, and returns h[i].
	mint query(int i, mint f_i, mint g_i){
		assert(i == si(f));
		f.eb(f_i);
		g.eb(g_i);
		int K = __builtin_ctz(i+2) + (__builtin_popcount(i+2) > 1 ? 1 : 0);
		rep(k,K) calc(k);
		return h[i];
	}
};

/*
	query(i): receives f_i and g_i, returns (f/g)_i
	assumes g_0 != 0
	O(n log^2)
*/
template<class mint>
struct Online_Division{
	V<mint> f,g,h;
	Online_Convolution<mint> X;
	mint ig0;	// 1/g_0, set by the first query

	mint query(int i, mint f_i, mint g_i){
		assert(i == si(f));
		f.push_back(f_i);
		g.push_back(g_i);
		mint h_i;
		if(i > 0){
			// X multiplies g shifted by one index with h, so this is
			// \sum_{j=1..i} g_j h_{i-j}
			const mint conv = X.query(i-1, g[i], h[i-1]);
			h_i = (f_i - conv) * ig0;
		}else{
			assert(g_i);
			ig0 = g_i.inv();
			h_i = f_i * ig0;
		}
		h.push_back(h_i);
		return h[i];
	}
};

/*
	query(i): receives f_i, returns (e^f)_i
	assumes f_0 == 0
	O(n log^2)
*/
template<class mint>
struct Online_Exp{
	V<mint> F;
	Online_Convolution<mint> X;

	mint query(int i, mint f_i){
		if(i != 0){
			// X multiplies F with the sequence (j+1) f_{j+1}; dividing the
			// (i-1)-th product coefficient by i gives F_i.
			const mint weighted = f_i * i;
			const mint conv = X.query(i-1, F[i-1], weighted);
			F.eb(conv / i);
		}else{
			assert(f_i == 0);
			F.eb(1);
		}
		return F[i];
	}
};

/*
	f = x * (1-g)^-1 * (e^f - g^2 e^{fg})
	f-g = x * (1+g) * exp(fg)

	p0 = (1-g)^-1
	p1 = e^f
	p2 = g^2
	p3 = fg
	p4 = e^p3
	p5 = p2 * p4
	p6: f = x * p0 * (p1-p5)
	p7: f-g = x * (1+g) * p4

	Once f and g are known up to degree i-1, p0..p5 are determined up to degree i-1,
	and from those the degree-i coefficients follow.
*/

mint solve(int N){
	Online_Division<mint> p0;
	Online_Exp<mint> p1,p4;
	Online_Convolution<mint> p2,p3,p5,p6,p7;
	V<mint> f(N+1),g(N+1);
	for(int j=0;j<N;j++){
		// coefficient j of the constant series 1
		const mint unit = (j == 0 ? 1 : 0);
		const mint fj = f[j], gj = g[j];
		const mint p0j = p0.query(j, unit, unit - gj);
		const mint p1j = p1.query(j, fj);
		const mint p2j = p2.query(j, gj, gj);
		const mint p3j = p3.query(j, fj, gj);
		const mint p4j = p4.query(j, p3j);
		const mint p5j = p5.query(j, p2j, p4j);
		const mint p6j = p6.query(j, p0j, p1j - p5j);
		const mint p7j = p7.query(j, unit + gj, p4j);
		// the factor x shifts coefficient j of the products to index j+1
		f[j+1] = p6j;
		g[j+1] = p6j - p7j;
	}
	return (f[N]+g[N]) * fact[N];
}

// Reference implementation of solve(): recomputes every auxiliary series from
// scratch with the offline Poly routines at each step.
mint brute(int N){
	poly f(1), g(1);
	for(int i=1;i<=N;i++){
		// calc [x^{i-1} p_*]
		const poly inv_one_minus_g = (-g+1).inv(i);
		const poly exp_f = f.exp(i);
		const poly g_sq = (g*g).low(i);
		const poly fg = (f*g).low(i);
		const poly exp_fg = fg.exp(i);
		const poly g_sq_exp_fg = (g_sq*exp_fg).low(i);
		const mint fi = (inv_one_minus_g * (exp_f-g_sq_exp_fg)).at(i-1);
		const mint hi = ((g+1) * exp_fg).at(i-1);
		f.set(i,fi);
		g.set(i,fi-hi);
	}
	return (f.at(N) + g.at(N)) * fact[N];
}



int main(){
	cin.tie(0);
	ios::sync_with_stdio(false);		//DON'T USE scanf/printf/puts !!
	cout << fixed << setprecision(20);
	InitFact(1000000);
	
	int N; cin >> N; cout << solve(N) << endl;
}

这程序好像有点Bug,我给组数据试试?

Details

Tip: Click on the bar to expand more detailed information

Test #1:

score: 100
Accepted
time: 6ms
memory: 14768kb

input:

1

output:

1

result:

ok 1 number(s): "1"

Test #2:

score: 0
Accepted
time: 13ms
memory: 14892kb

input:

3

output:

24

result:

ok 1 number(s): "24"

Test #3:

score: 0
Accepted
time: 8ms
memory: 14940kb

input:

5

output:

3190

result:

ok 1 number(s): "3190"

Test #4:

score: 0
Accepted
time: 9ms
memory: 15084kb

input:

100

output:

413875584

result:

ok 1 number(s): "413875584"

Test #5:

score: 0
Accepted
time: 13ms
memory: 14872kb

input:

1

output:

1

result:

ok 1 number(s): "1"

Test #6:

score: 0
Accepted
time: 9ms
memory: 14752kb

input:

2

output:

4

result:

ok 1 number(s): "4"

Test #7:

score: 0
Accepted
time: 8ms
memory: 14776kb

input:

3

output:

24

result:

ok 1 number(s): "24"

Test #8:

score: 0
Accepted
time: 8ms
memory: 14740kb

input:

4

output:

236

result:

ok 1 number(s): "236"

Test #9:

score: 0
Accepted
time: 8ms
memory: 14912kb

input:

5

output:

3190

result:

ok 1 number(s): "3190"

Test #10:

score: 0
Accepted
time: 8ms
memory: 14712kb

input:

6

output:

55182

result:

ok 1 number(s): "55182"

Test #11:

score: 0
Accepted
time: 8ms
memory: 14884kb

input:

7

output:

1165220

result:

ok 1 number(s): "1165220"

Test #12:

score: 0
Accepted
time: 9ms
memory: 14876kb

input:

8

output:

29013896

result:

ok 1 number(s): "29013896"

Test #13:

score: 0
Accepted
time: 9ms
memory: 14816kb

input:

9

output:

832517514

result:

ok 1 number(s): "832517514"

Test #14:

score: 0
Accepted
time: 6ms
memory: 14988kb

input:

10

output:

96547079

result:

ok 1 number(s): "96547079"

Test #15:

score: 0
Accepted
time: 8ms
memory: 14868kb

input:

11

output:

296100513

result:

ok 1 number(s): "296100513"

Test #16:

score: 0
Accepted
time: 8ms
memory: 14784kb

input:

12

output:

672446962

result:

ok 1 number(s): "672446962"

Test #17:

score: 0
Accepted
time: 6ms
memory: 14840kb

input:

13

output:

986909297

result:

ok 1 number(s): "986909297"

Test #18:

score: 0
Accepted
time: 9ms
memory: 14660kb

input:

14

output:

306542229

result:

ok 1 number(s): "306542229"

Test #19:

score: 0
Accepted
time: 12ms
memory: 14812kb

input:

15

output:

8548107

result:

ok 1 number(s): "8548107"

Test #20:

score: 0
Accepted
time: 8ms
memory: 14900kb

input:

16

output:

773960239

result:

ok 1 number(s): "773960239"

Test #21:

score: 0
Accepted
time: 12ms
memory: 14816kb

input:

17

output:

611627547

result:

ok 1 number(s): "611627547"

Test #22:

score: 0
Accepted
time: 8ms
memory: 14692kb

input:

18

output:

91793980

result:

ok 1 number(s): "91793980"

Test #23:

score: 0
Accepted
time: 8ms
memory: 14856kb

input:

19

output:

689202618

result:

ok 1 number(s): "689202618"

Test #24:

score: 0
Accepted
time: 12ms
memory: 14808kb

input:

20

output:

605957782

result:

ok 1 number(s): "605957782"

Test #25:

score: 0
Accepted
time: 60ms
memory: 18128kb

input:

10000

output:

713782215

result:

ok 1 number(s): "713782215"

Test #26:

score: 0
Accepted
time: 132ms
memory: 20964kb

input:

20000

output:

337916836

result:

ok 1 number(s): "337916836"

Test #27:

score: 0
Accepted
time: 202ms
memory: 21932kb

input:

30000

output:

580803285

result:

ok 1 number(s): "580803285"

Test #28:

score: 0
Accepted
time: 270ms
memory: 26188kb

input:

40000

output:

732660392

result:

ok 1 number(s): "732660392"

Test #29:

score: 0
Accepted
time: 355ms
memory: 28072kb

input:

50000

output:

660835595

result:

ok 1 number(s): "660835595"

Test #30:

score: 0
Accepted
time: 429ms
memory: 28608kb

input:

60000

output:

323452070

result:

ok 1 number(s): "323452070"

Test #31:

score: 0
Accepted
time: 519ms
memory: 35980kb

input:

70000

output:

307315915

result:

ok 1 number(s): "307315915"

Test #32:

score: 0
Accepted
time: 586ms
memory: 36912kb

input:

80000

output:

951757567

result:

ok 1 number(s): "951757567"

Test #33:

score: 0
Accepted
time: 664ms
memory: 37456kb

input:

90000

output:

426123208

result:

ok 1 number(s): "426123208"

Test #34:

score: 0
Accepted
time: 771ms
memory: 39904kb

input:

100000

output:

827418228

result:

ok 1 number(s): "827418228"

Test #35:

score: 0
Accepted
time: 842ms
memory: 40428kb

input:

110000

output:

541614559

result:

ok 1 number(s): "541614559"

Test #36:

score: 0
Accepted
time: 919ms
memory: 41340kb

input:

120000

output:

184688986

result:

ok 1 number(s): "184688986"

Test #37:

score: 0
Accepted
time: 995ms
memory: 42192kb

input:

130000

output:

898089371

result:

ok 1 number(s): "898089371"

Test #38:

score: 0
Accepted
time: 1146ms
memory: 55988kb

input:

140000

output:

949540221

result:

ok 1 number(s): "949540221"

Test #39:

score: 0
Accepted
time: 1202ms
memory: 56892kb

input:

150000

output:

767689851

result:

ok 1 number(s): "767689851"

Test #40:

score: 0
Accepted
time: 1291ms
memory: 57784kb

input:

160000

output:

553494563

result:

ok 1 number(s): "553494563"

Test #41:

score: 0
Accepted
time: 1386ms
memory: 58616kb

input:

170000

output:

270711750

result:

ok 1 number(s): "270711750"

Test #42:

score: 0
Accepted
time: 1442ms
memory: 59424kb

input:

180000

output:

108155689

result:

ok 1 number(s): "108155689"

Test #43:

score: 0
Accepted
time: 1512ms
memory: 60476kb

input:

190000

output:

327542856

result:

ok 1 number(s): "327542856"

Test #44:

score: 0
Accepted
time: 1684ms
memory: 64192kb

input:

200000

output:

236144151

result:

ok 1 number(s): "236144151"

Test #45:

score: 0
Accepted
time: 1648ms
memory: 64112kb

input:

198798

output:

16935264

result:

ok 1 number(s): "16935264"

Extra Test:

score: 0
Extra Test Passed