QOJ.ac

QOJ

ID	Problem	Submitter	Result	Time	Memory	Language	File size	Submit time	Judge time
#346834	#8327. 积性函数求和 $10^{13}$ 方便 FFT 版	FR	WA	792ms	8788kb	C++23	33.1kb	2024-03-09 02:15:10	2024-03-09 02:36:29

Judging History

你现在查看的是最新测评结果

[2024-03-09 02:36:29]
管理员手动重测本题所有提交记录

测评结果：WA
用时：792ms
内存：8788kb

查看

[2024-03-09 02:31:47]
管理员手动重测本题所有提交记录

测评结果：71.428571
用时：19881ms
内存：1835008kb

查看

[2024-03-09 02:15:10]
评测

测评结果：71.428571
用时：18244ms
内存：1839216kb

查看

[2024-03-09 02:15:10]
提交

answer

#include <cstdio>
#include <algorithm>
#include <cmath>
#include <functional>
#include <numeric>
#include <vector>

typedef unsigned int uint;
typedef long long int int64;
typedef long long unsigned int uint64;

inline uint _udiv64(uint64 u, uint v)
{
	uint u0 = u, u1 = u >> 32;
	uint q, r;
	__asm__("divl %[v]" : "=a"(q), "=d"(r) : [v] "r"(v), "a"(u0), "d"(u1));
	return q;
}

inline uint64 inv64(uint64 x)
{
	uint64 r = x;
	for (int i = 1; i < 6; ++i)
		r -= (x * r - 1) * r;
	return r;
}

constexpr uint Max_size = 1 << 26 | 5;
constexpr uint Mod = 469762049;
constexpr uint g = 3;

inline uint norm_2(const uint x)
{
	return x < Mod * 2 ? x : x - Mod * 2;
}

inline constexpr uint norm(const uint x)
{
	return x < Mod ? x : x - Mod;
}

struct Z
{
	uint v;
	Z() { }
	constexpr Z(const uint _v) : v(_v) { }
};

inline constexpr Z operator+(const Z x1, const Z x2) { return norm(x1.v + x2.v); }
inline constexpr Z operator-(const Z x1, const Z x2) { return norm(x1.v + Mod - x2.v); }
inline constexpr Z operator-(const Z x) { return x.v ? Mod - x.v : 0; }
inline constexpr Z operator*(const Z x1, const Z x2) { return static_cast<uint64>(x1.v) * x2.v % Mod; }
inline Z &operator+=(Z &x1, const Z x2) { return x1 = x1 + x2; }
inline Z &operator-=(Z &x1, const Z x2) { return x1 = x1 - x2; }
inline Z &operator*=(Z &x1, const Z x2) { return x1 = x1 * x2; }
inline bool operator==(const Z x1, const Z x2) { return x1.v == x2.v; }
inline bool operator!=(const Z x1, const Z x2) { return x1.v != x2.v; }

inline Z Power(Z Base, int Exp)
{
	Z res = 1;
	for (; Exp; Base *= Base, Exp >>= 1)
		if (Exp & 1)
			res *= Base;
	return res;
}

inline Z Inv(const Z x)
{
	return Power(x, Mod - 2);
}

std::pair<Z, Z> sqrt(const Z x)
{
	if (x.v == 0)
		return {0, 0};
	Z w = 1;
	while (Power(w * w - x, (Mod - 1) / 2).v == 1)
		++w.v;
	Z Base0 = w, Base1 = 1;
	w = w * w - x;
	Z res0 = 1, res1 = 0;
	int Exp = (Mod + 1) / 2;
	for (; Exp; std::tie(Base0, Base1) = std::make_pair(Base0 * Base0 + Base1 * Base1 * w, 2 * Base0 * Base1), Exp >>= 1)
		if (Exp & 1)
			std::tie(res0, res1) = std::make_pair(res0 * Base0 + res1 * Base1 * w, res0 * Base1 + Base0 * res1);
	Z res = res0.v < Mod - res0.v ? res0 : Mod - res0;
	return {res, Mod - res};
}

Z inv[Max_size];

void init_inv(const int n)
{
	inv[1] = 1;
	for (int i = 2; i != n; ++i)
		inv[i] = inv[i - 1] * (i - 1);
	Z R = Inv(inv[n - 1] * (n - 1));
	for (int i = n - 1; i != 1; --i)
		inv[i] *= R, R *= i;
}

int size;
uint w[Max_size], w_q[Max_size];

inline uint mult_Shoup_2(const uint x, const uint y, const uint y_q)
{
	uint q = static_cast<uint64>(x) * y_q >> 32;
	return x * y - q * Mod;
}

inline uint mult_Shoup(const uint x, const uint y, const uint y_q)
{
	return norm(mult_Shoup_2(x, y, y_q));
}

inline uint mult_Shoup_q(const uint x, const uint y, const uint y_q)
{
	uint q = static_cast<uint64>(x) * y_q >> 32;
	return q + (x * y - q * Mod >= Mod);
}

void init_w(const int n)
{
	for (size = 2; size < n; size <<= 1)
		;
	uint pr = Power(g, (Mod - 1) / size).v;
	uint pr_q = (static_cast<uint64>(pr) << 32) / Mod;
	uint pr_r = (static_cast<uint64>(pr) << 32) % Mod;
	size >>= 1;
	w[size] = 1, w_q[size] = (static_cast<uint64>(w[size]) << 32) / Mod;
	for (int i = 1; i < size; ++i)
	{
		//w[size + i] = mult_Shoup(w[size + i - 1], pr, pr_q);
		uint x = w[size + i - 1];
		uint64 p = static_cast<uint64>(x) * pr_q;
		uint q = p >> 32;
		w[size + i] = norm(x * pr - q * Mod);
		w_q[size + i] = static_cast<uint>(p) + mult_Shoup_q(pr_r, w[size + i - 1], w_q[size + i - 1]);
	}
	for (int i = size - 1; i; --i)
		w[i] = w[i * 2], w_q[i] = w_q[i * 2];
	size <<= 1;
}

//void DFT_fr_2(Z _A[], const int L)
//{
//	uint *A = reinterpret_cast<uint *>(_A);
//	for (int d = L >> 1; d; d >>= 1)
//		for (int i = 0; i != L; i += d << 1)
//			for (int j = 0; j != d; ++j)
//			{
//				uint x = norm_2(A[i + j] + A[i + d + j]);
//				uint y = mult_Shoup_2(A[i + j] + Mod * 2 - A[i + d + j], w[d + j], w_q[d + j]);
//				A[i + j] = x, A[i + d + j] = y;
//			}
//}
void DFT_fr_2(Z _A[], const int L)
{
	if (L == 1)
		return;
	uint *A = reinterpret_cast<uint *>(_A);
//	auto butterfly1 = [](uint &a, uint &b)
//	{
//		uint x = norm_2(a + b), y = norm_2(a + Mod * 2 - b);
//		a = x, b = y;
//	};
#define butterfly1(a, b)\
	do\
	{\
		uint _a = a, _b = b;\
		uint x = norm_2(_a + _b), y = norm_2(_a + Mod * 2 - _b);\
		a = x, b = y;\
	} while (0)
	if (L == 2)
	{
		butterfly1(A[0], A[1]);
		return;
	}
//	auto butterfly = [](uint &a, uint &b, const uint _w, const uint _w_q)
//	{
//		uint x = norm_2(a + b), y = mult_Shoup_2(a + Mod * 2 - b, _w, _w_q);
//		a = x, b = y;
//	};
#define butterfly(a, b, _w, _w_q)\
	do\
	{\
		uint _a = a, _b = b;\
		uint x = norm_2(_a + _b), y = mult_Shoup_2(_a + Mod * 2 - _b, _w, _w_q);\
		a = x, b = y;\
	} while (0)
	if (L == 4)
	{
		butterfly1(A[0], A[2]);
		butterfly(A[1], A[3], w[3], w_q[3]);
		butterfly1(A[0], A[1]);
		butterfly1(A[2], A[3]);
		return;
	}
	for (int d = L >> 1; d != 4; d >>= 1)
		for (int i = 0; i != L; i += d << 1)
			for (int j = 0; j != d; j += 4)
			{
				butterfly(A[i + j + 0], A[i + d + j + 0], w[d + j + 0], w_q[d + j + 0]);
				butterfly(A[i + j + 1], A[i + d + j + 1], w[d + j + 1], w_q[d + j + 1]);
				butterfly(A[i + j + 2], A[i + d + j + 2], w[d + j + 2], w_q[d + j + 2]);
				butterfly(A[i + j + 3], A[i + d + j + 3], w[d + j + 3], w_q[d + j + 3]);
			}
	for (int i = 0; i != L; i += 8)
	{
		butterfly1(A[i + 0], A[i + 4]);
		butterfly(A[i + 1], A[i + 5], w[5], w_q[5]);
		butterfly(A[i + 2], A[i + 6], w[6], w_q[6]);
		butterfly(A[i + 3], A[i + 7], w[7], w_q[7]);
	}
	for (int i = 0; i != L; i += 8)
	{
		butterfly1(A[i + 0], A[i + 2]);
		butterfly(A[i + 1], A[i + 3], w[3], w_q[3]);
		butterfly1(A[i + 4], A[i + 6]);
		butterfly(A[i + 5], A[i + 7], w[3], w_q[3]);
	}
	for (int i = 0; i != L; i += 8)
	{
		butterfly1(A[i + 0], A[i + 1]);
		butterfly1(A[i + 2], A[i + 3]);
		butterfly1(A[i + 4], A[i + 5]);
		butterfly1(A[i + 6], A[i + 7]);
	}
#undef butterfly1
#undef butterfly
}

void DFT_fr(Z _A[], const int L)
{
	DFT_fr_2(_A, L);
	for (int i = 0; i != L; ++i)
		_A[i] = norm(_A[i].v);
}

//void IDFT_fr(Z _A[], const int L)
//{
//	uint *A = reinterpret_cast<uint *>(_A);
//	for (int d = 1; d != L; d <<= 1)
//		for (int i = 0; i != L; i += d << 1)
//			for (int j = 0; j != d; ++j)
//			{
//				uint x = norm_2(A[i + j]);
//				uint t = mult_Shoup_2(A[i + d + j], w[d + j], w_q[d + j]);
//				A[i + j] = x + t, A[i + d + j] = x + Mod * 2 - t;
//			}
//	std::reverse(A + 1, A + L);
//	if (L == 2)
//		A[0] = norm_2(A[0]), A[1] = norm_2(A[1]);
//	int k = __builtin_ctz(L);
//	for (int i = 0; i != L; ++i)
//	{
//		uint64 m = -A[i] & (L - 1);
//		A[i] = norm((A[i] + m * Mod) >> k);
//	}
//}
void IDFT_fr(Z _A[], const int L)
{
	if (L == 1)
		return;
	uint *A = reinterpret_cast<uint *>(_A);
//	auto butterfly1 = [](uint &a, uint &b)
//	{
//		uint x = norm_2(a), t = norm_2(b);
//		a = x + t, b = x + Mod * 2 - t;
//	};
#define butterfly1(a, b)\
	do\
	{\
		uint _a = a, _b = b;\
		uint x = norm_2(_a), t = norm_2(_b);\
		a = x + t, b = x + Mod * 2 - t;\
	} while (0)
	if (L == 2)
	{
		butterfly1(A[0], A[1]);
		A[0] = norm(norm_2(A[0])), A[0] = A[0] & 1 ? A[0] + Mod : A[0], A[0] /= 2;
		A[1] = norm(norm_2(A[1])), A[1] = A[1] & 1 ? A[1] + Mod : A[1], A[1] /= 2;
		return;
	}
//	auto butterfly = [](uint &a, uint &b, const uint _w, const uint _w_q)
//	{
//		uint x = norm_2(a), t = mult_Shoup_2(b, _w, _w_q);
//		a = x + t, b = x + Mod * 2 - t;
//	};
#define butterfly(a, b, _w, _w_q)\
	do\
	{\
		uint _a = a, _b = b;\
		uint x = norm_2(_a), t = mult_Shoup_2(_b, _w, _w_q);\
		a = x + t, b = x + Mod * 2 - t;\
	} while (0)
	if (L == 4)
	{
		butterfly1(A[0], A[1]);
		butterfly1(A[2], A[3]);
		butterfly1(A[0], A[2]);
		butterfly(A[1], A[3], w[3], w_q[3]);
		std::swap(A[1], A[3]);
		for (int i = 0; i != L; ++i)
		{
			uint64 m = -A[i] & 3;
			A[i] = norm((A[i] + m * Mod) >> 2);
		}
		return;
	}
	for (int i = 0; i != L; i += 8)
	{
		butterfly1(A[i + 0], A[i + 1]);
		butterfly1(A[i + 2], A[i + 3]);
		butterfly1(A[i + 4], A[i + 5]);
		butterfly1(A[i + 6], A[i + 7]);
	}
	for (int i = 0; i != L; i += 8)
	{
		butterfly1(A[i + 0], A[i + 2]);
		butterfly(A[i + 1], A[i + 3], w[3], w_q[3]);
		butterfly1(A[i + 4], A[i + 6]);
		butterfly(A[i + 5], A[i + 7], w[3], w_q[3]);
	}
	for (int i = 0; i != L; i += 8)
	{
		butterfly1(A[i + 0], A[i + 4]);
		butterfly(A[i + 1], A[i + 5], w[5], w_q[5]);
		butterfly(A[i + 2], A[i + 6], w[6], w_q[6]);
		butterfly(A[i + 3], A[i + 7], w[7], w_q[7]);
	}
	for (int d = 8; d != L; d <<= 1)
		for (int i = 0; i != L; i += d << 1)
			for (int j = 0; j != d; j += 4)
			{
				butterfly(A[i + j + 0], A[i + d + j + 0], w[d + j + 0], w_q[d + j + 0]);
				butterfly(A[i + j + 1], A[i + d + j + 1], w[d + j + 1], w_q[d + j + 1]);
				butterfly(A[i + j + 2], A[i + d + j + 2], w[d + j + 2], w_q[d + j + 2]);
				butterfly(A[i + j + 3], A[i + d + j + 3], w[d + j + 3], w_q[d + j + 3]);
			}
#undef butterfly1
#undef butterfly
	std::reverse(A + 1, A + L);
	int k = __builtin_ctz(L);
	for (int i = 0; i != L; ++i)
	{
		uint64 m = -A[i] & (L - 1);
		A[i] = norm((A[i] + m * Mod) >> k);
	}
}

struct DS
{
	static uint64 N;
	static int R2;
	
	std::vector<Z> sv;
	std::vector<Z> lv;
	
	DS() : sv(1 + R2, 0), lv(1 + R2, 0) { }
	
	Z &operator[](const uint64 x)
	{
		return x <= R2 ? sv[x] : lv[N / x];
	}
	
	const Z &operator[](const uint64 x) const
	{
		return x <= R2 ? sv[x] : lv[N / x];
	}
	
	void partial_sum()
	{
		for (int i = 2; i <= R2; ++i)
			sv[i] += sv[i - 1];
		lv[R2] += sv[R2];
		for (int i = R2 - 1; i >= 1; --i)
			lv[i] += lv[i + 1];
	}
	
	void adjacent_difference()
	{
		for (int i = 1; i < R2; ++i)
			lv[i] -= lv[i + 1];
		lv[R2] -= sv[R2];
		for (int i = R2; i >= 2; --i)
			sv[i] -= sv[i - 1];
	}
};
uint64 DS::N;
int DS::R2;

inline void add(Z &x, const uint a, const uint b)
{
	x = (x.v + 1ULL * a * b) % Mod;
}
inline void add(Z &x, const Z a, const uint b) { add(x, a.v, b); }
inline void add(Z &x, const Z a, const Z b) { add(x, a.v, b.v); }
inline void sub(Z &x, const Z a, const Z b) { add(x, a.v, Mod - b.v); }

template<typename I>
struct subrange
{
	I i, s;
	
	subrange(I _i, I _s) : i(_i), s(_s) { }
	
	const I &begin() const { return i; }
	const I &end() const { return s; }
	bool empty() const { return i == s; }
	auto size() const { return s - i; }
};

DS mult_sparse(const DS &a, const DS &b)
{
	const uint64 &N = DS::N;
	const int &R2 = DS::R2;
	
	DS res;
	
	auto s1 = [](const DS &ds) -> std::vector<std::pair<int, Z>>
	{
		std::vector<std::pair<int, Z>> vec;
		for (int i = 1; i <= R2; ++i)
			if (ds.sv[i] != ds.sv[i - 1])
				vec.emplace_back(i, ds.sv[i] - ds.sv[i - 1]);
		vec.emplace_back(R2 + 1, 0);
		return vec;
	};
	
	auto s2 = [&res](const int x, const Z y, const std::vector<std::pair<int, Z>> &p, const int r)
	{
		int i = 0;
		for (const int X = R2 / x; i != r && p[i].first <= X; ++i)
			add(res.sv[x * p[i].first], y, p[i].second);
		for (const uint64 Nx = N / x; i != r; ++i)
			add(res.lv[_udiv64(Nx, p[i].first)], y, p[i].second);
	};
	
	auto s3 = [&res](const int x, const Z y, int i, const DS &ds)
	{
		const uint64 Nx = N / x;
		for (int X = R2 / x; i > X; --i)
			add(res.lv[i], y, ds.sv[_udiv64(Nx, i)]);
		for (; i >= 1; --i)
			add(res.lv[i], y, ds.lv[x * i]);
	};

	const auto pa = s1(a);
	// const auto va = std::ranges::subrange(pa.begin(), pa.end() - 1);
	const auto va = subrange(pa.begin(), pa.end() - 1);
	const auto pb = s1(b);
	// const auto vb = std::ranges::subrange(pb.begin(), pb.end() - 1);
	const auto vb = subrange(pb.begin(), pb.end() - 1);
	
	for (int l = 0, r = va.size(); const auto &[x, y] : vb)
	{
		const uint64 Nx = N / x;
		while (r && Nx / pa[r - 1].first < r - 1)
			--r;
		s2(x, y, pa, r);
		if (r)
			sub(res[1ULL * x * pa[r].first], y, a.sv[pa[r - 1].first]);
	}
	for (const auto &[x, y] : va)
	{
		sub(res.lv[_udiv64(R2, x)], y, b.sv[R2]);
	}
	
	res.partial_sum();
	
	for (int l = 0, r = va.size(); const auto &[x, y] : vb)
	{
		const uint64 Nx = N / x;
		while (r && Nx / pa[r - 1].first < r - 1)
			--r;
		s3(x, y, _udiv64(Nx, pa[r].first), a);
	}
	for (const auto &[x, y] : va)
	{
		for (int j = 1; x * j <= R2; ++j)
			add(res.lv[j], y, b.lv[x * j]);
	}
	return res;
}

#include <bits/stdc++.h>

std::ostream &operator<<(std::ostream &os, const Z x)
{
	if (x.v <= Mod / 2)
		return os << x.v;
	else
		return os << int(x.v - Mod);
}

int ok, exact, dfscnt;

DS calc(const uint64 N, const Z A, const Z B)
{
	DS::N = N;
	const int R2 = std::sqrt(N + 0.5);
	DS::R2 = R2;
	if (N == 0)
		return DS();
	if (N == 1)
	{
		DS ds;
		ds.sv[1] = ds.lv[1] = 1;
		return ds;
	}
	
	const int R4 = std::sqrt(R2 + 0.5);
	const int MaxSP = std::max<int>(2, 16 * std::log((long double)(N)));
	
	std::vector<char> pf(1 + R2);
	std::vector<int> P;
	int L = -1;
	
	for (int p = 2; p <= R2; ++p)
		if (!pf[p])
		{
			if (L == -1 && p > MaxSP)
				L = P.size();
			if (p <= R4)
				for (int j = p * p; j <= R2; j += p)
					pf[j] = 1;
			P.push_back(p);
		}
	if (L == -1)
		L = P.size();
	
	auto attach_powerful = [&](DS &ds, const std::function<Z (uint64, uint, uint)> &f) -> DS & // `ds` is the `DS` of `f` without powerful
	{
		DS powerful;
		auto gen = [&](int i, const uint64 x, const Z y)
		{
			auto gen_rec = [&](const auto &self, int i, const uint64 x, const Z y) -> void
			{
				powerful[x] += y;
				for (; i != P.size() && 1ULL * P[i] * P[i] <= N / x; ++i)
				{
					const uint64 Nx = N / x;
					const int p = P[i];
					const Z fp = f(p, p, 1);
					// assert(fp == ds.sv[p] - ds.sv[p - 1]);
					uint64 pe = 1ULL * p * p;
					Z fpe = 0;
					for (int e = 2; pe <= Nx; pe *= p, ++e)
					{
						fpe = f(pe, p, e) - fpe * fp;
						if (fpe.v)
							self(self, i + 1, x * pe, y * fpe);
					}
				}
			};
			return gen_rec(gen_rec, i, x, y);
		};
		gen(0, 1, 1);
		powerful.partial_sum();
		return ds = mult_sparse(ds, powerful);
	};
	
	auto fix_powerful = [&](DS &ds, const std::function<Z (uint64, uint, uint)> &f, const std::function<Z (uint64, uint, uint)> &now) -> DS &
	{
		DS powerful;
		auto gen = [&](int i, const uint64 x, const Z y)
		{
			auto gen_rec = [&](const auto &self, int i, const uint64 x, const Z y) -> void
			{
				powerful[x] += y;
				for (; i != P.size() && 1ULL * P[i] * P[i] <= N / x; ++i)
				{
					const uint64 Nx = N / x;
					const int p = P[i];
					Z fp[55] = {1, 0};
					Z nowp[55] = {1, now(p, p, 1)};
					// assert(f(p, p, 1) == ds.sv[p] - ds.sv[p - 1]);
					// assert(f(p, p, 1) == nowp[1]);
					uint64 pe = 1ULL * p * p;
					for (int e = 2; pe <= Nx; pe *= p, ++e)
					{
						fp[e] = f(pe, p, e);
						nowp[e] = now(pe, p, e);
						for (int ee = 0; ee < e; ++ee)
							fp[e] -= fp[ee] * nowp[e - ee];
						if (fp[e].v)
							self(self, i + 1, x * pe, y * fp[e]);
					}
				}
			};
			return gen_rec(gen_rec, i, x, y);
		};
		gen(0, 1, 1);
		powerful.partial_sum();
		return ds = mult_sparse(ds, powerful);
	};
	
	auto mult_powerful = [&](const DS &ds, const std::function<Z (uint64, uint, uint)> &f) // `ds` is the `DS` of another function
	{
		DS powerful;
		auto gen = [&](int i, const uint64 x, const Z y)
		{
			auto gen_rec = [&](const auto &self, int i, const uint64 x, const Z y) -> void
			{
				powerful[x] += y;
				for (; i != P.size() && 1ULL * P[i] * P[i] <= N / x; ++i)
				{
					const uint64 Nx = N / x;
					const int p = P[i];
					uint64 pe = 1ULL * p * p;
					for (int e = 2; pe <= Nx; pe *= p, ++e)
					{
						Z fpe = f(pe, p, e);
						if (fpe.v)
							self(self, i + 1, x * pe, y * fpe);
					}
				}
			};
			return gen_rec(gen_rec, i, x, y);
		};
		gen(0, 1, 1);
		powerful.partial_sum();
		return mult_sparse(ds, powerful);
	};
	
	auto attach_small = [&](DS &ds, const std::function<Z (uint)> &f) -> DS & // no small p in `ds`!
	{
		ds.adjacent_difference();
		std::vector<int> pred(1 + R2 + 1);
		pred[1] = 0;
		for (int i = 2; i <= R2 + 1; ++i)
			pred[i] = ds.sv[i - 1].v ? i - 1 : pred[i - 1];
		for (int pid = L - 1; pid >= 0; --pid)
		{
			const int p = P[pid];
			Z y = f(p);
			{
				int j = 1, k = p;
				for (; (j + 1) * p - 1 <= R2; ++j)
					for (; k != (j + 1) * p; ++k)
						if (ds.lv[k].v)
							add(ds.lv[j], ds.lv[k], y);
				for (; k <= R2; ++k)
					if (ds.lv[k].v)
						add(ds.lv[j], ds.lv[k], y);
			}
			{
				int j = pred[R2 + 1];
				uint64 Nx = N / p;
				for (int X = R2 / p; j > X; j = pred[j])
					add(ds.lv[_udiv64(Nx, j)], ds.sv[j], y);
				int k = R2 + 1;
				for (; j; j = pred[j])
				{
					while (pred[k] > j * p)
						k = pred[k];
					// assert(k != j * p);
					pred[j * p] = pred[k], pred[k] = j * p;
					add(ds.sv[j * p], ds.sv[j], y);
				}
			}
		}
		ds.partial_sum();
		return ds;
	};
	
	auto eliminate_small = [&](DS &ds, const std::function<Z (uint)> &f) // no small p in `ds * f`
	{
		ds.adjacent_difference();
		std::vector<int> succ(1 + R2 + 1);
		succ[R2] = R2 + 1;
		for (int i = R2 - 1; i >= 0; --i)
			succ[i] = ds.sv[i + 1].v ? i + 1 : succ[i + 1];
		for (int pid = L - 1; pid >= 0; --pid)
		{
			const int p = P[pid];
			Z y = f(p);
			{
				int j = succ[0], k = succ[0];
				for (int X = R2 / p; j <= X; j = succ[j])
				{
					sub(ds.sv[j * p], ds.sv[j], y);
					while (succ[k] < j * p)
						k = succ[k];
					// assert(succ[k] == j * p);
					succ[k] = succ[j * p];
				}
				for (uint64 Nx = N / p; j <= R2; j = succ[j])
					sub(ds.lv[_udiv64(Nx, j)], ds.sv[j], y);
			}
			{
				int j = R2 / p, k = R2;
				for (; j; --j)
					for (; k >= j * p; --k)
						if (ds.lv[k].v)
							sub(ds.lv[j], ds.lv[k], y);
			}
		}
		ds.partial_sum();
		return ds;
	};
	
	auto attach_small_inv = [&](DS &ds, const std::function<Z (uint)> &f) -> DS & // no small p in `ds`! // 未经测试
	{
		ds.adjacent_difference();
		std::vector<int> succ(1 + R2 + 1);
		succ[R2] = R2 + 1;
		for (int i = R2 - 1; i >= 0; --i)
			succ[i] = ds.sv[i + 1].v ? i + 1 : succ[i + 1];
		for (int pid = 0; pid < L; ++pid)
		{
			const int p = P[pid];
			Z y = f(p);
			{
				int j = succ[0], k = succ[0];
				for (int X = R2 / p; j <= X; j = succ[j])
				{
					sub(ds.sv[j * p], ds.sv[j], y);
					while (succ[k] < j * p)
						k = succ[k];
					// assert(succ[k] != j * p);
					succ[j * p] = succ[k], succ[k] = j * p;
				}
				for (uint64 Nx = N / p; j <= R2; j = succ[j])
					sub(ds.lv[_udiv64(Nx, j)], ds.sv[j], y);
			}
			{
				int j = R2 / p, k = R2;
				for (; j; --j)
					for (; k >= j * p; --k)
						if (ds.lv[k].v)
							sub(ds.lv[j], ds.lv[k], y);
			}
		}
		ds.partial_sum();
		return ds;
	};
	
	auto eliminate_small_inv = [&](DS &ds, const std::function<Z (uint)> &f) // no small p in `res / f`!
	{
		ds.adjacent_difference();
		std::vector<int> pred(1 + R2 + 1);
		pred[1] = 0;
		for (int i = 2; i <= R2 + 1; ++i)
			pred[i] = ds.sv[i - 1].v ? i - 1 : pred[i - 1];
		for (int pid = 0; pid < L; ++pid)
		{
			const int p = P[pid];
			Z y = f(p);
			{
				int j = 1, k = p;
				for (; (j + 1) * p - 1 <= R2; ++j)
					for (; k != (j + 1) * p; ++k)
						if (ds.lv[k].v)
							add(ds.lv[j], ds.lv[k], y);
				for (; k <= R2; ++k)
					if (ds.lv[k].v)
						add(ds.lv[j], ds.lv[k], y);
			}
			{
				int j = pred[R2 + 1];
				uint64 Nx = N / p;
				for (int X = R2 / p; j > X; j = pred[j])
					add(ds.lv[_udiv64(Nx, j)], ds.sv[j], y);
				int k = R2 + 1;
				for (; j; j = pred[j])
				{
					while (pred[k] > j * p)
						k = pred[k];
					// assert(pred[k] == j * p);
					pred[k] = pred[j * p];
					add(ds.sv[j * p], ds.sv[j], y);
				}
			}
		}
		ds.partial_sum();
		return ds;
	};
	
	const int S = [&]()
	{
		int S = 1 + R2 / std::sqrt(std::log((long double)(N))) / 2;
		
		auto Fslog = [&S](const uint64 i) -> int { return S * std::log((long double)(i)); };
		
		for (int U = 1 << (1 + std::__lg(Fslog(N))); ++S, Fslog(N) < U; )
			;
		--S;
		
		return S;
	}();
	const int NS = (N + S - 1) / S;
	const int pfc = [&]()
	{
		int c = 0;
		uint64 x = 1;
		for (int i = L; i != P.size(); ++i)
		{
			x *= P[i];
			if (x > N)
				break;
			++c;
		};
		return c;
	}();
	std::cerr << "N MaxSP L S NS pfc (" << double(clock()) / CLOCKS_PER_SEC << "): " << N << ' ' << MaxSP << ' ' << L << ' ' << S << ' ' << NS << ' ' << pfc << std::endl;
	
	auto Cslog = [&S](const uint64 i) -> int { return std::ceil(S * std::log((long double)(i))); };
	auto Fslog = [&S](const uint64 i) -> int { return S * std::log((long double)(i)); };
	
	std::vector<int> scslogp(1 + R2, 0);
	for (auto p : P)
	{
		const int slogp = Cslog(p);
		for (int i = 1; i * p <= R2; ++i)
		{
			scslogp[i * p] += slogp;
			for (int x = i; x % p == 0; x /= p)
				scslogp[i * p] += slogp;
		}
	}
	std::vector<int> lfslog(1 + R2 + 1);
	for (int i = 1; i <= R2; ++i)
		lfslog[i] = Fslog(N / i);
	lfslog[R2 + 1] = -1;
	
	init_w(lfslog[1] + 1);
	init_inv(size);
	
	const int len = lfslog[1] + 1;
	const int slen = (len + 1) >> 1;
	std::cerr << "len slen size (" << double(clock()) / CLOCKS_PER_SEC << "): " << len << ' ' << slen << ' ' << size << std::endl;
	
	std::vector<int> lid(size);
	for (int i = 1; i <= R2; ++i)
		for (int j = lfslog[i]; j > lfslog[i + 1]; --j)
			lid[j] = i;
	std::cerr << "pre 0 (" << double(clock()) / CLOCKS_PER_SEC << ")" << std::endl;
	
	int maxR2error = [&]()
	{
		int c = 0;
		{
			uint64 x = 1;
			for (int i = L; i != P.size(); ++i)
			{
				x *= P[i];
				if (x > R2)
					break;
				++c;
			};
		}
		int x = R2;
		while (Cslog(x) > lfslog[R2] - c)
			--x;
		assert(Cslog(N / (N / (x + 1) + 1)) <= lfslog[R2] - c);
		return N / (x + 1) + 1;
	}();
	std::cerr << "pre 1 (" << double(clock()) / CLOCKS_PER_SEC << ")" << std::endl;
	
	const uint64 ll = std::max<int64>(1LL, N - NS * pfc + 1);
	const int fixl = N - ll;
	std::vector<uint64> rx(fixl + 1);
	for (uint64 i = ll; i <= N; ++i)
		rx[i - ll] = i;
	std::cerr << "pre 2 (" << double(clock()) / CLOCKS_PER_SEC << "): " << fixl << std::endl;
	/*
	N MaxSP L S NS pfc (0.002): 992802097447 441 85 151836 6538648 4
	len slen size (0.094): 4194287 2097144 4194304
	pre 0 (0.098)
	pre 1 (0.099)
	pre 2 (0.164): 26154592
	pre 3 (0.52): 21229213
	pre done (0.853)
	*/
	std::vector<int> cur(fixl + 1 + 1);
	std::vector<int> ps;
	{
		for (int pid = L; pid < P.size(); ++pid)
		{
			const int p = P[pid];
			const uint64 ip = inv64(p);
			for (int i = ((ll - 1) / p + 1) * p - ll; i <= fixl; i += p)
			{
				rx[i] *= ip;
				++cur[i];
			}
		}
		std::partial_sum(cur.begin(), cur.end(), cur.begin());
		ps.resize(cur.back());
		std::cerr << "pre 3 (" << double(clock()) / CLOCKS_PER_SEC << "): " << cur.back() << std::endl;
		for (int pid = L; pid < P.size(); ++pid)
		{
			const int p = P[pid];
			for (int i = ((ll - 1) / p + 1) * p - ll; i <= fixl; i += p)
				ps[--cur[i]] = p;
		}
		for (int i = 0; i <= fixl; ++i)
			std::reverse(ps.begin() + cur[i], ps.begin() + cur[i + 1]);
		std::cerr << "pre done (" << double(clock()) / CLOCKS_PER_SEC << ")" << std::endl;
	}
	/*/
	const int pfcnt = [&]()
	{
		int cnt = 0;
		for (auto p : P)
			if (p > MaxSP)
				cnt += N / p - (ll - 1) / p;
		return cnt;
	}();
	std::vector<int> cur(fixl + 1 + 1);
	std::vector<int> ps = [&]()
	{
		std::vector<std::pair<int, int>> _ps;
		_ps.reserve(pfcnt);
		std::vector<int> _ps_first(fixl + 1, -1);
		for (int pid = L; pid < P.size(); ++pid)
		{
			int p = P[pid];
			uint64 ip = inv64(p);
			for (int i = ((ll - 1) / p + 1) * p - ll; i <= fixl; i += p)
			{
				rx[i] *= ip;
				_ps.emplace_back(p, _ps_first[i]);
				_ps_first[i] = _ps.size() - 1;
			}
		}
		std::cerr << "pre 3 (" << double(clock()) / CLOCKS_PER_SEC << "): " << pfcnt << std::endl;
		std::vector<int> res(pfcnt);
		cur[0] = 0;
		for (int i = 0; i <= fixl; ++i)
		{
			cur[i + 1] = cur[i];
			for (int k = _ps_first[i]; k != -1; k = _ps[k].second)
				ps[cur[i + 1]++] = _ps[k].first;
		}
		return res;
	}();
	std::cerr << "pre done (" << double(clock()) / CLOCKS_PER_SEC << ")" << std::endl;
	/**/
	
	auto calc_medium = [&](const std::function<Z (uint)> &f)
	{
		std::cerr << "cm (" << double(clock()) / CLOCKS_PER_SEC << ")" << std::endl;
		
		Z *df = new Z[size];
		Z *_ef = new Z[1 + R2];
		Z *_ief = new Z[1 + R2];
		
		_ef[0] = 0;
		std::fill(_ef + 1, _ef + 1 + R2, 1);
		_ief[0] = 0;
		std::fill(_ief + 1, _ief + 1 + R2, 1);
		std::fill(df, df + size, 0);
		
		for (const auto p : P)
		{
			const Z fp = p <= MaxSP ? 0 : f(p);
			
			for (int i = 1; i * p <= R2; ++i)
			{
				int e = 1;
				for (int x = i; x % p == 0; x /= p)
					++e;
				if (e == 1)
					_ef[i * p] *= fp;
				else
					_ef[i * p] = 0;
				_ief[i * p] *= Power(-fp, e);
			}
			
			int e = 1;
			Z slogp_fpe = scslogp[p] * fp;
			for (; scslogp[p] * e < len; ++e, slogp_fpe *= fp)
			{
				if (e & 1)
					df[scslogp[p] * e] += slogp_fpe;
				else
					df[scslogp[p] * e] -= slogp_fpe;
			}
		}
		
		Z *exp = new Z[size];
		Z *dexp = new Z[size];
		Z *iexp = new Z[size];
		
		std::fill(exp, exp + size, 0);
		std::fill(dexp, dexp + size, 0);
		std::fill(iexp, iexp + size, 0);
		
		for (int i = 1; i <= R2; ++i)
			if (scslogp[i] < slen)
			{
				exp[scslogp[i]] += _ef[i];
				dexp[scslogp[i]] += _ef[i] * scslogp[i];
				iexp[scslogp[i]] += _ief[i];
			}
		delete[] _ief;
		
		const std::vector<Z> _exp(exp, exp + slen);
		
		DFT_fr(exp, size);
		DFT_fr(dexp, size);
		DFT_fr(iexp, size);
		
		Z *res = new Z[size];
		std::copy(df, df + slen, res);
		std::fill(res + slen, res + size, 0);
		
		DFT_fr(res, size);
		
		for (int i = 0; i < size; ++i)
			res[i] = iexp[i] * (dexp[i] - exp[i] * res[i]);
		delete[] dexp;
		delete[] iexp;
		
		IDFT_fr(res, size);
		
		std::fill(res, res + slen, 0);
		for (int i = slen; i < len; ++i)
			res[i] = (res[i] - df[i]) * inv[i];
		std::fill(res + len, res + size, 0);
		delete[] df;
		
		DFT_fr(res, size);
		
		for (int i = 0; i < size; ++i)
			res[i] *= exp[i];
		
		delete[] exp;
		
		IDFT_fr(res, size);
		
		for (int i = 0; i < slen; ++i)
			res[i] = _exp[i];
		for (int i = slen; i < len; ++i)
			res[i] = -res[i];
		std::cerr << "cm fft done (" << double(clock()) / CLOCKS_PER_SEC << ")" << std::endl;
		
		DS r;
		std::copy(_ef, _ef + 1 + R2, r.sv.begin());
		delete[] _ef;
		for (int i = 0; i < len; ++i)
			if (lid[i])
				r.lv[lid[i]] += res[i];
		delete[] res;
		
		for (int i = 1; i <= R2; ++i)
			r.lv[R2] -= r.sv[i];
		
		assert(static_cast<uint64>(NS) * pfc <= N);
		
		/*
		MaxSP = std::max<int>(2, 16 * std::log((long double)(N)));
		S = 1 + R2 / std::sqrt(std::log((long double)(N))) / 2;
		n = 992802097447;
		=> 7014438 6708270 17730141 ; 7014438 6708270 0
		*
		auto dfs = [&](const uint64 i, const int c)
		{
			const int curi = cur[i - ll];
			auto dfs_rec = [&](const auto &self, int n, int c, int sslogp, uint64 t, uint64 x, Z fx)
			{
				if (c > n)
					return;
				if (n == 0)
				{
					if ((t + 1) * x > N) // (N / x == t)
					{
						int tt = std::min<uint64>(t, R2);
						if (tt != lid[sslogp])
						{
							if (sslogp > lfslog[1])
								r.lv[tt] += fx;
							else if (lid[sslogp] != tt)
							{
								r.lv[lid[sslogp]] -= fx;
								r.lv[tt] += fx;
							}
							++ok;
						}
						else
							++exact;
					}
					else
						++dfscnt;
					return;
				}
				--n;
				const int p = ps[curi + n];
				self(self, n, c - 1, sslogp + scslogp[p], t, x * p, fx * f(p));
				self(self, n, c, sslogp, t * p, x, fx);
			};
			dfs_rec(dfs_rec, cur[i - ll + 1] - cur[i - ll], c, 0, rx[i - ll], 1, 1);
		};
		
		for (int c = 1; c <= pfc; ++c)
			for (uint64 i = N - NS * (c - 1); i >= ll && i > N - NS * c; --i)
				dfs(i, c);
		/*/
		auto dfs = [&](const uint64 i, const int c)
		{
			const int tu = std::min<uint64>(i / (N - i + 1), // 这个限制和上文注释的原始代码中确保每个 x 只枚举一次的判断 (N / x == t) 是等价的
				maxR2error); // x 太小，也就是 i / t 太小，t 太大的话，误差无法影响 lv // 优化效果似乎有限 // 直接用 R2 也没有算错，我蒙古，可能反例需要构造
			// 如果去掉上面的 maxR2error 的话，可能 int 不够存
			if (rx[i - ll] > tu)
				return;
			const int rxi = rx[i - ll];
			const int &curi = cur[i - ll];
			const int ni = cur[i - ll + 1] - cur[i - ll];
			
			if (ni == 1)
			{
				const int p = ps[curi];
				const int slog = scslogp[p];
				const Z fp = f(p); // 只有 p 的贡献可能出了偏差
				if (slog > lfslog[1])
					r.lv[1] += fp;
				else if (std::min(rxi, R2) > lid[slog])
				{
					// std::cerr << p << ' ' << rxi << ' ' << R2 << ' '  << lid[slog] << std::endl;
					// std::cerr << lfslog[R2] << ' ' << lfslog[R2 - 1] << ' ' << lfslog[R2 - 2] << ' ' << lfslog[R2 - 4] << ' ' << lfslog[1] << std::endl;
					r.lv[lid[slog]] -= fp;
					r.lv[std::min(rxi, R2)] += fp;
					// if (rxi > R2)
					// 	std::cerr << "FFFFFFFFF" << std::endl;
				}
				return;
			}
			if (ni == 2) // 单独处理两个素数的情况 不单独处理也不加下面的优化的话好像还更快 我蒙古
			{
				const int p0 = ps[curi], p1 = ps[curi + 1];
				const int slog = scslogp[p0] + scslogp[p1];
				const Z fp = f(p0) * f(p1); // 只有 p0 * p1 的贡献可能出了偏差
				if (slog > lfslog[1])
					r.lv[1] += fp;
				else if (std::min(rxi, R2) > lid[slog])
				{
					r.lv[lid[slog]] -= fp;
					r.lv[std::min(rxi, R2)] += fp;
					// if (rxi > R2)
					// 	std::cerr << "FFFFFFFFF" << std::endl;
				}
				return;
			}
			
			// static int ps_slogp[20]; // 这段和下面对应的 if (n != 0 && c == n) 部分好像没什么优化效果，蒙古
			// static Z ps_fp[20];
			// for (int k = 0; k < ni; ++k)
			// 	ps_slogp[k] = scslogp[ps[curi + k]];
			// std::partial_sum(ps_slogp, ps_slogp + ni, ps_slogp);
			// for (int k = 0; k < ni; ++k)
			// 	ps_fp[k] = f(ps[curi + k]);
			// std::partial_sum(ps_fp, ps_fp + ni, ps_fp, std::multiplies<Z>());
			
			auto dfs_rec = [&](const auto &self, int n, int c, int sslogp, int t, Z fx)
			{
				if (c > n)
					return;
				++dfscnt;
				// if (n != 0 && c == n)
				// {
				// 	sslogp += ps_slogp[n - 1];
				// 	fx *= ps_fp[n - 1];
				// 	n = 0;
				// }
				if (n == 0)
				{
					int tt = std::min(t, R2);
					if (tt != lid[sslogp])
					{
						if (sslogp > lfslog[1])
							r.lv[1] += fx;
						else if (lid[sslogp] != tt)
						{
							r.lv[lid[sslogp]] -= fx;
							r.lv[tt] += fx;
						}
						++ok;
					}
					else
						++exact;
					return;
				}
				--n;
				const int p = ps[curi + n];
				self(self, n, c - 1, sslogp + scslogp[p], t, fx * f(p));
				if (static_cast<uint64>(t) * p <= tu)
					self(self, n, c, sslogp, t * p, fx);
			};
			dfs_rec(dfs_rec, ni, c, 0, rxi, 1);
		};
		
		for (int c = 1; c <= pfc; ++c)
			for (uint64 i = N - NS * (c - 1); i >= ll && i > N - NS * c; --i)
				// if (cur[i - ll + 1] - cur[i - ll] > 1)
					dfs(i, c);
		/**/
		r.partial_sum();
		
		std::cerr << "cm done (" << double(clock()) / CLOCKS_PER_SEC << ")" << std::endl;
		return r;
	};
	
	auto calc_large = [&](const std::function<Z (uint64)> &f, const std::function<Z (uint64)> &ps)
	{
		auto fp = [&](uint p) { return f(p); };
		auto fpp = [&](uint64 pe, uint p, uint e) { return f(pe); };
		
		DS ds = calc_medium(fp);
		attach_powerful(attach_small(ds, fp), fpp);
		DS res;
		for (int i = R2; i >= 1; --i)
		{
			Z x = ps(N / i) - ds.lv[i];
			for (int j = 2; i * j <= R2; ++j)
				sub(x, f(j), res.lv[i * j]);
			res.lv[i] = x;
		}
		return res;
	};
	
	auto mult_large = [&](DS &&ds, const DS &l)
	{
		for (int i = 1; i <= R2; ++i)
		{
			Z &x = ds.lv[i];
			for (int j = 1; i * j <= R2; ++j)
				if (ds.sv[j] != ds.sv[j - 1])
					add(x, ds.sv[j] - ds.sv[j - 1], l.lv[i * j]);
		}
		return ds;
	};
	DS l0 = calc_large(
		[&](uint64 n) { return 1; },
		[&](uint64 n) { return Z(n % Mod); }
	);
	DS l1 = calc_large(
		[&](uint64 n) { return Z(n % Mod); },
		[&](uint64 n) { return n %= Mod, Z(n * (n + 1) / 2 % Mod); }
	);
	DS l;
	for (int i = 1; i <= R2; ++i)
		l.lv[i] = A * l0.lv[i] + B * l1.lv[i];
	
	auto fp = [&](uint p) { return A + B * p; };
	auto fpe = [&](uint64 pp, uint p, uint e) { return A * e + B * p; };
	
	DS res = mult_large(calc_medium(fp), l);
	return attach_powerful(attach_small(res, fp), fpe);
}

#include <ctime>

int main(int argc, char **argv)
{
	int T;
	scanf("%u", &T);
	while (T--)
	{
		uint64 n;
		Z a, b;
		
		scanf("%llu %u %u", &n, &a.v, &b.v);
		
		DS ds = calc(n, a, b);
		
		std::vector<Z> res(ds.sv.begin() + 1, ds.sv.end());
		res.insert(res.end(), ds.lv.begin() + 1, ds.lv.end());
		std::sort(res.begin(), res.end(), [](const Z a, const Z b) { return a.v < b.v; });
		res.erase(std::unique(res.begin(), res.end()), res.end());
		uint Ans = 0;
		for (auto x : res)
			Ans ^= x.v;
		printf("%u\n", Ans);
	}
	fprintf(stderr, "%d\n", std::__lg(size));
	fprintf(stderr, "%d %d %d\n", ok, exact, dfscnt);
	fprintf(stderr, "%lf\n", double(clock()) / CLOCKS_PER_SEC);
	
	return 0;
}

Source code, Language: C++23

Details

Tip: Click on the bar to expand more detailed information

Test #1:

score: 100

Accepted

time: 792ms

memory: 8404kb

input:

10000
988 56395756 60780067
7923 293552717 438195956
4847 24236686 75287211
6694 74889751 64994726
3720 385482711 188638093
6021 2928896 248853035
6808 310612405 330739724
4062 15289930 175596707
9583 56394035 335888448
9798 151396947 371306315
4365 216662501 351771943
1359 165179730 80942360
1436 3...

output:

result:

ok 10000 numbers

Test #2:

score: -100

Wrong Answer

time: 651ms

memory: 8788kb

input:

486
685583 192056743 391870214
272484 346225796 149350515
656101 326831808 112167252
22515 203348552 60773766
1633155 194072757 22284059
57727 404929471 327406577
57598 251468713 173130016
1102497 36566124 195330260
3504399 214678339 86082351
360127 323967709 231892988
11663 225570343 56772624
39921...

output:

434223382
116245445
125541760
160318550
446061234
484145141
518392434
81977168
17947265
307371543
407160883
335339263
39598998
470162878
410893643
26179198
26198426
40422957
398293380
265153607
228078198
293572568
155169142
224586788
375283776
8481447
491498721
350950775
534322011
64802753
436909146...

result:

wrong answer 66th numbers differ - expected: '373911337', found: '203719625'