QOJ.ac
QOJ
ID | 题目 | 提交者 | 结果 | 用时 | 内存 | 语言 | 文件大小 | 提交时间 | 测评时间 |
---|---|---|---|---|---|---|---|---|---|
#346885 | #8327. 积性函数求和 $10^{13}$ 方便 FFT 版 | FR | AC ✓ | 19876ms | 1835060kb | C++23 | 32.6kb | 2024-03-09 04:13:41 | 2024-03-09 04:13:42 |
Judging History
answer
#include <cstdio>
#include <algorithm>
#include <cmath>
#include <functional>
#include <numeric>
#include <vector>
typedef unsigned int uint;
typedef long long int int64;
typedef long long unsigned int uint64;
inline uint _udiv64(uint64 u, uint v)
{
uint u0 = u, u1 = u >> 32;
uint q, r;
__asm__("divl %[v]" : "=a"(q), "=d"(r) : [v] "r"(v), "a"(u0), "d"(u1));
return q;
}
inline uint64 inv64(uint64 x)
{
uint64 r = x;
for (int i = 1; i < 6; ++i)
r -= (x * r - 1) * r;
return r;
}
constexpr uint Max_size = 1 << 26 | 5;
constexpr uint Mod = 469762049;
constexpr uint g = 3;
inline uint norm_2(const uint x)
{
return x < Mod * 2 ? x : x - Mod * 2;
}
inline constexpr uint norm(const uint x)
{
return x < Mod ? x : x - Mod;
}
struct Z
{
uint v;
Z() { }
constexpr Z(const uint _v) : v(_v) { }
};
inline constexpr Z operator+(const Z x1, const Z x2) { return norm(x1.v + x2.v); }
inline constexpr Z operator-(const Z x1, const Z x2) { return norm(x1.v + Mod - x2.v); }
inline constexpr Z operator-(const Z x) { return x.v ? Mod - x.v : 0; }
inline constexpr Z operator*(const Z x1, const Z x2) { return static_cast<uint64>(x1.v) * x2.v % Mod; }
inline Z &operator+=(Z &x1, const Z x2) { return x1 = x1 + x2; }
inline Z &operator-=(Z &x1, const Z x2) { return x1 = x1 - x2; }
inline Z &operator*=(Z &x1, const Z x2) { return x1 = x1 * x2; }
inline bool operator==(const Z x1, const Z x2) { return x1.v == x2.v; }
inline bool operator!=(const Z x1, const Z x2) { return x1.v != x2.v; }
inline Z Power(Z Base, int Exp)
{
Z res = 1;
for (; Exp; Base *= Base, Exp >>= 1)
if (Exp & 1)
res *= Base;
return res;
}
inline Z Inv(const Z x)
{
return Power(x, Mod - 2);
}
Z inv[Max_size];
void init_inv(const int n)
{
inv[1] = 1;
for (int i = 2; i != n; ++i)
inv[i] = inv[i - 1] * (i - 1);
Z R = Inv(inv[n - 1] * (n - 1));
for (int i = n - 1; i != 1; --i)
inv[i] *= R, R *= i;
}
int size;
uint w[Max_size], w_q[Max_size];
inline uint mult_Shoup_2(const uint x, const uint y, const uint y_q)
{
uint q = static_cast<uint64>(x) * y_q >> 32;
return x * y - q * Mod;
}
inline uint mult_Shoup(const uint x, const uint y, const uint y_q)
{
return norm(mult_Shoup_2(x, y, y_q));
}
inline uint mult_Shoup_q(const uint x, const uint y, const uint y_q)
{
uint q = static_cast<uint64>(x) * y_q >> 32;
return q + (x * y - q * Mod >= Mod);
}
void init_w(const int n)
{
for (size = 2; size < n; size <<= 1)
;
uint pr = Power(g, (Mod - 1) / size).v;
uint pr_q = (static_cast<uint64>(pr) << 32) / Mod;
uint pr_r = (static_cast<uint64>(pr) << 32) % Mod;
size >>= 1;
w[size] = 1, w_q[size] = (static_cast<uint64>(w[size]) << 32) / Mod;
for (int i = 1; i < size; ++i)
{
//w[size + i] = mult_Shoup(w[size + i - 1], pr, pr_q);
uint x = w[size + i - 1];
uint64 p = static_cast<uint64>(x) * pr_q;
uint q = p >> 32;
w[size + i] = norm(x * pr - q * Mod);
w_q[size + i] = static_cast<uint>(p) + mult_Shoup_q(pr_r, w[size + i - 1], w_q[size + i - 1]);
}
for (int i = size - 1; i; --i)
w[i] = w[i * 2], w_q[i] = w_q[i * 2];
size <<= 1;
}
//void DFT_fr_2(Z _A[], const int L)
//{
// uint *A = reinterpret_cast<uint *>(_A);
// for (int d = L >> 1; d; d >>= 1)
// for (int i = 0; i != L; i += d << 1)
// for (int j = 0; j != d; ++j)
// {
// uint x = norm_2(A[i + j] + A[i + d + j]);
// uint y = mult_Shoup_2(A[i + j] + Mod * 2 - A[i + d + j], w[d + j], w_q[d + j]);
// A[i + j] = x, A[i + d + j] = y;
// }
//}
void DFT_fr_2(Z _A[], const int L)
{
if (L == 1)
return;
uint *A = reinterpret_cast<uint *>(_A);
// auto butterfly1 = [](uint &a, uint &b)
// {
// uint x = norm_2(a + b), y = norm_2(a + Mod * 2 - b);
// a = x, b = y;
// };
#define butterfly1(a, b)\
do\
{\
uint _a = a, _b = b;\
uint x = norm_2(_a + _b), y = norm_2(_a + Mod * 2 - _b);\
a = x, b = y;\
} while (0)
if (L == 2)
{
butterfly1(A[0], A[1]);
return;
}
// auto butterfly = [](uint &a, uint &b, const uint _w, const uint _w_q)
// {
// uint x = norm_2(a + b), y = mult_Shoup_2(a + Mod * 2 - b, _w, _w_q);
// a = x, b = y;
// };
#define butterfly(a, b, _w, _w_q)\
do\
{\
uint _a = a, _b = b;\
uint x = norm_2(_a + _b), y = mult_Shoup_2(_a + Mod * 2 - _b, _w, _w_q);\
a = x, b = y;\
} while (0)
if (L == 4)
{
butterfly1(A[0], A[2]);
butterfly(A[1], A[3], w[3], w_q[3]);
butterfly1(A[0], A[1]);
butterfly1(A[2], A[3]);
return;
}
for (int d = L >> 1; d != 4; d >>= 1)
for (int i = 0; i != L; i += d << 1)
for (int j = 0; j != d; j += 4)
{
butterfly(A[i + j + 0], A[i + d + j + 0], w[d + j + 0], w_q[d + j + 0]);
butterfly(A[i + j + 1], A[i + d + j + 1], w[d + j + 1], w_q[d + j + 1]);
butterfly(A[i + j + 2], A[i + d + j + 2], w[d + j + 2], w_q[d + j + 2]);
butterfly(A[i + j + 3], A[i + d + j + 3], w[d + j + 3], w_q[d + j + 3]);
}
for (int i = 0; i != L; i += 8)
{
butterfly1(A[i + 0], A[i + 4]);
butterfly(A[i + 1], A[i + 5], w[5], w_q[5]);
butterfly(A[i + 2], A[i + 6], w[6], w_q[6]);
butterfly(A[i + 3], A[i + 7], w[7], w_q[7]);
}
for (int i = 0; i != L; i += 8)
{
butterfly1(A[i + 0], A[i + 2]);
butterfly(A[i + 1], A[i + 3], w[3], w_q[3]);
butterfly1(A[i + 4], A[i + 6]);
butterfly(A[i + 5], A[i + 7], w[3], w_q[3]);
}
for (int i = 0; i != L; i += 8)
{
butterfly1(A[i + 0], A[i + 1]);
butterfly1(A[i + 2], A[i + 3]);
butterfly1(A[i + 4], A[i + 5]);
butterfly1(A[i + 6], A[i + 7]);
}
#undef butterfly1
#undef butterfly
}
void DFT_fr(Z _A[], const int L)
{
DFT_fr_2(_A, L);
for (int i = 0; i != L; ++i)
_A[i] = norm(_A[i].v);
}
//void IDFT_fr(Z _A[], const int L)
//{
// uint *A = reinterpret_cast<uint *>(_A);
// for (int d = 1; d != L; d <<= 1)
// for (int i = 0; i != L; i += d << 1)
// for (int j = 0; j != d; ++j)
// {
// uint x = norm_2(A[i + j]);
// uint t = mult_Shoup_2(A[i + d + j], w[d + j], w_q[d + j]);
// A[i + j] = x + t, A[i + d + j] = x + Mod * 2 - t;
// }
// std::reverse(A + 1, A + L);
// if (L == 2)
// A[0] = norm_2(A[0]), A[1] = norm_2(A[1]);
// int k = __builtin_ctz(L);
// for (int i = 0; i != L; ++i)
// {
// uint64 m = -A[i] & (L - 1);
// A[i] = norm((A[i] + m * Mod) >> k);
// }
//}
void IDFT_fr(Z _A[], const int L)
{
if (L == 1)
return;
uint *A = reinterpret_cast<uint *>(_A);
// auto butterfly1 = [](uint &a, uint &b)
// {
// uint x = norm_2(a), t = norm_2(b);
// a = x + t, b = x + Mod * 2 - t;
// };
#define butterfly1(a, b)\
do\
{\
uint _a = a, _b = b;\
uint x = norm_2(_a), t = norm_2(_b);\
a = x + t, b = x + Mod * 2 - t;\
} while (0)
if (L == 2)
{
butterfly1(A[0], A[1]);
A[0] = norm(norm_2(A[0])), A[0] = A[0] & 1 ? A[0] + Mod : A[0], A[0] /= 2;
A[1] = norm(norm_2(A[1])), A[1] = A[1] & 1 ? A[1] + Mod : A[1], A[1] /= 2;
return;
}
// auto butterfly = [](uint &a, uint &b, const uint _w, const uint _w_q)
// {
// uint x = norm_2(a), t = mult_Shoup_2(b, _w, _w_q);
// a = x + t, b = x + Mod * 2 - t;
// };
#define butterfly(a, b, _w, _w_q)\
do\
{\
uint _a = a, _b = b;\
uint x = norm_2(_a), t = mult_Shoup_2(_b, _w, _w_q);\
a = x + t, b = x + Mod * 2 - t;\
} while (0)
if (L == 4)
{
butterfly1(A[0], A[1]);
butterfly1(A[2], A[3]);
butterfly1(A[0], A[2]);
butterfly(A[1], A[3], w[3], w_q[3]);
std::swap(A[1], A[3]);
for (int i = 0; i != L; ++i)
{
uint64 m = -A[i] & 3;
A[i] = norm((A[i] + m * Mod) >> 2);
}
return;
}
for (int i = 0; i != L; i += 8)
{
butterfly1(A[i + 0], A[i + 1]);
butterfly1(A[i + 2], A[i + 3]);
butterfly1(A[i + 4], A[i + 5]);
butterfly1(A[i + 6], A[i + 7]);
}
for (int i = 0; i != L; i += 8)
{
butterfly1(A[i + 0], A[i + 2]);
butterfly(A[i + 1], A[i + 3], w[3], w_q[3]);
butterfly1(A[i + 4], A[i + 6]);
butterfly(A[i + 5], A[i + 7], w[3], w_q[3]);
}
for (int i = 0; i != L; i += 8)
{
butterfly1(A[i + 0], A[i + 4]);
butterfly(A[i + 1], A[i + 5], w[5], w_q[5]);
butterfly(A[i + 2], A[i + 6], w[6], w_q[6]);
butterfly(A[i + 3], A[i + 7], w[7], w_q[7]);
}
for (int d = 8; d != L; d <<= 1)
for (int i = 0; i != L; i += d << 1)
for (int j = 0; j != d; j += 4)
{
butterfly(A[i + j + 0], A[i + d + j + 0], w[d + j + 0], w_q[d + j + 0]);
butterfly(A[i + j + 1], A[i + d + j + 1], w[d + j + 1], w_q[d + j + 1]);
butterfly(A[i + j + 2], A[i + d + j + 2], w[d + j + 2], w_q[d + j + 2]);
butterfly(A[i + j + 3], A[i + d + j + 3], w[d + j + 3], w_q[d + j + 3]);
}
#undef butterfly1
#undef butterfly
std::reverse(A + 1, A + L);
int k = __builtin_ctz(L);
for (int i = 0; i != L; ++i)
{
uint64 m = -A[i] & (L - 1);
A[i] = norm((A[i] + m * Mod) >> k);
}
}
struct DS
{
static uint64 N;
static int R2;
std::vector<Z> sv;
std::vector<Z> lv;
DS() : sv(1 + R2, 0), lv(1 + R2, 0) { }
Z &operator[](const uint64 x)
{
return x <= R2 ? sv[x] : lv[N / x];
}
const Z &operator[](const uint64 x) const
{
return x <= R2 ? sv[x] : lv[N / x];
}
void partial_sum()
{
for (int i = 2; i <= R2; ++i)
sv[i] += sv[i - 1];
lv[R2] += sv[R2];
for (int i = R2 - 1; i >= 1; --i)
lv[i] += lv[i + 1];
}
void adjacent_difference()
{
for (int i = 1; i < R2; ++i)
lv[i] -= lv[i + 1];
lv[R2] -= sv[R2];
for (int i = R2; i >= 2; --i)
sv[i] -= sv[i - 1];
}
};
uint64 DS::N;
int DS::R2;
inline void add(Z &x, const uint a, const uint b)
{
x = (x.v + 1ULL * a * b) % Mod;
}
inline void add(Z &x, const Z a, const uint b) { add(x, a.v, b); }
inline void add(Z &x, const Z a, const Z b) { add(x, a.v, b.v); }
inline void sub(Z &x, const Z a, const Z b) { add(x, a.v, Mod - b.v); }
template<typename I>
struct subrange
{
I i, s;
subrange(I _i, I _s) : i(_i), s(_s) { }
const I &begin() const { return i; }
const I &end() const { return s; }
bool empty() const { return i == s; }
auto size() const { return s - i; }
};
DS mult_sparse(const DS &a, const DS &b)
{
const uint64 &N = DS::N;
const int &R2 = DS::R2;
DS res;
auto s1 = [](const DS &ds) -> std::vector<std::pair<int, Z>>
{
std::vector<std::pair<int, Z>> vec;
for (int i = 1; i <= R2; ++i)
if (ds.sv[i] != ds.sv[i - 1])
vec.emplace_back(i, ds.sv[i] - ds.sv[i - 1]);
vec.emplace_back(R2 + 1, 0);
return vec;
};
auto s2 = [&res](const int x, const Z y, const std::vector<std::pair<int, Z>> &p, const int r)
{
int i = 0;
for (const int X = R2 / x; i != r && p[i].first <= X; ++i)
add(res.sv[x * p[i].first], y, p[i].second);
for (const uint64 Nx = N / x; i != r; ++i)
add(res.lv[_udiv64(Nx, p[i].first)], y, p[i].second);
};
auto s3 = [&res](const int x, const Z y, int i, const DS &ds)
{
const uint64 Nx = N / x;
for (int X = R2 / x; i > X; --i)
add(res.lv[i], y, ds.sv[_udiv64(Nx, i)]);
for (; i >= 1; --i)
add(res.lv[i], y, ds.lv[x * i]);
};
const auto pa = s1(a);
// const auto va = std::ranges::subrange(pa.begin(), pa.end() - 1);
const auto va = subrange(pa.begin(), pa.end() - 1);
const auto pb = s1(b);
// const auto vb = std::ranges::subrange(pb.begin(), pb.end() - 1);
const auto vb = subrange(pb.begin(), pb.end() - 1);
for (int l = 0, r = va.size(); const auto &[x, y] : vb)
{
const uint64 Nx = N / x;
while (r && Nx / pa[r - 1].first < r - 1)
--r;
s2(x, y, pa, r);
if (r)
sub(res[1ULL * x * pa[r].first], y, a.sv[pa[r - 1].first]);
}
for (const auto &[x, y] : va)
{
sub(res.lv[_udiv64(R2, x)], y, b.sv[R2]);
}
res.partial_sum();
for (int l = 0, r = va.size(); const auto &[x, y] : vb)
{
const uint64 Nx = N / x;
while (r && Nx / pa[r - 1].first < r - 1)
--r;
s3(x, y, _udiv64(Nx, pa[r].first), a);
}
for (const auto &[x, y] : va)
{
for (int j = 1; x * j <= R2; ++j)
add(res.lv[j], y, b.lv[x * j]);
}
return res;
}
#include <bits/stdc++.h>
std::ostream &operator<<(std::ostream &os, const Z x)
{
if (x.v <= Mod / 2)
return os << x.v;
else
return os << int(x.v - Mod);
}
int ok, exact, dfscnt;
DS calc(const uint64 N, const Z A, const Z B)
{
DS::N = N;
const int R2 = std::sqrt(N + 0.5);
DS::R2 = R2;
if (N == 0)
return DS();
if (N == 1)
{
DS ds;
ds.sv[1] = ds.lv[1] = 1;
return ds;
}
const int R4 = std::sqrt(R2 + 0.5);
const int MaxSP = std::max<int>(2, 16 * std::log((long double)(N)));
std::vector<char> pf(1 + R2);
std::vector<int> P;
int L = -1;
for (int p = 2; p <= R2; ++p)
if (!pf[p])
{
if (L == -1 && p > MaxSP)
L = P.size();
if (p <= R4)
for (int j = p * p; j <= R2; j += p)
pf[j] = 1;
P.push_back(p);
}
if (L == -1)
L = P.size();
auto attach_powerful = [&](DS &ds, const std::function<Z (uint64, uint, uint)> &f) -> DS & // `ds` is the `DS` of `f` without powerful
{
DS powerful;
auto gen = [&](int i, const uint64 x, const Z y)
{
auto gen_rec = [&](const auto &self, int i, const uint64 x, const Z y) -> void
{
powerful[x] += y;
for (; i != P.size() && 1ULL * P[i] * P[i] <= N / x; ++i)
{
const uint64 Nx = N / x;
const int p = P[i];
const Z fp = f(p, p, 1);
// assert(fp == ds.sv[p] - ds.sv[p - 1]);
uint64 pe = 1ULL * p * p;
Z fpe = 0;
for (int e = 2; pe <= Nx; pe *= p, ++e)
{
fpe = f(pe, p, e) - fpe * fp;
if (fpe.v)
self(self, i + 1, x * pe, y * fpe);
}
}
};
return gen_rec(gen_rec, i, x, y);
};
gen(0, 1, 1);
powerful.partial_sum();
return ds = mult_sparse(ds, powerful);
};
auto fix_powerful = [&](DS &ds, const std::function<Z (uint64, uint, uint)> &f, const std::function<Z (uint64, uint, uint)> &now) -> DS &
{
DS powerful;
auto gen = [&](int i, const uint64 x, const Z y)
{
auto gen_rec = [&](const auto &self, int i, const uint64 x, const Z y) -> void
{
powerful[x] += y;
for (; i != P.size() && 1ULL * P[i] * P[i] <= N / x; ++i)
{
const uint64 Nx = N / x;
const int p = P[i];
Z fp[55] = {1, 0};
Z nowp[55] = {1, now(p, p, 1)};
// assert(f(p, p, 1) == ds.sv[p] - ds.sv[p - 1]);
// assert(f(p, p, 1) == nowp[1]);
uint64 pe = 1ULL * p * p;
for (int e = 2; pe <= Nx; pe *= p, ++e)
{
fp[e] = f(pe, p, e);
nowp[e] = now(pe, p, e);
for (int ee = 0; ee < e; ++ee)
fp[e] -= fp[ee] * nowp[e - ee];
if (fp[e].v)
self(self, i + 1, x * pe, y * fp[e]);
}
}
};
return gen_rec(gen_rec, i, x, y);
};
gen(0, 1, 1);
powerful.partial_sum();
return ds = mult_sparse(ds, powerful);
};
auto mult_powerful = [&](const DS &ds, const std::function<Z (uint64, uint, uint)> &f) // `ds` is the `DS` of another function
{
DS powerful;
auto gen = [&](int i, const uint64 x, const Z y)
{
auto gen_rec = [&](const auto &self, int i, const uint64 x, const Z y) -> void
{
powerful[x] += y;
for (; i != P.size() && 1ULL * P[i] * P[i] <= N / x; ++i)
{
const uint64 Nx = N / x;
const int p = P[i];
uint64 pe = 1ULL * p * p;
for (int e = 2; pe <= Nx; pe *= p, ++e)
{
Z fpe = f(pe, p, e);
if (fpe.v)
self(self, i + 1, x * pe, y * fpe);
}
}
};
return gen_rec(gen_rec, i, x, y);
};
gen(0, 1, 1);
powerful.partial_sum();
return mult_sparse(ds, powerful);
};
auto attach_small = [&](DS &ds, const std::function<Z (uint)> &f) -> DS & // no small p in `ds`!
{
ds.adjacent_difference();
std::vector<int> pred(1 + R2 + 1);
pred[1] = 0;
for (int i = 2; i <= R2 + 1; ++i)
pred[i] = ds.sv[i - 1].v ? i - 1 : pred[i - 1];
for (int pid = L - 1; pid >= 0; --pid)
{
const int p = P[pid];
Z y = f(p);
{
int j = 1, k = p;
for (; (j + 1) * p - 1 <= R2; ++j)
for (; k != (j + 1) * p; ++k)
if (ds.lv[k].v)
add(ds.lv[j], ds.lv[k], y);
for (; k <= R2; ++k)
if (ds.lv[k].v)
add(ds.lv[j], ds.lv[k], y);
}
{
int j = pred[R2 + 1];
uint64 Nx = N / p;
for (int X = R2 / p; j > X; j = pred[j])
add(ds.lv[_udiv64(Nx, j)], ds.sv[j], y);
int k = R2 + 1;
for (; j; j = pred[j])
{
while (pred[k] > j * p)
k = pred[k];
// assert(k != j * p);
pred[j * p] = pred[k], pred[k] = j * p;
add(ds.sv[j * p], ds.sv[j], y);
}
}
}
ds.partial_sum();
return ds;
};
auto eliminate_small = [&](DS &ds, const std::function<Z (uint)> &f) // no small p in `ds * f`
{
ds.adjacent_difference();
std::vector<int> succ(1 + R2 + 1);
succ[R2] = R2 + 1;
for (int i = R2 - 1; i >= 0; --i)
succ[i] = ds.sv[i + 1].v ? i + 1 : succ[i + 1];
for (int pid = L - 1; pid >= 0; --pid)
{
const int p = P[pid];
Z y = f(p);
{
int j = succ[0], k = succ[0];
for (int X = R2 / p; j <= X; j = succ[j])
{
sub(ds.sv[j * p], ds.sv[j], y);
while (succ[k] < j * p)
k = succ[k];
// assert(succ[k] == j * p);
succ[k] = succ[j * p];
}
for (uint64 Nx = N / p; j <= R2; j = succ[j])
sub(ds.lv[_udiv64(Nx, j)], ds.sv[j], y);
}
{
int j = R2 / p, k = R2;
for (; j; --j)
for (; k >= j * p; --k)
if (ds.lv[k].v)
sub(ds.lv[j], ds.lv[k], y);
}
}
ds.partial_sum();
return ds;
};
auto attach_small_inv = [&](DS &ds, const std::function<Z (uint)> &f) -> DS & // no small p in `ds`! // 未经测试
{
ds.adjacent_difference();
std::vector<int> succ(1 + R2 + 1);
succ[R2] = R2 + 1;
for (int i = R2 - 1; i >= 0; --i)
succ[i] = ds.sv[i + 1].v ? i + 1 : succ[i + 1];
for (int pid = 0; pid < L; ++pid)
{
const int p = P[pid];
Z y = f(p);
{
int j = succ[0], k = succ[0];
for (int X = R2 / p; j <= X; j = succ[j])
{
sub(ds.sv[j * p], ds.sv[j], y);
while (succ[k] < j * p)
k = succ[k];
// assert(succ[k] != j * p);
succ[j * p] = succ[k], succ[k] = j * p;
}
for (uint64 Nx = N / p; j <= R2; j = succ[j])
sub(ds.lv[_udiv64(Nx, j)], ds.sv[j], y);
}
{
int j = R2 / p, k = R2;
for (; j; --j)
for (; k >= j * p; --k)
if (ds.lv[k].v)
sub(ds.lv[j], ds.lv[k], y);
}
}
ds.partial_sum();
return ds;
};
auto eliminate_small_inv = [&](DS &ds, const std::function<Z (uint)> &f) // no small p in `res / f`!
{
ds.adjacent_difference();
std::vector<int> pred(1 + R2 + 1);
pred[1] = 0;
for (int i = 2; i <= R2 + 1; ++i)
pred[i] = ds.sv[i - 1].v ? i - 1 : pred[i - 1];
for (int pid = 0; pid < L; ++pid)
{
const int p = P[pid];
Z y = f(p);
{
int j = 1, k = p;
for (; (j + 1) * p - 1 <= R2; ++j)
for (; k != (j + 1) * p; ++k)
if (ds.lv[k].v)
add(ds.lv[j], ds.lv[k], y);
for (; k <= R2; ++k)
if (ds.lv[k].v)
add(ds.lv[j], ds.lv[k], y);
}
{
int j = pred[R2 + 1];
uint64 Nx = N / p;
for (int X = R2 / p; j > X; j = pred[j])
add(ds.lv[_udiv64(Nx, j)], ds.sv[j], y);
int k = R2 + 1;
for (; j; j = pred[j])
{
while (pred[k] > j * p)
k = pred[k];
// assert(pred[k] == j * p);
pred[k] = pred[j * p];
add(ds.sv[j * p], ds.sv[j], y);
}
}
}
ds.partial_sum();
return ds;
};
const int S = [&]()
{
int S = 1 + R2 / std::sqrt(std::log((long double)(N))) / 2;
auto Fslog = [&S](const uint64 i) -> int { return S * std::log((long double)(i)); };
for (int U = 1 << (1 + std::__lg(Fslog(N))); ++S, Fslog(N) < U; )
;
--S;
return S;
}();
const int NS = (N + S - 1) / S;
const int pfc = [&]()
{
int c = 0;
uint64 x = 1;
for (int i = L; i != P.size(); ++i)
{
x *= P[i];
if (x > N)
break;
++c;
}
return c;
}();
std::cerr << "N MaxSP L S NS pfc (" << double(clock()) / CLOCKS_PER_SEC << "): " << N << ' ' << MaxSP << ' ' << L << ' ' << S << ' ' << NS << ' ' << pfc << std::endl;
auto Cslog = [&S](const uint64 i) -> int { return std::ceil(S * std::log((long double)(i))); };
auto Fslog = [&S](const uint64 i) -> int { return S * std::log((long double)(i)); };
std::vector<int> scslogp(1 + R2, 0);
for (auto p : P)
{
const int slogp = Cslog(p);
for (int i = 1; i * p <= R2; ++i)
{
scslogp[i * p] += slogp;
for (int x = i; x % p == 0; x /= p)
scslogp[i * p] += slogp;
}
}
std::vector<int> lfslog(1 + R2 + 1);
for (int i = 1; i <= R2; ++i)
lfslog[i] = Fslog(N / i);
lfslog[R2 + 1] = -1;
init_w(lfslog[1] + 1);
init_inv(size);
const int len = lfslog[1] + 1;
const int slen = (len + 1) >> 1;
std::cerr << "len slen size (" << double(clock()) / CLOCKS_PER_SEC << "): " << len << ' ' << slen << ' ' << size << std::endl;
std::vector<int> lid(size + pfc);
for (int i = 1; i <= R2; ++i)
for (int j = lfslog[i]; j > lfslog[i + 1]; --j)
lid[j] = i;
std::cerr << "pre 0 (" << double(clock()) / CLOCKS_PER_SEC << ")" << std::endl;
int maxR2error = [&]()
{
int c = 0;
{
uint64 x = 1;
for (int i = L; i != P.size(); ++i)
{
x *= P[i];
if (x > R2)
break;
++c;
};
}
int x = R2;
while (Cslog(x) > lfslog[R2] - c)
--x;
assert(Cslog(N / (N / (x + 1) + 1)) <= lfslog[R2] - c);
return N / (x + 1) + 1;
}();
std::cerr << "pre 1 (" << double(clock()) / CLOCKS_PER_SEC << ")" << std::endl;
const uint64 ll = std::max<int64>(1LL, N - NS * pfc + 1);
const int fixl = N - ll;
std::vector<uint64> rx(fixl + 1);
for (uint64 i = ll; i <= N; ++i)
rx[i - ll] = i;
std::cerr << "pre 2 (" << double(clock()) / CLOCKS_PER_SEC << "): " << fixl << std::endl;
/*
N MaxSP L S NS pfc (0.002): 992802097447 441 85 151836 6538648 4
len slen size (0.094): 4194287 2097144 4194304
pre 0 (0.098)
pre 1 (0.099)
pre 2 (0.164): 26154592
pre 3 (0.52): 21229213
pre done (0.853)
*/
std::vector<int> cur(fixl + 1 + 1);
std::vector<int> ps;
{
for (int pid = L; pid < P.size(); ++pid)
{
const int p = P[pid];
const uint64 ip = inv64(p);
for (int i = ((ll - 1) / p + 1) * p - ll; i <= fixl; i += p)
{
rx[i] *= ip;
++cur[i];
}
}
std::partial_sum(cur.begin(), cur.end(), cur.begin());
ps.resize(cur.back());
std::cerr << "pre 3 (" << double(clock()) / CLOCKS_PER_SEC << "): " << cur.back() << std::endl;
for (int pid = L; pid < P.size(); ++pid)
{
const int p = P[pid];
for (int i = ((ll - 1) / p + 1) * p - ll; i <= fixl; i += p)
ps[--cur[i]] = p;
}
std::cerr << "pre done (" << double(clock()) / CLOCKS_PER_SEC << ")" << std::endl;
}
/*/
const int pfcnt = [&]()
{
int cnt = 0;
for (auto p : P)
if (p > MaxSP)
cnt += N / p - (ll - 1) / p;
return cnt;
}();
std::vector<int> cur(fixl + 1 + 1);
std::vector<int> ps = [&]()
{
std::vector<std::pair<int, int>> _ps;
_ps.reserve(pfcnt);
std::vector<int> _ps_first(fixl + 1, -1);
for (int pid = L; pid < P.size(); ++pid)
{
int p = P[pid];
uint64 ip = inv64(p);
for (int i = ((ll - 1) / p + 1) * p - ll; i <= fixl; i += p)
{
rx[i] *= ip;
_ps.emplace_back(p, _ps_first[i]);
_ps_first[i] = _ps.size() - 1;
}
}
std::cerr << "pre 3 (" << double(clock()) / CLOCKS_PER_SEC << "): " << pfcnt << std::endl;
std::vector<int> res(pfcnt);
cur[0] = 0;
for (int i = 0; i <= fixl; ++i)
{
cur[i + 1] = cur[i];
for (int k = _ps_first[i]; k != -1; k = _ps[k].second)
ps[cur[i + 1]++] = _ps[k].first;
}
return res;
}();
std::cerr << "pre done (" << double(clock()) / CLOCKS_PER_SEC << ")" << std::endl;
/**/
for (int i = 0; i <= fixl; ++i)
std::reverse(ps.begin() + cur[i], ps.begin() + cur[i + 1]); // make dfs faster?
auto calc_medium = [&](const std::function<Z (uint)> &f)
{
std::cerr << "cm (" << double(clock()) / CLOCKS_PER_SEC << ")" << std::endl;
Z *df = new Z[size];
Z *_ef = new Z[1 + R2];
Z *_ief = new Z[1 + R2];
_ef[0] = 0;
std::fill(_ef + 1, _ef + 1 + R2, 1);
_ief[0] = 0;
std::fill(_ief + 1, _ief + 1 + R2, 1);
std::fill(df, df + size, 0);
for (const auto p : P)
{
const Z fp = p <= MaxSP ? 0 : f(p);
for (int i = 1; i * p <= R2; ++i)
{
int e = 1;
for (int x = i; x % p == 0; x /= p)
++e;
if (e == 1)
_ef[i * p] *= fp;
else
_ef[i * p] = 0;
_ief[i * p] *= Power(-fp, e);
}
int e = 1;
Z slogp_fpe = scslogp[p] * fp;
for (; scslogp[p] * e < len; ++e, slogp_fpe *= fp)
{
if (e & 1)
df[scslogp[p] * e] += slogp_fpe;
else
df[scslogp[p] * e] -= slogp_fpe;
}
}
Z *exp = new Z[size];
Z *dexp = new Z[size];
Z *iexp = new Z[size];
std::fill(exp, exp + size, 0);
std::fill(dexp, dexp + size, 0);
std::fill(iexp, iexp + size, 0);
for (int i = 1; i <= R2; ++i)
if (scslogp[i] < slen)
{
exp[scslogp[i]] += _ef[i];
dexp[scslogp[i]] += _ef[i] * scslogp[i];
iexp[scslogp[i]] += _ief[i];
}
delete[] _ief;
const std::vector<Z> _exp(exp, exp + slen);
DFT_fr(exp, size);
DFT_fr(dexp, size);
DFT_fr(iexp, size);
Z *res = new Z[size];
std::copy(df, df + slen, res);
std::fill(res + slen, res + size, 0);
DFT_fr(res, size);
for (int i = 0; i < size; ++i)
res[i] = iexp[i] * (dexp[i] - exp[i] * res[i]);
delete[] dexp;
delete[] iexp;
IDFT_fr(res, size);
std::fill(res, res + slen, 0);
for (int i = slen; i < len; ++i)
res[i] = (res[i] - df[i]) * inv[i];
std::fill(res + len, res + size, 0);
delete[] df;
DFT_fr(res, size);
for (int i = 0; i < size; ++i)
res[i] *= exp[i];
delete[] exp;
IDFT_fr(res, size);
for (int i = 0; i < slen; ++i)
res[i] = _exp[i];
for (int i = slen; i < len; ++i)
res[i] = -res[i];
std::cerr << "cm fft done (" << double(clock()) / CLOCKS_PER_SEC << ")" << std::endl;
DS r;
std::copy(_ef, _ef + 1 + R2, r.sv.begin());
delete[] _ef;
for (int i = 0; i < len; ++i)
if (lid[i])
r.lv[lid[i]] += res[i];
delete[] res;
for (int i = 1; i <= R2; ++i)
r.lv[R2] -= r.sv[i];
assert(static_cast<uint64>(NS) * pfc <= N);
/*
MaxSP = std::max<int>(2, 16 * std::log((long double)(N)));
S = 1 + R2 / std::sqrt(std::log((long double)(N))) / 2;
n = 992802097447;
=> 7014438 6708270 17730141 ; 7014438 6708270 0
*
auto dfs = [&](const uint64 i, const int c)
{
const int curi = cur[i - ll];
auto dfs_rec = [&](const auto &self, int n, int c, int sslogp, uint64 t, uint64 x, Z fx)
{
if (c > n)
return;
if (n == 0)
{
if ((t + 1) * x > N) // (N / x == t)
{
int tt = std::min<uint64>(t, R2);
// if (sslogp > lfslog[1])
// r.lv[tt] += fx;
// else
if (tt > lid[sslogp])
{
r.lv[lid[sslogp]] -= fx;
r.lv[tt] += fx;
}
}
// else
// ++dfscnt;
return;
}
--n;
const int p = ps[curi + n];
self(self, n, c - 1, sslogp + scslogp[p], t, x * p, fx * f(p));
self(self, n, c, sslogp, t * p, x, fx);
};
dfs_rec(dfs_rec, cur[i - ll + 1] - cur[i - ll], c, 0, rx[i - ll], 1, 1);
};
for (int c = 1; c <= pfc; ++c)
for (uint64 i = N - NS * (c - 1); i >= ll && i > N - NS * c; --i)
dfs(i, c);
/*/
auto dfs = [&](const uint64 i, const int c)
{
const int tu = std::min<uint64>(i / (N - i + 1), // 这个限制和上文注释的原始代码中确保每个 x 只枚举一次的判断 (N / x == t) 是等价的
maxR2error); // x 太小,也就是 i / t 太小,t 太大的话,误差无法影响 lv // 优化效果似乎有限 // 直接用 R2 也没有算错,我蒙古,可能反例需要构造
// 如果去掉上面的 maxR2error 的话,可能 int 不够存
if (rx[i - ll] > tu)
return;
const int rxi = rx[i - ll];
const int &curi = cur[i - ll];
const int ni = cur[i - ll + 1] - cur[i - ll];
// if (ni == 1)
// {
// const int p = ps[curi];
// const int slog = scslogp[p];
// const Z fp = f(p);
// // if (slog > lfslog[1])
// // r.lv[1] += fp;
// // else
// if (std::min(rxi, R2) > lid[slog])
// {
// r.lv[lid[slog]] -= fp;
// r.lv[std::min(rxi, R2)] += fp;
// }
// return;
// }
// if (ni == 2) // 单独处理两个素数的情况 不单独处理也不加下面的优化的话好像还更快 我蒙古
// {
// const int p0 = ps[curi], p1 = ps[curi + 1];
// const int slog0 = scslogp[p0], slog1 = scslogp[p1];
// const Z fp0 = f(p0), fp1 = f(p1);
// if (slog0 + slog1 > lfslog[1])
// r.lv[1] += fp0 * fp1;
// else
// if (std::min(rxi, R2) > lid[slog0 + slog1])
// {
// r.lv[lid[slog0 + slog1]] -= fp0 * fp1;
// r.lv[std::min(rxi, R2)] += fp0 * fp1;
// }
// if (static_cast<uint64>(rxi) * p1 <= tu && std::min<uint64>(static_cast<uint64>(rxi) * p1, R2) > lid[slog0])
// {
// r.lv[lid[slog0]] -= fp0;
// r.lv[std::min<uint64>(static_cast<uint64>(rxi) * p1, R2)] += fp0;
// }
// if (static_cast<uint64>(rxi) * p0 <= tu && std::min<uint64>(static_cast<uint64>(rxi) * p0, R2) > lid[slog1])
// {
// r.lv[lid[slog1]] -= fp1;
// r.lv[std::min<uint64>(static_cast<uint64>(rxi) * p0, R2)] += fp1;
// }
// return;
// }
// static int ps_slogp[20]; // 这段和下面对应的 if (n != 0 && c == n) 部分好像没什么优化效果,蒙古
// static Z ps_fp[20];
// for (int k = 0; k < ni; ++k)
// ps_slogp[k] = scslogp[ps[curi + k]];
// std::partial_sum(ps_slogp, ps_slogp + ni, ps_slogp);
// for (int k = 0; k < ni; ++k)
// ps_fp[k] = f(ps[curi + k]);
// std::partial_sum(ps_fp, ps_fp + ni, ps_fp, std::multiplies<Z>());
auto dfs_rec = [&](const auto &self, int n, int c, int sslogp, int t, Z fx)
{
if (c > n)
return;
// ++dfscnt;
// if (n != 0 && c == n)
// {
// sslogp += ps_slogp[n - 1];
// fx *= ps_fp[n - 1];
// n = 0;
// }
if (n == 0)
{
int tt = std::min(t, R2);
// if (sslogp > lfslog[1])
// r.lv[1] += fx;
// else
if (tt != lid[sslogp])
{
r.lv[lid[sslogp]] -= fx;
r.lv[tt] += fx;
}
return;
}
--n;
const int p = ps[curi + n];
self(self, n, c - 1, sslogp + scslogp[p], t, fx * f(p));
if (static_cast<uint64>(t) * p <= tu)
self(self, n, c, sslogp, t * p, fx);
};
dfs_rec(dfs_rec, ni, c, 0, rxi, 1);
};
for (int c = 1; c <= pfc; ++c)
for (uint64 i = N - NS * (c - 1); i >= ll && i > N - NS * c; --i)
if (cur[i - ll + 1] != cur[i - ll])
dfs(i, c);
/**/
r.partial_sum();
std::cerr << "cm done (" << double(clock()) / CLOCKS_PER_SEC << ")" << std::endl;
return r;
};
auto calc_large = [&](const std::function<Z (uint64)> &f, const std::function<Z (uint64)> &ps)
{
auto fp = [&](uint p) { return f(p); };
auto fpp = [&](uint64 pe, uint p, uint e) { return f(pe); };
DS ds = calc_medium(fp);
attach_powerful(attach_small(ds, fp), fpp);
DS res;
for (int i = R2; i >= 1; --i)
{
Z x = ps(N / i) - ds.lv[i];
for (int j = 2; i * j <= R2; ++j)
sub(x, f(j), res.lv[i * j]);
res.lv[i] = x;
}
return res;
};
auto mult_large = [&](DS &&ds, const DS &l)
{
for (int i = 1; i <= R2; ++i)
{
Z &x = ds.lv[i];
for (int j = 1; i * j <= R2; ++j)
if (ds.sv[j] != ds.sv[j - 1])
add(x, ds.sv[j] - ds.sv[j - 1], l.lv[i * j]);
}
return ds;
};
DS l0 = calc_large(
[&](uint64 n) { return 1; },
[&](uint64 n) { return Z(n % Mod); }
);
DS l1 = calc_large(
[&](uint64 n) { return Z(n % Mod); },
[&](uint64 n) { return n %= Mod, Z(n * (n + 1) / 2 % Mod); }
);
DS l;
for (int i = 1; i <= R2; ++i)
l.lv[i] = A * l0.lv[i] + B * l1.lv[i];
auto fp = [&](uint p) { return A + B * p; };
auto fpe = [&](uint64 pp, uint p, uint e) { return A * e + B * p; };
DS res = mult_large(calc_medium(fp), l);
return attach_powerful(attach_small(res, fp), fpe);
}
#include <ctime>
int main(int argc, char **argv)
{
int T;
scanf("%u", &T);
while (T--)
{
uint64 n;
Z a, b;
scanf("%llu %u %u", &n, &a.v, &b.v);
DS ds = calc(n, a, b);
std::vector<Z> res(ds.sv.begin() + 1, ds.sv.end());
res.insert(res.end(), ds.lv.begin() + 1, ds.lv.end());
std::sort(res.begin(), res.end(), [](const Z a, const Z b) { return a.v < b.v; });
res.erase(std::unique(res.begin(), res.end()), res.end());
uint Ans = 0;
for (auto x : res)
Ans ^= x.v;
printf("%u\n", Ans);
}
fprintf(stderr, "%d\n", std::__lg(size));
fprintf(stderr, "%d %d %d\n", ok, exact, dfscnt);
fprintf(stderr, "%lf\n", double(clock()) / CLOCKS_PER_SEC);
return 0;
}
详细
Test #1:
score: 100
Accepted
time: 834ms
memory: 4148kb
input:
10000 988 56395756 60780067 7923 293552717 438195956 4847 24236686 75287211 6694 74889751 64994726 3720 385482711 188638093 6021 2928896 248853035 6808 310612405 330739724 4062 15289930 175596707 9583 56394035 335888448 9798 151396947 371306315 4365 216662501 351771943 1359 165179730 80942360 1436 3...
output:
6702293 422200583 304441446 69351732 421157478 210560518 504474449 12692533 331877891 385355840 275328665 310397326 67866328 533036893 27246365 72866646 467021279 34647362 411996318 297571277 334576259 221391996 496297771 222601160 232748202 470542910 115812226 192533857 361627876 443138779 2575036 ...
result:
ok 10000 numbers
Test #2:
score: 0
Accepted
time: 699ms
memory: 4772kb
input:
486 685583 192056743 391870214 272484 346225796 149350515 656101 326831808 112167252 22515 203348552 60773766 1633155 194072757 22284059 57727 404929471 327406577 57598 251468713 173130016 1102497 36566124 195330260 3504399 214678339 86082351 360127 323967709 231892988 11663 225570343 56772624 39921...
output:
434223382 116245445 125541760 160318550 446061234 484145141 518392434 81977168 17947265 307371543 407160883 335339263 39598998 470162878 410893643 26179198 26198426 40422957 398293380 265153607 228078198 293572568 155169142 224586788 375283776 8481447 491498721 350950775 534322011 64802753 436909146...
result:
ok 486 numbers
Test #3:
score: 0
Accepted
time: 618ms
memory: 5000kb
input:
351 2069283 349969193 52280365 1407781 304782674 71786142 2619526 356665139 467865678 128394 19761994 158668471 4868626 435554461 55057371 228834 394703499 184531829 516241 188565552 183063603 703082 128264745 446152032 2069281 460231072 101600517 1407654 181732896 221743073 6648661 455206481 450814...
output:
319910185 369336286 50213187 67975443 429652780 316610082 64991059 22778081 332789438 497599689 331161326 417226667 247312840 325206278 489998938 119792359 144611262 188956641 12934607 448204725 376317 505473640 338284847 49730199 138622978 88198200 362403025 187282938 318525939 107779358 59656206 2...
result:
ok 351 numbers
Test #4:
score: 0
Accepted
time: 434ms
memory: 4940kb
input:
333 1016064 204524889 390112646 535822 104757052 269069192 1557487 409444563 74927504 49155 283505698 318482175 6259987 190292359 349969193 112767 52280365 304782674 191842 71786142 356665139 248003 467865678 19761994 1016062 158668471 435554461 535695 55057371 394703499 4848803 184531829 188565552 ...
output:
424757689 373968255 24290918 306982012 533936667 401990420 336964323 76114089 369506627 173872187 202999923 155205263 11081034 302738228 265042946 56046100 133964275 12419321 467153573 158929408 51479146 213214379 6763076 305753342 319915377 24381258 425402644 187212393 38116675 255693248 28212987 5...
result:
ok 333 numbers
Test #5:
score: 0
Accepted
time: 19804ms
memory: 1834656kb
input:
1 9994070595599 209907780 360301068
output:
39200515
result:
ok 1 number(s): "39200515"
Test #6:
score: 0
Accepted
time: 19876ms
memory: 1835060kb
input:
1 9999145190306 209907780 360301068
output:
48621786
result:
ok 1 number(s): "48621786"
Test #7:
score: 0
Accepted
time: 18961ms
memory: 1773664kb
input:
1 9483578929763 209907780 360301068
output:
51012486
result:
ok 1 number(s): "51012486"
Extra Test:
score: 0
Extra Test Passed