QOJ.ac
QOJ
ID | 题目 | 提交者 | 结果 | 用时 | 内存 | 语言 | 文件大小 | 提交时间 | 测评时间 |
---|---|---|---|---|---|---|---|---|---|
#437957 | #8781. Element-Wise Comparison | ucup-team3215 | WA | 0ms | 3616kb | C++20 | 1.6kb | 2024-06-09 20:51:57 | 2024-06-09 20:51:58 |
Judging History
answer
#include <bits/stdc++.h>
#pragma GCC optimize "tree-vectorize,unroll-loops"
#pragma GCC target "avx512vl,avx512bw,avx512cd"
using namespace std;
// N is the capacity (in elements) of the input arrays and the number of logical
// bits in one bitset; S is the number of 16-bit words per bitset (N / S == 16).
// Bitsets are stored transposed: main() writes the flag for logical index j + t
// (t a multiple of S, 0 <= j < S) into word j, bit t / S.
constexpr int S = 1 << 12, N = 1 << 16;
// p   - input values (entries past n keep their zero initialisation).
// tp  - not referenced anywhere in the visible code.
// cmp - per-shift comparison bitset; also rotated through as a scratch buffer.
// b0, b1 - additional bitset buffers used by the doubling loop in main().
alignas(64) uint16_t p[N], tp[N], cmp[S], b0[S], b1[S];
// ans accumulates the popcount total that is printed; n and m are read from stdin.
int64_t ans, n, m;
// Shift-and-AND on S-word bitsets: c = (a shifted down by s logical positions) & b.
//
// A logical position x is held in word x % S at bit x / S, so:
//  * for s < S, word w reads word w + s of `a`; words that run past the end wrap
//    to the front of the array and move one bit down (>> 1);
//  * for s >= S, every word stays in place and is shifted right by s / S bits.
// Bits shifted in from beyond the top are zero.
//
// `c` must not overlap `a` or `b` (all three are __restrict__); `a` and `b` may
// be the same buffer because both are only read.
template <int s>
void sand(const uint16_t* __restrict__ a, const uint16_t* __restrict__ b, uint16_t* __restrict__ c) {
    if constexpr (s < S) {
        // First word index whose source word wraps around the end of `a`.
        constexpr int split = S - s;
        for (int w = 0; w < split; ++w) c[w] = a[w + s] & b[w];
        for (int w = split; w < S; ++w) c[w] = (a[w - split] >> 1) & b[w];
    } else {
        constexpr int bits = s / S;
        for (int w = 0; w < S; ++w) c[w] = (a[w] >> bits) & b[w];
    }
}
// Runtime-to-compile-time dispatcher for the shift amount.
//
// Walks the template parameter down through N, N/2, N/4, ..., 1 until it equals
// the runtime value `s`, then calls the fixed-shift kernel sand<s>(a, b, c).
// `s` must be one of those powers of two; any other value reaches
// __builtin_unreachable().
// Note: the template parameter S shadows the global constant S inside this function.
template <int S = ::N>
void sand(const uint16_t* __restrict__ a, const uint16_t* __restrict__ b, uint16_t* __restrict__ c, int s) {
    if (s == S) {
        sand<S>(a, b, c);
        return;
    }
    if constexpr (S != 1) {
        sand<S / 2>(a, b, c, s);
    } else {
        __builtin_unreachable();
    }
}
// Reads n, m and n values, then for every shift s with s + m <= n counts the
// start positions i for which p[i + k] < p[i + s + k] holds for all k in [0, m).
// The sum over all shifts is printed.
//
// Per shift:
//  1. cmp becomes the bitset of single comparisons p[x] < p[x + s].
//  2. A doubling loop keeps `a` = AND of b consecutive comparison bits
//     (b = 1, 2, 4, ...) and, for every set bit b of m, extends the result `r`
//     by a length-b window, so `r` ends up as the AND over windows of length m.
//  3. popcount(r) is added to the answer.
int main() {
    cin.tie(0)->sync_with_stdio(0);
    cin >> n >> m;
    for (int i = 0; i < n; ++i) cin >> p[i];
    for (int s = 1; s + m <= n; ++s) {
        // cmp is filled with |= below, and the doubling loop rotates it through
        // as a scratch/result buffer, so it still holds data from the previous
        // shift. Clearing only the tail (from n - s on) left stale bits in the
        // low words and over-counted; the whole buffer must be reset.
        fill(cmp, end(cmp), 0);
        // Logical index j + t goes to word j, bit t / S. Reads at or past n see
        // the zero-initialised tail of p, so out-of-range comparisons are false.
        // NOTE(review): the largest index read is below n + S, so this assumes
        // n <= N - S - confirm against the problem limits.
#pragma GCC unroll 99
        for (int t = 0; t < N; t += S) if (t + s < n)
            for (int j = 0; j < S; ++j) cmp[j] |= (p[j + t] < p[j + t + s]) << t / S;
        // r starts as all ones (the AND over an empty window).
        fill(b0, end(b0), -1);
        uint16_t* r = b0, * a = cmp, * t = b1;
        // Three buffers rotate so that the output `t` never aliases an input.
#pragma GCC unroll 99
        for (int b = 1; b < N; sand(a, a, t, b), swap(a, t), b *= 2) if (b & m) sand(r, a, t, b), swap(r, t);
        // Count set bits four words at a time; memcpy avoids reading uint16_t
        // storage through a uint64_t lvalue (strict-aliasing violation).
        for (int i = 0; i < S / 4; ++i) {
            uint64_t word;
            std::memcpy(&word, r + i * 4, sizeof word);
            ans += __builtin_popcountll(word);
        }
    }
    cout << ans << '\n';
}
详细
Test #1:
score: 100
Accepted
time: 0ms
memory: 3592kb
input:
5 3 5 2 1 3 4
output:
0
result:
ok answer is '0'
Test #2:
score: -100
Wrong Answer
time: 0ms
memory: 3616kb
input:
5 2 3 1 4 2 5
output:
3
result:
wrong answer expected '2', found '3'