QOJ.ac
QOJ
ID | Problem | Submitter | Result | Time | Memory | Language | File size | Submit time | Judge time |
---|---|---|---|---|---|---|---|---|---|
#460079 | #8781. Element-Wise Comparison | ucup-team2307 | WA | 90ms | 3784kb | C++20 | 2.0kb | 2024-06-30 21:28:39 | 2024-06-30 21:28:39 |
Judging History
answer
#include <bits/stdc++.h>
#include <immintrin.h> // For AVX instructions
using namespace std;
using ll = long long;
using Type = unsigned short;
const int N = 5e4 + 100;
#pragma GCC push_options
#pragma GCC target("avx2")
int solve(const Type* a, const Type* b, Type n, Type m)
{
Type cnt = 0, ans = 0;
// Loop unrolling and vectorization using AVX2
__m256i v_cnt = _mm256_set1_epi16(0); // Vector for cnt
__m256i v_m = _mm256_set1_epi16(m); // Vector for m
const int step = 16; // AVX2 processes 16 unsigned shorts at a time
int i = 0;
for (; i + step <= n; i += step) {
// Load 16 elements from a and b
__m256i v_a = _mm256_loadu_si256((__m256i*)(a + i));
__m256i v_b = _mm256_loadu_si256((__m256i*)(b + i));
// Compare a[i] < b[i]
__m256i v_ok = _mm256_cmpgt_epi16(v_b, v_a); // v_ok = b[i] > a[i] -> a[i] < b[i]
// cnt = (cnt + 1) * ok
__m256i v_one = _mm256_set1_epi16(1);
v_cnt = _mm256_add_epi16(v_cnt, v_one);
v_cnt = _mm256_and_si256(v_cnt, v_ok);
// ans += (cnt >= m)
__m256i v_ge_m = _mm256_cmpgt_epi16(v_cnt, _mm256_sub_epi16(v_m, v_one));
ans += _mm_popcnt_u32(_mm256_movemask_epi8(v_ge_m)) / 2; // 16-bit elements are considered, hence divide by 2
}
// Process remaining elements
for (; i < n; i++) {
bool ok = a[i] < b[i];
cnt = (cnt + 1) * ok;
ans += (cnt >= m);
}
return ans;
}
#pragma GCC pop_options
Type p[N];
int main() {
cin.tie(0)->sync_with_stdio(0);
cin.exceptions(cin.failbit);
Type n, m;
cin >> n >> m;
#if 1 // real use
for (int i = 0; i < n; i++)
cin >> p[i];
#else // random test
for (Type i = 0; i < n; i++)
p[i] = i;
mt19937 rng;
shuffle(p, p + n, rng);
#endif
ll ans = 0;
for (Type d = 1; d + m <= n; d++)
ans += solve(p, p + d, n - d, m);
cout << ans;
}
Details
Tip: Click on the bar to expand more detailed information
Test #1:
score: 100
Accepted
time: 0ms
memory: 3680kb
input:
5 3 5 2 1 3 4
output:
0
result:
ok answer is '0'
Test #2:
score: 0
Accepted
time: 0ms
memory: 3724kb
input:
5 2 3 1 4 2 5
output:
2
result:
ok answer is '2'
Test #3:
score: 0
Accepted
time: 0ms
memory: 3596kb
input:
4 2 1 2 3 4
output:
3
result:
ok answer is '3'
Test #4:
score: 0
Accepted
time: 0ms
memory: 3668kb
input:
4 2 4 3 2 1
output:
0
result:
ok answer is '0'
Test #5:
score: 0
Accepted
time: 0ms
memory: 3664kb
input:
1 1 1
output:
0
result:
ok answer is '0'
Test #6:
score: -100
Wrong Answer
time: 90ms
memory: 3784kb
input:
50000 2 44045 29783 5389 7756 44022 45140 21967 5478 10868 49226 21775 31669 49836 13511 46116 14229 27206 31168 37389 3158 10658 41154 14635 18526 40540 6451 23197 46719 30593 13517 8604 46666 39189 43746 12778 3684 3194 36979 43020 14652 19549 31178 17144 27177 44336 2849 40220 11751 41993 32209 4...
output:
315378163
result:
wrong answer expected '310780127', found '315378163'