QOJ.ac

QOJ

IDProblemSubmitterResultTimeMemoryLanguageFile sizeSubmit timeJudge time
#460079#8781. Element-Wise Comparisonucup-team2307WA 90ms3784kbC++202.0kb2024-06-30 21:28:392024-06-30 21:28:39

Judging History

你现在查看的是最新测评结果

  • [2024-06-30 21:28:39]
  • 评测
  • 测评结果:WA
  • 用时:90ms
  • 内存:3784kb
  • [2024-06-30 21:28:39]
  • 提交

answer

#include <bits/stdc++.h>
#include <immintrin.h> // For AVX instructions
using namespace std;

using ll = long long;
using Type = unsigned short;

const int N = 5e4 + 100;

#pragma GCC push_options
#pragma GCC target("avx2")
int solve(const Type* a, const Type* b, Type n, Type m)
{
    Type cnt = 0, ans = 0;

    // Loop unrolling and vectorization using AVX2
    __m256i v_cnt = _mm256_set1_epi16(0); // Vector for cnt
    __m256i v_m = _mm256_set1_epi16(m); // Vector for m

    const int step = 16; // AVX2 processes 16 unsigned shorts at a time
    int i = 0;
    for (; i + step <= n; i += step) {
        // Load 16 elements from a and b
        __m256i v_a = _mm256_loadu_si256((__m256i*)(a + i));
        __m256i v_b = _mm256_loadu_si256((__m256i*)(b + i));

        // Compare a[i] < b[i]
        __m256i v_ok = _mm256_cmpgt_epi16(v_b, v_a); // v_ok = b[i] > a[i] -> a[i] < b[i]

        // cnt = (cnt + 1) * ok
        __m256i v_one = _mm256_set1_epi16(1);
        v_cnt = _mm256_add_epi16(v_cnt, v_one);
        v_cnt = _mm256_and_si256(v_cnt, v_ok);

        // ans += (cnt >= m)
        __m256i v_ge_m = _mm256_cmpgt_epi16(v_cnt, _mm256_sub_epi16(v_m, v_one));
        ans += _mm_popcnt_u32(_mm256_movemask_epi8(v_ge_m)) / 2; // 16-bit elements are considered, hence divide by 2
    }

    // Process remaining elements
    for (; i < n; i++) {
        bool ok = a[i] < b[i];
        cnt = (cnt + 1) * ok;
        ans += (cnt >= m);
    }

    return ans;
}
#pragma GCC pop_options

Type p[N];

int main() {
    cin.tie(0)->sync_with_stdio(0);
    cin.exceptions(cin.failbit);

    Type n, m;
    cin >> n >> m;

#if 1 // real use
    for (int i = 0; i < n; i++)
        cin >> p[i];
#else // random test
    for (Type i = 0; i < n; i++)
        p[i] = i;
    mt19937 rng;
    shuffle(p, p + n, rng);
#endif

    ll ans = 0;
    for (Type d = 1; d + m <= n; d++)
        ans += solve(p, p + d, n - d, m);
    cout << ans;
}

Details

Tip: Click on the bar to expand more detailed information

Test #1:

score: 100
Accepted
time: 0ms
memory: 3680kb

input:

5 3
5 2 1 3 4

output:

0

result:

ok answer is '0'

Test #2:

score: 0
Accepted
time: 0ms
memory: 3724kb

input:

5 2
3 1 4 2 5

output:

2

result:

ok answer is '2'

Test #3:

score: 0
Accepted
time: 0ms
memory: 3596kb

input:

4 2
1 2 3 4

output:

3

result:

ok answer is '3'

Test #4:

score: 0
Accepted
time: 0ms
memory: 3668kb

input:

4 2
4 3 2 1

output:

0

result:

ok answer is '0'

Test #5:

score: 0
Accepted
time: 0ms
memory: 3664kb

input:

1 1
1

output:

0

result:

ok answer is '0'

Test #6:

score: -100
Wrong Answer
time: 90ms
memory: 3784kb

input:

50000 2
44045 29783 5389 7756 44022 45140 21967 5478 10868 49226 21775 31669 49836 13511 46116 14229 27206 31168 37389 3158 10658 41154 14635 18526 40540 6451 23197 46719 30593 13517 8604 46666 39189 43746 12778 3684 3194 36979 43020 14652 19549 31178 17144 27177 44336 2849 40220 11751 41993 32209 4...

output:

315378163

result:

wrong answer expected '310780127', found '315378163'