QOJ.ac

QOJ

IDProblemSubmitterResultTimeMemoryLanguageFile sizeSubmit timeJudge time
#372167#8049. Equal Sumsucup-team3215WA 2178ms360352kbC++202.2kb2024-03-31 00:58:352024-03-31 00:58:36

Judging History

你现在查看的是最新测评结果

  • [2024-03-31 00:58:36]
  • 评测
  • 测评结果:WA
  • 用时:2178ms
  • 内存:360352kb
  • [2024-03-31 00:58:35]
  • 提交

answer

#include <bits/stdc++.h>
#pragma GCC target "avx512dq,avx512bw,avx512vl,avx512vbmi,avx512vbmi2,bmi,abm,bmi2"
#include "immintrin.h"
#pragma GCC optimize "O3"

using namespace std;

constexpr int N = 500, S = (N + 1) * 512, mod = 998244353, B0 = 16, B1 = 16, B08 = B0 * 8;

vector<int> cx[N];

int cy[B0 * 8][S];

void wconv(const int* a, int sz, int* b, int l, int r) {
  uint32_t s = 0;
  for (int i = 0; i < l; ++i) b[i] = 0;
  for (int i = l; i < min(r, sz + l); ++i) b[i] = (s += a[i - l]) %= mod;
  for (int i = min(r, sz + l); i < sz + l; ++i) s += a[i - l] + mod - a[i - r], b[i] = s -= (s >= 2 * mod? 2 * mod: s >= mod? mod: 0);
  for (int i = sz + l; i < r; ++i) b[i] = s;
  for (int i = max(r, sz + l); i < sz + r; ++i) b[i] = (s += mod - a[i - r]) %= mod;
}

alignas(64) uint64_t ty[B1 * B08];
alignas(64) uint64_t ans[512][512];

array<int, 2> lrx[N], lry[N];

int main() {
  cin.tie(0)->sync_with_stdio(0);
  int n, m; cin >> n >> m;
  for (int i = 0; i < n; ++i) cin >> lrx[i][0] >> lrx[i][1], ++lrx[i][1];
  for (int i = 0; i < m; ++i) cin >> lry[i][0] >> lry[i][1], ++lry[i][1];
  for (int i = 0; i < n; ++i) {
    cx[i].resize(N * (i + 1) + B1);
    wconv(i? &cx[i - 1][0]: &(const int&)1, N * i + 1, &cx[i][0], lrx[i][0], lrx[i][1]);
  }
  using v8u64 = __attribute((vector_size(64))) uint64_t;
  for (int I = 0; I < m; I += B08) {
    for (int i = I; i < min(I + B08, m); ++i) wconv(i? &cy[(i + B08 - 1) % B08][0]: &(const int&)1, N * i + 1, &cy[i % B08][0], lry[i][0], lry[i][1]);
    for (int K = 0; K <= (I + B08) * N; K += B1) {
      for (int i = 0; i < B08; ++i)
      for (int k = 0; k < B1; ++k) ty[i + B08 * k] = cy[i][K + k];
      for (int j = max(0, (K + N - 1) / N - 1); j < n; ++j) {
        v8u64 x[B1]{};
        for (int k = 0; k < B1; ++k) x[k] += cx[j][K + k];
        for (int i = 0; i < B0; ++i)
        for (int k = 0; k < B1; ++k) (v8u64&)ans[j][I + i * 8] += (v8u64)_mm512_mul_epu32((__m512i&)x[k], (__m512i&)ty[i + k * B0 << 3]);
        for (int i = 0; i < B0; i += 8) (v8u64&)ans[j][I + i] -= ((v8u64&)ans[j][I + i] >> 30) * mod;
      }
    }
  }
  for (int j = 0; j < n; ++j, cout << '\n')
  for (int i = 0; i < m; ++i) cout << ans[j][i] % mod << ' ';
}

Details

Tip: Click on the bar to expand more detailed information

Test #1:

score: 100
Accepted
time: 0ms
memory: 3704kb

input:

2 3
1 2
2 3
1 4
2 2
1 3

output:

2 0 0 
3 4 4 

result:

ok 6 numbers

Test #2:

score: -100
Wrong Answer
time: 2178ms
memory: 360352kb

input:

500 500
19 458
1 480
7 485
50 461
12 476
15 461
48 466
40 453
46 467
9 458
27 478
26 472
46 459
29 490
6 500
17 487
48 484
28 472
28 459
25 480
4 491
29 481
36 460
2 491
44 499
22 473
20 458
4 483
27 471
2 496
11 461
43 450
2 478
37 466
15 459
42 482
7 451
19 455
2 453
47 475
48 450
1 474
46 471
9 4...

output:

411 79401 9145270 673005095 180581065 984223118 586589234 293043270 404363796 865361724 665487988 118838806 926189944 226338288 521479857 808644951 786041288 340769021 177100 21 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...

result:

wrong answer 1517th numbers differ - expected: '443017050', found: '509209493'