QOJ.ac
QOJ
ID | Problem | Submitter | Result | Time | Memory | Language | File size | Submit time | Judge time |
---|---|---|---|---|---|---|---|---|---|
#372167 | #8049. Equal Sums | ucup-team3215 | WA | 2178ms | 360352kb | C++20 | 2.2kb | 2024-03-31 00:58:35 | 2024-03-31 00:58:36 |
Judging History
answer
#include <bits/stdc++.h>
#pragma GCC target "avx512dq,avx512bw,avx512vl,avx512vbmi,avx512vbmi2,bmi,abm,bmi2"
#include "immintrin.h"
#pragma GCC optimize "O3"
using namespace std;
constexpr int N = 500, S = (N + 1) * 512, mod = 998244353, B0 = 16, B1 = 16, B08 = B0 * 8;
vector<int> cx[N];
int cy[B0 * 8][S];
void wconv(const int* a, int sz, int* b, int l, int r) {
uint32_t s = 0;
for (int i = 0; i < l; ++i) b[i] = 0;
for (int i = l; i < min(r, sz + l); ++i) b[i] = (s += a[i - l]) %= mod;
for (int i = min(r, sz + l); i < sz + l; ++i) s += a[i - l] + mod - a[i - r], b[i] = s -= (s >= 2 * mod? 2 * mod: s >= mod? mod: 0);
for (int i = sz + l; i < r; ++i) b[i] = s;
for (int i = max(r, sz + l); i < sz + r; ++i) b[i] = (s += mod - a[i - r]) %= mod;
}
alignas(64) uint64_t ty[B1 * B08];
alignas(64) uint64_t ans[512][512];
array<int, 2> lrx[N], lry[N];
int main() {
cin.tie(0)->sync_with_stdio(0);
int n, m; cin >> n >> m;
for (int i = 0; i < n; ++i) cin >> lrx[i][0] >> lrx[i][1], ++lrx[i][1];
for (int i = 0; i < m; ++i) cin >> lry[i][0] >> lry[i][1], ++lry[i][1];
for (int i = 0; i < n; ++i) {
cx[i].resize(N * (i + 1) + B1);
wconv(i? &cx[i - 1][0]: &(const int&)1, N * i + 1, &cx[i][0], lrx[i][0], lrx[i][1]);
}
using v8u64 = __attribute((vector_size(64))) uint64_t;
for (int I = 0; I < m; I += B08) {
for (int i = I; i < min(I + B08, m); ++i) wconv(i? &cy[(i + B08 - 1) % B08][0]: &(const int&)1, N * i + 1, &cy[i % B08][0], lry[i][0], lry[i][1]);
for (int K = 0; K <= (I + B08) * N; K += B1) {
for (int i = 0; i < B08; ++i)
for (int k = 0; k < B1; ++k) ty[i + B08 * k] = cy[i][K + k];
for (int j = max(0, (K + N - 1) / N - 1); j < n; ++j) {
v8u64 x[B1]{};
for (int k = 0; k < B1; ++k) x[k] += cx[j][K + k];
for (int i = 0; i < B0; ++i)
for (int k = 0; k < B1; ++k) (v8u64&)ans[j][I + i * 8] += (v8u64)_mm512_mul_epu32((__m512i&)x[k], (__m512i&)ty[i + k * B0 << 3]);
for (int i = 0; i < B0; i += 8) (v8u64&)ans[j][I + i] -= ((v8u64&)ans[j][I + i] >> 30) * mod;
}
}
}
for (int j = 0; j < n; ++j, cout << '\n')
for (int i = 0; i < m; ++i) cout << ans[j][i] % mod << ' ';
}
Details
Tip: Click on the bar to expand more detailed information
Test #1:
score: 100
Accepted
time: 0ms
memory: 3704kb
input:
2 3 1 2 2 3 1 4 2 2 1 3
output:
2 0 0 3 4 4
result:
ok 6 numbers
Test #2:
score: -100
Wrong Answer
time: 2178ms
memory: 360352kb
input:
500 500 19 458 1 480 7 485 50 461 12 476 15 461 48 466 40 453 46 467 9 458 27 478 26 472 46 459 29 490 6 500 17 487 48 484 28 472 28 459 25 480 4 491 29 481 36 460 2 491 44 499 22 473 20 458 4 483 27 471 2 496 11 461 43 450 2 478 37 466 15 459 42 482 7 451 19 455 2 453 47 475 48 450 1 474 46 471 9 4...
output:
411 79401 9145270 673005095 180581065 984223118 586589234 293043270 404363796 865361724 665487988 118838806 926189944 226338288 521479857 808644951 786041288 340769021 177100 21 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
result:
wrong answer 1517th numbers differ - expected: '443017050', found: '509209493'