QOJ.ac

QOJ

IDProblemSubmitterResultTimeMemoryLanguageFile sizeSubmit timeJudge time
#879341#9697. Algebraucup-team2796#AC ✓55ms15668kbC++2328.1kb2025-02-01 23:44:472025-02-01 23:44:55

Judging History

This is the latest submission verdict.

  • [2025-02-01 23:44:55]
  • Judged
  • Verdict: AC
  • Time: 55ms
  • Memory: 15668kb
  • [2025-02-01 23:44:47]
  • Submitted

answer

#line 1 "library/Template/template.hpp"
#include <bits/stdc++.h>
using namespace std;

#define rep(i, a, b) for (int i = (int)(a); i < (int)(b); i++)
#define rrep(i, a, b) for (int i = (int)(b)-1; i >= (int)(a); i--)
#define ALL(v) (v).begin(), (v).end()
#define UNIQUE(v) sort(ALL(v)), (v).erase(unique(ALL(v)), (v).end())
#define SZ(v) (int)v.size()
#define MIN(v) *min_element(ALL(v))
#define MAX(v) *max_element(ALL(v))
#define LB(v, x) int(lower_bound(ALL(v), (x)) - (v).begin())
#define UB(v, x) int(upper_bound(ALL(v), (x)) - (v).begin())

using uint = unsigned int;
using ll = long long int;
using ull = unsigned long long;
using i128 = __int128_t;
using u128 = __uint128_t;
const int inf = 0x3fffffff;
const ll INF = 0x1fffffffffffffff;

template <typename T> inline bool chmax(T &a, T b) {
    if (a < b) {
        a = b;
        return 1;
    }
    return 0;
}
template <typename T> inline bool chmin(T &a, T b) {
    if (a > b) {
        a = b;
        return 1;
    }
    return 0;
}
template <typename T, typename U> T ceil(T x, U y) {
    assert(y != 0);
    if (y < 0)
        x = -x, y = -y;
    return (x > 0 ? (x + y - 1) / y : x / y);
}
template <typename T, typename U> T floor(T x, U y) {
    assert(y != 0);
    if (y < 0)
        x = -x, y = -y;
    return (x > 0 ? x / y : (x - y + 1) / y);
}
template <typename T> int popcnt(T x) {
    return __builtin_popcountll(x);
}
template <typename T> int topbit(T x) {
    return (x == 0 ? -1 : 63 - __builtin_clzll(x));
}
template <typename T> int lowbit(T x) {
    return (x == 0 ? -1 : __builtin_ctzll(x));
}

template <class T, class U>
ostream &operator<<(ostream &os, const pair<T, U> &p) {
    os << "P(" << p.first << ", " << p.second << ")";
    return os;
}
template <typename T> ostream &operator<<(ostream &os, const vector<T> &vec) {
    os << "{";
    for (int i = 0; i < vec.size(); i++) {
        os << vec[i] << (i + 1 == vec.size() ? "" : ", ");
    }
    os << "}";
    return os;
}
template <typename T, typename U>
ostream &operator<<(ostream &os, const map<T, U> &map_var) {
    os << "{";
    for (auto itr = map_var.begin(); itr != map_var.end(); itr++) {
        os << "(" << itr->first << ", " << itr->second << ")";
        itr++;
        if (itr != map_var.end())
            os << ", ";
        itr--;
    }
    os << "}";
    return os;
}
template <typename T> ostream &operator<<(ostream &os, const set<T> &set_var) {
    os << "{";
    for (auto itr = set_var.begin(); itr != set_var.end(); itr++) {
        os << *itr;
        ++itr;
        if (itr != set_var.end())
            os << ", ";
        itr--;
    }
    os << "}";
    return os;
}
#ifdef LOCAL
#define show(...) _show(0, #__VA_ARGS__, __VA_ARGS__)
#else
#define show(...) true
#endif
template <typename T> void _show(int i, T name) {
    cerr << '\n';
}
template <typename T1, typename T2, typename... T3>
void _show(int i, const T1 &a, const T2 &b, const T3 &...c) {
    for (; a[i] != ',' && a[i] != '\0'; i++)
        cerr << a[i];
    cerr << ":" << b << " ";
    _show(i + 1, a, c...);
}
#line 2 "library/Utility/fastio.hpp"
#include <unistd.h>
namespace fastio {
static constexpr uint32_t SZ = 1 << 17;
char ibuf[SZ];
char obuf[SZ];
char out[100];
// pointer of ibuf, obuf

uint32_t pil = 0, pir = 0, por = 0;

struct Pre {
    char num[10000][4];
    constexpr Pre() : num() {
        for (int i = 0; i < 10000; i++) {
            int n = i;
            for (int j = 3; j >= 0; j--) {
                num[i][j] = n % 10 | '0';
                n /= 10;
            }
        }
    }
} constexpr pre;

inline void load() {
    memmove(ibuf, ibuf + pil, pir - pil);
    pir = pir - pil + fread(ibuf + pir - pil, 1, SZ - pir + pil, stdin);
    pil = 0;
    if (pir < SZ)
        ibuf[pir++] = '\n';
}

inline void flush() {
    fwrite(obuf, 1, por, stdout);
    por = 0;
}

void rd(char &c) {
    do {
        if (pil + 1 > pir)
            load();
        c = ibuf[pil++];
    } while (isspace(c));
}

void rd(string &x) {
    x.clear();
    char c;
    do {
        if (pil + 1 > pir)
            load();
        c = ibuf[pil++];
    } while (isspace(c));
    do {
        x += c;
        if (pil == pir)
            load();
        c = ibuf[pil++];
    } while (!isspace(c));
}

template <typename T> void rd_real(T &x) {
    string s;
    rd(s);
    x = stod(s);
}

template <typename T> void rd_integer(T &x) {
    if (pil + 100 > pir)
        load();
    char c;
    do
        c = ibuf[pil++];
    while (c < '-');
    bool minus = 0;
    if constexpr (is_signed<T>::value || is_same_v<T, i128>) {
        if (c == '-') {
            minus = 1, c = ibuf[pil++];
        }
    }
    x = 0;
    while ('0' <= c) {
        x = x * 10 + (c & 15), c = ibuf[pil++];
    }
    if constexpr (is_signed<T>::value || is_same_v<T, i128>) {
        if (minus)
            x = -x;
    }
}

void rd(int &x) {
    rd_integer(x);
}
void rd(ll &x) {
    rd_integer(x);
}
void rd(i128 &x) {
    rd_integer(x);
}
void rd(uint &x) {
    rd_integer(x);
}
void rd(ull &x) {
    rd_integer(x);
}
void rd(u128 &x) {
    rd_integer(x);
}
void rd(double &x) {
    rd_real(x);
}
void rd(long double &x) {
    rd_real(x);
}

template <class T, class U> void rd(pair<T, U> &p) {
    return rd(p.first), rd(p.second);
}
template <size_t N = 0, typename T> void rd_tuple(T &t) {
    if constexpr (N < std::tuple_size<T>::value) {
        auto &x = std::get<N>(t);
        rd(x);
        rd_tuple<N + 1>(t);
    }
}
template <class... T> void rd(tuple<T...> &tpl) {
    rd_tuple(tpl);
}

template <size_t N = 0, typename T> void rd(array<T, N> &x) {
    for (auto &d : x)
        rd(d);
}
template <class T> void rd(vector<T> &x) {
    for (auto &d : x)
        rd(d);
}

void read() {}
template <class H, class... T> void read(H &h, T &...t) {
    rd(h), read(t...);
}

void wt(const char c) {
    if (por == SZ)
        flush();
    obuf[por++] = c;
}
void wt(const string s) {
    for (char c : s)
        wt(c);
}
void wt(const char *s) {
    size_t len = strlen(s);
    for (size_t i = 0; i < len; i++)
        wt(s[i]);
}

template <typename T> void wt_integer(T x) {
    if (por > SZ - 100)
        flush();
    if (x < 0) {
        obuf[por++] = '-', x = -x;
    }
    int outi;
    for (outi = 96; x >= 10000; outi -= 4) {
        memcpy(out + outi, pre.num[x % 10000], 4);
        x /= 10000;
    }
    if (x >= 1000) {
        memcpy(obuf + por, pre.num[x], 4);
        por += 4;
    } else if (x >= 100) {
        memcpy(obuf + por, pre.num[x] + 1, 3);
        por += 3;
    } else if (x >= 10) {
        int q = (x * 103) >> 10;
        obuf[por] = q | '0';
        obuf[por + 1] = (x - q * 10) | '0';
        por += 2;
    } else
        obuf[por++] = x | '0';
    memcpy(obuf + por, out + outi + 4, 96 - outi);
    por += 96 - outi;
}

template <typename T> void wt_real(T x) {
    ostringstream oss;
    oss << fixed << setprecision(15) << double(x);
    string s = oss.str();
    wt(s);
}

void wt(int x) {
    wt_integer(x);
}
void wt(ll x) {
    wt_integer(x);
}
void wt(i128 x) {
    wt_integer(x);
}
void wt(uint x) {
    wt_integer(x);
}
void wt(ull x) {
    wt_integer(x);
}
void wt(u128 x) {
    wt_integer(x);
}
void wt(double x) {
    wt_real(x);
}
void wt(long double x) {
    wt_real(x);
}

template <class T, class U> void wt(const pair<T, U> val) {
    wt(val.first);
    wt(' ');
    wt(val.second);
}
template <size_t N = 0, typename T> void wt_tuple(const T t) {
    if constexpr (N < std::tuple_size<T>::value) {
        if constexpr (N > 0) {
            wt(' ');
        }
        const auto x = std::get<N>(t);
        wt(x);
        wt_tuple<N + 1>(t);
    }
}
template <class... T> void wt(tuple<T...> tpl) {
    wt_tuple(tpl);
}
template <class T, size_t S> void wt(const array<T, S> val) {
    auto n = val.size();
    for (size_t i = 0; i < n; i++) {
        if (i)
            wt(' ');
        wt(val[i]);
    }
}
template <class T> void wt(const vector<T> val) {
    auto n = val.size();
    for (size_t i = 0; i < n; i++) {
        if (i)
            wt(' ');
        wt(val[i]);
    }
}

void print() {
    wt('\n');
}
template <class Head, class... Tail> void print(Head &&head, Tail &&...tail) {
    wt(head);
    if (sizeof...(Tail))
        wt(' ');
    print(forward<Tail>(tail)...);
}
void __attribute__((destructor)) _d() {
    flush();
}
} // namespace fastio

using fastio::flush;
using fastio::print;
using fastio::read;

inline void first(bool i = true) {
    print(i ? "first" : "second");
}
inline void Alice(bool i = true) {
    print(i ? "Alice" : "Bob");
}
inline void Takahashi(bool i = true) {
    print(i ? "Takahashi" : "Aoki");
}
inline void yes(bool i = true) {
    print(i ? "yes" : "no");
}
inline void Yes(bool i = true) {
    print(i ? "Yes" : "No");
}
inline void No() {
    print("No");
}
inline void YES(bool i = true) {
    print(i ? "YES" : "NO");
}
inline void NO() {
    print("NO");
}
inline void Yay(bool i = true) {
    print(i ? "Yay!" : ":(");
}
inline void Possible(bool i = true) {
    print(i ? "Possible" : "Impossible");
}
inline void POSSIBLE(bool i = true) {
    print(i ? "POSSIBLE" : "IMPOSSIBLE");
}

/**
 * @brief Fast IO
 */
#line 3 "sol.cpp"

#line 2 "library/Math/fastdiv.hpp"

struct FastDiv {
    using u64 = uint64_t;
    using u128 = __uint128_t;
    constexpr FastDiv() : m(), s(), x() {}
    constexpr FastDiv(int _m)
        : m(_m), s(__lg(m - 1)), x(((u128(1) << (s + 64)) + m - 1) / m) {}
    constexpr int get() {
        return m;
    }
    constexpr friend u64 operator/(u64 n, const FastDiv &d) {
        return (u128(n) * d.x >> d.s) >> 64;
    }
    constexpr friend int operator%(u64 n, const FastDiv &d) {
        return n - n / d * d.m;
    }
    constexpr pair<u64, int> divmod(u64 n) const {
        u64 q = n / (*this);
        return {q, n - q * m};
    }
    int m, s;
    u64 x;
};

struct FastDiv64 {
    using u64 = uint64_t;
    using u128 = __uint128_t;
    u128 mod, mh, ml;
    explicit FastDiv64(u64 mod = 1) : mod(mod) {
        u128 m = u128(-1) / mod;
        if (m * mod + mod == u128(0))
            ++m;
        mh = m >> 64;
        ml = m & u64(-1);
    }
    u64 umod() const {
        return mod;
    }
    u64 modulo(u128 x) {
        u128 z = (x & u64(-1)) * ml;
        z = (x & u64(-1)) * mh + (x >> 64) * ml + (z >> 64);
        z = (x >> 64) * mh + (z >> 64);
        x -= z * mod;
        return x < mod ? x : x - mod;
    }
    u64 mul(u64 a, u64 b) {
        return modulo(u128(a) * b);
    }
};

/**
 * @brief Fast Division
 */
#line 3 "library/Math/dynamic.hpp"

struct Fp {
    using u64 = uint64_t;
    uint v;
    static int get_mod() {
        return _getmod();
    }
    static void set_mod(int _m) {
        bar = FastDiv(_m);
    }
    Fp inv() const {
        int tmp, a = v, b = get_mod(), x = 1, y = 0;
        while (b) {
            tmp = a / b, a -= tmp * b;
            swap(a, b);
            x -= tmp * y;
            swap(x, y);
        }
        if (x < 0) {
            x += get_mod();
        }
        return x;
    }
    Fp() : v(0) {}
    Fp(ll x) {
        v = x % get_mod();
        if (v < 0)
            v += get_mod();
    }
    Fp operator-() const {
        return Fp() - *this;
    }
    Fp pow(ll t) {
        assert(t >= 0);
        Fp res = 1, b = *this;
        while (t) {
            if (t & 1)
                res *= b;
            b *= b;
            t >>= 1;
        }
        return res;
    }
    Fp &operator+=(const Fp &x) {
        v += x.v;
        if (v >= get_mod())
            v -= get_mod();
        return *this;
    }
    Fp &operator-=(const Fp &x) {
        v += get_mod() - x.v;
        if (v >= get_mod())
            v -= get_mod();
        return *this;
    }
    Fp &operator*=(const Fp &x) {
        v = (u64(v) * x.v) % bar;
        return *this;
    }
    Fp &operator/=(const Fp &x) {
        (*this) *= x.inv();
        return *this;
    }
    Fp operator+(const Fp &x) const {
        return Fp(*this) += x;
    }
    Fp operator-(const Fp &x) const {
        return Fp(*this) -= x;
    }
    Fp operator*(const Fp &x) const {
        return Fp(*this) *= x;
    }
    Fp operator/(const Fp &x) const {
        return Fp(*this) /= x;
    }
    bool operator==(const Fp &x) const {
        return v == x.v;
    }
    bool operator!=(const Fp &x) const {
        return v != x.v;
    }
    friend istream &operator>>(istream &is, Fp &x) {
        return is >> x.v;
    }
    friend ostream &operator<<(ostream &os, const Fp &x) {
        return os << x.v;
    }

  private:
    static FastDiv bar;
    static int _getmod() {
        return bar.get();
    }
};
FastDiv Fp::bar(998244353);

void rd(Fp &x) {
    fastio::rd(x.v);
}
void wt(Fp x) {
    fastio::wt(x.v);
}

/**
 * @brief Dynamic Modint
 */
#line 2 "library/Math/comb.hpp"

template <typename T> T Inv(ll n) {
    static int md;
    static vector<T> buf({0, 1});
    if (md != T::get_mod()) {
        md = T::get_mod();
        buf = vector<T>({0, 1});
    }
    assert(n > 0);
    n %= md;
    while (SZ(buf) <= n) {
        int k = SZ(buf), q = (md + k - 1) / k;
        buf.push_back(buf[k * q - md] * q);
    }
    return buf[n];
}

template <typename T> T Fact(ll n, bool inv = 0) {
    static int md;
    static vector<T> buf({1, 1}), ibuf({1, 1});
    if (md != T::get_mod()) {
        md = T::get_mod();
        buf = ibuf = vector<T>({1, 1});
    }
    assert(n >= 0 and n < md);
    while (SZ(buf) <= n) {
        buf.push_back(buf.back() * SZ(buf));
        ibuf.push_back(ibuf.back() * Inv<T>(SZ(ibuf)));
    }
    return inv ? ibuf[n] : buf[n];
}

template <typename T> T nPr(int n, int r, bool inv = 0) {
    if (n < 0 || n < r || r < 0)
        return 0;
    return Fact<T>(n, inv) * Fact<T>(n - r, inv ^ 1);
}
template <typename T> T nCr(int n, int r, bool inv = 0) {
    if (n < 0 || n < r || r < 0)
        return 0;
    return Fact<T>(n, inv) * Fact<T>(r, inv ^ 1) * Fact<T>(n - r, inv ^ 1);
}
// sum = n, r tuples
template <typename T> T nHr(int n, int r, bool inv = 0) {
    return nCr<T>(n + r - 1, r - 1, inv);
}
// sum = n, a nonzero tuples and b tuples
template <typename T> T choose(int n, int a, int b) {
    if (n == 0)
        return !a;
    return nCr<T>(n + b - 1, a + b - 1);
}

/**
 * @brief Combination
 */
#line 6 "sol.cpp"

#line 2 "library/Convolution/ntt.hpp"

template <typename T> struct NTT {
    static constexpr int rank2 = __builtin_ctzll(T::get_mod() - 1);
    std::array<T, rank2 + 1> root;  // root[i]^(2^i) == 1
    std::array<T, rank2 + 1> iroot; // root[i] * iroot[i] == 1

    std::array<T, std::max(0, rank2 - 2 + 1)> rate2;
    std::array<T, std::max(0, rank2 - 2 + 1)> irate2;

    std::array<T, std::max(0, rank2 - 3 + 1)> rate3;
    std::array<T, std::max(0, rank2 - 3 + 1)> irate3;

    NTT() {
        T g = 2;
        while (g.pow((T::get_mod() - 1) >> 1) == 1) {
            g += 1;
        }
        root[rank2] = g.pow((T::get_mod() - 1) >> rank2);
        iroot[rank2] = root[rank2].inv();
        for (int i = rank2 - 1; i >= 0; i--) {
            root[i] = root[i + 1] * root[i + 1];
            iroot[i] = iroot[i + 1] * iroot[i + 1];
        }

        {
            T prod = 1, iprod = 1;
            for (int i = 0; i <= rank2 - 2; i++) {
                rate2[i] = root[i + 2] * prod;
                irate2[i] = iroot[i + 2] * iprod;
                prod *= iroot[i + 2];
                iprod *= root[i + 2];
            }
        }
        {
            T prod = 1, iprod = 1;
            for (int i = 0; i <= rank2 - 3; i++) {
                rate3[i] = root[i + 3] * prod;
                irate3[i] = iroot[i + 3] * iprod;
                prod *= iroot[i + 3];
                iprod *= root[i + 3];
            }
        }
    }

    void ntt(std::vector<T> &a, bool type = 0) {
        int n = int(a.size());
        int h = __builtin_ctzll((unsigned int)n);
        a.resize(1 << h);

        if (type) {
            int len = h; // a[i, i+(n>>len), i+2*(n>>len), ..] is transformed
            while (len) {
                if (len == 1) {
                    int p = 1 << (h - len);
                    T irot = 1;
                    for (int s = 0; s < (1 << (len - 1)); s++) {
                        int offset = s << (h - len + 1);
                        for (int i = 0; i < p; i++) {
                            auto l = a[i + offset];
                            auto r = a[i + offset + p];
                            a[i + offset] = l + r;
                            a[i + offset + p] =
                                (unsigned long long)(T::get_mod() + l.v - r.v) *
                                irot.v;
                            ;
                        }
                        if (s + 1 != (1 << (len - 1)))
                            irot *= irate2[__builtin_ctzll(~(unsigned int)(s))];
                    }
                    len--;
                } else {
                    // 4-base
                    int p = 1 << (h - len);
                    T irot = 1, iimag = iroot[2];
                    for (int s = 0; s < (1 << (len - 2)); s++) {
                        T irot2 = irot * irot;
                        T irot3 = irot2 * irot;
                        int offset = s << (h - len + 2);
                        for (int i = 0; i < p; i++) {
                            auto a0 = 1ULL * a[i + offset + 0 * p].v;
                            auto a1 = 1ULL * a[i + offset + 1 * p].v;
                            auto a2 = 1ULL * a[i + offset + 2 * p].v;
                            auto a3 = 1ULL * a[i + offset + 3 * p].v;

                            auto a2na3iimag =
                                1ULL * T((T::get_mod() + a2 - a3) * iimag.v).v;

                            a[i + offset] = a0 + a1 + a2 + a3;
                            a[i + offset + 1 * p] =
                                (a0 + (T::get_mod() - a1) + a2na3iimag) *
                                irot.v;
                            a[i + offset + 2 * p] =
                                (a0 + a1 + (T::get_mod() - a2) +
                                 (T::get_mod() - a3)) *
                                irot2.v;
                            a[i + offset + 3 * p] =
                                (a0 + (T::get_mod() - a1) +
                                 (T::get_mod() - a2na3iimag)) *
                                irot3.v;
                        }
                        if (s + 1 != (1 << (len - 2)))
                            irot *= irate3[__builtin_ctzll(~(unsigned int)(s))];
                    }
                    len -= 2;
                }
            }
            T e = T(n).inv();
            for (auto &x : a)
                x *= e;
        } else {
            int len = 0; // a[i, i+(n>>len), i+2*(n>>len), ..] is transformed
            while (len < h) {
                if (h - len == 1) {
                    int p = 1 << (h - len - 1);
                    T rot = 1;
                    for (int s = 0; s < (1 << len); s++) {
                        int offset = s << (h - len);
                        for (int i = 0; i < p; i++) {
                            auto l = a[i + offset];
                            auto r = a[i + offset + p] * rot;
                            a[i + offset] = l + r;
                            a[i + offset + p] = l - r;
                        }
                        if (s + 1 != (1 << len))
                            rot *= rate2[__builtin_ctzll(~(unsigned int)(s))];
                    }
                    len++;
                } else {
                    // 4-base
                    int p = 1 << (h - len - 2);
                    T rot = 1, imag = root[2];
                    for (int s = 0; s < (1 << len); s++) {
                        T rot2 = rot * rot;
                        T rot3 = rot2 * rot;
                        int offset = s << (h - len);
                        for (int i = 0; i < p; i++) {
                            auto mod2 = 1ULL * T::get_mod() * T::get_mod();
                            auto a0 = 1ULL * a[i + offset].v;
                            auto a1 = 1ULL * a[i + offset + p].v * rot.v;
                            auto a2 = 1ULL * a[i + offset + 2 * p].v * rot2.v;
                            auto a3 = 1ULL * a[i + offset + 3 * p].v * rot3.v;
                            auto a1na3imag =
                                1ULL * T(a1 + mod2 - a3).v * imag.v;
                            auto na2 = mod2 - a2;
                            a[i + offset] = a0 + a2 + a1 + a3;
                            a[i + offset + 1 * p] =
                                a0 + a2 + (2 * mod2 - (a1 + a3));
                            a[i + offset + 2 * p] = a0 + na2 + a1na3imag;
                            a[i + offset + 3 * p] =
                                a0 + na2 + (mod2 - a1na3imag);
                        }
                        if (s + 1 != (1 << len))
                            rot *= rate3[__builtin_ctzll(~(unsigned int)(s))];
                    }
                    len += 2;
                }
            }
        }
    }
    vector<T> mult(const vector<T> &a, const vector<T> &b) {
        if (a.empty() or b.empty())
            return vector<T>();
        int as = a.size(), bs = b.size();
        int n = as + bs - 1;
        if (as <= 30 or bs <= 30) {
            if (as > 30)
                return mult(b, a);
            vector<T> res(n);
            rep(i, 0, as) rep(j, 0, bs) res[i + j] += a[i] * b[j];
            return res;
        }
        int m = 1;
        while (m < n)
            m <<= 1;
        vector<T> res(m);
        rep(i, 0, as) res[i] = a[i];
        ntt(res);
        if (a == b)
            rep(i, 0, m) res[i] *= res[i];
        else {
            vector<T> c(m);
            rep(i, 0, bs) c[i] = b[i];
            ntt(c);
            rep(i, 0, m) res[i] *= c[i];
        }
        ntt(res, 1);
        res.resize(n);
        return res;
    }
};

/**
 * @brief Number Theoretic Transform
 */
#line 2 "library/Math/modint.hpp"

template <unsigned mod = 1000000007> struct fp {
    unsigned v;
    static constexpr int get_mod() {
        return mod;
    }
    constexpr unsigned inv() const {
        assert(v != 0);
        int x = v, y = mod, p = 1, q = 0, t = 0, tmp = 0;
        while (y > 0) {
            t = x / y;
            x -= t * y, p -= t * q;
            tmp = x, x = y, y = tmp;
            tmp = p, p = q, q = tmp;
        }
        if (p < 0)
            p += mod;
        return p;
    }
    constexpr fp(ll x = 0) : v(x >= 0 ? x % mod : (mod - (-x) % mod) % mod) {}
    fp operator-() const {
        return fp() - *this;
    }
    fp pow(ull t) {
        fp res = 1, b = *this;
        while (t) {
            if (t & 1)
                res *= b;
            b *= b;
            t >>= 1;
        }
        return res;
    }
    fp &operator+=(const fp &x) {
        if ((v += x.v) >= mod)
            v -= mod;
        return *this;
    }
    fp &operator-=(const fp &x) {
        if ((v += mod - x.v) >= mod)
            v -= mod;
        return *this;
    }
    fp &operator*=(const fp &x) {
        v = ull(v) * x.v % mod;
        return *this;
    }
    fp &operator/=(const fp &x) {
        v = ull(v) * x.inv() % mod;
        return *this;
    }
    fp operator+(const fp &x) const {
        return fp(*this) += x;
    }
    fp operator-(const fp &x) const {
        return fp(*this) -= x;
    }
    fp operator*(const fp &x) const {
        return fp(*this) *= x;
    }
    fp operator/(const fp &x) const {
        return fp(*this) /= x;
    }
    bool operator==(const fp &x) const {
        return v == x.v;
    }
    bool operator!=(const fp &x) const {
        return v != x.v;
    }
    friend istream &operator>>(istream &is, fp &x) {
        return is >> x.v;
    }
    friend ostream &operator<<(ostream &os, const fp &x) {
        return os << x.v;
    }
};

template <unsigned mod> void rd(fp<mod> &x) {
    fastio::rd(x.v);
}
template <unsigned mod> void wt(fp<mod> x) {
    fastio::wt(x.v);
}

/**
 * @brief Modint
 */
#line 4 "library/Convolution/arbitrary.hpp"

using M1 = fp<1045430273>;
using M2 = fp<1051721729>;
using M3 = fp<1053818881>;
NTT<M1> N1;
NTT<M2> N2;
NTT<M3> N3;
constexpr int r_12 = M2(M1::get_mod()).inv();
constexpr int r_13 = M3(M1::get_mod()).inv();
constexpr int r_23 = M3(M2::get_mod()).inv();
constexpr int r_1323 = M3(ll(r_13) * r_23).v;
constexpr ll w1 = M1::get_mod();
constexpr ll w2 = ll(w1) * M2::get_mod();
template <typename T>
vector<T> ArbitraryMult(const vector<int> &a, const vector<int> &b) {
    if (a.empty() or b.empty())
        return vector<T>();
    int n = a.size() + b.size() - 1;
    vector<T> res(n);
    if (min(a.size(), b.size()) <= 60) {
        rep(i, 0, a.size()) rep(j, 0, b.size()) res[i + j] += T(a[i]) * b[j];
        return res;
    }
    vector<int> vals[3];
    vector<M1> a1(ALL(a)), b1(ALL(b)), c1 = N1.mult(a1, b1);
    vector<M2> a2(ALL(a)), b2(ALL(b)), c2 = N2.mult(a2, b2);
    vector<M3> a3(ALL(a)), b3(ALL(b)), c3 = N3.mult(a3, b3);
    for (M1 x : c1)
        vals[0].push_back(x.v);
    for (M2 x : c2)
        vals[1].push_back(x.v);
    for (M3 x : c3)
        vals[2].push_back(x.v);
    rep(i, 0, n) {
        ll p = vals[0][i];
        ll q = (vals[1][i] + M2::get_mod() - p) * r_12 % M2::get_mod();
        ll r = ((vals[2][i] + M3::get_mod() - p) * r_1323 +
                (M3::get_mod() - q) * r_23) %
               M3::get_mod();
        res[i] = (T(r) * w2 + q * w1 + p);
    }
    return res;
}

template <typename T>
vector<T> ArbitraryMult(const vector<T> &a, const vector<T> &b) {
    vector<int> A, B;
    for (auto &x : a)
        A.push_back(x.v);
    for (auto &x : b)
        B.push_back(x.v);
    return ArbitraryMult<T>(A, B);
}

/**
 * @brief Arbitrary Mod Convolution
 */
#line 8 "sol.cpp"

void naive(int n, int k) {
    vector<Fp> ex(n);
    vector<int> p(n);
    vector<int> sz(n);
    auto dfs = [&](auto &dfs, int v) -> void {
        sz[v] = 1;
        rep(to, v + 1, n) if (p[to] == v) {
            dfs(dfs, to);
            sz[v] += sz[to];
        }
    };
    vector cnt(n, vector<ll>(n + 1));
    auto rec = [&](auto &rec, int i) -> void {
        if (i == n) {
            dfs(dfs, 0);
            rep(v, 0, n) {
                ex[v] += Fp(sz[v]).pow(k);
                cnt[v][sz[v]] += 1;
            }
            return;
        }
        rep(par, 0, i) {
            p[i] = par;
            rec(rec, i + 1);
            p[i] = -1;
        }
    };
    rec(rec, 1);
    rep(v, 0, n) {
        ex[v] *= Fact<Fp>(n - 1, 1);
        show(ex[v]);
    }
    // rep(v, 1, n) {
    //     rep(j, 1, n + 1) if (cnt[v][j] > 0) {
    //         int ans = v;
    //         rep(x, 1, n - v) ans *= x;
    //         rep(x, 1, n - j) ans *= x;
    //         rep(x, 1, n - v - j + 1) ans /= x;
    //         show(cnt[v][j], ans);
    //         assert(cnt[v][j] == ans);
    //     }
    // }
}

int main() {
    int n, k, M;
    read(n, k, M);
    Fp::set_mod(M);

    // naive(n, k);

    vector<Fp> f(n + 1), g(n + 1);
    rep(i, 0, n + 1) {
        if (n - i - 1 >= 0)
            f[i] = Fp(i).pow(k) * Fact<Fp>(n - i - 1);
        g[i] = Fact<Fp>(i, 1);
    }
    f = ArbitraryMult(f, g);

    rep(v, 0, n) {
        Fp ret = f[n - v];
        // rep(j, 0, n) {
        //     if (n - v - j < 0)
        //         continue;
        //     Fp add =
        //         Fp(j).pow(k) * Fact<Fp>(n - j - 1) * Fact<Fp>(n - v - j, 1);
        //     ret += add;
        // }

        ret *= Fact<Fp>(k, 1);
        ret *= v;
        ret *= Fact<Fp>(n - v - 1);
        ret *= Fact<Fp>(n - 1, 1);
        ret *= Fact<Fp>(k);
        if (v == 0)
            ret = Fp(n).pow(k);
        print(ret);
    }
    return 0;
}

这程序好像有点Bug,我给组数据试试?

Details

Tip: Click on the bar to expand more detailed information

Test #1:

score: 100
Accepted
time: 0ms
memory: 3584kb

input:

3 1 1000000007

output:

3
500000005
1

result:

ok 3 number(s): "3 500000005 1"

Test #2:

score: 0
Accepted
time: 0ms
memory: 3712kb

input:

3 2 998244353

output:

9
499122179
1

result:

ok 3 number(s): "9 499122179 1"

Test #3:

score: 0
Accepted
time: 50ms
memory: 15540kb

input:

100000 2 247500247

output:

99990120
198313538
181648518
9984012
89152757
122606790
144989074
57767836
139713251
181810000
31507456
166275038
47328509
154160535
83327500
58965059
57266005
62816671
182890130
94757428
220071605
169630367
158184542
33329500
49804019
177268667
34308738
1462169
207899265
29486137
36126024
183936623...

result:

ok 100000 numbers

Test #4:

score: 0
Accepted
time: 50ms
memory: 15144kb

input:

93243 2 871928951

output:

846896490
718247765
577098350
41079215
405218914
227159222
310499190
205167702
832611988
705007023
845120518
295904768
823739537
169989571
159639598
28661386
227786795
150269468
541377603
807448870
354698513
504788853
600146531
233880586
541856913
423469197
327480599
74027728
25997100
224019624
8081...

result:

ok 93243 numbers

Test #5:

score: 0
Accepted
time: 49ms
memory: 14540kb

input:

88126 2 815309081

output:

428410147
686328082
479041544
205889612
300318620
20389992
685009094
57186663
226928147
600734124
661202285
507214889
694578908
19604338
105478579
39114733
253249882
660469674
547220370
545575449
181854078
101589195
813903761
115567927
415241080
361254177
318194972
5068568
567012687
53519150
9126832...

result:

ok 88126 numbers

Test #6:

score: 0
Accepted
time: 53ms
memory: 15544kb

input:

100000 20 274295591

output:

133515401
13118380
89106302
78342553
165195888
227060294
52478179
267144515
24472291
43437006
273021769
119052128
11647686
3290431
49050407
171571350
95631399
248844031
35296684
218106957
49127626
169039090
188638802
45352042
93637916
83703846
202348655
242427621
107803831
54794031
252526350
2327816...

result:

ok 100000 numbers

Test #7:

score: 0
Accepted
time: 50ms
memory: 14496kb

input:

88126 19 175327979

output:

158950473
18402437
106235197
22191987
138340285
126056816
124203721
122089700
166866855
86286201
17884807
69131094
27656384
168725914
40019793
69711817
118470230
23394707
18716275
675243
38271008
126222244
118760155
134953966
91892978
24853002
154149875
85732327
78093023
108592897
24813091
61898445
...

result:

ok 88126 numbers

Test #8:

score: 0
Accepted
time: 50ms
memory: 15192kb

input:

93243 17 521107271

output:

401623247
79539477
350407101
118842666
307611331
145882711
44804642
132585714
324150453
377577997
167442211
191408337
502330545
487993743
77033044
491860232
66453035
19882036
119057636
122698010
12896407
108213680
179249435
16768652
311367825
284266634
330830578
221032368
37875566
13079459
245051661...

result:

ok 93243 numbers

Test #9:

score: 0
Accepted
time: 51ms
memory: 15540kb

input:

100000 200 180127469

output:

167062212
92524657
166915946
29251783
29827571
4319369
81324107
236099
50093774
52375863
27752593
46678455
147927291
111428702
58058274
28516187
143468295
63746622
86677605
156508086
60090503
123421346
127381103
178968198
34588724
92087953
150913038
4291383
177405489
92727242
169922068
130905337
113...

result:

ok 100000 numbers

Test #10:

score: 0
Accepted
time: 51ms
memory: 15524kb

input:

99281 199 831673061

output:

739037710
600820702
34103819
114260931
461829617
268804979
631802906
227389899
395384357
402330121
792047383
152088457
559215019
494507217
703460390
222964336
535953767
376723634
324580553
611512391
799481475
696985790
631050926
699196221
84548661
585817140
48372570
633938057
440672424
553087056
256...

result:

ok 99281 numbers

Test #11:

score: 0
Accepted
time: 55ms
memory: 15620kb

input:

99123 190 184986859

output:

150676314
8306448
32778644
62178449
104286694
62297492
85618916
88596335
17408717
176983517
136446622
102276004
95765785
130390965
69193316
145748571
66260225
105139243
68618676
118327626
97749079
97937844
11088207
119734069
32544048
87920498
93841606
166650044
118169960
17744733
139825728
115301315...

result:

ok 99123 numbers

Test #12:

score: 0
Accepted
time: 46ms
memory: 15668kb

input:

100000 1 998244353

output:

100000
50000
332781451
25000
20000
665512902
285226958
12500
443675268
10000
726004984
332756451
844675991
142613479
665502902
6250
645928699
221837634
315240322
5000
760571888
363002492
130210133
665500402
4000
921460172
147891756
570428916
860558925
332751451
225413241
3125
574749779
822086526
855...

result:

ok 100000 numbers

Test #13:

score: 0
Accepted
time: 54ms
memory: 15544kb

input:

100000 2 998244353

output:

17556470
5835490
668405647
1740647
1157098
48359563
499738479
499600134
554961451
181810000
91007918
665714267
11156053
161014
83327500
14803641
587312080
198579032
262783548
570504423
756317395
457758306
455779873
33329500
798645810
409582602
945470198
791750956
344259332
279113704
535381158
351684...

result:

ok 100000 numbers

Test #14:

score: 0
Accepted
time: 55ms
memory: 15668kb

input:

100000 3 998244353

output:

733427426
178967739
202934029
433340642
532337140
605728396
981959828
737139460
888911107
38920791
183454787
191631176
161311621
621244336
340320388
225674683
431086340
824666829
749376222
796265987
213471341
156036046
277058601
945134678
603493090
462039974
449024634
705230519
414899866
370632747
9...

result:

ok 100000 numbers

Extra Test:

score: 0
Extra Test Passed