QOJ.ac

QOJ

ID	Problem	Submitter	Result	Time	Memory	Language	File size	Submit time	Judge time
#879341	#9697. Algebra	ucup-team2796^#	AC ✓	55ms	15668kb	C++23	28.1kb	2025-02-01 23:44:47	2025-02-01 23:44:55

Judging History

This is the latest submission verdict.

[2025-02-01 23:44:55]
Judged

Verdict: AC
Time: 55ms
Memory: 15668kb

Show

[2025-02-01 23:44:47]
Submitted

answer

#line 1 "library/Template/template.hpp"
#include <bits/stdc++.h>
using namespace std;

#define rep(i, a, b) for (int i = (int)(a); i < (int)(b); i++)
#define rrep(i, a, b) for (int i = (int)(b)-1; i >= (int)(a); i--)
#define ALL(v) (v).begin(), (v).end()
#define UNIQUE(v) sort(ALL(v)), (v).erase(unique(ALL(v)), (v).end())
#define SZ(v) (int)v.size()
#define MIN(v) *min_element(ALL(v))
#define MAX(v) *max_element(ALL(v))
#define LB(v, x) int(lower_bound(ALL(v), (x)) - (v).begin())
#define UB(v, x) int(upper_bound(ALL(v), (x)) - (v).begin())

using uint = unsigned int;
using ll = long long int;
using ull = unsigned long long;
using i128 = __int128_t;
using u128 = __uint128_t;
const int inf = 0x3fffffff;
const ll INF = 0x1fffffffffffffff;

template <typename T> inline bool chmax(T &a, T b) {
    if (a < b) {
        a = b;
        return 1;
    }
    return 0;
}
template <typename T> inline bool chmin(T &a, T b) {
    if (a > b) {
        a = b;
        return 1;
    }
    return 0;
}
template <typename T, typename U> T ceil(T x, U y) {
    assert(y != 0);
    if (y < 0)
        x = -x, y = -y;
    return (x > 0 ? (x + y - 1) / y : x / y);
}
template <typename T, typename U> T floor(T x, U y) {
    assert(y != 0);
    if (y < 0)
        x = -x, y = -y;
    return (x > 0 ? x / y : (x - y + 1) / y);
}
template <typename T> int popcnt(T x) {
    return __builtin_popcountll(x);
}
template <typename T> int topbit(T x) {
    return (x == 0 ? -1 : 63 - __builtin_clzll(x));
}
template <typename T> int lowbit(T x) {
    return (x == 0 ? -1 : __builtin_ctzll(x));
}

template <class T, class U>
ostream &operator<<(ostream &os, const pair<T, U> &p) {
    os << "P(" << p.first << ", " << p.second << ")";
    return os;
}
template <typename T> ostream &operator<<(ostream &os, const vector<T> &vec) {
    os << "{";
    for (int i = 0; i < vec.size(); i++) {
        os << vec[i] << (i + 1 == vec.size() ? "" : ", ");
    }
    os << "}";
    return os;
}
template <typename T, typename U>
ostream &operator<<(ostream &os, const map<T, U> &map_var) {
    os << "{";
    for (auto itr = map_var.begin(); itr != map_var.end(); itr++) {
        os << "(" << itr->first << ", " << itr->second << ")";
        itr++;
        if (itr != map_var.end())
            os << ", ";
        itr--;
    }
    os << "}";
    return os;
}
template <typename T> ostream &operator<<(ostream &os, const set<T> &set_var) {
    os << "{";
    for (auto itr = set_var.begin(); itr != set_var.end(); itr++) {
        os << *itr;
        ++itr;
        if (itr != set_var.end())
            os << ", ";
        itr--;
    }
    os << "}";
    return os;
}
#ifdef LOCAL
#define show(...) _show(0, #__VA_ARGS__, __VA_ARGS__)
#else
#define show(...) true
#endif
template <typename T> void _show(int i, T name) {
    cerr << '\n';
}
template <typename T1, typename T2, typename... T3>
void _show(int i, const T1 &a, const T2 &b, const T3 &...c) {
    for (; a[i] != ',' && a[i] != '\0'; i++)
        cerr << a[i];
    cerr << ":" << b << " ";
    _show(i + 1, a, c...);
}
#line 2 "library/Utility/fastio.hpp"
#include <unistd.h>
namespace fastio {
static constexpr uint32_t SZ = 1 << 17;
char ibuf[SZ];
char obuf[SZ];
char out[100];
// pointer of ibuf, obuf

uint32_t pil = 0, pir = 0, por = 0;

struct Pre {
    char num[10000][4];
    constexpr Pre() : num() {
        for (int i = 0; i < 10000; i++) {
            int n = i;
            for (int j = 3; j >= 0; j--) {
                num[i][j] = n % 10 | '0';
                n /= 10;
            }
        }
    }
} constexpr pre;

inline void load() {
    memmove(ibuf, ibuf + pil, pir - pil);
    pir = pir - pil + fread(ibuf + pir - pil, 1, SZ - pir + pil, stdin);
    pil = 0;
    if (pir < SZ)
        ibuf[pir++] = '\n';
}

inline void flush() {
    fwrite(obuf, 1, por, stdout);
    por = 0;
}

void rd(char &c) {
    do {
        if (pil + 1 > pir)
            load();
        c = ibuf[pil++];
    } while (isspace(c));
}

void rd(string &x) {
    x.clear();
    char c;
    do {
        if (pil + 1 > pir)
            load();
        c = ibuf[pil++];
    } while (isspace(c));
    do {
        x += c;
        if (pil == pir)
            load();
        c = ibuf[pil++];
    } while (!isspace(c));
}

template <typename T> void rd_real(T &x) {
    string s;
    rd(s);
    x = stod(s);
}

template <typename T> void rd_integer(T &x) {
    if (pil + 100 > pir)
        load();
    char c;
    do
        c = ibuf[pil++];
    while (c < '-');
    bool minus = 0;
    if constexpr (is_signed<T>::value || is_same_v<T, i128>) {
        if (c == '-') {
            minus = 1, c = ibuf[pil++];
        }
    }
    x = 0;
    while ('0' <= c) {
        x = x * 10 + (c & 15), c = ibuf[pil++];
    }
    if constexpr (is_signed<T>::value || is_same_v<T, i128>) {
        if (minus)
            x = -x;
    }
}

void rd(int &x) {
    rd_integer(x);
}
void rd(ll &x) {
    rd_integer(x);
}
void rd(i128 &x) {
    rd_integer(x);
}
void rd(uint &x) {
    rd_integer(x);
}
void rd(ull &x) {
    rd_integer(x);
}
void rd(u128 &x) {
    rd_integer(x);
}
void rd(double &x) {
    rd_real(x);
}
void rd(long double &x) {
    rd_real(x);
}

template <class T, class U> void rd(pair<T, U> &p) {
    return rd(p.first), rd(p.second);
}
template <size_t N = 0, typename T> void rd_tuple(T &t) {
    if constexpr (N < std::tuple_size<T>::value) {
        auto &x = std::get<N>(t);
        rd(x);
        rd_tuple<N + 1>(t);
    }
}
template <class... T> void rd(tuple<T...> &tpl) {
    rd_tuple(tpl);
}

template <size_t N = 0, typename T> void rd(array<T, N> &x) {
    for (auto &d : x)
        rd(d);
}
template <class T> void rd(vector<T> &x) {
    for (auto &d : x)
        rd(d);
}

void read() {}
template <class H, class... T> void read(H &h, T &...t) {
    rd(h), read(t...);
}

void wt(const char c) {
    if (por == SZ)
        flush();
    obuf[por++] = c;
}
void wt(const string s) {
    for (char c : s)
        wt(c);
}
void wt(const char *s) {
    size_t len = strlen(s);
    for (size_t i = 0; i < len; i++)
        wt(s[i]);
}

template <typename T> void wt_integer(T x) {
    if (por > SZ - 100)
        flush();
    if (x < 0) {
        obuf[por++] = '-', x = -x;
    }
    int outi;
    for (outi = 96; x >= 10000; outi -= 4) {
        memcpy(out + outi, pre.num[x % 10000], 4);
        x /= 10000;
    }
    if (x >= 1000) {
        memcpy(obuf + por, pre.num[x], 4);
        por += 4;
    } else if (x >= 100) {
        memcpy(obuf + por, pre.num[x] + 1, 3);
        por += 3;
    } else if (x >= 10) {
        int q = (x * 103) >> 10;
        obuf[por] = q | '0';
        obuf[por + 1] = (x - q * 10) | '0';
        por += 2;
    } else
        obuf[por++] = x | '0';
    memcpy(obuf + por, out + outi + 4, 96 - outi);
    por += 96 - outi;
}

template <typename T> void wt_real(T x) {
    ostringstream oss;
    oss << fixed << setprecision(15) << double(x);
    string s = oss.str();
    wt(s);
}

void wt(int x) {
    wt_integer(x);
}
void wt(ll x) {
    wt_integer(x);
}
void wt(i128 x) {
    wt_integer(x);
}
void wt(uint x) {
    wt_integer(x);
}
void wt(ull x) {
    wt_integer(x);
}
void wt(u128 x) {
    wt_integer(x);
}
void wt(double x) {
    wt_real(x);
}
void wt(long double x) {
    wt_real(x);
}

template <class T, class U> void wt(const pair<T, U> val) {
    wt(val.first);
    wt(' ');
    wt(val.second);
}
template <size_t N = 0, typename T> void wt_tuple(const T t) {
    if constexpr (N < std::tuple_size<T>::value) {
        if constexpr (N > 0) {
            wt(' ');
        }
        const auto x = std::get<N>(t);
        wt(x);
        wt_tuple<N + 1>(t);
    }
}
template <class... T> void wt(tuple<T...> tpl) {
    wt_tuple(tpl);
}
template <class T, size_t S> void wt(const array<T, S> val) {
    auto n = val.size();
    for (size_t i = 0; i < n; i++) {
        if (i)
            wt(' ');
        wt(val[i]);
    }
}
template <class T> void wt(const vector<T> val) {
    auto n = val.size();
    for (size_t i = 0; i < n; i++) {
        if (i)
            wt(' ');
        wt(val[i]);
    }
}

void print() {
    wt('\n');
}
template <class Head, class... Tail> void print(Head &&head, Tail &&...tail) {
    wt(head);
    if (sizeof...(Tail))
        wt(' ');
    print(forward<Tail>(tail)...);
}
void __attribute__((destructor)) _d() {
    flush();
}
} // namespace fastio

using fastio::flush;
using fastio::print;
using fastio::read;

inline void first(bool i = true) {
    print(i ? "first" : "second");
}
inline void Alice(bool i = true) {
    print(i ? "Alice" : "Bob");
}
inline void Takahashi(bool i = true) {
    print(i ? "Takahashi" : "Aoki");
}
inline void yes(bool i = true) {
    print(i ? "yes" : "no");
}
inline void Yes(bool i = true) {
    print(i ? "Yes" : "No");
}
inline void No() {
    print("No");
}
inline void YES(bool i = true) {
    print(i ? "YES" : "NO");
}
inline void NO() {
    print("NO");
}
inline void Yay(bool i = true) {
    print(i ? "Yay!" : ":(");
}
inline void Possible(bool i = true) {
    print(i ? "Possible" : "Impossible");
}
inline void POSSIBLE(bool i = true) {
    print(i ? "POSSIBLE" : "IMPOSSIBLE");
}

/**
 * @brief Fast IO
 */
#line 3 "sol.cpp"

#line 2 "library/Math/fastdiv.hpp"

struct FastDiv {
    using u64 = uint64_t;
    using u128 = __uint128_t;
    constexpr FastDiv() : m(), s(), x() {}
    constexpr FastDiv(int _m)
        : m(_m), s(__lg(m - 1)), x(((u128(1) << (s + 64)) + m - 1) / m) {}
    constexpr int get() {
        return m;
    }
    constexpr friend u64 operator/(u64 n, const FastDiv &d) {
        return (u128(n) * d.x >> d.s) >> 64;
    }
    constexpr friend int operator%(u64 n, const FastDiv &d) {
        return n - n / d * d.m;
    }
    constexpr pair<u64, int> divmod(u64 n) const {
        u64 q = n / (*this);
        return {q, n - q * m};
    }
    int m, s;
    u64 x;
};

struct FastDiv64 {
    using u64 = uint64_t;
    using u128 = __uint128_t;
    u128 mod, mh, ml;
    explicit FastDiv64(u64 mod = 1) : mod(mod) {
        u128 m = u128(-1) / mod;
        if (m * mod + mod == u128(0))
            ++m;
        mh = m >> 64;
        ml = m & u64(-1);
    }
    u64 umod() const {
        return mod;
    }
    u64 modulo(u128 x) {
        u128 z = (x & u64(-1)) * ml;
        z = (x & u64(-1)) * mh + (x >> 64) * ml + (z >> 64);
        z = (x >> 64) * mh + (z >> 64);
        x -= z * mod;
        return x < mod ? x : x - mod;
    }
    u64 mul(u64 a, u64 b) {
        return modulo(u128(a) * b);
    }
};

/**
 * @brief Fast Division
 */
#line 3 "library/Math/dynamic.hpp"

struct Fp {
    using u64 = uint64_t;
    uint v;
    static int get_mod() {
        return _getmod();
    }
    static void set_mod(int _m) {
        bar = FastDiv(_m);
    }
    Fp inv() const {
        int tmp, a = v, b = get_mod(), x = 1, y = 0;
        while (b) {
            tmp = a / b, a -= tmp * b;
            swap(a, b);
            x -= tmp * y;
            swap(x, y);
        }
        if (x < 0) {
            x += get_mod();
        }
        return x;
    }
    Fp() : v(0) {}
    Fp(ll x) {
        v = x % get_mod();
        if (v < 0)
            v += get_mod();
    }
    Fp operator-() const {
        return Fp() - *this;
    }
    Fp pow(ll t) {
        assert(t >= 0);
        Fp res = 1, b = *this;
        while (t) {
            if (t & 1)
                res *= b;
            b *= b;
            t >>= 1;
        }
        return res;
    }
    Fp &operator+=(const Fp &x) {
        v += x.v;
        if (v >= get_mod())
            v -= get_mod();
        return *this;
    }
    Fp &operator-=(const Fp &x) {
        v += get_mod() - x.v;
        if (v >= get_mod())
            v -= get_mod();
        return *this;
    }
    Fp &operator*=(const Fp &x) {
        v = (u64(v) * x.v) % bar;
        return *this;
    }
    Fp &operator/=(const Fp &x) {
        (*this) *= x.inv();
        return *this;
    }
    Fp operator+(const Fp &x) const {
        return Fp(*this) += x;
    }
    Fp operator-(const Fp &x) const {
        return Fp(*this) -= x;
    }
    Fp operator*(const Fp &x) const {
        return Fp(*this) *= x;
    }
    Fp operator/(const Fp &x) const {
        return Fp(*this) /= x;
    }
    bool operator==(const Fp &x) const {
        return v == x.v;
    }
    bool operator!=(const Fp &x) const {
        return v != x.v;
    }
    friend istream &operator>>(istream &is, Fp &x) {
        return is >> x.v;
    }
    friend ostream &operator<<(ostream &os, const Fp &x) {
        return os << x.v;
    }

  private:
    static FastDiv bar;
    static int _getmod() {
        return bar.get();
    }
};
FastDiv Fp::bar(998244353);

void rd(Fp &x) {
    fastio::rd(x.v);
}
void wt(Fp x) {
    fastio::wt(x.v);
}

/**
 * @brief Dynamic Modint
 */
#line 2 "library/Math/comb.hpp"

template <typename T> T Inv(ll n) {
    static int md;
    static vector<T> buf({0, 1});
    if (md != T::get_mod()) {
        md = T::get_mod();
        buf = vector<T>({0, 1});
    }
    assert(n > 0);
    n %= md;
    while (SZ(buf) <= n) {
        int k = SZ(buf), q = (md + k - 1) / k;
        buf.push_back(buf[k * q - md] * q);
    }
    return buf[n];
}

template <typename T> T Fact(ll n, bool inv = 0) {
    static int md;
    static vector<T> buf({1, 1}), ibuf({1, 1});
    if (md != T::get_mod()) {
        md = T::get_mod();
        buf = ibuf = vector<T>({1, 1});
    }
    assert(n >= 0 and n < md);
    while (SZ(buf) <= n) {
        buf.push_back(buf.back() * SZ(buf));
        ibuf.push_back(ibuf.back() * Inv<T>(SZ(ibuf)));
    }
    return inv ? ibuf[n] : buf[n];
}

template <typename T> T nPr(int n, int r, bool inv = 0) {
    if (n < 0 || n < r || r < 0)
        return 0;
    return Fact<T>(n, inv) * Fact<T>(n - r, inv ^ 1);
}
template <typename T> T nCr(int n, int r, bool inv = 0) {
    if (n < 0 || n < r || r < 0)
        return 0;
    return Fact<T>(n, inv) * Fact<T>(r, inv ^ 1) * Fact<T>(n - r, inv ^ 1);
}
// sum = n, r tuples
template <typename T> T nHr(int n, int r, bool inv = 0) {
    return nCr<T>(n + r - 1, r - 1, inv);
}
// sum = n, a nonzero tuples and b tuples
template <typename T> T choose(int n, int a, int b) {
    if (n == 0)
        return !a;
    return nCr<T>(n + b - 1, a + b - 1);
}

/**
 * @brief Combination
 */
#line 6 "sol.cpp"

#line 2 "library/Convolution/ntt.hpp"

template <typename T> struct NTT {
    static constexpr int rank2 = __builtin_ctzll(T::get_mod() - 1);
    std::array<T, rank2 + 1> root;  // root[i]^(2^i) == 1
    std::array<T, rank2 + 1> iroot; // root[i] * iroot[i] == 1

    std::array<T, std::max(0, rank2 - 2 + 1)> rate2;
    std::array<T, std::max(0, rank2 - 2 + 1)> irate2;

    std::array<T, std::max(0, rank2 - 3 + 1)> rate3;
    std::array<T, std::max(0, rank2 - 3 + 1)> irate3;

    NTT() {
        T g = 2;
        while (g.pow((T::get_mod() - 1) >> 1) == 1) {
            g += 1;
        }
        root[rank2] = g.pow((T::get_mod() - 1) >> rank2);
        iroot[rank2] = root[rank2].inv();
        for (int i = rank2 - 1; i >= 0; i--) {
            root[i] = root[i + 1] * root[i + 1];
            iroot[i] = iroot[i + 1] * iroot[i + 1];
        }

        {
            T prod = 1, iprod = 1;
            for (int i = 0; i <= rank2 - 2; i++) {
                rate2[i] = root[i + 2] * prod;
                irate2[i] = iroot[i + 2] * iprod;
                prod *= iroot[i + 2];
                iprod *= root[i + 2];
            }
        }
        {
            T prod = 1, iprod = 1;
            for (int i = 0; i <= rank2 - 3; i++) {
                rate3[i] = root[i + 3] * prod;
                irate3[i] = iroot[i + 3] * iprod;
                prod *= iroot[i + 3];
                iprod *= root[i + 3];
            }
        }
    }

    void ntt(std::vector<T> &a, bool type = 0) {
        int n = int(a.size());
        int h = __builtin_ctzll((unsigned int)n);
        a.resize(1 << h);

        if (type) {
            int len = h; // a[i, i+(n>>len), i+2*(n>>len), ..] is transformed
            while (len) {
                if (len == 1) {
                    int p = 1 << (h - len);
                    T irot = 1;
                    for (int s = 0; s < (1 << (len - 1)); s++) {
                        int offset = s << (h - len + 1);
                        for (int i = 0; i < p; i++) {
                            auto l = a[i + offset];
                            auto r = a[i + offset + p];
                            a[i + offset] = l + r;
                            a[i + offset + p] =
                                (unsigned long long)(T::get_mod() + l.v - r.v) *
                                irot.v;
                            ;
                        }
                        if (s + 1 != (1 << (len - 1)))
                            irot *= irate2[__builtin_ctzll(~(unsigned int)(s))];
                    }
                    len--;
                } else {
                    // 4-base
                    int p = 1 << (h - len);
                    T irot = 1, iimag = iroot[2];
                    for (int s = 0; s < (1 << (len - 2)); s++) {
                        T irot2 = irot * irot;
                        T irot3 = irot2 * irot;
                        int offset = s << (h - len + 2);
                        for (int i = 0; i < p; i++) {
                            auto a0 = 1ULL * a[i + offset + 0 * p].v;
                            auto a1 = 1ULL * a[i + offset + 1 * p].v;
                            auto a2 = 1ULL * a[i + offset + 2 * p].v;
                            auto a3 = 1ULL * a[i + offset + 3 * p].v;

                            auto a2na3iimag =
                                1ULL * T((T::get_mod() + a2 - a3) * iimag.v).v;

                            a[i + offset] = a0 + a1 + a2 + a3;
                            a[i + offset + 1 * p] =
                                (a0 + (T::get_mod() - a1) + a2na3iimag) *
                                irot.v;
                            a[i + offset + 2 * p] =
                                (a0 + a1 + (T::get_mod() - a2) +
                                 (T::get_mod() - a3)) *
                                irot2.v;
                            a[i + offset + 3 * p] =
                                (a0 + (T::get_mod() - a1) +
                                 (T::get_mod() - a2na3iimag)) *
                                irot3.v;
                        }
                        if (s + 1 != (1 << (len - 2)))
                            irot *= irate3[__builtin_ctzll(~(unsigned int)(s))];
                    }
                    len -= 2;
                }
            }
            T e = T(n).inv();
            for (auto &x : a)
                x *= e;
        } else {
            int len = 0; // a[i, i+(n>>len), i+2*(n>>len), ..] is transformed
            while (len < h) {
                if (h - len == 1) {
                    int p = 1 << (h - len - 1);
                    T rot = 1;
                    for (int s = 0; s < (1 << len); s++) {
                        int offset = s << (h - len);
                        for (int i = 0; i < p; i++) {
                            auto l = a[i + offset];
                            auto r = a[i + offset + p] * rot;
                            a[i + offset] = l + r;
                            a[i + offset + p] = l - r;
                        }
                        if (s + 1 != (1 << len))
                            rot *= rate2[__builtin_ctzll(~(unsigned int)(s))];
                    }
                    len++;
                } else {
                    // 4-base
                    int p = 1 << (h - len - 2);
                    T rot = 1, imag = root[2];
                    for (int s = 0; s < (1 << len); s++) {
                        T rot2 = rot * rot;
                        T rot3 = rot2 * rot;
                        int offset = s << (h - len);
                        for (int i = 0; i < p; i++) {
                            auto mod2 = 1ULL * T::get_mod() * T::get_mod();
                            auto a0 = 1ULL * a[i + offset].v;
                            auto a1 = 1ULL * a[i + offset + p].v * rot.v;
                            auto a2 = 1ULL * a[i + offset + 2 * p].v * rot2.v;
                            auto a3 = 1ULL * a[i + offset + 3 * p].v * rot3.v;
                            auto a1na3imag =
                                1ULL * T(a1 + mod2 - a3).v * imag.v;
                            auto na2 = mod2 - a2;
                            a[i + offset] = a0 + a2 + a1 + a3;
                            a[i + offset + 1 * p] =
                                a0 + a2 + (2 * mod2 - (a1 + a3));
                            a[i + offset + 2 * p] = a0 + na2 + a1na3imag;
                            a[i + offset + 3 * p] =
                                a0 + na2 + (mod2 - a1na3imag);
                        }
                        if (s + 1 != (1 << len))
                            rot *= rate3[__builtin_ctzll(~(unsigned int)(s))];
                    }
                    len += 2;
                }
            }
        }
    }
    vector<T> mult(const vector<T> &a, const vector<T> &b) {
        if (a.empty() or b.empty())
            return vector<T>();
        int as = a.size(), bs = b.size();
        int n = as + bs - 1;
        if (as <= 30 or bs <= 30) {
            if (as > 30)
                return mult(b, a);
            vector<T> res(n);
            rep(i, 0, as) rep(j, 0, bs) res[i + j] += a[i] * b[j];
            return res;
        }
        int m = 1;
        while (m < n)
            m <<= 1;
        vector<T> res(m);
        rep(i, 0, as) res[i] = a[i];
        ntt(res);
        if (a == b)
            rep(i, 0, m) res[i] *= res[i];
        else {
            vector<T> c(m);
            rep(i, 0, bs) c[i] = b[i];
            ntt(c);
            rep(i, 0, m) res[i] *= c[i];
        }
        ntt(res, 1);
        res.resize(n);
        return res;
    }
};

/**
 * @brief Number Theoretic Transform
 */
#line 2 "library/Math/modint.hpp"

template <unsigned mod = 1000000007> struct fp {
    unsigned v;
    static constexpr int get_mod() {
        return mod;
    }
    constexpr unsigned inv() const {
        assert(v != 0);
        int x = v, y = mod, p = 1, q = 0, t = 0, tmp = 0;
        while (y > 0) {
            t = x / y;
            x -= t * y, p -= t * q;
            tmp = x, x = y, y = tmp;
            tmp = p, p = q, q = tmp;
        }
        if (p < 0)
            p += mod;
        return p;
    }
    constexpr fp(ll x = 0) : v(x >= 0 ? x % mod : (mod - (-x) % mod) % mod) {}
    fp operator-() const {
        return fp() - *this;
    }
    fp pow(ull t) {
        fp res = 1, b = *this;
        while (t) {
            if (t & 1)
                res *= b;
            b *= b;
            t >>= 1;
        }
        return res;
    }
    fp &operator+=(const fp &x) {
        if ((v += x.v) >= mod)
            v -= mod;
        return *this;
    }
    fp &operator-=(const fp &x) {
        if ((v += mod - x.v) >= mod)
            v -= mod;
        return *this;
    }
    fp &operator*=(const fp &x) {
        v = ull(v) * x.v % mod;
        return *this;
    }
    fp &operator/=(const fp &x) {
        v = ull(v) * x.inv() % mod;
        return *this;
    }
    fp operator+(const fp &x) const {
        return fp(*this) += x;
    }
    fp operator-(const fp &x) const {
        return fp(*this) -= x;
    }
    fp operator*(const fp &x) const {
        return fp(*this) *= x;
    }
    fp operator/(const fp &x) const {
        return fp(*this) /= x;
    }
    bool operator==(const fp &x) const {
        return v == x.v;
    }
    bool operator!=(const fp &x) const {
        return v != x.v;
    }
    friend istream &operator>>(istream &is, fp &x) {
        return is >> x.v;
    }
    friend ostream &operator<<(ostream &os, const fp &x) {
        return os << x.v;
    }
};

template <unsigned mod> void rd(fp<mod> &x) {
    fastio::rd(x.v);
}
template <unsigned mod> void wt(fp<mod> x) {
    fastio::wt(x.v);
}

/**
 * @brief Modint
 */
#line 4 "library/Convolution/arbitrary.hpp"

using M1 = fp<1045430273>;
using M2 = fp<1051721729>;
using M3 = fp<1053818881>;
NTT<M1> N1;
NTT<M2> N2;
NTT<M3> N3;
constexpr int r_12 = M2(M1::get_mod()).inv();
constexpr int r_13 = M3(M1::get_mod()).inv();
constexpr int r_23 = M3(M2::get_mod()).inv();
constexpr int r_1323 = M3(ll(r_13) * r_23).v;
constexpr ll w1 = M1::get_mod();
constexpr ll w2 = ll(w1) * M2::get_mod();
template <typename T>
vector<T> ArbitraryMult(const vector<int> &a, const vector<int> &b) {
    if (a.empty() or b.empty())
        return vector<T>();
    int n = a.size() + b.size() - 1;
    vector<T> res(n);
    if (min(a.size(), b.size()) <= 60) {
        rep(i, 0, a.size()) rep(j, 0, b.size()) res[i + j] += T(a[i]) * b[j];
        return res;
    }
    vector<int> vals[3];
    vector<M1> a1(ALL(a)), b1(ALL(b)), c1 = N1.mult(a1, b1);
    vector<M2> a2(ALL(a)), b2(ALL(b)), c2 = N2.mult(a2, b2);
    vector<M3> a3(ALL(a)), b3(ALL(b)), c3 = N3.mult(a3, b3);
    for (M1 x : c1)
        vals[0].push_back(x.v);
    for (M2 x : c2)
        vals[1].push_back(x.v);
    for (M3 x : c3)
        vals[2].push_back(x.v);
    rep(i, 0, n) {
        ll p = vals[0][i];
        ll q = (vals[1][i] + M2::get_mod() - p) * r_12 % M2::get_mod();
        ll r = ((vals[2][i] + M3::get_mod() - p) * r_1323 +
                (M3::get_mod() - q) * r_23) %
               M3::get_mod();
        res[i] = (T(r) * w2 + q * w1 + p);
    }
    return res;
}

template <typename T>
vector<T> ArbitraryMult(const vector<T> &a, const vector<T> &b) {
    vector<int> A, B;
    for (auto &x : a)
        A.push_back(x.v);
    for (auto &x : b)
        B.push_back(x.v);
    return ArbitraryMult<T>(A, B);
}

/**
 * @brief Arbitrary Mod Convolution
 */
#line 8 "sol.cpp"

void naive(int n, int k) {
    vector<Fp> ex(n);
    vector<int> p(n);
    vector<int> sz(n);
    auto dfs = [&](auto &dfs, int v) -> void {
        sz[v] = 1;
        rep(to, v + 1, n) if (p[to] == v) {
            dfs(dfs, to);
            sz[v] += sz[to];
        }
    };
    vector cnt(n, vector<ll>(n + 1));
    auto rec = [&](auto &rec, int i) -> void {
        if (i == n) {
            dfs(dfs, 0);
            rep(v, 0, n) {
                ex[v] += Fp(sz[v]).pow(k);
                cnt[v][sz[v]] += 1;
            }
            return;
        }
        rep(par, 0, i) {
            p[i] = par;
            rec(rec, i + 1);
            p[i] = -1;
        }
    };
    rec(rec, 1);
    rep(v, 0, n) {
        ex[v] *= Fact<Fp>(n - 1, 1);
        show(ex[v]);
    }
    // rep(v, 1, n) {
    //     rep(j, 1, n + 1) if (cnt[v][j] > 0) {
    //         int ans = v;
    //         rep(x, 1, n - v) ans *= x;
    //         rep(x, 1, n - j) ans *= x;
    //         rep(x, 1, n - v - j + 1) ans /= x;
    //         show(cnt[v][j], ans);
    //         assert(cnt[v][j] == ans);
    //     }
    // }
}

int main() {
    int n, k, M;
    read(n, k, M);
    Fp::set_mod(M);

    // naive(n, k);

    vector<Fp> f(n + 1), g(n + 1);
    rep(i, 0, n + 1) {
        if (n - i - 1 >= 0)
            f[i] = Fp(i).pow(k) * Fact<Fp>(n - i - 1);
        g[i] = Fact<Fp>(i, 1);
    }
    f = ArbitraryMult(f, g);

    rep(v, 0, n) {
        Fp ret = f[n - v];
        // rep(j, 0, n) {
        //     if (n - v - j < 0)
        //         continue;
        //     Fp add =
        //         Fp(j).pow(k) * Fact<Fp>(n - j - 1) * Fact<Fp>(n - v - j, 1);
        //     ret += add;
        // }

        ret *= Fact<Fp>(k, 1);
        ret *= v;
        ret *= Fact<Fp>(n - v - 1);
        ret *= Fact<Fp>(n - 1, 1);
        ret *= Fact<Fp>(k);
        if (v == 0)
            ret = Fp(n).pow(k);
        print(ret);
    }
    return 0;
}

Source code, Language: C++23

这程序好像有点Bug，我给组数据试试？

Details

Tip: Click on the bar to expand more detailed information

Test #1:

score: 100

Accepted

time: 0ms

memory: 3584kb

input:

3 1 1000000007

output:

3
500000005
1

result:

ok 3 number(s): "3 500000005 1"

Test #2:

score: 0

Accepted

time: 0ms

memory: 3712kb

input:

3 2 998244353

output:

9
499122179
1

result:

ok 3 number(s): "9 499122179 1"

Test #3:

score: 0

Accepted

time: 50ms

memory: 15540kb

input:

100000 2 247500247

output:

99990120
198313538
181648518
9984012
89152757
122606790
144989074
57767836
139713251
181810000
31507456
166275038
47328509
154160535
83327500
58965059
57266005
62816671
182890130
94757428
220071605
169630367
158184542
33329500
49804019
177268667
34308738
1462169
207899265
29486137
36126024
183936623...

result:

ok 100000 numbers

Test #4:

score: 0

Accepted

time: 50ms

memory: 15144kb

input:

93243 2 871928951

output:

result:

ok 93243 numbers

Test #5:

score: 0

Accepted

time: 49ms

memory: 14540kb

input:

88126 2 815309081

output:

result:

ok 88126 numbers

Test #6:

score: 0

Accepted

time: 53ms

memory: 15544kb

input:

100000 20 274295591

output:

result:

ok 100000 numbers

Test #7:

score: 0

Accepted

time: 50ms

memory: 14496kb

input:

88126 19 175327979

output:

result:

ok 88126 numbers

Test #8:

score: 0

Accepted

time: 50ms

memory: 15192kb

input:

93243 17 521107271

output:

401623247
79539477
350407101
118842666
307611331
145882711
44804642
132585714
324150453
377577997
167442211
191408337
502330545
487993743
77033044
491860232
66453035
19882036
119057636
122698010
12896407
108213680
179249435
16768652
311367825
284266634
330830578
221032368
37875566
13079459
245051661...

result:

ok 93243 numbers

Test #9:

score: 0

Accepted

time: 51ms

memory: 15540kb

input:

100000 200 180127469

output:

result:

ok 100000 numbers

Test #10:

score: 0

Accepted

time: 51ms

memory: 15524kb

input:

99281 199 831673061

output:

result:

ok 99281 numbers

Test #11:

score: 0

Accepted

time: 55ms

memory: 15620kb

input:

99123 190 184986859

output:

150676314
8306448
32778644
62178449
104286694
62297492
85618916
88596335
17408717
176983517
136446622
102276004
95765785
130390965
69193316
145748571
66260225
105139243
68618676
118327626
97749079
97937844
11088207
119734069
32544048
87920498
93841606
166650044
118169960
17744733
139825728
115301315...

result:

ok 99123 numbers

Test #12:

score: 0

Accepted

time: 46ms

memory: 15668kb

input:

100000 1 998244353

output:

result:

ok 100000 numbers

Test #13:

score: 0

Accepted

time: 54ms

memory: 15544kb

input:

100000 2 998244353

output:

result:

ok 100000 numbers

Test #14:

score: 0

Accepted

time: 55ms

memory: 15668kb

input:

100000 3 998244353

output:

result:

ok 100000 numbers

Extra Test:

score: 0

Extra Test Passed