QOJ.ac

QOJ

The 2nd Universal Cup Finals is coming! Check out our event page, schedule, and competition rules!

ID	Problem	Submitter	Result	Time	Memory	Language	File size	Submit time	Judge time
#528927	#1360. Determinant	suspicious-impostor	WA	1ms	3900kb	C++20	54.1kb	2024-08-24 03:23:50	2024-08-24 03:23:51

Judging History

This is the latest submission verdict.

[2024-08-24 03:23:51]
Judged

Verdict: WA
Time: 1ms
Memory: 3900kb

Show

[2024-08-24 03:23:50]
Submitted

answer

#include <optional>
#include <utility>
#include <cassert>
#include <tuple>
namespace cp_algo::algebra {
    // a * x + b
    template<typename base>
    struct lin {
        base a = 1, b = 0;
        std::optional<base> c;
        lin() {}
        lin(base b): a(0), b(b) {}
        lin(base a, base b): a(a), b(b) {}
        lin(base a, base b, base _c): a(a), b(b), c(_c) {}

        // polynomial product modulo x^2 - c
        lin operator * (const lin& t) {
            assert(c && t.c && *c == *t.c);
            return {a * t.b + b * t.a, b * t.b + a * t.a * (*c), *c};
        }

        // a * (t.a * x + t.b) + b
        lin apply(lin const& t) const {
            return {a * t.a, a * t.b + b};
        }

        void prepend(lin const& t) {
            *this = t.apply(*this);
        }

        base eval(base x) const {
            return a * x + b;
        }
    };

    // (ax+b) / (cx+d)
    template<typename base>
    struct linfrac {
        base a, b, c, d;
        linfrac(): a(1), b(0), c(0), d(1) {} // x, identity for composition
        linfrac(base a): a(a), b(1), c(1), d(0) {} // a + 1/x, for continued fractions
        linfrac(base a, base b, base c, base d): a(a), b(b), c(c), d(d) {}

        // composition of two linfracs
        linfrac operator * (linfrac t) const {
            return t.prepend(linfrac(*this));
        }

        linfrac operator-() const {
            return {-a, -b, -c, -d};
        }

        linfrac adj() const {
            return {d, -b, -c, a};
        }
        
        linfrac& prepend(linfrac const& t) {
            t.apply(a, c);
            t.apply(b, d);
            return *this;
        }

        // apply linfrac to A/B
        void apply(base &A, base &B) const {
            std::tie(A, B) = std::pair{a * A + b * B, c * A + d * B};
        }
    };
}

#line 1 "cp-algorithms-aux/cp-algo/algebra/fft.hpp"


#line 1 "cp-algorithms-aux/cp-algo/algebra/common.hpp"


#include <functional>
#include <cstdint>
namespace cp_algo::algebra {
    const int maxn = 1 << 12;
    const int magic = 250; // threshold for sizes to run the naive algo

    auto bpow(auto const& x, int64_t n, auto const& one, auto op) {
        if(n == 0) {
            return one;
        } else {
            auto t = bpow(x, n / 2, one, op);
            t = op(t, t);
            if(n % 2) {
                t = op(t, x);
            }
            return t;
        }
    }
    auto bpow(auto x, int64_t n, auto ans) {
        return bpow(x, n, ans, std::multiplies{});
    }
    template<typename T>
    T bpow(T const& x, int64_t n) {
        return bpow(x, n, T(1));
    }

    template<typename T>
    T fact(int n) {
        static T F[maxn];
        static bool init = false;
        if(!init) {
            F[0] = T(1);
            for(int i = 1; i < maxn; i++) {
                F[i] = F[i - 1] * T(i);
            }
            init = true;
        }
        return F[n];
    }
    
    template<typename T>
    T rfact(int n) {
        static T F[maxn];
        static bool init = false;
        if(!init) {
            F[maxn - 1] = T(1) / fact<T>(maxn - 1);
            for(int i = maxn - 2; i >= 0; i--) {
                F[i] = F[i + 1] * T(i + 1);
            }
            init = true;
        }
        return F[n];
    }

    template<typename T>
    T small_inv(int n) {
        static T F[maxn];
        static bool init = false;
        if(!init) {
            for(int i = 1; i < maxn; i++) {
                F[i] = rfact<T>(i) * fact<T>(i - 1);
            }
            init = true;
        }
        return F[n];
    }
}

#line 1 "cp-algorithms-aux/cp-algo/algebra/modular.hpp"


#line 1 "cp-algorithms-aux/cp-algo/random/rng.hpp"


#include <chrono>
#include <random>
namespace cp_algo::random {
    uint64_t rng() {
        static std::mt19937_64 rng(std::chrono::steady_clock::now().time_since_epoch().count());
        return rng();
    }
}

#line 6 "cp-algorithms-aux/cp-algo/algebra/modular.hpp"
#include <algorithm>
#include <iostream>
#line 9 "cp-algorithms-aux/cp-algo/algebra/modular.hpp"
namespace cp_algo::algebra {
    template<int m>
    struct modular {
        // https://en.wikipedia.org/wiki/Berlekamp-Rabin_algorithm
        std::optional<modular> sqrt() const {
            if(r == 0) {
                return 0;
            } else if(bpow(*this, (m - 1) / 2) != modular(1)) {
                return std::nullopt;
            } else {
                while(true) {
                    modular z = random::rng();
                    if(z * z == *this) {
                        return z;
                    }
                    lin<modular> x(1, z, *this); // x + z (mod x^2 - b)
                    x = bpow(x, (m - 1) / 2, lin<modular>(0, 1, *this));
                    if(x.a != modular(0)) {
                        return x.a.inv();
                    }
                }
            }
        }
        
        uint64_t r;
        constexpr modular(): r(0) {}
        constexpr modular(int64_t rr): r(rr % m) {r = std::min<uint64_t>(r, r + m);}
        modular inv() const {return bpow(*this, m - 2);}
        modular operator - () const {return std::min(-r, m - r);}
        modular operator * (const modular &t) const {return r * t.r;}
        modular operator / (const modular &t) const {return *this * t.inv();}
        modular& operator += (const modular &t) {r += t.r; r = std::min(r, r - m); return *this;}
        modular& operator -= (const modular &t) {r -= t.r; r = std::min(r, r + m); return *this;}
        modular operator + (const modular &t) const {return modular(*this) += t;}
        modular operator - (const modular &t) const {return modular(*this) -= t;}
        modular& operator *= (const modular &t) {return *this = *this * t;}
        modular& operator /= (const modular &t) {return *this = *this / t;}
        
        auto operator <=> (const modular &t) const = default;
        
        explicit operator int() const {return r;}
        int64_t rem() const {return 2 * r > m ? r - m : r;}

        // Only use if you really know what you're doing!
        static constexpr uint64_t mm = 8LL * m * m;
        void add_unsafe(uint64_t t) {r += t;}
        void pseudonormalize() {r = std::min(r, r - mm);}
        modular& normalize() {if(r >= m) r %= m; return *this;}
    };
    
    template<int m>
    std::istream& operator >> (std::istream &in, modular<m> &x) {
        return in >> x.r;
    }
    
    template<int m>
    std::ostream& operator << (std::ostream &out, modular<m> const& x) {
        return out << x.r % m;
    }
}

#line 7 "cp-algorithms-aux/cp-algo/algebra/fft.hpp"
#include <vector>
namespace cp_algo::algebra::fft {
    using ftype = double;
    struct point {
        ftype x, y;
        
        ftype real() {return x;}
        ftype imag() {return y;}
        
        point(): x(0), y(0){}
        point(ftype x, ftype y = 0): x(x), y(y){}
        
        static point polar(ftype rho, ftype ang) {
            return point{rho * cos(ang), rho * sin(ang)};
        }
        
        point conj() const {
            return {x, -y};
        }
        
        point operator +=(const point &t) {x += t.x, y += t.y; return *this;}
        point operator +(const point &t) const {return point(*this) += t;}
        point operator -(const point &t) const {return {x - t.x, y - t.y};}
        point operator *(const point &t) const {return {x * t.x - y * t.y, x * t.y + y * t.x};}
    };

    point w[maxn]; // w[2^n + k] = exp(pi * k / (2^n))
    int bitr[maxn];// b[2^n + k] = bitreverse(k)
    const ftype pi = acos(-1);
    bool initiated = 0;
    void init() {
        if(!initiated) {
            for(int i = 1; i < maxn; i *= 2) {
                int ti = i / 2;
                for(int j = 0; j < i; j++) {
                    w[i + j] = point::polar(ftype(1), pi * j / i);
                    if(ti) {
                        bitr[i + j] = 2 * bitr[ti + j % ti] + (j >= ti);
                    }
                }
            }
            initiated = 1;
        }
    }
    
    void fft(auto &a, int n) {
        init();
        if(n == 1) {
            return;
        }
        int hn = n / 2;
        for(int i = 0; i < n; i++) {
            int ti = 2 * bitr[hn + i % hn] + (i > hn);
            if(i < ti) {
                std::swap(a[i], a[ti]);
            }
        }
        for(int i = 1; i < n; i *= 2) {
            for(int j = 0; j < n; j += 2 * i) {
                for(int k = j; k < j + i; k++) {
                    point t = a[k + i] * w[i + k - j];
                    a[k + i] = a[k] - t;
                    a[k] += t;
                }
            }
        }
    }
    
    void mul_slow(std::vector<auto> &a, const std::vector<auto> &b) {
        if(a.empty() || b.empty()) {
            a.clear();
        } else {
            int n = a.size();
            int m = b.size();
            a.resize(n + m - 1);
            for(int k = n + m - 2; k >= 0; k--) {
                a[k] *= b[0];
                for(int j = std::max(k - n + 1, 1); j < std::min(k + 1, m); j++) {
                    a[k] += a[k - j] * b[j];
                }
            }
        }
    }
    
    template<int m>
    struct dft {
        static constexpr int split = 1 << 15;
        std::vector<point> A;
        
        dft(std::vector<modular<m>> const& a, size_t n): A(n) {
            for(size_t i = 0; i < std::min(n, a.size()); i++) {
                A[i] = point(
                    a[i].rem() % split,
                    a[i].rem() / split
                );
            }
            if(n) {
                fft(A, n);
            }
        }
    
        auto operator * (dft const& B) {
            assert(A.size() == B.A.size());
            size_t n = A.size();
            if(!n) {
                return std::vector<modular<m>>();
            }
            std::vector<point> C(n), D(n);
            for(size_t i = 0; i < n; i++) {
                C[i] = A[i] * (B[i] + B[(n - i) % n].conj());
                D[i] = A[i] * (B[i] - B[(n - i) % n].conj());
            }
            fft(C, n);
            fft(D, n);
            reverse(begin(C) + 1, end(C));
            reverse(begin(D) + 1, end(D));
            int t = 2 * n;
            std::vector<modular<m>> res(n);
            for(size_t i = 0; i < n; i++) {
                modular<m> A0 = llround(C[i].real() / t);
                modular<m> A1 = llround(C[i].imag() / t + D[i].imag() / t);
                modular<m> A2 = llround(D[i].real() / t);
                res[i] = A0 + A1 * split - A2 * split * split;
            }
            return res;
        }
        
        point& operator [](int i) {return A[i];}
        point operator [](int i) const {return A[i];}
    };
    
    size_t com_size(size_t as, size_t bs) {
        if(!as || !bs) {
            return 0;
        }
        size_t n = as + bs - 1;
        while(__builtin_popcount(n) != 1) {
            n++;
        }
        return n;
    }
    
    template<int m>
    void mul(std::vector<modular<m>> &a, std::vector<modular<m>> b) {
        if(std::min(a.size(), b.size()) < magic) {
            mul_slow(a, b);
            return;
        }
        auto n = com_size(a.size(), b.size());
        auto A = dft<m>(a, n);
        if(a == b) {
            a = A * A;
        } else {
            a = A * dft<m>(b, n);
        }
    }
}

#line 7 "cp-algorithms-aux/cp-algo/algebra/poly/impl/euclid.hpp"
#include <numeric>
#line 11 "cp-algorithms-aux/cp-algo/algebra/poly/impl/euclid.hpp"
#include <list>
// operations related to gcd and Euclidean algo
namespace cp_algo::algebra::poly::impl {
    template<typename poly>
    using gcd_result = std::pair<
        std::list<std::decay_t<poly>>,
        linfrac<std::decay_t<poly>>>;

    template<typename poly>
    gcd_result<poly> half_gcd(poly &&A, poly &&B) {
        assert(A.deg() >= B.deg());
        int m = size(A.a) / 2;
        if(B.deg() < m) {
            return {};
        }
        auto [ai, R] = A.divmod(B);
        std::tie(A, B) = {B, R};
        std::list a = {ai};
        auto T = -linfrac(ai).adj();

        auto advance = [&](int k) {
            auto [ak, Tk] = half_gcd(A.div_xk(k), B.div_xk(k));
            a.splice(end(a), ak);
            T.prepend(Tk);
            return Tk;
        };
        advance(m).apply(A, B);
        if constexpr (std::is_reference_v<poly>) {
            advance(2 * m - A.deg()).apply(A, B);
        } else {
            advance(2 * m - A.deg());
        }
        return {std::move(a), std::move(T)};
    }
    template<typename poly>
    gcd_result<poly> full_gcd(poly &&A, poly &&B) {
        using poly_t = std::decay_t<poly>;
        std::list<poly_t> ak;
        std::vector<linfrac<poly_t>> trs;
        while(!B.is_zero()) {
            auto [a0, R] = A.divmod(B);
            ak.push_back(a0);
            trs.push_back(-linfrac(a0).adj());
            std::tie(A, B) = {B, R};

            auto [a, Tr] = half_gcd(A, B);
            ak.splice(end(ak), a);
            trs.push_back(Tr);
        }
        return {ak, std::accumulate(rbegin(trs), rend(trs), linfrac<poly_t>{}, std::multiplies{})};
    }

    // computes product of linfrac on [L, R)
    auto convergent(auto L, auto R) {
        using poly = decltype(L)::value_type;
        if(R == next(L)) {
            return linfrac(*L);
        } else {
            int s = std::transform_reduce(L, R, 0, std::plus{}, std::mem_fn(&poly::deg));
            auto M = L;
            for(int c = M->deg(); 2 * c <= s; M++) {
                c += next(M)->deg();
            }
            return convergent(L, M) * convergent(M, R);
        }
    }
    template<typename poly>
    poly min_rec(poly const& p, size_t d) {
        auto R2 = p.mod_xk(d).reverse(d), R1 = poly::xk(d);
        if(R2.is_zero()) {
            return poly(1);
        }
        auto [a, Tr] = full_gcd(R1, R2);
        a.emplace_back();
        auto pref = begin(a);
        for(int delta = d - a.front().deg(); delta >= 0; pref++) {
            delta -= pref->deg() + next(pref)->deg();
        }
        return convergent(begin(a), pref).a;
    }

    template<typename poly>
    std::optional<poly> inv_mod(poly p, poly q) {
        assert(!q.is_zero());
        auto [a, Tr] = full_gcd(q, p);
        if(q.deg() != 0) {
            return std::nullopt;
        }
        return Tr.b / q[0];
    }
}

#line 1 "cp-algorithms-aux/cp-algo/algebra/poly/impl/base.hpp"


#line 6 "cp-algorithms-aux/cp-algo/algebra/poly/impl/base.hpp"
// really basic operations, typically taking O(n)
namespace cp_algo::algebra::poly::impl {
    void normalize(auto& p) {
        while(p.deg() >= 0 && p.lead() == 0) {
            p.a.pop_back();
        }
    }
    auto neg(auto p) {
        std::ranges::transform(p.a, begin(p.a), std::negate{});
        return p;
    }
    auto& scale(auto &p, auto x) {
        for(auto &it: p.a) {
            it *= x;
        }
        p.normalize();
        return p;
    }
    auto& add(auto &p, auto q) {
        p.a.resize(std::max(p.a.size(), q.a.size()));
        std::ranges::transform(p.a, q.a, begin(p.a), std::plus{});
        normalize(p);
        return p;
    }
    auto& sub(auto &p, auto q) {
        p.a.resize(std::max(p.a.size(), q.a.size()));
        std::ranges::transform(p.a, q.a, begin(p.a), std::minus{});
        normalize(p);
        return p;
    }
    auto mod_xk(auto const& p, size_t k) {
        return std::vector(begin(p.a), begin(p.a) + std::min(k, p.a.size()));
    }
    auto mul_xk(auto p, int k) {
        if(k < 0) {
            return p.div_xk(-k);
        }
        p.a.insert(begin(p.a), k, 0);
        normalize(p);
        return p;
    }
    template<typename poly>
    poly div_xk(poly const& p, int k) {
        if(k < 0) {
            return p.mul_xk(-k);
        }
        return std::vector(begin(p.a) + std::min<size_t>(k, p.a.size()), end(p.a));
    }
    auto substr(auto const& p, size_t l, size_t r) {
        return std::vector(
            begin(p.a) + std::min(l, p.a.size()),
            begin(p.a) + std::min(r, p.a.size())
        );
    }
    auto reverse(auto p, size_t n) {
        p.a.resize(n);
        std::ranges::reverse(p.a);
        normalize(p);
        return p;
    }
}

#line 1 "cp-algorithms-aux/cp-algo/algebra/poly/impl/div.hpp"


#line 6 "cp-algorithms-aux/cp-algo/algebra/poly/impl/div.hpp"
// operations related to polynomial division
namespace cp_algo::algebra::poly::impl {
    auto divmod_slow(auto const& p, auto const& q) {
        auto R = p;
        auto D = decltype(p){};
        auto q_lead_inv = q.lead().inv();
        while(R.deg() >= q.deg()) {
            D.a.push_back(R.lead() * q_lead_inv);
            if(D.lead() != 0) {
                for(size_t i = 1; i <= q.a.size(); i++) {
                    R.a[R.a.size() - i] -= D.lead() * q.a[q.a.size() - i];
                }
            }
            R.a.pop_back();
        }
        std::ranges::reverse(D.a);
        R.normalize();
        return std::array{D, R};
    }
    template<typename poly>
    auto divmod_hint(poly const& p, poly const& q, poly const& qri) {
        assert(!q.is_zero());
        int d = p.deg() - q.deg();
        if(std::min(d, q.deg()) < magic) {
            return divmod_slow(p, q);
        }
        poly D;
        if(d >= 0) {
            D = (p.reverse().mod_xk(d + 1) * qri.mod_xk(d + 1)).mod_xk(d + 1).reverse(d + 1);
        }
        return std::array{D, p - D * q};
    }
    auto divmod(auto const& p, auto const& q) {
        assert(!q.is_zero());
        int d = p.deg() - q.deg();
        if(std::min(d, q.deg()) < magic) {
            return divmod_slow(p, q);
        }
        return divmod_hint(p, q, q.reverse().inv(d + 1));
    }

    template<typename poly>
    poly powmod_hint(poly const& p, int64_t k, poly const& md, poly const& mdri) {
        return bpow(p, k, poly(1), [&](auto const& p, auto const& q){
            return divmod_hint(p * q, md, mdri)[1];
        });
    }
    template<typename poly>
    auto powmod(poly const& p, int64_t k, poly const& md) {
        int d = md.deg();
        if(p == poly::xk(1) && false) { // does it actually speed anything up?..
            if(k < md.deg()) {
                return poly::xk(k);
            } else {
                auto mdr = md.reverse();
                return (mdr.inv(k - md.deg() + 1, md.deg()) * mdr).reverse(md.deg());
            }
        }
        if(md == poly::xk(d)) {
            return p.pow(k, d);
        }
        if(md == poly::xk(d) - poly(1)) {
            return p.powmod_circular(k, d);
        }
        return powmod_hint(p, k, md, md.reverse().inv(md.deg() + 1));
    }

    auto interleave(auto const& p) {
        auto [p0, p1] = p.bisect();
        return p0 * p0 - (p1 * p1).mul_xk(1);
    }
    template<typename poly>
    poly inv(poly const& q, int64_t k, size_t n) {
        if(k <= std::max<int64_t>(n, size(q.a))) {
            return q.inv(k + n).div_xk(k);
        }
        if(k % 2) {
            return inv(q, k - 1, n + 1).div_xk(1);
        }
        
        auto qq = inv(interleave(q), k / 2 - q.deg() / 2, (n + 1) / 2 + q.deg() / 2);
        auto [q0, q1] = q.negx().bisect();
        return (
            (q0 * qq).x2() + (q1 * qq).x2().mul_xk(1)
        ).div_xk(2*q0.deg()).mod_xk(n);
    }
    template<typename poly>
    poly inv(poly const& p, size_t n) {
        auto q = p.mod_xk(n);
        if(n == 1) {
            return poly(1) / q[0];
        }
        // Q(-x) = P0(x^2) + xP1(x^2)
        auto [q0, q1] = q.negx().bisect();
        
        int N = fft::com_size((n + 1) / 2, (n + 1) / 2);
        
        auto q0f = fft::dft(q0.a, N);
        auto q1f = fft::dft(q1.a, N);

        // Q(x)*Q(-x) = Q0(x^2)^2 - x^2 Q1(x^2)^2
        auto qqf = fft::dft(inv(
            poly(q0f * q0f) - poly(q1f * q1f).mul_xk(1)
        , (n + 1) / 2).a, N);
        
        return (
            poly(q0f * qqf).x2() + poly(q1f * qqf).x2().mul_xk(1)
        ).mod_xk(n);
    }
}

#line 15 "cp-algorithms-aux/cp-algo/algebra/poly.hpp"
namespace cp_algo::algebra {
    template<typename T>
    struct poly_t {
        using base = T;
        std::vector<T> a;
        
        void normalize() {poly::impl::normalize(*this);}
        
        poly_t(){}
        poly_t(T a0): a{a0} {normalize();}
        poly_t(std::vector<T> const& t): a(t) {normalize();}
        
        poly_t operator -() const {return poly::impl::neg(*this);}
        poly_t& operator += (poly_t const& t) {return poly::impl::add(*this, t);}
        poly_t& operator -= (poly_t const& t) {return poly::impl::sub(*this, t);}
        poly_t operator + (poly_t const& t) const {return poly_t(*this) += t;}
        poly_t operator - (poly_t const& t) const {return poly_t(*this) -= t;}
        
        poly_t mod_xk(size_t k) const {return poly::impl::mod_xk(*this, k);} // %= x^k
        poly_t mul_xk(size_t k) const {return poly::impl::mul_xk(*this, k);} // *= x^k
        poly_t div_xk(size_t k) const {return poly::impl::div_xk(*this, k);} // /= x^k
        poly_t substr(size_t l, size_t r) const {return poly::impl::substr(*this, l, r);}
        
        poly_t operator *= (const poly_t &t) {fft::mul(a, t.a); normalize(); return *this;}
        poly_t operator * (const poly_t &t) const {return poly_t(*this) *= t;}

        poly_t& operator /= (const poly_t &t) {return *this = divmod(t)[0];}
        poly_t& operator %= (const poly_t &t) {return *this = divmod(t)[1];}
        poly_t operator / (poly_t const& t) const {return poly_t(*this) /= t;}
        poly_t operator % (poly_t const& t) const {return poly_t(*this) %= t;}

        poly_t& operator *= (T const& x) {return *this = poly::impl::scale(*this, x);}
        poly_t& operator /= (T const& x) {return *this *= x.inv();}
        poly_t operator * (T const& x) const {return poly_t(*this) *= x;}
        poly_t operator / (T const& x) const {return poly_t(*this) /= x;}
        
        poly_t reverse(size_t n) const {return poly::impl::reverse(*this, n);}
        poly_t reverse() const {return reverse(size(a));}
        
        std::array<poly_t, 2> divmod(poly_t const& b) const {
            return poly::impl::divmod(*this, b);
        }
        
        // reduces A/B to A'/B' such that
        // deg B' < deg A / 2
        static std::pair<std::list<poly_t>, linfrac<poly_t>> half_gcd(auto &&A, auto &&B) {
            return poly::impl::half_gcd(A, B);
        }
        // reduces A / B to gcd(A, B) / 0
        static std::pair<std::list<poly_t>, linfrac<poly_t>> full_gcd(auto &&A, auto &&B) {
            return poly::impl::full_gcd(A, B);
        }
        static poly_t gcd(poly_t &&A, poly_t &&B) {
            full_gcd(A, B);
            return A;
        }
        
        // Returns a (non-monic) characteristic polynomial
        // of the minimum linear recurrence for the sequence
        poly_t min_rec(size_t d) const {
            return poly::impl::min_rec(*this, d);
        }
        
        // calculate inv to *this modulo t
        std::optional<poly_t> inv_mod(poly_t const& t) const {
            return poly::impl::inv_mod(*this, t);
        };
        
        poly_t negx() const { // A(x) -> A(-x)
            auto res = *this;
            for(int i = 1; i <= deg(); i += 2) {
                res.a[i] = -res[i];
            }
            return res;
        }
        
        void print(int n) const {
            for(int i = 0; i < n; i++) {
                std::cout << (*this)[i] << ' ';
            }
            std::cout << "\n";
        }
        
        void print() const {
            print(deg() + 1);
        }
        
        T eval(T x) const { // evaluates in single point x
            T res(0);
            for(int i = deg(); i >= 0; i--) {
                res *= x;
                res += a[i];
            }
            return res;
        }
        
        T lead() const { // leading coefficient
            assert(!is_zero());
            return a.back();
        }
        
        int deg() const { // degree, -1 for P(x) = 0
            return (int)a.size() - 1;
        }
        
        bool is_zero() const {
            return a.empty();
        }
        
        T operator [](int idx) const {
            return idx < 0 || idx > deg() ? T(0) : a[idx];
        }
        
        T& coef(size_t idx) { // mutable reference at coefficient
            return a[idx];
        }
        
        bool operator == (const poly_t &t) const {return a == t.a;}
        bool operator != (const poly_t &t) const {return a != t.a;}
        
        poly_t deriv(int k = 1) const { // calculate derivative
            if(deg() + 1 < k) {
                return poly_t(T(0));
            }
            std::vector<T> res(deg() + 1 - k);
            for(int i = k; i <= deg(); i++) {
                res[i - k] = fact<T>(i) * rfact<T>(i - k) * a[i];
            }
            return res;
        }
        
        poly_t integr() const { // calculate integral with C = 0
            std::vector<T> res(deg() + 2);
            for(int i = 0; i <= deg(); i++) {
                res[i + 1] = a[i] * small_inv<T>(i + 1);
            }
            return res;
        }
        
        size_t trailing_xk() const { // Let p(x) = x^k * t(x), return k
            if(is_zero()) {
                return -1;
            }
            int res = 0;
            while(a[res] == T(0)) {
                res++;
            }
            return res;
        }
        
        poly_t log(size_t n) const { // calculate log p(x) mod x^n
            assert(a[0] == T(1));
            return (deriv().mod_xk(n) * inv(n)).integr().mod_xk(n);
        }
        
        poly_t exp(size_t n) const { // calculate exp p(x) mod x^n
            if(is_zero()) {
                return T(1);
            }
            assert(a[0] == T(0));
            poly_t ans = T(1);
            size_t a = 1;
            while(a < n) {
                poly_t C = ans.log(2 * a).div_xk(a) - substr(a, 2 * a);
                ans -= (ans * C).mod_xk(a).mul_xk(a);
                a *= 2;
            }
            return ans.mod_xk(n);
        }
        
        poly_t pow_bin(int64_t k, size_t n) const { // O(n log n log k)
            if(k == 0) {
                return poly_t(1).mod_xk(n);
            } else {
                auto t = pow(k / 2, n);
                t = (t * t).mod_xk(n);
                return (k % 2 ? *this * t : t).mod_xk(n);
            }
        }

        poly_t circular_closure(size_t m) const {
            if(deg() == -1) {
                return *this;
            }
            auto t = *this;
            for(size_t i = t.deg(); i >= m; i--) {
                t.a[i - m] += t.a[i];
            }
            t.a.resize(std::min(t.a.size(), m));
            return t;
        }

        static poly_t mul_circular(poly_t const& a, poly_t const& b, size_t m) {
            return (a.circular_closure(m) * b.circular_closure(m)).circular_closure(m);
        }

        poly_t powmod_circular(int64_t k, size_t m) const {
            if(k == 0) {
                return poly_t(1);
            } else {
                auto t = powmod_circular(k / 2, m);
                t = mul_circular(t, t, m);
                if(k % 2) {
                    t = mul_circular(t, *this, m);
                }
                return t;
            }
        }
        
        poly_t powmod(int64_t k, poly_t const& md) const {
            return poly::impl::powmod(*this, k, md);
        }
        
        // O(d * n) with the derivative trick from
        // https://codeforces.com/blog/entry/73947?#comment-581173
        poly_t pow_dn(int64_t k, size_t n) const {
            if(n == 0) {
                return poly_t(T(0));
            }
            assert((*this)[0] != T(0));
            std::vector<T> Q(n);
            Q[0] = bpow(a[0], k);
            auto a0inv = a[0].inv();
            for(int i = 1; i < (int)n; i++) {
                for(int j = 1; j <= std::min(deg(), i); j++) {
                    Q[i] += a[j] * Q[i - j] * (T(k) * T(j) - T(i - j));
                }
                Q[i] *= small_inv<T>(i) * a0inv;
            }
            return Q;
        }
        
        // calculate p^k(n) mod x^n in O(n log n)
        // might be quite slow due to high constant
        poly_t pow(int64_t k, size_t n) const {
            if(is_zero()) {
                return k ? *this : poly_t(1);
            }
            int i = trailing_xk();
            if(i > 0) {
                return k >= int64_t(n + i - 1) / i ? poly_t(T(0)) : div_xk(i).pow(k, n - i * k).mul_xk(i * k);
            }
            if(std::min(deg(), (int)n) <= magic) {
                return pow_dn(k, n);
            }
            if(k <= magic) {
                return pow_bin(k, n);
            }
            T j = a[i];
            poly_t t = *this / j;
            return bpow(j, k) * (t.log(n) * T(k)).exp(n).mod_xk(n);
        }
        
        // returns std::nullopt if undefined
        std::optional<poly_t> sqrt(size_t n) const {
            if(is_zero()) {
                return *this;
            }
            int i = trailing_xk();
            if(i % 2) {
                return std::nullopt;
            } else if(i > 0) {
                auto ans = div_xk(i).sqrt(n - i / 2);
                return ans ? ans->mul_xk(i / 2) : ans;
            }
            auto st = (*this)[0].sqrt();
            if(st) {
                poly_t ans = *st;
                size_t a = 1;
                while(a < n) {
                    a *= 2;
                    ans -= (ans - mod_xk(a) * ans.inv(a)).mod_xk(a) / 2;
                }
                return ans.mod_xk(n);
            }
            return std::nullopt;
        }
        
        poly_t mulx(T a) const { // component-wise multiplication with a^k
            T cur = 1;
            poly_t res(*this);
            for(int i = 0; i <= deg(); i++) {
                res.coef(i) *= cur;
                cur *= a;
            }
            return res;
        }

        poly_t mulx_sq(T a) const { // component-wise multiplication with a^{k choose 2}
            T cur = 1, total = 1;
            poly_t res(*this);
            for(int i = 0; i <= deg(); i++) {
                res.coef(i) *= total;
                cur *= a;
                total *= cur;
            }
            return res;
        }

        // be mindful of maxn, as the function
        // requires multiplying polynomials of size deg() and n+deg()!
        poly_t chirpz(T z, int n) const { // P(1), P(z), P(z^2), ..., P(z^(n-1))
            if(is_zero()) {
                return std::vector<T>(n, 0);
            }
            if(z == T(0)) {
                std::vector<T> ans(n, (*this)[0]);
                if(n > 0) {
                    ans[0] = accumulate(begin(a), end(a), T(0));
                }
                return ans;
            }
            auto A = mulx_sq(z.inv());
            auto B = ones(n+deg()).mulx_sq(z);
            return semicorr(B, A).mod_xk(n).mulx_sq(z.inv());
        }

        // res[i] = prod_{1 <= j <= i} 1/(1 - z^j)
        static auto _1mzk_prod_inv(T z, int n) {
            std::vector<T> res(n, 1), zk(n);
            zk[0] = 1;
            for(int i = 1; i < n; i++) {
                zk[i] = zk[i - 1] * z;
                res[i] = res[i - 1] * (T(1) - zk[i]);
            }
            res.back() = res.back().inv();
            for(int i = n - 2; i >= 0; i--) {
                res[i] = (T(1) - zk[i+1]) * res[i+1];
            }
            return res;
        }
        
        // prod_{0 <= j < n} (1 - z^j x)
        static auto _1mzkx_prod(T z, int n) {
            if(n == 1) {
                return poly_t(std::vector<T>{1, -1});
            } else {
                auto t = _1mzkx_prod(z, n / 2);
                t *= t.mulx(bpow(z, n / 2));
                if(n % 2) {
                    t *= poly_t(std::vector<T>{1, -bpow(z, n - 1)});
                }
                return t;
            }
        }

        poly_t chirpz_inverse(T z, int n) const { // P(1), P(z), P(z^2), ..., P(z^(n-1))
            if(is_zero()) {
                return {};
            }
            if(z == T(0)) {
                if(n == 1) {
                    return *this;
                } else {
                    return std::vector{(*this)[1], (*this)[0] - (*this)[1]};
                }
            }
            std::vector<T> y(n);
            for(int i = 0; i < n; i++) {
                y[i] = (*this)[i];
            }
            auto prods_pos = _1mzk_prod_inv(z, n);
            auto prods_neg = _1mzk_prod_inv(z.inv(), n);

            T zn = bpow(z, n-1).inv();
            T znk = 1;
            for(int i = 0; i < n; i++) {
                y[i] *= znk * prods_neg[i] * prods_pos[(n - 1) - i];
                znk *= zn;
            }

            poly_t p_over_q = poly_t(y).chirpz(z, n);
            poly_t q = _1mzkx_prod(z, n);

            return (p_over_q * q).mod_xk(n).reverse(n);
        }

        static poly_t build(std::vector<poly_t> &res, int v, auto L, auto R) { // builds evaluation tree for (x-a1)(x-a2)...(x-an)
            if(R - L == 1) {
                return res[v] = std::vector<T>{-*L, 1};
            } else {
                auto M = L + (R - L) / 2;
                return res[v] = build(res, 2 * v, L, M) * build(res, 2 * v + 1, M, R);
            }
        }

        poly_t to_newton(std::vector<poly_t> &tree, int v, auto l, auto r) {
            if(r - l == 1) {
                return *this;
            } else {
                auto m = l + (r - l) / 2;
                auto A = (*this % tree[2 * v]).to_newton(tree, 2 * v, l, m);
                auto B = (*this / tree[2 * v]).to_newton(tree, 2 * v + 1, m, r);
                return A + B.mul_xk(m - l);
            }
        }

        poly_t to_newton(std::vector<T> p) {
            if(is_zero()) {
                return *this;
            }
            int n = p.size();
            std::vector<poly_t> tree(4 * n);
            build(tree, 1, begin(p), end(p));
            return to_newton(tree, 1, begin(p), end(p));
        }

        std::vector<T> eval(std::vector<poly_t> &tree, int v, auto l, auto r) { // auxiliary evaluation function
            if(r - l == 1) {
                return {eval(*l)};
            } else {
                auto m = l + (r - l) / 2;
                auto A = (*this % tree[2 * v]).eval(tree, 2 * v, l, m);
                auto B = (*this % tree[2 * v + 1]).eval(tree, 2 * v + 1, m, r);
                A.insert(end(A), begin(B), end(B));
                return A;
            }
        }
        
        std::vector<T> eval(std::vector<T> x) { // evaluate polynomial in (x1, ..., xn)
            int n = x.size();
            if(is_zero()) {
                return std::vector<T>(n, T(0));
            }
            std::vector<poly_t> tree(4 * n);
            build(tree, 1, begin(x), end(x));
            return eval(tree, 1, begin(x), end(x));
        }
        
        poly_t inter(std::vector<poly_t> &tree, int v, auto ly, auto ry) { // auxiliary interpolation function
            if(ry - ly == 1) {
                return {*ly / a[0]};
            } else {
                auto my = ly + (ry - ly) / 2;
                auto A = (*this % tree[2 * v]).inter(tree, 2 * v, ly, my);
                auto B = (*this % tree[2 * v + 1]).inter(tree, 2 * v + 1, my, ry);
                return A * tree[2 * v + 1] + B * tree[2 * v];
            }
        }
        
        static auto inter(std::vector<T> x, std::vector<T> y) { // interpolates minimum polynomial from (xi, yi) pairs
            int n = x.size();
            std::vector<poly_t> tree(4 * n);
            return build(tree, 1, begin(x), end(x)).deriv().inter(tree, 1, begin(y), end(y));
        }

        static auto resultant(poly_t a, poly_t b) { // computes resultant of a and b
            if(b.is_zero()) {
                return 0;
            } else if(b.deg() == 0) {
                return bpow(b.lead(), a.deg());
            } else {
                int pw = a.deg();
                a %= b;
                pw -= a.deg();
                auto mul = bpow(b.lead(), pw) * T((b.deg() & a.deg() & 1) ? -1 : 1);
                auto ans = resultant(b, a);
                return ans * mul;
            }
        }
                
        static poly_t xk(size_t n) { // P(x) = x^n
            return poly_t(T(1)).mul_xk(n);
        }
        
        static poly_t ones(size_t n) { // P(x) = 1 + x + ... + x^{n-1} 
            return std::vector<T>(n, 1);
        }
        
        static poly_t expx(size_t n) { // P(x) = e^x (mod x^n)
            return ones(n).borel();
        }

        static poly_t log1px(size_t n) { // P(x) = log(1+x) (mod x^n)
            std::vector<T> coeffs(n, 0);
            for(size_t i = 1; i < n; i++) {
                coeffs[i] = (i & 1 ? T(i).inv() : -T(i).inv());
            }
            return coeffs;
        }

        static poly_t log1mx(size_t n) { // P(x) = log(1-x) (mod x^n)
            return -ones(n).integr();
        }
        
        // [x^k] (a corr b) = sum_{i} a{(k-m)+i}*bi
        static poly_t corr(poly_t a, poly_t b) { // cross-correlation
            return a * b.reverse();
        }

        // [x^k] (a semicorr b) = sum_i a{i+k} * b{i}
        static poly_t semicorr(poly_t a, poly_t b) {
            return corr(a, b).div_xk(b.deg());
        }
        
        poly_t invborel() const { // ak *= k!
            auto res = *this;
            for(int i = 0; i <= deg(); i++) {
                res.coef(i) *= fact<T>(i);
            }
            return res;
        }
        
        poly_t borel() const { // ak /= k!
            auto res = *this;
            for(int i = 0; i <= deg(); i++) {
                res.coef(i) *= rfact<T>(i);
            }
            return res;
        }
        
        poly_t shift(T a) const { // P(x + a)
            return semicorr(invborel(), expx(deg() + 1).mulx(a)).borel();
        }
        
        poly_t x2() { // P(x) -> P(x^2)
            std::vector<T> res(2 * a.size());
            for(size_t i = 0; i < a.size(); i++) {
                res[2 * i] = a[i];
            }
            return res;
        }
        
        // Return {P0, P1}, where P(x) = P0(x) + xP1(x)
        std::array<poly_t, 2> bisect() const {
            std::vector<T> res[2];
            res[0].reserve(deg() / 2 + 1);
            res[1].reserve(deg() / 2 + 1);
            for(int i = 0; i <= deg(); i++) {
                res[i % 2].push_back(a[i]);
            }
            return {res[0], res[1]};
        }
        
        // Find [x^k] P / Q
        static T kth_rec(poly_t P, poly_t Q, int64_t k) {
            while(k > Q.deg()) {
                int n = Q.a.size();
                auto [Q0, Q1] = Q.mulx(-1).bisect();
                auto [P0, P1] = P.bisect();
                
                int N = fft::com_size((n + 1) / 2, (n + 1) / 2);
                
                auto Q0f = fft::dft(Q0.a, N);
                auto Q1f = fft::dft(Q1.a, N);
                auto P0f = fft::dft(P0.a, N);
                auto P1f = fft::dft(P1.a, N);
                
                if(k % 2) {
                    P = poly_t(Q0f * P1f) + poly_t(Q1f * P0f);
                } else {
                    P = poly_t(Q0f * P0f) + poly_t(Q1f * P1f).mul_xk(1);
                }
                Q = poly_t(Q0f * Q0f) - poly_t(Q1f * Q1f).mul_xk(1);
                k /= 2;
            }
            return (P * Q.inv(Q.deg() + 1))[k];
        }

        // inverse series mod x^n
        poly_t inv(size_t n) const {
            return poly::impl::inv(*this, n);
        }
        // [x^k]..[x^{k+n-1}] of inv()
        // supports negative k if k+n >= 0
        poly_t inv(int64_t k, size_t n) const {
            return poly::impl::inv(*this, k, n);
        }
        
        // compute A(B(x)) mod x^n in O(n^2)
        static poly_t compose(poly_t A, poly_t B, int n) {
            int q = std::sqrt(n);
            std::vector<poly_t> Bk(q);
            auto Bq = B.pow(q, n);
            Bk[0] = poly_t(T(1));
            for(int i = 1; i < q; i++) {
                Bk[i] = (Bk[i - 1] * B).mod_xk(n);
            }
            poly_t Bqk(1);
            poly_t ans;
            for(int i = 0; i <= n / q; i++) {
                poly_t cur;
                for(int j = 0; j < q; j++) {
                    cur += Bk[j] * A[i * q + j];
                }
                ans += (Bqk * cur).mod_xk(n);
                Bqk = (Bqk * Bq).mod_xk(n);
            }
            return ans;
        }
        
        // compute A(B(x)) mod x^n in O(sqrt(pqn log^3 n))
        // preferrable when p = deg A and q = deg B
        // are much less than n
        static poly_t compose_large(poly_t A, poly_t B, int n) {
            if(B[0] != T(0)) {
                return compose_large(A.shift(B[0]), B - B[0], n);
            }
            
            int q = std::sqrt(n);
            auto [B0, B1] = std::make_pair(B.mod_xk(q), B.div_xk(q));
            
            B0 = B0.div_xk(1);
            std::vector<poly_t> pw(A.deg() + 1);
            auto getpow = [&](int k) {
                return pw[k].is_zero() ? pw[k] = B0.pow(k, n - k) : pw[k];
            };
            
            std::function<poly_t(poly_t const&, int, int)> compose_dac = [&getpow, &compose_dac](poly_t const& f, int m, int N) {
                if(f.deg() <= 0) {
                    return f;
                }
                int k = m / 2;
                auto [f0, f1] = std::make_pair(f.mod_xk(k), f.div_xk(k));
                auto [A, B] = std::make_pair(compose_dac(f0, k, N), compose_dac(f1, m - k, N - k));
                return (A + (B.mod_xk(N - k) * getpow(k).mod_xk(N - k)).mul_xk(k)).mod_xk(N);
            };
            
            int r = n / q;
            auto Ar = A.deriv(r);
            auto AB0 = compose_dac(Ar, Ar.deg() + 1, n);
            
            auto Bd = B0.mul_xk(1).deriv();
            
            poly_t ans = T(0);
            
            std::vector<poly_t> B1p(r + 1);
            B1p[0] = poly_t(T(1));
            for(int i = 1; i <= r; i++) {
                B1p[i] = (B1p[i - 1] * B1.mod_xk(n - i * q)).mod_xk(n - i * q);
            }
            while(r >= 0) {
                ans += (AB0.mod_xk(n - r * q) * rfact<T>(r) * B1p[r]).mul_xk(r * q).mod_xk(n);
                r--;
                if(r >= 0) {
                    AB0 = ((AB0 * Bd).integr() + A[r] * fact<T>(r)).mod_xk(n);
                }
            }
            
            return ans;
        }
    };
    
    static auto operator * (const auto& a, const poly_t<auto>& b) {
        return b * a;
    }
};

#line 1 "cp-algorithms-aux/cp-algo/linalg/matrix.hpp"


#line 1 "cp-algorithms-aux/cp-algo/linalg/vector.hpp"


#line 7 "cp-algorithms-aux/cp-algo/linalg/vector.hpp"
#include <valarray>
#line 9 "cp-algorithms-aux/cp-algo/linalg/vector.hpp"
#include <iterator>
namespace cp_algo::linalg {
    template<class vec, typename base>
    struct valarray_base: std::valarray<base> {
        using Base = std::valarray<base>;
        using Base::Base;

        valarray_base(base const& t): Base(t, 1) {}

        auto begin() {return std::begin(*static_cast<Base*>(this));}
        auto end() {return std::end(*static_cast<Base*>(this));}
        auto begin() const {return std::begin(*static_cast<Base const*>(this));}
        auto end() const {return std::end(*static_cast<Base const*>(this));}

        bool operator == (vec const& t) const {return std::ranges::equal(*this, t);}
        bool operator != (vec const& t) const {return !(*this == t);}

        vec operator-() const {return Base::operator-();}
        vec operator-(vec const& t) const {return Base::operator-(t);}
        vec operator+(vec const& t) const {return Base::operator+(t);}

        static vec from_range(auto const& R) {
            vec res(std::ranges::distance(R));
            std::ranges::copy(R, res.begin());
            return res;
        }
    };

    template<class vec, typename base>
    struct vec_base: valarray_base<vec, base> {
        using Base = valarray_base<vec, base>;
        using Base::Base;

        static vec ei(size_t n, size_t i) {
            vec res(n);
            res[i] = 1;
            return res;
        }

        // Make sure the result is vec, not Base
        vec operator*(base t) const {return Base::operator*(t);}

        void add_scaled(vec const& b, base scale, size_t i = 0) {
            assert(false);
            for(; i < size(*this); i++) {
                (*this)[i] += scale * b[i];
            }
        }
        auto& normalize() {
            return *this;
        }
        auto& normalize(size_t i) {
            return (*this)[i];
        }
        void read() {
            for(auto &it: *this) {
                std::cin >> it;
            }
        }
        void print() const {
            std::ranges::copy(*this, std::ostream_iterator<base>(std::cout, " "));
            std::cout << "\n";
        }
        static vec random(size_t n) {
            vec res(n);
            std::ranges::generate(res, random::rng);
            return res;
        }
        // Concatenate vectors
        vec operator |(vec const& t) const {
            vec res(size(*this) + size(t));
            res[std::slice(0, size(*this), 1)] = *this;
            res[std::slice(size(*this), size(t), 1)] = t;
            return res;
        }

        // Generally, vec shouldn't be modified
        // after it's pivot index is set
        std::pair<size_t, base> find_pivot() {
            auto true_this = static_cast<vec*>(this);
            if(pivot == size_t(-1)) {
                pivot = 0;
                while(pivot < size(*this) && true_this->normalize(pivot) == base(0)) {
                    pivot++;
                }
                if(pivot < size(*this)) {
                    pivot_inv = base(1) / (*this)[pivot];
                }
            }
            return {pivot, pivot_inv};
        }
        void reduce_by(vec &t) {
            auto true_this = static_cast<vec*>(this);
            auto [pivot, pinv] = t.find_pivot();
            if(pivot < size(*this)) {
                true_this->add_scaled(t, -true_this->normalize(pivot) * pinv, pivot);
            }
        }
    private:
        size_t pivot = -1;
        base pivot_inv;
    };

    template<typename base>
    struct vec: vec_base<vec<base>, base> {
        using Base = vec_base<vec<base>, base>;
        using Base::Base;
    };

    template<int mod>
    struct vec<algebra::modular<mod>>:
            vec_base<vec<algebra::modular<mod>>, algebra::modular<mod>> {
        using base = algebra::modular<mod>;
        using Base = vec_base<vec<base>, base>;
        using Base::Base;

        void add_scaled(vec const& b, base scale, size_t i = 0) {
            for(; i < size(*this); i++) {
                (*this)[i].add_unsafe(scale.r * b[i].r);
            }
            if(++counter == 8) {
                for(auto &it: *this) {
                    it.pseudonormalize();
                }
                counter = 0;
            }
        }
        auto& normalize() {
            for(auto &it: *this) {
                it.normalize();
            }
            return *this;
        }
        auto& normalize(size_t i) {
            return (*this)[i].normalize();
        }
    private:
        size_t counter = 0;
    };
}

#line 10 "cp-algorithms-aux/cp-algo/linalg/matrix.hpp"
#include <array>
namespace cp_algo::linalg {
    template<typename base>
    struct matrix: valarray_base<matrix<base>, vec<base>> {
        using Base = valarray_base<matrix<base>, vec<base>>;
        using Base::Base;

        matrix(size_t n): Base(vec<base>(n), n) {}
        matrix(size_t n, size_t m): Base(vec<base>(m), n) {}

        size_t n() const {return size(*this);}
        size_t m() const {return n() ? size(row(0)) : 0;}
        auto dim() const {return std::array{n(), m()};}

        auto& row(size_t i) {return (*this)[i];}
        auto const& row(size_t i) const {return (*this)[i];}

        matrix& operator *=(base t) {for(auto &it: *this) it *= t; return *this;}
        matrix operator *(base t) const {return matrix(*this) *= t;}

        // Make sure the result is matrix, not Base
        matrix& operator*=(matrix const& t) {return *this = *this * t;}

        void read() {
            for(auto &it: *this) {
                it.read();
            }
        }
        void print() const {
            for(auto const& it: *this) {
                it.print();
            }
        }

        static matrix eye(size_t n) {
            matrix res(n);
            for(size_t i = 0; i < n; i++) {
                res[i][i] = 1;
            }
            return res;
        }

        // Concatenate matrices
        matrix operator |(matrix const& b) const {
            assert(n() == b.n());
            matrix res(n(), m()+b.m());
            for(size_t i = 0; i < n(); i++) {
                res[i] = row(i) | b[i];
            }
            return res;
        }
        matrix submatrix(auto slicex, auto slicey) const {
            matrix res = (*this)[slicex];
            for(auto &row: res) {
                row = vec<base>(row[slicey]);
            }
            return res;
        }

        matrix T() const {
            matrix res(m(), n());
            for(size_t i = 0; i < n(); i++) {
                for(size_t j = 0; j < m(); j++) {
                    res[j][i] = row(i)[j];
                }
            }
            return res;
        }

        matrix operator *(matrix const& b) const {
            assert(m() == b.n());
            matrix res(n(), b.m());
            for(size_t i = 0; i < n(); i++) {
                for(size_t j = 0; j < m(); j++) {
                    res[i].add_scaled(b[j], row(i)[j]);
                }
            }
            return res.normalize();
        }

        vec<base> apply(vec<base> const& x) const {
            return (matrix(x) * *this)[0];
        }

        matrix pow(uint64_t k) const {
            assert(n() == m());
            return bpow(*this, k, eye(n()));
        }

        static matrix random(size_t n, size_t m) {
            matrix res(n, m);
            std::ranges::generate(res, std::bind(vec<base>::random, m));
            return res;
        }
        static matrix random(size_t n) {
            return random(n, n);
        }

        matrix& normalize() {
            for(auto &it: *this) {
                it.normalize();
            }
            return *this;
        }

        enum Mode {normal, reverse};
        template<Mode mode = normal>
        auto gauss(size_t lim) {
            size_t rk = 0;
            std::vector<size_t> free, pivots;
            for(size_t i = 0; i < lim; i++) {
                for(size_t j = rk; j < n() && row(rk).normalize(i) == base(0); j++) {
                    if(row(j).normalize(i) != 0) {
                        row(rk) += row(j);
                    }
                }
                if(rk == n() || row(rk).normalize()[i] == base(0)) {
                    free.push_back(i);
                } else {
                    pivots.push_back(i);
                    for(size_t j = (mode == normal) * rk; j < n(); j++) {
                        if(j != rk) {
                            row(j).reduce_by(row(rk));
                        }
                    }
                    rk += 1;
                }
            }
            normalize();
            return std::array{pivots, free};
        }
        template<Mode mode = normal>
        auto gauss() {
            return gauss<mode>(m());
        }

        size_t rank() const {
            if(n() < m()) {
                return T().rank();
            }
            return size(matrix(*this).gauss()[0]);
        }

        base det() const {
            assert(n() == m());
            matrix b = *this;
            b.gauss();
            base res = 1;
            for(size_t i = 0; i < n(); i++) {
                res *= b[i][i];
            }
            return res;
        }

        std::optional<matrix> inv() const {
            assert(n() == m());
            matrix b = *this | eye(n());
            if(size(b.gauss<reverse>(n())[0]) < n()) {
                return std::nullopt;
            }
            for(size_t i = 0; i < n(); i++) {
                b[i] *= base(1) / b[i][i];
            }
            return b.submatrix(std::slice(0, n(), 1), std::slice(n(), n(), 1));
        }

        // Can also just run gauss on T() | eye(m)
        // but it would be slower :(
        auto kernel() const {
            auto A = *this;
            auto [pivots, free] = A.template gauss<reverse>();
            matrix sols(size(free), m());
            for(size_t j = 0; j < size(pivots); j++) {
                base scale = base(1) / A[j][pivots[j]];
                for(size_t i = 0; i < size(free); i++) {
                    sols[i][pivots[j]] = A[j][free[i]] * scale;
                }
            }
            for(size_t i = 0; i < size(free); i++) {
                sols[i][free[i]] = -1;
            }
            return sols;
        }

        // [solution, basis], transposed
        std::optional<std::array<matrix, 2>> solve(matrix t) const {
            matrix sols = (*this | t).kernel();
            if(sols.n() < t.m() || sols.submatrix(
                std::slice(sols.n() - t.m(), t.m(), 1),
                std::slice(m(), t.m(), 1)
            ) != -eye(t.m())) {
                return std::nullopt;
            } else {
                return std::array{
                    sols.submatrix(std::slice(sols.n() - t.m(), t.m(), 1),
                                   std::slice(0, m(), 1)),
                    sols.submatrix(std::slice(0, sols.n() - t.m(), 1),
                                   std::slice(0, m(), 1))
                };
            }
        }
    };
}

#line 7 "main.cpp"
#include <bits/stdc++.h>

using namespace std;
using namespace cp_algo::algebra;
using namespace cp_algo::linalg;

const int mod = 998244353;
using base = modular<mod>;
using polyn = poly_t<base>;

void solve() {
    size_t n, q;
    cin >> n >> q;
    matrix<base> A(n);
    A.read();
    vector<vec<base>> basis;
    auto x = vec<base>::random(n);
    size_t degree = 0;
    polyn ans = base(-1);
    while(size(basis) <= n) {
        vec<base> y = x | vec<base>::ei(n + 1, size(basis));
        for(auto &it: basis) {
            y.reduce_by(it);
        }
        y.normalize();
        if(vec<base>(y[slice(0, n, 1)]) == vec<base>(n)) {
            vector<base> cur(begin(y) + n + size(basis) - degree,
                             begin(y) + n + size(basis) + 1);
            ans *= polyn(cur);
            degree = 0;
            if(size(basis) < n) {
                x = vec<base>::random(n);
            } else {
                break;
            }
        } else {
            basis.push_back(y);
            x = A.apply(x);
            degree++;
        }
    }
    vector<base> qs(q);
    for (auto& x : qs) {
        cin >> x;
    }
    for (auto x : ans.eval(qs)) {
        cout << x << " ";
    }
    cout << endl;
}

signed main() {
    //freopen("input.txt", "r", stdin);
    ios::sync_with_stdio(0);
    cin.tie(0);
    int t = 1;
    //cin >> t;
    while(t--) {
        solve();
    }
}

Source code, Language: C++20

Details

Tip: Click on the bar to expand more detailed information

Test #1:

score: 100

Accepted

time: 1ms

memory: 3900kb

input:

3 6
2 4 5
6 3 8
1 6 3
10 9 5 8 3 1

output:

407 470 402 495 260 110

result:

ok 6 numbers

Test #2:

score: -100

Wrong Answer

time: 0ms

memory: 3820kb

input:

10 10
200268224 742027139 991782556 311300662 103717984 858920295 99206423 318384181 123957670 464387796
475747059 886243349 918458492 433143039 366536656 679864255 403637778 945858353 934728373 854446149
313645968 823847598 152436083 234526494 267259107 950363928 83485586 904554893 830929790 528110...

output:

476496923 945898896 286047173 228992283 768902264 291707350 232955181 173818621 927660234 621913521

result:

wrong answer 1st numbers differ - expected: '521747430', found: '476496923'