QOJ.ac

QOJ

IDProblemSubmitterResultTimeMemoryLanguageFile sizeSubmit timeJudge time
#314855#8178. Bracket SequestionMei Dui Yao (Qiuyang Zhang, Jiyu Shen)#AC ✓935ms29004kbC++1471.1kb2024-01-26 13:07:022024-01-26 13:07:02

Judging History

This is the latest submission verdict.

  • [2024-01-26 13:07:02]
  • Judged
  • Verdict: AC
  • Time: 935ms
  • Memory: 29004kb
  • [2024-01-26 13:07:02]
  • Submitted

answer

// what is matter? never mind.
#include<bits/stdc++.h>
#define For(i,a,b) for(int i=(a);i<=(b);++i)
#define Rep(i,a,b) for(int i=(a);i>=(b);--i)
using namespace std;
inline int read()
{
    char c=getchar();int x=0;bool f=0;
    for(;!isdigit(c);c=getchar())f^=!(c^45);
    for(;isdigit(c);c=getchar())x=(x<<1)+(x<<3)+(c^48);
    if(f)x=-x;return x;
}

#define NDEBUG

using namespace std;

// intrinstic
#include <immintrin.h>


namespace atcoder {

namespace internal {

#if __cplusplus >= 202002L

using std::bit_ceil;

#else

// @return same with std::bit::bit_ceil
unsigned int bit_ceil(unsigned int n) {
    unsigned int x = 1;
    while (x < (unsigned int)(n)) x *= 2;
    return x;
}

#endif

// @param n `1 <= n`
// @return same with std::bit::countr_zero
int countr_zero(unsigned int n) {
#ifdef _MSC_VER
    unsigned long index;
    _BitScanForward(&index, n);
    return index;
#else
    return __builtin_ctz(n);
#endif
}

// @param n `1 <= n`
// @return same with std::bit::countr_zero
constexpr int countr_zero_constexpr(unsigned int n) {
    int x = 0;
    while (!(n & (1 << x))) x++;
    return x;
}

}  // namespace internal

}  // namespace atcoder

namespace atcoder {

namespace internal {

#ifndef _MSC_VER
template <class T>
using is_signed_int128 =
    typename std::conditional<std::is_same<T, __int128_t>::value ||
                                  std::is_same<T, __int128>::value,
                              std::true_type,
                              std::false_type>::type;

template <class T>
using is_unsigned_int128 =
    typename std::conditional<std::is_same<T, __uint128_t>::value ||
                                  std::is_same<T, unsigned __int128>::value,
                              std::true_type,
                              std::false_type>::type;

template <class T>
using make_unsigned_int128 =
    typename std::conditional<std::is_same<T, __int128_t>::value,
                              __uint128_t,
                              unsigned __int128>;

template <class T>
using is_integral = typename std::conditional<std::is_integral<T>::value ||
                                                  is_signed_int128<T>::value ||
                                                  is_unsigned_int128<T>::value,
                                              std::true_type,
                                              std::false_type>::type;

template <class T>
using is_signed_int = typename std::conditional<(is_integral<T>::value &&
                                                 std::is_signed<T>::value) ||
                                                    is_signed_int128<T>::value,
                                                std::true_type,
                                                std::false_type>::type;

template <class T>
using is_unsigned_int =
    typename std::conditional<(is_integral<T>::value &&
                               std::is_unsigned<T>::value) ||
                                  is_unsigned_int128<T>::value,
                              std::true_type,
                              std::false_type>::type;

template <class T>
using to_unsigned = typename std::conditional<
    is_signed_int128<T>::value,
    make_unsigned_int128<T>,
    typename std::conditional<std::is_signed<T>::value,
                              std::make_unsigned<T>,
                              std::common_type<T>>::type>::type;

#else

template <class T> using is_integral = typename std::is_integral<T>;

template <class T>
using is_signed_int =
    typename std::conditional<is_integral<T>::value && std::is_signed<T>::value,
                              std::true_type,
                              std::false_type>::type;

template <class T>
using is_unsigned_int =
    typename std::conditional<is_integral<T>::value &&
                                  std::is_unsigned<T>::value,
                              std::true_type,
                              std::false_type>::type;

template <class T>
using to_unsigned = typename std::conditional<is_signed_int<T>::value,
                                              std::make_unsigned<T>,
                                              std::common_type<T>>::type;

#endif

template <class T>
using is_signed_int_t = std::enable_if_t<is_signed_int<T>::value>;

template <class T>
using is_unsigned_int_t = std::enable_if_t<is_unsigned_int<T>::value>;

template <class T> using to_unsigned_t = typename to_unsigned<T>::type;

}  // namespace internal

}  // namespace atcoder

namespace atcoder {

namespace internal {

template <class E> struct csr {
    std::vector<int> start;
    std::vector<E> elist;
    explicit csr(int n, const std::vector<std::pair<int, E>>& edges)
        : start(n + 1), elist(edges.size()) {
        for (auto e : edges) {
            start[e.first + 1]++;
        }
        for (int i = 1; i <= n; i++) {
            start[i] += start[i - 1];
        }
        auto counter = start;
        for (auto e : edges) {
            elist[counter[e.first]++] = e.second;
        }
    }
};

}  // namespace internal

}  // namespace atcoder

namespace atcoder {
namespace internal {

// @param m `1 <= m`
// @return x mod m
constexpr long long safe_mod(long long x, long long m) {
    x %= m;
    if (x < 0) x += m;
    return x;
}

// Fast modular multiplication by barrett reduction
// Reference: https://en.wikipedia.org/wiki/Barrett_reduction
// NOTE: reconsider after Ice Lake
struct barrett {
    unsigned int _m;
    unsigned long long im;

    // @param m `1 <= m`
    explicit barrett(unsigned int m) : _m(m), im((unsigned long long)(-1) / m + 1) {}

    // @return m
    unsigned int umod() const { return _m; }

    // @param a `0 <= a < m`
    // @param b `0 <= b < m`
    // @return `a * b % m`
    unsigned int mul(unsigned int a, unsigned int b) const {
        // [1] m = 1
        // a = b = im = 0, so okay

        // [2] m >= 2
        // im = ceil(2^64 / m)
        // -> im * m = 2^64 + r (0 <= r < m)
        // let z = a*b = c*m + d (0 <= c, d < m)
        // a*b * im = (c*m + d) * im = c*(im*m) + d*im = c*2^64 + c*r + d*im
        // c*r + d*im < m * m + m * im < m * m + 2^64 + m <= 2^64 + m * (m + 1) < 2^64 * 2
        // ((ab * im) >> 64) == c or c + 1
        unsigned long long z = a;
        z *= b;
#ifdef _MSC_VER
        unsigned long long x;
        _umul128(z, im, &x);
#else
        unsigned long long x =
            (unsigned long long)(((unsigned __int128)(z)*im) >> 64);
#endif
        unsigned long long y = x * _m;
        return (unsigned int)(z - y + (z < y ? _m : 0));
    }
};

// @param n `0 <= n`
// @param m `1 <= m`
// @return `(x ** n) % m`
constexpr long long pow_mod_constexpr(long long x, long long n, int m) {
    if (m == 1) return 0;
    unsigned int _m = (unsigned int)(m);
    unsigned long long r = 1;
    unsigned long long y = safe_mod(x, m);
    while (n) {
        if (n & 1) r = (r * y) % _m;
        y = (y * y) % _m;
        n >>= 1;
    }
    return r;
}

// Reference:
// M. Forisek and J. Jancina,
// Fast Primality Testing for Integers That Fit into a Machine Word
// @param n `0 <= n`
constexpr bool is_prime_constexpr(int n) {
    if (n <= 1) return false;
    if (n == 2 || n == 7 || n == 61) return true;
    if (n % 2 == 0) return false;
    long long d = n - 1;
    while (d % 2 == 0) d /= 2;
    constexpr long long bases[3] = {2, 7, 61};
    for (long long a : bases) {
        long long t = d;
        long long y = pow_mod_constexpr(a, t, n);
        while (t != n - 1 && y != 1 && y != n - 1) {
            y = y * y % n;
            t <<= 1;
        }
        if (y != n - 1 && t % 2 == 0) {
            return false;
        }
    }
    return true;
}
template <int n> constexpr bool is_prime = is_prime_constexpr(n);

// @param b `1 <= b`
// @return pair(g, x) s.t. g = gcd(a, b), xa = g (mod b), 0 <= x < b/g
constexpr std::pair<long long, long long> inv_gcd(long long a, long long b) {
    a = safe_mod(a, b);
    if (a == 0) return {b, 0};

    // Contracts:
    // [1] s - m0 * a = 0 (mod b)
    // [2] t - m1 * a = 0 (mod b)
    // [3] s * |m1| + t * |m0| <= b
    long long s = b, t = a;
    long long m0 = 0, m1 = 1;

    while (t) {
        long long u = s / t;
        s -= t * u;
        m0 -= m1 * u;  // |m1 * u| <= |m1| * s <= b

        // [3]:
        // (s - t * u) * |m1| + t * |m0 - m1 * u|
        // <= s * |m1| - t * u * |m1| + t * (|m0| + |m1| * u)
        // = s * |m1| + t * |m0| <= b

        auto tmp = s;
        s = t;
        t = tmp;
        tmp = m0;
        m0 = m1;
        m1 = tmp;
    }
    // by [3]: |m0| <= b/g
    // by g != b: |m0| < b/g
    if (m0 < 0) m0 += b / s;
    return {s, m0};
}

// Compile time primitive root
// @param m must be prime
// @return primitive root (and minimum in now)
constexpr int primitive_root_constexpr(int m) {
    if (m == 2) return 1;
    if (m == 167772161) return 3;
    if (m == 469762049) return 3;
    if (m == 754974721) return 11;
    if (m == 998244353) return 3;
    int divs[20] = {};
    divs[0] = 2;
    int cnt = 1;
    int x = (m - 1) / 2;
    while (x % 2 == 0) x /= 2;
    for (int i = 3; (long long)(i)*i <= x; i += 2) {
        if (x % i == 0) {
            divs[cnt++] = i;
            while (x % i == 0) {
                x /= i;
            }
        }
    }
    if (x > 1) {
        divs[cnt++] = x;
    }
    for (int g = 2;; g++) {
        bool ok = true;
        for (int i = 0; i < cnt; i++) {
            if (pow_mod_constexpr(g, (m - 1) / divs[i], m) == 1) {
                ok = false;
                break;
            }
        }
        if (ok) return g;
    }
}
template <int m> constexpr int primitive_root = primitive_root_constexpr(m);

// @param n `n < 2^32`
// @param m `1 <= m < 2^32`
// @return sum_{i=0}^{n-1} floor((ai + b) / m) (mod 2^64)
unsigned long long floor_sum_unsigned(unsigned long long n,
                                      unsigned long long m,
                                      unsigned long long a,
                                      unsigned long long b) {
    unsigned long long ans = 0;
    while (true) {
        if (a >= m) {
            ans += n * (n - 1) / 2 * (a / m);
            a %= m;
        }
        if (b >= m) {
            ans += n * (b / m);
            b %= m;
        }

        unsigned long long y_max = a * n + b;
        if (y_max < m) break;
        // y_max < m * (n + 1)
        // floor(y_max / m) <= n
        n = (unsigned long long)(y_max / m);
        b = (unsigned long long)(y_max % m);
        std::swap(m, a);
    }
    return ans;
}

}  // namespace internal

}  // namespace atcoder

namespace atcoder {

namespace internal {

struct modint_base {};
struct static_modint_base : modint_base {};

template <class T> using is_modint = std::is_base_of<modint_base, T>;
template <class T> using is_modint_t = std::enable_if_t<is_modint<T>::value>;

}  // namespace internal

template <int m, std::enable_if_t<(1 <= m)>* = nullptr>
struct static_modint : internal::static_modint_base {
    using mint = static_modint;

  public:
    static constexpr int mod() { return m; }
    static mint raw(int v) {
        mint x;
        x._v = v;
        return x;
    }

    static_modint() : _v(0) {}
    template <class T, internal::is_signed_int_t<T>* = nullptr>
    static_modint(T v) {
        long long x = (long long)(v % (long long)(umod()));
        if (x < 0) x += umod();
        _v = (unsigned int)(x);
    }
    template <class T, internal::is_unsigned_int_t<T>* = nullptr>
    static_modint(T v) {
        _v = (unsigned int)(v % umod());
    }

    unsigned int val() const { return _v; }

    mint& operator++() {
        _v++;
        if (_v == umod()) _v = 0;
        return *this;
    }
    mint& operator--() {
        if (_v == 0) _v = umod();
        _v--;
        return *this;
    }
    mint operator++(int) {
        mint result = *this;
        ++*this;
        return result;
    }
    mint operator--(int) {
        mint result = *this;
        --*this;
        return result;
    }

    mint& operator+=(const mint& rhs) {
        _v += rhs._v;
        if (_v >= umod()) _v -= umod();
        return *this;
    }
    mint& operator-=(const mint& rhs) {
        _v -= rhs._v;
        if (_v >= umod()) _v += umod();
        return *this;
    }
    mint& operator*=(const mint& rhs) {
        unsigned long long z = _v;
        z *= rhs._v;
        _v = (unsigned int)(z % umod());
        return *this;
    }
    mint& operator/=(const mint& rhs) { return *this = *this * rhs.inv(); }

    mint operator+() const { return *this; }
    mint operator-() const { return mint() - *this; }

    mint pow(long long n) const {
        assert(0 <= n);
        mint x = *this, r = 1;
        while (n) {
            if (n & 1) r *= x;
            x *= x;
            n >>= 1;
        }
        return r;
    }
    mint inv() const {
        if (prime) {
            assert(_v);
            return pow(umod() - 2);
        } else {
            auto eg = internal::inv_gcd(_v, m);
            assert(eg.first == 1);
            return eg.second;
        }
    }

    friend mint operator+(const mint& lhs, const mint& rhs) {
        return mint(lhs) += rhs;
    }
    friend mint operator-(const mint& lhs, const mint& rhs) {
        return mint(lhs) -= rhs;
    }
    friend mint operator*(const mint& lhs, const mint& rhs) {
        return mint(lhs) *= rhs;
    }
    friend mint operator/(const mint& lhs, const mint& rhs) {
        return mint(lhs) /= rhs;
    }
    friend bool operator==(const mint& lhs, const mint& rhs) {
        return lhs._v == rhs._v;
    }
    friend bool operator!=(const mint& lhs, const mint& rhs) {
        return lhs._v != rhs._v;
    }

  private:
    unsigned int _v;
    static constexpr unsigned int umod() { return m; }
    static constexpr bool prime = internal::is_prime<m>;
};

template <int id> struct dynamic_modint : internal::modint_base {
    using mint = dynamic_modint;

  public:
    static int mod() { return (int)(bt.umod()); }
    static void set_mod(int m) {
        assert(1 <= m);
        bt = internal::barrett(m);
    }
    static mint raw(int v) {
        mint x;
        x._v = v;
        return x;
    }

    dynamic_modint() : _v(0) {}
    template <class T, internal::is_signed_int_t<T>* = nullptr>
    dynamic_modint(T v) {
        long long x = (long long)(v % (long long)(mod()));
        if (x < 0) x += mod();
        _v = (unsigned int)(x);
    }
    template <class T, internal::is_unsigned_int_t<T>* = nullptr>
    dynamic_modint(T v) {
        _v = (unsigned int)(v % mod());
    }

    unsigned int val() const { return _v; }

    mint& operator++() {
        _v++;
        if (_v == umod()) _v = 0;
        return *this;
    }
    mint& operator--() {
        if (_v == 0) _v = umod();
        _v--;
        return *this;
    }
    mint operator++(int) {
        mint result = *this;
        ++*this;
        return result;
    }
    mint operator--(int) {
        mint result = *this;
        --*this;
        return result;
    }

    mint& operator+=(const mint& rhs) {
        _v += rhs._v;
        if (_v >= umod()) _v -= umod();
        return *this;
    }
    mint& operator-=(const mint& rhs) {
        _v += mod() - rhs._v;
        if (_v >= umod()) _v -= umod();
        return *this;
    }
    mint& operator*=(const mint& rhs) {
        _v = bt.mul(_v, rhs._v);
        return *this;
    }
    mint& operator/=(const mint& rhs) { return *this = *this * rhs.inv(); }

    mint operator+() const { return *this; }
    mint operator-() const { return mint() - *this; }

    mint pow(long long n) const {
        assert(0 <= n);
        mint x = *this, r = 1;
        while (n) {
            if (n & 1) r *= x;
            x *= x;
            n >>= 1;
        }
        return r;
    }
    mint inv() const {
        auto eg = internal::inv_gcd(_v, mod());
        assert(eg.first == 1);
        return eg.second;
    }

    friend mint operator+(const mint& lhs, const mint& rhs) {
        return mint(lhs) += rhs;
    }
    friend mint operator-(const mint& lhs, const mint& rhs) {
        return mint(lhs) -= rhs;
    }
    friend mint operator*(const mint& lhs, const mint& rhs) {
        return mint(lhs) *= rhs;
    }
    friend mint operator/(const mint& lhs, const mint& rhs) {
        return mint(lhs) /= rhs;
    }
    friend bool operator==(const mint& lhs, const mint& rhs) {
        return lhs._v == rhs._v;
    }
    friend bool operator!=(const mint& lhs, const mint& rhs) {
        return lhs._v != rhs._v;
    }

  private:
    unsigned int _v;
    static internal::barrett bt;
    static unsigned int umod() { return bt.umod(); }
};
template <int id> internal::barrett dynamic_modint<id>::bt(998244353);

using modint998244353 = static_modint<998244353>;
using modint1000000007 = static_modint<1000000007>;
using modint = dynamic_modint<-1>;

namespace internal {

template <class T>
using is_static_modint = std::is_base_of<internal::static_modint_base, T>;

template <class T>
using is_static_modint_t = std::enable_if_t<is_static_modint<T>::value>;

template <class> struct is_dynamic_modint : public std::false_type {};
template <int id>
struct is_dynamic_modint<dynamic_modint<id>> : public std::true_type {};

template <class T>
using is_dynamic_modint_t = std::enable_if_t<is_dynamic_modint<T>::value>;

}  // namespace internal

}  // namespace atcoder

namespace atcoder {

namespace internal {

template <class mint,
          int g = internal::primitive_root<mint::mod()>,
          internal::is_static_modint_t<mint>* = nullptr>
struct fft_info {
    static constexpr int rank2 = countr_zero_constexpr(mint::mod() - 1);
    std::array<mint, rank2 + 1> root;   // root[i]^(2^i) == 1
    std::array<mint, rank2 + 1> iroot;  // root[i] * iroot[i] == 1

    std::array<mint, std::max(0, rank2 - 2 + 1)> rate2;
    std::array<mint, std::max(0, rank2 - 2 + 1)> irate2;

    std::array<mint, std::max(0, rank2 - 3 + 1)> rate3;
    std::array<mint, std::max(0, rank2 - 3 + 1)> irate3;

    fft_info() {
        root[rank2] = mint(g).pow((mint::mod() - 1) >> rank2);
        iroot[rank2] = root[rank2].inv();
        for (int i = rank2 - 1; i >= 0; i--) {
            root[i] = root[i + 1] * root[i + 1];
            iroot[i] = iroot[i + 1] * iroot[i + 1];
        }

        {
            mint prod = 1, iprod = 1;
            for (int i = 0; i <= rank2 - 2; i++) {
                rate2[i] = root[i + 2] * prod;
                irate2[i] = iroot[i + 2] * iprod;
                prod *= iroot[i + 2];
                iprod *= root[i + 2];
            }
        }
        {
            mint prod = 1, iprod = 1;
            for (int i = 0; i <= rank2 - 3; i++) {
                rate3[i] = root[i + 3] * prod;
                irate3[i] = iroot[i + 3] * iprod;
                prod *= iroot[i + 3];
                iprod *= root[i + 3];
            }
        }
    }
};

template <class mint, internal::is_static_modint_t<mint>* = nullptr>
void butterfly(std::vector<mint>& a) {
    int n = int(a.size());
    int h = internal::countr_zero((unsigned int)n);

    static const fft_info<mint> info;

    int len = 0;  // a[i, i+(n>>len), i+2*(n>>len), ..] is transformed
    while (len < h) {
        if (h - len == 1) {
            int p = 1 << (h - len - 1);
            mint rot = 1;
            for (int s = 0; s < (1 << len); s++) {
                int offset = s << (h - len);
                for (int i = 0; i < p; i++) {
                    auto l = a[i + offset];
                    auto r = a[i + offset + p] * rot;
                    a[i + offset] = l + r;
                    a[i + offset + p] = l - r;
                }
                if (s + 1 != (1 << len))
                    rot *= info.rate2[countr_zero(~(unsigned int)(s))];
            }
            len++;
        } else {
            // 4-base
            int p = 1 << (h - len - 2);
            mint rot = 1, imag = info.root[2];
            for (int s = 0; s < (1 << len); s++) {
                mint rot2 = rot * rot;
                mint rot3 = rot2 * rot;
                int offset = s << (h - len);
                for (int i = 0; i < p; i++) {
                    auto mod2 = 1ULL * mint::mod() * mint::mod();
                    auto a0 = 1ULL * a[i + offset].val();
                    auto a1 = 1ULL * a[i + offset + p].val() * rot.val();
                    auto a2 = 1ULL * a[i + offset + 2 * p].val() * rot2.val();
                    auto a3 = 1ULL * a[i + offset + 3 * p].val() * rot3.val();
                    auto a1na3imag =
                        1ULL * mint(a1 + mod2 - a3).val() * imag.val();
                    auto na2 = mod2 - a2;
                    a[i + offset] = a0 + a2 + a1 + a3;
                    a[i + offset + 1 * p] = a0 + a2 + (2 * mod2 - (a1 + a3));
                    a[i + offset + 2 * p] = a0 + na2 + a1na3imag;
                    a[i + offset + 3 * p] = a0 + na2 + (mod2 - a1na3imag);
                }
                if (s + 1 != (1 << len))
                    rot *= info.rate3[countr_zero(~(unsigned int)(s))];
            }
            len += 2;
        }
    }
}

template <class mint, internal::is_static_modint_t<mint>* = nullptr>
void butterfly_inv(std::vector<mint>& a) {
    int n = int(a.size());
    int h = internal::countr_zero((unsigned int)n);

    static const fft_info<mint> info;

    int len = h;  // a[i, i+(n>>len), i+2*(n>>len), ..] is transformed
    while (len) {
        if (len == 1) {
            int p = 1 << (h - len);
            mint irot = 1;
            for (int s = 0; s < (1 << (len - 1)); s++) {
                int offset = s << (h - len + 1);
                for (int i = 0; i < p; i++) {
                    auto l = a[i + offset];
                    auto r = a[i + offset + p];
                    a[i + offset] = l + r;
                    a[i + offset + p] =
                        (unsigned long long)(mint::mod() + l.val() - r.val()) *
                        irot.val();
                    ;
                }
                if (s + 1 != (1 << (len - 1)))
                    irot *= info.irate2[countr_zero(~(unsigned int)(s))];
            }
            len--;
        } else {
            // 4-base
            int p = 1 << (h - len);
            mint irot = 1, iimag = info.iroot[2];
            for (int s = 0; s < (1 << (len - 2)); s++) {
                mint irot2 = irot * irot;
                mint irot3 = irot2 * irot;
                int offset = s << (h - len + 2);
                for (int i = 0; i < p; i++) {
                    auto a0 = 1ULL * a[i + offset + 0 * p].val();
                    auto a1 = 1ULL * a[i + offset + 1 * p].val();
                    auto a2 = 1ULL * a[i + offset + 2 * p].val();
                    auto a3 = 1ULL * a[i + offset + 3 * p].val();

                    auto a2na3iimag =
                        1ULL *
                        mint((mint::mod() + a2 - a3) * iimag.val()).val();

                    a[i + offset] = a0 + a1 + a2 + a3;
                    a[i + offset + 1 * p] =
                        (a0 + (mint::mod() - a1) + a2na3iimag) * irot.val();
                    a[i + offset + 2 * p] =
                        (a0 + a1 + (mint::mod() - a2) + (mint::mod() - a3)) *
                        irot2.val();
                    a[i + offset + 3 * p] =
                        (a0 + (mint::mod() - a1) + (mint::mod() - a2na3iimag)) *
                        irot3.val();
                }
                if (s + 1 != (1 << (len - 2)))
                    irot *= info.irate3[countr_zero(~(unsigned int)(s))];
            }
            len -= 2;
        }
    }
}

template <class mint, internal::is_static_modint_t<mint>* = nullptr>
std::vector<mint> convolution_naive(const std::vector<mint>& a,
                                    const std::vector<mint>& b) {
    int n = int(a.size()), m = int(b.size());
    std::vector<mint> ans(n + m - 1);
    if (n < m) {
        for (int j = 0; j < m; j++) {
            for (int i = 0; i < n; i++) {
                ans[i + j] += a[i] * b[j];
            }
        }
    } else {
        for (int i = 0; i < n; i++) {
            for (int j = 0; j < m; j++) {
                ans[i + j] += a[i] * b[j];
            }
        }
    }
    return ans;
}

template <class mint, internal::is_static_modint_t<mint>* = nullptr>
std::vector<mint> convolution_fft(std::vector<mint> a, std::vector<mint> b) {
    int n = int(a.size()), m = int(b.size());
    int z = (int)internal::bit_ceil((unsigned int)(n + m - 1));
    a.resize(z);
    internal::butterfly(a);
    b.resize(z);
    internal::butterfly(b);
    for (int i = 0; i < z; i++) {
        a[i] *= b[i];
    }
    internal::butterfly_inv(a);
    a.resize(n + m - 1);
    mint iz = mint(z).inv();
    for (int i = 0; i < n + m - 1; i++) a[i] *= iz;
    return a;
}

}  // namespace internal

template <class mint, internal::is_static_modint_t<mint>* = nullptr>
std::vector<mint> convolution(std::vector<mint>&& a, std::vector<mint>&& b) {
    int n = int(a.size()), m = int(b.size());
    if (!n || !m) return {};

    int z = (int)internal::bit_ceil((unsigned int)(n + m - 1));
    assert((mint::mod() - 1) % z == 0);

    if (std::min(n, m) <= 60) return convolution_naive(a, b);
    return internal::convolution_fft(a, b);
}
template <class mint, internal::is_static_modint_t<mint>* = nullptr>
std::vector<mint> convolution(const std::vector<mint>& a,
                              const std::vector<mint>& b) {
    int n = int(a.size()), m = int(b.size());
    if (!n || !m) return {};

    int z = (int)internal::bit_ceil((unsigned int)(n + m - 1));
    assert((mint::mod() - 1) % z == 0);

    if (std::min(n, m) <= 60) return convolution_naive(a, b);
    return internal::convolution_fft(a, b);
}

template <unsigned int mod = 998244353,
          class T,
          std::enable_if_t<internal::is_integral<T>::value>* = nullptr>
std::vector<T> convolution(const std::vector<T>& a, const std::vector<T>& b) {
    int n = int(a.size()), m = int(b.size());
    if (!n || !m) return {};

    using mint = static_modint<mod>;

    int z = (int)internal::bit_ceil((unsigned int)(n + m - 1));
    assert((mint::mod() - 1) % z == 0);

    std::vector<mint> a2(n), b2(m);
    for (int i = 0; i < n; i++) {
        a2[i] = mint(a[i]);
    }
    for (int i = 0; i < m; i++) {
        b2[i] = mint(b[i]);
    }
    auto c2 = convolution(std::move(a2), std::move(b2));
    std::vector<T> c(n + m - 1);
    for (int i = 0; i < n + m - 1; i++) {
        c[i] = c2[i].val();
    }
    return c;
}

std::vector<long long> convolution_ll(const std::vector<long long>& a,
                                      const std::vector<long long>& b) {
    int n = int(a.size()), m = int(b.size());
    if (!n || !m) return {};

    static constexpr unsigned long long MOD1 = 754974721;  // 2^24
    static constexpr unsigned long long MOD2 = 167772161;  // 2^25
    static constexpr unsigned long long MOD3 = 469762049;  // 2^26
    static constexpr unsigned long long M2M3 = MOD2 * MOD3;
    static constexpr unsigned long long M1M3 = MOD1 * MOD3;
    static constexpr unsigned long long M1M2 = MOD1 * MOD2;
    static constexpr unsigned long long M1M2M3 = MOD1 * MOD2 * MOD3;

    static constexpr unsigned long long i1 =
        internal::inv_gcd(MOD2 * MOD3, MOD1).second;
    static constexpr unsigned long long i2 =
        internal::inv_gcd(MOD1 * MOD3, MOD2).second;
    static constexpr unsigned long long i3 =
        internal::inv_gcd(MOD1 * MOD2, MOD3).second;
        
    static constexpr int MAX_AB_BIT = 24;
    static_assert(MOD1 % (1ull << MAX_AB_BIT) == 1, "MOD1 isn't enough to support an array length of 2^24.");
    static_assert(MOD2 % (1ull << MAX_AB_BIT) == 1, "MOD2 isn't enough to support an array length of 2^24.");
    static_assert(MOD3 % (1ull << MAX_AB_BIT) == 1, "MOD3 isn't enough to support an array length of 2^24.");
    assert(n + m - 1 <= (1 << MAX_AB_BIT));

    auto c1 = convolution<MOD1>(a, b);
    auto c2 = convolution<MOD2>(a, b);
    auto c3 = convolution<MOD3>(a, b);

    std::vector<long long> c(n + m - 1);
    for (int i = 0; i < n + m - 1; i++) {
        unsigned long long x = 0;
        x += (c1[i] * i1) % MOD1 * M2M3;
        x += (c2[i] * i2) % MOD2 * M1M3;
        x += (c3[i] * i3) % MOD3 * M1M2;
        // B = 2^63, -B <= x, r(real value) < B
        // (x, x - M, x - 2M, or x - 3M) = r (mod 2B)
        // r = c1[i] (mod MOD1)
        // focus on MOD1
        // r = x, x - M', x - 2M', x - 3M' (M' = M % 2^64) (mod 2B)
        // r = x,
        //     x - M' + (0 or 2B),
        //     x - 2M' + (0, 2B or 4B),
        //     x - 3M' + (0, 2B, 4B or 6B) (without mod!)
        // (r - x) = 0, (0)
        //           - M' + (0 or 2B), (1)
        //           -2M' + (0 or 2B or 4B), (2)
        //           -3M' + (0 or 2B or 4B or 6B) (3) (mod MOD1)
        // we checked that
        //   ((1) mod MOD1) mod 5 = 2
        //   ((2) mod MOD1) mod 5 = 3
        //   ((3) mod MOD1) mod 5 = 4
        long long diff =
            c1[i] - internal::safe_mod((long long)(x), (long long)(MOD1));
        if (diff < 0) diff += MOD1;
        static constexpr unsigned long long offset[5] = {
            0, 0, M1M2M3, 2 * M1M2M3, 3 * M1M2M3};
        x -= offset[diff % 5];
        c[i] = x;
    }

    return c;
}

}  // namespace atcoder


using namespace atcoder;
using ll = long long;
#include <algorithm>
#include <array>
#include <bitset>
#include <cassert>
#include <cctype>
#include <cfenv>
#include <cfloat>
#include <chrono>
#include <cinttypes>
#include <climits>
#include <cmath>
#include <complex>
#include <cstdarg>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <deque>
#include <fstream>
#include <functional>
#include <initializer_list>
#include <iomanip>
#include <ios>
#include <iostream>
#include <istream>
#include <iterator>
#include <limits>
#include <list>
#include <map>
#include <memory>
#include <new>
#include <numeric>
#include <ostream>
#include <queue>
#include <random>
#include <set>
#include <sstream>
#include <stack>
#include <streambuf>
#include <string>
#include <tuple>
#include <type_traits>
#include <typeinfo>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
ll MOD;
// utility
namespace Nyaan {
using ll = long long;
using i64 = long long;
using u64 = unsigned long long;
using i128 = __int128_t;
using u128 = __uint128_t;

template <typename T>
using V = vector<T>;
template <typename T>
using VV = vector<vector<T>>;
using vi = vector<int>;
using vl = vector<long long>;
using vd = V<double>;
using vs = V<string>;
using vvi = vector<vector<int>>;
using vvl = vector<vector<long long>>;

template <typename T, typename U>
struct P : pair<T, U> {
  template <typename... Args>
  P(Args... args) : pair<T, U>(args...) {}

  using pair<T, U>::first;
  using pair<T, U>::second;

  T &x() { return first; }
  const T &x() const { return first; }
  U &y() { return second; }
  const U &y() const { return second; }

  P &operator+=(const P &r) {
    first += r.first;
    second += r.second;
    return *this;
  }
  P &operator-=(const P &r) {
    first -= r.first;
    second -= r.second;
    return *this;
  }
  P &operator*=(const P &r) {
    first *= r.first;
    second *= r.second;
    return *this;
  }
  P operator+(const P &r) const { return P(*this) += r; }
  P operator-(const P &r) const { return P(*this) -= r; }
  P operator*(const P &r) const { return P(*this) *= r; }
};

using pl = P<ll, ll>;
using pi = P<int, int>;
using vp = V<pl>;

constexpr int inf = 1001001001;
constexpr long long infLL = 4004004004004004004LL;

template <typename T>
int sz(const T &t) {
  return t.size();
}

template <typename T, typename U>
inline bool amin(T &x, U y) {
  return (y < x) ? (x = y, true) : false;
}
template <typename T, typename U>
inline bool amax(T &x, U y) {
  return (x < y) ? (x = y, true) : false;
}

template <typename T>
inline T Max(const vector<T> &v) {
  return *max_element(begin(v), end(v));
}
template <typename T>
inline T Min(const vector<T> &v) {
  return *min_element(begin(v), end(v));
}
template <typename T>
inline long long Sum(const vector<T> &v) {
  return accumulate(begin(v), end(v), 0LL);
}

template <typename T>
int lb(const vector<T> &v, const T &a) {
  return lower_bound(begin(v), end(v), a) - begin(v);
}
template <typename T>
int ub(const vector<T> &v, const T &a) {
  return upper_bound(begin(v), end(v), a) - begin(v);
}

constexpr long long TEN(int n) {
  long long ret = 1, x = 10;
  for (; n; x *= x, n >>= 1) ret *= (n & 1 ? x : 1);
  return ret;
}

template <typename T, typename U>
pair<T, U> mkp(const T &t, const U &u) {
  return make_pair(t, u);
}

template <typename T>
vector<T> mkrui(const vector<T> &v, bool rev = false) {
  vector<T> ret(v.size() + 1);
  if (rev) {
    for (int i = int(v.size()) - 1; i >= 0; i--) ret[i] = v[i] + ret[i + 1];
  } else {
    for (int i = 0; i < int(v.size()); i++) ret[i + 1] = ret[i] + v[i];
  }
  return ret;
};

template <typename T>
vector<T> mkuni(const vector<T> &v) {
  vector<T> ret(v);
  sort(ret.begin(), ret.end());
  ret.erase(unique(ret.begin(), ret.end()), ret.end());
  return ret;
}

template <typename F>
vector<int> mkord(int N, F f) {
  vector<int> ord(N);
  iota(begin(ord), end(ord), 0);
  sort(begin(ord), end(ord), f);
  return ord;
}

template <typename T>
vector<int> mkinv(vector<T> &v) {
  int max_val = *max_element(begin(v), end(v));
  vector<int> inv(max_val + 1, -1);
  for (int i = 0; i < (int)v.size(); i++) inv[v[i]] = i;
  return inv;
}

}  // namespace Nyaan

// bit operation
namespace Nyaan {
__attribute__((target("popcnt"))) inline int popcnt(const u64 &a) {
  return _mm_popcnt_u64(a);
}
inline int lsb(const u64 &a) { return a ? __builtin_ctzll(a) : 64; }
inline int ctz(const u64 &a) { return a ? __builtin_ctzll(a) : 64; }
inline int msb(const u64 &a) { return a ? 63 - __builtin_clzll(a) : -1; }
template <typename T>
inline int gbit(const T &a, int i) {
  return (a >> i) & 1;
}
template <typename T>
inline void sbit(T &a, int i, bool b) {
  if (gbit(a, i) != b) a ^= T(1) << i;
}
constexpr long long PW(int n) { return 1LL << n; }
constexpr long long MSK(int n) { return (1LL << n) - 1; }
}  // namespace Nyaan
template<class T>bool chmax(T &a,const T b) { if (a<b) { a=b; return 1; } return 0;}
// inout
namespace Nyaan {

template <typename T, typename U>
ostream &operator<<(ostream &os, const pair<T, U> &p) {
  os << p.first << " " << p.second;
  return os;
}
template <typename T, typename U>
istream &operator>>(istream &is, pair<T, U> &p) {
  is >> p.first >> p.second;
  return is;
}

template <typename T>
ostream &operator<<(ostream &os, const vector<T> &v) {
  int s = (int)v.size();
  for (int i = 0; i < s; i++) os << (i ? " " : "") << v[i];
  return os;
}
template <typename T>
istream &operator>>(istream &is, vector<T> &v) {
  for (auto &x : v) is >> x;
  return is;
}

void in() {}
template <typename T, class... U>
void in(T &t, U &... u) {
  cin >> t;
  in(u...);
}

void out() { cout << "\n"; }
template <typename T, class... U, char sep = ' '>
void out(const T &t, const U &... u) {
  cout << t.val();
  if (sizeof...(u)) cout << sep;
  out(u...);
}

void outr() {}
template <typename T, class... U, char sep = ' '>
void outr(const T &t, const U &... u) {
  cout << t;
  outr(u...);
}

struct IoSetupNya {
  IoSetupNya() {
    cin.tie(nullptr);
    ios::sync_with_stdio(false);
    cout << fixed << setprecision(15);
    cerr << fixed << setprecision(7);
  }
} iosetupnya;

}  // namespace Nyaan

// debug
namespace DebugImpl {

template <typename U, typename = void>
struct is_specialize : false_type {};
template <typename U>
struct is_specialize<
    U, typename conditional<false, typename U::iterator, void>::type>
    : true_type {};
template <typename U>
struct is_specialize<
    U, typename conditional<false, decltype(U::first), void>::type>
    : true_type {};
template <typename U>
struct is_specialize<U, enable_if_t<is_integral<U>::value, void>> : true_type {
};

void dump(const char& t) { cerr << t; }

void dump(const string& t) { cerr << t; }

void dump(const bool& t) { cerr << (t ? "true" : "false"); }

template <typename U,
          enable_if_t<!is_specialize<U>::value, nullptr_t> = nullptr>
void dump(const U& t) {
  cerr << t;
}

template <typename T>
void dump(const T& t, enable_if_t<is_integral<T>::value>* = nullptr) {
  string res;
  if (t == Nyaan::inf) res = "inf";
  if constexpr (is_signed<T>::value) {
    if (t == -Nyaan::inf) res = "-inf";
  }
  if constexpr (sizeof(T) == 8) {
    if (t == Nyaan::infLL) res = "inf";
    if constexpr (is_signed<T>::value) {
      if (t == -Nyaan::infLL) res = "-inf";
    }
  }
  if (res.empty()) res = to_string(t);
  cerr << res;
}

template <typename T, typename U>
void dump(const pair<T, U>&);
template <typename T>
void dump(const pair<T*, int>&);

template <typename T>
void dump(const T& t,
          enable_if_t<!is_void<typename T::iterator>::value>* = nullptr) {
  cerr << "[ ";
  for (auto it = t.begin(); it != t.end();) {
    dump(*it);
    cerr << (++it == t.end() ? "" : ", ");
  }
  cerr << " ]";
}

template <typename T, typename U>
void dump(const pair<T, U>& t) {
  cerr << "( ";
  dump(t.first);
  cerr << ", ";
  dump(t.second);
  cerr << " )";
}

template <typename T>
void dump(const pair<T*, int>& t) {
  cerr << "[ ";
  for (int i = 0; i < t.second; i++) {
    dump(t.first[i]);
    cerr << (i == t.second - 1 ? "" : ", ");
  }
  cerr << " ]";
}

void trace() { cerr << endl; }
template <typename Head, typename... Tail>
void trace(Head&& head, Tail&&... tail) {
  cerr << " ";
  dump(head);
  if (sizeof...(tail) != 0) cerr << ",";
  trace(forward<Tail>(tail)...);
}

}  // namespace DebugImpl

#ifdef NyaanDebug
#define trc(...)                            \
  do {                                      \
    cerr << "## " << #__VA_ARGS__ << " = "; \
    DebugImpl::trace(__VA_ARGS__);          \
  } while (0)
#else
#define trc(...) (void(0))
#endif

// macro
#define each(x, v) for (auto&& x : v)
#define each2(x, y, v) for (auto&& [x, y] : v)
#define all(v) (v).begin(), (v).end()
#define rep(i, N) for (long long i = 0; i < (long long)(N); i++)
#define repr(i, N) for (long long i = (long long)(N)-1; i >= 0; i--)
#define rep1(i, N) for (long long i = 1; i <= (long long)(N); i++)
#define repr1(i, N) for (long long i = (N); (long long)(i) > 0; i--)
#define reg(i, a, b) for (long long i = (a); i < (b); i++)
#define regr(i, a, b) for (long long i = (b)-1; i >= (a); i--)
#define fi first
#define se second
#define ini(...)   \
  int __VA_ARGS__; \
  in(__VA_ARGS__)
#define inl(...)         \
  long long __VA_ARGS__; \
  in(__VA_ARGS__)
#define ins(...)      \
  string __VA_ARGS__; \
  in(__VA_ARGS__)
#define in2(s, t)                           \
  for (int i = 0; i < (int)s.size(); i++) { \
    in(s[i], t[i]);                         \
  }
#define in3(s, t, u)                        \
  for (int i = 0; i < (int)s.size(); i++) { \
    in(s[i], t[i], u[i]);                   \
  }
#define in4(s, t, u, v)                     \
  for (int i = 0; i < (int)s.size(); i++) { \
    in(s[i], t[i], u[i], v[i]);             \
  }
#define die(...)             \
  do {                       \
    Nyaan::out(__VA_ARGS__); \
    return;                  \
  } while (0)

namespace Nyaan {
void solve();
}
//int main() { Nyaan::solve(); }

//





template <class T>
struct Matrix {
  vector<vector<T> > A;

  Matrix() = default;
  Matrix(int n, int m) : A(n, vector<T>(m, T())) {}
  Matrix(int n) : A(n, vector<T>(n, T())){};

  int H() const { return A.size(); }

  int W() const { return A[0].size(); }

  int size() const { return A.size(); }

  inline const vector<T> &operator[](int k) const { return A[k]; }

  inline vector<T> &operator[](int k) { return A[k]; }

  static Matrix I(int n) {
    Matrix mat(n);
    for (int i = 0; i < n; i++) mat[i][i] = 1;
    return (mat);
  }

  Matrix &operator+=(const Matrix &B) {
    int n = H(), m = W();
    assert(n == B.H() && m == B.W());
    for (int i = 0; i < n; i++)
      for (int j = 0; j < m; j++) (*this)[i][j] += B[i][j];
    return (*this);
  }

  Matrix &operator-=(const Matrix &B) {
    int n = H(), m = W();
    assert(n == B.H() && m == B.W());
    for (int i = 0; i < n; i++)
      for (int j = 0; j < m; j++) (*this)[i][j] -= B[i][j];
    return (*this);
  }

  Matrix &operator*=(const Matrix &B) {
    int n = H(), m = B.W(), p = W();
    assert(p == B.H());
    vector<vector<T> > C(n, vector<T>(m, T{}));
    for (int i = 0; i < n; i++)
      for (int k = 0; k < p; k++)
        for (int j = 0; j < m; j++) C[i][j] += (*this)[i][k] * B[k][j];
    A.swap(C);
    return (*this);
  }

  Matrix &operator^=(long long k) {
    Matrix B = Matrix::I(H());
    while (k > 0) {
      if (k & 1) B *= *this;
      *this *= *this;
      k >>= 1LL;
    }
    A.swap(B.A);
    return (*this);
  }

  Matrix operator+(const Matrix &B) const { return (Matrix(*this) += B); }

  Matrix operator-(const Matrix &B) const { return (Matrix(*this) -= B); }

  Matrix operator*(const Matrix &B) const { return (Matrix(*this) *= B); }

  Matrix operator^(const long long k) const { return (Matrix(*this) ^= k); }

  bool operator==(const Matrix &B) const {
    assert(H() == B.H() && W() == B.W());
    for (int i = 0; i < H(); i++)
      for (int j = 0; j < W(); j++)
        if (A[i][j] != B[i][j]) return false;
    return true;
  }

  bool operator!=(const Matrix &B) const {
    assert(H() == B.H() && W() == B.W());
    for (int i = 0; i < H(); i++)
      for (int j = 0; j < W(); j++)
        if (A[i][j] != B[i][j]) return true;
    return false;
  }

  friend ostream &operator<<(ostream &os, const Matrix &p) {
    int n = p.H(), m = p.W();
    for (int i = 0; i < n; i++) {
      os << (i ? "   " : "") << "[";
      for (int j = 0; j < m; j++) {
        os << p[i][j] << (j + 1 == m ? "]\n" : ",");
      }
    }
    return (os);
  }

  T determinant() const {
    Matrix B(*this);
    assert(H() == W());
    T ret = 1;
    for (int i = 0; i < H(); i++) {
      int idx = -1;
      for (int j = i; j < W(); j++) {
        if (B[j][i] != 0) {
          idx = j;
          break;
        }
      }
      if (idx == -1) return 0;
      if (i != idx) {
        ret *= T(-1);
        swap(B[i], B[idx]);
      }
      ret *= B[i][i];
      T inv = T(1) / B[i][i];
      for (int j = 0; j < W(); j++) {
        B[i][j] *= inv;
      }
      for (int j = i + 1; j < H(); j++) {
        T a = B[j][i];
        if (a == 0) continue;
        for (int k = i; k < W(); k++) {
          B[j][k] -= B[i][k] * a;
        }
      }
    }
    return ret;
  }
};

/**
 * @brief 行列ライブラリ
 */

template <typename mint>
std::pair<int, mint> GaussElimination(vector<vector<mint>> &a,
                                      bool LE = false) {
  int H = a.size(), W = a[0].size();
  int rank = 0, je = LE ? W - 1 : W;
  mint det = 1;
  for (int j = 0; j < je; j++) {
    int idx = -1;
    for (int i = rank; i < H; i++) {
      if (a[i][j] != mint(0)) {
        idx = i;
        break;
      }
    }
    if (idx == -1) {
      det = 0;
      continue;
    }
    if (rank != idx) {
      det = -det;
      swap(a[rank], a[idx]);
    }
    det *= a[rank][j];
    if (LE && a[rank][j] != mint(1)) {
      mint coeff = a[rank][j].inverse();
      for (int k = j; k < W; k++) a[rank][k] *= coeff;
    }
    int is = LE ? 0 : rank + 1;
    for (int i = is; i < H; i++) {
      if (i == rank) continue;
      if (a[i][j] != mint(0)) {
        mint coeff = a[i][j] / a[rank][j];
        for (int k = j; k < W; k++) a[i][k] -= a[rank][k] * coeff;
      }
    }
    rank++;
  }
  return make_pair(rank, det);
}


template <typename mint>
vector<vector<mint>> LinearEquation(vector<vector<mint>> a, vector<mint> b) {
  int H = a.size(), W = a[0].size();
  for (int i = 0; i < H; i++) a[i].push_back(b[i]);
  auto p = GaussElimination(a, true);
  int rank = p.first;

  for (int i = rank; i < H; ++i) {
    if (a[i][W] != 0) return vector<vector<mint>>{};
  }

  vector<vector<mint>> res(1, vector<mint>(W));
  vector<int> pivot(W, -1);
  for (int i = 0, j = 0; i < rank; ++i) {
    while (a[i][j] == 0) ++j;
    res[0][j] = a[i][W], pivot[j] = i;
  }
  for (int j = 0; j < W; ++j) {
    if (pivot[j] == -1) {
      vector<mint> x(W);
      x[j] = 1;
      for (int k = 0; k < j; ++k) {
        if (pivot[k] != -1) x[k] = -a[pivot[k]][j];
      }
      res.push_back(x);
    }
  }
  return res;
}


// コンストラクタの MAX に 「C(n, r) や fac(n) でクエリを投げる最大の n 」
// を入れると倍速くらいになる
// mod を超えて前計算して 0 割りを踏むバグは対策済み
template <typename T>
struct Binomial {
  vector<T> f, g, h;
  Binomial(int MAX = 0) {
    assert(T::get_mod() != 0 && "Binomial<mint>()");
    f.resize(1, T{1});
    g.resize(1, T{1});
    h.resize(1, T{1});
    if (MAX > 0) extend(MAX + 1);
  }

  void extend(int m = -1) {
    int n = f.size();
    if (m == -1) m = n * 2;
    m = min<int>(m, MOD);
    if (n >= m) return;
    f.resize(m);
    g.resize(m);
    h.resize(m);
    for (int i = n; i < m; i++) f[i] = f[i - 1] * T(i);
    g[m - 1] = f[m - 1].inv();
    h[m - 1] = g[m - 1] * f[m - 2];
    for (int i = m - 2; i >= n; i--) {
      g[i] = g[i + 1] * T(i + 1);
      h[i] = g[i] * f[i - 1];
    }
  }

  T fac(int i) {
    if (i < 0) return T(0);
    while (i >= (int)f.size()) extend();
    return f[i];
  }

  T finv(int i) {
    if (i < 0) return T(0);
    while (i >= (int)g.size()) extend();
    return g[i];
  }

  T inv(int i) {
    if (i < 0) return -inv(-i);
    while (i >= (int)h.size()) extend();
    return h[i];
  }

  T C(int n, int r) {
    if (n < 0 || n < r || r < 0) return T(0);
    return fac(n) * finv(n - r) * finv(r);
  }

  inline T operator()(int n, int r) { return C(n, r); }

  template <typename I>
  T multinomial(const vector<I>& r) {
    static_assert(is_integral<I>::value == true);
    int n = 0;
    for (auto& x : r) {
      if (x < 0) return T(0);
      n += x;
    }
    T res = fac(n);
    for (auto& x : r) res *= finv(x);
    return res;
  }

  template <typename I>
  T operator()(const vector<I>& r) {
    return multinomial(r);
  }

  T C_naive(int n, int r) {
    if (n < 0 || n < r || r < 0) return T(0);
    T ret = T(1);
    r = min(r, n - r);
    for (int i = 1; i <= r; ++i) ret *= inv(i) * (n--);
    return ret;
  }

  T P(int n, int r) {
    if (n < 0 || n < r || r < 0) return T(0);
    return fac(n) * finv(n - r);
  }

  // [x^r] 1 / (1-x)^n
  T H(int n, int r) {
    if (n < 0 || r < 0) return T(0);
    return r == 0 ? 1 : C(n + r - 1, r);
  }
};
template<class T>
vector<T> NTT(vector<T> a,vector<T> b){
  ll nmod=T::mod();
  int n=a.size();
  int m=b.size();
  vector<int> x1(n);
  vector<int> y1(m);
  for(int i=0;i<n;i++){
    ll tmp1,tmp2,tmp3;
    tmp1=a[i].val();
    x1[i]=tmp1;
  }
  for(int i=0;i<m;i++){
    ll tmp1,tmp2,tmp3;
    tmp1=b[i].val();
    y1[i]=tmp1;
  }
  auto z1=convolution<167772161>(x1,y1);
  auto z2=convolution<469762049>(x1,y1);
  auto z3=convolution<1224736769>(x1,y1);
  vector<T> res(n+m-1);
  ll m1=167772161;
  ll m2=469762049;
  ll m3=1224736769;
  ll m1m2=104391568;
  ll m1m2m3=721017874;
  ll mm12=m1*m2%nmod;
  for(int i=0;i<n+m-1;i++){
    int v1=(z2[i]-z1[i])*m1m2%m2;
    if(v1<0) v1+=m2;
    int v2=(z3[i]-(z1[i]+v1*m1)%m3)*m1m2m3%m3;
    if(v2<0) v2+=m3;
    res[i]=(z1[i]+v1*m1+v2*mm12);
  }
  return res;
}
template<class T>
struct FormalPowerSeries:vector<T>{
  using vector<T>::vector;
  using F=FormalPowerSeries;
  F &operator=(const vector<T> &g){
    int n=g.size();
    int m=(*this).size();
    (*this).resize(n);
    for(int i=0;i<n;i++) (*this)[i]=g[i];
    return (*this);
  }
  F &operator=(const F &g){
    int n=g.size();
    int m=(*this).size();
    (*this).resize(n);
    for(int i=0;i<n;i++) (*this)[i]=g[i];
    return (*this);
  }
  F &operator-(){
    for(int i=0;i<(*this).size();i++) (*this)[i]*=-1;
    return (*this);
  }
  F &operator+=(const F &g){
    int n=(*this).size();
    int m=g.size();
    if(n<m) (*this).resize(m);
    for(int i=0;i<m;i++) (*this)[i]+=g[i];
    return (*this);
  }
  F &operator+=(const T &r){
    if((*this).size()==0) (*this).resize(1);
    (*this)[0]+=r;
    return (*this);
  }
  F &operator-=(const F &g){
    int n=(*this).size();
    int m=g.size();
    if(n<m) (*this).resize(m);
    for(int i=0;i<m;i++) (*this)[i]-=g[i];
    return (*this);
  }
  F &operator-=(const T &r){
    if((*this).size()==0) (*this).resize(1);
    (*this)[0]-=r;
    return (*this);
  }
  F &operator*=(const F &g){
    (*this)=NTT((*this),g);
    return (*this);
  }
  F &operator*=(const T &r){
    for(int i=0;i<(*this).size();i++) (*this)[i]*=r;
    return (*this);
  }
  F &operator/=(const F &g){
    int n=(*this).size();
    (*this)=NTT((*this),g.inv());
    (*this).resize(n);
    return (*this);
  }
  F &operator/=(T r){
    r=r.inv();
    for(int i=0;i<(*this).size();i++) (*this)[i]*=r;
    return (*this);
  }
  F &operator<<=(const int d) {
    int n=(*this).size();
    (*this).insert((*this).begin(),d,0);
    return *this;
  }
  F &operator>>=(const int d) {
    int n=(*this).size();
    (*this).erase((*this).begin(),(*this).begin()+min(n, d));
    return *this;
  }
  F operator*(const T &g) const { return F(*this)*=g;}
  F operator-(const T &g) const { return F(*this)-=g;}
  F operator+(const T &g) const { return F(*this)+=g;}
  F operator/(const T &g) const { return F(*this)/=g;}
  F operator*(const F &g) const { return F(*this)*=g;}
  F operator-(const F &g) const { return F(*this)-=g;}
  F operator+(const F &g) const { return F(*this)+=g;}
  F operator/(const F &g) const { return F(*this)/=g;}
  F operator%(const F &g) const { return F(*this)%=g;}
  F operator<<(const int d) const { return F(*this)<<=d;}
  F operator>>(const int d) const { return F(*this)>>=d;}  
  F pre(int sz) const {
    return F(begin(*this), begin(*this) + min((int)this->size(), sz));
  }
  F inv(int deg=-1) const {
    int n=(*this).size();
    if(deg==-1) deg=n;
    assert(n>0&&(*this)[0]!=T(0));
    F g(1);
    g[0]=(*this)[0].inv();
    while(g.size()<deg){
      int m=g.size();
      F f(begin(*this),begin(*this)+min(n,2*m));
      F r(g);
      f.resize(2*m);
      r.resize(2*m);
      internal::butterfly(f);
      internal::butterfly(r);
      for(int i=0;i<2*m;i++) f[i]*=r[i];
      internal::butterfly_inv(f);
      f.erase(f.begin(),f.begin()+m);
      f.resize(2*m);
      internal::butterfly(f);
      for(int i=0;i<2*m;i++) f[i]*=r[i];
      internal::butterfly_inv(f);
      T in=T(2*m).inv();
      in*=-in;
      for(int i=0;i<m;i++) f[i]*=in;
      g.insert(g.end(),f.begin(),f.begin()+m);
    }
    return g.pre(deg);
  }
  T eval(const T &a){
    T x=1;
    T ret=0;
    for(int i=0;i<(*this).size();i++){
      ret+=(*this)[i]*x;
      x*=a;
    }
    return ret;
  }
  void onemul(const int d,const T c){
    int n=(*this).size();
    for(int i=n-d-1;i>=0;i--){
      (*this)[i+d]+=(*this)[i]*c;
    }
  }
  void onediv(const int d,const T c){
    int n=(*this).size();
    for(int i=0;i<n-d;i++){
      (*this)[i+d]-=(*this)[i]*c;
    }
  }
  F diff() const {
    int n=(*this).size();
    F ret(n);
    for(int i=1;i<n;i++) ret[i-1]=(*this)[i]*i;
    ret[n-1]=0;
    return ret;
  }
  F integral() const {
    int n=(*this).size(),mod =T::mod();
    vector<T> inv(n);
    inv[1]=1;
    for(int i=2;i<n;i++) inv[i]=T(mod)-inv[mod%i]*(mod/i);
    F ret(n);
    for(int i=n-2;i>=0;i--) ret[i+1]=(*this)[i]*inv[i+1];
    ret[0]=0;
    return ret;
  }
  F log(int deg=-1) const {
    int n=(*this).size();
    if(deg==-1) deg=n;
    assert((*this)[0]==T(1));
    return ((*this).diff()*(*this).inv(deg)).pre(deg).integral();
  }
  F exp(int deg=-1) const {
    int n=(*this).size();
    if(deg==-1) deg=n;
    assert(n==0||(*this)[0]==0);
    F Inv;
    Inv.reserve(deg);
    Inv.push_back(T(0));
    Inv.push_back(T(1));
    auto inplace_integral = [&](F& f) -> void {
    const int n = (int)f.size();
      int mod=T::mod();
      while(Inv.size()<=n){
        int i = Inv.size();
        Inv.push_back((-Inv[mod%i])*(mod/i));
      }
      f.insert(begin(f),T(0));
      for(int i=1;i<=n;i++) f[i]*=Inv[i];
    };
    auto inplace_diff = [](F &f) -> void {
      if(f.empty()) return;
      f.erase(begin(f));
      T coeff=1,one=1;
      for(int i=0;i<f.size();i++){
        f[i]*=coeff;
        coeff++;
      }
    };
    F b{1,1<(int)(*this).size()?(*this)[1]:0},c{1},z1,z2{1,1};
    for(int m=2;m<=deg;m<<=1){
      auto y=b;
      y.resize(2*m);
      internal::butterfly(y);
      z1=z2;
      F z(m);
      for(int i=0;i<m;i++) z[i]=y[i]*z1[i];
      internal::butterfly_inv(z);
      T si=T(m).inv();
      for(int i=0;i<m;i++) z[i]*=si;
      fill(begin(z),begin(z)+m/2,T(0));
      internal::butterfly(z);
      for(int i=0;i<m;i++) z[i]*=-z1[i];
      internal::butterfly_inv(z);
      for(int i=0;i<m;i++) z[i]*=si;
      c.insert(end(c),begin(z)+m/2,end(z));
      z2=c;
      z2.resize(2*m);
      internal::butterfly(z2);
      F x(begin((*this)),begin((*this))+min<int>((*this).size(),m));
      x.resize(m);
      inplace_diff(x);
      x.push_back(T(0));
      internal::butterfly(x);
      for(int i=0;i<m;i++) x[i]*=y[i];
      internal::butterfly_inv(x);
      for(int i=0;i<m;i++) x[i]*=si;
      x-=b.diff();
      x.resize(2*m);
      for(int i=0;i<m-1;i++) x[m+i]=x[i],x[i]=T(0);
      internal::butterfly(x);
      for(int i=0;i<2*m;i++) x[i]*=z2[i];
      internal::butterfly_inv(x);
      T si2=T(m<<1).inv();
      for(int i=0;i<2*m;i++) x[i]*=si2;
      x.pop_back();
      inplace_integral(x);
      for(int i=m;i<min<int>((*this).size(),2*m);i++) x[i]+=(*this)[i];
      fill(begin(x),begin(x)+m,T(0));
      internal::butterfly(x);
      for(int i=0;i<2*m;i++) x[i]*=y[i];
      internal::butterfly_inv(x);
      for(int i=0;i<2*m;i++) x[i]*=si2;
      b.insert(end(b),begin(x)+m,end(x));
    }
    return b.pre(deg);
  }
  F pow(ll m){
    int n=(*this).size();
    int x=0;
    while(x<(*this).size()&&(*this)[x]==T(0)){
      x++;
    }
    if(m==0){
      F ret(n);
      ret[0]=1;
      return ret;
    }
    if(x*m>=n){
      F ret(n);
      return ret;
    }
    F f(n-x);
    T y=(*this)[x];
    for(int i=x;i<n;i++) f[i-x]=(*this)[i]/y;
    f=f.log();
    for(int i=0;i<f.size();i++) f[i]*=m;
    f=f.exp();
    y=y.pow(m);
    for(int i=0;i<f.size();i++) f[i]*=y;
    F ret(n);
    for(int i=x*m;i<n;i++) ret[i]=f[i-x*m];
    return ret;
  }
  F shift(T c){
    int n=(*this).size();
    int mod=T::mod();
    vector<T> inv(n+1);
    inv[1]=1;
    for(int i=2;i<=n;i++) inv[i]=mod-inv[mod%i]*(mod/i);
    T x=1;
    for(int i=0;i<n;i++){
      (*this)[i]*=x;
      x*=(i+1);
    }
    F g(n);
    T y=1;
    T now=1;
    for(int i=0;i<n;i++){
      g[n-i-1]=now*y;
      now*=c;
      y*=inv[i+1];
    }
    auto tmp=convolution(g,(*this));
    T z=1;
    for(int i=0;i<n;i++){
      (*this)[i]=tmp[n+i-1]*z;
      z*=inv[i+1];
    }
    return (*this);
  }
  pair<F,F> division(F g){
    F f=(*this);
    int n=f.size();
    int m=g.size();
    if(n<m){
      F p(0);
      return {p,f};
    }
    F p(n-m+1),q(n-m+1);
    for(int i=0;i<n-m+1;i++) p[i]=f[n-i-1];
    for(int i=0;i<n-m+1&&i<m;i++) q[i]=g[m-i-1];
    p/=q;
    for(int i=0;i<(n-m+1)/2;i++) swap(p[i],p[(n-m+1)-i-1]);
    g.resize(n);
    g*=p;
    for(int i=0;i<n;i++) f[i]-=g[i];
    int v=n-m+1,u=0;
    for(int i=0;i<n;i++) if(f[i].val()) chmax(u,i+1);
    p.resize(v);
    f.resize(u);
    return {p,f};
  }
  vector<T> multieva(vector<T> p){
    int m=p.size();
    int n=(*this).size();
    int M=1;
    int l=0;
    while(M<m){
      M*=2;
      l++;
    }
    p.resize(M);
    swap(m,M);
    vector<vector<F>> g(l+1);
    g[0].resize(m);
    for(int i=0;i<m;i++){
      g[0][i].resize(2);
      g[0][i][0]=-p[i];
      g[0][i][1]=1;
    }
    for(int i=0;i<l;i++){
      g[i+1].resize(m>>(i+1));
      for(int j=0;j<(m>>(i+1));j++) g[i+1][j]=g[i][2*j]*g[i][2*j+1];
    }
    g[l][0]=(*this).division(g[l][0]).se;
    for(int i=l;i>=1;i--){
      for(int j=0;j<(m>>(i-1));j++){
        g[i-1][j]=g[i][j/2].division(g[i-1][j]).se;
      }
    }
    for(int i=0;i<M;i++) if(g[0][i].size()==0) g[0][i].resize(1);
    vector<T> ret(M);
    for(int i=0;i<M;i++) ret[i]=g[0][i][0];
    return ret;
  }
};
template<class T>
void GaussJordan(vector<vector<T>> &A,bool is_extended = false){
  ll m=A.size(),n=A[0].size();
  ll rank=0;
  for(int i=0;i<n;i++){
    if(is_extended&&i==n-1) break;
    ll p=-1;
    for(int j=rank;j<m;j++){
      if(A[j][i]!=T(0)){
        p=j;
        break;
      }
    }
    if(p==-1) continue;
    swap(A[p],A[rank]);
    auto k=A[rank][i];
    for(int i2=0;i2<n;i2++){
      A[rank][i2]/=k;
    }
    for(int j=0;j<m;j++){
      if(j!=rank&&A[j][i]!=T(0)){
        auto fac=A[j][i];
        for(int i2=0;i2<n;i2++){
          A[j][i2]-=A[rank][i2]*fac;
        }
      }
    }
    rank++;
  }
}
 
template<class T>
void linear_equation(vector<vector<T>> a, vector<T> b, vector<T> &res) {
  ll m=a.size(),n=a[0].size();
  vector<vector<T>> M(m,vector<T>(n+1));
  for(int i=0;i<m;i++){
    for(int j=0;j<n;j++){
      M[i][j]=a[i][j];
    }
    M[i][n]=b[i];
  }
  GaussJordan(M,true);
  res.assign(n,0);
  for(int i=0;i<n;i++) res[i]=M[i][n];
}
template<class F>
pair<F,F> Characteristic_equation(const F &a) {
  using T=typename F::value_type;
  ll n=a.size();
  ll p=n/2;
  ll u=p+(p+1);
  vector<vector<T>> f(u,vector<T>(u));
  f[0][0]=1;
  for(int i=1;i<=p;i++){
    f[i][i-1]=-1;
  }
  for(int i=p;i<u;i++){
    ll t=0;
    for(int j=1+i-p;j<u;j++){
      f[j][i]=a[t];
      t++;
    }
  }
  vector<T> b(u);
  b[0]=1;
  vector<T> res(u);
  linear_equation(f,b,res);
  F X(p),Y(p+1);
  for(int i=0;i<p;i++) X[i]=res[i];
  for(int j=p;j<res.size();j++) Y[j-p]=res[j];
  return {X,Y};
}
template <class T>
T getK(FormalPowerSeries<T> p, FormalPowerSeries<T> q,ll k){
  if(p.size()==0) return 0;
  if(k==0) return p[0]/q[0];
  if(p.size()>=q.size()){
    p=p.division(q).se;
  }
  if(q[0].val()!=1){
    for(int i=0;i<p.size();i++) p[i]/=q[0];
    for(int i=q.size()-1;i>=0;i--) q[i]/=q[0];
  }
  if(k<0) return T(0);
  ll d=q.size();
  p.resize(d-1);
  while(k){
    auto qn=q;
    for(int i=1;i<d;i+=2) qn[i]*=-1;
    p*=qn;
    q*=qn;
    for(int i=0;i<d-1;i++){
      p[i]=p[(i<<1)|(k&1)];
    }
    for(int i=0;i<d;i++){
      q[i]=q[(i<<1)];
    }
    p.resize(d-1);
    q.resize(d);
    k/=2;
  }
  return p[0];
}
using mint = modint;
using fps=FormalPowerSeries<mint>;
template <typename mint>
FormalPowerSeries<mint> SamplePointShift(FormalPowerSeries<mint>& ys, mint m) {
  static Binomial<mint> C;
  int d = ys.size() - 1;
  FormalPowerSeries<mint> f(d + 1), g(d * 2 + 1);
  for (int i = 0; i <= d; i++) {
    f[i] = ys[i] * C.finv(i) * C.finv(d - i);
    if ((d - i) & 1) f[i] = -f[i];
  }
  for (int i = 0; i <= 2 * d; i++) {
    assert(m - d + i != mint(0));
    g[i] = (m - d + i).inv();
  }
  auto h = f * g;
  mint coeff = 1;
  for (int i = 0; i <= d; i++) coeff *= (m - d + i);
  for (int i = 0; i <= d; i++) {
    h[i + d] *= coeff;
    coeff *= (m + i + 1) * g[i];
  }
  return FormalPowerSeries<mint>{begin(h) + d, begin(h) + 2 * d + 1};
}

// return m(k-1) * m(k-2) * ... * m(1) * m(0)
template <typename mint>
Matrix<mint> polynomial_matrix_prod(Matrix<FormalPowerSeries<mint>> &m,
                                    long long k) {
  using Mat = Matrix<mint>;
  using fps = FormalPowerSeries<mint>;

  auto shift = [](vector<Mat> &G, mint x) -> vector<Mat> {
    int d = G.size(), n = G[0].size();
    vector<Mat> H(d, Mat(n));
    for (int i = 0; i < n; i++) {
      for (int j = 0; j < n; j++) {
        fps g(d);
        for (int l = 0; l < d; l++) g[l] = G[l][i][j];
        fps h = SamplePointShift(g, x);
        for (int l = 0; l < d; l++) H[l][i][j] = h[l];
      }
    }
    return H;
  };

  int n = m.size();
  int deg = 1;
  for (auto &_ : m.A) {
    for (auto &x : _) deg = max<int>(deg, (int)x.size() - 1);
  }
  while (deg & (deg - 1)) deg++;

  vector<Mat> G(deg + 1);
  long long v = 1;
  while (deg * v * v < k) v *= 2;
  mint iv = mint(v).inv();

  for (int i = 0; i < (int)G.size(); i++) {
    mint x = mint(v) * i;
    Mat mt(n);
    for (int j = 0; j < n; j++)
      for (int l = 0; l < n; l++) mt[j][l] = m[j][l].eval(x);
    G[i] = mt;
  }

  for (long long w = 1; w != v; w <<= 1) {
    mint W = w;
    auto G1 = shift(G, W * iv);
    auto G2 = shift(G, (W * deg * v + v) * iv);
    auto G3 = shift(G, (W * deg * v + v + W) * iv);
    for (int i = 0; i <= w * deg; i++)
      G[i] = G1[i] * G[i], G2[i] = G3[i] * G2[i];
    copy(begin(G2), end(G2) - 1, back_inserter(G));
  }

  Mat res = Mat::I(n);
  long long i = 0;
  while (i + v <= k) res = G[i / v] * res, i += v;
  while (i < k) {
    Mat mt(n);
    for (int j = 0; j < n; j++)
      for (int l = 0; l < n; l++) mt[j][l] = m[j][l].eval(i);
    res = mt * res;
    i++;
  }
  return res;
}

/**
 * @brief 多項式行列のprefix product
 */

// return polynomial coefficient s.t. sum_{j=k...0} f_j(i) a_{i+j} = 0
// (In more details, read verification code.)
template <typename mint>
vector<FormalPowerSeries<mint>> find_p_recursive(vector<mint>& a, int d) {
  using fps = FormalPowerSeries<mint>;
  int n = a.size();
  int k = (n + 2) / (d + 2) - 1;
  if (k <= 0) return {};
  int m = (k + 1) * (d + 1);
  vector<vector<mint>> M(m - 1, vector<mint>(m));
  for (int i = 0; i < m - 1; i++) {
    for (int j = 0; j <= k; j++) {
      mint base = 1;
      for (int l = 0; l <= d; l++) {
        M[i][(d + 1) * j + l] = base * a[i + j];
        base *= i + j;
      }
    }
  }
  auto gauss = LinearEquation<mint>(M, vector<mint>(m - 1, 0));
  if (gauss.size() <= 1) return {};
  auto c = gauss[1];
  while (all_of(end(c) - d - 1, end(c), [](mint x) { return x == mint(0); })) {
    c.erase(end(c) - d - 1, end(c));
  }
  k = c.size() / (d + 1) - 1;
  vector<fps> res;
  for (int i = 0, j = 0; i < (int)c.size(); i += d + 1, j++) {
    fps f{1}, base{j, 1};
    fps sm;
    for (int l = 0; l <= d; l++) sm += f * c[i + l], f *= base;
    res.push_back(sm);
  }
  reverse(begin(res), end(res));
  return res;
}

template <typename mint>
mint kth_term_of_p_recursive(vector<mint>& a, long long k, int d) {
  if (k < (int)a.size()) return a[k];
  using fps = FormalPowerSeries<mint>;
  int deg = 2;
  Matrix<FormalPowerSeries<mint>> m(deg), denom(1);
  Matrix<mint> a0(deg);
  m[0][0]=fps({159,124,17});
  m[0][1]=fps({-108,-234,-36});
  m[1][0]=fps({30,16,2});
  m[1][1]=fps({0,0,0});
  denom[0][0]=fps({30,16,2});
  a0[0][0]=2;
  a0[0][1]=0;
  a0[1][0]=1;
  a0[1][1]=0;
  mint res = (polynomial_matrix_prod(m, k - deg + 1) * a0)[0][0];
  res /= polynomial_matrix_prod(denom, k - deg + 1)[0][0];
  return res;
}

template <typename mint>
mint kth_term_of_p_recursive(vector<mint>& a, long long k) {
  if (k < (int)a.size()) return a[k];

  int i = a.size() - 1;
  vector<mint> b{begin(a), end(a) - 1};

  for (int d = 0; d < 10; d++) {
    if (kth_term_of_p_recursive(b, i, d) == a.back()) {
      return kth_term_of_p_recursive(a, k, d);
    }
  }

  exit(1);
}

/**
 * @brief P-recursiveの高速計算
 * @docs docs/fps/find-p-recursive.md
 */




#include <immintrin.h>

__attribute__((target("sse4.2"))) inline __m128i my128_mullo_epu32(
    const __m128i &a, const __m128i &b) {
  return _mm_mullo_epi32(a, b);
}

__attribute__((target("sse4.2"))) inline __m128i my128_mulhi_epu32(
    const __m128i &a, const __m128i &b) {
  __m128i a13 = _mm_shuffle_epi32(a, 0xF5);
  __m128i b13 = _mm_shuffle_epi32(b, 0xF5);
  __m128i prod02 = _mm_mul_epu32(a, b);
  __m128i prod13 = _mm_mul_epu32(a13, b13);
  __m128i prod = _mm_unpackhi_epi64(_mm_unpacklo_epi32(prod02, prod13),
                                    _mm_unpackhi_epi32(prod02, prod13));
  return prod;
}

__attribute__((target("sse4.2"))) inline __m128i montgomery_mul_128(
    const __m128i &a, const __m128i &b, const __m128i &r, const __m128i &m1) {
  return _mm_sub_epi32(
      _mm_add_epi32(my128_mulhi_epu32(a, b), m1),
      my128_mulhi_epu32(my128_mullo_epu32(my128_mullo_epu32(a, b), r), m1));
}

__attribute__((target("sse4.2"))) inline __m128i montgomery_add_128(
    const __m128i &a, const __m128i &b, const __m128i &m2, const __m128i &m0) {
  __m128i ret = _mm_sub_epi32(_mm_add_epi32(a, b), m2);
  return _mm_add_epi32(_mm_and_si128(_mm_cmpgt_epi32(m0, ret), m2), ret);
}

__attribute__((target("sse4.2"))) inline __m128i montgomery_sub_128(
    const __m128i &a, const __m128i &b, const __m128i &m2, const __m128i &m0) {
  __m128i ret = _mm_sub_epi32(a, b);
  return _mm_add_epi32(_mm_and_si128(_mm_cmpgt_epi32(m0, ret), m2), ret);
}

__attribute__((target("avx2"))) inline __m256i my256_mullo_epu32(
    const __m256i &a, const __m256i &b) {
  return _mm256_mullo_epi32(a, b);
}

__attribute__((target("avx2"))) inline __m256i my256_mulhi_epu32(
    const __m256i &a, const __m256i &b) {
  __m256i a13 = _mm256_shuffle_epi32(a, 0xF5);
  __m256i b13 = _mm256_shuffle_epi32(b, 0xF5);
  __m256i prod02 = _mm256_mul_epu32(a, b);
  __m256i prod13 = _mm256_mul_epu32(a13, b13);
  __m256i prod = _mm256_unpackhi_epi64(_mm256_unpacklo_epi32(prod02, prod13),
                                       _mm256_unpackhi_epi32(prod02, prod13));
  return prod;
}

__attribute__((target("avx2"))) inline __m256i montgomery_mul_256(
    const __m256i &a, const __m256i &b, const __m256i &r, const __m256i &m1) {
  return _mm256_sub_epi32(
      _mm256_add_epi32(my256_mulhi_epu32(a, b), m1),
      my256_mulhi_epu32(my256_mullo_epu32(my256_mullo_epu32(a, b), r), m1));
}

__attribute__((target("avx2"))) inline __m256i montgomery_add_256(
    const __m256i &a, const __m256i &b, const __m256i &m2, const __m256i &m0) {
  __m256i ret = _mm256_sub_epi32(_mm256_add_epi32(a, b), m2);
  return _mm256_add_epi32(_mm256_and_si256(_mm256_cmpgt_epi32(m0, ret), m2),
                          ret);
}

__attribute__((target("avx2"))) inline __m256i montgomery_sub_256(
    const __m256i &a, const __m256i &b, const __m256i &m2, const __m256i &m0) {
  __m256i ret = _mm256_sub_epi32(a, b);
  return _mm256_add_epi32(_mm256_and_si256(_mm256_cmpgt_epi32(m0, ret), m2),
                          ret);
}
namespace ntt_inner {
using u64 = uint64_t;
constexpr uint32_t get_pr(uint32_t mod) {
  if (mod == 2) return 1;
  u64 ds[32] = {};
  int idx = 0;
  u64 m = mod - 1;
  for (u64 i = 2; i * i <= m; ++i) {
    if (m % i == 0) {
      ds[idx++] = i;
      while (m % i == 0) m /= i;
    }
  }
  if (m != 1) ds[idx++] = m;

  uint32_t pr = 2;
  while (1) {
    int flg = 1;
    for (int i = 0; i < idx; ++i) {
      u64 a = pr, b = (mod - 1) / ds[i], r = 1;
      while (b) {
        if (b & 1) r = r * a % mod;
        a = a * a % mod;
        b >>= 1;
      }
      if (r == 1) {
        flg = 0;
        break;
      }
    }
    if (flg == 1) break;
    ++pr;
  }
  return pr;
}

constexpr int SZ_FFT_BUF = 1 << 23;
uint32_t _buf1[SZ_FFT_BUF] __attribute__((aligned(64)));
uint32_t _buf2[SZ_FFT_BUF] __attribute__((aligned(64)));
}  // namespace ntt_inner





template <uint32_t mod>
struct LazyMontgomeryModInt {
  using mint = LazyMontgomeryModInt;
  using i32 = int32_t;
  using u32 = uint32_t;
  using u64 = uint64_t;

  static constexpr u32 get_r() {
    u32 ret = mod;
    for (i32 i = 0; i < 4; ++i) ret *= 2 - mod * ret;
    return ret;
  }

  static constexpr u32 r = get_r();
  static constexpr u32 n2 = -u64(mod) % mod;
  static_assert(r * mod == 1, "invalid, r * mod != 1");
  static_assert(mod < (1 << 30), "invalid, mod >= 2 ^ 30");
  static_assert((mod & 1) == 1, "invalid, mod % 2 == 0");

  u32 a;

  constexpr LazyMontgomeryModInt() : a(0) {}
  constexpr LazyMontgomeryModInt(const int64_t &b)
      : a(reduce(u64(b % mod + mod) * n2)){};

  static constexpr u32 reduce(const u64 &b) {
    return (b + u64(u32(b) * u32(-r)) * mod) >> 32;
  }

  constexpr mint &operator+=(const mint &b) {
    if (i32(a += b.a - 2 * mod) < 0) a += 2 * mod;
    return *this;
  }

  constexpr mint &operator-=(const mint &b) {
    if (i32(a -= b.a) < 0) a += 2 * mod;
    return *this;
  }

  constexpr mint &operator*=(const mint &b) {
    a = reduce(u64(a) * b.a);
    return *this;
  }

  constexpr mint &operator/=(const mint &b) {
    *this *= b.inverse();
    return *this;
  }

  constexpr mint operator+(const mint &b) const { return mint(*this) += b; }
  constexpr mint operator-(const mint &b) const { return mint(*this) -= b; }
  constexpr mint operator*(const mint &b) const { return mint(*this) *= b; }
  constexpr mint operator/(const mint &b) const { return mint(*this) /= b; }
  constexpr bool operator==(const mint &b) const {
    return (a >= mod ? a - mod : a) == (b.a >= mod ? b.a - mod : b.a);
  }
  constexpr bool operator!=(const mint &b) const {
    return (a >= mod ? a - mod : a) != (b.a >= mod ? b.a - mod : b.a);
  }
  constexpr mint operator-() const { return mint() - mint(*this); }

  constexpr mint pow(u64 n) const {
    mint ret(1), mul(*this);
    while (n > 0) {
      if (n & 1) ret *= mul;
      mul *= mul;
      n >>= 1;
    }
    return ret;
  }
  
  constexpr mint inverse() const { return pow(mod - 2); }

  friend ostream &operator<<(ostream &os, const mint &b) {
    return os << b.get();
  }

  friend istream &operator>>(istream &is, mint &b) {
    int64_t t;
    is >> t;
    b = LazyMontgomeryModInt<mod>(t);
    return (is);
  }
  
  constexpr u32 get() const {
    u32 ret = reduce(a);
    return ret >= mod ? ret - mod : ret;
  }

  static constexpr u32 get_mod() { return mod; }
};


using namespace Nyaan;
using vm = vector<mint>;
using vvm = vector<vm>;
using fps = FormalPowerSeries<mint>;


long long f[200005];
int qpow(int a,int b,int mod){
	int res=1;
	for(;b;b>>=1,a=1ll*a*a%mod)if(b&1)res=1ll*res*a%mod;
	return res;
}

signed main()
{
	long long N;cin>>N;
	cin>>MOD;
	mint::set_mod(MOD);	
	vector<mint>a(100);
	a[0]=1,a[1]=4,a[2]=28,a[3]=224;
	For(i,4,(int)a.size()-1){
		a[i]=a[i-1] * (17 * i * i + 56 * i - 21) - a[i-2] * 36 * (i+4) * (2*i-3);
		a[i]/=(i+1) * (i+3);
	}
	modint res=kth_term_of_p_recursive(a, N, 2);
	if(N>=100)res=res*qpow(2,N,MOD);
	cout<<res.val();
	return 0;
}

Details

Tip: Click on the bar to expand more detailed information

Test #1:

score: 100
Accepted
time: 0ms
memory: 3608kb

input:

1 998244353

output:

4

result:

ok "4"

Test #2:

score: 0
Accepted
time: 0ms
memory: 3920kb

input:

2 900000011

output:

28

result:

ok "28"

Test #3:

score: 0
Accepted
time: 22ms
memory: 4548kb

input:

999937 999999937

output:

170733195

result:

ok "170733195"

Test #4:

score: 0
Accepted
time: 411ms
memory: 15716kb

input:

167167924 924924167

output:

596516682

result:

ok "596516682"

Test #5:

score: 0
Accepted
time: 867ms
memory: 28480kb

input:

831034609 960842557

output:

713077575

result:

ok "713077575"

Test #6:

score: 0
Accepted
time: 865ms
memory: 29004kb

input:

863561819 960340721

output:

36551280

result:

ok "36551280"

Test #7:

score: 0
Accepted
time: 871ms
memory: 28480kb

input:

822678662 904636463

output:

110525574

result:

ok "110525574"

Test #8:

score: 0
Accepted
time: 865ms
memory: 28548kb

input:

834446518 949829633

output:

481797759

result:

ok "481797759"

Test #9:

score: 0
Accepted
time: 874ms
memory: 28428kb

input:

866653150 924518537

output:

418736071

result:

ok "418736071"

Test #10:

score: 0
Accepted
time: 888ms
memory: 28448kb

input:

900000000 945380759

output:

777900344

result:

ok "777900344"

Test #11:

score: 0
Accepted
time: 876ms
memory: 28400kb

input:

899999999 965001769

output:

758027795

result:

ok "758027795"

Test #12:

score: 0
Accepted
time: 935ms
memory: 28440kb

input:

899999998 964310621

output:

213037885

result:

ok "213037885"

Test #13:

score: 0
Accepted
time: 882ms
memory: 28908kb

input:

899999997 911051677

output:

140568971

result:

ok "140568971"

Test #14:

score: 0
Accepted
time: 890ms
memory: 28404kb

input:

899999996 910915007

output:

779356343

result:

ok "779356343"

Test #15:

score: 0
Accepted
time: 872ms
memory: 28380kb

input:

899999995 904472021

output:

240020425

result:

ok "240020425"

Test #16:

score: 0
Accepted
time: 893ms
memory: 28432kb

input:

899999994 983911997

output:

789822976

result:

ok "789822976"

Test #17:

score: 0
Accepted
time: 897ms
memory: 28928kb

input:

899999993 906669713

output:

389298197

result:

ok "389298197"

Test #18:

score: 0
Accepted
time: 882ms
memory: 28476kb

input:

899999992 991936241

output:

208720220

result:

ok "208720220"

Test #19:

score: 0
Accepted
time: 901ms
memory: 28560kb

input:

899999991 916035773

output:

634945387

result:

ok "634945387"

Test #20:

score: 0
Accepted
time: 878ms
memory: 28396kb

input:

564883390 910858603

output:

686396817

result:

ok "686396817"

Test #21:

score: 0
Accepted
time: 887ms
memory: 28916kb

input:

768069548 918722461

output:

617379002

result:

ok "617379002"

Test #22:

score: 0
Accepted
time: 885ms
memory: 28412kb

input:

899723318 982259209

output:

591025104

result:

ok "591025104"

Test #23:

score: 0
Accepted
time: 876ms
memory: 28412kb

input:

605073563 907482073

output:

561081415

result:

ok "561081415"

Test #24:

score: 0
Accepted
time: 425ms
memory: 15764kb

input:

366317911 974921639

output:

826203883

result:

ok "826203883"

Test #25:

score: 0
Accepted
time: 43ms
memory: 4908kb

input:

4705443 912188153

output:

59076968

result:

ok "59076968"

Test #26:

score: 0
Accepted
time: 46ms
memory: 5108kb

input:

4677951 964852117

output:

354810995

result:

ok "354810995"

Test #27:

score: 0
Accepted
time: 46ms
memory: 4908kb

input:

4987908 992726881

output:

358021425

result:

ok "358021425"

Test #28:

score: 0
Accepted
time: 39ms
memory: 4884kb

input:

4564516 904497749

output:

677047391

result:

ok "677047391"

Test #29:

score: 0
Accepted
time: 47ms
memory: 4964kb

input:

4805950 958611557

output:

530391365

result:

ok "530391365"

Test #30:

score: 0
Accepted
time: 46ms
memory: 5104kb

input:

5000000 972995789

output:

285829649

result:

ok "285829649"

Test #31:

score: 0
Accepted
time: 42ms
memory: 5100kb

input:

4999999 904999313

output:

547438555

result:

ok "547438555"

Test #32:

score: 0
Accepted
time: 47ms
memory: 4964kb

input:

4999998 927293837

output:

48186944

result:

ok "48186944"

Test #33:

score: 0
Accepted
time: 46ms
memory: 4876kb

input:

4999997 913168601

output:

481067448

result:

ok "481067448"

Test #34:

score: 0
Accepted
time: 46ms
memory: 4980kb

input:

4999996 994117843

output:

597780176

result:

ok "597780176"

Test #35:

score: 0
Accepted
time: 46ms
memory: 4908kb

input:

4999995 968266037

output:

675120838

result:

ok "675120838"

Test #36:

score: 0
Accepted
time: 47ms
memory: 4972kb

input:

4999994 904210121

output:

648061474

result:

ok "648061474"

Test #37:

score: 0
Accepted
time: 42ms
memory: 4892kb

input:

4999993 907728929

output:

849380814

result:

ok "849380814"

Test #38:

score: 0
Accepted
time: 42ms
memory: 4932kb

input:

4999992 947455987

output:

206320322

result:

ok "206320322"

Test #39:

score: 0
Accepted
time: 47ms
memory: 4920kb

input:

4999991 974041903

output:

463365147

result:

ok "463365147"

Test #40:

score: 0
Accepted
time: 1ms
memory: 3588kb

input:

1 943308833

output:

4

result:

ok "4"

Test #41:

score: 0
Accepted
time: 1ms
memory: 3844kb

input:

2 945671491

output:

28

result:

ok "28"

Test #42:

score: 0
Accepted
time: 0ms
memory: 3880kb

input:

3 936569281

output:

224

result:

ok "224"

Test #43:

score: 0
Accepted
time: 0ms
memory: 3852kb

input:

4 924568301

output:

1888

result:

ok "1888"

Test #44:

score: 0
Accepted
time: 0ms
memory: 3652kb

input:

5 953983273

output:

16320

result:

ok "16320"

Test #45:

score: 0
Accepted
time: 0ms
memory: 3924kb

input:

6 990997369

output:

143040

result:

ok "143040"

Test #46:

score: 0
Accepted
time: 0ms
memory: 3688kb

input:

7 987104879

output:

1264128

result:

ok "1264128"

Test #47:

score: 0
Accepted
time: 0ms
memory: 3712kb

input:

8 931262323

output:

11230720

result:

ok "11230720"

Test #48:

score: 0
Accepted
time: 0ms
memory: 3608kb

input:

9 968657761

output:

100124672

result:

ok "100124672"

Test #49:

score: 0
Accepted
time: 0ms
memory: 3668kb

input:

10 917158091

output:

894785536

result:

ok "894785536"

Test #50:

score: 0
Accepted
time: 42ms
memory: 5132kb

input:

4617442 970173539

output:

787952622

result:

ok "787952622"

Test #51:

score: 0
Accepted
time: 46ms
memory: 5140kb

input:

2231131 922458703

output:

294167323

result:

ok "294167323"

Test #52:

score: 0
Accepted
time: 46ms
memory: 4968kb

input:

3158509 942524287

output:

379615863

result:

ok "379615863"

Test #53:

score: 0
Accepted
time: 46ms
memory: 5088kb

input:

4392605 900206119

output:

8640947

result:

ok "8640947"

Test #54:

score: 0
Accepted
time: 46ms
memory: 4964kb

input:

2354228 983140729

output:

655547043

result:

ok "655547043"

Extra Test:

score: 0
Extra Test Passed