#pragma GCC optimize("Ofast")
#include <bits/stdc++.h>
#define ll long long
#include "pilgrimage.h"
using namespace std;
int mod, inv, Phi = 1;
inline int add(int x, int y) {x += y; return x >= mod ? x - mod : x;}
inline int del(int x, int y) {x -= y; return x < 0 ? x + mod : x;}
inline void Add(int &x, int y) {x = add(x, y);}
inline void Del(int &x, int y) {x = del(x, y);}
inline int qpow(int x, ll y) {
if (y >= Phi) y %= Phi, y += Phi;
int res = 1;
while (y) {
if (y & 1ll) res = 1ll * res * x % mod;
x = 1ll * x * x % mod;
y >>= 1;
}
return res;
}
namespace my_ex_Lucas {
struct node {
int p, c, pw, m, im, phi;
vector<int> pre, qpw, iv;
};
int p, cnt;
node a[20];
void ex_gcd(int A, int B, int &x, int &y) {
if (!B) return x = 1, y = 0, void();
int xx, yy;
ex_gcd(B, A % B, xx, yy);
x = yy; y = xx - A / B * yy;
}
inline int get_inv(int v, int mod) {
int x, y;
ex_gcd(v, mod, x, y);
x = (x % mod + mod) % mod;
assert(1ll * v * x % mod == 1);
return x;
}
void init(int P) {
int x = p = P; cnt = 0;
for (int i = 2; i * i <= x; ++i) {
if (x % i) continue;
int num = 0, now = 1;
while (x % i == 0) num++, now *= i, x /= i;
cnt++;
a[cnt].p = i; a[cnt].c = num; a[cnt].pw = now; a[cnt].m = p / now; a[cnt].im = get_inv(a[cnt].m % a[cnt].pw, a[cnt].pw);
a[cnt].phi = now - now / i;
a[cnt].pre.resize(now);
a[cnt].pre[0] = 1;
for (int j = 1; j < now; ++j) a[cnt].pre[j] = 1ll * a[cnt].pre[j - 1] * (j % i == 0 ? 1 : j) % now;
a[cnt].qpw.resize(a[cnt].phi * 2);
a[cnt].qpw[0] = 1;
for (int j = 1; j < a[cnt].phi * 2; ++j) a[cnt].qpw[j] = 1ll * a[cnt].qpw[j - 1] * a[cnt].pre[now - 1] % now;
a[cnt].iv.resize(now);
for (int j = 1; j < now; ++j)
if (j % i) a[cnt].iv[j] = get_inv(j, now);
Phi *= a[cnt].phi;
}
if (x > 1) {
Phi *= x;
cnt++;
a[cnt].p = x; a[cnt].c = 1; a[cnt].pw = x; a[cnt].m = p / x; a[cnt].im = get_inv(a[cnt].m % a[cnt].pw, a[cnt].pw);
a[cnt].phi = x - 1;
a[cnt].pre.resize(x);
a[cnt].pre[0] = 1;
for (int j = 1; j < x; ++j) a[cnt].pre[j] = 1ll * a[cnt].pre[j - 1] * j % x;
a[cnt].qpw.resize(a[cnt].phi * 2);
a[cnt].qpw[0] = 1;
for (int j = 1; j < a[cnt].phi * 2; ++j) a[cnt].qpw[j] = 1ll * a[cnt].qpw[j - 1] * a[cnt].pre[x - 1] % x;
a[cnt].iv.resize(x);
for (int j = 1; j < x; ++j) a[cnt].iv[j] = get_inv(j, x);
}
}
int calc(ll n, int id) {
ll mem = n / a[id].pw;
if (mem >= a[id].phi) mem = mem % a[id].phi + a[id].phi;
return 1ll * a[id].qpw[mem] * a[id].pre[n % a[id].pw] % a[id].pw;
}
int work(ll n, ll m, int id) {
int o = 0, res = 1, ire = 1;
res = calc(n, id);
ire = 1ll * calc(m, id) * calc(n - m, id) % a[id].pw;
for (ll now = a[id].p; now <= n; now *= (ll)a[id].p) {
o += n / now - m / now - (n - m) / now;
res = 1ll * res * calc(n / now, id) % a[id].pw;
ire = 1ll * ire * calc(m / now, id) % a[id].pw * calc((n - m) / now, id) % a[id].pw;
}
res = 1ll * res * a[id].iv[ire] % a[id].pw;
if (o >= a[id].c) return 0;
while (o--) res = 1ll * res * a[id].p % a[id].pw;
return res;
}
int ex_Lucas(ll n, ll m) {
if (n < 0 || m < 0 || n < m) return 0;
int res = 0;
for (int i = 1; i <= cnt; ++i) {
int tmp = work(n, m, i);
res = (res + 1ll * tmp * a[i].m % p * a[i].im % p) % p;
}
return res;
}
}
using my_ex_Lucas :: ex_Lucas;
void init(int o, int p) {
mod = p;
inv = (mod + 1) / 2;
my_ex_Lucas :: init(p);
}
int ask(ll n) {
if (n == 1) return inv;
n--;
int val = (2ll * n - 1) % mod;
val = 8ll * val * (val + 2) % mod;
int tmp1 = ex_Lucas(2ll * n - 2, n - 1);
int tmp2 = ex_Lucas(2ll * n - 2, n - 2);
val = 1ll * val * del(tmp1, tmp2) % mod;
val = 1ll * val * qpow(inv, 2ll * n + 3)
return val * del(ex_Lucas(2ll * n - 2, n - 1), ex_Lucas(2ll * n - 2, n - 2)) % mod * qpow(inv, 2ll * n + 3) % mod;
}