#define FAST_INPUT
#pragma GCC optimize("O3")
#pragma GCC target("avx2")
#include <bitset>
// START #include "main.h"
#if defined(PRINT_TIMING)
#include <chrono>
#endif
#include <iomanip>
#include <iostream>
#include <limits>
template <typename input_t>
struct solve_main_wrapper {
#if defined(MULTI_UNTIL)
#define _SOLVE_MAIN_LOOP_CONDITION true
using return_t = bool;
#else
#define _SOLVE_MAIN_LOOP_CONDITION testnum <= test_cases
using return_t = void;
#endif
input_t& cin;
solve_main_wrapper(input_t& _cin, int argc, char** argv) : cin(_cin) {
(void)argc;
(void)argv;
}
auto solve_main(size_t testnum) -> return_t;
auto solve_all() -> int {
[[maybe_unused]] size_t test_cases = 1;
#if defined(MULTI_TEST) or defined(PRINT_CASE)
cin >> test_cases;
#endif
for (size_t testnum = 1; _SOLVE_MAIN_LOOP_CONDITION; testnum++) {
#define MAKE_STRING_IMPL(STRING) #STRING
#define MAKE_STRING(STRING) MAKE_STRING_IMPL(STRING)
#if defined(PRINT_CASE)
std::cout << "Case " << MAKE_STRING(PRINT_CASE) << testnum << ": ";
#undef MAKE_STRING
#undef MAKE_STRING_IMPL
#endif
#if defined(PRINT_TIMING)
auto start_time = std::chrono::high_resolution_clock::now();
#endif
#if defined(MULTI_UNTIL)
if (not solve_main(testnum)) break;
#else
solve_main(testnum);
#endif
#if defined(PRINT_TIMING)
auto duration = std::chrono::high_resolution_clock::now() - start_time;
using namespace std::chrono;
std::cerr << "\n[t" << testnum << "] " << duration / 1.0s << "s\n\n";
#endif
}
return 0;
}
};
#if defined(FAST_INPUT) or defined(FAST_INPUT_BUFFER)
// START #include "utility/fast_input.h"
// START #include "utility/fast_input_read.h"
#include <cstddef>
template <typename T = void>
struct fast_input_read {};
// END #include "utility/fast_input_read.h"
#include <complex>
#include <string>
#include <tuple>
#define _USING_FAST_INPUT
template <size_t buf_size>
struct fast_input {
char buf[buf_size], *S, *T;
FILE* const ifptr;
fast_input(FILE* _in = stdin) : S(buf), T(buf), ifptr(_in) {}
static auto is_digit(char c) -> bool { return '0' <= c and c <= '9'; }
explicit operator bool() const { return peek() != EOF; }
template <typename T>
auto operator>>(T& x) -> fast_input& {
if constexpr (requires(T& t) { this->get(t); }) {
this->get(x);
} else {
fast_input_read<T>::get(*this, x);
}
return *this;
}
auto getc() -> char {
if (S == T) {
T = (S = buf) + fread(buf, 1, buf_size, ifptr);
if (S == T) return EOF;
}
return *S++;
}
auto peek() -> char {
if (S == T) {
T = (S = buf) + fread(buf, 1, buf_size, ifptr);
if (S == T) return EOF;
}
return *S;
}
auto get(char& x) -> void {
while (isspace(x = getc()) && x != EOF) {}
}
// TODO slow?
auto get(std::string& x) -> void {
x.clear();
char c;
while (isspace(c = getc()) && c != EOF) {}
for (; !isspace(c) && c != EOF; c = getc()) {
x.push_back(c);
}
}
auto get(decltype(std::ignore)) -> void {
char c;
while (isspace(c = getc()) && c != EOF) {}
for (; !isspace(c) && c != EOF; c = getc()) {}
}
template <typename var_t>
requires(std::is_integral_v<var_t>)
auto get(var_t& x) -> void {
x = 0;
char c;
bool negative = false;
while (!is_digit(c = getc()) && c != EOF) {
negative = (c == '-');
}
for (; is_digit(c) && c != EOF; c = getc()) {
x = x * 10 + c - '0';
}
if (negative) {
x = -x;
}
}
// TODO slow ?
template <typename var_t>
requires(std::is_floating_point_v<var_t>)
auto get(var_t& x) -> void {
x = 0;
char c;
bool negative = false;
while (!is_digit(c = getc()) && c != '.' && c != EOF) {
negative = (c == '-');
}
if (c != '.') {
for (; is_digit(c) && c != EOF; c = getc()) {
x = x * 10 + c - '0';
}
}
if (c == '.') {
static var_t div;
div = 1;
while (is_digit(c = getc()) && c != EOF) {
x = x * 10 + c - '0';
div *= 10;
}
x /= div;
}
if (negative) {
x = -x;
}
}
template <typename T, typename U>
auto get(std::pair<T, U>& x) -> void {
*this >> x.first >> x.second;
}
template <size_t index = 0, typename... T>
inline auto get(std::tuple<T...>& x) -> void {
if constexpr (index < sizeof...(T)) {
*this >> (std::get<index>(x));
get<index + 1>(x);
}
}
template <typename T>
auto get(std::complex<T>& x) -> void {
T a, b;
*this >> a >> b;
x = {a, b};
}
auto getline() -> std::string {
std::string out;
char c;
while ((c = getc()) != '\n' && c != EOF) {
out.push_back(c);
}
return out;
}
};
// END #include "utility/fast_input.h"
#endif
auto main(int argc, char** argv) -> int {
std::cout << std::fixed << std::setprecision(10);
#ifdef _USING_FAST_INPUT
#if not defined(FAST_INPUT_BUFFER)
#define FAST_INPUT_BUFFER 16384
#endif
fast_input<FAST_INPUT_BUFFER> cin;
solve_main_wrapper solver(cin, argc, argv);
#else
std::cin.tie(0)->sync_with_stdio(0);
solve_main_wrapper solver(std::cin, argc, argv);
#endif
return solver.solve_all();
}
#define SOLVE() \
template <typename input_t> \
auto solve_main_wrapper<input_t>::solve_main([[maybe_unused]] size_t testnum) -> return_t
using ll = long long;
constexpr char nl = '\n';
#include <algorithm>
#include <cassert>
#include <map>
#include <set>
#include <unordered_map>
#include <unordered_set>
#include <vector>
using namespace std;
// END #include "main.h"
SOLVE() {
int n, m;
cin >> n >> m;
vector<pair<int, int>> order;
for (int i = 0; i < n; i++) {
int a;
cin >> a;
order.emplace_back(a, i);
}
sort(begin(order), end(order));
static constexpr int N = 5e4;
bitset<N> builder;
vector<bitset<N>> bs(n);
for (auto [a, i] : order) {
bs[i] = builder;
bs[i] <<= n - i;
builder[i] = 1;
}
for (int i = 0; i < n; i++) {
bs[i] &= builder;
}
vector<bitset<N>> pref(n), suff(n);
for (int i = 0; i < n; i++) {
pref[i] = bs[i];
if (i % m != 0 and i - 1 >= 0) {
pref[i] &= pref[i - 1];
}
}
for (int i = n - 1; i >= 0; i--) {
suff[i] = bs[i];
if (i % m != m - 1 and i + 1 < n) {
suff[i] &= suff[i + 1];
}
}
ll ans = 0;
for (int i = 0; i + m - 1 < n; i++) {
auto res = suff[i] & pref[i + m - 1];
ans += res.count();
}
cout << ans << nl;
}