QOJ.ac

QOJ

IDProblemSubmitterResultTimeMemoryLanguageFile sizeSubmit timeJudge time
#601465#8781. Element-Wise ComparisonericmegalovaniaWA 115ms618420kbC++209.3kb2024-09-30 00:40:242024-09-30 00:40:25

Judging History

你现在查看的是最新测评结果

  • [2024-09-30 00:40:25]
  • 评测
  • 测评结果:WA
  • 用时:115ms
  • 内存:618420kb
  • [2024-09-30 00:40:24]
  • 提交

answer

#define __AVX__ 1
#define __AVX2__ 1
#pragma GCC optimize("Ofast,unroll-loops")
#pragma GCC target("avx2,bmi,fma,popcnt")
#include<immintrin.h>

#include<bits/stdc++.h>
using namespace std;

//#define ONLINE
#ifndef ONLINE
char DEBUG_BUFFER[1000];
#define debug(...) {sprintf(DEBUG_BUFFER,##__VA_ARGS__);\
cerr<<"\033[1;36m"<<DEBUG_BUFFER<<"\033[0;2m"<<"\033[0m";}
#else
#define debug(...) ;
#endif

using LL=long long;
using PII=pair<int,int>;

#define all(x) (x).begin(),(x).end()
#define allr(x) (x).rbegin(),(x).rend()

#define FAST_IO {ios::sync_with_stdio(false);cin.tie(nullptr);}
inline int read(){static int x; cin>>x; return x;}
inline LL readLL(){static LL x; cin>>x; return x;}
mt19937_64 rng(chrono::system_clock::now().time_since_epoch().count());

#define __JYC_CP
namespace avx256_bitset{
	using vec=__m256i;
	using u32=unsigned int;
	using u64=unsigned long long;
	using cup=const u32*; //const-u32-pointer
	const u32 o=1,inf=0xffffffff;
#define v32(x) _mm256_##x
#define B Bitset<N>
#define vec_zero (v32(set1_epi32)(0))
#define vec_one (v32(set1_epi32)(inf))
#define load(x) (v32(load_si256)((vec*)(x)))
#define loadu(x) (v32(loadu_si256)((vec*)(x)))
	template<const int N>
	class Bitset{
	private:
		static_assert(N%256==0);
		static const int T=(N-1)/32+1,T2=((N-1)/256+1)<<3,back_siz=N-32*(T-1);
		static_assert(back_siz>=0 && back_siz<=32);
		static const u32 set_last=(o<<back_siz-1)-1+(o<<back_siz-1);
		alignas(32) u32 a[T2]={};
		inline void revise(){
			a[T-1]&=set_last;
			for(int i=T;i<T2;++i) a[i]=0;
		}
	public:
		Bitset(){}
		inline void reset(){
			static const vec all_zero=vec_zero;
			cup p=a,pend=a+T2;
			for(;p!=pend;p+=8){
				v32(store_si256)((vec*)p,all_zero);
			}
		}
		inline void set(){
			static const vec all_one=vec_one;
			cup p=a,pend=a+T2;
			for(;p!=pend;p+=8){
				v32(store_si256)((vec*)p,all_one);
			}
			revise();
		}
		inline void reset(const int& x){
			a[x>>5]&=~(o<<x&31);
			if((x>>5)==T-1) a[T-1]&=set_last;
		}
		inline void set(const int& x){
			a[x>>5]|=o<<(x&31);
		}
		inline void flip(const int& x){
			a[x>>5]^=o<<(x&31);
		}
		inline int test(int x){
			return (a[x>>5]>>(x&31))&1;
		}
		inline bool operator [](int x) const{
			return (a[x>>5]>>(x&31))&1;
		}
		inline bool none() const{
			cup p=a,pend=a+T2;
			for(;p!=pend;p+=8){
				const vec& v=load(p);
				if(!v32(testz_si256)(v,v)) return 0;
			}
			return 1;
		}
		inline bool any() const{
			return !none();
		}
		inline int _Find_first() const{
			for(int i=0;i<T2;i+=8){
				const vec& v=load(&a[i]);
				if(v32(testz_si256)(v,v)) continue;
				for(int j=i;j<i+8;j++) if(a[j]){
					return j*32+__builtin_ctz(a[j]);
				}
			}
			return N;
		}
		inline int _Find_next(const int& pos) const{
			if((pos&31)!=31){
				int i=pos/32,j=(pos+1)&31,nw=pos+1;
				for(;j<32;++j,++nw){
					if(a[i]>>j&1) return nw;
				}
			}
			int i=pos/32+1;
			for(;i%8;++i) if(a[i]){
				return i*32+__builtin_ctz(a[i]);
			}
			for(;i<T2;i+=8){
				const vec& v=load(&a[i]);
				if(v32(testz_si256)(v,v)) continue;
				for(int j=i;j<i+8;j++) if(a[j]){
					return j*32+__builtin_ctz(a[j]);
				}
			}
			return N;
		}
		inline int count() const{
			int ans=0;
			for(int i=0;i<T2/2;++i){
				ans+=__builtin_popcountll( ((u64*)(a)) [i]);
			}
			return ans;
			//needs to support avx512
			//flags: AVX512VPOPCNTDQ + AVX512VL
			/*
			int ans[8]={};
			vec res=vec_zero,tmp;
			cup p=a,pend=a+T2;
			for(;p!=pend;p+=8){
			tmp=v32(popcnt_epi32)(load(p));
			res=v32(add_epi32)(res,tmp);
			}
			v32(store_si256)((vec*)ans,res);
			return accumulate(ans,ans+8,0);
			*/
		}
		inline B& operator =(const B& b){
			u32* p=a;
			cup pend=a+T2,p2=b.a;
			for(;p!=pend;p+=8,p2+=8){
				v32(store_si256)((vec*)p,load(p2));
			}
			return *this;
		}
		inline B operator ~() const{
			static const __m256i all_one=vec_one;
			B ans; cup p=a,pend=a+T2;
			u32* pans=ans.a;
			for(;p!=pend;p+=8,pans+=8){
				auto v=v32(load_si256)((vec*)p);
				v=v32(andnot_si256)(v,all_one);
				v32(store_si256)((vec*)pans,v);
			}
			revise();
			return ans;
		}
#define OP(b,op) \
		B ans; cup p=a,pend=a+T2,p2=b.a;\
		u32* pans=ans.a;\
		for(;p!=pend;p+=8,p2+=8,pans+=8){\
		const vec& tmp=v32(op##_si256)(load(p),load(p2));\
		v32(store_si256)((vec*)pans,tmp);\
		}return ans
		inline B operator &(const B& b) const{
			OP(b,and);
		}
		inline B operator |(const B& b) const{
			OP(b,or);
		}
		inline B operator ^(const B& b) const{
			OP(b,xor);
		}
#undef OP
#define OP(b,op) \
		cup p=a,pend=a+T2,p2=b.a;\
		for(;p!=pend;p+=8,p2+=8){\
		const vec& tmp=v32(op##_si256)(load(p),load(p2));\
		v32(store_si256)((vec*)p,tmp);\
		}return *this
		inline B& operator &=(const B& b){
			OP(b,and);
		}
		inline B& operator |=(const B& b){
			OP(b,or);
		}
		inline B& operator ^=(const B& b){
			OP(b,xor);
		}
#undef OP
		inline B operator <<(const int& t) const{
			B ans;
			if(t>=N) return ans;
			const int div=t/32,rem=t%32;
			cup p=a+T2-8;
			u32* pans=ans.a+T2-8;
			if(rem){
				for(;p-div-1>=a;p-=8,pans-=8){
					vec v1=v32(slli_epi32)(loadu(p-div),rem);
					vec v2=v32(srli_epi32)(loadu(p-div-1),32-rem);
					v32(store_si256)((vec*)pans,v32(or_si256)(v1,v2));
				}
			}
			else{
				for(;p-div-1>=a;p-=8,pans-=8){
					v32(store_si256)((vec*)pans,loadu(p-div));
				}
			}
			p+=7,pans+=7;
			for(;p-div-1>=a;--p,--pans){
				(*pans)=(*(p-div))<<rem;
				if(rem) (*pans)|=(*(p-div-1))>>(32-rem);
			}
			(*pans)=a[0]<<rem;
			ans.revise();
			return ans;
		}
		inline B& operator <<=(const int& t){
			return (*this)=(*this)<<t;
		}
		inline B operator >>(const int& t) const{
			B ans;
			if(t>=N) return ans;
			const int div=t/32,rem=t%32;
			cup p=a;
			u32* pans=ans.a;
			if(rem){
				for(;p+div+1<=a+T2-8;p+=8,pans+=8){
					vec v1=v32(srli_epi32)(loadu(p+div),rem);
					vec v2=v32(slli_epi32)(loadu(p+div+1),32-rem);
					v32(store_si256)((vec*)pans,v32(or_si256)(v1,v2));
				}
			}
			else{
				for(;p+div+1<=a+T2-8;p+=8,pans+=8){
					v32(store_si256)((vec*)pans,loadu(p+div));
				}
			}
			p-=7,pans-=7;
			for(;p+div+1<=a+T2-8;++p,++pans){
				(*pans)=(*(p+div))>>rem;
				if(rem) (*pans)|=(*(p+div+1))<<(32-rem);
			}
			(*pans)=a[T2-1]>>rem;
			ans.revise();
			return ans;
		}
		inline B& operator >>=(const int& t){
			return (*this)=(*this)>>t;
		}
//the following parts are for competitive programming
#ifdef __JYC_CP
#define OP(x,y,z,op) \
		z.reset(); cup p=x.a,pend=x.a+T2,p2=y.a;\
		u32* pans=z.a;\
		for(;p!=pend;p+=8,p2+=8,pans+=8){\
		const vec& tmp=v32(op##_si256)(load(p),load(p2));\
		v32(store_si256)((vec*)pans,tmp);}
		inline static void And(const B& x,const B& y,B& z){
			OP(x,y,z,and);
		}
		inline static void Or(const B& x,const B& y,B& z){
			OP(x,y,z,or);
		}
		inline static void Xor(const B& x,const B& y,B& z){
			OP(x,y,z,xor);
		}
#undef OP
		inline static void Lsh(const B& x,const int& t,B& ans){
			ans.reset();
			if(t>=N) return;
			const int div=t/32,rem=t%32;
			cup p=x.a+T2-8;
			u32* pans=ans.a+T2-8;
			if(rem){
				for(;p-div-1>=x.a;p-=8,pans-=8){
					vec v1=v32(slli_epi32)(loadu(p-div),rem);
					vec v2=v32(srli_epi32)(loadu(p-div-1),32-rem);
					v32(store_si256)((vec*)pans,v32(or_si256)(v1,v2));
				}
			}
			else{
				for(;p-div-1>=x.a;p-=8,pans-=8){
					v32(store_si256)((vec*)pans,loadu(p-div));
				}
			}
			p+=7,pans+=7;
			for(;p-div-1>=x.a;--p,--pans){
				(*pans)=(*(p-div))<<rem;
				if(rem) (*pans)|=(*(p-div-1))>>(32-rem);
			}
			(*pans)=x.a[0]<<rem;
			ans.revise();
		}
		inline static void Rsh(const B& x,const int& t,B& ans){
			ans.reset();
			if(t>=N) return;
			const int div=t/32,rem=t%32;
			cup p=x.a;
			u32* pans=ans.a;
			if(rem){
				for(;p+div+1<=x.a+T2-8;p+=8,pans+=8){
					vec v1=v32(srli_epi32)(loadu(p+div),rem);
					vec v2=v32(slli_epi32)(loadu(p+div+1),32-rem);
					v32(store_si256)((vec*)pans,v32(or_si256)(v1,v2));
				}
			}
			else{
				for(;p+div+1<=x.a+T2-8;p+=8,pans+=8){
					v32(store_si256)((vec*)pans,loadu(p+div));
				}
			}
			p-=7,pans-=7;
			for(;p+div+1<=x.a+T2-8;++p,++pans){
				(*pans)=(*(p+div))>>rem;
				if(rem) (*pans)|=(*(p+div+1))<<(32-rem);
			}
			(*pans)=x.a[T2-1]>>rem;
			ans.revise();
		}
#endif
		string str() const{
			string s="";
			for(int i=N-1;i>=0;--i){
				s.push_back('0'+(int)operator[](i));
			}
			return s;
		}
		void output() const{
			debug("%s\n",this->str().data());
		}
	};
}
#undef B

//#define N 16
#define N (196*256)
using B=avx256_bitset::Bitset<N>;
int pos[N];
B f[N],suf[N],tmp;

int main(){
	FAST_IO;
	int n=read(),m=read();
	for(int i=0;i<n;i++){
		pos[read()-1]=i;
	}
	tmp.reset();
	for(int x=n-1;x>=0;x--){
		B::Rsh(tmp,pos[x],f[pos[x]]);
		tmp.set(pos[x]);
//		f[pos[x]]=tmp>>pos[x];
//		tmp[pos[x]]=1;
	}
	LL ans=0;
	for(int i=0,last=-1;i<n-m+1;i++){
		if(i>last){
			last+=m;
			suf[0]=f[last];
			for(int j=1;j<m;j++){
				B::And(suf[j-1],f[last-j],suf[j]);
//				suf[j]=suf[j-1]&f[last-j];
			}
			tmp.set();
		}
		ans+=(suf[last-i]&tmp).count();
		tmp&=f[i+m];
	}
	cout<<ans;
	return 0;
}
/* stuff you should look for
* int overflow, array bounds
* special cases (n=1?)
* do smth instead of nothing and stay organized
* WRITE STUFF DOWN
* DON'T GET STUCK ON ONE APPROACH
*/

Details

Tip: Click on the bar to expand more detailed information

Test #1:

score: 100
Accepted
time: 36ms
memory: 618344kb

input:

5 3
5 2 1 3 4

output:

0

result:

ok answer is '0'

Test #2:

score: 0
Accepted
time: 4ms
memory: 618348kb

input:

5 2
3 1 4 2 5

output:

2

result:

ok answer is '2'

Test #3:

score: 0
Accepted
time: 35ms
memory: 618168kb

input:

4 2
1 2 3 4

output:

3

result:

ok answer is '3'

Test #4:

score: 0
Accepted
time: 24ms
memory: 618420kb

input:

4 2
4 3 2 1

output:

0

result:

ok answer is '0'

Test #5:

score: 0
Accepted
time: 36ms
memory: 618280kb

input:

1 1
1

output:

0

result:

ok answer is '0'

Test #6:

score: -100
Wrong Answer
time: 115ms
memory: 618420kb

input:

50000 2
44045 29783 5389 7756 44022 45140 21967 5478 10868 49226 21775 31669 49836 13511 46116 14229 27206 31168 37389 3158 10658 41154 14635 18526 40540 6451 23197 46719 30593 13517 8604 46666 39189 43746 12778 3684 3194 36979 43020 14652 19549 31178 17144 27177 44336 2849 40220 11751 41993 32209 4...

output:

310408256

result:

wrong answer expected '310780127', found '310408256'