QOJ.ac

QOJ

ID题目提交者结果用时内存语言文件大小提交时间测评时间
#601469#8781. Element-Wise ComparisonericmegalovaniaRE 31ms643840kbC++209.6kb2024-09-30 00:51:412024-09-30 00:51:42

Judging History

你现在查看的是最新测评结果

  • [2024-09-30 00:51:42]
  • 评测
  • 测评结果:RE
  • 用时:31ms
  • 内存:643840kb
  • [2024-09-30 00:51:41]
  • 提交

answer

#define __AVX__ 1
#define __AVX2__ 1
#pragma GCC optimize("Ofast,unroll-loops")
#pragma GCC target("avx2,bmi,fma,popcnt")
#include<immintrin.h>

#include<bits/stdc++.h>
using namespace std;

//#define ONLINE
#ifndef ONLINE
char DEBUG_BUFFER[1000];
#define debug(...) {sprintf(DEBUG_BUFFER,##__VA_ARGS__);\
cerr<<"\033[1;36m"<<DEBUG_BUFFER<<"\033[0;2m"<<"\033[0m";}
#else
#define debug(...) ;
#endif

using LL=long long;
using PII=pair<int,int>;

#define all(x) (x).begin(),(x).end()
#define allr(x) (x).rbegin(),(x).rend()

#define FAST_IO {ios::sync_with_stdio(false);cin.tie(nullptr);}
inline int read(){static int x; cin>>x; return x;}
inline LL readLL(){static LL x; cin>>x; return x;}
mt19937_64 rng(chrono::system_clock::now().time_since_epoch().count());

#define __JYC_CP
namespace avx256_bitset{
	using vec=__m256i;
	using u32=unsigned int;
	using u64=unsigned long long;
	using cup=const u32*; //const-u32-pointer
	const u32 o=1,inf=0xffffffff;
#define v32(x) _mm256_##x
#define B Bitset<N>
#define vec_zero (v32(set1_epi32)(0))
#define vec_one (v32(set1_epi32)(inf))
#define load(x) (v32(load_si256)((vec*)(x)))
#define loadu(x) (v32(loadu_si256)((vec*)(x)))
	template<const int N>
	class Bitset{
	private:
		static_assert(N%256==0);
		static const int T=(N-1)/32+1,T2=((N-1)/256+1)<<3,back_siz=N-32*(T-1);
		static_assert(back_siz>=0 && back_siz<=32);
		static const u32 set_last=(o<<back_siz-1)-1+(o<<back_siz-1);
		alignas(32) u32 a[T2]={};
		inline void revise(){
			a[T-1]&=set_last;
			for(int i=T;i<T2;++i) a[i]=0;
		}
	public:
		Bitset(){}
		inline void reset(){
			static const vec all_zero=vec_zero;
			cup p=a,pend=a+T2;
			for(;p!=pend;p+=8){
				v32(store_si256)((vec*)p,all_zero);
			}
		}
		inline void set(){
			static const vec all_one=vec_one;
			cup p=a,pend=a+T2;
			for(;p!=pend;p+=8){
				v32(store_si256)((vec*)p,all_one);
			}
			revise();
		}
		inline void reset(const int& x){
			a[x>>5]&=~(o<<x&31);
			if((x>>5)==T-1) a[T-1]&=set_last;
		}
		inline void set(const int& x){
			a[x>>5]|=o<<(x&31);
		}
		inline void flip(const int& x){
			a[x>>5]^=o<<(x&31);
		}
		inline int test(int x){
			return (a[x>>5]>>(x&31))&1;
		}
		inline bool operator [](int x) const{
			return (a[x>>5]>>(x&31))&1;
		}
		inline bool none() const{
			cup p=a,pend=a+T2;
			for(;p!=pend;p+=8){
				const vec& v=load(p);
				if(!v32(testz_si256)(v,v)) return 0;
			}
			return 1;
		}
		inline bool any() const{
			return !none();
		}
		inline int _Find_first() const{
			for(int i=0;i<T2;i+=8){
				const vec& v=load(&a[i]);
				if(v32(testz_si256)(v,v)) continue;
				for(int j=i;j<i+8;j++) if(a[j]){
					return j*32+__builtin_ctz(a[j]);
				}
			}
			return N;
		}
		inline int _Find_next(const int& pos) const{
			if((pos&31)!=31){
				int i=pos/32,j=(pos+1)&31,nw=pos+1;
				for(;j<32;++j,++nw){
					if(a[i]>>j&1) return nw;
				}
			}
			int i=pos/32+1;
			for(;i%8;++i) if(a[i]){
				return i*32+__builtin_ctz(a[i]);
			}
			for(;i<T2;i+=8){
				const vec& v=load(&a[i]);
				if(v32(testz_si256)(v,v)) continue;
				for(int j=i;j<i+8;j++) if(a[j]){
					return j*32+__builtin_ctz(a[j]);
				}
			}
			return N;
		}
		inline int count() const{
			int ans=0;
			for(int i=0;i<T2/2;++i){
				ans+=__builtin_popcountll( ((u64*)(a)) [i]);
			}
			return ans;
			//needs to support avx512
			//flags: AVX512VPOPCNTDQ + AVX512VL
			/*
			int ans[8]={};
			vec res=vec_zero,tmp;
			cup p=a,pend=a+T2;
			for(;p!=pend;p+=8){
			tmp=v32(popcnt_epi32)(load(p));
			res=v32(add_epi32)(res,tmp);
			}
			v32(store_si256)((vec*)ans,res);
			return accumulate(ans,ans+8,0);
			*/
		}
		inline B& operator =(const B& b){
			u32* p=a;
			cup pend=a+T2,p2=b.a;
			for(;p!=pend;p+=8,p2+=8){
				v32(store_si256)((vec*)p,load(p2));
			}
			return *this;
		}
		inline B operator ~() const{
			static const __m256i all_one=vec_one;
			B ans; cup p=a,pend=a+T2;
			u32* pans=ans.a;
			for(;p!=pend;p+=8,pans+=8){
				auto v=v32(load_si256)((vec*)p);
				v=v32(andnot_si256)(v,all_one);
				v32(store_si256)((vec*)pans,v);
			}
			revise();
			return ans;
		}
#define OP(b,op) \
		B ans; cup p=a,pend=a+T2,p2=b.a;\
		u32* pans=ans.a;\
		for(;p!=pend;p+=8,p2+=8,pans+=8){\
		const vec& tmp=v32(op##_si256)(load(p),load(p2));\
		v32(store_si256)((vec*)pans,tmp);\
		}return ans
		inline B operator &(const B& b) const{
			OP(b,and);
		}
		inline B operator |(const B& b) const{
			OP(b,or);
		}
		inline B operator ^(const B& b) const{
			OP(b,xor);
		}
#undef OP
#define OP(b,op) \
		cup p=a,pend=a+T2,p2=b.a;\
		for(;p!=pend;p+=8,p2+=8){\
		const vec& tmp=v32(op##_si256)(load(p),load(p2));\
		v32(store_si256)((vec*)p,tmp);\
		}return *this
		inline B& operator &=(const B& b){
			OP(b,and);
		}
		inline B& operator |=(const B& b){
			OP(b,or);
		}
		inline B& operator ^=(const B& b){
			OP(b,xor);
		}
#undef OP
		inline B operator <<(const int& t) const{
			B ans;
			if(t>=N) return ans;
			const int div=t/32,rem=t%32;
			cup p=a+T2-8;
			u32* pans=ans.a+T2-8;
			if(rem){
				for(;p-div-1>=a;p-=8,pans-=8){
					vec v1=v32(slli_epi32)(loadu(p-div),rem);
					vec v2=v32(srli_epi32)(loadu(p-div-1),32-rem);
					v32(store_si256)((vec*)pans,v32(or_si256)(v1,v2));
				}
			}
			else{
				for(;p-div-1>=a;p-=8,pans-=8){
					v32(store_si256)((vec*)pans,loadu(p-div));
				}
			}
			p+=7,pans+=7;
			for(;p-div-1>=a;--p,--pans){
				(*pans)=(*(p-div))<<rem;
				if(rem) (*pans)|=(*(p-div-1))>>(32-rem);
			}
			(*pans)=a[0]<<rem;
			ans.revise();
			return ans;
		}
		inline B& operator <<=(const int& t){
			return (*this)=(*this)<<t;
		}
		inline B operator >>(const int& t) const{
			B ans;
			if(t>=N) return ans;
			const int div=t/32,rem=t%32;
			cup p=a;
			u32* pans=ans.a;
			if(rem){
				for(;p+div+1<=a+T2-8;p+=8,pans+=8){
					vec v1=v32(srli_epi32)(loadu(p+div),rem);
					vec v2=v32(slli_epi32)(loadu(p+div+1),32-rem);
					v32(store_si256)((vec*)pans,v32(or_si256)(v1,v2));
				}
			}
			else{
				for(;p+div+1<=a+T2-8;p+=8,pans+=8){
					v32(store_si256)((vec*)pans,loadu(p+div));
				}
			}
			p-=7,pans-=7;
			for(;p+div+1<=a+T2-8;++p,++pans){
				(*pans)=(*(p+div))>>rem;
				if(rem) (*pans)|=(*(p+div+1))<<(32-rem);
			}
			(*pans)=a[T2-1]>>rem;
			ans.revise();
			return ans;
		}
		inline B& operator >>=(const int& t){
			return (*this)=(*this)>>t;
		}
//the following parts are for competitive programming
#ifdef __JYC_CP
#define OP(x,y,z,op) \
		z.reset(); cup p=x.a,pend=x.a+T2,p2=y.a;\
		u32* pans=z.a;\
		for(;p!=pend;p+=8,p2+=8,pans+=8){\
		const vec& tmp=v32(op##_si256)(load(p),load(p2));\
		v32(store_si256)((vec*)pans,tmp);}
		inline static void And(const B& x,const B& y,B& z){
			OP(x,y,z,and);
		}
		inline static void Or(const B& x,const B& y,B& z){
			OP(x,y,z,or);
		}
		inline static void Xor(const B& x,const B& y,B& z){
			OP(x,y,z,xor);
		}
#undef OP
		inline static void Lsh(const B& x,const int& t,B& ans){
			ans.reset();
			if(t>=N) return;
			const int div=t/32,rem=t%32;
			cup p=x.a+T2-8;
			u32* pans=ans.a+T2-8;
			if(rem){
				for(;p-div-1>=x.a;p-=8,pans-=8){
					vec v1=v32(slli_epi32)(loadu(p-div),rem);
					vec v2=v32(srli_epi32)(loadu(p-div-1),32-rem);
					v32(store_si256)((vec*)pans,v32(or_si256)(v1,v2));
				}
			}
			else{
				for(;p-div-1>=x.a;p-=8,pans-=8){
					v32(store_si256)((vec*)pans,loadu(p-div));
				}
			}
			p+=7,pans+=7;
			for(;p-div-1>=x.a;--p,--pans){
				(*pans)=(*(p-div))<<rem;
				if(rem) (*pans)|=(*(p-div-1))>>(32-rem);
			}
			(*pans)=x.a[0]<<rem;
			ans.revise();
		}
		inline static void Rsh(const B& x,const int& t,B& ans){
			ans.reset();
			if(t>=N) return;
			const int div=t/32,rem=t%32;
			cup p=x.a;
			u32* pans=ans.a;
			if(rem){
				for(;p+div+1<=x.a+T2-8;p+=8,pans+=8){
					vec v1=v32(srli_epi32)(loadu(p+div),rem);
					vec v2=v32(slli_epi32)(loadu(p+div+1),32-rem);
					v32(store_si256)((vec*)pans,v32(or_si256)(v1,v2));
				}
			}
			else{
				for(;p+div+1<=x.a+T2-8;p+=8,pans+=8){
					v32(store_si256)((vec*)pans,loadu(p+div));
				}
			}
			p-=7,pans-=7;
			for(;p+div+1<=x.a+T2-8;++p,++pans){
				(*pans)=(*(p+div))>>rem;
				if(rem) (*pans)|=(*(p+div+1))<<(32-rem);
			}
			(*pans)=x.a[T2-1]>>rem;
			ans.revise();
		}
		inline int count(int n) const{
			int ans=0;
			for(int i=0;i<n+63;i+=64){
				ans+=__builtin_popcountll(*( ((u64*)(a)) + i/32 ));
			}
			return ans;
		}
#endif
		string str() const{
			string s="";
			for(int i=N-1;i>=0;--i){
				s.push_back('0'+(int)operator[](i));
			}
			return s;
		}
		void output() const{
			debug("%s\n",this->str().data());
		}
	};
}
#undef B

//#define N 16
#define N (200*256)
using B=avx256_bitset::Bitset<N>;
int pos[N];
B f[N],suf[N],tmp,res;

int main(){
	FAST_IO;
	int n=read(),m=read();
	for(int i=0;i<n;i++){
		pos[read()-1]=i;
	}
	tmp.reset();
	for(int x=n-1;x>=0;x--){
		B::Rsh(tmp,pos[x],f[pos[x]]);
		tmp.set(pos[x]);
//		f[pos[x]]=tmp>>pos[x];
//		tmp[pos[x]]=1;
	}
	LL ans=0;
	for(int i=0,last=-1;i<n-m+1;i++){
		if(i>last){
			last+=m;
			suf[0]=f[last];
			for(int j=1;j<m;j++){
				B::And(suf[j-1],f[last-j],suf[j]);
//				suf[j]=suf[j-1]&f[last-j];
			}
			tmp.set();
		}
		B::And(suf[last-i],tmp,res);
		ans+=(suf[last-i]&tmp).count(n+63);
//		ans+=(suf[last-i]&tmp).count();
		tmp&=f[i+m];
	}
	cout<<ans;
	return 0;
}
/* stuff you should look for
* int overflow, array bounds
* special cases (n=1?)
* do smth instead of nothing and stay organized
* WRITE STUFF DOWN
* DON'T GET STUCK ON ONE APPROACH
*/

詳細信息

Test #1:

score: 100
Accepted
time: 19ms
memory: 643552kb

input:

5 3
5 2 1 3 4

output:

0

result:

ok answer is '0'

Test #2:

score: 0
Accepted
time: 24ms
memory: 643688kb

input:

5 2
3 1 4 2 5

output:

2

result:

ok answer is '2'

Test #3:

score: 0
Accepted
time: 31ms
memory: 643744kb

input:

4 2
1 2 3 4

output:

3

result:

ok answer is '3'

Test #4:

score: 0
Accepted
time: 19ms
memory: 643840kb

input:

4 2
4 3 2 1

output:

0

result:

ok answer is '0'

Test #5:

score: 0
Accepted
time: 8ms
memory: 643600kb

input:

1 1
1

output:

0

result:

ok answer is '0'

Test #6:

score: -100
Runtime Error

input:

50000 2
44045 29783 5389 7756 44022 45140 21967 5478 10868 49226 21775 31669 49836 13511 46116 14229 27206 31168 37389 3158 10658 41154 14635 18526 40540 6451 23197 46719 30593 13517 8604 46666 39189 43746 12778 3684 3194 36979 43020 14652 19549 31178 17144 27177 44336 2849 40220 11751 41993 32209 4...

output:


result: