PrevUpHomeNext

sycl::group_barrier @cpp


sycl::group_barrier 组屏障 - cpp

sycl::group_barrier 组屏障, cpp/c++

当 sycl 工作组中的各个工作项并行地完成了各自的任务后,如果需要同步它们的步伐,就可以使用屏障,即调用 sycl::group_barrier。第一个参数是通过 item 对象的 .get_group() 取得的工作组,第二个参数是屏障的内存作用范围,一般是 sycl::memory_scope::work_group。

sycl::group_barrier 即组屏障,用于同步一个工作组中的所有工作项。

{ // work-item scope
	// some work ...

	// Local memory was read/written above. A barrier is generally needed after
	// writes (so the group is synchronized before continuing); after reads only
	// it can usually be skipped.
	sycl::group_barrier(item.get_group(), sycl::memory_scope::work_group);

	// more work ...
} // end of work-item scope

sycl::group_barrier cpp 代码例子

文件名:sycl_group_barrier.cpp

#include <iomanip>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>

#include <sycl/sycl.hpp>

// Example for sycl::group_barrier: adds two ysize-by-xsize float arrays
// element-wise on a CPU device, staging the operands in work-group local
// memory and synchronizing the work-group with group barriers, then prints
// both inputs and the result.
int main()
{
	sycl::queue queue{sycl::cpu_selector_v};

	// Global problem size (ysize rows, xsize columns) and work-group size
	// (lysize rows, lxsize columns).
	constexpr int xsize = 8;
	constexpr int ysize = 9;
	constexpr int size = xsize*ysize;
	constexpr int lxsize = 4;
	constexpr int lysize = 3;
	static_assert(xsize % lxsize == 0 && ysize % lysize == 0,
		"each global extent must be a multiple of the work-group extent");

	// v0/v1 are shared allocations, so the host fills and prints them directly.
	// result is a device allocation and is copied to host memory before printing.
	float * v0 = sycl::malloc_shared<float>(size, queue);
	float * v1 = sycl::malloc_shared<float>(size, queue);
	float * result = sycl::malloc_device<float>(size, queue);
	std::iota(v0, v0+size, 1.5f);
	std::iota(v1, v1+size, -100.44f);

	// Number of local-memory floats reserved per work-item along x;
	// only the first three slots of each stripe are used below.
	constexpr int lm_offset = 10;

	queue.submit(
		[&] (sycl::handler & handler)
		{
			sycl::local_accessor<float, 2> lm{sycl::range<2>{lysize, lxsize*lm_offset}, handler};
			handler.parallel_for<class kname1>(
				sycl::nd_range<2>{
					sycl::range<2>{ysize, xsize},
					sycl::range<2>{lysize, lxsize}
				},
				[=] (sycl::nd_item<2> item)
				{
					const std::size_t gidy = item.get_global_id(0);
					const std::size_t gidx = item.get_global_id(1);
					const std::size_t lidy = item.get_local_id(0);
					const std::size_t lidx = item.get_local_id(1);

					// Row-major index of this work-item's element in v0/v1/result.
					const std::size_t gindex = gidy*xsize + gidx;

					// This work-item's private stripe inside the local buffer.
					float & lm0 = lm[lidy][lidx*lm_offset+0];
					float & lm1 = lm[lidy][lidx*lm_offset+1];
					float & lm2 = lm[lidy][lidx*lm_offset+2];

					lm0 = v0[gindex];
					lm1 = v1[gindex];
					lm2 = 0.0f;

					// Synchronize the work-group after the local-memory writes.
					sycl::group_barrier(item.get_group(), sycl::memory_scope::work_group);

					lm2 = lm0 + lm1;
					sycl::group_barrier(item.get_group(), sycl::memory_scope::work_group);

					result[gindex] = lm2;
				}
			);
		}
	);
	queue.wait();

	// queue.copy only enqueues the transfer; wait for it to finish before the
	// host reads host_result, otherwise the values printed below are racy.
	std::vector<float> host_result(size);
	queue.copy(result, host_result.data(), size).wait();

	// Prints a label followed by the array as ysize rows of xsize values.
	auto print = [&] (const std::string & label, const float * data)
	{
		std::cout << label << '\n';
		for (int row = 0; row < ysize; ++row)
		{
			for (int col = 0; col < xsize; ++col)
			{
				std::cout << std::setprecision(3) << data[row*xsize+col] << ' ';
			}
			std::cout << '\n';
		}
		std::cout << '\n';
	};

	print("v0:", v0);
	print("v1:", v1);
	print("host_result:", host_result.data());

	sycl::free(v0, queue);
	sycl::free(v1, queue);
	sycl::free(result, queue);
}

输出:

v0:
1.5 2.5 3.5 4.5 5.5 6.5 7.5 8.5
9.5 10.5 11.5 12.5 13.5 14.5 15.5 16.5
17.5 18.5 19.5 20.5 21.5 22.5 23.5 24.5
25.5 26.5 27.5 28.5 29.5 30.5 31.5 32.5
33.5 34.5 35.5 36.5 37.5 38.5 39.5 40.5
41.5 42.5 43.5 44.5 45.5 46.5 47.5 48.5
49.5 50.5 51.5 52.5 53.5 54.5 55.5 56.5
57.5 58.5 59.5 60.5 61.5 62.5 63.5 64.5
65.5 66.5 67.5 68.5 69.5 70.5 71.5 72.5

v1:
-100 -99.4 -98.4 -97.4 -96.4 -95.4 -94.4 -93.4
-92.4 -91.4 -90.4 -89.4 -88.4 -87.4 -86.4 -85.4
-84.4 -83.4 -82.4 -81.4 -80.4 -79.4 -78.4 -77.4
-76.4 -75.4 -74.4 -73.4 -72.4 -71.4 -70.4 -69.4
-68.4 -67.4 -66.4 -65.4 -64.4 -63.4 -62.4 -61.4
-60.4 -59.4 -58.4 -57.4 -56.4 -55.4 -54.4 -53.4
-52.4 -51.4 -50.4 -49.4 -48.4 -47.4 -46.4 -45.4
-44.4 -43.4 -42.4 -41.4 -40.4 -39.4 -38.4 -37.4
-36.4 -35.4 -34.4 -33.4 -32.4 -31.4 -30.4 -29.4

host_result:
-98.9 -96.9 -94.9 -92.9 -90.9 -88.9 -86.9 -84.9
-82.9 -80.9 -78.9 -76.9 -74.9 -72.9 -70.9 -68.9
-66.9 -64.9 -62.9 -60.9 -58.9 -56.9 -54.9 -52.9
-50.9 -48.9 -46.9 -44.9 -42.9 -40.9 -38.9 -36.9
-34.9 -32.9 -30.9 -28.9 -26.9 -24.9 -22.9 -20.9
-18.9 -16.9 -14.9 -12.9 -10.9 -8.94 -6.94 -4.94
-2.94 -0.94 1.06 3.06 5.06 7.06 9.06 11.1
13.1 15.1 17.1 19.1 21.1 23.1 25.1 27.1
29.1 31.1 33.1 35.1 37.1 39.1 41.1 43.1

相关链接

sycl gpu 编程 - c++









首页:发一格 fayige.top









版权

Copyright 2024 fayige.top

Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)


PrevUpHomeNext