不幸的是,对于日常程序员,OpenCL 的学习曲线陡峭;一个简单的 Hello World 程序可能就需要上百行晦涩难懂的代码。因此,为了减轻这种痛苦,Khronos 组织已经开发了一个称为 SYCL 的新标准,这是一个在 OpenCL 之上的 C++ 抽象层。 -- Tartan Llama
本文导航
-GPGPU 简介 …… 22%
-回到 SYCL …… 37%
-SYCL 并行 STL …… 79%
编译自: https://blog.tartanllama.xyz/c++/2017/05/19/sycl/
作者: Tartan Llama
译者: ictlyh
#include <vector>
#include <iostream>
#include <sycl/execution_policy>
#include <experimental/algorithm>
#include <sycl/helpers/sycl_buffers.hpp>
using namespace std::experimental::parallel;
using namespace sycl::helpers;
int main() {
constexpr size_t array_size = 1024*512;
std::array<cl::sycl::cl_int, array_size> a;
std::iota(begin(a),end(a),0);
{
cl::sycl::buffer<int> b(a.data(), cl::sycl::range<1>(a.size()));
cl::sycl::queue q;
sycl::sycl_execution_policy<class Mul> sycl_policy(q);
transform(sycl_policy, begin(b), end(b), begin(b),
[](int x) { return x*2; });
}
}
#include <iostream>
#include <array>
#include <numeric>
#include <CL/cl.hpp>
int main(){
std::vector<cl::Platform> all_platforms;
cl::Platform::get(&all_platforms);
if(all_platforms.size()==0){
std::cout<<" No platforms found. Check OpenCL installation!\n";
exit(1);
}
cl::Platform default_platform=all_platforms[0];
std::vector<cl::Device> all_devices;
default_platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices);
if(all_devices.size()==0){
std::cout<<" No devices found. Check OpenCL installation!\n";
exit(1);
}
cl::Device default_device=all_devices[0];
cl::Context context({default_device});
cl::Program::Sources sources;
std::string kernel_code=
" void kernel mul2(global int* A){"
" A[get_global_id(0)]=A[get_global_id(0)]*2;"
" }";
sources.push_back({kernel_code.c_str(),kernel_code.length()});
cl::Program program(context,sources);
if(program.build({default_device})!=CL_SUCCESS){
std::cout<<" Error building: "<<program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(default_device)<<"\n";
exit(1);
}
constexpr size_t array_size = 1024*512;
std::array<cl_int, array_size> a;
std::iota(begin(a),end(a),0);
cl::Buffer buffer_A(context,CL_MEM_READ_WRITE,sizeof(int)*a.size());
cl::CommandQueue queue(context,default_device);
if (queue.enqueueWriteBuffer(buffer_A,CL_TRUE,0,sizeof(int)*a.size(),a.data()) != CL_SUCCESS) {
std::cout << "Failed to write memory;n";
exit(1);
}
cl::Kernel kernel_add = cl::Kernel(program,"mul2");
kernel_add.setArg(0,buffer_A);
if (queue.enqueueNDRangeKernel(kernel_add,cl::NullRange,cl::NDRange(a.size()),cl::NullRange) != CL_SUCCESS) {
std::cout << "Failed to enqueue kernel\n";
exit(1);
}
if (queue.finish() != CL_SUCCESS) {
std::cout << "Failed to finish kernel\n";
exit(1);
}
if (queue.enqueueReadBuffer(buffer_A,CL_TRUE,0,sizeof(int)*a.size(),a.data()) != CL_SUCCESS) {
std::cout << "Failed to read result\n";
exit(1);
}
}
#include <CL/sycl.hpp>
#include <array>
#include <numeric>
#include <iostream>
int main() {
const size_t array_size = 1024*512;
std::array<cl::sycl::cl_int, array_size> in,out;
std::iota(begin(in),end(in),0);
{
cl::sycl::queue device_queue;
cl::sycl::range<1> n_items{array_size};
cl::sycl::buffer<cl::sycl::cl_int, 1> in_buffer(in.data(), n_items);
cl::sycl::buffer<cl::sycl::cl_int, 1> out_buffer(out.data(), n_items);
device_queue.submit([&](cl::sycl::handler &cgh) {
constexpr auto sycl_read = cl::sycl::access::mode::read;
constexpr auto sycl_write = cl::sycl::access::mode::write;
auto in_accessor = in_buffer.get_access<sycl_read>(cgh);
auto out_accessor = out_buffer.get_access<sycl_write>(cgh);
cgh.parallel_for<class VecScalMul>(n_items,
[=](cl::sycl::id<1> wiID) {
out_accessor[wiID] = in_accessor[wiID]*2;
});
});
}
}
#include <CL/sycl.hpp>
// Excerpt: declare the input and output host arrays of `array_size` elements
// and fill `in` with 0, 1, 2, ...
const size_t array_size = 1024*512;
std::array<cl::sycl::cl_int, array_size> in,out;
std::iota(begin(in),end(in),0);
// Excerpt: a bare block scope; its contents are elided here.
{
//...
}
// Excerpt: a one-dimensional range of `array_size` items and two buffers
// constructed from the host arrays' data pointers and that range.
cl::sycl::range<1> n_items{array_size};
cl::sycl::buffer<cl::sycl::cl_int, 1> in_buffer(in.data(), n_items);
cl::sycl::buffer<cl::sycl::cl_int, 1> out_buffer(out.data(), n_items);
// Excerpt: submit a command group to the queue; the lambda receives the
// handler `cgh` and captures by reference. Its body is elided here.
device_queue.submit([&](cl::sycl::handler &cgh) {/*...*/});
// Excerpt: request accessors on both buffers through the handler `cgh`.
// The input buffer is only read, so `sycl_read` is the read access mode (as
// its name says); the output buffer is only written, so `sycl_write` is the
// write access mode.
constexpr auto sycl_read = cl::sycl::access::mode::read;
constexpr auto sycl_write = cl::sycl::access::mode::write;
auto in_accessor = in_buffer.get_access<sycl_read>(cgh);
auto out_accessor = out_buffer.get_access<sycl_write>(cgh);
// Excerpt: enqueue the kernel named `VecScalMul` over `n_items`. The kernel
// lambda captures by value and, for the id `wiID` it is given, stores twice
// the input element at that id into the output.
cgh.parallel_for<class VecScalMul>(n_items,
[=](cl::sycl::id<1> wiID) {
out_accessor[wiID] = in_accessor[wiID]*2;
});
// Closing brace of a scope whose opening is not part of this excerpt.
}
#include <vector>
#include <iostream>
#include <sycl/execution_policy>
#include <experimental/algorithm>
#include <sycl/helpers/sycl_buffers.hpp>
using namespace std::experimental::parallel;
using namespace sycl::helpers;
int main() {
constexpr size_t array_size = 1024*512;
std::array<cl::sycl::cl_int, array_size> in,out;
std::iota(begin(in),end(in),0);
{
cl::sycl::buffer<int> in_buffer(in.data(), cl::sycl::range<1>(in.size()));
cl::sycl::buffer<int> out_buffer(out.data(), cl::sycl::range<1>(out.size()));
cl::sycl::queue q;
sycl::sycl_execution_policy<class Mul> sycl_policy(q);
transform(sycl_policy, begin(in_buffer), end(in_buffer), begin(out_buffer),
[](int x) { return x*2; });
}
}
// Excerpt: declare the input and output host arrays of `array_size` elements
// and fill `in` with 0, 1, 2, ...
constexpr size_t array_size = 1024*512;
std::array<cl::sycl::cl_int, array_size> in, out;
std::iota(begin(in),end(in),0);
// Excerpt: one-dimensional buffers constructed from each host array's data
// pointer and element count.
cl::sycl::buffer<int> in_buffer(in.data(), cl::sycl::range<1>(in.size()));
cl::sycl::buffer<int> out_buffer(out.data(), cl::sycl::range<1>(out.size()));
// Excerpt: a default-constructed queue and an execution policy built from it;
// `Mul` is the kernel name given to the policy.
cl::sycl::queue q;
sycl::sycl_execution_policy<class Mul> sycl_policy(q);
// Excerpt: transform the input buffer's range into the output buffer,
// doubling each element.
transform(sycl_policy, begin(in_buffer), end(in_buffer), begin(out_buffer),
[](int x) { return x*2; });
欢迎光临 51学通信论坛2017新版 (http://bbs.51xuetongxin.com/) | Powered by Discuz! X3 |