clwrap.h (forked from preda/gpuowl)
// Copyright Mihai Preda.

#pragma once

#include "tinycl.h"

#include <string>
#include <string_view>
#include <vector>
#include <cassert>
#include <memory>
#include <any>

using cl_queue = cl_command_queue;

void release(cl_context context);
void release(cl_kernel k);
void release(cl_mem buf);
void release(cl_program program);
void release(cl_queue queue);
void release(cl_event event);
template<typename T>
struct Deleter {
  using pointer = T;
  void operator()(T t) const { release(t); }
};

namespace std {
template<> struct default_delete<cl_context> : public Deleter<cl_context> {};
template<> struct default_delete<cl_kernel> : public Deleter<cl_kernel> {};
template<> struct default_delete<cl_mem> : public Deleter<cl_mem> {};
template<> struct default_delete<cl_program> : public Deleter<cl_program> {};
template<> struct default_delete<cl_queue> : public Deleter<cl_queue> {};
template<> struct default_delete<cl_event> : public Deleter<cl_event> {};
}

template<typename T> using Holder = std::unique_ptr<T, Deleter<T> >;

using QueueHolder = std::unique_ptr<cl_queue>;
using KernelHolder = std::unique_ptr<cl_kernel>;
using EventHolder = std::unique_ptr<cl_event>;
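
// Usage sketch for the RAII holders above (illustrative only; `device`, `program` and the
// kernel name are placeholders, not part of this header). Destroying a Holder calls the
// matching release() overload on the raw OpenCL handle.
//
//   cl_device_id device = getDevice(0);
//   Holder<cl_context> context{createContext(device)};
//   KernelHolder kernel{makeKernel(program, "square")};
//   // context.get() and kernel.get() return the raw cl_context / cl_kernel handles.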

class Context;

std::string getUUID(int seqId);

void check(int err, const char *file, int line, const char *func, string_view mes);

#define CHECK1(err) check(err, __FILE__, __LINE__, __func__, #err)
#define CHECK2(err, mes) check(err, __FILE__, __LINE__, __func__, mes)
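
// Example use of the check macros (sketch; `queue` is a placeholder). CHECK1 stringifies the
// checked expression so a failure reports the offending call; CHECK2 takes an explicit message.
//
//   CHECK1(clFinish(queue));
//   CHECK2(clFlush(queue), "flush command queue");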

vector<cl_device_id> getAllDeviceIDs();

string getShortInfo(cl_device_id device);
string getLongInfo(cl_device_id device);

// Get GPU free memory in bytes.
u64 getFreeMem(cl_device_id id);

bool hasFreeMemInfo(cl_device_id id);
bool isAmdGpu(cl_device_id id);

cl_context createContext(cl_device_id id);

cl_program compile(cl_context context, cl_device_id device, const string &source, const string &extraArgs,
                   const std::vector<string>& defines);

cl_program loadBinary(cl_context, cl_device_id, const string& fileName);
string getBinary(cl_program program);
void dumpBinary(cl_program program, const string& fileName);

cl_kernel makeKernel(cl_program program, const char *name);

template<typename T>
void setArg(cl_kernel k, int pos, const T &value) { CHECK1(clSetKernelArg(k, pos, sizeof(value), &value)); }

template<>
void setArg<int>(cl_kernel k, int pos, const int &value);
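
// setArg() forwards the value's bytes to clSetKernelArg; the int specialization is declared
// here and defined out of line. Sketch of setting arguments by position (`kernel`, `buf` and
// the argument layout are assumptions for illustration):
//
//   setArg(kernel, 0, buf);         // buffer argument (cl_mem)
//   setArg(kernel, 1, u32(1000));   // scalar argument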

cl_mem makeBuf_(cl_context context, unsigned kind, size_t size, const void *ptr = 0);
cl_queue makeQueue(cl_device_id d, cl_context c, bool profile);

void flush(cl_queue q);
void finish(cl_queue q);

EventHolder run(cl_queue queue, cl_kernel kernel, size_t groupSize, size_t workSize, const string &name, bool generateEvent);

void read(cl_queue queue, bool blocking, cl_mem buf, size_t size, void *data, size_t start = 0);
void write(cl_queue queue, bool blocking, cl_mem buf, size_t size, const void *data, size_t start = 0);
void copyBuf(cl_queue queue, const cl_mem src, cl_mem dst, size_t size);
void fillBuf(cl_queue q, cl_mem buf, void *pat, size_t patSize, size_t size = 0, size_t start = 0);
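
// Typical buffer and queue flow (sketch only; CL_MEM_READ_WRITE is a standard OpenCL flag,
// while the sizes, the kernel and the work dimensions are placeholders):
//
//   Holder<cl_mem> buf{makeBuf_(context.get(), CL_MEM_READ_WRITE, 1024 * sizeof(u32))};
//   QueueHolder queue{makeQueue(device, context.get(), /*profile=*/false)};
//   vector<u32> host(1024, 0);
//   write(queue.get(), /*blocking=*/true, buf.get(), host.size() * sizeof(u32), host.data());
//   setArg(kernel, 0, buf.get());
//   run(queue.get(), kernel, /*groupSize=*/256, /*workSize=*/1024, "square", /*generateEvent=*/false);
//   read(queue.get(), /*blocking=*/true, buf.get(), host.size() * sizeof(u32), host.data());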

int getKernelNumArgs(cl_kernel k);
int getWorkGroupSize(cl_kernel k, cl_device_id device, const char *name);
std::string getKernelArgName(cl_kernel k, int pos);

cl_device_id getDevice(u32 argsDevId);

u64 getEventNanos(cl_event event);
u32 getEventInfo(cl_event event);

cl_context getQueueContext(cl_command_queue q);
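
// Profiling sketch with the event helpers (assumes the queue was created with profile=true;
// `device`, `context` and `kernel` are placeholders). run() yields an EventHolder when
// generateEvent is true; getEventNanos() presumably reports the event's duration in nanoseconds.
//
//   QueueHolder q{makeQueue(device, context.get(), /*profile=*/true)};
//   EventHolder e = run(q.get(), kernel, 256, 1024, "square", /*generateEvent=*/true);
//   finish(q.get());
//   if (e) { u64 nanos = getEventNanos(e.get()); }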