|
1 | | -# ncnn glsl extension |
| 1 | +# ncnn GLSL extension |
2 | 2 |
|
3 | 3 | ## rationale |
4 | 4 | Different GPUs support different features: some support fp16 as buffer storage type, some support fp16 as operand variable, and some old GPUs only support fp32. |
@@ -53,6 +53,80 @@ void main() |
53 | 53 |
|
54 | 54 | The ncnn glsl extension provides the necessary data types for storage, computation, shared memory, and load, store, conversion functions for buffers and images. We also provide some buffer and image copy functions to prevent loss of precision when using fp16 as the intermediate data type, and to avoid unnecessary `unpackHalf2x16` and `packHalf2x16` pairs. |
55 | 55 |
|
| 56 | +# entrypoint for compiling GLSL |
| 57 | +
|
| 58 | +The gpu.h header in the ncnn library exposes 3 APIs for compiling glsl code into spir-v binary. All of them support the ncnn glsl extension, and each accepts an `opt` argument that controls how the ncnn glsl extension is expanded. The first two accept raw glsl code strings, and the last one compiles one of ncnn's built-in shaders, selected by its shader type index. |
| 59 | +
|
| 60 | +```cpp |
| 61 | +namespace ncnn { |
| 62 | +
|
| 63 | +// online spirv compilation |
| 64 | +NCNN_EXPORT int compile_spirv_module(const char* comp_string, const Option& opt, std::vector<uint32_t>& spirv); |
| 65 | +NCNN_EXPORT int compile_spirv_module(const char* comp_data, int comp_data_size, const Option& opt, std::vector<uint32_t>& spirv); |
| 66 | +NCNN_EXPORT int compile_spirv_module(int shader_type_index, const Option& opt, std::vector<uint32_t>& spirv); |
| 67 | +
|
| 68 | +} // namespace ncnn |
| 69 | +``` |
| 70 | + |
| 71 | +## compile ncnn extended GLSL code directly |
| 72 | + |
| 73 | +You can write shader code with the ncnn glsl extension and compile it to spir-v using the ncnn functions. The compiled product is a standard-compliant spir-v binary, which can be directly used to create a pipeline object in the Vulkan API. |
| 74 | + |
| 75 | +```cpp |
| 76 | +static const char my_glsl_data[] = R"( |
| 77 | +#version 450 |
| 78 | +
|
| 79 | +#if NCNN_fp16_storage |
| 80 | +#extension GL_EXT_shader_16bit_storage: require |
| 81 | +#endif |
| 82 | +#if NCNN_fp16_arithmetic |
| 83 | +#extension GL_EXT_shader_explicit_arithmetic_types_float16: require |
| 84 | +#endif |
| 85 | +
|
| 86 | +layout (binding = 0) readonly buffer a_blob { sfpvec4 a_blob_data[]; }; |
| 87 | +layout (binding = 1) writeonly buffer b_blob { sfpvec4 b_blob_data[]; }; |
| 88 | +
|
| 89 | +void main() |
| 90 | +{ |
| 91 | + const int i = int(gl_GlobalInvocationID.x); |
| 92 | +
|
| 93 | + afpvec4 v = buffer_ld4(a_blob_data, i); |
| 94 | +
|
| 95 | + v = v + 123; |
| 96 | +
|
| 97 | + buffer_st4(b_blob_data, i, v); |
| 98 | +} |
| 99 | +)"; |
| 100 | + |
| 101 | +Option opt; |
| 102 | + // you can control the extension behavior |
| 103 | + // even if the gpu supports 16bit storage |
| 104 | +opt.use_fp16_storage = false; |
| 105 | + |
| 106 | +std::vector<uint32_t> spirv; |
| 107 | +ncnn::compile_spirv_module(my_glsl_data, sizeof(my_glsl_data) - 1, opt, spirv); |
| 108 | + |
| 109 | +// To create pipeline object later |
| 110 | +// ncnn::Pipeline pipeline(vkdev); |
| 111 | +// pipeline.set_local_size_xyz(64, 1, 1); |
| 112 | +// pipeline.create(spirv.data(), spirv.size() * 4, specializations); |
| 113 | +``` |
| 114 | +
|
| 115 | +## ncnn built-in shader |
| 116 | +
|
| 117 | +The shader index inside ncnn is exposed in the `layer_shader_type.h` header and can be used if needed. |
| 118 | +
|
| 119 | +```cpp |
| 120 | +#include "layer_shader_type.h" |
| 121 | +
|
| 122 | +int shader_type_index = LayerShaderType::convert_ycbcr; |
| 123 | +
|
| 124 | +Option opt; |
| 125 | +
|
| 126 | +std::vector<uint32_t> spirv; |
| 127 | +int retc = compile_spirv_module(shader_type_index, opt, spirv); |
| 128 | +``` |
| 129 | + |
56 | 130 | # data types |
57 | 131 |
|
58 | 132 | ## storage type |
@@ -96,10 +170,10 @@ declare variable in shared local memory |
96 | 170 | shared lfp tmp_a[8][4][2]; |
97 | 171 | ``` |
98 | 172 |
|
99 | | -|local type|fp32|fp16a| |
100 | | -|---|---|---| |
101 | | -|lfp|float|float16_t| |
102 | | -|lfpvec4|vec4|f16vec4| |
| 173 | +|local type|fp32|fp16p / fp16s|fp16s + fp16a| |
| 174 | +|---|---|---|---| |
| 175 | +|lfp|float|float|float16_t| |
| 176 | +|lfpvec4|vec4|uvec2|f16vec4| |
103 | 177 |
|
104 | 178 | ## image format and precision hint type |
105 | 179 |
|
|
0 commit comments