blob: c4117b8a1a3a6661715a93928e257c7803ac6ff1 [file] [log] [blame]
Georgios Pinitas908f6162021-05-04 10:11:09 +01001/*
Mohammed Suhail Munshi73771072024-03-25 15:55:42 +00002 * Copyright (c) 2016-2024 Arm Limited.
Georgios Pinitas908f6162021-05-04 10:11:09 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
Georgios Pinitas7891a732021-08-20 21:39:25 +010024#include "src/gpu/cl/ClKernelLibrary.h"
Georgios Pinitas908f6162021-05-04 10:11:09 +010025
26#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Utils.h"
28
29#include <algorithm>
30#include <array>
31#include <fstream>
32#include <utility>
33
34#ifdef ARM_COMPUTE_COMPRESSED_KERNELS
35#include <zlib.h>
36
37namespace
38{
/* Base64 decoding table, indexed by character code.
 *
 * 'A'-'Z' map to 0-25, 'a'-'z' to 26-51, '0'-'9' to 52-61, '+' to 62 and '/' to 63.
 * Every other entry (including the one for the padding character '=') is 0.
 */
constexpr std::array<uint8_t, 256> b64_invtab = {
    // 0x00 - 0x0F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x10 - 0x1F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x20 - 0x2F : '+' (0x2B) and '/' (0x2F)
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 0, 0, 0, 63,
    // 0x30 - 0x3F : '0'-'9'
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0, 0, 0, 0, 0, 0,
    // 0x40 - 0x4F : 'A'-'O' start at 0x41
    0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
    // 0x50 - 0x5F : 'P'-'Z'
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0, 0, 0, 0, 0,
    // 0x60 - 0x6F : 'a'-'o' start at 0x61
    0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    // 0x70 - 0x7F : 'p'-'z'
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 0, 0, 0, 0,
    // 0x80 - 0xFF : no base64 characters in the upper half
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
51
52/** Decode a base64 encoded string
53 *
54 * @param[in] str Base64 encoded string to decode
55 *
56 * @return The decode string in case of a valid, non-empty string otherwise an empty string
57 */
58std::string decode_base64(const std::string &str)
59{
60 constexpr const char pad_char = '=';
61
62 // Handle empty string
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010063 if (str.empty())
Georgios Pinitas908f6162021-05-04 10:11:09 +010064 {
65 return {};
66 }
67
68 // Base64 encoded string has size multiple of 4
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010069 if (str.length() % 4)
Georgios Pinitas908f6162021-05-04 10:11:09 +010070 {
71 return {};
72 }
73
74 //
75 // Check encoded string padding
76 std::size_t padding = (str.rbegin()[0] == pad_char) + (str.rbegin()[1] == pad_char);
77 const int str_len = str.size();
78
79 // Reserve memory for the decoded string
80 // Note each 4 consecutive elements of 6-bit encode 3 bytes
81 std::string dec_b64;
82 dec_b64.reserve(((str_len / 4) * 3));
83
84 // Block decoding function (exclude padding)
85 int c = 0;
86 const int end = str_len - 4 - padding;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010087 for (; c <= end; c += 4)
Georgios Pinitas908f6162021-05-04 10:11:09 +010088 {
89 const int byte0 = b64_invtab[str[c]];
90 const int byte1 = b64_invtab[str[c + 1]];
91 const int byte2 = b64_invtab[str[c + 2]];
92 const int byte3 = b64_invtab[str[c + 3]];
93
94 dec_b64.push_back((byte0 << 2) | (byte1 >> 4));
95 dec_b64.push_back((byte1 << 4) | (byte2 >> 2));
96 dec_b64.push_back((byte2 << 6) | (byte3));
97 }
98
99 // Last step that might contain padding symbols
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100100 if (padding == 1)
Georgios Pinitas908f6162021-05-04 10:11:09 +0100101 {
102 const int byte0 = b64_invtab[str[c]];
103 const int byte1 = b64_invtab[str[c + 1]];
104 const int byte2 = b64_invtab[str[c + 2]];
105
106 dec_b64.push_back((byte0 << 2) | (byte1 >> 4));
107 dec_b64.push_back((byte1 << 4) | (byte2 >> 2));
108 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100109 else if (padding == 2)
Georgios Pinitas908f6162021-05-04 10:11:09 +0100110 {
111 const int byte0 = b64_invtab[str[c]];
112 const int byte1 = b64_invtab[str[c + 1]];
113
114 dec_b64.push_back((byte0 << 2) | (byte1 >> 4));
115 }
116
117 return dec_b64;
118}
119
120/** Decompress a zlib compressed string
121 *
122 * @param[in] str ZLib compressed string
123 *
124 * @return The decompressed string if successful, otherwise false.
125 */
126std::string decompress_zlib(const std::string &str)
127{
128 // Create and initialize decompression stream
129 z_stream ds{};
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100130 if (inflateInit(&ds) != Z_OK)
Georgios Pinitas908f6162021-05-04 10:11:09 +0100131 {
132 return std::string();
133 }
134 ds.avail_in = str.size();
135 ds.next_in = (Bytef *)str.data();
136
137 // Roll-over the string using a buffer and decompress
138 int status = Z_OK;
139 char roll_buff[16384];
140 std::string inflated_str;
141 do
142 {
143 ds.avail_out = sizeof(roll_buff);
144 ds.next_out = reinterpret_cast<Bytef *>(roll_buff);
145
146 status = inflate(&ds, 0);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100147 if (inflated_str.size() < ds.total_out)
Georgios Pinitas908f6162021-05-04 10:11:09 +0100148 {
149 inflated_str.append(roll_buff, ds.total_out - inflated_str.size());
150 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100151 } while (status == Z_OK);
Georgios Pinitas908f6162021-05-04 10:11:09 +0100152
153 // Finalize decompression stream
154 inflateEnd(&ds);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100155 if (status != Z_STREAM_END)
Georgios Pinitas908f6162021-05-04 10:11:09 +0100156 {
157 return std::string();
158 }
159
160 return inflated_str;
161}
162} // namespace
163#endif /* ARM_COMPUTE_COMPRESSED_KERNELS */
164
165namespace arm_compute
166{
167namespace opencl
168{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100169const std::map<std::string, std::string> ClKernelLibrary::_kernel_program_map = {
Adnan AlSinanf81f51c2021-07-26 18:18:37 +0100170 // Common Kernels
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100171 {"activation_layer", "common/activation_layer.cl"},
172 {"activation_layer_quant", "common/activation_layer_quant.cl"},
173 {"activation_layer_quant_f32", "common/activation_layer_quant.cl"},
174 {"arg_min_max_x", "common/arg_min_max.cl"},
175 {"arg_min_max_y", "common/arg_min_max.cl"},
176 {"arg_min_max_z", "common/arg_min_max.cl"},
177 {"arg_min_max_w", "common/arg_min_max.cl"},
178 {"bitwise_or", "common/bitwise_op.cl"},
179 {"bitwise_and", "common/bitwise_op.cl"},
180 {"bitwise_xor", "common/bitwise_op.cl"},
181 {"bitwise_not", "common/bitwise_op.cl"},
182 {"bounding_box_transform", "common/bounding_box_transform.cl"},
183 {"bounding_box_transform_quantized", "common/bounding_box_transform_quantized.cl"},
184 {"compare_equal", "common/comparisons.cl"},
185 {"compare_equal_quantized", "common/comparisons.cl"},
186 {"compare_notequal", "common/comparisons.cl"},
187 {"compare_notequal_quantized", "common/comparisons.cl"},
188 {"compare_greater", "common/comparisons.cl"},
189 {"compare_greater_quantized", "common/comparisons.cl"},
190 {"compare_greaterequal", "common/comparisons.cl"},
191 {"compare_greaterequal_quantized", "common/comparisons.cl"},
192 {"compare_less", "common/comparisons.cl"},
193 {"compare_less_quantized", "common/comparisons.cl"},
194 {"compare_lessequal", "common/comparisons.cl"},
195 {"compare_lessequal_quantized", "common/comparisons.cl"},
196 {"concatenate", "common/concatenate.cl"},
197 {"concatenate_width", "common/concatenate.cl"},
198 {"concatenate_height", "common/concatenate.cl"},
199 {"concatenate_width_x2", "common/concatenate.cl"},
200 {"concatenate_width_x4", "common/concatenate.cl"},
201 {"col2im", "common/col2im.cl"},
202 {"cast_down", "common/cast.cl"},
203 {"cast_up", "common/cast.cl"},
204 {"convert_fc_weights", "common/convert_fc_weights.cl"},
205 {"copy_tensor", "common/copy_tensor.cl"},
206 {"crop_tensor", "common/crop_tensor.cl"},
207 {"deconvolution_reshape", "common/deconvolution_layer.cl"},
208 {"deconvolution_upsample", "common/deconvolution_layer.cl"},
209 {"dequantization_layer", "common/dequantization_layer.cl"},
210 {"elementwise_operation_ADD", "common/elementwise_operation.cl"},
211 {"elementwise_operation_SUB", "common/elementwise_operation.cl"},
212 {"elementwise_operation_MAX", "common/elementwise_operation.cl"},
213 {"elementwise_operation_MIN", "common/elementwise_operation.cl"},
214 {"elementwise_operation_DIV", "common/elementwise_operation.cl"},
215 {"elementwise_operation_SQUARED_DIFF", "common/elementwise_operation.cl"},
216 {"elementwise_operation_POWER", "common/elementwise_operation.cl"},
217 {"elementwise_operation_PRELU", "common/elementwise_operation.cl"},
218 {"elementwise_operation_AND", "common/elementwise_operation.cl"},
219 {"elementwise_operation_OR", "common/elementwise_operation.cl"},
220 {"elementwise_operation_ADD_quantized", "common/elementwise_operation_quantized.cl"},
221 {"elementwise_operation_SUB_quantized", "common/elementwise_operation_quantized.cl"},
222 {"elementwise_operation_MAX_quantized", "common/elementwise_operation_quantized.cl"},
223 {"elementwise_operation_MIN_quantized", "common/elementwise_operation_quantized.cl"},
224 {"elementwise_operation_DIV_quantized", "common/elementwise_operation_quantized.cl"},
225 {"elementwise_operation_SQUARED_DIFF_quantized", "common/elementwise_operation_quantized.cl"},
226 {"elementwise_operation_PRELU_quantized", "common/elementwise_operation_quantized.cl"},
227 {"elementwise_unary", "common/elementwise_unary.cl"},
228 {"elementwise_unary_quantized", "common/elementwise_unary_quantized.cl"},
229 {"fft_digit_reverse_axis_0", "common/fft_digit_reverse.cl"},
230 {"fft_digit_reverse_axis_1", "common/fft_digit_reverse.cl"},
231 {"fft_radix_2_first_stage_axis_0", "common/fft.cl"},
232 {"fft_radix_2_first_stage_axis_1", "common/fft.cl"},
233 {"fft_radix_2_axis_0", "common/fft.cl"},
234 {"fft_radix_2_axis_1", "common/fft.cl"},
235 {"fft_radix_3_first_stage_axis_0", "common/fft.cl"},
236 {"fft_radix_3_first_stage_axis_1", "common/fft.cl"},
237 {"fft_radix_3_axis_0", "common/fft.cl"},
238 {"fft_radix_3_axis_1", "common/fft.cl"},
239 {"fft_radix_4_first_stage_axis_0", "common/fft.cl"},
240 {"fft_radix_4_first_stage_axis_1", "common/fft.cl"},
241 {"fft_radix_4_axis_0", "common/fft.cl"},
242 {"fft_radix_4_axis_1", "common/fft.cl"},
243 {"fft_radix_5_first_stage_axis_0", "common/fft.cl"},
244 {"fft_radix_5_first_stage_axis_1", "common/fft.cl"},
245 {"fft_radix_5_axis_0", "common/fft.cl"},
246 {"fft_radix_5_axis_1", "common/fft.cl"},
247 {"fft_radix_7_first_stage_axis_0", "common/fft.cl"},
248 {"fft_radix_7_first_stage_axis_1", "common/fft.cl"},
249 {"fft_radix_7_axis_0", "common/fft.cl"},
250 {"fft_radix_7_axis_1", "common/fft.cl"},
251 {"fft_radix_8_first_stage_axis_0", "common/fft.cl"},
252 {"fft_radix_8_first_stage_axis_1", "common/fft.cl"},
253 {"fft_radix_8_axis_0", "common/fft.cl"},
254 {"fft_radix_8_axis_1", "common/fft.cl"},
255 {"fft_scale_conj", "common/fft_scale.cl"},
256 {"fill_image_borders_constant", "common/fill_border.cl"},
257 {"fill_image_borders_replicate", "common/fill_border.cl"},
258 {"floor_layer", "common/floor.cl"},
259 {"fuse_batchnormalization_layer", "common/batchnormalization_layer.cl"},
260 {"gather", "common/gather.cl"},
261 {"gemm_ma_f16", "common/gemm.cl"},
262 {"gemm_ma_f32", "common/gemm.cl"},
263 {"gemm_mv", "common/gemv.cl"},
264 {"gemm_mv_quantized", "common/gemv.cl"},
265 {"gemm_mm_native", "common/gemm.cl"},
266 {"gemm_mm_reshaped_only_rhs_nt_mmul", "common/gemm_reshaped_only_rhs_mmul.cl"},
267 {"gemm_mm_reshaped_only_rhs_nt_mmul_texture", "common/gemm_reshaped_only_rhs_mmul.cl"},
268 {"gemm_mm_reshaped_lhs_nt_rhs_t", "common/gemm.cl"},
269 {"gemm_mm_reshaped_lhs_nt_rhs_t_texture", "common/gemm.cl"},
270 {"gemm_mm_reshaped_lhs_t_rhs_nt", "common/gemm.cl"},
271 {"gemm_mm_reshaped_lhs_t_rhs_nt_texture", "common/gemm.cl"},
272 {"gemm_mm_reshaped_only_rhs_nt", "common/gemm.cl"},
273 {"gemm_mm_reshaped_only_rhs_nt_texture", "common/gemm.cl"},
274 {"gemm_mm_reshaped_only_rhs_t", "common/gemm.cl"},
275 {"gemm_mm_reshaped_only_rhs_t_texture", "common/gemm.cl"},
276 {"gemm_lc_vm_f32", "common/gemm.cl"},
277 {"gemm_reshape_lhs_matrix_nt", "common/gemm_utils.cl"},
278 {"gemm_reshape_lhs_matrix_t", "common/gemm_utils.cl"},
279 {"gemm_reshape_rhs_matrix_nt", "common/gemm_utils.cl"},
280 {"gemm_reshape_rhs_matrix_t", "common/gemm_utils.cl"},
281 {"gemmlowp_matrix_a_reduction", "common/gemmlowp.cl"},
282 {"gemmlowp_matrix_a_reduction_dot8", "common/gemmlowp.cl"},
283 {"gemmlowp_matrix_b_reduction", "common/gemmlowp.cl"},
284 {"gemmlowp_mm_native", "common/gemmlowp.cl"},
285 {"gemmlowp_mm_reshaped_lhs_nt_rhs_t", "common/gemmlowp.cl"},
286 {"gemmlowp_mm_reshaped_only_rhs_t", "common/gemmlowp.cl"},
287 {"gemmlowp_mm_reshaped_only_rhs_t_fused_output_stage_fixedpoint", "common/gemmlowp.cl"},
288 {"gemmlowp_mm_reshaped_only_rhs_mmul", "common/gemmlowp_reshaped_only_rhs_mmul.cl"},
289 {"gemmlowp_offset_contribution", "common/gemmlowp.cl"},
290 {"gemmlowp_offset_contribution_quantize_down", "common/gemmlowp.cl"},
291 {"gemmlowp_offset_contribution_quantize_down_fixedpoint", "common/gemmlowp.cl"},
292 {"gemmlowp_output_stage_quantize_down", "common/gemmlowp.cl"},
293 {"gemmlowp_output_stage_quantize_down_fixedpoint", "common/gemmlowp.cl"},
294 {"gemmlowp_output_stage_quantize_down_fixedpoint_qsymm16", "common/gemmlowp.cl"},
295 {"gemmlowp_output_stage_quantize_down_float", "common/gemmlowp.cl"},
296 {"generate_proposals_compute_all_anchors", "common/generate_proposals.cl"},
297 {"generate_proposals_compute_all_anchors_quantized", "common/generate_proposals_quantized.cl"},
298 {"instance_normalization", "common/instance_normalization.cl"},
299 {"compute_mean_var", "common/instance_normalization.cl"},
300 {"l2_normalize_x", "common/l2_normalize.cl"},
301 {"l2_normalize_y", "common/l2_normalize.cl"},
302 {"l2_normalize_z", "common/l2_normalize.cl"},
303 {"mat_mul_native_mmul_nt_nt", "common/mat_mul_mmul.cl"},
304 {"mat_mul_native_mmul_t_nt", "common/mat_mul_mmul.cl"},
305 {"mat_mul_native_mmul_nt_t", "common/mat_mul_mmul.cl"},
306 {"mat_mul_native_mmul_t_t", "common/mat_mul_mmul.cl"},
307 {"mat_mul_native_nt_nt", "common/mat_mul.cl"},
308 {"mat_mul_native_nt_t", "common/mat_mul.cl"},
309 {"mat_mul_native_t_nt", "common/mat_mul.cl"},
310 {"mat_mul_native_t_t", "common/mat_mul.cl"},
311 {"mat_mul_native_quantized_nt_nt", "common/mat_mul_quantized.cl"},
312 {"mat_mul_native_quantized_nt_t", "common/mat_mul_quantized.cl"},
313 {"mat_mul_native_quantized_t_nt", "common/mat_mul_quantized.cl"},
314 {"mat_mul_native_quantized_t_t", "common/mat_mul_quantized.cl"},
315 {"mat_mul_native_quantized_mmul_nt_nt", "common/mat_mul_quantized_mmul.cl"},
316 {"mat_mul_native_quantized_mmul_nt_t", "common/mat_mul_quantized_mmul.cl"},
317 {"mat_mul_native_quantized_mmul_t_nt", "common/mat_mul_quantized_mmul.cl"},
318 {"mat_mul_native_quantized_mmul_t_t", "common/mat_mul_quantized_mmul.cl"},
319 {"max_unpooling_layer_2", "common/unpooling_layer.cl"},
320 {"mean_stddev_normalization", "common/mean_stddev_normalization.cl"},
321 {"memset", "common/memset.cl"},
322 {"minmax_layer", "common/minmax_layer.cl"},
323 {"non_max_suppression", "common/nonmax.cl"},
324 {"pad_layer_constant", "common/pad_layer.cl"},
325 {"pad_layer_symmetric_reflect", "common/pad_layer.cl"},
326 {"permute", "common/permute.cl"},
327 {"pixelwise_mul_complex", "common/pixelwise_mul_float.cl"},
328 {"pixelwise_mul_float", "common/pixelwise_mul_float.cl"},
329 {"pixelwise_mul_int", "common/pixelwise_mul_int.cl"},
330 {"pixelwise_mul_quantized", "common/pixelwise_mul_int.cl"},
331 {"qlstm_layer_normalization", "common/qlstm_layer_normalization.cl"},
332 {"quantization_layer", "common/quantization_layer.cl"},
333 {"range", "common/range.cl"},
334 {"range_quantized", "common/range.cl"},
335 {"reduction_operation_x", "common/reduction_operation.cl"},
336 {"reduction_operation_non_parallel_x", "common/reduction_operation.cl"},
337 {"reduction_operation_y", "common/reduction_operation.cl"},
338 {"reduction_operation_z", "common/reduction_operation.cl"},
339 {"reduction_operation_w", "common/reduction_operation.cl"},
340 {"reshape_layer", "common/reshape_layer.cl"},
341 {"reshape_to_columns", "common/convolution_layer.cl"},
342 {"reverse", "common/reverse.cl"},
343 {"roi_align_layer", "common/roi_align_layer.cl"},
344 {"roi_align_layer_quantized", "common/roi_align_layer_quantized.cl"},
345 {"roi_pooling_layer", "common/roi_pooling_layer.cl"},
346 {"select_same_rank", "common/select.cl"},
347 {"select_different_rank_2", "common/select.cl"},
348 {"select_different_rank_n", "common/select.cl"},
Viet-Hoa Do29254ae2023-10-13 17:40:32 +0100349 {"softmax_x", "common/softmax_layer.cl"},
350 {"softmax_non_x", "common/softmax_layer.cl"},
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100351 {"stack_layer", "common/stack_layer.cl"},
352 {"strided_slice", "common/slice_ops.cl"},
353 {"tile", "common/tile.cl"},
354 {"transpose", "common/transpose.cl"},
Adnan AlSinanf81f51c2021-07-26 18:18:37 +0100355#ifdef ENABLE_NCHW_KERNELS
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100356 {"batch_to_space_nchw", "nchw/batch_to_space.cl"},
357 {"batch_to_space_static_nchw", "nchw/batch_to_space.cl"},
358 {"batchnormalization_layer_nchw", "nchw/batchnormalization_layer.cl"},
359 {"channel_shuffle_nchw", "nchw/channel_shuffle.cl"},
360 {"depth_to_space_nchw", "nchw/depth_to_space.cl"},
361 {"dequantization_layer_per_channel_nchw", "nchw/dequantization_layer.cl"},
362 {"direct_convolution1x1", "nchw/direct_convolution1x1.cl"},
363 {"direct_convolution_nchw", "nchw/direct_convolution.cl"},
Adnan AlSinan30124352021-12-02 19:12:20 +0000364
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100365 {"im2col1x1_stridex1_nchw", "nchw/im2col.cl"},
366 {"im2col3x3_nchw", "nchw/im2col.cl"},
367 {"im2col5x5_nchw", "nchw/im2col.cl"},
368 {"im2col11x11_padx0_pady0_nchw", "nchw/im2col.cl"},
369 {"im2col_generic_nchw", "nchw/im2col.cl"},
370 {"im2col_generic_padx0_pady0_nchw", "nchw/im2col.cl"},
371 {"normalization_layer_cross_map_nchw", "nchw/normalization_layer.cl"},
372 {"normalization_layer_in_map_nchw", "nchw/normalization_layer.cl"},
373 {"normalize_planar_yuv_layer_nchw", "nchw/normalize_planar_yuv_layer.cl"},
374 {"normalize_planar_yuv_layer_q8_nchw", "nchw/normalize_planar_yuv_layer_quantized.cl"},
375 {"pooling_layer_MxN_nchw", "nchw/pooling_layer.cl"},
376 {"pooling_layer_2_nchw_indices", "nchw/pooling_layer.cl"},
377 {"prior_box_layer_nchw", "nchw/prior_box_layer.cl"},
378 {"reorg_layer_nchw", "nchw/reorg_layer.cl"},
379 {"scale_nearest_neighbour_nchw", "nchw/scale.cl"},
380 {"scale_bilinear_nchw", "nchw/scale.cl"},
381 {"space_to_batch_nchw", "nchw/space_to_batch.cl"},
382 {"space_to_batch_static_nchw", "nchw/space_to_batch.cl"},
383 {"space_to_depth_nchw", "nchw/space_to_depth.cl"},
384 {"upsample_layer_nchw", "nchw/upsample_layer.cl"},
385 {"winograd_filter_transform_2x2_3x3_nchw", "nchw/winograd_filter_transform.cl"},
386 {"winograd_filter_transform_2x1_3x1_nchw", "nchw/winograd_filter_transform.cl"},
387 {"winograd_filter_transform_1x2_1x3_nchw", "nchw/winograd_filter_transform.cl"},
388 {"winograd_filter_transform_4x4_3x3_nchw", "nchw/winograd_filter_transform.cl"},
389 {"winograd_filter_transform_4x1_3x1_nchw", "nchw/winograd_filter_transform.cl"},
390 {"winograd_filter_transform_1x4_1x3_nchw", "nchw/winograd_filter_transform.cl"},
391 {"winograd_filter_transform_4x4_5x5_nchw", "nchw/winograd_filter_transform.cl"},
392 {"winograd_filter_transform_4x1_5x1_nchw", "nchw/winograd_filter_transform.cl"},
393 {"winograd_filter_transform_1x4_1x5_nchw", "nchw/winograd_filter_transform.cl"},
394 {"winograd_input_transform_2x2_3x3_stepz1_nchw", "nchw/winograd_input_transform.cl"},
395 {"winograd_input_transform_2x2_3x3_stepz2_nchw", "nchw/winograd_input_transform.cl"},
396 {"winograd_input_transform_2x1_3x1_stepz1_nchw", "nchw/winograd_input_transform.cl"},
397 {"winograd_input_transform_2x1_3x1_stepz2_nchw", "nchw/winograd_input_transform.cl"},
398 {"winograd_input_transform_1x2_1x3_stepz1_nchw", "nchw/winograd_input_transform.cl"},
399 {"winograd_input_transform_1x2_1x3_stepz2_nchw", "nchw/winograd_input_transform.cl"},
400 {"winograd_input_transform_4x4_3x3_stepz1_nchw", "nchw/winograd_input_transform.cl"},
401 {"winograd_input_transform_4x1_3x1_stepz1_nchw", "nchw/winograd_input_transform.cl"},
402 {"winograd_input_transform_1x4_1x3_stepz1_nchw", "nchw/winograd_input_transform.cl"},
403 {"winograd_input_transform_4x4_5x5_stepz1_nchw", "nchw/winograd_input_transform.cl"},
404 {"winograd_input_transform_4x1_5x1_stepz1_nchw", "nchw/winograd_input_transform.cl"},
405 {"winograd_input_transform_1x4_1x5_stepz1_nchw", "nchw/winograd_input_transform.cl"},
406 {"winograd_output_transform_2x2_3x3_nchw", "nchw/winograd_output_transform.cl"},
407 {"winograd_output_transform_2x1_3x1_nchw", "nchw/winograd_output_transform.cl"},
408 {"winograd_output_transform_1x2_1x3_nchw", "nchw/winograd_output_transform.cl"},
409 {"winograd_output_transform_4x4_3x3_nchw", "nchw/winograd_output_transform.cl"},
410 {"winograd_output_transform_4x1_3x1_nchw", "nchw/winograd_output_transform.cl"},
411 {"winograd_output_transform_1x4_1x3_nchw", "nchw/winograd_output_transform.cl"},
412 {"winograd_output_transform_4x4_5x5_nchw", "nchw/winograd_output_transform.cl"},
413 {"winograd_output_transform_4x1_5x1_nchw", "nchw/winograd_output_transform.cl"},
414 {"winograd_output_transform_1x4_1x5_nchw", "nchw/winograd_output_transform.cl"},
Adnan AlSinanf81f51c2021-07-26 18:18:37 +0100415#endif /* ENABLE_NCHW_KERNELS */
416#ifdef ENABLE_NHWC_KERNELS
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100417 {"batch_to_space_nhwc", "nhwc/batch_to_space.cl"},
418 {"batch_to_space_static_nhwc", "nhwc/batch_to_space.cl"},
419 {"batchnormalization_layer_nhwc", "nhwc/batchnormalization_layer.cl"},
420 {"channel_shuffle_nhwc", "nhwc/channel_shuffle.cl"},
421 {"depth_to_space_nhwc", "nhwc/depth_to_space.cl"},
422 {"dequantization_layer_per_channel_nhwc", "nhwc/dequantization_layer.cl"},
423 {"dwc_native_fp_nhwc", "nhwc/dwc_native_fp_nhwc.cl"},
424 {"dwc_native_quantized_nhwc", "nhwc/dwc_native_quantized_nhwc.cl"},
425 {"direct_convolution_nhwc", "nhwc/direct_convolution.cl"},
426 {"direct_convolution3d_ndhwc", "nhwc/direct_convolution3d.cl"},
427 {"im2col3x3_nhwc", "nhwc/im2col.cl"},
428 {"im2col9x9_nhwc", "nhwc/im2col.cl"},
429 {"im2col_generic_nhwc", "nhwc/im2col.cl"},
430 {"indirect_convolution_nhwc", "nhwc/indirect_convolution.cl"},
431 {"indirect_convolution_address_precalculation", "nhwc/indirect_convolution.cl"},
432 {"normalization_layer_cross_map_nhwc", "nhwc/normalization_layer.cl"},
433 {"normalization_layer_in_map_nhwc", "nhwc/normalization_layer.cl"},
434 {"normalize_planar_yuv_layer_nhwc", "nhwc/normalize_planar_yuv_layer.cl"},
435 {"normalize_planar_yuv_layer_q8_nhwc", "nhwc/normalize_planar_yuv_layer_quantized.cl"},
436 {"pooling_layer_MxN_nhwc", "nhwc/pooling_layer.cl"},
437 {"pooling_layer_2x2_nhwc", "nhwc/pooling_layer.cl"},
438 {"pooling_layer_MxN_quantized_nhwc", "nhwc/pooling_layer_quantized.cl"},
439 {"pooling_3d_layer_MxN_ndhwc", "nhwc/pooling_3d_layer.cl"},
440 {"pooling_3d_layer_MxN_ndhwc_quantized", "nhwc/pooling_3d_layer_quantized.cl"},
441 {"reorg_layer_nhwc", "nhwc/reorg_layer.cl"},
442 {"scale_nearest_neighbour_nhwc", "nhwc/scale.cl"},
443 {"scale_bilinear_nhwc", "nhwc/scale.cl"},
Gunes Bayirada32002024-04-24 10:27:13 +0100444 {"scatter_mp1d_2d_mpnd", "common/scatter.cl"},
Mohammed Suhail Munshi73771072024-03-25 15:55:42 +0000445 {"scatter1D", "common/scatter.cl"},
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100446 {"space_to_batch_nhwc", "nhwc/space_to_batch.cl"},
447 {"space_to_batch_static_nhwc", "nhwc/space_to_batch.cl"},
448 {"space_to_depth_nhwc", "nhwc/space_to_depth.cl"},
449 {"transposed_convolution_nhwc", "nhwc/transposed_convolution.cl"},
450 {"upsample_layer_nhwc", "nhwc/upsample_layer.cl"},
451 {"winograd_filter_transform_4x1_3x1_nhwc", "nhwc/winograd_filter_transform.cl"},
452 {"winograd_filter_transform_1x4_1x3_nhwc", "nhwc/winograd_filter_transform.cl"},
453 {"winograd_filter_transform_4x4_3x3_nhwc", "nhwc/winograd_filter_transform.cl"},
454 {"winograd_filter_transform_4x4_5x5_nhwc", "nhwc/winograd_filter_transform.cl"},
455 {"winograd_filter_transform_4x1_5x1_nhwc", "nhwc/winograd_filter_transform.cl"},
456 {"winograd_filter_transform_1x4_1x5_nhwc", "nhwc/winograd_filter_transform.cl"},
457 {"winograd_filter_transform_2x2_7x7_nhwc", "nhwc/winograd_filter_transform.cl"},
458 {"winograd_filter_transform_2x1_7x1_nhwc", "nhwc/winograd_filter_transform.cl"},
459 {"winograd_filter_transform_1x2_1x7_nhwc", "nhwc/winograd_filter_transform.cl"},
460 {"winograd_input_transform_4x1_3x1_stepz1_nhwc", "nhwc/winograd_input_transform.cl"},
461 {"winograd_input_transform_1x4_1x3_stepz1_nhwc", "nhwc/winograd_input_transform.cl"},
462 {"winograd_input_transform_4x4_3x3_stepz1_nhwc", "nhwc/winograd_input_transform.cl"},
463 {"winograd_input_transform_4x4_5x5_stepz1_nhwc", "nhwc/winograd_input_transform.cl"},
464 {"winograd_input_transform_4x1_5x1_stepz1_nhwc", "nhwc/winograd_input_transform.cl"},
465 {"winograd_input_transform_1x4_1x5_stepz1_nhwc", "nhwc/winograd_input_transform.cl"},
466 {"winograd_input_transform_2x2_7x7_stepz1_nhwc", "nhwc/winograd_input_transform.cl"},
467 {"winograd_input_transform_2x1_7x1_stepz1_nhwc", "nhwc/winograd_input_transform.cl"},
468 {"winograd_input_transform_1x2_1x7_stepz1_nhwc", "nhwc/winograd_input_transform.cl"},
469 {"winograd_output_transform_4x1_3x1_nhwc", "nhwc/winograd_output_transform.cl"},
470 {"winograd_output_transform_1x4_1x3_nhwc", "nhwc/winograd_output_transform.cl"},
471 {"winograd_output_transform_4x4_3x3_nhwc", "nhwc/winograd_output_transform.cl"},
472 {"winograd_output_transform_4x4_5x5_nhwc", "nhwc/winograd_output_transform.cl"},
473 {"winograd_output_transform_4x1_5x1_nhwc", "nhwc/winograd_output_transform.cl"},
474 {"winograd_output_transform_1x4_1x5_nhwc", "nhwc/winograd_output_transform.cl"},
475 {"winograd_output_transform_2x2_7x7_nhwc", "nhwc/winograd_output_transform.cl"},
476 {"winograd_output_transform_2x1_7x1_nhwc", "nhwc/winograd_output_transform.cl"},
477 {"winograd_output_transform_1x2_1x7_nhwc", "nhwc/winograd_output_transform.cl"},
Adnan AlSinanf81f51c2021-07-26 18:18:37 +0100478#endif /* ENABLE_NHWC_KERNELS */
Georgios Pinitas908f6162021-05-04 10:11:09 +0100479};
480
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100481const std::map<std::string, std::string> ClKernelLibrary::_program_source_map = {
Georgios Pinitas908f6162021-05-04 10:11:09 +0100482#ifdef EMBEDDED_KERNELS
483 {
Jakub Sujak32741722022-11-25 16:43:18 +0000484 "activation_float_helpers.h",
485#include "./cl_kernels/activation_float_helpers.hembed"
486 },
487 {
488 "activation_quant_helpers.h",
489#include "./cl_kernels/activation_quant_helpers.hembed"
490 },
491 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100492 "common/activation_layer.cl",
493#include "./cl_kernels/common/activation_layer.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100494 },
495 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100496 "common/activation_layer_quant.cl",
497#include "./cl_kernels/common/activation_layer_quant.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100498 },
499 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100500 "common/arg_min_max.cl",
501#include "./cl_kernels/common/arg_min_max.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100502 },
503 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100504 "common/bitwise_op.cl",
505#include "./cl_kernels/common/bitwise_op.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100506 },
507 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100508 "common/bounding_box_transform.cl",
509#include "./cl_kernels/common/bounding_box_transform.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100510 },
511 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100512 "common/bounding_box_transform_quantized.cl",
513#include "./cl_kernels/common/bounding_box_transform_quantized.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100514 },
515 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100516 "common/col2im.cl",
517#include "./cl_kernels/common/col2im.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100518 },
519 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100520 "common/comparisons.cl",
521#include "./cl_kernels/common/comparisons.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100522 },
523 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100524 "common/concatenate.cl",
525#include "./cl_kernels/common/concatenate.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100526 },
527 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100528 "common/convert_fc_weights.cl",
529#include "./cl_kernels/common/convert_fc_weights.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100530 },
531 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100532 "common/convolution_layer.cl",
533#include "./cl_kernels/common/convolution_layer.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100534 },
535 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100536 "common/copy_tensor.cl",
537#include "./cl_kernels/common/copy_tensor.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100538 },
539 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100540 "common/crop_tensor.cl",
541#include "./cl_kernels/common/crop_tensor.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100542 },
543 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100544 "common/deconvolution_layer.cl",
545#include "./cl_kernels/common/deconvolution_layer.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100546 },
547 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100548 "common/cast.cl",
549#include "./cl_kernels/common/cast.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100550 },
551 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100552 "common/dequantization_layer.cl",
553#include "./cl_kernels/common/dequantization_layer.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100554 },
555 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100556 "common/elementwise_operation.cl",
557#include "./cl_kernels/common/elementwise_operation.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100558 },
559 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100560 "common/elementwise_operation_quantized.cl",
561#include "./cl_kernels/common/elementwise_operation_quantized.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100562 },
563 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100564 "common/elementwise_unary.cl",
565#include "./cl_kernels/common/elementwise_unary.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100566 },
567 {
Ramy Elgammal14d7b532023-01-30 04:56:47 +0000568 "common/elementwise_unary_quantized.cl",
569#include "./cl_kernels/common/elementwise_unary_quantized.clembed"
570 },
571 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100572 "common/fft.cl",
573#include "./cl_kernels/common/fft.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100574 },
575 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100576 "common/fft_digit_reverse.cl",
577#include "./cl_kernels/common/fft_digit_reverse.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100578 },
579 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100580 "common/fft_scale.cl",
581#include "./cl_kernels/common/fft_scale.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100582 },
583 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100584 "common/fill_border.cl",
585#include "./cl_kernels/common/fill_border.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100586 },
587 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100588 "common/floor.cl",
589#include "./cl_kernels/common/floor.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100590 },
591 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100592 "common/gather.cl",
593#include "./cl_kernels/common/gather.clembed"
594 },
595 {
Mohammed Suhail Munshi73771072024-03-25 15:55:42 +0000596 "common/scatter.cl",
597#include "./cl_kernels/common/scatter.clembed"
598 },
599 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100600 "common/gemm.cl",
601#include "./cl_kernels/common/gemm.clembed"
602 },
603 {
Gunes Bayir4bfc70e2021-12-10 16:17:56 +0000604 "common/gemm_reshaped_only_rhs_mmul.cl",
605#include "./cl_kernels/common/gemm_reshaped_only_rhs_mmul.clembed"
606 },
607 {
ramelg019cca5922021-11-11 10:05:00 +0000608 "common/gemm_utils.cl",
609#include "./cl_kernels/common/gemm_utils.clembed"
610 },
611 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100612 "common/gemmlowp.cl",
613#include "./cl_kernels/common/gemmlowp.clembed"
614 },
615 {
Freddie Liardete572dff2022-05-16 14:09:10 +0100616 "common/gemmlowp_reshaped_only_rhs_mmul.cl",
617#include "./cl_kernels/common/gemmlowp_reshaped_only_rhs_mmul.clembed"
618 },
619 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100620 "common/gemv.cl",
621#include "./cl_kernels/common/gemv.clembed"
622 },
623 {
624 "common/generate_proposals.cl",
625#include "./cl_kernels/common/generate_proposals.clembed"
626 },
627 {
628 "common/generate_proposals_quantized.cl",
629#include "./cl_kernels/common/generate_proposals_quantized.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100630 },
631 {
Giorgio Arena892b70a2022-03-30 12:23:10 +0100632 "gemm_helpers.h",
633#include "./cl_kernels/gemm_helpers.hembed"
634 },
635 {
Georgios Pinitas908f6162021-05-04 10:11:09 +0100636 "helpers.h",
637#include "./cl_kernels/helpers.hembed"
638 },
639 {
640 "helpers_asymm.h",
641#include "./cl_kernels/helpers_asymm.hembed"
642 },
643 {
Giorgio Arena892b70a2022-03-30 12:23:10 +0100644 "repeat.h",
645#include "./cl_kernels/repeat.hembed"
646 },
647 {
SiCong Lica364df2022-04-13 15:48:19 +0100648 "tile_helpers.h",
649#include "./cl_kernels/tile_helpers.hembed"
650 },
651 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100652 "common/instance_normalization.cl",
653#include "./cl_kernels/common/instance_normalization.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100654 },
655 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100656 "common/l2_normalize.cl",
657#include "./cl_kernels/common/l2_normalize.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100658 },
659 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100660 "common/mean_stddev_normalization.cl",
661#include "./cl_kernels/common/mean_stddev_normalization.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100662 },
663 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100664 "common/memset.cl",
665#include "./cl_kernels/common/memset.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100666 },
667 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100668 "common/minmax_layer.cl",
669#include "./cl_kernels/common/minmax_layer.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100670 },
671 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100672 "common/nonmax.cl",
673#include "./cl_kernels/common/nonmax.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100674 },
675 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100676 "common/batchnormalization_layer.cl",
677#include "./cl_kernels/common/batchnormalization_layer.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100678 },
679 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100680 "common/pad_layer.cl",
681#include "./cl_kernels/common/pad_layer.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100682 },
683 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100684 "common/permute.cl",
685#include "./cl_kernels/common/permute.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100686 },
687 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100688 "common/pixelwise_mul_float.cl",
689#include "./cl_kernels/common/pixelwise_mul_float.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100690 },
691 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100692 "common/pixelwise_mul_int.cl",
693#include "./cl_kernels/common/pixelwise_mul_int.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100694 },
695 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100696 "common/qlstm_layer_normalization.cl",
697#include "./cl_kernels/common/qlstm_layer_normalization.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100698 },
699 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100700 "common/quantization_layer.cl",
701#include "./cl_kernels/common/quantization_layer.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100702 },
703 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100704 "common/range.cl",
705#include "./cl_kernels/common/range.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100706 },
707 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100708 "common/reduction_operation.cl",
709#include "./cl_kernels/common/reduction_operation.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100710 },
711 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100712 "common/reshape_layer.cl",
713#include "./cl_kernels/common/reshape_layer.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100714 },
715 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100716 "common/reverse.cl",
717#include "./cl_kernels/common/reverse.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100718 },
719 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100720 "common/roi_align_layer.cl",
721#include "./cl_kernels/common/roi_align_layer.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100722 },
723 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100724 "common/roi_align_layer_quantized.cl",
725#include "./cl_kernels/common/roi_align_layer_quantized.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100726 },
727 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100728 "common/roi_pooling_layer.cl",
729#include "./cl_kernels/common/roi_pooling_layer.clembed"
730 },
731 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100732 "common/select.cl",
733#include "./cl_kernels/common/select.clembed"
734 },
735 {
736 "common/softmax_layer.cl",
737#include "./cl_kernels/common/softmax_layer.clembed"
738 },
739 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100740 "common/slice_ops.cl",
741#include "./cl_kernels/common/slice_ops.clembed"
742 },
743 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100744 "common/stack_layer.cl",
745#include "./cl_kernels/common/stack_layer.clembed"
746 },
747 {
748 "common/tile.cl",
749#include "./cl_kernels/common/tile.clembed"
750 },
751 {
752 "common/transpose.cl",
753#include "./cl_kernels/common/transpose.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100754 },
755 {
756 "types.h",
757#include "./cl_kernels/types.hembed"
758 },
759 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100760 "common/unpooling_layer.cl",
761#include "./cl_kernels/common/unpooling_layer.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100762 },
Ramy Elgammal2b6ebfe2023-03-09 21:15:37 +0000763 {
764 "common/mat_mul.cl",
765#include "./cl_kernels/common/mat_mul.clembed"
766 },
Gunes Bayir9d0c4de2023-04-13 18:22:58 +0100767 {
SiCong Lia8d80582023-05-19 14:23:37 +0100768 "common/mat_mul_mmul.cl",
769#include "./cl_kernels/common/mat_mul_mmul.clembed"
770 },
771 {
Gunes Bayir9d0c4de2023-04-13 18:22:58 +0100772 "common/mat_mul_quantized.cl",
773#include "./cl_kernels/common/mat_mul_quantized.clembed"
774 },
Gunes Bayire87fa662023-09-07 12:20:33 +0100775 {
776 "common/mat_mul_quantized_mmul.cl",
777#include "./cl_kernels/common/mat_mul_quantized_mmul.clembed"
778 },
Adnan AlSinanf81f51c2021-07-26 18:18:37 +0100779#ifdef ENABLE_NCHW_KERNELS
780 {
781 "nchw/batch_to_space.cl",
782#include "./cl_kernels/nchw/batch_to_space.clembed"
783 },
784 {
785 "nchw/channel_shuffle.cl",
786#include "./cl_kernels/nchw/channel_shuffle.clembed"
787 },
788 {
789 "nchw/upsample_layer.cl",
790#include "./cl_kernels/nchw/upsample_layer.clembed"
791 },
792 {
793 "nchw/depth_to_space.cl",
794#include "./cl_kernels/nchw/depth_to_space.clembed"
795 },
796 {
797 "nchw/dequantization_layer.cl",
798#include "./cl_kernels/nchw/dequantization_layer.clembed"
799 },
800 {
Adnan AlSinan30124352021-12-02 19:12:20 +0000801 "nchw/direct_convolution.cl",
802#include "./cl_kernels/nchw/direct_convolution.clembed"
Adnan AlSinanf81f51c2021-07-26 18:18:37 +0100803 },
804 {
805 "nchw/im2col.cl",
806#include "./cl_kernels/nchw/im2col.clembed"
807 },
808 {
809 "nchw/normalization_layer.cl",
810#include "./cl_kernels/nchw/normalization_layer.clembed"
811 },
812 {
813 "nchw/normalize_planar_yuv_layer.cl",
814#include "./cl_kernels/nchw/normalize_planar_yuv_layer.clembed"
815 },
816 {
817 "nchw/normalize_planar_yuv_layer_quantized.cl",
818#include "./cl_kernels/nchw/normalize_planar_yuv_layer_quantized.clembed"
819 },
820 {
821 "nchw/batchnormalization_layer.cl",
822#include "./cl_kernels/nchw/batchnormalization_layer.clembed"
823 },
824 {
825 "nchw/pooling_layer.cl",
826#include "./cl_kernels/nchw/pooling_layer.clembed"
827 },
828 {
Adnan AlSinanf81f51c2021-07-26 18:18:37 +0100829 "nchw/prior_box_layer.cl",
830#include "./cl_kernels/nchw/prior_box_layer.clembed"
831 },
832 {
Adnan AlSinanf81f51c2021-07-26 18:18:37 +0100833 "nchw/reorg_layer.cl",
834#include "./cl_kernels/nchw/reorg_layer.clembed"
835 },
836 {
837 "nchw/scale.cl",
838#include "./cl_kernels/nchw/scale.clembed"
839 },
840 {
Adnan AlSinanf81f51c2021-07-26 18:18:37 +0100841 "nchw/space_to_batch.cl",
842#include "./cl_kernels/nchw/space_to_batch.clembed"
843 },
844 {
845 "nchw/space_to_depth.cl",
846#include "./cl_kernels/nchw/space_to_depth.clembed"
847 },
Georgios Pinitas908f6162021-05-04 10:11:09 +0100848 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100849 "nchw/winograd_filter_transform.cl",
850#include "./cl_kernels/nchw/winograd_filter_transform.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100851 },
852 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100853 "nchw/winograd_input_transform.cl",
854#include "./cl_kernels/nchw/winograd_input_transform.clembed"
855 },
856 {
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100857 "nchw/winograd_output_transform.cl",
858#include "./cl_kernels/nchw/winograd_output_transform.clembed"
859 },
Adnan AlSinanf81f51c2021-07-26 18:18:37 +0100860#endif /* ENABLE_NCHW_KERNELS */
861
862#ifdef ENABLE_NHWC_KERNELS
863 {
864 "nhwc/batch_to_space.cl",
865#include "./cl_kernels/nhwc/batch_to_space.clembed"
866 },
867 {
868 "nhwc/channel_shuffle.cl",
869#include "./cl_kernels/nhwc/channel_shuffle.clembed"
870 },
871 {
872 "nhwc/upsample_layer.cl",
873#include "./cl_kernels/nhwc/upsample_layer.clembed"
874 },
875 {
876 "nhwc/depth_to_space.cl",
877#include "./cl_kernels/nhwc/depth_to_space.clembed"
878 },
879 {
880 "nhwc/dequantization_layer.cl",
881#include "./cl_kernels/nhwc/dequantization_layer.clembed"
882 },
883 {
884 "nhwc/direct_convolution.cl",
885#include "./cl_kernels/nhwc/direct_convolution.clembed"
886 },
887 {
Giorgio Arena945ae9e2021-10-13 11:13:04 +0100888 "nhwc/direct_convolution3d.cl",
889#include "./cl_kernels/nhwc/direct_convolution3d.clembed"
890 },
891 {
Adnan AlSinanf81f51c2021-07-26 18:18:37 +0100892 "nhwc/dwc_native_fp_nhwc.cl",
893#include "./cl_kernels/nhwc/dwc_native_fp_nhwc.clembed"
894 },
895 {
896 "nhwc/dwc_native_quantized_nhwc.cl",
897#include "./cl_kernels/nhwc/dwc_native_quantized_nhwc.clembed"
898 },
899 {
900 "nhwc/normalization_layer.cl",
901#include "./cl_kernels/nhwc/normalization_layer.clembed"
902 },
903 {
904 "nhwc/normalize_planar_yuv_layer.cl",
905#include "./cl_kernels/nhwc/normalize_planar_yuv_layer.clembed"
906 },
907 {
908 "nhwc/normalize_planar_yuv_layer_quantized.cl",
909#include "./cl_kernels/nhwc/normalize_planar_yuv_layer_quantized.clembed"
910 },
911 {
912 "nhwc/im2col.cl",
913#include "./cl_kernels/nhwc/im2col.clembed"
914 },
915 {
Gian Marco Iodice5d016812022-11-17 11:03:39 +0000916 "nhwc/indirect_convolution.cl",
917#include "./cl_kernels/nhwc/indirect_convolution.clembed"
918 },
919 {
Adnan AlSinanf81f51c2021-07-26 18:18:37 +0100920 "nhwc/batchnormalization_layer.cl",
921#include "./cl_kernels/nhwc/batchnormalization_layer.clembed"
922 },
923 {
924 "nhwc/pooling_layer.cl",
925#include "./cl_kernels/nhwc/pooling_layer.clembed"
926 },
927 {
ramelg0137515692022-02-26 22:06:20 +0000928 "nhwc/pooling_3d_layer.cl",
929#include "./cl_kernels/nhwc/pooling_3d_layer.clembed"
930 },
931 {
Mohammed Suhail Munshi5e549fa2022-03-16 11:14:06 +0000932 "nhwc/pooling_3d_layer_quantized.cl",
933#include "./cl_kernels/nhwc/pooling_3d_layer_quantized.clembed"
934 },
935 {
Adnan AlSinanf81f51c2021-07-26 18:18:37 +0100936 "nhwc/pooling_layer_quantized.cl",
937#include "./cl_kernels/nhwc/pooling_layer_quantized.clembed"
938 },
939 {
Adnan AlSinanf81f51c2021-07-26 18:18:37 +0100940 "nhwc/reorg_layer.cl",
941#include "./cl_kernels/nhwc/reorg_layer.clembed"
942 },
943 {
944 "nhwc/scale.cl",
945#include "./cl_kernels/nhwc/scale.clembed"
946 },
947 {
Adnan AlSinanf81f51c2021-07-26 18:18:37 +0100948 "nhwc/space_to_batch.cl",
949#include "./cl_kernels/nhwc/space_to_batch.clembed"
950 },
951 {
952 "nhwc/space_to_depth.cl",
953#include "./cl_kernels/nhwc/space_to_depth.clembed"
954 },
955 {
Gunes Bayirec0113d2022-11-09 09:26:27 +0000956 "nhwc/transposed_convolution.cl",
957#include "./cl_kernels/nhwc/transposed_convolution.clembed"
958 },
959 {
Adnan AlSinanf81f51c2021-07-26 18:18:37 +0100960 "nhwc/winograd_filter_transform.cl",
961#include "./cl_kernels/nhwc/winograd_filter_transform.clembed"
962 },
963 {
964 "nhwc/winograd_input_transform.cl",
965#include "./cl_kernels/nhwc/winograd_input_transform.clembed"
966 },
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100967 {
968 "nhwc/winograd_output_transform.cl",
969#include "./cl_kernels/nhwc/winograd_output_transform.clembed"
Georgios Pinitas908f6162021-05-04 10:11:09 +0100970 },
Adnan AlSinanf81f51c2021-07-26 18:18:37 +0100971#endif /* ENABLE_NHWC_KERNELS */
Georgios Pinitas908f6162021-05-04 10:11:09 +0100972#endif /* EMBEDDED_KERNELS */
973};
974
975ClKernelLibrary &ClKernelLibrary::get()
976{
977 static ClKernelLibrary _kernel_library;
978 return _kernel_library;
979}
980
981std::string ClKernelLibrary::program_name(const std::string &kernel_name) const
982{
983 // Find which program contains the kernel
984 auto kernel_program_it = _kernel_program_map.find(kernel_name);
985
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100986 if (_kernel_program_map.end() == kernel_program_it)
Georgios Pinitas908f6162021-05-04 10:11:09 +0100987 {
988 ARM_COMPUTE_ERROR_VAR("Kernel %s not found in the CLKernelLibrary", kernel_name.c_str());
989 }
990
991 const std::string program_name = kernel_program_it->second;
992
993 return program_name;
994}
995
996void ClKernelLibrary::set_kernel_path(std::string kernel_path)
997{
998 _kernel_path = std::move(kernel_path);
999 _kernel_path += "/";
1000}
1001
1002const std::string &ClKernelLibrary::kernel_path() const
1003{
1004 return _kernel_path;
1005}
1006
1007ClKernelLibrary::ClProgramInfo ClKernelLibrary::program(const std::string &program_name) const
1008{
1009#ifdef EMBEDDED_KERNELS
1010#ifdef ARM_COMPUTE_COMPRESSED_KERNELS
1011 const auto inflatted_program_source_it = _decompressed_source_map.find(program_name);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001012 if (inflatted_program_source_it != _decompressed_source_map.end())
Georgios Pinitas908f6162021-05-04 10:11:09 +01001013 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001014 return ClProgramInfo{inflatted_program_source_it->second, false};
Georgios Pinitas908f6162021-05-04 10:11:09 +01001015 }
1016#endif /* ARM_COMPUTE_COMPRESSED_KERNELS */
1017
1018 const auto program_source_it = _program_source_map.find(program_name);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001019 if (program_source_it == _program_source_map.end())
Georgios Pinitas908f6162021-05-04 10:11:09 +01001020 {
1021 ARM_COMPUTE_ERROR_VAR("Embedded program for %s does not exist.", program_name.c_str());
1022 }
1023 std::string program_source = program_source_it->second;
1024
1025#ifdef ARM_COMPUTE_COMPRESSED_KERNELS
1026 std::string decompressed_program_source = decompress_zlib(decode_base64(program_source_it->second));
1027 ARM_COMPUTE_ERROR_ON_MSG(decompressed_program_source.empty(), "Cannot de-compress requested program");
1028 _decompressed_source_map.insert(std::make_pair(program_name, decompressed_program_source));
1029 program_source = std::move(decompressed_program_source);
1030#endif /* ARM_COMPUTE_COMPRESSED_KERNELS */
1031
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001032 return ClProgramInfo{program_source, false};
Georgios Pinitas908f6162021-05-04 10:11:09 +01001033#else /* EMBEDDED_KERNELS */
1034 // Check for binary
1035 std::string source_name = _kernel_path + program_name;
1036 std::string binary_name = source_name + "bin";
1037 std::string program_source{};
1038 bool is_binary = false;
1039
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001040 if (std::ifstream(binary_name).is_open())
Georgios Pinitas908f6162021-05-04 10:11:09 +01001041 {
1042 program_source = read_file(binary_name, true);
1043 is_binary = true;
1044 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001045 else if (std::ifstream(source_name).is_open())
Georgios Pinitas908f6162021-05-04 10:11:09 +01001046 {
1047 program_source = read_file(source_name, false);
1048 }
1049 else
1050 {
1051 ARM_COMPUTE_ERROR_VAR("Kernel file %s does not exist.", source_name.c_str());
1052 }
1053
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001054 return ClProgramInfo{program_source, is_binary};
Georgios Pinitas908f6162021-05-04 10:11:09 +01001055#endif /* EMBEDDED_KERNELS */
1056}
1057} // namespace opencl
1058} // namespace arm_compute