Michalis Spyrou | 04f089c | 2017-08-08 17:42:38 +0100 | [diff] [blame] | 1 | /* |
Giorgio Arena | 3ecf9fe | 2021-04-28 16:11:51 +0100 | [diff] [blame] | 2 | * Copyright (c) 2016-2021 Arm Limited. |
Michalis Spyrou | 04f089c | 2017-08-08 17:42:38 +0100 | [diff] [blame] | 3 | * |
| 4 | * SPDX-License-Identifier: MIT |
| 5 | * |
| 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
| 7 | * of this software and associated documentation files (the "Software"), to |
| 8 | * deal in the Software without restriction, including without limitation the |
| 9 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or |
| 10 | * sell copies of the Software, and to permit persons to whom the Software is |
| 11 | * furnished to do so, subject to the following conditions: |
| 12 | * |
| 13 | * The above copyright notice and this permission notice shall be included in all |
| 14 | * copies or substantial portions of the Software. |
| 15 | * |
| 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 22 | * SOFTWARE. |
| 23 | */ |
| 24 | #include "helpers.h" |
Michalis Spyrou | 0b18d97 | 2020-01-30 18:11:13 +0000 | [diff] [blame] | 25 | #include "helpers_asymm.h" |
Michalis Spyrou | 04f089c | 2017-08-08 17:42:38 +0100 | [diff] [blame] | 26 | |
/* Comparison helpers used by the MIN/MAX reductions.
 *
 * Every macro yields a value meant to be passed as the third (condition) argument of select():
 *  - FLOAT_DATA_TYPE builds: the result of isgreater()/isless() is cast to the select type that matches
 *    DATA_TYPE_PROMOTED (VEC_SIZE lanes for the vector variants, scalar for the *_SCALAR variants).
 *  - Integer builds with WIDTH defined: a plain 1/0; the *_SCALAR names are aliases of the same macros.
 *  - Integer builds without WIDTH: an int vector of VEC_SIZE lanes built with select(0, -1, comparison).
 *    No *_SCALAR variants are defined in this configuration.
 *
 * Arguments and expansions are fully parenthesised so that a macro cannot re-associate with operators
 * surrounding its call site or inside its arguments.
 */
#if defined(FLOAT_DATA_TYPE)
#define ISGREATER(x, y) (SELECT_VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE))(isgreater(x, y))
#define ISLESS(x, y) (SELECT_VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE))(isless(x, y))
#define ISGREATER_SCALAR(x, y) (SELECT_DATA_TYPE(DATA_TYPE_PROMOTED))(isgreater(x, y))
#define ISLESS_SCALAR(x, y) (SELECT_DATA_TYPE(DATA_TYPE_PROMOTED))(isless(x, y))
#else // !defined(FLOAT_DATA_TYPE)
#if defined(WIDTH)
#define ISGREATER(x, y) (((x) > (y)) ? 1 : 0)
#define ISLESS(x, y) (((x) < (y)) ? 1 : 0)
#define ISGREATER_SCALAR ISGREATER
#define ISLESS_SCALAR ISLESS
#else // !defined(WIDTH)
#define ISGREATER(x, y) select((VEC_DATA_TYPE(int, VEC_SIZE))0, (VEC_DATA_TYPE(int, VEC_SIZE))(-1), (x) > (y))
#define ISLESS(x, y) select((VEC_DATA_TYPE(int, VEC_SIZE))0, (VEC_DATA_TYPE(int, VEC_SIZE))(-1), (x) < (y))
#endif // defined(WIDTH)
#endif // defined(FLOAT_DATA_TYPE)
| 43 | |
Giorgio Arena | 3ecf9fe | 2021-04-28 16:11:51 +0100 | [diff] [blame] | 44 | #if defined(WIDTH) |
#if defined(OPERATION)

/* Accumulation primitives selectable through -DOPERATION.
 *
 * Each one combines the running value in0 with the reduction of in1 over `size` lanes.
 * NOTE(review): the SUM_REDUCE/PROD_REDUCE call is deliberately left without an extra pair of parentheses so the
 * evaluation order of the original expansion is preserved - those helpers are defined outside this file (confirm
 * against helpers.h before wrapping them).
 */
#define sum(in0, in1, size) ((in0) + SUM_REDUCE(in1, size))
#define square_sum(in0, in1, size) ((in0) + SUM_REDUCE(((in1) * (in1)), size))
#define product(in0, in1, size) ((in0) * PROD_REDUCE(in1, size))

/** This kernel reduces a tensor along the x-axis: every work-item folds one full row of WIDTH elements.
 *
 * The row is selected by global ids 1 (y) and 2 (z). It is consumed VEC_SIZE elements at a time and, when WIDTH is
 * not a multiple of VEC_SIZE, the remaining elements are consumed one by one.
 *
 * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
 * @note The operation we want to perform must be passed at compile time using -DOPERATION e.g. -DOPERATION=square_sum
 *       (the operations defined in this file are sum, square_sum and product)
 * @note The vector size must be passed at compile time using -DVEC_SIZE e.g. -DVEC_SIZE=4
 * @note The width size must be passed at compile time using -DWIDTH e.g. -DWIDTH=128
 * @note The mean flag must be passed at compile time using -DMEAN if the accumulated value has to be divided by WIDTH
 * @note The product flag -DPROD makes the accumulator start from 1 instead of 0; it does not select the operator,
 *       which is always given by -DOPERATION
 *
 * @param[in]  input_ptr                            Pointer to the source tensor. Supported data types: F16/F32
 * @param[in]  input_stride_x                       Stride of the source tensor in X dimension (in bytes)
 * @param[in]  input_step_x                         input_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  input_stride_y                       Stride of the source tensor in Y dimension (in bytes)
 * @param[in]  input_step_y                         input_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  input_stride_z                       Stride of the source tensor in Z dimension (in bytes)
 * @param[in]  input_step_z                         input_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in]  input_offset_first_element_in_bytes  The offset of the first element in the source tensor
 * @param[out] output_ptr                           Pointer to the destination tensor. Supported data types: same as @p input_ptr
 * @param[in]  output_stride_x                      Stride of the destination tensor in X dimension (in bytes)
 * @param[in]  output_step_x                        output_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  output_stride_y                      Stride of the destination tensor in Y dimension (in bytes)
 * @param[in]  output_step_y                        output_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  output_stride_z                      Stride of the destination tensor in Z dimension (in bytes)
 * @param[in]  output_step_z                        output_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in]  output_offset_first_element_in_bytes The offset of the first element in the destination tensor
 */
__kernel void reduction_operation_x(
    TENSOR3D_DECLARATION(input),
    TENSOR3D_DECLARATION(output))
{
    int y = get_global_id(1);
    int z = get_global_id(2);

    // Start of the row to reduce and address of the single element that receives the result
    __global uchar *input_addr  = input_ptr + input_offset_first_element_in_bytes + y * input_stride_y + z * input_stride_z;
    __global uchar *output_addr = output_ptr + output_offset_first_element_in_bytes + y * output_stride_y + z * output_stride_z;

    // The accumulator starts from the neutral element of the reduction
#if defined(PROD)
    DATA_TYPE res = (DATA_TYPE)1;
#else  // !defined(PROD)
    DATA_TYPE res = (DATA_TYPE)0;
#endif // defined(PROD)

    int x = 0;

    // Vectorised part: whole groups of VEC_SIZE elements
    for(; x <= (WIDTH - VEC_SIZE); x += VEC_SIZE)
    {
        VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
        vals = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(input_addr + x * sizeof(DATA_TYPE)));
        res  = OPERATION(res, vals, VEC_SIZE);
    }

    // Scalar tail: only compiled in when WIDTH is not a multiple of VEC_SIZE
#if(WIDTH % VEC_SIZE)
    _Pragma("unroll") for(; x < WIDTH; ++x)
    {
        DATA_TYPE val = *((__global DATA_TYPE *)(input_addr + x * sizeof(DATA_TYPE)));
        res           = OPERATION(res, val, 1);
    }
#endif // (WIDTH % VEC_SIZE)

#if defined(MEAN)
    res /= WIDTH;
#endif // defined(MEAN)
    *((__global DATA_TYPE *)output_addr) = res;
}
#endif // defined(OPERATION)
Michalis Spyrou | 7930db4 | 2018-11-22 17:36:28 +0000 | [diff] [blame] | 111 | /** This kernel performs reduction on x-axis. (Non parallel) |
Michalis Spyrou | 7e9391b | 2018-10-05 14:49:28 +0100 | [diff] [blame] | 112 | * |
Michalis Spyrou | 7930db4 | 2018-11-22 17:36:28 +0000 | [diff] [blame] | 113 | * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float |
Michalis Spyrou | 7e9391b | 2018-10-05 14:49:28 +0100 | [diff] [blame] | 114 | * @note The width size must be passed at compile time using -DWIDTH e.g. -DWIDTH=128 |
Manuel Bottini | b412fab | 2018-12-10 17:40:23 +0000 | [diff] [blame] | 115 | * @note The product flag must be passed at compile time using -DPROD if we want to compute the product, otherwise sum will be used |
Michalis Spyrou | 7e9391b | 2018-10-05 14:49:28 +0100 | [diff] [blame] | 116 | * |
Giorgio Arena | 3ecf9fe | 2021-04-28 16:11:51 +0100 | [diff] [blame] | 117 | * @param[in] input_ptr Pointer to the source tensor. Supported data types: S32/F16/F32 and QASYMM8/QASYMM8_SIGNED for operation MEAN |
| 118 | * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes) |
| 119 | * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) |
| 120 | * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor |
| 121 | * @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p input_ptr |
Michalis Spyrou | 7e9391b | 2018-10-05 14:49:28 +0100 | [diff] [blame] | 122 | * @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes) |
| 123 | * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) |
| 124 | * @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor |
| 125 | */ |
Michalis Spyrou | 7930db4 | 2018-11-22 17:36:28 +0000 | [diff] [blame] | 126 | __kernel void reduction_operation_non_parallel_x( |
Giorgio Arena | 3ecf9fe | 2021-04-28 16:11:51 +0100 | [diff] [blame] | 127 | VECTOR_DECLARATION(input), |
Michalis Spyrou | 7e9391b | 2018-10-05 14:49:28 +0100 | [diff] [blame] | 128 | VECTOR_DECLARATION(output)) |
| 129 | { |
Giorgio Arena | 3ecf9fe | 2021-04-28 16:11:51 +0100 | [diff] [blame] | 130 | Vector input = CONVERT_TO_VECTOR_STRUCT(input); |
Michalis Spyrou | 7e9391b | 2018-10-05 14:49:28 +0100 | [diff] [blame] | 131 | Vector output = CONVERT_TO_VECTOR_STRUCT(output); |
| 132 | |
Giorgio Arena | 3ecf9fe | 2021-04-28 16:11:51 +0100 | [diff] [blame] | 133 | DATA_TYPE_PROMOTED res = CONVERT(*((__global DATA_TYPE *)vector_offset(&input, 0)), DATA_TYPE_PROMOTED); |
Michalis Spyrou | 0b18d97 | 2020-01-30 18:11:13 +0000 | [diff] [blame] | 134 | |
| 135 | // Convert input into F32 in order to perform quantized multiplication |
| 136 | #if defined(PROD) && defined(OFFSET) && defined(SCALE) |
| 137 | float res_f = DEQUANTIZE(res, OFFSET, SCALE, DATA_TYPE_PROMOTED, 1); |
| 138 | #endif // defined(PROD) && defined(OFFSET) && defined(SCALE) |
Michalis Spyrou | 7e9391b | 2018-10-05 14:49:28 +0100 | [diff] [blame] | 139 | |
Michalis Spyrou | 7930db4 | 2018-11-22 17:36:28 +0000 | [diff] [blame] | 140 | for(unsigned int x = 1; x < WIDTH; ++x) |
Michalis Spyrou | 7e9391b | 2018-10-05 14:49:28 +0100 | [diff] [blame] | 141 | { |
Giorgio Arena | 3ecf9fe | 2021-04-28 16:11:51 +0100 | [diff] [blame] | 142 | DATA_TYPE_PROMOTED in = CONVERT(*((__global DATA_TYPE *)vector_offset(&input, x)), DATA_TYPE_PROMOTED); |
Manuel Bottini | 7b9998d | 2019-10-21 17:59:07 +0100 | [diff] [blame] | 143 | #if defined(MIN) |
Giorgio Arena | 1fac037 | 2021-04-30 15:09:46 +0100 | [diff] [blame] | 144 | res = select(res, in, ISLESS_SCALAR(in, res)); |
Usama Arif | 048b0f3 | 2019-05-22 16:32:27 +0100 | [diff] [blame] | 145 | #elif defined(MAX) |
Giorgio Arena | 1fac037 | 2021-04-30 15:09:46 +0100 | [diff] [blame] | 146 | res = select(res, in, ISGREATER_SCALAR(in, res)); |
Michalis Spyrou | 0b18d97 | 2020-01-30 18:11:13 +0000 | [diff] [blame] | 147 | #elif defined(PROD) |
| 148 | #if defined(OFFSET) && defined(SCALE) |
| 149 | res_f *= DEQUANTIZE(in, OFFSET, SCALE, DATA_TYPE_PROMOTED, 1); |
| 150 | #else // !(defined(OFFSET) && defined(SCALE)) |
| 151 | res *= in; |
| 152 | #endif // defined(OFFSET) && defined(SCALE) |
| 153 | #else // defined(SUM)) |
Michalis Spyrou | 7930db4 | 2018-11-22 17:36:28 +0000 | [diff] [blame] | 154 | res += in; |
Michalis Spyrou | 0b18d97 | 2020-01-30 18:11:13 +0000 | [diff] [blame] | 155 | #endif // defined(MAX) || defined(MIN) || defined(PROD) |
Michalis Spyrou | 7e9391b | 2018-10-05 14:49:28 +0100 | [diff] [blame] | 156 | } |
| 157 | |
Michalis Spyrou | 7930db4 | 2018-11-22 17:36:28 +0000 | [diff] [blame] | 158 | // Store result |
Michalis Spyrou | 7e9391b | 2018-10-05 14:49:28 +0100 | [diff] [blame] | 159 | #if defined(MEAN) |
| 160 | res /= WIDTH; |
Michalis Spyrou | 7930db4 | 2018-11-22 17:36:28 +0000 | [diff] [blame] | 161 | #endif // defined(MEAN) |
Michalis Spyrou | 0b18d97 | 2020-01-30 18:11:13 +0000 | [diff] [blame] | 162 | |
| 163 | // Subtract the offsets in case of quantized SUM |
| 164 | #if defined(SUM) && defined(OFFSET) && defined(SCALE) |
| 165 | res -= (WIDTH - 1) * OFFSET; |
| 166 | #endif // defined(OFFSET) && defined(OFFSET) && defined(SCALE) |
| 167 | |
| 168 | // Re-quantize |
| 169 | #if defined(PROD) && defined(OFFSET) && defined(SCALE) |
| 170 | res = QUANTIZE(res_f, OFFSET, SCALE, DATA_TYPE_PROMOTED, 1); |
| 171 | #endif // defined(PROD) && defined(OFFSET) && defined(SCALE) |
| 172 | |
Michalis Spyrou | 0b18d97 | 2020-01-30 18:11:13 +0000 | [diff] [blame] | 173 | *((__global DATA_TYPE *)output.ptr) = CONVERT_SAT(res, DATA_TYPE); |
Michalis Spyrou | 7e9391b | 2018-10-05 14:49:28 +0100 | [diff] [blame] | 174 | } |
Michalis Spyrou | b9626ab | 2019-05-13 17:41:01 +0100 | [diff] [blame] | 175 | #endif // defined(WIDTH) |
Michalis Spyrou | 7e9391b | 2018-10-05 14:49:28 +0100 | [diff] [blame] | 176 | |
#if defined(HEIGHT)
/** This kernel performs reduction on y-axis: each work-item folds HEIGHT rows of a VEC_SIZE-wide column strip.
 *
 * @note The input data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
 * @note The type used for the accumulator must be passed at compile time using -DDATA_TYPE_PROMOTED
 * @note The height size must be passed at compile time using -DHEIGHT e.g. -DHEIGHT=128
 * @note The vector size and the leftover size must be passed at compile time using -DVEC_SIZE and -DVEC_SIZE_LEFTOVER
 * @note The reduction is selected at compile time with -DMIN, -DMAX or -DPROD; when none of them is defined the rows
 *       are added up, and -DSUM_SQUARE squares every element before it is accumulated
 * @note The mean flag -DMEAN divides the accumulated value by HEIGHT
 * @note When -DOFFSET and -DSCALE are defined: with -DPROD the product is accumulated on de-quantized float values and
 *       re-quantized at the end; with -DSUM the value (HEIGHT - 1) * OFFSET is subtracted from the accumulated sum
 *
 * @param[in]  input_ptr                            Pointer to the source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32
 * @param[in]  input_stride_x                       Stride of the source tensor in X dimension (in bytes)
 * @param[in]  input_step_x                         input_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  input_stride_y                       Stride of the source tensor in Y dimension (in bytes)
 * @param[in]  input_step_y                         input_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  input_offset_first_element_in_bytes  The offset of the first element in the source tensor
 * @param[out] output_ptr                           Pointer to the destination buffer holding the reduced values. Supported data types: same as @p input_ptr
 * @param[in]  output_stride_x                      Stride of the output tensor in X dimension (in bytes)
 * @param[in]  output_step_x                        output_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  output_stride_y                      Stride of the output tensor in Y dimension (in bytes)
 * @param[in]  output_step_y                        output_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  output_offset_first_element_in_bytes The offset of the first element in the destination tensor
 */
__kernel void reduction_operation_y(
    IMAGE_DECLARATION(input),
    IMAGE_DECLARATION(output))
{
    // First element handled by this work-item: shifted back by the leftover amount and clamped at 0
    int x_start = max((int)(get_global_id(0) * VEC_SIZE - (VEC_SIZE - VEC_SIZE_LEFTOVER) % VEC_SIZE), 0);
    int y_id    = get_global_id(1);

    __global uchar *input_addr  = input_ptr + input_offset_first_element_in_bytes + x_start * sizeof(DATA_TYPE) + y_id * input_stride_y;
    __global uchar *output_addr = output_ptr + output_offset_first_element_in_bytes + x_start * sizeof(DATA_TYPE) + y_id * output_stride_y;

    // Row 0 seeds the accumulator, so the loop below starts from row 1
    VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE)
    res = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input_addr), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE));

#if defined(PROD) && defined(OFFSET) && defined(SCALE)
    // The quantized product is carried out in F32
    VEC_DATA_TYPE(float, VEC_SIZE)
    res_f = DEQUANTIZE(res, OFFSET, SCALE, DATA_TYPE_PROMOTED, VEC_SIZE);
#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)

#if defined(SUM_SQUARE)
    res *= res;
#endif // defined(SUM_SQUARE)

    for(unsigned int row = 1; row < HEIGHT; ++row)
    {
        VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE)
        vals = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(input_addr + row * input_stride_y)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE));
#if defined(MIN)
        res = select(res, vals, ISLESS(vals, res));
#elif defined(MAX)
        res = select(res, vals, ISGREATER(vals, res));
#else // !(defined(MIN) || defined(MAX))
#if defined(SUM_SQUARE)
        vals *= vals;
#endif // defined(SUM_SQUARE)
#if !defined(PROD)
        res += vals;
#elif defined(OFFSET) && defined(SCALE)
        res_f *= DEQUANTIZE(vals, OFFSET, SCALE, DATA_TYPE_PROMOTED, VEC_SIZE);
#else  // defined(PROD) without quantization info
        res *= vals;
#endif // !defined(PROD)
#endif // defined(MIN) / defined(MAX)
    }

#if defined(MEAN)
    res /= HEIGHT;
#endif // defined(MEAN)

#if defined(SUM) && defined(OFFSET) && defined(SCALE)
    // Quantized SUM: remove the offsets that were accumulated together with the values
    res -= (HEIGHT - 1) * OFFSET;
#endif // defined(SUM) && defined(OFFSET) && defined(SCALE)

#if defined(PROD) && defined(OFFSET) && defined(SCALE)
    // Quantized PROD: bring the float product back to the quantized domain
    res = QUANTIZE(res_f, OFFSET, SCALE, DATA_TYPE_PROMOTED, VEC_SIZE);
#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)

    // Store result.
    // NOTE(review): STORE_VECTOR_SELECT receives the basename `res`; presumably it appends the row index and therefore
    // stores `res0` - confirm against helpers.h before renaming either variable.
    VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
    res0 = CONVERT_SAT(res, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE));
    STORE_VECTOR_SELECT(res, DATA_TYPE, output_addr, VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0);
}
#endif // defined(HEIGHT)
Michalis Spyrou | 7e9391b | 2018-10-05 14:49:28 +0100 | [diff] [blame] | 265 | |
#if defined(DEPTH)
/** This kernel performs reduction on z-axis: each work-item folds DEPTH planes of a VEC_SIZE-wide strip.
 *
 * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
 * @note The type used for the accumulator must be passed at compile time using -DDATA_TYPE_PROMOTED
 * @note The depth size must be passed at compile time using -DDEPTH e.g. -DDEPTH=128
 * @note The vector size and the leftover size must be passed at compile time using -DVEC_SIZE and -DVEC_SIZE_LEFTOVER
 * @note The reduction is selected at compile time with -DMIN, -DMAX or -DPROD; when none of them is defined the planes
 *       are added up, and -DSUM_SQUARE squares every element before it is accumulated
 * @note The mean flag -DMEAN divides the accumulated value by DEPTH
 * @note When -DOFFSET and -DSCALE are defined: with -DPROD the product is accumulated on de-quantized float values and
 *       re-quantized at the end; with -DSUM the value (DEPTH - 1) * OFFSET is subtracted from the accumulated sum
 *
 * @param[in]  input_ptr                            Pointer to the source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32
 * @param[in]  input_stride_x                       Stride of the source tensor in X dimension (in bytes)
 * @param[in]  input_step_x                         input_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  input_stride_y                       Stride of the source tensor in Y dimension (in bytes)
 * @param[in]  input_step_y                         input_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  input_stride_z                       Stride of the source tensor in Z dimension (in bytes)
 * @param[in]  input_step_z                         input_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in]  input_offset_first_element_in_bytes  The offset of the first element in the source tensor
 * @param[out] output_ptr                           Pointer to the destination buffer holding the reduced values. Supported data types: same as @p input_ptr
 * @param[in]  output_stride_x                      Stride of the output tensor in X dimension (in bytes)
 * @param[in]  output_step_x                        output_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  output_stride_y                      Stride of the output tensor in Y dimension (in bytes)
 * @param[in]  output_step_y                        output_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  output_stride_z                      Stride of the output tensor in Z dimension (in bytes)
 * @param[in]  output_step_z                        output_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in]  output_offset_first_element_in_bytes The offset of the first element in the destination tensor
 */
__kernel void reduction_operation_z(
    TENSOR3D_DECLARATION(input),
    TENSOR3D_DECLARATION(output))
{
    // First element handled by this work-item: shifted back by the leftover amount and clamped at 0
    int x_start = max((int)(get_global_id(0) * VEC_SIZE - (VEC_SIZE - VEC_SIZE_LEFTOVER) % VEC_SIZE), 0);
    int y_id    = get_global_id(1);
    int z_id    = get_global_id(2);

    __global uchar *input_addr  = input_ptr + input_offset_first_element_in_bytes + x_start * sizeof(DATA_TYPE) + y_id * input_stride_y + z_id * input_stride_z;
    __global uchar *output_addr = output_ptr + output_offset_first_element_in_bytes + x_start * sizeof(DATA_TYPE) + y_id * output_stride_y + z_id * output_stride_z;

    // Plane 0 seeds the accumulator, so the loop below starts from plane 1
    VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE)
    res = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input_addr), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE));

#if defined(PROD) && defined(OFFSET) && defined(SCALE)
    // The quantized product is carried out in F32
    VEC_DATA_TYPE(float, VEC_SIZE)
    res_f = DEQUANTIZE(res, OFFSET, SCALE, DATA_TYPE_PROMOTED, VEC_SIZE);
#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)

#if defined(SUM_SQUARE)
    res *= res;
#endif // defined(SUM_SQUARE)

    for(unsigned int plane = 1; plane < DEPTH; ++plane)
    {
        VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE)
        vals = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(input_addr + plane * input_stride_z)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE));

#if defined(MIN)
        res = select(res, vals, ISLESS(vals, res));
#elif defined(MAX)
        res = select(res, vals, ISGREATER(vals, res));
#else // !(defined(MIN) || defined(MAX))
#if defined(SUM_SQUARE)
        vals *= vals;
#endif // defined(SUM_SQUARE)
#if !defined(PROD)
        res += vals;
#elif defined(OFFSET) && defined(SCALE)
        res_f *= DEQUANTIZE(vals, OFFSET, SCALE, DATA_TYPE_PROMOTED, VEC_SIZE);
#else  // defined(PROD) without quantization info
        res *= vals;
#endif // !defined(PROD)
#endif // defined(MIN) / defined(MAX)
    }

#if defined(MEAN)
    res /= DEPTH;
#endif // defined(MEAN)

#if defined(SUM) && defined(OFFSET) && defined(SCALE)
    // Quantized SUM: remove the offsets that were accumulated together with the values
    res -= (DEPTH - 1) * OFFSET;
#endif // defined(SUM) && defined(OFFSET) && defined(SCALE)

#if defined(PROD) && defined(OFFSET) && defined(SCALE)
    // Quantized PROD: bring the float product back to the quantized domain
    res = QUANTIZE(res_f, OFFSET, SCALE, DATA_TYPE_PROMOTED, VEC_SIZE);
#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)

    // Store result.
    // NOTE(review): STORE_VECTOR_SELECT receives the basename `res`; presumably it appends the row index and therefore
    // stores `res0` - confirm against helpers.h before renaming either variable.
    VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
    res0 = CONVERT_SAT(res, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE));

    STORE_VECTOR_SELECT(res, DATA_TYPE, output_addr, VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0);
}
#endif /* defined(DEPTH) */
| 361 | |
| 362 | #if defined(BATCH) && defined(DEPTH) |
| 363 | /** This kernel performs reduction on w-axis. |
| 364 | * |
| 365 | * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float |
| 366 | * @note The batch size must be passed at compile time using -DBATCH e.g. -DBATCH=128 |
Manuel Bottini | 34f88dd | 2019-10-18 10:37:46 +0000 | [diff] [blame] | 367 | * @note The depth size must be passed at compile time using -DDEPTH e.g. -DDEPTH=128 |
Michalis Spyrou | 7e9391b | 2018-10-05 14:49:28 +0100 | [diff] [blame] | 368 | * |
Michele Di Giorgio | f6f7876 | 2020-07-06 11:27:21 +0100 | [diff] [blame] | 369 | * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32 |
Michalis Spyrou | 7e9391b | 2018-10-05 14:49:28 +0100 | [diff] [blame] | 370 | * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes) |
| 371 | * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) |
| 372 | * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes) |
| 373 | * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) |
| 374 | * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes) |
| 375 | * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) |
| 376 | * @param[in] input_stride_w Stride of the source tensor in W dimension (in bytes) |
| 377 | * @param[in] input_step_w input_stride_w * number of elements along W processed per workitem(in bytes) |
| 378 | * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor |
Michele Di Giorgio | f6f7876 | 2020-07-06 11:27:21 +0100 | [diff] [blame] | 379 | * @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p input_ptr |
Michalis Spyrou | 7e9391b | 2018-10-05 14:49:28 +0100 | [diff] [blame] | 380 | * @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes) |
| 381 | * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) |
| 382 | * @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes) |
| 383 | * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) |
| 384 | * @param[in] output_stride_z Stride of the output tensor in Z dimension (in bytes) |
| 385 | * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) |
| 386 | * @param[in] output_stride_w Stride of the output tensor in W dimension (in bytes) |
| 387 | * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes) |
| 388 | * @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor |
| 389 | */ |
| 390 | __kernel void reduction_operation_w( |
| 391 | TENSOR4D_DECLARATION(input), |
| 392 | TENSOR4D_DECLARATION(output)) |
| 393 | { |
Giorgio Arena | 3ecf9fe | 2021-04-28 16:11:51 +0100 | [diff] [blame] | 394 | int x = max((int)(get_global_id(0) * VEC_SIZE - (VEC_SIZE - VEC_SIZE_LEFTOVER) % VEC_SIZE), 0); |
| 395 | int y = get_global_id(1); |
| 396 | int z = get_global_id(2); |
Michalis Spyrou | 7e9391b | 2018-10-05 14:49:28 +0100 | [diff] [blame] | 397 | |
Giorgio Arena | 3ecf9fe | 2021-04-28 16:11:51 +0100 | [diff] [blame] | 398 | __global uchar *input_addr = input_ptr + input_offset_first_element_in_bytes + x * sizeof(DATA_TYPE) + y * input_stride_y + (z % DEPTH) * input_stride_z + (z / DEPTH) * input_stride_w; |
| 399 | __global uchar *output_addr = output_ptr + output_offset_first_element_in_bytes + x * sizeof(DATA_TYPE) + y * output_stride_y + (z % DEPTH) * output_stride_z + (z / DEPTH) * output_stride_z; |
| 400 | |
| 401 | VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE) |
| 402 | res = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input_addr), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE)); |
Michalis Spyrou | 7e9391b | 2018-10-05 14:49:28 +0100 | [diff] [blame] | 403 | |
Michalis Spyrou | 0b18d97 | 2020-01-30 18:11:13 +0000 | [diff] [blame] | 404 | // Convert input into F32 in order to perform quantized multiplication |
| 405 | #if defined(PROD) && defined(OFFSET) && defined(SCALE) |
Giorgio Arena | 3ecf9fe | 2021-04-28 16:11:51 +0100 | [diff] [blame] | 406 | VEC_DATA_TYPE(float, VEC_SIZE) |
| 407 | res_f = DEQUANTIZE(res, OFFSET, SCALE, DATA_TYPE_PROMOTED, VEC_SIZE); |
Michalis Spyrou | 0b18d97 | 2020-01-30 18:11:13 +0000 | [diff] [blame] | 408 | #endif // defined(PROD) && defined(OFFSET) && defined(SCALE) |
| 409 | |
Michalis Spyrou | 7930db4 | 2018-11-22 17:36:28 +0000 | [diff] [blame] | 410 | #if defined(SUM_SQUARE) |
| 411 | res *= res; |
| 412 | #endif // defined(SUM_SQUARE) |
| 413 | |
Michalis Spyrou | 7930db4 | 2018-11-22 17:36:28 +0000 | [diff] [blame] | 414 | for(unsigned int w = 1; w < BATCH; ++w) |
Michalis Spyrou | 7e9391b | 2018-10-05 14:49:28 +0100 | [diff] [blame] | 415 | { |
Giorgio Arena | 3ecf9fe | 2021-04-28 16:11:51 +0100 | [diff] [blame] | 416 | VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE) |
| 417 | in = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(input_addr + w * input_stride_w)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE)); |
Michalis Spyrou | 7930db4 | 2018-11-22 17:36:28 +0000 | [diff] [blame] | 418 | |
Manuel Bottini | 7b9998d | 2019-10-21 17:59:07 +0100 | [diff] [blame] | 419 | #if defined(MIN) |
Usama Arif | 048b0f3 | 2019-05-22 16:32:27 +0100 | [diff] [blame] | 420 | res = select(res, in, ISLESS(in, res)); |
| 421 | #elif defined(MAX) |
Michele Di Giorgio | 0333704 | 2020-02-26 17:47:55 +0000 | [diff] [blame] | 422 | res = select(res, in, ISGREATER(in, res)); |
Manuel Bottini | 7b9998d | 2019-10-21 17:59:07 +0100 | [diff] [blame] | 423 | #else // !(defined(MAX) || defined(MIN)) |
Michalis Spyrou | 8aaf93e | 2018-10-11 17:33:32 +0100 | [diff] [blame] | 424 | #if defined(SUM_SQUARE) |
| 425 | in *= in; |
Michalis Spyrou | 7930db4 | 2018-11-22 17:36:28 +0000 | [diff] [blame] | 426 | #endif // defined(SUM_SQUARE) |
Manuel Bottini | b412fab | 2018-12-10 17:40:23 +0000 | [diff] [blame] | 427 | #if defined(PROD) |
Michalis Spyrou | 0b18d97 | 2020-01-30 18:11:13 +0000 | [diff] [blame] | 428 | |
| 429 | #if defined(OFFSET) && defined(SCALE) |
Giorgio Arena | 3ecf9fe | 2021-04-28 16:11:51 +0100 | [diff] [blame] | 430 | res_f *= DEQUANTIZE(in, OFFSET, SCALE, DATA_TYPE_PROMOTED, VEC_SIZE); |
Michalis Spyrou | 0b18d97 | 2020-01-30 18:11:13 +0000 | [diff] [blame] | 431 | #else // !(defined(OFFSET) && defined(SCALE)) |
Manuel Bottini | b412fab | 2018-12-10 17:40:23 +0000 | [diff] [blame] | 432 | res *= in; |
Michalis Spyrou | 0b18d97 | 2020-01-30 18:11:13 +0000 | [diff] [blame] | 433 | #endif // defined(OFFSET) && defined(SCALE) |
| 434 | |
| 435 | #else // !defined(PROD) |
Michalis Spyrou | 8aaf93e | 2018-10-11 17:33:32 +0100 | [diff] [blame] | 436 | res += in; |
Manuel Bottini | b412fab | 2018-12-10 17:40:23 +0000 | [diff] [blame] | 437 | #endif //defined(PROD) |
Manuel Bottini | 7b9998d | 2019-10-21 17:59:07 +0100 | [diff] [blame] | 438 | #endif // defined(MAX) || defined(MIN) |
Michalis Spyrou | 7e9391b | 2018-10-05 14:49:28 +0100 | [diff] [blame] | 439 | } |
| 440 | |
| 441 | #if defined(MEAN) |
| 442 | res /= BATCH; |
Michalis Spyrou | 7930db4 | 2018-11-22 17:36:28 +0000 | [diff] [blame] | 443 | #endif // defined(MEAN) |
Michalis Spyrou | 0b18d97 | 2020-01-30 18:11:13 +0000 | [diff] [blame] | 444 | |
| 445 | // Subtract the offsets in case of quantized SUM |
| 446 | #if defined(SUM) && defined(OFFSET) && defined(SCALE) |
| 447 | res -= (BATCH - 1) * OFFSET; |
| 448 | #endif // defined(OFFSET) && defined(OFFSET) && defined(SCALE) |
| 449 | |
| 450 | // Re-quantize |
| 451 | #if defined(PROD) && defined(OFFSET) && defined(SCALE) |
Giorgio Arena | 3ecf9fe | 2021-04-28 16:11:51 +0100 | [diff] [blame] | 452 | res = QUANTIZE(res_f, OFFSET, SCALE, DATA_TYPE_PROMOTED, VEC_SIZE); |
Michalis Spyrou | 0b18d97 | 2020-01-30 18:11:13 +0000 | [diff] [blame] | 453 | #endif // defined(PROD) && defined(OFFSET) && defined(SCALE) |
| 454 | |
| 455 | // Store result |
Giorgio Arena | 3ecf9fe | 2021-04-28 16:11:51 +0100 | [diff] [blame] | 456 | VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) |
| 457 | res0 = CONVERT_SAT(res, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)); |
| 458 | STORE_VECTOR_SELECT(res, DATA_TYPE, output_addr, VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0); |
Michalis Spyrou | 7e9391b | 2018-10-05 14:49:28 +0100 | [diff] [blame] | 459 | } |
Manuel Bottini | b412fab | 2018-12-10 17:40:23 +0000 | [diff] [blame] | 460 | #endif /* defined(BATCH) && defined(DEPTH) */ |