blob: c07fac0e0d997ac125ab91c232f19d307f5d5cce [file] [log] [blame]
Adnan AlSinan4184e862023-07-10 15:20:44 +01001/*
2 * Copyright (c) 2023 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "GpuCkwActivation.h"
25
Adnan AlSinan4184e862023-07-10 15:20:44 +010026#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Validate.h"
Matthew Bentham314d3e22023-06-23 10:53:52 +000028#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
Adnan AlSinan4184e862023-07-10 15:20:44 +010029#include "ckw/TensorTileSampler.h"
30#include "src/core/helpers/WindowHelpers.h"
SiCong Li23882a92023-06-28 09:49:45 +010031#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
Adnan AlSinan4184e862023-07-10 15:20:44 +010032#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
SiCong Li23882a92023-06-28 09:49:45 +010033#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
34#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
Adnan AlSinan4184e862023-07-10 15:20:44 +010035#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
36#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
37#include <string>
38
39using namespace ckw;
40namespace arm_compute
41{
42namespace experimental
43{
44namespace dynamic_fusion
45{
46namespace
47{
48/** Create a simple sampler from tile of dimension [m0, n0]
49 */
50inline TensorTileSampler create_sampler(GpuCkwScopedKernelWriter &writer, int32_t m0, int32_t n0)
51{
52 TensorTileSampler sampler;
53
54 auto &gid_0 = writer->declare_tile("gid_0", ckw::DataType::Int32);
55 auto &gid_1 = writer->declare_tile("gid_1", ckw::DataType::Int32);
56 auto &gid_2 = writer->declare_tile("gid_2", ckw::DataType::Int32);
57
58 auto &const_0 = writer->declare_tile("0", 0);
59 writer->op_get_global_id(gid_0, 0);
60 writer->op_get_global_id(gid_1, 1);
61 writer->op_get_global_id(gid_2, 2);
62
63 auto &x_coord = writer->declare_tile("x_coord", ckw::DataType::Int32);
64 auto &y_coord = writer->declare_tile("y_coord", ckw::DataType::Int32);
65 auto &m0_t = writer->declare_tile("m0", m0);
66 auto &n0_t = writer->declare_tile("n0", n0);
67 writer->op_binary_expression(x_coord, gid_0, BinaryOp::Mul, n0_t);
68 writer->op_binary_expression(y_coord, gid_1, BinaryOp::Mul, m0_t);
69
70 sampler.x(x_coord);
71 sampler.y(y_coord);
72 sampler.z(const_0); // 3rd dimension collapsed with 2nd dimension
73 sampler.b(gid_2);
74
75 sampler.width(n0);
76 sampler.height(m0);
77
78 sampler.format(TensorSamplerFormat::C_WH_1); // 3rd dimension collapsed with 2nd dimension
79 sampler.address_mode_x(TensorSamplerAddressModeX::None);
80 sampler.address_mode_y(TensorSamplerAddressModeY::ClampToBorder);
81 sampler.address_mode_z(TensorSamplerAddressModeZ::Skip); // Dimensions higher than 3 not supported yet
82
83 return sampler;
84}
85} // namespace
86
87GpuCkwActivation::GpuCkwActivation(ComponentId id,
SiCong Li23882a92023-06-28 09:49:45 +010088 const ArgumentPack<ITensorInfo> &tensors,
89 const Attributes &attributes)
Adnan AlSinan4184e862023-07-10 15:20:44 +010090 : IGpuCkwComponentDriver{ id, tensors },
91 _src{},
92 _dst{},
93 _attributes{ attributes }
94{
95 _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
96 _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
97 ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
98}
99
100void GpuCkwActivation::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
101{
102 const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window();
103 const unsigned int n0 = root_window.x().step();
104 const unsigned int m0 = root_window.y().step();
105
SiCong Li23882a92023-06-28 09:49:45 +0100106 GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
107 GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
Adnan AlSinan4184e862023-07-10 15:20:44 +0100108
109 load_src_dst_tiles_and_prepare_sampler(writer, src, dst, m0, n0, create_sampler);
110
111 auto &src_tile = src->tile();
112 auto &dst_tile = dst->tile();
113
114 // Constants
SiCong Li23882a92023-06-28 09:49:45 +0100115 const auto &constant_minus_1 = writer->declare_tile("minus_1", -1);
116 const auto &constant_pos_1 = writer->declare_tile("one", 1);
117 const auto &constant_zero = writer->declare_tile("zero", 0);
118 const auto &constant_A = writer->declare_tile("A_VAL", _attributes.a());
119 const auto &constant_B = writer->declare_tile("B_VAL", _attributes.b());
Adnan AlSinan4184e862023-07-10 15:20:44 +0100120
121 // Perform the operation.
SiCong Li23882a92023-06-28 09:49:45 +0100122 switch(_attributes.activation())
Adnan AlSinan4184e862023-07-10 15:20:44 +0100123 {
124 case ActivationLayerInfo::ActivationFunction::LOGISTIC:
125 {
126 // dst = src * -1
127 writer->op_binary_expression(dst_tile, src_tile, BinaryOp::Mul, constant_minus_1);
128 // dst = exp(src * -1)
129 writer->op_unary_elementwise_function(dst_tile, UnaryFunction::Exp, dst_tile);
130 // dst = 1 + (exp(src * -1))
131 writer->op_binary_expression(dst_tile, dst_tile, BinaryOp::Add, constant_pos_1);
132 // dst = 1 / 1 + (exp(src * -1))
133 writer->op_binary_expression(dst_tile, constant_pos_1, BinaryOp::Div, dst_tile);
134 break;
135 }
136 case ActivationLayerInfo::ActivationFunction::TANH:
137 {
138 // dst = B_VAL * src
139 writer->op_binary_expression(dst_tile, src_tile, BinaryOp::Mul, constant_B);
140 // dst = tanh(B_VAL * src)
141 writer->op_unary_elementwise_function(dst_tile, UnaryFunction::Tanh, dst_tile);
142 // dst = A_VAL * tanh(B_VAL * src)
143 writer->op_binary_expression(dst_tile, dst_tile, BinaryOp::Mul, constant_A);
144 break;
145 }
146 case ActivationLayerInfo::ActivationFunction::RELU:
147 {
148 // dst = max(src, 0)
149 writer->op_binary_elementwise_function(dst_tile, ckw::BinaryFunction::Max, src_tile, constant_zero);
150 break;
151 }
152 case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
153 {
154 //dst = max(src, 0)
155 writer->op_binary_elementwise_function(dst_tile, ckw::BinaryFunction::Max, src_tile, constant_zero);
156 //dst = min(max(src, 0), A_VAL)
157 writer->op_binary_elementwise_function(dst_tile, ckw::BinaryFunction::Min, dst_tile, constant_A);
158 break;
159 }
160 case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU:
161 {
162 //dst = max(src, B_VAL)
163 writer->op_binary_elementwise_function(dst_tile, ckw::BinaryFunction::Max, src_tile, constant_B);
164 //dst = min(max(src, B_VAL), A_VAL)
165 writer->op_binary_elementwise_function(dst_tile, ckw::BinaryFunction::Min, dst_tile, constant_A);
166 break;
167 }
168 default:
169 CKW_ASSERT(false);
170 break;
171 }
172}
173
174Window GpuCkwActivation::get_window() const
175{
176 ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
177
178 TensorShape output_shape = _dst->tensor_shape();
179 // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged
180 // This is in line with the collapsing convention used by operators like Conv2d
181 output_shape.collapse(2U, 1U);
SiCong Li23882a92023-06-28 09:49:45 +0100182 constexpr unsigned int vector_size_byte_opencl = 16;
183 const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
184 Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
Adnan AlSinan4184e862023-07-10 15:20:44 +0100185
186 return win;
187}
188
189} // namespace dynamic_fusion
190} // namespace experimental
Matthew Bentham314d3e22023-06-23 10:53:52 +0000191} // namespace arm_compute