/*
 * Copyright (c) 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h"
25
26#include "arm_compute/core/AccessWindowStatic.h"
27#include "arm_compute/core/CL/CLHelpers.h"
28#include "arm_compute/core/CL/CLKernelLibrary.h"
29#include "arm_compute/core/CL/ICLTensor.h"
30#include "arm_compute/core/CL/OpenCL.h"
31#include "arm_compute/core/Helpers.h"
32#include "arm_compute/core/TensorInfo.h"
33#include "arm_compute/core/Utils.h"
34#include "arm_compute/core/Validate.h"
35#include "arm_compute/core/Window.h"
36
37#include <set>
38#include <string>
39
40using namespace arm_compute;
41
42void CLLogits1DMaxKernel::configure(const ICLTensor *input, ICLTensor *output)
43{
Georgios Pinitase5f8fd62017-06-23 18:03:44 +010044 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F16, DataType::F32);
Anthony Barbier6ff3b192017-09-04 18:44:23 +010045 ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
Georgios Pinitase5f8fd62017-06-23 18:03:44 +010046 ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
Anthony Barbier6ff3b192017-09-04 18:44:23 +010047
48 _input = input;
49 _output = output;
50
51 // The kernel loops over all elements in steps of 16
52 const unsigned int num_elems_processed_per_iteration = ceil_to_multiple(input->info()->dimension(0), 16);
53
54 // Set build options
Georgios Pinitase5f8fd62017-06-23 18:03:44 +010055 std::set<std::string> build_opts;
56 build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
57 if(is_data_type_fixed_point(input->info()->data_type()))
58 {
59 build_opts.emplace(("-DFIXED_POINT_POSITION=" + val_to_string(input->info()->fixed_point_position())));
60 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +010061
62 // Tell the kernel that the width is not a multiple of 16
63 if((input->info()->dimension(0) % max_cl_vector_width) != 0)
64 {
65 build_opts.emplace("-DNON_MULTIPLE_OF_16");
66 }
67
68 // Create kernel
69 _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("softmax_layer_max", build_opts));
70
71 // Set fixed arguments
72 unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters
73 _kernel.setArg<cl_uint>(idx++, input->info()->dimension(0));
74
75 // Configure kernel window
76 constexpr unsigned int num_elems_written_per_iteration = 1;
77
78 Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
79 AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
80 AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration);
81
82 update_window_and_padding(win, input_access, output_access);
83
84 output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
85
86 ICLKernel::configure(win);
87}
88
89CLLogits1DShiftExpSumKernel::CLLogits1DShiftExpSumKernel()
90 : _input(nullptr), _max(nullptr), _output(nullptr), _sum(nullptr)
91{
92}
93
94void CLLogits1DShiftExpSumKernel::configure(const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum)
95{
Georgios Pinitase5f8fd62017-06-23 18:03:44 +010096 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F16, DataType::F32);
Anthony Barbier6ff3b192017-09-04 18:44:23 +010097 ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, max, sum);
Georgios Pinitase5f8fd62017-06-23 18:03:44 +010098 ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
99 ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output, max, sum);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100100
101 _input = input;
102 _max = max;
103 _output = output;
104 _sum = sum;
105
106 // The kernel loops over all elements in steps of 16
107 const unsigned int num_elems_processed_per_iteration = ceil_to_multiple(input->info()->dimension(0), 16);
108
109 // Set build options
Georgios Pinitase5f8fd62017-06-23 18:03:44 +0100110 std::set<std::string> build_opts;
111 build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
112 if(is_data_type_fixed_point(input->info()->data_type()))
113 {
114 build_opts.emplace(("-DFIXED_POINT_POSITION=" + val_to_string(input->info()->fixed_point_position())));
115 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100116
117 // Tell the kernel that the width is not a multiple of 16
118 if((input->info()->dimension(0) % max_cl_vector_width) != 0)
119 {
120 build_opts.emplace("-DNON_MULTIPLE_OF_16");
121 }
122
123 // Create kernel
124 _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("softmax_layer_shift_exp_sum", build_opts));
125
126 // Set fixed arguments
127 unsigned int idx = 4 * num_arguments_per_2D_tensor(); //Skip the input and output parameters
128 _kernel.setArg<cl_uint>(idx++, input->info()->dimension(0));
129
130 // Configure window
131 Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
132
133 AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
134 AccessWindowHorizontal max_access(max->info(), 0, 1);
135 AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
136 AccessWindowHorizontal sum_access(sum->info(), 0, 1);
137
138 update_window_and_padding(win, input_access, max_access, output_access, sum_access);
139
140 output_access.set_valid_region(win, input->info()->valid_region());
141 sum_access.set_valid_region(win, ValidRegion(Coordinates(), sum->info()->tensor_shape()));
142
143 ICLKernel::configure(win);
144}
145
146void CLLogits1DShiftExpSumKernel::run(const Window &window, cl::CommandQueue &queue)
147{
148 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
149 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
150
151 Window slice = window.first_slice_window_2D();
152
153 do
154 {
155 unsigned int idx = 0;
156 // Set inputs
157 add_2D_tensor_argument(idx, _input, slice);
158 add_2D_tensor_argument(idx, _max, slice);
159 add_2D_tensor_argument(idx, _output, slice);
160 add_2D_tensor_argument(idx, _sum, slice);
161 enqueue(queue, *this, slice);
162 }
163 while(window.slide_window_slice_2D(slice));
164}
165
166CLLogits1DNormKernel::CLLogits1DNormKernel()
167 : _input(nullptr), _sum(nullptr), _output(nullptr)
168{
169}
170
171void CLLogits1DNormKernel::configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output)
172{
Georgios Pinitase5f8fd62017-06-23 18:03:44 +0100173 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F16, DataType::F32);
174 ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, sum, output);
175 ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
176 ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, sum, output);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100177
178 _input = input;
179 _sum = sum;
180 _output = output;
181
182 // Set build options
183 std::set<std::string> build_opts;
Georgios Pinitase5f8fd62017-06-23 18:03:44 +0100184 build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
185 if(is_data_type_fixed_point(input->info()->data_type()))
186 {
187 build_opts.emplace(("-DFIXED_POINT_POSITION=" + val_to_string(input->info()->fixed_point_position())));
188 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100189
190 // Create kernel
191 _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("softmax_layer_norm", build_opts));
192
193 // Configure window
194 constexpr unsigned int num_elems_processed_per_iteration = 16;
195
196 Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
197
198 AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
199 AccessWindowStatic sum_access(sum->info(), 0, 0, 1, sum->info()->dimension(1));
200 AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
201
202 update_window_and_padding(win, input_access, sum_access, output_access);
203
204 output_access.set_valid_region(win, input->info()->valid_region());
205
206 ICLKernel::configure(win);
207}
208
209void CLLogits1DNormKernel::run(const Window &window, cl::CommandQueue &queue)
210{
211 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
212 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
213
214 Window slice = window.first_slice_window_2D();
215
216 do
217 {
218 Window sum_slice = slice;
219 sum_slice.set(Window::DimX, Window::Dimension(0, 1, 1));
220
221 unsigned int idx = 0;
222 // Set inputs
223 add_2D_tensor_argument(idx, _input, slice);
224 add_2D_tensor_argument(idx, _sum, sum_slice);
225 add_2D_tensor_argument(idx, _output, slice);
226 enqueue(queue, *this, slice);
227 }
228 while(window.slide_window_slice_2D(slice));
229}