/*
 * Copyright (c) 2017-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h"

#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "src/core/CL/kernels/CLCol2ImKernel.h"
#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "src/core/CL/kernels/CLIm2ColKernel.h"
#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "support/Cast.h"

#include <cmath>
#include <memory>
#include <set> // std::set is used for the supported fused activations below
#include <tuple>
namespace arm_compute
{
using namespace arm_compute::misc::shape_calculator;
using namespace arm_compute::utils::cast;

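// CLConvolutionLayerReshapeWeights flattens the 4D weights tensor into the 2D
// matrix consumed by the GEMM, optionally appending the biases as an extra row
// (only in the non-quantized case, see configure() below).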
CLConvolutionLayerReshapeWeights::CLConvolutionLayerReshapeWeights()
    : _weights_reshape_kernel(std::make_unique<CLWeightsReshapeKernel>())
{
}

CLConvolutionLayerReshapeWeights::~CLConvolutionLayerReshapeWeights() = default;

void CLConvolutionLayerReshapeWeights::configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups)
{
    configure(CLKernelLibrary::get().get_compile_context(), weights, biases, output, num_groups);
}

void CLConvolutionLayerReshapeWeights::configure(const CLCompileContext &compile_context, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups)
{
    // Perform validation step
    ARM_COMPUTE_ERROR_ON_NULLPTR(weights, output);
    ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayerReshapeWeights::validate(weights->info(),
                                                                          (biases != nullptr) ? biases->info() : nullptr,
                                                                          output->info(),
                                                                          num_groups));

    const bool       append_biases = (biases != nullptr) && !is_data_type_quantized_asymmetric(weights->info()->data_type());
    const ICLTensor *biases_to_use = (append_biases) ? biases : nullptr;

    _weights_reshape_kernel->configure(compile_context, weights, biases_to_use, output, num_groups);

    output->info()->set_quantization_info(weights->info()->quantization_info());
}

Status CLConvolutionLayerReshapeWeights::validate(const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, unsigned int num_groups)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(weights);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::F16, DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4);

    if(biases != nullptr)
    {
        const int idx_kernels = get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::BATCHES);
        ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized(weights->data_type()));

        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(idx_kernels));
        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
    }

    if((output != nullptr) && (output->total_size() != 0))
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, output);
        ARM_COMPUTE_RETURN_ON_ERROR(CLWeightsReshapeKernel::validate(weights, biases, output, num_groups));
    }

    return Status{};
}

void CLConvolutionLayerReshapeWeights::run()
{
    CLScheduler::get().enqueue(*_weights_reshape_kernel);
}

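// CLGEMMConvolutionLayer lowers a convolution to a matrix multiplication:
// the input is (optionally) rearranged into a matrix with im2col, multiplied
// against the reshaped weights with CLGEMM or CLGEMMLowpMatrixMultiplyCore,
// and the result is (optionally) rearranged back with col2im. The im2col and
// col2im steps are skipped whenever the data layout makes them unnecessary
// (see the _skip_im2col / _skip_col2im flags set in configure()).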
CLGEMMConvolutionLayer::CLGEMMConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
    : _memory_group(memory_manager), _weights_manager(weights_manager), _reshape_weights(), _reshape_weights_managed(), _im2col_kernel(std::make_unique<CLIm2ColKernel>()), _mm_gemm(memory_manager,
      weights_manager), _mm_gemmlowp(memory_manager), _col2im_kernel(std::make_unique<CLCol2ImKernel>()), _activationlayer_function(), _original_weights(nullptr), _im2col_output(), _weights_reshaped(),
      _gemm_output(), _skip_im2col(false), _skip_col2im(false), _is_quantized(false), _fuse_activation(true), _is_prepared(false)
{
}

CLGEMMConvolutionLayer::~CLGEMMConvolutionLayer() = default;

void CLGEMMConvolutionLayer::configure_mm(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
                                          const GEMMLowpOutputStageInfo &gemmlowp_output_stage,
                                          int gemm_3d_depth, const ActivationLayerInfo &act_info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights);
    ARM_COMPUTE_ERROR_THROW_ON(validate_mm(input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(), gemmlowp_output_stage, gemm_3d_depth, _skip_im2col, act_info));

    const GEMMInfo &gemm_info = GEMMInfo(false,                 // is_a_reshaped
                                         false,                 // is_b_reshaped
                                         true,                  // reshape_b_only_on_first_run
                                         gemm_3d_depth,         // depth_output_gemm3d
                                         _skip_im2col,          // reinterpret_input_as_3d
                                         false,                 // retain_internal_weights
                                         gemmlowp_output_stage, // gemmlowp_output_stage
                                         false,                 // fp_mixed_precision
                                         true,                  // broadcast_bias
                                         act_info);             // activation_info

    if(_is_quantized)
    {
        // Since we need negative offsets for computing convolution, we need to change QuantizationInfo()
        // Extract and negate input and weights offset
        const QuantizationInfo input_quantization_info   = input->info()->quantization_info();
        const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();

        input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
        weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));

        _mm_gemmlowp.configure(compile_context, input, weights, biases, output, gemm_info);

        // Restore the original QuantizationInfo as input and weights could be used in other convolution layers
        input->info()->set_quantization_info(input_quantization_info);
        weights->info()->set_quantization_info(weights_quantization_info);
    }
    else
    {
        // Configure matrix multiply function
        _mm_gemm.configure(compile_context, input, weights, biases, output, 1.0f, 1.0f, gemm_info);
    }
}

Status CLGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                           const GEMMLowpOutputStageInfo &gemmlowp_output_stage, int gemm_3d_depth, bool skip_im2col, const ActivationLayerInfo &act_info)
{
    const bool is_quantized = is_data_type_quantized_asymmetric(input->data_type());

    const GEMMInfo &gemm_info = GEMMInfo(false,                 // is_a_reshaped
                                         false,                 // is_b_reshaped
                                         true,                  // reshape_b_only_on_first_run
                                         gemm_3d_depth,         // depth_output_gemm3d
                                         skip_im2col,           // reinterpret_input_as_3d
                                         false,                 // retain_internal_weights
                                         gemmlowp_output_stage, // gemmlowp_output_stage
                                         false,                 // fp_mixed_precision
                                         true,                  // broadcast_bias
                                         act_info);             // activation_info

    if(is_quantized)
    {
        // Since we need negative offsets for computing convolution, we need to change QuantizationInfo()
        // Extract and negate input and weights offset
        const QuantizationInfo input_quantization_info   = input->quantization_info();
        const QuantizationInfo weights_quantization_info = weights->quantization_info();

        std::unique_ptr<ITensorInfo> input_qa   = input->clone();
        std::unique_ptr<ITensorInfo> weights_qa = weights->clone();
        input_qa->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
        weights_qa->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));

        // Perform validation step on GEMMLowp
        return CLGEMMLowpMatrixMultiplyCore::validate(input_qa.get(), weights_qa.get(), biases, output, gemm_info);
    }
    else
    {
        // Perform validation step on Matrix multiply function
        return CLGEMM::validate(input, weights, biases, output, 1.0f, 1.0f, gemm_info);
    }
}

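// Minimal usage sketch (illustrative only; the tensor names, shapes, padding
// and activation below are assumptions, and the tensors must be initialised
// and allocated by the caller):
//
//     CLGEMMConvolutionLayer conv;
//     conv.configure(&src, &weights, &biases, &dst,
//                    PadStrideInfo(1, 1, 1, 1), // stride_x, stride_y, pad_x, pad_y
//                    WeightsInfo(),
//                    Size2D(1U, 1U),            // no dilation
//                    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
//     conv.run();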
void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
                                       const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
{
    configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info, weights_info, dilation, act_info, num_groups);
}

void CLGEMMConvolutionLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
                                       const PadStrideInfo &conv_info,
                                       const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);

    ARM_COMPUTE_ERROR_THROW_ON(CLGEMMConvolutionLayer::validate(input->info(),
                                                                weights->info(),
                                                                biases != nullptr ? biases->info() : nullptr,
                                                                output->info(),
                                                                conv_info,
                                                                weights_info,
                                                                dilation,
                                                                act_info,
                                                                num_groups));

    const DataType   data_type   = input->info()->data_type();
    const DataLayout data_layout = input->info()->data_layout();
    const int        idx_width   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const int        idx_height  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
    const int        idx_kernels = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);

    const unsigned int kernel_width  = weights->info()->dimension(idx_width);
    const unsigned int kernel_height = weights->info()->dimension(idx_height);
    const unsigned int num_kernels   = weights->info()->dimension(idx_kernels);

    const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
    const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();

    _is_prepared      = weights_info.retain_internal_weights();
    _original_weights = weights;
    _is_quantized     = is_data_type_quantized_asymmetric(input->info()->data_type());
    _skip_im2col      = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.stride().first == 1 && conv_info.stride().second == 1);
    _skip_col2im      = data_layout == DataLayout::NHWC;
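    // im2col can be skipped for NHWC 1x1 stride-1 convolutions, where each output
    // pixel already reads a contiguous run of input channels; col2im is only needed
    // for NCHW, as for NHWC the GEMM produces the output directly in the expected
    // layout (via GEMM3D, see gemm_3d_depth below).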

    // Only in the quantized case are there a few situations where the activation function cannot be fused in GEMM
    _fuse_activation = true;

    // Set the GPU target for im2col and col2im
    _im2col_kernel->set_target(CLScheduler::get().target());
    _col2im_kernel->set_target(CLScheduler::get().target());

    const ICLTensor *gemm_input_to_use  = input;
    ICLTensor       *gemm_output_to_use = output;

    // Get parameters from conv_info
    unsigned int stride_x = 0;
    unsigned int stride_y = 0;
    std::tie(stride_x, stride_y) = conv_info.stride();

    // Get convolved dimensions
    unsigned int conv_w = 0;
    unsigned int conv_h = 0;
    std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(idx_width),
                                                 input->info()->dimension(idx_height),
                                                 kernel_width,
                                                 kernel_height,
                                                 conv_info,
                                                 dilation);

    unsigned int mat_weights_cols = num_kernels / num_groups;

    const ICLTensor *biases_to_use = biases;
    bool             append_bias   = false;

    ICLTensor *weights_to_use = &_weights_reshaped;
    if(num_groups != 1 && biases != nullptr)
    {
        // num_groups != 1 can only be for NCHW
        // Since a utility function to reshape the biases is missing, we append the biases to the weights tensor
        biases_to_use = nullptr;
        append_bias   = true;

        if(_weights_manager && _weights_manager->are_weights_managed(weights))
        {
            _reshape_weights_managed.configure(compile_context, weights, biases, num_groups);
            weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(weights, &_reshape_weights_managed));
        }
        else
        {
            _reshape_weights.configure(compile_context, weights, biases, &_weights_reshaped, num_groups);
        }
    }
    else
    {
        if(_weights_manager && _weights_manager->are_weights_managed(weights))
        {
            _reshape_weights_managed.configure(compile_context, weights, nullptr, num_groups);
            weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(weights, &_reshape_weights_managed));
        }
        else
        {
            _reshape_weights.configure(compile_context, weights, nullptr, &_weights_reshaped, num_groups);
        }
    }

    // Create tensor to store im2col reshaped inputs
    if(!_skip_im2col)
    {
        _memory_group.manage(&_im2col_output);

        // Configure and tune im2col. im2col output shape is auto-initialized
        _im2col_kernel->configure(compile_context, input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, append_bias, dilation, num_groups);

        // Set quantization info
        _im2col_output.info()->set_quantization_info(input->info()->quantization_info());
        CLScheduler::get().tune_kernel_static(*_im2col_kernel);

        // Update GEMM input
        gemm_input_to_use = &_im2col_output;
    }

    // Create GEMM output tensor
    if(!_skip_col2im)
    {
        TensorShape shape_gemm;

        // If we cannot skip col2im it means we run im2col as well
        shape_gemm = _im2col_output.info()->tensor_shape();
        shape_gemm.set(0, mat_weights_cols);
        shape_gemm.set(1, conv_w * conv_h);

        TensorInfo info_gemm(shape_gemm, 1, data_type);
        info_gemm.set_quantization_info(output->info()->quantization_info()).set_data_layout(input->info()->data_layout());
        _gemm_output.allocator()->init(info_gemm);
        _memory_group.manage(&_gemm_output);

        // Update GEMM output
        gemm_output_to_use = &_gemm_output;
    }

    GEMMLowpOutputStageInfo gemmlowp_output_stage;
    gemmlowp_output_stage.type            = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
    gemmlowp_output_stage.gemmlowp_offset = 0;

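    // For quantized types the GEMM accumulates in int32: the output stage scales
    // those accumulators back to the 8-bit output type with a fixed-point
    // multiplier/shift pair (one pair per filter when the weights are quantized
    // per channel) and clamps the result to the fused-activation bounds computed below.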
    // Configure output stage for quantized case
    if(_is_quantized)
    {
        const auto         output_quant_info        = (output->info()->total_size() == 0) ? iq_info : oq_info;
        const bool         is_quantized_per_channel = is_data_type_quantized_per_channel(weights->info()->data_type());
        const unsigned int num_filters              = (is_quantized_per_channel) ? num_kernels : 1;

        gemmlowp_output_stage.is_quantized_per_channel = is_quantized_per_channel;

        gemmlowp_output_stage.gemmlowp_multipliers.resize(num_filters);
        gemmlowp_output_stage.gemmlowp_shifts.resize(num_filters);
        quantization::compute_quantized_multipliers_and_shifts(input->info(),
                                                               weights->info(),
                                                               output->info(),
                                                               idx_kernels,
                                                               gemmlowp_output_stage.gemmlowp_multipliers.data(),
                                                               gemmlowp_output_stage.gemmlowp_shifts.data());
        gemmlowp_output_stage.gemmlowp_multiplier = gemmlowp_output_stage.gemmlowp_multipliers[0];
        gemmlowp_output_stage.gemmlowp_shift      = gemmlowp_output_stage.gemmlowp_shifts[0];

        PixelValue min_val{};
        PixelValue max_val{};
        std::tie(min_val, max_val) = get_min_max(output->info()->data_type());

        auto min_activation = min_val.get<int32_t>();
        auto max_activation = max_val.get<int32_t>();

        const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU,
                                                                                   ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
                                                                                   ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU
                                                                                 };

        if(act_info.enabled())
        {
            if(supported_acts.count(act_info.activation()) != 0)
            {
                std::tie(min_activation, max_activation) = get_quantized_activation_min_max(act_info, data_type, output_quant_info);
            }
            else
            {
                _fuse_activation = false;
            }
        }

        // Set the GEMMLowp output stage info
        gemmlowp_output_stage.gemmlowp_offset    = output_quant_info.offset;
        gemmlowp_output_stage.gemmlowp_min_bound = min_activation;
        gemmlowp_output_stage.gemmlowp_max_bound = max_activation;
    }

    // Configure and tune GEMM
    // In case of NHWC, we need to run GEMM3D (gemm_3d_depth != 0) in order to avoid reshaping the output matrix
    const unsigned int gemm_3d_depth = (data_layout == DataLayout::NHWC) ? conv_h : 0;

    configure_mm(compile_context, gemm_input_to_use, weights_to_use, biases_to_use, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth, act_info);

    if(!_skip_im2col)
    {
        _im2col_output.allocator()->allocate();
    }

    if(!_skip_col2im)
    {
        // Configure and tune Col2Im
        _col2im_kernel->configure(compile_context, gemm_output_to_use, output, Size2D(conv_w, conv_h), num_groups);
        CLScheduler::get().tune_kernel_static(*_col2im_kernel.get());

        _gemm_output.allocator()->allocate();
    }

    ARM_COMPUTE_ERROR_ON_MSG((output->info()->dimension(idx_width) != conv_w) || (output->info()->dimension(idx_height) != conv_h),
                             "Output shape does not match the expected one");

    if(!_fuse_activation)
    {
        _activationlayer_function.configure(compile_context, output, nullptr, act_info);
    }

    ARM_COMPUTE_UNUSED(weights_info);
}

Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                                        const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights_info.are_reshaped(), "Weights already reshaped are not supported!");
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
    const bool is_quantized_per_channel = is_data_type_quantized_per_channel(weights->data_type());

    if(!is_quantized_per_channel)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
    }
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights);
    ARM_COMPUTE_RETURN_ERROR_ON_MSG((num_groups != 1) && (input->data_layout() != DataLayout::NCHW), "Grouping (num_groups != 1) with NHWC data layout is not supported");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG((num_groups != 1) && (input->data_type() == DataType::QASYMM8), "Grouping (num_groups != 1) is not supported with QASYMM8");
    ARM_COMPUTE_RETURN_ERROR_ON(((input->dimension(2) / weights->dimension(2)) != num_groups) && (input->data_layout() == DataLayout::NCHW));

    const DataLayout data_layout = input->data_layout();
    const DataType   data_type   = input->data_type();
    const int        idx_width   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const int        idx_height  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
    const int        idx_channel = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
    const int        idx_kernels = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);

    const unsigned int kernel_width  = weights->dimension(idx_width);
    const unsigned int kernel_height = weights->dimension(idx_height);
    const unsigned int num_kernels   = weights->dimension(idx_kernels);

    TensorInfo         im2col_reshaped_info{};
    TensorInfo         info_gemm{};
    TensorInfo         weights_reshaped_info{};
    const ITensorInfo *gemm_input_to_use  = input;
    const ITensorInfo *gemm_output_to_use = output;
    const ITensorInfo *weights_to_use     = weights;
    const bool         is_quantized       = is_data_type_quantized_asymmetric(data_type);
    const bool         skip_im2col        = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.stride().first == 1 && conv_info.stride().second == 1);
    const bool         skip_col2im        = data_layout == DataLayout::NHWC;
    bool               fuse_activation    = true;

    ARM_COMPUTE_RETURN_ERROR_ON((weights->dimension(idx_channel) * num_groups) != input->dimension(idx_channel));
    ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4);

    // Validate biases
    if(biases != nullptr)
    {
        if(is_quantized)
        {
            ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32);
        }
        else
        {
            ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
        }
        ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(idx_kernels));
        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
    }

    if(act_info.enabled())
    {
        ARM_COMPUTE_ERROR_ON(act_info.b() > act_info.a());
    }

    // Get convolved dimensions
    unsigned int conv_w = 0;
    unsigned int conv_h = 0;

    std::tie(conv_w, conv_h) = scaled_dimensions(input->dimension(idx_width),
                                                 input->dimension(idx_height),
                                                 kernel_width,
                                                 kernel_height,
                                                 conv_info,
                                                 dilation);

    unsigned int mat_weights_cols = num_kernels / num_groups;

    const ITensorInfo *biases_to_use = biases;
    bool               append_bias   = false;

    if(num_groups != 1 && biases != nullptr)
    {
        // num_groups != 1 can only be for NCHW
        // Since a utility function to reshape the biases is missing, we append the biases to the weights tensor
        biases_to_use = nullptr;
        append_bias   = true;

        ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayerReshapeWeights::validate(weights, biases, nullptr, num_groups));
        weights_reshaped_info = TensorInfo(compute_weights_reshaped_shape(*weights, true, num_groups), 1, data_type);
    }
    else
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayerReshapeWeights::validate(weights, nullptr, nullptr, num_groups));
        weights_reshaped_info = TensorInfo(compute_weights_reshaped_shape(*weights, false, num_groups), 1, data_type);
    }

    weights_to_use = &weights_reshaped_info;

    if(!skip_im2col)
    {
        const Size2D kernel_dims(kernel_width, kernel_height);

        // Output tensor auto initialization if not yet initialized
        TensorShape expected_output_shape = compute_im2col_conv_shape(input, kernel_dims, conv_info, append_bias, dilation, num_groups == 1, num_groups);

        auto_init_if_empty(im2col_reshaped_info, input->clone()->set_tensor_shape(expected_output_shape));

        ARM_COMPUTE_RETURN_ON_ERROR(CLIm2ColKernel::validate(input, &im2col_reshaped_info, kernel_dims, conv_info, append_bias, dilation, num_groups));
        gemm_input_to_use = &im2col_reshaped_info;
    }

    // Create GEMM output tensor
    if(!skip_col2im)
    {
        TensorShape shape_gemm;

        shape_gemm = gemm_input_to_use->tensor_shape();
        shape_gemm.set(0, mat_weights_cols);
        shape_gemm.set(1, conv_w * conv_h);

        info_gemm = TensorInfo(shape_gemm, 1, data_type);
        info_gemm.set_quantization_info(output->quantization_info()).set_data_layout(input->data_layout());
        gemm_output_to_use = &info_gemm;
    }

    GEMMLowpOutputStageInfo gemmlowp_output_stage;
    gemmlowp_output_stage.type                     = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
    gemmlowp_output_stage.gemmlowp_offset          = 0;
    gemmlowp_output_stage.is_quantized_per_channel = is_quantized_per_channel;

    if(is_quantized)
    {
        const UniformQuantizationInfo iq_info           = input->quantization_info().uniform();
        const UniformQuantizationInfo oq_info           = output->quantization_info().uniform();
        const auto                    output_quant_info = (output->total_size() == 0) ? iq_info : oq_info;
        const unsigned int            num_filters       = (is_quantized_per_channel) ? num_kernels : 1;

        gemmlowp_output_stage.gemmlowp_multipliers.resize(num_filters);
        gemmlowp_output_stage.gemmlowp_shifts.resize(num_filters);
        quantization::compute_quantized_multipliers_and_shifts(input,
                                                               weights,
                                                               output,
                                                               idx_kernels,
                                                               gemmlowp_output_stage.gemmlowp_multipliers.data(),
                                                               gemmlowp_output_stage.gemmlowp_shifts.data());
        gemmlowp_output_stage.gemmlowp_multiplier = gemmlowp_output_stage.gemmlowp_multipliers[0];
        gemmlowp_output_stage.gemmlowp_shift      = gemmlowp_output_stage.gemmlowp_shifts[0];

        int min_activation = 0;
        int max_activation = 0;

        const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU,
                                                                                   ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
                                                                                   ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU
                                                                                 };

        if(act_info.enabled())
        {
            if(supported_acts.count(act_info.activation()) != 0)
            {
                std::tie(min_activation, max_activation) = get_quantized_activation_min_max(act_info, data_type, output_quant_info);
            }
            else
            {
                fuse_activation = false;
            }
        }

        // Set the GEMMLowp output stage info
        gemmlowp_output_stage.gemmlowp_offset    = output_quant_info.offset;
        gemmlowp_output_stage.gemmlowp_min_bound = min_activation;
        gemmlowp_output_stage.gemmlowp_max_bound = max_activation;
    }

    // In case of NHWC, we need to run GEMM3D (gemm_3d_depth != 0) in order to avoid reshaping the output matrix
    const unsigned int gemm_3d_depth = (data_layout == DataLayout::NHWC) ? conv_h : 0;

    ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemm_input_to_use, weights_to_use, biases_to_use, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth, skip_im2col, act_info));

    // Validate Col2Im
    if(!skip_col2im)
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CLCol2ImKernel::validate(gemm_output_to_use, output, Size2D(conv_w, conv_h), num_groups));
    }

    // Validate Activation Layer
    if(!fuse_activation)
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(output, nullptr, act_info));
    }

    return Status{};
}

void CLGEMMConvolutionLayer::run()
{
    prepare();

    MemoryGroupResourceScope scope_mg(_memory_group);

    // Run im2col
    if(!_skip_im2col)
    {
        CLScheduler::get().enqueue(*_im2col_kernel);
    }

    // Runs CLGEMM or CLGEMMLowpMatrixMultiplyCore functions
    if(_is_quantized)
    {
        // Run gemmlowp
        _mm_gemmlowp.run();
    }
    else
    {
        // Run gemm
        _mm_gemm.run();
    }

    // Reshape output matrix
    if(!_skip_col2im)
    {
        CLScheduler::get().enqueue(*_col2im_kernel.get(), false);
    }

    // Run Activation Layer if we could not fuse it in GEMM
    if(!_fuse_activation)
    {
        _activationlayer_function.run();
    }
}

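// prepare() performs the one-off work: the weights are reshaped on first use,
// the original weights tensor is marked as unused, and the intermediate
// reshaped tensor is freed once the GEMM has consumed it.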
void CLGEMMConvolutionLayer::prepare()
{
    if(!_is_prepared)
    {
        ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
        if(_weights_manager && _weights_manager->are_weights_managed(_original_weights))
        {
            _weights_manager->run(_original_weights, &_reshape_weights_managed);
        }
        else
        {
            // Run weights reshaping and mark original weights tensor as unused
            _weights_reshaped.allocator()->allocate();
            _reshape_weights.run();
            _original_weights->mark_as_unused();
        }

        // Prepare GEMM
        _is_quantized ? _mm_gemmlowp.prepare() : _mm_gemm.prepare();
        if(!_weights_reshaped.is_used())
        {
            _weights_reshaped.allocator()->free();
        }

        CLScheduler::get().queue().finish();
        _is_prepared = true;
    }
}
} // namespace arm_compute