blob: f3c05adb477dfca7db7de2018201c2d3775d30e0 [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
Viet-Hoa Doedafe7f2023-05-04 17:39:30 +01002 * Copyright (c) 2017-2023 Arm Limited.
Anthony Barbier6ff3b192017-09-04 18:44:23 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
25
Sheri Zhang06d1efd2021-07-28 11:20:04 +010026#include "arm_compute/core/CL/CLKernelLibrary.h"
27#include "arm_compute/core/CL/ICLTensor.h"
28#include "arm_compute/core/KernelDescriptors.h"
Georgios Pinitas78c00902018-01-09 17:33:11 +000029#include "arm_compute/core/utils/misc/ShapeCalculator.h"
Sheri Zhang06d1efd2021-07-28 11:20:04 +010030#include "arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h"
31#include "src/core/CL/ICLKernel.h"
32#include "src/core/helpers/MemoryHelpers.h"
Georgios Pinitas7891a732021-08-20 21:39:25 +010033#include "src/gpu/cl/operators/ClConv2d.h"
ramelg016d891572021-09-29 10:05:09 +010034
35#include "src/common/utils/Log.h"
Sheri Zhang06d1efd2021-07-28 11:20:04 +010036#include "support/Cast.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010037
Michele Di Giorgio14cbfb22019-10-23 10:53:10 +010038namespace arm_compute
39{
Georgios Pinitas78c00902018-01-09 17:33:11 +000040using namespace arm_compute::misc::shape_calculator;
Sheri Zhang06d1efd2021-07-28 11:20:04 +010041using namespace arm_compute::experimental;
42struct CLConvolutionLayer::Impl
43{
44 MemoryGroup memory_group{};
45 std::shared_ptr<IMemoryManager> memory_manager{};
46 std::unique_ptr<opencl::IClOperator> op{ nullptr };
47 ITensorPack run_pack{};
48 ITensorPack prep_pack{};
49 WorkspaceData<CLTensor> workspace{};
50 experimental::MemoryRequirements aux_mem_req{};
51 std::unique_ptr<IFunction> func{ nullptr };
52};
Anthony Barbier6ff3b192017-09-04 18:44:23 +010053
Georgios Pinitasbaf174e2017-09-08 19:47:30 +010054CLConvolutionLayer::CLConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
Sheri Zhang06d1efd2021-07-28 11:20:04 +010055 : _impl(std::make_unique<Impl>())
Anthony Barbier6ff3b192017-09-04 18:44:23 +010056{
Sheri Zhang06d1efd2021-07-28 11:20:04 +010057 _impl->memory_manager = std::move(memory_manager);
Anthony Barbier6ff3b192017-09-04 18:44:23 +010058}
59
Sang-Hoon Parkbef7fa22020-10-21 15:58:54 +010060CLConvolutionLayer::~CLConvolutionLayer() = default;
61
Alex Gilday7da29b62018-03-23 14:16:00 +000062void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
Jakub Sujak0d27b2e2023-08-24 14:01:20 +010063 const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups)
Chunosov5124be52017-11-22 20:42:13 +070064{
Jakub Sujak0d27b2e2023-08-24 14:01:20 +010065 configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info, weights_info, dilation, act_info, enable_fast_math, num_groups);
Manuel Bottini2b84be52020-04-08 10:15:51 +010066}
67
68void CLConvolutionLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
69 const WeightsInfo &weights_info,
Jakub Sujak0d27b2e2023-08-24 14:01:20 +010070 const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups)
Manuel Bottini2b84be52020-04-08 10:15:51 +010071{
Isabella Gottardif07d28d2018-02-06 14:52:43 +000072 ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
Gian Marco Iodice2213d4b2018-04-27 10:39:06 +010073 ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation, act_info,
Gian Marco Iodice916d1bc2018-08-13 11:20:41 +010074 enable_fast_math, num_groups));
Jakub Sujak0d27b2e2023-08-24 14:01:20 +010075 ARM_COMPUTE_LOG_PARAMS(input, weights, biases, output, conv_info, weights_info, dilation, act_info, enable_fast_math, num_groups);
Isabella Gottardif07d28d2018-02-06 14:52:43 +000076
Jakub Sujak0d27b2e2023-08-24 14:01:20 +010077 const Conv2dInfo conv2d_info = Conv2dInfo(conv_info, dilation, act_info, enable_fast_math, num_groups);
Sheri Zhang06d1efd2021-07-28 11:20:04 +010078
79 switch(opencl::ClConv2d::get_convolution_method(input->info(), weights->info(), output->info(), conv2d_info,
80 weights_info, CLScheduler::get().target()))
Chunosov5124be52017-11-22 20:42:13 +070081 {
Gian Marco Iodicee52a3002018-04-11 15:59:10 +010082 case ConvolutionMethod::WINOGRAD:
Isabella Gottardif07d28d2018-02-06 14:52:43 +000083 case ConvolutionMethod::DIRECT:
Gian Marco Iodicea5cb79f2022-12-28 13:53:51 +000084 case ConvolutionMethod::INDIRECT:
Isabella Gottardif07d28d2018-02-06 14:52:43 +000085 case ConvolutionMethod::GEMM:
Gian Marco20d78482018-01-11 15:10:58 +000086 {
Sheri Zhang06d1efd2021-07-28 11:20:04 +010087 auto f = std::make_unique<opencl::ClConv2d>();
88 f->configure(compile_context, input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv2d_info, weights_info);
89 _impl->op = std::move(f);
Isabella Gottardif07d28d2018-02-06 14:52:43 +000090 break;
Gian Marco20d78482018-01-11 15:10:58 +000091 }
Vidhya Sudhan Loganathan8ec0bb62019-04-23 10:40:44 +010092 case ConvolutionMethod::FFT:
93 {
Sheri Zhang06d1efd2021-07-28 11:20:04 +010094 auto f = std::make_unique<CLFFTConvolutionLayer>(_impl->memory_manager);
Giorgio Arenaea7de7b2020-12-10 16:49:39 +000095 f->configure(compile_context, input, weights, biases, output, conv_info, act_info, enable_fast_math);
Sheri Zhang06d1efd2021-07-28 11:20:04 +010096 _impl->func = std::move(f);
Vidhya Sudhan Loganathan8ec0bb62019-04-23 10:40:44 +010097 break;
98 }
Isabella Gottardif07d28d2018-02-06 14:52:43 +000099 default:
100 ARM_COMPUTE_ERROR("Not supported.");
101 break;
Chunosov5124be52017-11-22 20:42:13 +0700102 }
Sheri Zhang06d1efd2021-07-28 11:20:04 +0100103
104 if(_impl->op)
105 {
Jakub Sujak0d27b2e2023-08-24 14:01:20 +0100106 _impl->memory_group = MemoryGroup(std::move(_impl->memory_manager));
107 _impl->aux_mem_req = _impl->op->workspace();
108 _impl->run_pack = { { ACL_SRC_0, input }, { ACL_SRC_1, weights }, { ACL_SRC_2, biases }, { ACL_DST, output } };
109 _impl->prep_pack = { { ACL_SRC_1, weights }, { ACL_SRC_2, biases } };
110 _impl->workspace = manage_workspace<CLTensor>(_impl->aux_mem_req, _impl->memory_group, _impl->run_pack, _impl->prep_pack);
Sheri Zhang06d1efd2021-07-28 11:20:04 +0100111 }
Chunosov5124be52017-11-22 20:42:13 +0700112}
113
Isabella Gottardif07d28d2018-02-06 14:52:43 +0000114Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
Jakub Sujak0d27b2e2023-08-24 14:01:20 +0100115 const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100116{
Georgios Pinitas78c00902018-01-09 17:33:11 +0000117 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
Viet-Hoa Doedafe7f2023-05-04 17:39:30 +0100118 ARM_COMPUTE_RETURN_ERROR_ON_MSG(!weights->are_values_constant(), "Dynamic weights are not supported");
Gian Marco Iodice916d1bc2018-08-13 11:20:41 +0100119 ARM_COMPUTE_RETURN_ERROR_ON_MSG((num_groups != 1) && (input->data_layout() != DataLayout::NCHW), "Grouping (num_groups != 1) with NHWC data layout is not supported");
Chunosov5124be52017-11-22 20:42:13 +0700120
Sheri Zhang06d1efd2021-07-28 11:20:04 +0100121 const GPUTarget gpu_target = CLScheduler::get().target();
Jakub Sujak0d27b2e2023-08-24 14:01:20 +0100122 const Conv2dInfo conv2d_info = Conv2dInfo(conv_info, dilation, act_info, enable_fast_math, num_groups);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100123
Sheri Zhang06d1efd2021-07-28 11:20:04 +0100124 switch(opencl::ClConv2d::get_convolution_method(input, weights, output, conv2d_info, weights_info, gpu_target))
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100125 {
Gian Marco Iodicee52a3002018-04-11 15:59:10 +0100126 case ConvolutionMethod::WINOGRAD:
Isabella Gottardif07d28d2018-02-06 14:52:43 +0000127 case ConvolutionMethod::DIRECT:
Gian Marco Iodicea5cb79f2022-12-28 13:53:51 +0000128 case ConvolutionMethod::INDIRECT:
Isabella Gottardif07d28d2018-02-06 14:52:43 +0000129 case ConvolutionMethod::GEMM:
Chunosov5124be52017-11-22 20:42:13 +0700130 {
Sheri Zhang06d1efd2021-07-28 11:20:04 +0100131 ARM_COMPUTE_RETURN_ON_ERROR(opencl::ClConv2d::validate(input, weights, biases, output, conv2d_info, weights_info));
Isabella Gottardif07d28d2018-02-06 14:52:43 +0000132 break;
Chunosov5124be52017-11-22 20:42:13 +0700133 }
Vidhya Sudhan Loganathan8ec0bb62019-04-23 10:40:44 +0100134 case ConvolutionMethod::FFT:
135 {
136 // Validate FFT-based convolution layer
Giorgio Arenaea7de7b2020-12-10 16:49:39 +0000137 ARM_COMPUTE_RETURN_ON_ERROR(CLFFTConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info, enable_fast_math));
Vidhya Sudhan Loganathan8ec0bb62019-04-23 10:40:44 +0100138 break;
139 }
Isabella Gottardif07d28d2018-02-06 14:52:43 +0000140 default:
141 ARM_COMPUTE_ERROR("Not supported.");
142 break;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100143 }
144
Isabella Gottardif07d28d2018-02-06 14:52:43 +0000145 return Status{};
146}
Gian Marco Iodice368da832017-07-03 12:33:49 +0100147
Gian Marco Iodicee52a3002018-04-11 15:59:10 +0100148ConvolutionMethod CLConvolutionLayer::get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info,
Gian Marco Iodice2213d4b2018-04-27 10:39:06 +0100149 const WeightsInfo &weights_info, const ActivationLayerInfo &act_info, const GPUTarget gpu_target, const Size2D &dilation, bool enable_fast_math)
Isabella Gottardif07d28d2018-02-06 14:52:43 +0000150{
Sheri Zhang06d1efd2021-07-28 11:20:04 +0100151 const Conv2dInfo conv2d_info = Conv2dInfo(conv_info, dilation, act_info, enable_fast_math, 1);
152 return opencl::ClConv2d::get_convolution_method(input, weights, output, conv2d_info, weights_info, gpu_target);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100153}
154
155void CLConvolutionLayer::run()
156{
Georgios Pinitase0437672018-05-02 14:07:55 +0100157 prepare();
Sheri Zhang06d1efd2021-07-28 11:20:04 +0100158
159 MemoryGroupResourceScope scope_mg(_impl->memory_group);
160
161 if(_impl->func)
162 {
163 _impl->func->run();
164 }
165 else
166 {
167 _impl->op->run(_impl->run_pack);
168 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100169}
Georgios Pinitase0437672018-05-02 14:07:55 +0100170
171void CLConvolutionLayer::prepare()
172{
Sheri Zhang06d1efd2021-07-28 11:20:04 +0100173 if(_impl->func)
174 {
175 _impl->func->prepare();
176 }
177 else
178 {
179 _impl->op->prepare(_impl->prep_pack);
180
181 // Release temporary tensors that are only used in prepare stage
182 release_temporaries(_impl->aux_mem_req, _impl->workspace);
183 }
Georgios Pinitase0437672018-05-02 14:07:55 +0100184}
ramelg016d891572021-09-29 10:05:09 +0100185} // namespace arm_compute