blob: 818d87811900bf389b20fe88de0baa9e373d3cf0 [file] [log] [blame]
/*
 * Copyright (c) 2017-2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
ramelg01a1f78512022-06-29 16:28:10 +010024
Georgios Pinitas7891a732021-08-20 21:39:25 +010025#include "src/cpu/kernels/CpuWinogradConv2dKernel.h"
Pablo Tello89519332017-11-17 11:52:36 +000026
Pablo Tello89519332017-11-17 11:52:36 +000027namespace arm_compute
28{
Michalis Spyrou96f977e2021-07-01 12:20:56 +010029namespace cpu
30{
ramelg01a1f78512022-06-29 16:28:10 +010031CpuWinogradConv2dTransformInputKernel::CpuWinogradConv2dTransformInputKernel(arm_conv::winograd::WinogradImpl &w_impl, arm_conv::ConvolutionArgs &_c_args, uint32_t nthreads)
32 : _winograd_impl{ w_impl }, _conv_args{ _c_args }, _nthreads{ nthreads }
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +010033{
Pablo Tellobda6e4b2018-08-22 11:40:33 +010034}
35
ramelg01a1f78512022-06-29 16:28:10 +010036void CpuWinogradConv2dTransformInputKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +010037{
ramelg01a1f78512022-06-29 16:28:10 +010038 ARM_COMPUTE_UNUSED(window);
39 const ITensor *input_nhwc = tensors.get_const_tensor(TensorType::ACL_SRC);
40 const ITensor *winograd_input_transform = tensors.get_const_tensor(TensorType::ACL_DST);
41 const ITensor *workspace = tensors.get_const_tensor(TensorType::ACL_INT);
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +010042
ramelg01a1f78512022-06-29 16:28:10 +010043 const unsigned int width_idx = 1;
44 const unsigned int height_idx = 2;
45 const unsigned int batch_idx = 3;
46 int element_size_in_bytes = input_nhwc->info()->element_size();
47 const auto src_strides = input_nhwc->info()->strides_in_bytes();
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +010048
ramelg01a1f78512022-06-29 16:28:10 +010049 const size_t input_row_stride = src_strides[height_idx] / element_size_in_bytes;
50 const size_t input_col_stride = src_strides[width_idx] / element_size_in_bytes;
51 const size_t input_batch_stride = src_strides[batch_idx] / element_size_in_bytes;
52 const auto input_nhwc_ptr = reinterpret_cast<const void *>(input_nhwc->buffer() + input_nhwc->info()->offset_first_element_in_bytes());
53 auto win_transf_ptr = reinterpret_cast<void *>(winograd_input_transform->buffer() + winograd_input_transform->info()->offset_first_element_in_bytes());
54
55 _winograd_impl.input_transform->execute(
56 _conv_args,
57 input_nhwc_ptr,
58 input_batch_stride,
59 input_row_stride,
60 input_col_stride,
61 win_transf_ptr,
62 _winograd_impl.winograd_spec,
63 workspace->buffer(),
64 info.thread_id,
65 _nthreads);
66}
67
68CpuWinogradConv2dTransformOutputKernel::CpuWinogradConv2dTransformOutputKernel(arm_conv::winograd::WinogradImpl &w_impl, arm_conv::ConvolutionArgs &_c_args, uint32_t nthreads)
69 : _winograd_impl{ w_impl }, _conv_args{ _c_args }, _nthreads{ nthreads }
70{
71}
72
73// Inherited methods overridden:
74void CpuWinogradConv2dTransformOutputKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
75{
76 ARM_COMPUTE_UNUSED(window);
77 const ITensor *dst_nhwc = tensors.get_const_tensor(TensorType::ACL_DST);
78 const ITensor *winograd_output_transform = tensors.get_const_tensor(TensorType::ACL_SRC_0);
79 const ITensor *biases = tensors.get_const_tensor(TensorType::ACL_SRC_1);
80 const ITensor *workspace = tensors.get_tensor(TensorType::ACL_INT);
81
82 const unsigned int width_idx = 1;
83 const unsigned int height_idx = 2;
84 const unsigned int batch_idx = 3;
85 const int element_size_in_bytes = dst_nhwc->info()->element_size();
86 const auto dst_strides = dst_nhwc->info()->strides_in_bytes();
87
88 const size_t out_row_stride = dst_strides[height_idx] / element_size_in_bytes;
89 const size_t out_col_stride = dst_strides[width_idx] / element_size_in_bytes;
90 const size_t out_batch_stride = dst_strides[batch_idx] / element_size_in_bytes;
91 const auto wout_transf_ptr = reinterpret_cast<const void *>(winograd_output_transform->buffer() + winograd_output_transform->info()->offset_first_element_in_bytes());
92 auto dst_nhwc_ptr = reinterpret_cast<void *>(dst_nhwc->buffer() + dst_nhwc->info()->offset_first_element_in_bytes());
93 void *biases_data_ptr = nullptr;
94 if(biases != nullptr)
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +010095 {
ramelg01a1f78512022-06-29 16:28:10 +010096 biases_data_ptr = reinterpret_cast<void *>(biases->buffer() + biases->info()->offset_first_element_in_bytes());
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +010097 }
98
ramelg01a1f78512022-06-29 16:28:10 +010099 // Output transform
100 _winograd_impl.output_transform->execute(
101 _conv_args,
102 wout_transf_ptr,
103 _winograd_impl.winograd_spec,
104 biases_data_ptr,
105 dst_nhwc_ptr,
106 out_batch_stride,
107 out_row_stride,
108 out_col_stride,
109 workspace->buffer(),
110 info.thread_id,
111 _nthreads);
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100112}
113
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100114} // namespace cpu
ramelg01a1f78512022-06-29 16:28:10 +0100115} // namespace arm_compute