blob: aba32871d034fbd66518926745b201b63411e597 [file] [log] [blame]
/*
 * Copyright (c) 2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Ramy Elgammaldf6a3b02022-11-30 16:23:10 +000024#include "src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigBifrost.h"
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +010025
26#include "arm_compute/core/CL/CLHelpers.h"
27#include "arm_compute/core/CL/CLKernelLibrary.h"
28#include "arm_compute/core/GPUTarget.h"
29#include "arm_compute/core/TensorInfo.h"
30#include "arm_compute/core/TensorShape.h"
31#include "arm_compute/core/utils/misc/ShapeCalculator.h"
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010032
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +010033#include <utility>
34
35namespace arm_compute
36{
37namespace cl_direct_conv
38{
39using namespace arm_compute::misc::shape_calculator;
40
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010041ClDirectConvDefaultConfigBifrost::ClDirectConvDefaultConfigBifrost(GPUTarget gpu) : IClDirectConvKernelConfig(gpu)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +010042{
43}
44
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010045DirectConvComputeKernelInfo ClDirectConvDefaultConfigBifrost::configure(const ITensorInfo *src,
46 const ITensorInfo *wei,
47 const PadStrideInfo &conv_info)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +010048{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010049 using ConfigurationFunctionExecutorPtr = DirectConvComputeKernelInfo (ClDirectConvDefaultConfigBifrost::*)(
50 const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info);
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +010051
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010052 ClDirectConvConfigArray<ConfigurationFunctionExecutorPtr> configs_G71(
53 &ClDirectConvDefaultConfigBifrost::configure_G71_f32, &ClDirectConvDefaultConfigBifrost::configure_G71_f16,
54 &ClDirectConvDefaultConfigBifrost::configure_G71_u8);
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +010055
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010056 ClDirectConvConfigArray<ConfigurationFunctionExecutorPtr> configs_default(
57 &ClDirectConvDefaultConfigBifrost::configure_default_f32,
58 &ClDirectConvDefaultConfigBifrost::configure_default_f16, &ClDirectConvDefaultConfigBifrost::configure_G71_u8);
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +010059
60 ConfigurationFunctionExecutorPtr func = nullptr;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010061 switch (_target)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +010062 {
63 case GPUTarget::G71:
64 func = configs_G71.get_function(src->data_type());
65 break;
66 default:
67 func = configs_default.get_function(src->data_type());
68 break;
69 }
70
71 ARM_COMPUTE_ERROR_ON_MSG(func == nullptr, "Data type not supported for direct convolution");
72 return (this->*func)(src, wei, conv_info);
73}
74
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010075DirectConvComputeKernelInfo ClDirectConvDefaultConfigBifrost::configure_G71_f32(const ITensorInfo *src,
76 const ITensorInfo *wei,
77 const PadStrideInfo &conv_info)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +010078{
79 DirectConvComputeKernelInfo desc;
80
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010081 if (src->data_layout() == DataLayout::NHWC)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +010082 {
83 // Get the output shape
84 TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
85
86 desc.n0 = 4;
87
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010088 if (output_shape[0] > 16)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +010089 {
90 desc.m0 = 2;
91 }
92
93 desc.k0 = 8;
94
95 desc.export_weights_to_cl_image = false;
96 }
97
98 return desc;
99}
100
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100101DirectConvComputeKernelInfo ClDirectConvDefaultConfigBifrost::configure_G71_f16(const ITensorInfo *src,
102 const ITensorInfo *wei,
103 const PadStrideInfo &conv_info)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100104{
105 DirectConvComputeKernelInfo desc;
106
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100107 if (src->data_layout() == DataLayout::NHWC)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100108 {
109 // Get the output shape
110 TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
111
112 desc.n0 = 4;
113
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100114 if (output_shape[0] > 16)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100115 {
116 desc.m0 = 4;
117 }
118
119 desc.k0 = 8;
120
121 desc.export_weights_to_cl_image = false;
122 }
123
124 return desc;
125}
126
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100127DirectConvComputeKernelInfo ClDirectConvDefaultConfigBifrost::configure_G71_u8(const ITensorInfo *src,
128 const ITensorInfo *wei,
129 const PadStrideInfo &conv_info)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100130{
131 DirectConvComputeKernelInfo desc;
132
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100133 if (src->data_layout() == DataLayout::NHWC)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100134 {
135 // Get the output shape
136 TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
137
138 desc.n0 = 4;
139
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100140 if (output_shape[0] > 16)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100141 {
142 desc.m0 = 4;
143 }
144
145 desc.k0 = 16;
146
147 desc.export_weights_to_cl_image = false;
148 }
149
150 return desc;
151}
152
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100153DirectConvComputeKernelInfo ClDirectConvDefaultConfigBifrost::configure_default_f32(const ITensorInfo *src,
154 const ITensorInfo *wei,
155 const PadStrideInfo &conv_info)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100156{
157 DirectConvComputeKernelInfo desc;
158
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100159 if (src->data_layout() == DataLayout::NHWC)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100160 {
161 // Get the output shape
162 TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
163
164 desc.n0 = 4;
165
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100166 if (output_shape[0] > 16)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100167 {
168 desc.m0 = 2;
169 }
170
171 desc.k0 = 8;
172
Gian Marco Iodicead9a7ed2022-09-16 14:14:21 +0100173 desc.export_weights_to_cl_image = export_to_cl_image(wei);
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100174 }
175
176 return desc;
177}
178
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100179DirectConvComputeKernelInfo ClDirectConvDefaultConfigBifrost::configure_default_f16(const ITensorInfo *src,
180 const ITensorInfo *wei,
181 const PadStrideInfo &conv_info)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100182{
183 DirectConvComputeKernelInfo desc;
184
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100185 if (src->data_layout() == DataLayout::NHWC)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100186 {
187 // Get the output shape
188 TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
189
190 desc.n0 = 4;
191
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100192 if (output_shape[0] > 16)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100193 {
194 desc.m0 = 4;
195 }
196
197 desc.k0 = 8;
198
Gian Marco Iodicead9a7ed2022-09-16 14:14:21 +0100199 desc.export_weights_to_cl_image = export_to_cl_image(wei);
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100200 }
201
202 return desc;
203}
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100204} // namespace cl_direct_conv
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100205} // namespace arm_compute