Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 1 | /* |
Jakub Sujak | 0d27b2e | 2023-08-24 14:01:20 +0100 | [diff] [blame] | 2 | * Copyright (c) 2021-2023 Arm Limited. |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 3 | * |
| 4 | * SPDX-License-Identifier: MIT |
| 5 | * |
| 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
| 7 | * of this software and associated documentation files (the "Software"), to |
| 8 | * deal in the Software without restriction, including without limitation the |
| 9 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or |
| 10 | * sell copies of the Software, and to permit persons to whom the Software is |
| 11 | * furnished to do so, subject to the following conditions: |
| 12 | * |
| 13 | * The above copyright notice and this permission notice shall be included in all |
| 14 | * copies or substantial portions of the Software. |
| 15 | * |
| 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 22 | * SOFTWARE. |
| 23 | */ |
Georgios Pinitas | 7891a73 | 2021-08-20 21:39:25 +0100 | [diff] [blame] | 24 | #include "src/gpu/cl/operators/ClConv2d.h" |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 25 | |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 26 | #include "arm_compute/core/utils/misc/ShapeCalculator.h" |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 27 | #include "arm_compute/core/Validate.h" |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 28 | #include "arm_compute/runtime/CL/CLScheduler.h" |
| 29 | #include "arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h" |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 30 | |
| 31 | #include "src/common/utils/Log.h" |
Georgios Pinitas | 7891a73 | 2021-08-20 21:39:25 +0100 | [diff] [blame] | 32 | #include "src/gpu/cl/operators/ClDirectConv2d.h" |
| 33 | #include "src/gpu/cl/operators/ClGemmConv2d.h" |
Gian Marco Iodice | a5cb79f | 2022-12-28 13:53:51 +0000 | [diff] [blame] | 34 | #include "src/gpu/cl/operators/ClIndirectConv2d.h" |
Georgios Pinitas | 7891a73 | 2021-08-20 21:39:25 +0100 | [diff] [blame] | 35 | #include "src/gpu/cl/operators/ClWinogradConv2d.h" |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 36 | |
| 37 | #include <memory> |
| 38 | |
Gian Marco Iodice | ebbd529 | 2021-08-17 16:25:37 +0100 | [diff] [blame] | 39 | namespace |
| 40 | { |
| 41 | /** Get the suitable kernel size for using direct convolution method with NHWC data layout. |
| 42 | * |
| 43 | * @note Direct convolution should be executed when the kernel has the spatial dimensions greater than or equal to the value returned by this function |
| 44 | * |
| 45 | * @param[in] gpu_target GPU target |
| 46 | * |
| 47 | * @return the suitable kernel size for using direct convolution method with NHWC data layout |
| 48 | */ |
| 49 | size_t get_direct_conv_kernel_threshold_nhwc(arm_compute::GPUTarget gpu_target) |
| 50 | { |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 51 | switch (gpu_target) |
Gian Marco Iodice | ebbd529 | 2021-08-17 16:25:37 +0100 | [diff] [blame] | 52 | { |
| 53 | case arm_compute::GPUTarget::G76: |
| 54 | case arm_compute::GPUTarget::G77: |
| 55 | case arm_compute::GPUTarget::G78: |
| 56 | return 5; |
| 57 | case arm_compute::GPUTarget::G71: |
| 58 | case arm_compute::GPUTarget::G72: |
| 59 | case arm_compute::GPUTarget::MIDGARD: |
| 60 | case arm_compute::GPUTarget::BIFROST: |
| 61 | return 7; |
| 62 | default: |
| 63 | return 5; |
| 64 | } |
| 65 | } |
| 66 | } // namespace |
| 67 | |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 68 | namespace arm_compute |
| 69 | { |
| 70 | namespace opencl |
| 71 | { |
| 72 | using namespace arm_compute::misc::shape_calculator; |
| 73 | |
/** Default constructor.
 *
 * Value-initialises the wrapped operator; the concrete operator is only
 * created and assigned by configure().
 */
ClConv2d::ClConv2d() : _operator()
{
}
| 77 | |
/** Default destructor, defined out of line in this translation unit. */
ClConv2d::~ClConv2d() = default;
| 79 | |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 80 | void ClConv2d::configure(const CLCompileContext &compile_context, |
| 81 | ITensorInfo *src, |
| 82 | ITensorInfo *weights, |
| 83 | ITensorInfo *biases, |
| 84 | ITensorInfo *dst, |
| 85 | const Conv2dInfo &conv2d_info, |
| 86 | const WeightsInfo &weights_info) |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 87 | { |
| 88 | ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst); |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 89 | ARM_COMPUTE_ERROR_THROW_ON( |
| 90 | ClConv2d::validate(src, weights, ((biases != nullptr) ? biases : nullptr), dst, conv2d_info, weights_info)); |
ramelg01 | 2e53f17 | 2021-09-22 10:48:25 +0100 | [diff] [blame] | 91 | ARM_COMPUTE_LOG_PARAMS(src, weights, biases, dst, conv2d_info, weights_info); |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 92 | |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 93 | switch (ClConv2d::get_convolution_method(src, weights, dst, conv2d_info, weights_info, CLScheduler::get().target())) |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 94 | { |
| 95 | case ConvolutionMethod::WINOGRAD: |
| 96 | { |
| 97 | ARM_COMPUTE_ERROR_ON(conv2d_info.num_groups != 1); |
| 98 | auto f = std::make_unique<ClWinogradConv2d>(); |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 99 | f->configure(compile_context, src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info, |
| 100 | conv2d_info.enable_fast_math); |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 101 | _operator = std::move(f); |
| 102 | break; |
| 103 | } |
| 104 | case ConvolutionMethod::DIRECT: |
| 105 | { |
| 106 | ARM_COMPUTE_ERROR_ON(conv2d_info.num_groups != 1); |
| 107 | auto f = std::make_unique<ClDirectConv2d>(); |
| 108 | f->configure(compile_context, src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info); |
| 109 | _operator = std::move(f); |
| 110 | break; |
| 111 | } |
Gian Marco Iodice | a5cb79f | 2022-12-28 13:53:51 +0000 | [diff] [blame] | 112 | case ConvolutionMethod::INDIRECT: |
| 113 | { |
| 114 | ARM_COMPUTE_ERROR_ON(conv2d_info.num_groups != 1); |
Gian Marco Iodice | a5cb79f | 2022-12-28 13:53:51 +0000 | [diff] [blame] | 115 | auto f = std::make_unique<ClIndirectConv2d>(); |
| 116 | f->configure(compile_context, src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info); |
| 117 | _operator = std::move(f); |
| 118 | break; |
| 119 | } |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 120 | case ConvolutionMethod::GEMM: |
| 121 | { |
Georgios Pinitas | 1988463 | 2021-08-16 12:38:54 +0100 | [diff] [blame] | 122 | auto f = std::make_unique<ClGemmConv2d>(); |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 123 | f->configure(compile_context, src, weights, biases, dst, conv2d_info, weights_info); |
| 124 | _operator = std::move(f); |
| 125 | break; |
| 126 | } |
| 127 | default: |
| 128 | ARM_COMPUTE_ERROR("Not supported."); |
| 129 | break; |
| 130 | } |
| 131 | _aux_mem = _operator->workspace(); |
| 132 | } |
| 133 | |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 134 | Status ClConv2d::validate(const ITensorInfo *src, |
| 135 | const ITensorInfo *weights, |
| 136 | const ITensorInfo *biases, |
| 137 | const ITensorInfo *dst, |
| 138 | const Conv2dInfo &conv2d_info, |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 139 | const WeightsInfo &weights_info) |
| 140 | { |
| 141 | ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst); |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 142 | ARM_COMPUTE_RETURN_ERROR_ON_MSG((conv2d_info.num_groups != 1) && (src->data_layout() != DataLayout::NCHW), |
| 143 | "Grouping (num_groups != 1) with NHWC data layout is not supported"); |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 144 | |
| 145 | const GPUTarget gpu_target = CLScheduler::get().target(); |
| 146 | |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 147 | switch (ClConv2d::get_convolution_method(src, weights, dst, conv2d_info, weights_info, gpu_target)) |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 148 | { |
| 149 | case ConvolutionMethod::WINOGRAD: |
| 150 | { |
| 151 | //Validate Winograd |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 152 | ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv2d_info.num_groups != 1, |
| 153 | "Grouping (num_groups != 1) with ClWinogradConv2d is not supported"); |
| 154 | ARM_COMPUTE_RETURN_ON_ERROR(ClWinogradConv2d::validate(src, weights, biases, dst, conv2d_info.conv_info, |
| 155 | conv2d_info.act_info, conv2d_info.enable_fast_math)); |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 156 | break; |
| 157 | } |
| 158 | case ConvolutionMethod::DIRECT: |
| 159 | { |
| 160 | // Validate direct convolution layer |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 161 | ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv2d_info.num_groups != 1, |
| 162 | "Grouping (num_groups != 1) with ClDirectConv2d is not supported"); |
| 163 | ARM_COMPUTE_RETURN_ON_ERROR( |
| 164 | ClDirectConv2d::validate(src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info)); |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 165 | break; |
| 166 | } |
Gian Marco Iodice | a5cb79f | 2022-12-28 13:53:51 +0000 | [diff] [blame] | 167 | case ConvolutionMethod::INDIRECT: |
| 168 | { |
| 169 | // Validate indirect convolution layer |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 170 | ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv2d_info.num_groups != 1, |
| 171 | "Grouping (num_groups != 1) with ClIndirectConv2d is not supported"); |
| 172 | ARM_COMPUTE_RETURN_ON_ERROR( |
| 173 | ClIndirectConv2d::validate(src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info)); |
Gian Marco Iodice | a5cb79f | 2022-12-28 13:53:51 +0000 | [diff] [blame] | 174 | break; |
| 175 | } |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 176 | case ConvolutionMethod::GEMM: |
| 177 | { |
| 178 | // Validate gemm-based convolution layer |
Georgios Pinitas | 1988463 | 2021-08-16 12:38:54 +0100 | [diff] [blame] | 179 | ARM_COMPUTE_RETURN_ON_ERROR(ClGemmConv2d::validate(src, weights, biases, dst, conv2d_info, weights_info)); |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 180 | break; |
| 181 | } |
| 182 | default: |
| 183 | ARM_COMPUTE_ERROR("Not supported."); |
| 184 | break; |
| 185 | } |
| 186 | |
| 187 | return Status{}; |
| 188 | } |
| 189 | |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 190 | ConvolutionMethod ClConv2d::get_convolution_method(const ITensorInfo *src, |
| 191 | const ITensorInfo *weights, |
| 192 | const ITensorInfo *dst, |
| 193 | const Conv2dInfo &conv2d_info, |
| 194 | const WeightsInfo &weights_info, |
| 195 | const GPUTarget gpu_target) |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 196 | { |
| 197 | ARM_COMPUTE_ERROR_ON_NULLPTR(src); |
| 198 | ARM_COMPUTE_ERROR_ON_NULLPTR(dst); |
| 199 | ARM_COMPUTE_ERROR_ON_NULLPTR(weights); |
| 200 | ARM_COMPUTE_UNUSED(weights_info); |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 201 | |
| 202 | const PadStrideInfo conv_info = conv2d_info.conv_info; |
| 203 | const ActivationLayerInfo act_info = conv2d_info.act_info; |
| 204 | const Size2D dilation = conv2d_info.dilation; |
| 205 | bool enable_fast_math = conv2d_info.enable_fast_math; |
| 206 | |
| 207 | const size_t idx_w = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::WIDTH); |
| 208 | const size_t idx_h = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::HEIGHT); |
| 209 | const size_t idx_c = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::CHANNEL); |
| 210 | |
| 211 | /* Input spatial dims, kernel size, IFM/OFM, conv info*/ |
| 212 | using ConvolutionConfiguration = std::tuple<Size2D, Size2D, Size2D, PadStrideInfo, DataLayout>; |
| 213 | using ConfigurationMethod = std::pair<ConvolutionConfiguration, ConvolutionMethod>; |
| 214 | |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 215 | const std::vector<ConfigurationMethod> known_configs = { |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 216 | // Alexnet |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 217 | ConfigurationMethod(ConvolutionConfiguration(Size2D(27U, 27U), Size2D(5U, 5U), Size2D(48U, 128U), |
| 218 | PadStrideInfo(1U, 1U, 2U, 2U), DataLayout::NCHW), |
| 219 | ConvolutionMethod::DIRECT), |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 220 | // VGG16 / VGG19 |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 221 | ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 64U), |
| 222 | PadStrideInfo(1U, 1U, 1U, 1U), DataLayout::NCHW), |
| 223 | ConvolutionMethod::DIRECT), |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 224 | // Mobilenet 224 |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 225 | ConfigurationMethod(ConvolutionConfiguration( |
| 226 | Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 32U), |
| 227 | PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NCHW), |
| 228 | ConvolutionMethod::GEMM), |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 229 | // Mobilenet 160 |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 230 | ConfigurationMethod(ConvolutionConfiguration( |
| 231 | Size2D(160U, 160U), Size2D(3U, 3U), Size2D(3U, 24U), |
| 232 | PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NCHW), |
| 233 | ConvolutionMethod::GEMM), |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 234 | // Mobilenet 224 |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 235 | ConfigurationMethod(ConvolutionConfiguration( |
| 236 | Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 32U), |
| 237 | PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NHWC), |
| 238 | ConvolutionMethod::GEMM), |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 239 | // Mobilenet 160 |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 240 | ConfigurationMethod(ConvolutionConfiguration( |
| 241 | Size2D(160U, 160U), Size2D(3U, 3U), Size2D(3U, 24U), |
| 242 | PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NHWC), |
| 243 | ConvolutionMethod::GEMM), |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 244 | }; |
| 245 | |
| 246 | const auto find_config = [&](ConfigurationMethod c) |
| 247 | { |
| 248 | const ConvolutionConfiguration config = c.first; |
| 249 | const PadStrideInfo info = std::get<3>(config); |
| 250 | const DataLayout data_layout = std::get<4>(config); |
| 251 | |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 252 | return std::get<0>(config) == Size2D(src->dimension(idx_w), src->dimension(idx_h)) && |
| 253 | std::get<1>(config) == Size2D(weights->dimension(idx_w), weights->dimension(idx_h)) && |
| 254 | std::get<2>(config) == Size2D(weights->dimension(idx_c), weights->dimension(3)) && |
| 255 | info.pad_top() == conv_info.pad_top() && info.pad_right() == conv_info.pad_right() && |
| 256 | info.pad_bottom() == conv_info.pad_bottom() && info.pad_left() == conv_info.pad_left() && |
| 257 | info.stride() == conv_info.stride() && (data_layout == src->data_layout()); |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 258 | }; |
| 259 | |
| 260 | std::vector<ConfigurationMethod>::const_iterator found; |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 261 | if ((found = std::find_if(known_configs.begin(), known_configs.end(), find_config)) != known_configs.end()) |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 262 | { |
| 263 | return (*found).second; |
| 264 | } |
| 265 | |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 266 | if (dilation != Size2D(1U, 1U)) |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 267 | { |
| 268 | return ConvolutionMethod::GEMM; |
| 269 | } |
| 270 | else |
| 271 | { |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 272 | if (src->data_layout() == DataLayout::NCHW) |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 273 | { |
| 274 | // SRGAN |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 275 | if ((src->dimension(idx_h) > 720U) && (dst->dimension(idx_h) > 720U) && (weights->dimension(idx_h) == 9) && |
| 276 | (conv_info.pad_top() < 3) && |
| 277 | (ClDirectConv2d::validate(src, weights, nullptr, dst, conv_info, act_info))) |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 278 | { |
| 279 | return ConvolutionMethod::DIRECT; |
| 280 | } |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 281 | if ((weights->dimension(idx_h) > 5) && (src->dimension(idx_c) > dst->dimension(idx_c)) && |
| 282 | (CLFFTConvolutionLayer::validate(src, weights, nullptr, dst, conv_info, act_info, enable_fast_math))) |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 283 | { |
| 284 | return ConvolutionMethod::FFT; |
| 285 | } |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 286 | if (src->dimension(idx_c) < 16) |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 287 | { |
| 288 | return ConvolutionMethod::GEMM; |
| 289 | } |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 290 | return bool(ClWinogradConv2d::validate(src, weights, nullptr, dst, conv_info, act_info, enable_fast_math)) |
| 291 | ? ConvolutionMethod::WINOGRAD |
| 292 | : ConvolutionMethod::GEMM; |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 293 | } |
| 294 | else |
| 295 | { |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 296 | const bool is_direct_valid = |
| 297 | bool(ClDirectConv2d::validate(src, weights, nullptr, dst, conv_info, act_info)); |
| 298 | const bool is_wino_valid = |
| 299 | bool(ClWinogradConv2d::validate(src, weights, nullptr, dst, conv_info, act_info, enable_fast_math)); |
Gian Marco Iodice | ebbd529 | 2021-08-17 16:25:37 +0100 | [diff] [blame] | 300 | const size_t kernel_sz_direct_conv_thr = get_direct_conv_kernel_threshold_nhwc(gpu_target); |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 301 | |
| 302 | // SRGAN case |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 303 | if ((src->dimension(idx_h) > 720U) && (dst->dimension(idx_h) > 720U) && (weights->dimension(idx_h) == 9) && |
| 304 | (conv_info.pad_top() < 3) && is_direct_valid) |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 305 | { |
| 306 | return ConvolutionMethod::DIRECT; |
| 307 | } |
| 308 | |
| 309 | // Floating-point case: GeMM/Direct/Winograd |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 310 | if (is_data_type_float(src->data_type())) |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 311 | { |
Gian Marco Iodice | 0bae3ee | 2022-01-20 16:33:29 +0000 | [diff] [blame] | 312 | // Get dst shape |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 313 | TensorShape output_shape = |
| 314 | misc::shape_calculator::compute_deep_convolution_shape(*src, *weights, conv_info); |
| 315 | const bool is_large_kernel_sz = (weights->dimension(idx_w) >= kernel_sz_direct_conv_thr) && |
| 316 | (weights->dimension(idx_h) >= kernel_sz_direct_conv_thr); |
| 317 | const bool is_ifm_ge_8 = src->dimension(idx_c) >= 8; |
| 318 | const bool is_ifm_ge_16 = src->dimension(idx_c) >= 16; |
| 319 | const bool is_ofm_lte_8 = weights->dimension(3U) <= 8; |
| 320 | const bool is_ofm_lt_64 = weights->dimension(3U) < 64; |
| 321 | const bool workload_gte_8192 = (output_shape[0] * output_shape[1] * output_shape[2]) / 16 >= 8192; |
| 322 | const bool is_ifm_gt_ofm = src->dimension(idx_c) > weights->dimension(3U); |
| 323 | const bool is_m_one = output_shape[1] * output_shape[2] == 1; |
| 324 | const bool is_unit_stride = |
| 325 | (conv2d_info.conv_info.stride().first == 1) && (conv2d_info.conv_info.stride().second == 1); |
| 326 | const int32_t kernel_sz = weights->dimension(idx_w) * weights->dimension(idx_h); |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 327 | |
Gian Marco Iodice | 1257131 | 2022-08-25 12:25:44 +0100 | [diff] [blame] | 328 | // Run Winograd if valid and IFM >= 8 |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 329 | if (is_wino_valid && is_ifm_ge_8) |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 330 | { |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 331 | if (is_ofm_lte_8) |
Adnan AlSinan | e871207 | 2022-07-21 16:34:49 +0100 | [diff] [blame] | 332 | { |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 333 | if (gpu_target == arm_compute::GPUTarget::G71 || gpu_target == arm_compute::GPUTarget::G72 || |
| 334 | get_arch_from_target(gpu_target) == arm_compute::GPUTarget::MIDGARD) |
Adnan AlSinan | e871207 | 2022-07-21 16:34:49 +0100 | [diff] [blame] | 335 | { |
| 336 | return ConvolutionMethod::WINOGRAD; |
| 337 | } |
| 338 | } |
| 339 | else |
| 340 | { |
| 341 | return ConvolutionMethod::WINOGRAD; |
| 342 | } |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 343 | } |
Gian Marco Iodice | 78baa48 | 2021-12-01 09:26:14 +0000 | [diff] [blame] | 344 | |
Gian Marco Iodice | 0bae3ee | 2022-01-20 16:33:29 +0000 | [diff] [blame] | 345 | // Direct convolution case |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 346 | if (is_direct_valid) |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 347 | { |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 348 | if ((gpu_target == arm_compute::GPUTarget::G71 || gpu_target == arm_compute::GPUTarget::G72 || |
| 349 | get_arch_from_target(gpu_target) == arm_compute::GPUTarget::MIDGARD)) |
Gian Marco Iodice | 0bae3ee | 2022-01-20 16:33:29 +0000 | [diff] [blame] | 350 | { |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 351 | if (is_large_kernel_sz && is_ifm_ge_16 && is_ifm_gt_ofm) |
Gian Marco Iodice | 51d7119 | 2022-02-16 14:41:28 +0000 | [diff] [blame] | 352 | { |
| 353 | return ConvolutionMethod::DIRECT; |
| 354 | } |
| 355 | } |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 356 | else if (gpu_target == arm_compute::GPUTarget::G76) |
Gian Marco Iodice | 51d7119 | 2022-02-16 14:41:28 +0000 | [diff] [blame] | 357 | { |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 358 | if ((is_large_kernel_sz && workload_gte_8192 && is_ifm_ge_16) || (is_ofm_lte_8 && is_ifm_ge_16)) |
Gian Marco Iodice | 51d7119 | 2022-02-16 14:41:28 +0000 | [diff] [blame] | 359 | { |
| 360 | return ConvolutionMethod::DIRECT; |
| 361 | } |
Gian Marco Iodice | 0bae3ee | 2022-01-20 16:33:29 +0000 | [diff] [blame] | 362 | } |
Adnan AlSinan | e871207 | 2022-07-21 16:34:49 +0100 | [diff] [blame] | 363 | else |
| 364 | { |
Gian Marco Iodice | a5cb79f | 2022-12-28 13:53:51 +0000 | [diff] [blame] | 365 | ConvolutionMethod preferred_conv_method = ConvolutionMethod::DIRECT; |
| 366 | |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 367 | const bool is_indirect_valid = |
| 368 | bool(ClIndirectConv2d::validate(src, weights, nullptr, dst, conv_info, act_info)); |
Gian Marco Iodice | a5cb79f | 2022-12-28 13:53:51 +0000 | [diff] [blame] | 369 | |
| 370 | // indirect conv2d should be called when: |
| 371 | // 1- When the kernel size is greater than 1x1 and less than or equal to 9x9 (81) |
| 372 | // 2- When the kernel size is odd |
| 373 | // 3- When the Gpu target is Arm Mali-G77 |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 374 | if (is_indirect_valid) |
Gian Marco Iodice | a5cb79f | 2022-12-28 13:53:51 +0000 | [diff] [blame] | 375 | { |
| 376 | const bool is_kernel_sz_odd = kernel_sz % 2; |
| 377 | const bool is_g77 = gpu_target == GPUTarget::G77; |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 378 | preferred_conv_method = (kernel_sz > 1) && (kernel_sz <= 81) && is_kernel_sz_odd && is_g77 |
| 379 | ? ConvolutionMethod::INDIRECT |
| 380 | : ConvolutionMethod::DIRECT; |
Gian Marco Iodice | a5cb79f | 2022-12-28 13:53:51 +0000 | [diff] [blame] | 381 | } |
| 382 | |
| 383 | // Direct/indirect convolution used for the first layer of the network |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 384 | if (workload_gte_8192 && !is_ifm_ge_16 && !is_unit_stride && is_ofm_lt_64) |
Gian Marco Iodice | 4478e1c | 2022-09-06 15:06:40 +0100 | [diff] [blame] | 385 | { |
| 386 | // In general, the question we should ask for the first convolution layer of a model is: |
| 387 | // when the execution time of im2col + gemm < direct?. Since im2col does not depend on the OFM, it means that |
| 388 | // when OFM is big enough, the contribution of im2col is small and the GEMM approach is preferable. |
| 389 | // From internal experiments, the OFM threshold is 64 (is_ofm_lt_64) |
Gian Marco Iodice | a5cb79f | 2022-12-28 13:53:51 +0000 | [diff] [blame] | 390 | return preferred_conv_method; |
Gian Marco Iodice | 4478e1c | 2022-09-06 15:06:40 +0100 | [diff] [blame] | 391 | } |
| 392 | |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 393 | if ((is_large_kernel_sz || is_m_one) && workload_gte_8192 && is_ifm_ge_16) |
Gian Marco Iodice | 4478e1c | 2022-09-06 15:06:40 +0100 | [diff] [blame] | 394 | { |
Gian Marco Iodice | a5cb79f | 2022-12-28 13:53:51 +0000 | [diff] [blame] | 395 | return preferred_conv_method; |
Gian Marco Iodice | 4478e1c | 2022-09-06 15:06:40 +0100 | [diff] [blame] | 396 | } |
| 397 | |
| 398 | // Direct convolution used for the last layer of the network |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 399 | if (is_ofm_lte_8) |
Adnan AlSinan | e871207 | 2022-07-21 16:34:49 +0100 | [diff] [blame] | 400 | { |
Gian Marco Iodice | a5cb79f | 2022-12-28 13:53:51 +0000 | [diff] [blame] | 401 | return preferred_conv_method; |
Adnan AlSinan | e871207 | 2022-07-21 16:34:49 +0100 | [diff] [blame] | 402 | } |
| 403 | } |
Sheri Zhang | 06d1efd | 2021-07-28 11:20:04 +0100 | [diff] [blame] | 404 | } |
| 405 | |
| 406 | // Default case |
| 407 | return ConvolutionMethod::GEMM; |
| 408 | } |
| 409 | |
| 410 | // Generic case for quantized. Only GeMM |
| 411 | return ConvolutionMethod::GEMM; |
| 412 | } |
| 413 | } |
| 414 | } |
| 415 | |
/** Execute the configured convolution operator.
 *
 * Calls prepare() first and then forwards the tensor pack to the wrapped operator.
 * _operator is dereferenced unconditionally, so configure() must have been called before.
 *
 * @param[in,out] tensors Tensor pack forwarded to prepare() and to the operator's run().
 */
void ClConv2d::run(ITensorPack &tensors)
{
    prepare(tensors);
    _operator->run(tensors);
}
| 421 | |
/** Forward the preparation step to the wrapped operator.
 *
 * _operator is dereferenced unconditionally, so configure() must have been called before.
 *
 * @param[in,out] tensors Tensor pack forwarded to the operator's prepare().
 */
void ClConv2d::prepare(ITensorPack &tensors)
{
    _operator->prepare(tensors);
}
| 426 | |
/** Memory requirements cached by configure() from the selected operator's workspace().
 *
 * @return A copy of the cached memory requirements.
 */
experimental::MemoryRequirements ClConv2d::workspace() const
{
    return _aux_mem;
}
| 431 | } // namespace opencl |
| 432 | } // namespace arm_compute |