Blame - src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp - ml/ComputeLibrary

2017-10-18 17:58:22 +0100

[diff] [blame]

1

/*

Ramy Elgammal

a8db612

2023-05-08 03:33:43 +0100

[diff] [blame]

2

Michalis Spyrou

2017-10-18 17:58:22 +0100

[diff] [blame]

3

*

4

* SPDX-License-Identifier: MIT

5

*

6

* Permission is hereby granted, free of charge, to any person obtaining a copy

7

* of this software and associated documentation files (the "Software"), to

8

* deal in the Software without restriction, including without limitation the

9

* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or

10

* sell copies of the Software, and to permit persons to whom the Software is

11

* furnished to do so, subject to the following conditions:

12

*

13

* The above copyright notice and this permission notice shall be included in all

14

* copies or substantial portions of the Software.

15

*

16

* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

17

* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

18

* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

19

* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

20

* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

21

* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE

22

* SOFTWARE.

23

*/

Giorgio Arena

04a8f8c

2017-11-23 11:45:24 +0000

[diff] [blame]

24

#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"

Michalis Spyrou

2017-10-18 17:58:22 +0100

[diff] [blame]

25

Giorgio Arena

d93e263

2019-10-15 11:09:33 +0100

[diff] [blame]

26

#include "arm_compute/core/utils/misc/InfoHelpers.h"

Georgios Pinitas

d05dce4

2018-01-22 16:29:17 +0000

[diff] [blame]

27

#include "arm_compute/core/utils/misc/ShapeCalculator.h"

Georgios Pinitas

f72f936

2018-01-12 16:29:45 +0000

[diff] [blame]

28

#include "arm_compute/core/utils/quantization/AsymmHelpers.h"

Michalis Spyrou

2017-10-18 17:58:22 +0100

[diff] [blame]

29

#include "arm_compute/runtime/NEON/NEScheduler.h"

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

30

ramelg01

cbbb038

2021-09-17 17:36:57 +0100

[diff] [blame]

31

#include "src/common/utils/Log.h"

Georgios Pinitas

7891a73

2021-08-20 21:39:25 +0100

[diff] [blame]

32

#include "src/cpu/operators/CpuDepthwiseConv2d.h"

Georgios Pinitas

47d39dc

2019-03-11 14:03:23 +0000

[diff] [blame]

33

Georgios Pinitas

d05dce4

2018-01-22 16:29:17 +0000

[diff] [blame]

34

using namespace arm_compute::misc;

Georgios Pinitas

4074c99

2018-01-30 18:13:46 +0000

[diff] [blame]

35

using namespace arm_compute::misc::shape_calculator;

Michalis Spyrou

2017-10-18 17:58:22 +0100

[diff] [blame]

36

Georgios Pinitas

47d39dc

2019-03-11 14:03:23 +0000

[diff] [blame]

37

namespace arm_compute

38

{

Michalis Spyrou

ebcebf1

2020-10-21 00:04:14 +0100

[diff] [blame]

39

// Destructor defaulted in this translation unit rather than in the header.
// NOTE(review): presumably required so that the pimpl Impl type is complete where the owning
// pointer is destroyed - confirm against the class declaration.
NEDepthwiseConvolutionLayer::~NEDepthwiseConvolutionLayer() = default;

40

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

41

struct NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::Impl

42

{

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

43

ITensor *src{nullptr}; // SRC_0

44

ITensor *dst{nullptr}; // DST_0

45

const ITensor *weights{nullptr}; // SRC_1

46

const ITensor *biases{nullptr}; // SRC_2

Manuel Bottini

2021-05-24 16:01:32 +0100

[diff] [blame]

47

Tensor permuted_input{}; // INT_0

48

Tensor permuted_weights{}; // INT_1

49

Tensor permuted_output{}; // INT_2

50

Tensor workspace{}; // INT_3

51

Tensor packed_weights{}; // INT_4

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

52

std::shared_ptr<cpu::CpuDepthwiseConv2d> op{nullptr};

53

bool is_prepared{false};

54

bool permute{false};

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

55

};

56

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

57

/** Construct the optimized depthwise convolution function.
 *
 * @param[in] memory_manager Memory manager handed to the internal memory group.
 *
 * The manager is taken by value, so it is moved (not copied) into the memory group; this matches
 * the NEDepthwiseConvolutionLayer constructor and avoids an extra shared_ptr reference-count update.
 */
NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::NEDepthwiseConvolutionLayerOptimizedInternal(
    std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(std::move(memory_manager)), _impl(std::make_unique<Impl>())
{
}

62

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

63

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::configure(

64

ITensor *input,

65

const ITensor *weights,

66

const ITensor *biases,

67

ITensor *output,

68

const PadStrideInfo &conv_info,

69

unsigned int depth_multiplier,

70

const ActivationLayerInfo &act_info,

71

const Size2D &dilation)

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

72

{

Georgios Pinitas

7d0adc6

2020-09-04 15:25:24 +0100

[diff] [blame]

73

ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

74

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

75

bool is_nhwc = input->info()->data_layout() == DataLayout::NCHW;

76

_impl->src = input;

77

_impl->weights = weights;

78

_impl->biases = biases;

79

_impl->dst = output;

80

_impl->permute = is_nhwc;

81

Manuel Bottini

2021-05-24 16:01:32 +0100

[diff] [blame]

82

_impl->op = std::make_unique<cpu::CpuDepthwiseConv2d>();

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

83

ConvolutionInfo info{conv_info, depth_multiplier, act_info, dilation};

84

_impl->op->configure(_impl->src->info(), _impl->weights->info(),

85

_impl->biases == nullptr ? nullptr : _impl->biases->info(), _impl->dst->info(), info);

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

86

Georgios Pinitas

7d0adc6

2020-09-04 15:25:24 +0100

[diff] [blame]

87

// Configure pipeline

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

88

ActivationLayerInfo act_info_to_use = ActivationLayerInfo();

89

const bool is_relu = arm_compute::utils::info_helpers::is_relu(act_info);

90

const bool is_relu6 = arm_compute::utils::info_helpers::is_relu6(act_info);

91

bool is_activationlayer_enabled = act_info.enabled() && !(is_relu || is_relu6);

92

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

93

if (!is_activationlayer_enabled)

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

94

{

95

act_info_to_use = act_info;

96

}

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

97

info = ConvolutionInfo{conv_info, depth_multiplier, act_info_to_use, dilation};

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

98

Manuel Bottini

2021-05-24 16:01:32 +0100

[diff] [blame]

99

auto dwc_optimized_func = std::make_unique<cpu::CpuDepthwiseConv2dAssemblyDispatch>();

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

100

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

101

if (is_nhwc)

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

102

{

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

103

auto permute_input = std::make_unique<cpu::CpuPermute>();

104

auto permute_weights = std::make_unique<cpu::CpuPermute>();

105

auto permute_output = std::make_unique<cpu::CpuPermute>();

106

107

_memory_group.manage(&_impl->permuted_input);

108

_memory_group.manage(&_impl->permuted_weights);

109

_memory_group.manage(&_impl->permuted_output);

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

110

111

// Configure the function to transform the input tensor from NCHW -> NHWC

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

112

permute_input->configure(input->info(), _impl->permuted_input.info(), PermutationVector(2U, 0U, 1U));

113

_impl->permuted_input.info()->set_data_layout(DataLayout::NHWC);

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

114

115

// Configure the function to transform the weights tensor from IHW -> HWI

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

116

permute_weights->configure(weights->info(), _impl->permuted_weights.info(), PermutationVector(2U, 0U, 1U));

117

_impl->permuted_weights.info()->set_data_layout(DataLayout::NHWC);

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

118

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

119

_impl->permuted_output.info()->set_data_layout(DataLayout::NHWC);

120

_impl->permuted_output.info()->set_quantization_info(output->info()->quantization_info());

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

121

122

// Configure optimized depthwise

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

123

dwc_optimized_func->configure(_impl->permuted_input.info(), _impl->permuted_weights.info(),

124

biases == nullptr ? nullptr : biases->info(), _impl->permuted_output.info(),

125

info);

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

126

127

// Configure the function to transform the convoluted output to ACL's native ordering format NCHW

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

128

_impl->permuted_output.info()->set_data_layout(DataLayout::NHWC);

129

permute_output->configure(_impl->permuted_output.info(), output->info(), PermutationVector(1U, 2U, 0U));

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

130

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

131

_impl->permuted_input.allocator()->allocate();

132

_impl->permuted_output.allocator()->allocate();

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

133

}

134

else

135

{

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

136

dwc_optimized_func->configure(_impl->src->info(), _impl->weights->info(),

137

biases == nullptr ? nullptr : biases->info(), _impl->dst->info(), info);

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

138

}

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

139

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

140

// Allocate memory based on the internal memory requirements

141

experimental::MemoryRequirements mem_req = dwc_optimized_func->workspace();

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

142

_impl->workspace.allocator()->init(TensorInfo(TensorShape{mem_req[0].size + mem_req[0].alignment}, 1, DataType::S8),

143

mem_req[0].alignment);

144

_impl->packed_weights.allocator()->init(

145

TensorInfo(TensorShape{mem_req[1].size + mem_req[1].alignment}, 1, DataType::S8), mem_req[1].alignment);

Michele Di Giorgio

d02d5ed

2021-01-22 09:47:04 +0000

[diff] [blame]

146

_memory_group.manage(&_impl->workspace);

147

_memory_group.manage(&_impl->packed_weights);

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

148

_impl->workspace.allocator()->allocate();

149

_impl->packed_weights.allocator()->allocate();

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

150

}

151

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

152

/** Check whether the given configuration is valid.
 *
 * Bundles the convolution parameters into a ConvolutionInfo and forwards everything to
 * cpu::CpuDepthwiseConv2d::validate().
 *
 * @return The status returned by cpu::CpuDepthwiseConv2d::validate().
 */
Status
NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::validate(const ITensorInfo         *input,
                                                                                    const ITensorInfo         *weights,
                                                                                    const ITensorInfo         *biases,
                                                                                    const ITensorInfo         *output,
                                                                                    const PadStrideInfo       &conv_info,
                                                                                    unsigned int               depth_multiplier,
                                                                                    const ActivationLayerInfo &act_info,
                                                                                    const Size2D              &dilation)
{
    const ConvolutionInfo conv_desc{conv_info, depth_multiplier, act_info, dilation};
    return cpu::CpuDepthwiseConv2d::validate(input, weights, biases, output, conv_desc);
}

165

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

166

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::run()

Georgios Pinitas

2019-06-24 14:56:34 +0100

[diff] [blame]

167

{

168

prepare();

Georgios Pinitas

2019-06-24 14:56:34 +0100

[diff] [blame]

169

MemoryGroupResourceScope scope_mg(_memory_group);

170

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

171

ITensorPack pack;

172

pack.add_tensor(TensorType::ACL_SRC_0, _impl->src);

173

pack.add_tensor(TensorType::ACL_SRC_1, _impl->weights);

174

pack.add_tensor(TensorType::ACL_SRC_2, _impl->biases);

175

pack.add_tensor(TensorType::ACL_INT_0, &_impl->permuted_input);

176

pack.add_tensor(TensorType::ACL_INT_1, &_impl->permuted_weights);

177

pack.add_tensor(TensorType::ACL_INT_2, &_impl->permuted_output);

178

pack.add_tensor(TensorType::ACL_INT_3, &_impl->workspace);

179

pack.add_tensor(TensorType::ACL_INT_4, &_impl->packed_weights);

180

pack.add_tensor(TensorType::ACL_DST_0, _impl->dst);

Georgios Pinitas

2019-06-24 14:56:34 +0100

[diff] [blame]

181

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

182

_impl->op->run(pack);

Georgios Pinitas

2019-06-24 14:56:34 +0100

[diff] [blame]

183

}

184

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

185

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::prepare()

Georgios Pinitas

2019-06-24 14:56:34 +0100

[diff] [blame]

186

{

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

187

if (!_impl->is_prepared)

Georgios Pinitas

2019-06-24 14:56:34 +0100

[diff] [blame]

188

{

189

// Permute weights

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

190

if (_impl->permute)

Georgios Pinitas

2019-06-24 14:56:34 +0100

[diff] [blame]

191

{

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

192

_impl->permuted_weights.allocator()->allocate();

Georgios Pinitas

2019-06-24 14:56:34 +0100

[diff] [blame]

193

}

194

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

195

if (!_impl->permuted_weights.is_used())

Georgios Pinitas

2019-06-24 14:56:34 +0100

[diff] [blame]

196

{

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

197

_impl->permuted_weights.allocator()->free();

Georgios Pinitas

2019-06-24 14:56:34 +0100

[diff] [blame]

198

}

199

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

200

_impl->is_prepared = true;

Georgios Pinitas

2019-06-24 14:56:34 +0100

[diff] [blame]

}

}

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

204

struct NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::Impl

205

{

Manuel Bottini

2021-05-24 16:01:32 +0100

[diff] [blame]

206

Tensor permuted_input{};

207

Tensor permuted_weights{};

208

Tensor permuted_output{};

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

209

bool is_prepared{false};

210

bool is_nchw{false};

211

bool is_activationlayer_enabled{false};

212

const ITensor *weights{nullptr};

213

const ITensor *biases{nullptr};

214

const ITensor *src{nullptr};

215

ITensor *dst{nullptr};

216

std::shared_ptr<cpu::CpuDepthwiseConv2d> op{nullptr};

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

217

};

218

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

219

/** Construct the generic depthwise convolution function with a default-initialised Impl. */
NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::NEDepthwiseConvolutionLayerGeneric()
    : _impl(std::make_unique<Impl>())
{
}

223

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

224

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::configure(ITensor *input,

225

const ITensor *weights,

226

const ITensor *biases,

227

ITensor *output,

228

const PadStrideInfo &conv_info,

229

unsigned int depth_multiplier,

230

const ActivationLayerInfo &act_info,

231

const Size2D &dilation)

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

232

{

Michele Di Giorgio

ff27192

2019-07-17 15:59:32 +0100

[diff] [blame]

233

ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

234

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

235

const ConvolutionInfo info{conv_info, depth_multiplier, act_info, dilation};

Manuel Bottini

2021-05-24 16:01:32 +0100

[diff] [blame]

236

_impl->op = std::make_unique<cpu::CpuDepthwiseConv2d>();

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

237

_impl->op->configure(input->info(), weights->info(), biases == nullptr ? nullptr : biases->info(), output->info(),

238

info);

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

_impl->src = input;

_impl->dst = output;

_impl->weights = weights;

243

_impl->biases = biases;

244

_impl->is_nchw = input->info()->data_layout() == DataLayout::NCHW;

245

_impl->is_prepared = !_impl->is_nchw;

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

246

Giorgio Arena

d93e263

2019-10-15 11:09:33 +0100

[diff] [blame]

247

ITensor *input_to_use = input;

248

const ITensor *weights_to_use = weights;

249

ITensor *output_to_use = output;

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

250

if (_impl->is_nchw)

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

251

{

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

252

auto permute_input = std::make_unique<cpu::CpuPermute>();

253

auto permute_weights = std::make_unique<cpu::CpuPermute>();

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

254

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

255

permute_input->configure(input->info(), _impl->permuted_input.info(), PermutationVector(2U, 0U, 1U));

256

_impl->permuted_input.info()->set_data_layout(DataLayout::NHWC);

257

input_to_use = &_impl->permuted_input;

Giorgio Arena

44f5572

2019-07-12 14:49:49 +0100

[diff] [blame]

258

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

259

permute_weights->configure(weights->info(), _impl->permuted_weights.info(), PermutationVector(2U, 0U, 1U));

260

_impl->permuted_weights.info()->set_data_layout(DataLayout::NHWC);

261

weights_to_use = &_impl->permuted_weights;

Giorgio Arena

44f5572

2019-07-12 14:49:49 +0100

[diff] [blame]

262

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

263

_impl->permuted_output.allocator()->init(

264

output->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(TensorShape()));

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

265

output_to_use = &_impl->permuted_output;

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

266

}

267

Manuel Bottini

2021-05-24 16:01:32 +0100

[diff] [blame]

268

auto depthwise_conv_kernel = std::make_unique<cpu::kernels::CpuDepthwiseConv2dNativeKernel>();

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

269

depthwise_conv_kernel->configure(input_to_use->info(), weights_to_use->info(),

270

biases == nullptr ? nullptr : biases->info(), output_to_use->info(), info);

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

271

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

272

if (_impl->is_nchw)

Georgios Pinitas

60e9825

2018-10-22 16:17:20 +0100

[diff] [blame]

273

{

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

274

auto permute_output = std::make_unique<cpu::CpuPermute>();

275

permute_output->configure(_impl->permuted_output.info(), output->info(), PermutationVector(1U, 2U, 0U));

276

_impl->permuted_output.info()->set_data_layout(DataLayout::NHWC);

277

278

_impl->permuted_input.allocator()->allocate();

279

_impl->permuted_weights.allocator()->allocate();

280

_impl->permuted_output.allocator()->allocate();

Georgios Pinitas

60e9825

2018-10-22 16:17:20 +0100

[diff] [blame]

281

}

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

282

}

283

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

284

/** Check whether the given configuration is valid.
 *
 * Bundles the convolution parameters into a ConvolutionInfo and forwards everything to
 * cpu::CpuDepthwiseConv2d::validate().
 *
 * @return The status returned by cpu::CpuDepthwiseConv2d::validate().
 */
Status NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::validate(const ITensorInfo         *input,
                                                                                 const ITensorInfo         *weights,
                                                                                 const ITensorInfo         *biases,
                                                                                 const ITensorInfo         *output,
                                                                                 const PadStrideInfo       &conv_info,
                                                                                 unsigned int               depth_multiplier,
                                                                                 const ActivationLayerInfo &act_info,
                                                                                 const Size2D              &dilation)
{
    const ConvolutionInfo conv_desc{conv_info, depth_multiplier, act_info, dilation};
    return cpu::CpuDepthwiseConv2d::validate(input, weights, biases, output, conv_desc);
}

296

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

297

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::run()

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

298

{

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

299

ITensorPack pack;

300

pack.add_tensor(TensorType::ACL_SRC_0, _impl->src);

301

pack.add_tensor(TensorType::ACL_SRC_1, _impl->weights);

302

pack.add_tensor(TensorType::ACL_SRC_2, _impl->biases);

303

pack.add_tensor(TensorType::ACL_INT_0, &_impl->permuted_input);

304

pack.add_tensor(TensorType::ACL_INT_1, &_impl->permuted_weights);

305

pack.add_tensor(TensorType::ACL_INT_2, &_impl->permuted_output);

306

pack.add_tensor(TensorType::ACL_DST_0, _impl->dst);

Giorgio Arena

d93e263

2019-10-15 11:09:33 +0100

[diff] [blame]

307

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

308

_impl->op->run(pack);

Georgios Pinitas

7221933

2018-06-05 14:56:06 +0100

[diff] [blame]

309

}

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

310

311

/** Construct the depthwise convolution layer.
 *
 * @param[in] memory_manager Memory manager moved into the layer's memory group.
 */
NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(std::move(memory_manager)),
      _impl(std::make_unique<Impl>())
{
}

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

316

#ifndef DOXYGEN_SKIP_THIS

317

struct NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer::Impl

318

{

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

319

DepthwiseConvolutionFunction depth_conv_func{DepthwiseConvolutionFunction::OPTIMIZED};

320

NEDepthwiseConvolutionLayerOptimizedInternal func_optimized{nullptr};

Manuel Bottini

2021-05-24 16:01:32 +0100

[diff] [blame]

321

NEDepthwiseConvolutionLayerGeneric func_generic{};

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

322

std::shared_ptr<cpu::CpuDepthwiseConv2d> op{nullptr};

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

323

};

324

#endif // DOXYGEN_SKIP_THIS

325

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

326

void NEDepthwiseConvolutionLayer::configure(ITensor *input,

327

const ITensor *weights,

328

const ITensor *biases,

329

ITensor *output,

330

const PadStrideInfo &conv_info,

331

unsigned int depth_multiplier,

332

const ActivationLayerInfo &act_info,

333

const Size2D &dilation)

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

334

{

Jakub Sujak

ed9eb30

2023-05-11 13:15:46 +0100

[diff] [blame]

335

ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);

Ramy Elgammal

a8db612

2023-05-08 03:33:43 +0100

[diff] [blame]

336

ramelg01

cbbb038

2021-09-17 17:36:57 +0100

[diff] [blame]

337

ARM_COMPUTE_LOG_PARAMS(input, weights, output, conv_info, depth_multiplier, biases, act_info, dilation);

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

338

ARM_COMPUTE_ERROR_THROW_ON(NEDepthwiseConvolutionLayer::validate(

339

input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(), output->info(), conv_info,

340

depth_multiplier, act_info, dilation));

ramelg01

cbbb038

2021-09-17 17:36:57 +0100

[diff] [blame]

341

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

342

const ConvolutionInfo info{conv_info, depth_multiplier, act_info, dilation};

Manuel Bottini

2021-05-24 16:01:32 +0100

[diff] [blame]

343

_impl->op = std::make_shared<cpu::CpuDepthwiseConv2d>();

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

344

_impl->depth_conv_func = _impl->op->get_depthwiseconvolution_function(

345

input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(), info);

346

switch (_impl->depth_conv_func)

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

347

{

348

case DepthwiseConvolutionFunction::OPTIMIZED:

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

349

_impl->func_optimized.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info,

350

dilation);

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

351

break;

352

case DepthwiseConvolutionFunction::GENERIC:

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

353

_impl->func_generic.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info,

354

dilation);

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

355

break;

356

default:

357

ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");

358

}

359

}

360

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

361

/** Check whether the given configuration is valid.
 *
 * Bundles the convolution parameters into a ConvolutionInfo and forwards everything to
 * cpu::CpuDepthwiseConv2d::validate().
 *
 * @return The status returned by cpu::CpuDepthwiseConv2d::validate().
 */
Status NEDepthwiseConvolutionLayer::validate(const ITensorInfo         *input,
                                             const ITensorInfo         *weights,
                                             const ITensorInfo         *biases,
                                             const ITensorInfo         *output,
                                             const PadStrideInfo       &conv_info,
                                             unsigned int               depth_multiplier,
                                             const ActivationLayerInfo &act_info,
                                             const Size2D              &dilation)
{
    const ConvolutionInfo conv_desc{conv_info, depth_multiplier, act_info, dilation};
    return cpu::CpuDepthwiseConv2d::validate(input, weights, biases, output, conv_desc);
}

373

374

void NEDepthwiseConvolutionLayer::run()

375

{

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

376

switch (_impl->depth_conv_func)

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

377

{

378

case DepthwiseConvolutionFunction::OPTIMIZED:

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

379

_impl->func_optimized.run();

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

380

break;

381

case DepthwiseConvolutionFunction::GENERIC:

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

382

_impl->func_generic.run();

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

383

break;

384

default:

385

ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");

}

}

void NEDepthwiseConvolutionLayer::prepare()

390

{

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

391

switch (_impl->depth_conv_func)

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

392

{

393

case DepthwiseConvolutionFunction::OPTIMIZED:

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

394

_impl->func_optimized.prepare();

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

395

break;

396

case DepthwiseConvolutionFunction::GENERIC:

Michalis Spyrou

2021-04-08 12:02:58 +0100

[diff] [blame]

397

_impl->func_generic.prepare();

Manuel Bottini