blob: 9169b75d193f1362de3f9383640af63aeb8c21f3 [file] [log] [blame]
Pablo Tello89519332017-11-17 11:52:36 +00001/*
Pablo Tello9ceebbe2018-01-10 16:44:13 +00002 * Copyright (c) 2017-2018 ARM Limited.
Pablo Tello89519332017-11-17 11:52:36 +00003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#ifndef __ARM_COMPUTE_NEGEMMWINOGRADLAYERKERNEL_H__
25#define __ARM_COMPUTE_NEGEMMWINOGRADLAYERKERNEL_H__
26
27#include "arm_compute/core/NEON/INEKernel.h"
Georgios Pinitas4074c992018-01-30 18:13:46 +000028#include "arm_compute/core/NEON/kernels/convolution/common/convolution.hpp"
29#include "arm_compute/core/NEON/kernels/convolution/common/tensor.hpp"
30#include "arm_compute/core/NEON/kernels/convolution/winograd/batched_blocked_gemm.hpp"
31#include "arm_compute/core/NEON/kernels/convolution/winograd/winograd_gemm.hpp"
Pablo Tello89519332017-11-17 11:52:36 +000032
33namespace arm_compute
34{
35class ITensor;
Pablo Tello02541fb2017-12-15 09:48:59 +000036
/** Interface for the NEON kernel that performs the Winograd input transform.
 *
 * @tparam T Element type of the input tensor and of the transformed output matrices.
 */
template <typename T>
class INEWinogradLayerTransformInputKernel : public INEKernel
{
public:
    /** Determine how much memory (in units of T) to allocate for the
     * transformed input.
     *
     * @param[in] n_batches    Number of batches in the input tensor.
     * @param[in] n_channels   Number of feature maps in the input tensor.
     * @param[in] n_rows       Number of rows in each feature map.
     * @param[in] n_cols       Number of columns in each feature map.
     * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
     *
     * @return Required storage size in units of T.
     */
    virtual unsigned int get_input_storage_size(int n_batches, int n_channels, int n_rows, int n_cols, bool same_padding) const = 0;

    /** Gets the stride between matrices in the input workspace
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     * @param[in] input_shape  The shape of the input tensor.
     * @param[in] padding_type The type of padding to be used.
     *
     * @return Stride expressed in bytes.
     */
    virtual int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const = 0;

    /** Configure the input transform kernel.
     *
     * @param[in]  input         Input tensor data
     * @param[in]  n_batches     Number of batches in input tensor.
     * @param[in]  n_rows        Number of rows in input tensor.
     * @param[in]  n_cols        Number of columns in input tensor.
     * @param[in]  n_channels    Number of channels in input tensor.
     * @param[in]  padding       Padding type.
     * @param[out] output        Base of output matrices.
     * @param[in]  matrix_stride Stride between output matrices.
     */
    virtual void configure(const T *const input, const int n_batches, const int n_rows, const int n_cols, const int n_channels, const PaddingType padding, T *const output, const int matrix_stride) = 0;

    /** Virtual destructor: derived kernels are deleted through this interface. */
    virtual ~INEWinogradLayerTransformInputKernel()
    {
    }
};
79
80template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
81class NEWinogradLayerTransformInputKernel : public INEWinogradLayerTransformInputKernel<T>
82{
83public:
84 /** Determine how much memory (in units of TIn) to allocate for the
85 * transformed input.
86 *
87 * @param[in] n_batches Number of batches in the input tensor.
88 * @param[in] n_channels Number of feature maps in the input tensor.
89 * @param[in] n_rows Number of rows in each feature map.
90 * @param[in] n_cols Number of columns in each feature map.
91 * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
92 */
93 unsigned int get_input_storage_size(
Pablo Tello52140b42018-01-30 14:48:11 +000094 int n_batches,
95 int n_channels,
96 int n_rows,
97 int n_cols,
Pablo Tellof6c572c2018-02-14 12:47:30 +000098 bool same_padding) const override;
99
100 /** Gets the stride between matrices in the input worspace
101 *
102 * @param[in] kernel_shape The shape of the weights tensor.
103 * @param[in] input_shape The shape of the input tensor.
104 * @param[in] padding_type The type of padding to be used.
105 *
106 * @return Stride expressed in bytes.
107 */
108 int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const override;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000109
Pablo Tello52140b42018-01-30 14:48:11 +0000110 NEWinogradLayerTransformInputKernel();
Pablo Tellof6c572c2018-02-14 12:47:30 +0000111
Pablo Tellod6ca4782018-01-23 09:36:04 +0000112 const char *name() const override
113 {
114 return "NEWinogradLayerTransformInputKernel";
115 }
Pablo Tello52140b42018-01-30 14:48:11 +0000116
117 /** Configure the output transform kernel.
118 *
119 * @param[in] input Input tensor data
120 * @param[in] n_batches Number of batches in input tensor.
121 * @param[in] n_rows Number of rows in input tensor.
122 * @param[in] n_cols Number of columns in input tensor.
123 * @param[in] n_channels Number of channels in input tensor.
124 * @param[in] padding Padding type.
125 * @param[out] output Base of output matrices.
126 * @param[in] matrix_stride Stride between output matrices.
127 */
128 void configure(
Pablo Tellof6c572c2018-02-14 12:47:30 +0000129 const T *const input,
130 const int n_batches,
131 const int n_rows,
132 const int n_cols,
133 const int n_channels,
134 const PaddingType padding,
135 T *const output,
136 const int matrix_stride) override;
Pablo Tello52140b42018-01-30 14:48:11 +0000137
Pablo Tellod6ca4782018-01-23 09:36:04 +0000138 // Inherited methods overridden:
Pablo Tellod6ca4782018-01-23 09:36:04 +0000139 void run(const Window &window, const ThreadInfo &info) override;
140 bool is_parallelisable() const override;
Pablo Tello52140b42018-01-30 14:48:11 +0000141
Pablo Tellof6c572c2018-02-14 12:47:30 +0000142 using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelCols, KernelCols>;
143 using WinogradConv = typename WinogradBase::template Convolution<T, T>;
144
Pablo Tello52140b42018-01-30 14:48:11 +0000145private:
Pablo Tellof6c572c2018-02-14 12:47:30 +0000146 using InputTransform = typename WinogradBase::template InputTransform<T>;
Pablo Tello52140b42018-01-30 14:48:11 +0000147 std::unique_ptr<InputTransform> _transform;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000148};
149
/** Interface for the NEON kernel that performs the Winograd output transform.
 *
 * @tparam T Element type of the Winograd-domain working space and of the spatial-domain output.
 */
template <typename T>
class INEWinogradLayerTransformOutputKernel : public INEKernel
{
public:
    /** Determine how much memory (in units of T) to allocate for the
     * (Winograd domain) output.
     *
     * @param[in] n_batches         Number of batches in the output tensor.
     * @param[in] n_rows            Number of rows in each feature map of the input tensor.
     * @param[in] n_cols            Number of columns in each feature map of the input tensor.
     * @param[in] n_output_channels Number of feature maps in the output tensor.
     * @param[in] same_padding      Use "SAME" padding, otherwise use "VALID".
     *
     * @return Required storage size in units of T.
     */
    virtual unsigned int get_output_storage_size(int n_batches, int n_rows, int n_cols, int n_output_channels, bool same_padding) const = 0;

    /** Gets the stride between matrices in the output workspace
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     * @param[in] input_shape  The shape of the input tensor.
     * @param[in] padding_type The type of padding to be used.
     *
     * @return Stride expressed in bytes.
     */
    virtual int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const = 0;

    /** Get the output shape of a convolution.
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     * @param[in] in_shape     The shape of the input tensor.
     * @param[in] padding      The type of padding to be used.
     *
     * @return Shape of the output tensor.
     */
    virtual Tensor4DShape get_output_shape(const KernelShape &kernel_shape, const Tensor4DShape &in_shape, const PaddingType padding) const = 0;

    /** Configure the output transform kernel.
     *
     * @param[in]  biases              Pointer to the biases tensor.
     * @param[in]  output_workingspace Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride       Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
     * @param[out] output              Pointer to NHWC ordered output tensor, in the spatial domain.
     * @param[in]  n_batches           Number of batches in the input tensor.
     * @param[in]  n_rows              Number of rows in output tensor.
     * @param[in]  n_cols              Number of columns in output tensor.
     * @param[in]  n_channels          Number of feature maps in the output tensor.
     */
    virtual void configure(
        const ITensor *biases,
        const T *const output_workingspace,
        const int      matrix_stride,
        T *const       output,
        const int      n_batches,
        const int      n_rows,
        const int      n_cols,
        const int      n_channels) = 0;

    /** Virtual destructor: derived kernels are deleted through this interface. */
    virtual ~INEWinogradLayerTransformOutputKernel()
    {
    }
};
210
/** NEON kernel that performs the Winograd output transform.
 *
 * @tparam T              Element type.
 * @tparam OutputTileRows Rows of the output tile.
 * @tparam OutputTileCols Columns of the output tile.
 * @tparam KernelRows     Rows of the convolution kernel.
 * @tparam KernelCols     Columns of the convolution kernel.
 */
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformOutputKernel : public INEWinogradLayerTransformOutputKernel<T>
{
public:
    const char *name() const override
    {
        return "NEWinogradLayerTransformOutputKernel";
    }
    /** Constructor */
    NEWinogradLayerTransformOutputKernel();

    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformOutputKernel(const NEWinogradLayerTransformOutputKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformOutputKernel &operator=(const NEWinogradLayerTransformOutputKernel &) = delete;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformOutputKernel(NEWinogradLayerTransformOutputKernel &&) = default;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformOutputKernel &operator=(NEWinogradLayerTransformOutputKernel &&) = default;

    ~NEWinogradLayerTransformOutputKernel() = default;

    // Inherited methods overridden:
    /** Determine how much memory (in units of T) to allocate for the
     * (Winograd domain) output.
     *
     * @param[in] n_batches         Number of batches in the output tensor.
     * @param[in] n_rows            Number of rows in each feature map of the input tensor.
     * @param[in] n_cols            Number of columns in each feature map of the input tensor.
     * @param[in] n_output_channels Number of feature maps in the output tensor.
     * @param[in] same_padding      Use "SAME" padding, otherwise use "VALID".
     *
     * @return Required storage size in units of T.
     */
    unsigned int get_output_storage_size(int n_batches, int n_rows, int n_cols, int n_output_channels, bool same_padding) const override;

    /** Gets the stride between matrices in the output workspace
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     * @param[in] input_shape  The shape of the input tensor.
     * @param[in] padding_type The type of padding to be used.
     *
     * @return Stride expressed in bytes.
     */
    int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const override;
    /** Get the output shape of a convolution.
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     * @param[in] in_shape     The shape of the input tensor.
     * @param[in] padding      The type of padding to be used.
     *
     * @return Shape of the output tensor.
     */
    Tensor4DShape get_output_shape(const KernelShape &kernel_shape, const Tensor4DShape &in_shape, const PaddingType padding) const override;

    /** Configure the output transform kernel.
     *
     * @param[in]  biases              Pointer to the biases tensor.
     * @param[in]  output_workingspace Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride       Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
     * @param[out] output              Pointer to NHWC ordered output tensor, in the spatial domain.
     * @param[in]  n_batches           Number of batches in the input tensor.
     * @param[in]  n_rows              Number of rows in output tensor.
     * @param[in]  n_cols              Number of columns in output tensor.
     * @param[in]  n_channels          Number of feature maps in the output tensor.
     */
    void configure(
        const ITensor *biases,
        const T *const output_workingspace,
        const int      matrix_stride,
        T *const       output,
        const int      n_batches,
        const int      n_rows,
        const int      n_cols,
        const int      n_channels) override;

    void run(const Window &window, const ThreadInfo &info) override;
    bool is_parallelisable() const override;

private:
    using WinogradBase    = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    using WinogradConv    = typename WinogradBase::template Convolution<T, T>;
    using OutputTransform = typename WinogradBase::template OutputTransform<T>;

    // Non-owning pointers captured by configure(); the caller keeps ownership.
    const ITensor *_biases;
    const T       *_output_workspace;
    int            _matrix_stride;
    int            _matrix_row_stride;
    T             *_output;
    int            _n_batches;
    int            _n_rows;
    int            _n_cols;
    int            _n_channels;
};
303
/** Interface for the NEON kernel that performs the Winograd weights transform.
 *
 * @tparam T Element type of the transformed weights.
 */
template <typename T>
class INEWinogradLayerTransformWeightsKernel : public INEKernel
{
public:
    /** Determine how much memory (in units of T) to allocate for the
     * transformed weights.
     *
     * @param[in] n_output_channels Number of output feature maps.
     * @param[in] n_input_channels  Number of input feature maps.
     *
     * @return Required storage size in units of T.
     */
    virtual unsigned int get_weight_storage_size(int n_output_channels, int n_input_channels) const = 0;
    /** Gets the stride between matrices in the kernel workspace
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     *
     * @return Stride expressed in bytes.
     */
    virtual int get_matrix_stride(const KernelShape &kernel_shape) const = 0;

    /** Configure the weights transform kernel.
     *
     * @param[in]  weights_hwio      Pointer to the weights tensor
     * @param[out] output            Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride     Stride across matrices in the output workspace.
     * @param[in]  n_output_channels Number of filters.
     * @param[in]  n_input_channels  Number of channels in each filter.
     */
    virtual void configure(const ITensor *weights_hwio, T *const output, const int matrix_stride, const int n_output_channels, const int n_input_channels) = 0;

    /** Virtual destructor: derived kernels are deleted through this interface. */
    virtual ~INEWinogradLayerTransformWeightsKernel()
    {
    }
};
337
/** NEON kernel that performs the Winograd weights transform.
 *
 * @tparam T              Element type.
 * @tparam OutputTileRows Rows of the output tile.
 * @tparam OutputTileCols Columns of the output tile.
 * @tparam KernelRows     Rows of the convolution kernel.
 * @tparam KernelCols     Columns of the convolution kernel.
 */
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformWeightsKernel final : public INEWinogradLayerTransformWeightsKernel<T>
{
public:
    /** Default constructor. */
    NEWinogradLayerTransformWeightsKernel();
    const char *name() const override
    {
        return "NEWinogradLayerTransformWeightsKernel";
    }

    // Inherited methods overridden:
    void configure(const ITensor *weights_hwio, T *const output, const int matrix_stride, const int n_output_channels, const int n_input_channels) override;
    unsigned int get_weight_storage_size(int n_output_channels, int n_input_channels) const override;
    int get_matrix_stride(const KernelShape &kernel_shape) const override;
    void run(const Window &window, const ThreadInfo &info) override;
    bool is_parallelisable() const override;

private:
    using WinogradBase     = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    using WinogradConv     = typename WinogradBase::template Convolution<T, T>;
    using WeightsTransform = typename WinogradBase::template WeightsTransform<T>;
    std::unique_ptr<WeightsTransform> _transform;
};
361
Pablo Tellof6c572c2018-02-14 12:47:30 +0000362template <typename TIn, typename TOut>
363class INEWinogradLayerBatchedGEMMKernel : public INEKernel
364{
365public:
366 /** Get the number of GEMMs to compute
367 */
368 virtual unsigned int get_number_gemms() const = 0;
369 /** Initialise the kernel
370 *
371 * @param[in] n_gemms Number of GEMMs to compute.
372 * @param[in] M in_shape.n_batches * tile_rows * tile_cols.
373 * @param[in] K Number of channels in the input tensor.
374 * @param[in] N Number of channels in the output tensor.
375 * @param[in] a_matrix_stride Stride between input matrices.
376 * @param[in] a_row_stride Row stride inside input matrix.
377 * @param[in] b_matrix_stride Stride between weights matrices.
378 * @param[in] b_row_stride Row stride inside the weights matrix.
379 * @param[in] c_matrix_stride Stride between output matrices.
380 * @param[in] c_row_stride Row stride inside the output matrix.
381 * @param[out] a_ptr Input workspace.
382 * @param[out] b_ptr Kernel workspace.
383 * @param[out] c_ptr Output workspace.
384 */
385 virtual void configure(
386 const unsigned int n_gemms,
387 const int M, const int K, const int N,
388 const int a_matrix_stride,
389 const int a_row_stride,
390 const int b_matrix_stride,
391 const int b_row_stride,
392 const int c_matrix_stride,
393 const int c_row_stride,
394 const TIn *const a_ptr,
395 const TIn *const b_ptr,
396 TOut *const c_ptr) = 0;
397
398 /** Get the number of tiles per row
399 */
400 virtual int get_output_tile_rows() const = 0;
401 /** Get the number of tiles per columns
402 */
403 virtual int get_output_tile_cols() const = 0;
404 /** Get the number of blocks
405 */
406 virtual int get_number_blocks() const = 0;
407};
408
/** NEON kernel that performs the batched Winograd-domain GEMMs.
 *
 * @tparam TIn            Element type of the input and weights workspaces.
 * @tparam TOut           Element type of the output workspace.
 * @tparam OutputTileRows Rows of the output tile.
 * @tparam OutputTileCols Columns of the output tile.
 * @tparam KernelRows     Rows of the convolution kernel.
 * @tparam KernelCols     Columns of the convolution kernel.
 */
template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerBatchedGEMMKernel : public INEWinogradLayerBatchedGEMMKernel<TIn, TOut>
{
public:
    using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    using WinogradConv = typename WinogradBase::template Convolution<TIn, TOut>;
    using MultiGEMM    = winograd::BatchedBlockedGemm<WinogradConv::M_BLOCK, WinogradConv::N_BLOCK, TIn, TOut>;

    // Output tile dimensions, exposed via get_output_tile_rows()/get_output_tile_cols().
    static const int _output_tile_rows = OutputTileRows;
    static const int _output_tile_cols = OutputTileCols;

    const char *name() const override
    {
        return "NEWinogradLayerBatchedGEMMKernel";
    }
    /** Constructor */
    NEWinogradLayerBatchedGEMMKernel();

    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerBatchedGEMMKernel(const NEWinogradLayerBatchedGEMMKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerBatchedGEMMKernel &operator=(const NEWinogradLayerBatchedGEMMKernel &) = delete;
    /** Allow instances of this class to be moved */
    NEWinogradLayerBatchedGEMMKernel(NEWinogradLayerBatchedGEMMKernel &&) = default;
    /** Allow instances of this class to be moved */
    NEWinogradLayerBatchedGEMMKernel &operator=(NEWinogradLayerBatchedGEMMKernel &&) = default;

    ~NEWinogradLayerBatchedGEMMKernel() = default;

    // Inherited methods overridden:

    unsigned int get_number_gemms() const override;
    int get_output_tile_rows() const override;
    int get_output_tile_cols() const override;
    int get_number_blocks() const override;

    /** Initialise the kernel
     *
     * Note: a_ptr and b_ptr are read-only for this kernel (const TIn *), so they
     * are documented as inputs; only c_ptr is written.
     *
     * @param[in]  n_gemms         Number of GEMMs to compute.
     * @param[in]  M               in_shape.n_batches * tile_rows * tile_cols.
     * @param[in]  K               Number of channels in the input tensor.
     * @param[in]  N               Number of channels in the output tensor.
     * @param[in]  a_matrix_stride Stride between input matrices.
     * @param[in]  a_row_stride    Row stride inside input matrix.
     * @param[in]  b_matrix_stride Stride between weights matrices.
     * @param[in]  b_row_stride    Row stride inside the weights matrix.
     * @param[in]  c_matrix_stride Stride between output matrices.
     * @param[in]  c_row_stride    Row stride inside the output matrix.
     * @param[in]  a_ptr           Input workspace.
     * @param[in]  b_ptr           Kernel workspace.
     * @param[out] c_ptr           Output workspace.
     */
    void configure(
        const unsigned int n_gemms,
        const int M, const int K, const int N,
        const int        a_matrix_stride,
        const int        a_row_stride,
        const int        b_matrix_stride,
        const int        b_row_stride,
        const int        c_matrix_stride,
        const int        c_row_stride,
        const TIn *const a_ptr,
        const TIn *const b_ptr,
        TOut *const      c_ptr) override;

    void run(const Window &window, const ThreadInfo &info) override;

private:
    std::unique_ptr<MultiGEMM> _gemms;
};
479
480} // namespace arm_compute
481#endif /*__ARM_COMPUTE_NEGEMMWINOGRADLAYERKERNEL_H__*/