/*
 * Copyright (c) 2017-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef __ARM_COMPUTE_NEGEMMWINOGRADLAYERKERNEL_H__
#define __ARM_COMPUTE_NEGEMMWINOGRADLAYERKERNEL_H__

#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/NEON/kernels/winograd/batched_blocked_gemm.hpp"
#include "arm_compute/core/NEON/kernels/winograd/convolution.hpp"
#include "arm_compute/core/NEON/kernels/winograd/tensor.hpp"
#include "arm_compute/core/NEON/kernels/winograd/winograd_gemm.hpp"

namespace arm_compute
{
class ITensor;

template <int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformInputKernel : public INEKernel
{
public:
    /** Determine how much memory (in units of TIn) to allocate for the
     * transformed input.
     *
     * @param[in] n_batches    Number of batches in the input tensor.
     * @param[in] n_channels   Number of feature maps in the input tensor.
     * @param[in] n_rows       Number of rows in each feature map.
     * @param[in] n_cols       Number of columns in each feature map.
     * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
     */
    static unsigned int get_input_storage_size(
        int  n_batches,
        int  n_channels,
        int  n_rows,
        int  n_cols,
        bool same_padding);
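    // A minimal sizing sketch (illustrative only, not part of the API contract):
    // the returned count is in elements of TIn (float for this kernel), so a
    // caller could allocate a workspace as below. The shape values are hypothetical.
    //
    //   const unsigned int input_storage =
    //       NEWinogradLayerTransformInputKernel<2, 2, 3, 3>::get_input_storage_size(
    //           1 /* n_batches */, 64 /* n_channels */, 56 /* n_rows */, 56 /* n_cols */,
    //           true /* same_padding */);
    //   std::vector<float> input_workspace(input_storage);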

    NEWinogradLayerTransformInputKernel();
    const char *name() const override
    {
        return "NEWinogradLayerTransformInputKernel";
    }

    /** Configure the input transform kernel.
     *
     * @param[in]  input         Input tensor data.
     * @param[in]  n_batches     Number of batches in input tensor.
     * @param[in]  n_rows        Number of rows in input tensor.
     * @param[in]  n_cols        Number of columns in input tensor.
     * @param[in]  n_channels    Number of channels in input tensor.
     * @param[in]  padding       Padding type.
     * @param[out] output        Base of output matrices.
     * @param[in]  matrix_stride Stride between output matrices.
     */
    void configure(
        const float *const input,
        const int          n_batches,
        const int          n_rows,
        const int          n_cols,
        const int          n_channels,
        const PaddingType  padding,
        float *const       output,
        const int          matrix_stride);

    // Inherited methods overridden:
    void run(const Window &window, const ThreadInfo &info) override;
    bool is_parallelisable() const override;

private:
    using WinogradBase   = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    using WinogradConv   = typename WinogradBase::template Convolution<float, float>;
    using InputTransform = typename WinogradBase::template InputTransform<float>;
    std::unique_ptr<InputTransform> _transform;
};

template <int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformOutputKernel : public INEKernel
{
public:
    /** Determine how much memory (in units of TOut) to allocate for the
     * (Winograd domain) output.
     *
     * @param[in] n_batches         Number of batches in the output tensor.
     * @param[in] n_rows            Number of rows in each feature map of the input tensor.
     * @param[in] n_cols            Number of columns in each feature map of the input tensor.
     * @param[in] n_output_channels Number of feature maps in the output tensor.
     * @param[in] same_padding      Use "SAME" padding, otherwise use "VALID".
     */
    static unsigned int get_output_storage_size(
        int  n_batches,
        int  n_rows,
        int  n_cols,
        int  n_output_channels,
        bool same_padding);

    const char *name() const override
    {
        return "NEWinogradLayerTransformOutputKernel";
    }
    /** Constructor */
    NEWinogradLayerTransformOutputKernel();

    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformOutputKernel(const NEWinogradLayerTransformOutputKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformOutputKernel &operator=(const NEWinogradLayerTransformOutputKernel &) = delete;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformOutputKernel(NEWinogradLayerTransformOutputKernel &&) = default;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformOutputKernel &operator=(NEWinogradLayerTransformOutputKernel &&) = default;

    ~NEWinogradLayerTransformOutputKernel() = default;

    /** Configure the output transform kernel.
     *
     * @param[in]  biases              Pointer to the biases tensor.
     * @param[in]  output_workingspace Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride       Output matrix stride; can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride().
     * @param[out] output              Pointer to NHWC ordered output tensor, in the spatial domain.
     * @param[in]  n_batches           Number of batches in the input tensor.
     * @param[in]  n_rows              Number of rows in the output tensor.
     * @param[in]  n_cols              Number of columns in the output tensor.
     * @param[in]  n_channels          Number of feature maps in the output tensor.
     */
    void configure(
        const ITensor     *biases,
        const float *const output_workingspace,
        const int          matrix_stride,
        float *const       output,
        const int          n_batches,
        const int          n_rows,
        const int          n_cols,
        const int          n_channels);
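    // An illustrative wiring sketch (hypothetical shapes; biases, matrix_stride
    // and output_ptr are assumed to be set up elsewhere): the Winograd-domain
    // workspace sized by get_output_storage_size() above is what
    // output_workingspace points to when configure() is called.
    //
    //   using OutputKernel = NEWinogradLayerTransformOutputKernel<2, 2, 3, 3>;
    //   std::vector<float> output_workspace(OutputKernel::get_output_storage_size(
    //       1 /* n_batches */, 56 /* n_rows */, 56 /* n_cols */, 64 /* n_output_channels */, true));
    //   OutputKernel transform;
    //   transform.configure(biases, output_workspace.data(), matrix_stride,
    //                       output_ptr, 1 /* n_batches */, 56, 56, 64 /* n_channels */);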

    // Inherited methods overridden:
    void run(const Window &window, const ThreadInfo &info) override;
    bool is_parallelisable() const override;

private:
    using WinogradBase    = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    using WinogradConv    = typename WinogradBase::template Convolution<float, float>;
    using OutputTransform = typename WinogradBase::template OutputTransform<float>;

    const ITensor *_biases;
    const float   *_output_workspace;
    int            _matrix_stride;
    int            _matrix_row_stride;
    float         *_output;
    int            _n_batches;
    int            _n_rows;
    int            _n_cols;
    int            _n_channels;
};

template <int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformWeightsKernel final : public INEKernel
{
public:
    /** Determine how much memory (in units of TIn) to allocate for the
     * transformed weights.
     *
     * @param[in] n_output_channels Number of output feature maps.
     * @param[in] n_input_channels  Number of input feature maps.
     */
    static unsigned int get_weight_storage_size(int n_output_channels, int n_input_channels);
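    // The same sizing pattern as for the input and output workspaces above,
    // sketched with hypothetical channel counts:
    //
    //   const unsigned int weight_storage =
    //       NEWinogradLayerTransformWeightsKernel<2, 2, 3, 3>::get_weight_storage_size(
    //           64 /* n_output_channels */, 64 /* n_input_channels */);
    //   std::vector<float> weight_workspace(weight_storage);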

    NEWinogradLayerTransformWeightsKernel();
    const char *name() const override
    {
        return "NEWinogradLayerTransformWeightsKernel";
    }
    /** Configure the weights transform kernel.
     *
     * @param[in]  weights_hwio      Pointer to the weights tensor.
     * @param[out] output            Pointer to working space for the transformed weights in the Winograd domain.
     * @param[in]  matrix_stride     Stride across matrices in the output workspace.
     * @param[in]  n_output_channels Number of filters.
     * @param[in]  n_input_channels  Number of channels in each filter.
     */
    void configure(
        const ITensor *weights_hwio,
        float *const   output,
        const int      matrix_stride,
        const int      n_output_channels,
        const int      n_input_channels);

    // Inherited methods overridden:

    void run(const Window &window, const ThreadInfo &info) override;
    bool is_parallelisable() const override;

private:
    using WinogradBase     = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    using WinogradConv     = typename WinogradBase::template Convolution<float, float>;
    using WeightsTransform = typename WinogradBase::template WeightsTransform<float>;
    std::unique_ptr<WeightsTransform> _transform;
};

template <int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerKernel : public INEKernel
{
public:
    using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    using WinogradConv = typename WinogradBase::template Convolution<float, float>;
    using MultiGEMM    = winograd::BatchedBlockedGemm<WinogradConv::M_BLOCK, WinogradConv::N_BLOCK, float, float>;

    static const int _output_tile_rows = OutputTileRows;
    static const int _output_tile_cols = OutputTileCols;

    const char *name() const override
    {
        return "NEWinogradLayerKernel";
    }
    /** Constructor */
    NEWinogradLayerKernel();

    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerKernel(const NEWinogradLayerKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerKernel &operator=(const NEWinogradLayerKernel &) = delete;
    /** Allow instances of this class to be moved */
    NEWinogradLayerKernel(NEWinogradLayerKernel &&) = default;
    /** Allow instances of this class to be moved */
    NEWinogradLayerKernel &operator=(NEWinogradLayerKernel &&) = default;

    ~NEWinogradLayerKernel() = default;

    /** Initialise the kernel.
     *
     * @param[in]  n_gemms         Number of GEMMs to compute.
     * @param[in]  M               Number of rows in each GEMM, i.e. in_shape.n_batches * tile_rows * tile_cols.
     * @param[in]  K               Number of channels in the input tensor.
     * @param[in]  N               Number of channels in the output tensor.
     * @param[in]  a_matrix_stride Stride between input matrices.
     * @param[in]  a_row_stride    Row stride inside each input matrix.
     * @param[in]  b_matrix_stride Stride between weights matrices.
     * @param[in]  b_row_stride    Row stride inside each weights matrix.
     * @param[in]  c_matrix_stride Stride between output matrices.
     * @param[in]  c_row_stride    Row stride inside each output matrix.
     * @param[in]  a_ptr           Input workspace.
     * @param[in]  b_ptr           Kernel workspace.
     * @param[out] c_ptr           Output workspace.
     */
    void configure(
        const unsigned int n_gemms,
        const int M, const int K, const int N,
        const int          a_matrix_stride,
        const int          a_row_stride,
        const int          b_matrix_stride,
        const int          b_row_stride,
        const int          c_matrix_stride,
        const int          c_row_stride,
        const float *const a_ptr,
        const float *const b_ptr,
        float *const       c_ptr);
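    // Worked example for the GEMM dimensions (hypothetical configuration: 2x2
    // output tiles, 3x3 kernel, one batch of 56x56 feature maps, SAME padding):
    // the output splits into ceil(56 / 2) * ceil(56 / 2) = 28 * 28 tiles, so
    //   M = n_batches * tile_rows * tile_cols = 1 * 28 * 28 = 784,
    // while K is the input channel count and N the output channel count.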

    // Inherited methods overridden:
    void run(const Window &window, const ThreadInfo &info) override;

private:
    std::unique_ptr<MultiGEMM> _gemms;
};

} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEGEMMWINOGRADLAYERKERNEL_H__*/