blob: b2e44f8e09eb1b1268f28bc08b120f7bb7cd49bf [file] [log] [blame]
Pablo Tello89519332017-11-17 11:52:36 +00001/*
Pablo Tello9ceebbe2018-01-10 16:44:13 +00002 * Copyright (c) 2017-2018 ARM Limited.
Pablo Tello89519332017-11-17 11:52:36 +00003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h"
25
26#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/ITensor.h"
29#include "arm_compute/core/TensorInfo.h"
Pablo Tello3d4968a2017-12-04 15:03:35 +000030#include "support/ToolchainSupport.h"
31
Pablo Tello89519332017-11-17 11:52:36 +000032namespace arm_compute
33{
Pablo Tello52140b42018-01-30 14:48:11 +000034//Batched Gemms
template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
NEWinogradLayerBatchedGEMMKernel<TIn, TOut, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::NEWinogradLayerBatchedGEMMKernel()
    // The batched-GEMM engine is created later, in configure().
    : _gemms()
{
}
40
Pablo Tellof6c572c2018-02-14 12:47:30 +000041template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
42void NEWinogradLayerBatchedGEMMKernel<TIn, TOut, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::configure(
Pablo Tello52140b42018-01-30 14:48:11 +000043 const unsigned int n_gemms,
44 const int M, const int K, const int N,
Pablo Tellof6c572c2018-02-14 12:47:30 +000045 const int a_matrix_stride,
46 const int a_row_stride,
47 const int b_matrix_stride,
48 const int b_row_stride,
49 const int c_matrix_stride,
50 const int c_row_stride,
51 const TIn *const a_ptr,
52 const TIn *const b_ptr,
53 TOut *const c_ptr)
Pablo Tello3d4968a2017-12-04 15:03:35 +000054{
Pablo Tello52140b42018-01-30 14:48:11 +000055 _gemms = support::cpp14::make_unique<MultiGEMM>(n_gemms, M, K, N, a_matrix_stride, a_row_stride, b_matrix_stride, b_row_stride, c_matrix_stride, c_row_stride, a_ptr, b_ptr, c_ptr);
Pablo Tello02541fb2017-12-15 09:48:59 +000056 Window win;
Pablo Tello52140b42018-01-30 14:48:11 +000057 auto win_last = _gemms->get_window();
Pablo Tello9ceebbe2018-01-10 16:44:13 +000058 win.set(Window::DimX, Window::Dimension(0, win_last, 1));
Pablo Tello89519332017-11-17 11:52:36 +000059 INEKernel::configure(win);
60}
61
Pablo Tellof6c572c2018-02-14 12:47:30 +000062template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
63void NEWinogradLayerBatchedGEMMKernel<TIn, TOut, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::run(const Window &window, const ThreadInfo &info)
Pablo Tello89519332017-11-17 11:52:36 +000064{
Pablo Tello89519332017-11-17 11:52:36 +000065 ARM_COMPUTE_UNUSED(info);
66 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
Pablo Tello02541fb2017-12-15 09:48:59 +000067 const size_t first_gemm = window.x().start();
68 const size_t last_gemm = window.x().end();
Pablo Tello52140b42018-01-30 14:48:11 +000069 _gemms->run(first_gemm, last_gemm);
Pablo Tello89519332017-11-17 11:52:36 +000070}
Pablo Tellod6ca4782018-01-23 09:36:04 +000071
template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
unsigned int NEWinogradLayerBatchedGEMMKernel<TIn, TOut, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::get_number_gemms() const
{
    // Compile-time constant supplied by the underlying Winograd implementation.
    return WinogradBase::N_GEMMS;
}
77
template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
int NEWinogradLayerBatchedGEMMKernel<TIn, TOut, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::get_output_tile_rows() const
{
    // Output tile height of this kernel specialisation.
    return _output_tile_rows;
}
83
template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
int NEWinogradLayerBatchedGEMMKernel<TIn, TOut, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::get_output_tile_cols() const
{
    // Output tile width of this kernel specialisation.
    return _output_tile_cols;
}
89
template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
int NEWinogradLayerBatchedGEMMKernel<TIn, TOut, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::get_number_blocks() const
{
    // GEMM blocking factor exposed by the underlying Winograd convolution.
    return WinogradConv::N_BLOCK;
}
95
96template class NEWinogradLayerBatchedGEMMKernel<float, float, 2, 2, 3, 3>;
97template class NEWinogradLayerBatchedGEMMKernel<float, float, 2, 2, 5, 5>;
Pablo Tellod6ca4782018-01-23 09:36:04 +000098
99// Weights transform
100
Pablo Tellof6c572c2018-02-14 12:47:30 +0000101template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
102unsigned int NEWinogradLayerTransformWeightsKernel<T, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::get_weight_storage_size(int n_output_channels, int n_input_channels) const
Pablo Tellod6ca4782018-01-23 09:36:04 +0000103{
Pablo Tello52140b42018-01-30 14:48:11 +0000104 const KernelShape shape(n_output_channels, KernelRows, KernelCols, n_input_channels);
105 return static_cast<unsigned int>(
Pablo Tellof6c572c2018-02-14 12:47:30 +0000106 // WinogradConv returns the size in bytes, we divide by `sizeof(T)` to express that in units of T
107 WinogradConv::get_kernel_storage_size(shape) / sizeof(T));
Pablo Tello52140b42018-01-30 14:48:11 +0000108}
109
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
NEWinogradLayerTransformWeightsKernel<T, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::NEWinogradLayerTransformWeightsKernel()
    // The weights-transform object is created later, in configure().
    : _transform()
{
}
115
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
int NEWinogradLayerTransformWeightsKernel<T, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::get_matrix_stride(const KernelShape &kernel_shape) const
{
    // Delegates to the underlying Winograd convolution implementation.
    return WinogradConv::get_kernel_matrix_stride(kernel_shape);
}
121
122template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
123void NEWinogradLayerTransformWeightsKernel<T, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::configure(
Pablo Tello52140b42018-01-30 14:48:11 +0000124 const ITensor *weights_hwio,
Pablo Tellof6c572c2018-02-14 12:47:30 +0000125 T *const output,
Pablo Tello52140b42018-01-30 14:48:11 +0000126 const int matrix_stride, /** Stride across matrices in the output. */
127 const int n_output_channels, /** Number of filters. */
128 const int n_input_channels) /** Number of channels in each filter. */
129{
130 const int matrix_row_stride = roundup(n_output_channels, WinogradConv::N_BLOCK);
Pablo Tellof6c572c2018-02-14 12:47:30 +0000131 _transform = support::cpp14::make_unique<WeightsTransform>(reinterpret_cast<T *>(weights_hwio->buffer()), output, matrix_stride, matrix_row_stride, n_output_channels,
Pablo Tello52140b42018-01-30 14:48:11 +0000132 n_input_channels);
Pablo Tellod6ca4782018-01-23 09:36:04 +0000133 Window win;
Pablo Tello52140b42018-01-30 14:48:11 +0000134 auto win_last = _transform->get_window();
Pablo Tellod6ca4782018-01-23 09:36:04 +0000135 win.set(Window::DimX, Window::Dimension(0, win_last, 1));
136 INEKernel::configure(win);
137}
138
Pablo Tellof6c572c2018-02-14 12:47:30 +0000139template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
140void NEWinogradLayerTransformWeightsKernel<T, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::run(const Window &window, const ThreadInfo &info)
Pablo Tellod6ca4782018-01-23 09:36:04 +0000141{
142 ARM_COMPUTE_UNUSED(info);
143 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
144 const size_t fst = window.x().start();
145 const size_t lst = window.x().end();
Pablo Tello52140b42018-01-30 14:48:11 +0000146 _transform->run(fst, lst);
Pablo Tellod6ca4782018-01-23 09:36:04 +0000147}
148
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
bool NEWinogradLayerTransformWeightsKernel<T, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::is_parallelisable() const
{
    // Tell the scheduler not to split this kernel's window across threads.
    return false;
}
154
Pablo Tellof6c572c2018-02-14 12:47:30 +0000155template class NEWinogradLayerTransformWeightsKernel<float, 2, 2, 3, 3>;
156template class NEWinogradLayerTransformWeightsKernel<float, 2, 2, 5, 5>;
Pablo Tello52140b42018-01-30 14:48:11 +0000157
Pablo Tellod6ca4782018-01-23 09:36:04 +0000158// Input transform
159
Pablo Tellof6c572c2018-02-14 12:47:30 +0000160template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
161unsigned int NEWinogradLayerTransformInputKernel<T, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::get_input_storage_size(
Pablo Tello52140b42018-01-30 14:48:11 +0000162 int n_batches, /** Number of batches in the input tensor. */
163 int n_channels, /** Number of feature maps in the input tensor. */
164 int n_rows, /** Number of rows in each feature map. */
165 int n_cols, /** Number of columns in each feature map. */
166 bool same_padding /** Use "SAME" padding, otherwise use "VALID". */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000167) const
Pablo Tellod6ca4782018-01-23 09:36:04 +0000168{
Pablo Tello52140b42018-01-30 14:48:11 +0000169 // Construct shapes for the input and kernel tensors.
170 const Tensor4DShape input_shape(n_batches, n_rows, n_cols, n_channels);
171 const KernelShape kern_shape(1, KernelRows, KernelCols, n_channels);
172 const PaddingType padding = (same_padding) ? PADDING_SAME : PADDING_VALID;
173 // Return the size, converted into units of TIn
Pablo Tellof6c572c2018-02-14 12:47:30 +0000174 return static_cast<unsigned int>(WinogradConv::get_input_storage_size(kern_shape, input_shape, padding) / sizeof(T));
Pablo Tello52140b42018-01-30 14:48:11 +0000175}
176
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
int NEWinogradLayerTransformInputKernel<T, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::get_matrix_stride(
    const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const
{
    // Delegates to the underlying Winograd convolution implementation.
    return WinogradConv::get_input_matrix_stride(kernel_shape, input_shape, padding_type);
}
183
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
NEWinogradLayerTransformInputKernel<T, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::NEWinogradLayerTransformInputKernel()
    // The input-transform object is created later, in configure().
    : _transform()
{
}
189
Pablo Tellof6c572c2018-02-14 12:47:30 +0000190template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
191void NEWinogradLayerTransformInputKernel<T, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::configure(
192 const T *const input, /** Input tensor data */
193 const int n_batches, /** Number of batches in input tensor. */
194 const int n_rows, /** Number of rows in input tensor. */
195 const int n_cols, /** Number of columns in input tensor. */
196 const int n_channels, /** Number of channels in input tensor. */
197 const PaddingType padding, /** Padding type. */
198 T *const output, /** Base of output matrices. */
199 const int matrix_stride) /** Stride between output matrices. */
Pablo Tello52140b42018-01-30 14:48:11 +0000200{
201 // _input_matrix_row_stride(n_input_channels),
202 _transform = support::cpp14::make_unique<InputTransform>(input, n_batches, n_rows, n_cols, n_channels, padding, output, matrix_stride, n_channels);
Pablo Tellod6ca4782018-01-23 09:36:04 +0000203 Window win;
Pablo Tello52140b42018-01-30 14:48:11 +0000204 auto win_last = _transform->get_window();
Pablo Tellod6ca4782018-01-23 09:36:04 +0000205 win.set(Window::DimX, Window::Dimension(0, win_last, 1));
206 INEKernel::configure(win);
207}
208
Pablo Tellof6c572c2018-02-14 12:47:30 +0000209template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
210void NEWinogradLayerTransformInputKernel<T, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::run(const Window &window, const ThreadInfo &info)
Pablo Tellod6ca4782018-01-23 09:36:04 +0000211{
212 ARM_COMPUTE_UNUSED(info);
213 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
214 const size_t fst = window.x().start();
215 const size_t lst = window.x().end();
Pablo Tello52140b42018-01-30 14:48:11 +0000216 _transform->run(fst, lst);
Pablo Tellod6ca4782018-01-23 09:36:04 +0000217}
Pablo Tello52140b42018-01-30 14:48:11 +0000218
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
bool NEWinogradLayerTransformInputKernel<T, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::is_parallelisable() const
{
    // Tell the scheduler not to split this kernel's window across threads.
    return false;
}
224
Pablo Tellof6c572c2018-02-14 12:47:30 +0000225template class NEWinogradLayerTransformInputKernel<float, 2, 2, 3, 3>;
226template class NEWinogradLayerTransformInputKernel<float, 2, 2, 5, 5>;
Pablo Tello52140b42018-01-30 14:48:11 +0000227
Pablo Tellod6ca4782018-01-23 09:36:04 +0000228// Output transform
Pablo Tello52140b42018-01-30 14:48:11 +0000229
Pablo Tellof6c572c2018-02-14 12:47:30 +0000230template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
231unsigned int NEWinogradLayerTransformOutputKernel<T, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::get_output_storage_size(
Pablo Tello52140b42018-01-30 14:48:11 +0000232 int n_batches, /** Number of batches in the output tensor. */
233 int n_rows, /** Number of rows in each feature map of the input tensor. */
234 int n_cols, /** Number of columns in each feature map of the input tensor. */
235 int n_output_channels, /** Number of feature maps in the output tensor. */
236 bool same_padding /** Use "SAME" padding, otherwise use "VALID". */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000237) const
Pablo Tello52140b42018-01-30 14:48:11 +0000238{
239 // Construct shapes for the input and kernel tensors.
240 const Tensor4DShape input_shape(n_batches, n_rows, n_cols, 1);
241 const KernelShape kern_shape(n_output_channels, KernelRows, KernelCols, 1);
242 const PaddingType padding = (same_padding) ? PADDING_SAME : PADDING_VALID;
243
244 // Return the size, converted into units of TOut
245 return static_cast<unsigned int>(
Pablo Tellof6c572c2018-02-14 12:47:30 +0000246 WinogradConv::get_output_storage_size(kern_shape, input_shape, padding) / sizeof(T));
Pablo Tello52140b42018-01-30 14:48:11 +0000247}
248
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
NEWinogradLayerTransformOutputKernel<T, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::NEWinogradLayerTransformOutputKernel()
    // All state is supplied later via configure(); pointers start null, sizes at zero.
    : _biases(nullptr), _output_workspace(nullptr), _matrix_stride(0), _matrix_row_stride(0), _output(nullptr), _n_batches(0), _n_rows(0), _n_cols(0), _n_channels(0)
{
}
254
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
int NEWinogradLayerTransformOutputKernel<T, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::get_matrix_stride(
    const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const
{
    // Delegates to the underlying Winograd convolution implementation.
    return WinogradConv::get_output_matrix_stride(kernel_shape, input_shape, padding_type);
}
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
Tensor4DShape NEWinogradLayerTransformOutputKernel<T, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::get_output_shape(
    const KernelShape &kernel_shape, const Tensor4DShape &in_shape, const PaddingType padding) const
{
    // Delegates to the underlying Winograd convolution implementation.
    return WinogradConv::get_output_shape(kernel_shape, in_shape, padding);
}
267
268template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
269void NEWinogradLayerTransformOutputKernel<T, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::configure(
270 const ITensor *biases,
271 const T *const output_workingspace,
272 const int matrix_stride,
273 T *const output,
274 const int n_batches,
275 const int n_rows,
276 const int n_cols,
277 const int n_channels)
Pablo Tellod6ca4782018-01-23 09:36:04 +0000278{
Pablo Tellod6ca4782018-01-23 09:36:04 +0000279 _biases = biases;
280 _output_workspace = output_workingspace;
281 _matrix_stride = matrix_stride;
Pablo Tello52140b42018-01-30 14:48:11 +0000282 _matrix_row_stride = roundup(n_channels, WinogradConv::N_BLOCK);
Pablo Tellod6ca4782018-01-23 09:36:04 +0000283 _output = output;
284 _n_batches = n_batches;
285 _n_rows = n_rows;
286 _n_cols = n_cols;
287 _n_channels = n_channels;
288
289 // We don't have the biases buffer at this stage as it hasn't been allocated, we pass in nullptr OutputTransform is only used here to compute the window
290 OutputTransform output_transform(_output_workspace, _matrix_stride, _matrix_row_stride, nullptr, _output, _n_batches, _n_rows, _n_cols, _n_channels);
291 Window win;
292 auto win_last = output_transform.get_window();
293 win.set(Window::DimX, Window::Dimension(0, win_last, 1));
294 INEKernel::configure(win);
295}
296
Pablo Tellof6c572c2018-02-14 12:47:30 +0000297template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
298void NEWinogradLayerTransformOutputKernel<T, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::run(const Window &window, const ThreadInfo &info)
Pablo Tellod6ca4782018-01-23 09:36:04 +0000299{
300 ARM_COMPUTE_UNUSED(info);
301 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
302 ARM_COMPUTE_ERROR_ON_NULLPTR(_biases->buffer());
303 ARM_COMPUTE_ERROR_ON_NULLPTR(_output_workspace);
304 ARM_COMPUTE_ERROR_ON_NULLPTR(_output);
305
Pablo Tellod6ca4782018-01-23 09:36:04 +0000306 OutputTransform output_transform(_output_workspace, _matrix_stride, _matrix_row_stride,
Pablo Tellof6c572c2018-02-14 12:47:30 +0000307 reinterpret_cast<T *>(_biases->buffer()), _output,
Pablo Tellod6ca4782018-01-23 09:36:04 +0000308 _n_batches, _n_rows, _n_cols, _n_channels);
309
310 // The code below cannot be moved to configure because biases hasn't been allocated at that point
311 const size_t fst = window.x().start();
312 const size_t lst = window.x().end();
313 output_transform.run(fst, lst);
314}
315
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
bool NEWinogradLayerTransformOutputKernel<T, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::is_parallelisable() const
{
    // Tell the scheduler not to split this kernel's window across threads.
    return false;
}
321
Pablo Tellof6c572c2018-02-14 12:47:30 +0000322template class NEWinogradLayerTransformOutputKernel<float, 2, 2, 3, 3>;
323template class NEWinogradLayerTransformOutputKernel<float, 2, 2, 5, 5>;
Pablo Tello52140b42018-01-30 14:48:11 +0000324
Pablo Tello89519332017-11-17 11:52:36 +0000325} // namespace arm_compute