Blame - src/runtime/NEON/functions/NEWinogradLayer.cpp - ml/ComputeLibrary

blob: f82845c7ad58addbe88130db915492a7ae4cfce1 [file] [log] [blame]

Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	1	/*
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	2	* Copyright (c) 2017-2018 ARM Limited.
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "arm_compute/runtime/NEON/functions/NEWinogradLayer.h"
				25
Isabella Gottardi	6acc6ad	2018-02-02 17:19:18 +0000	[diff] [blame]	26	#include "arm_compute/core/Error.h"
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	27	#include "arm_compute/core/Utils.h"
				28	#include "arm_compute/core/Validate.h"
				29	#include "arm_compute/runtime/NEON/NEScheduler.h"
				30	#include "support/ToolchainSupport.h"
				31
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	32	#include "arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h"
				33
Georgios Pinitas	4074c99	2018-01-30 18:13:46 +0000	[diff] [blame]	34	#include "arm_compute/core/NEON/kernels/convolution/winograd/winograd_gemm.hpp"
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	35
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	36	namespace
				37	{
				38	inline Tensor4DShape internal_get_input_shape(const arm_compute::ITensor *input)
				39	{
				40	const int in_width = input->info()->dimension(0);
				41	const int in_height = input->info()->dimension(1);
				42	const int in_batches = input->info()->dimension(3);
				43	const int in_channels = input->info()->dimension(2);
				44	return Tensor4DShape({ in_batches, in_height, in_width, in_channels });
				45	}
				46	} /* namespace */
				47
				48	namespace arm_compute
				49	{
Isabella Gottardi	6acc6ad	2018-02-02 17:19:18 +0000	[diff] [blame]	50	namespace
				51	{
				52	Status validate_arguments(const ITensorInfo input, const ITensorInfo weights, const ITensorInfo biases, const ITensorInfo output, const PadStrideInfo &conv_info)
				53	{
				54	ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
				55	ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, biases);
				56	ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->dimension(0) != 3 && weights->dimension(0) != 5, "Only 3 and 5 kernels are supported");
				57	ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4);
				58
				59	if(biases != nullptr)
				60	{
				61	ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
				62	ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
				63	}
				64
				65	// Get parameters from conv_info
				66	unsigned int stride_x = 0;
				67	unsigned int stride_y = 0;
				68	std::tie(stride_x, stride_y) = conv_info.stride();
				69	ARM_COMPUTE_RETURN_ERROR_ON_MSG(stride_y != 1 \|\| stride_x != 1, "Winograd layer only supports unit strides.");
				70
				71	ARM_COMPUTE_UNUSED(output);
				72
				73	return Status{};
				74	}
				75	} //namespace
				76
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	77	NEWinogradLayer::NEWinogradLayer(std::shared_ptr<IMemoryManager> memory_manager)
Isabella Gottardi	3f217ec	2018-02-12 14:59:19 +0000	[diff] [blame^]	78	: _memory_group(std::move(memory_manager)), _batched_gemm_kernel(nullptr), _transform_input_kernel(nullptr), _transform_output_kernel(nullptr), _transform_weights_kernel(nullptr),
				79	_activationlayer_function(), _permute_input(), _permute_weights(), _permute_output(), _input_workspace(), _output_workspace(), _kernel_storage(), _input_nhwc(), _output_nhwc(), _weights_hwio(),
				80	_input(), _weights(), _output(), _reshaped_kernel(false), _is_activationlayer_enabled(false)
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	81	{
				82	} /* arm_compute */
				83
Isabella Gottardi	3f217ec	2018-02-12 14:59:19 +0000	[diff] [blame^]	84	void NEWinogradLayer::configure(const ITensor input, const ITensor weights, const ITensor biases, ITensor output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	85	{
Isabella Gottardi	6acc6ad	2018-02-02 17:19:18 +0000	[diff] [blame]	86	ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, biases, output);
				87	ARM_COMPUTE_UNUSED(conv_info);
				88	ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), weights->info(), biases->info(), output->info(), conv_info));
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	89
				90	_weights = weights;
				91	_input = input;
				92	_output = output;
				93
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	94	std::unique_ptr<INEWinogradLayerBatchedGEMMKernel<float, float>> batched_gemm_kernel;
				95	std::unique_ptr<INEWinogradLayerTransformInputKernel<float>> transform_input_kernel;
				96	std::unique_ptr<INEWinogradLayerTransformWeightsKernel<float>> transform_weights_kernel;
				97	std::unique_ptr<INEWinogradLayerTransformOutputKernel<float>> transform_output_kernel;
				98
				99	switch(weights->info()->dimension(0))
				100	{
				101	case 3:
				102	{
				103	batched_gemm_kernel = support::cpp14::make_unique<NEWinogradLayerBatchedGEMMKernel<float, float, 2, 2, 3, 3>>();
				104	transform_input_kernel = support::cpp14::make_unique<NEWinogradLayerTransformInputKernel<float, 2, 2, 3, 3>>();
				105	transform_weights_kernel = support::cpp14::make_unique<NEWinogradLayerTransformWeightsKernel<float, 2, 2, 3, 3>>();
				106	transform_output_kernel = support::cpp14::make_unique<NEWinogradLayerTransformOutputKernel<float, 2, 2, 3, 3>>();
				107	break;
				108	}
				109	case 5:
				110	{
				111	batched_gemm_kernel = support::cpp14::make_unique<NEWinogradLayerBatchedGEMMKernel<float, float, 2, 2, 5, 5>>();
				112	transform_input_kernel = support::cpp14::make_unique<NEWinogradLayerTransformInputKernel<float, 2, 2, 5, 5>>();
				113	transform_weights_kernel = support::cpp14::make_unique<NEWinogradLayerTransformWeightsKernel<float, 2, 2, 5, 5>>();
				114	transform_output_kernel = support::cpp14::make_unique<NEWinogradLayerTransformOutputKernel<float, 2, 2, 5, 5>>();
				115	break;
				116	}
				117	default:
				118	{
				119	ARM_COMPUTE_ERROR("Not supported.");
				120	break;
				121	}
				122	}
				123
Pablo Tello	679463a	2018-02-06 11:47:59 +0000	[diff] [blame]	124	const PaddingType use_padding_type = (conv_info.pad_left() != 0u) ? PADDING_SAME : PADDING_VALID;
				125	const bool use_same_padding = use_padding_type == PADDING_SAME;
				126
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	127	// Get parameters from conv_info
				128	unsigned int stride_x = 0;
				129	unsigned int stride_y = 0;
				130	std::tie(stride_x, stride_y) = conv_info.stride();
				131	ARM_COMPUTE_ERROR_ON_MSG(stride_y != 1 \|\| stride_x != 1, "Winograd layer only supports unit strides.");
				132
				133	// Get convolved dimensions
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	134	const int in_channels = input->info()->dimension(2);
				135	const int out_channels = output->info()->dimension(2);
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	136
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	137	const Tensor4DShape in_shape(internal_get_input_shape(input));
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	138	const size_t data_type_size = input->info()->element_size();
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	139	// Get the memory required to instantiate a new Winograd operator.
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	140	constexpr size_t storage_alignment = 64;
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	141	const size_t kernel_storage_size = transform_weights_kernel->get_weight_storage_size(out_channels, in_channels) * data_type_size;
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	142	_kernel_storage.allocator()->init(TensorInfo(TensorShape{ (kernel_storage_size + storage_alignment - 1) }, 1, DataType::U8));
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	143	_kernel_storage.allocator()->allocate();
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	144	// Input storage
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	145	const size_t input_storage_size = transform_input_kernel->get_input_storage_size(in_shape.n_batches, in_shape.n_channels, in_shape.n_rows, in_shape.n_cols, use_same_padding) * data_type_size;
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	146	_input_workspace.allocator()->init(TensorInfo(TensorShape{ (input_storage_size + storage_alignment - 1) }, 1, DataType::U8));
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	147	_input_workspace.allocator()->allocate();
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	148
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	149	// Output storage
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	150	const size_t output_storage_size = transform_output_kernel->get_output_storage_size(in_shape.n_batches, in_shape.n_rows, in_shape.n_cols, out_channels, use_same_padding) * data_type_size;
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	151	_output_workspace.allocator()->init(TensorInfo(TensorShape{ (output_storage_size + storage_alignment - 1) }, 1, DataType::U8));
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	152	_output_workspace.allocator()->allocate();
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	153
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	154	// configure and allocate dst tensor to be used to convert from winograd domain to spatial domain when calling to reshape_output()
				155	TensorInfo info(TensorShape(_output->info()->dimension(2), _output->info()->dimension(0),
				156	_output->info()->dimension(1), _output->info()->dimension(3)),
				157	1, _output->info()->data_type());
				158	_output_nhwc.allocator()->init(info);
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	159	_output_nhwc.allocator()->allocate();
Pablo Tello	02541fb	2017-12-15 09:48:59 +0000	[diff] [blame]	160
				161	// Re-order a weight tensor from [Output feature map x Input feature map x Height x Width] to [Height x Width x Input feature map x Output feature map]
Georgios Pinitas	02ee429	2018-02-15 17:22:36 +0000	[diff] [blame]	162	_permute_weights.configure(weights, &_weights_hwio, PermutationVector(3U, 2U, 0U, 1U));
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	163	_weights_hwio.allocator()->allocate();
				164
Pablo Tello	02541fb	2017-12-15 09:48:59 +0000	[diff] [blame]	165	// configure the kernel to transform the input tensor from NCHW -> NHWC
				166	_permute_input.configure(input, &_input_nhwc, PermutationVector(2U, 0U, 1U));
Pablo Tello	02541fb	2017-12-15 09:48:59 +0000	[diff] [blame]	167	_input_nhwc.allocator()->allocate();
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	168
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	169	const int weights_width = weights->info()->dimension(0);
				170	const int weights_height = weights->info()->dimension(1);
				171	const KernelShape kernel_shape({ out_channels, weights_height, weights_width, in_channels });
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	172
				173	// Configure the InputTransform
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	174	const int input_matrix_stride = transform_input_kernel->get_matrix_stride(kernel_shape, in_shape, use_padding_type);
				175	transform_input_kernel->configure(reinterpret_cast<float *>(_input_nhwc.buffer()), in_shape.n_batches, in_shape.n_rows, in_shape.n_cols, in_shape.n_channels, use_padding_type,
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	176	reinterpret_cast<float *>(_input_workspace.buffer()), input_matrix_stride);
				177
				178	// Configure WeightsTransform
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	179	const int kernel_matrix_stride = transform_weights_kernel->get_matrix_stride(kernel_shape);
				180	transform_weights_kernel->configure(&_weights_hwio, reinterpret_cast<float *>(_kernel_storage.buffer()), kernel_matrix_stride, out_channels, in_channels);
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	181
				182	// Configure OutputTransform
				183	//The biases tensor has not been allocated at this point in time, the output transform will add the biases to the final result in the run() method
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	184	const int output_matrix_stride = transform_output_kernel->get_matrix_stride(kernel_shape, in_shape, use_padding_type);
				185	const auto output_shape(transform_output_kernel->get_output_shape(kernel_shape, in_shape, use_padding_type));
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	186
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	187	transform_output_kernel->configure(biases, reinterpret_cast<float *>(_output_workspace.buffer()),
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	188	output_matrix_stride, reinterpret_cast<float *>(_output_nhwc.buffer()),
				189	in_shape.n_batches, output_shape.n_rows, output_shape.n_cols, out_channels);
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	190
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	191	// Configure Batched GEMMs
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	192	const int output_tile_rows = batched_gemm_kernel->get_output_tile_rows();
				193	const int output_tile_cols = batched_gemm_kernel->get_output_tile_cols();
				194	const int n_block = batched_gemm_kernel->get_number_blocks();
				195	const int tile_rows = iceildiv(output_shape.n_rows, output_tile_rows);
				196	const int tile_cols = iceildiv(output_shape.n_cols, output_tile_cols);
				197	const int m = in_shape.n_batches * tile_rows * tile_cols;
				198	const int k = in_shape.n_channels;
				199	const int n = out_channels;
				200	const int input_matrix_row_stride = in_shape.n_channels;
				201	const int kernel_matrix_row_stride = roundup(out_channels, n_block);
				202	const int output_matrix_row_stride = kernel_matrix_row_stride;
				203	const unsigned n_gemms = batched_gemm_kernel->get_number_gemms();
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	204
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	205	batched_gemm_kernel->configure(n_gemms, m, k, n,
				206	input_matrix_stride, input_matrix_row_stride,
				207	kernel_matrix_stride, kernel_matrix_row_stride,
				208	output_matrix_stride, output_matrix_row_stride,
				209	reinterpret_cast<float *>(_input_workspace.buffer()),
				210	reinterpret_cast<float *>(_kernel_storage.buffer()),
				211	reinterpret_cast<float *>(_output_workspace.buffer()));
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	212
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	213	// Reorder the convoluted output to ACL's ordering NCHW
				214	_permute_output.configure(&_output_nhwc, _output, PermutationVector(1U, 2U, 0U));
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	215
				216	_transform_input_kernel = std::move(transform_input_kernel);
				217	_transform_weights_kernel = std::move(transform_weights_kernel);
				218	_transform_output_kernel = std::move(transform_output_kernel);
				219	_batched_gemm_kernel = std::move(batched_gemm_kernel);
Isabella Gottardi	3f217ec	2018-02-12 14:59:19 +0000	[diff] [blame^]	220
				221	//Configure Activation Layer
				222	_is_activationlayer_enabled = act_info.enabled();
				223	if(_is_activationlayer_enabled)
				224	{
				225	_activationlayer_function.configure(output, nullptr, act_info);
				226	}
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	227	}
				228
				229	void NEWinogradLayer::run()
				230	{
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	231	_memory_group.acquire();
				232	if(!_reshaped_kernel)
				233	{
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	234	_reshaped_kernel = true;
Pablo Tello	02541fb	2017-12-15 09:48:59 +0000	[diff] [blame]	235	_permute_weights.run();
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	236	NEScheduler::get().schedule(_transform_weights_kernel.get(), Window::DimX);
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	237	}
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	238	//Bring channels to the front as Winograd code expects the tensor to be in the format NHWC
Pablo Tello	02541fb	2017-12-15 09:48:59 +0000	[diff] [blame]	239	_permute_input.run();
Pablo Tello	679463a	2018-02-06 11:47:59 +0000	[diff] [blame]	240
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	241	// Transform input tensor to the winograd domain
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	242	NEScheduler::get().schedule(_transform_input_kernel.get(), Window::DimX);
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	243
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	244	//Run 16 GEMMs in multiple threads, each kernel runs one or more GEMMs
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	245	NEScheduler::get().schedule(_batched_gemm_kernel.get(), Window::DimX);
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	246
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	247	// Transform output tensor to the spatial domain
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	248	NEScheduler::get().schedule(_transform_output_kernel.get(), Window::DimX);
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	249
Pablo Tello	02541fb	2017-12-15 09:48:59 +0000	[diff] [blame]	250	// Reorder the convoluted output to ACL's ordering NCHW
Pablo Tello	02541fb	2017-12-15 09:48:59 +0000	[diff] [blame]	251	_permute_output.run();
Isabella Gottardi	3f217ec	2018-02-12 14:59:19 +0000	[diff] [blame^]	252
				253	if(_is_activationlayer_enabled)
				254	{
				255	_activationlayer_function.run();
				256	}
				257
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	258	_memory_group.release();
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	259	}
Isabella Gottardi	6acc6ad	2018-02-02 17:19:18 +0000	[diff] [blame]	260
				261	Status NEWinogradLayer::validate(const ITensorInfo input, const ITensorInfo weights, const ITensorInfo biases, const ITensorInfo output, const PadStrideInfo &conv_info)
				262	{
				263	ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, biases, output);
Georgios Pinitas	d9eb275	2018-04-03 13:44:29 +0100	[diff] [blame]	264	ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, weights, biases, output, conv_info));
Isabella Gottardi	6acc6ad	2018-02-02 17:19:18 +0000	[diff] [blame]	265
				266	return Status{};
				267	}
				268
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	269	} // namespace arm_compute