Blame - tests/validation_old/TensorOperations.h - ml/ComputeLibrary

blob: 0c1ab4134e1072a8c7d1aa39f23dc2d64284d56e [file] [log] [blame]

Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1	/*
				2	* Copyright (c) 2017 ARM Limited.
				3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#ifndef __ARM_COMPUTE_TEST_TENSOR_OPERATIONS_H__
				25	#define __ARM_COMPUTE_TEST_TENSOR_OPERATIONS_H__
				26
Isabella Gottardi	1fab09f	2017-07-13 15:55:57 +0100	[diff] [blame]	27	#include "arm_compute/core/FixedPoint.h"
				28	#include "arm_compute/core/Helpers.h"
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	29	#include "arm_compute/core/Types.h"
Moritz Pflanzer	e49e266	2017-07-21 15:55:28 +0100	[diff] [blame]	30	#include "support/ToolchainSupport.h"
				31	#include "tests/Types.h"
				32	#include "tests/Utils.h"
Moritz Pflanzer	a09de0c	2017-09-01 20:41:12 +0100	[diff] [blame]	33	#include "tests/validation_old/FixedPoint.h"
				34	#include "tests/validation_old/Tensor.h"
				35	#include "tests/validation_old/ValidationUserConfiguration.h"
				36	#include "tests/validation_old/half.h"
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	37
				38	#include <algorithm>
				39	#include <array>
				40	#include <cmath>
Giorgio Arena	50f9fd7	2017-06-19 17:05:30 +0100	[diff] [blame]	41	#include <random>
Georgios Pinitas	ac4e873	2017-07-05 17:02:25 +0100	[diff] [blame]	42	#include <string>
Georgios Pinitas	d4f8c27	2017-06-30 16:16:19 +0100	[diff] [blame]	43	#include <vector>
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	44
				45	namespace arm_compute
				46	{
				47	namespace test
				48	{
				49	namespace validation
				50	{
				51	namespace tensor_operations
				52	{
				53	namespace
				54	{
Pablo Tello	383deec	2017-06-23 10:40:05 +0100	[diff] [blame]	55	template <class T>
				56	struct is_floating_point
				57	: std::integral_constant < bool,
Moritz Pflanzer	e49e266	2017-07-21 15:55:28 +0100	[diff] [blame]	58	std::is_same<float, typename std::remove_cv<T>::type>::value \|\| std::is_same<half_float::half, typename std::remove_cv<T>::type>::value
				59	\|\| std::is_same<double, typename std::remove_cv<T>::type>::value \|\| std::is_same<long double, typename std::remove_cv<T>::type>::value >
Pablo Tello	383deec	2017-06-23 10:40:05 +0100	[diff] [blame]	60	{
				61	};
				62
SiCong Li	bacaf9a	2017-06-19 13:41:45 +0100	[diff] [blame]	63	// Return a tensor element at a specified coordinate with different border modes
Giorgio Arena	fc2817d	2017-06-27 17:26:37 +0100	[diff] [blame]	64	template <typename T>
				65	T tensor_elem_at(const Tensor<T> &in, Coordinates coord, BorderMode border_mode, T constant_border_value)
Giorgio Arena	50f9fd7	2017-06-19 17:05:30 +0100	[diff] [blame]	66	{
				67	const int x = coord.x();
				68	const int y = coord.y();
				69	const int width = static_cast<int>(in.shape().x());
				70	const int height = static_cast<int>(in.shape().y());
				71
SiCong Li	bacaf9a	2017-06-19 13:41:45 +0100	[diff] [blame]	72	// If coordinates beyond range of tensor's width or height
Giorgio Arena	50f9fd7	2017-06-19 17:05:30 +0100	[diff] [blame]	73	if(x < 0 \|\| y < 0 \|\| x >= width \|\| y >= height)
				74	{
SiCong Li	bacaf9a	2017-06-19 13:41:45 +0100	[diff] [blame]	75	if(border_mode == BorderMode::REPLICATE)
Giorgio Arena	50f9fd7	2017-06-19 17:05:30 +0100	[diff] [blame]	76	{
				77	coord.set(0, std::max(0, std::min(x, width - 1)));
				78	coord.set(1, std::max(0, std::min(y, height - 1)));
Giorgio Arena	50f9fd7	2017-06-19 17:05:30 +0100	[diff] [blame]	79	}
				80	else
				81	{
SiCong Li	bacaf9a	2017-06-19 13:41:45 +0100	[diff] [blame]	82	return constant_border_value;
Giorgio Arena	50f9fd7	2017-06-19 17:05:30 +0100	[diff] [blame]	83	}
				84	}
Giorgio Arena	fc2817d	2017-06-27 17:26:37 +0100	[diff] [blame]	85
				86	return in[coord2index(in.shape(), coord)];
Giorgio Arena	50f9fd7	2017-06-19 17:05:30 +0100	[diff] [blame]	87	}
				88
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	89	/** Apply 2D spatial filter on a single element of @p in at coordinates @p coord
				90	*
				91	* - filter sizes have to be odd number
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	92	* - Row major order of filter assumed
				93	* - TO_ZERO rounding policy assumed
				94	* - SATURATE convert policy assumed
				95	*
				96	*/
				97	template <typename T1, typename T2, typename T3>
Giorgio Arena	50f9fd7	2017-06-19 17:05:30 +0100	[diff] [blame]	98	void apply_2d_spatial_filter(Coordinates coord, const Tensor<T1> &in, Tensor<T3> &out, const TensorShape &filter_shape, const T2 *filter_itr, float scale, BorderMode border_mode,
				99	T1 constant_border_value = 0)
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	100	{
Giorgio Arena	50f9fd7	2017-06-19 17:05:30 +0100	[diff] [blame]	101	double val = 0;
				102	const int x = coord.x();
				103	const int y = coord.y();
				104	for(int j = y - static_cast<int>(filter_shape[1] / 2); j <= y + static_cast<int>(filter_shape[1] / 2); ++j)
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	105	{
Giorgio Arena	50f9fd7	2017-06-19 17:05:30 +0100	[diff] [blame]	106	for(int i = x - static_cast<int>(filter_shape[0] / 2); i <= x + static_cast<int>(filter_shape[0] / 2); ++i)
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	107	{
				108	coord.set(0, i);
				109	coord.set(1, j);
SiCong Li	bacaf9a	2017-06-19 13:41:45 +0100	[diff] [blame]	110	val += static_cast<double>(filter_itr) tensor_elem_at(in, coord, border_mode, constant_border_value);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	111	++filter_itr;
				112	}
				113	}
				114	coord.set(0, x);
				115	coord.set(1, y);
Moritz Pflanzer	d0ae8b8	2017-06-29 14:51:57 +0100	[diff] [blame]	116	const double rounded_val = support::cpp11::trunc(val * static_cast<double>(scale));
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	117	out[coord2index(in.shape(), coord)] = saturate_cast<T3>(rounded_val);
				118	}
				119	} // namespace
				120
Isabella Gottardi	1fab09f	2017-07-13 15:55:57 +0100	[diff] [blame]	121	template <typename T>
				122	T bilinear_policy(const Tensor<T> &in, Coordinates id, float xn, float yn, BorderMode border_mode, uint8_t constant_border_value)
				123	{
				124	int idx = std::floor(xn);
				125	int idy = std::floor(yn);
				126
				127	const float dx = xn - idx;
				128	const float dy = yn - idy;
				129	const float dx_1 = 1.0f - dx;
				130	const float dy_1 = 1.0f - dy;
				131
				132	id.set(0, idx);
				133	id.set(1, idy);
				134	const T tl = tensor_elem_at(in, id, border_mode, constant_border_value);
				135	id.set(0, idx + 1);
				136	id.set(1, idy);
				137	const T tr = tensor_elem_at(in, id, border_mode, constant_border_value);
				138	id.set(0, idx);
				139	id.set(1, idy + 1);
				140	const T bl = tensor_elem_at(in, id, border_mode, constant_border_value);
				141	id.set(0, idx + 1);
				142	id.set(1, idy + 1);
				143	const T br = tensor_elem_at(in, id, border_mode, constant_border_value);
				144
				145	return tl * (dx_1 * dy_1) + tr * (dx * dy_1) + bl * (dx_1 * dy) + br * (dx * dy);
				146	}
				147
				148	bool valid_bilinear_policy(float xn, float yn, int width, int height, BorderMode border_mode)
				149	{
				150	if(border_mode != BorderMode::UNDEFINED)
				151	{
				152	return true;
				153	}
				154	if((0 <= yn + 1) && (yn + 1 < height) && (0 <= xn + 1) && (xn + 1 < width))
				155	{
				156	return true;
				157	}
				158	return false;
				159	}
				160
Giorgio Arena	50f9fd7	2017-06-19 17:05:30 +0100	[diff] [blame]	161	// Sobel 3x3
				162	template <typename T1, typename T2>
				163	void sobel_3x3(Tensor<T1> &in, Tensor<T2> &out_x, Tensor<T2> &out_y, BorderMode border_mode, uint8_t constant_border_value)
				164	{
				165	const std::array<int8_t, 9> sobel_x{ { -1, 0, 1, -2, 0, 2, -1, 0, 1 } };
				166	const std::array<int8_t, 9> sobel_y{ { -1, -2, -1, 0, 0, 0, 1, 2, 1 } };
				167
				168	for(int element_idx = 0; element_idx < in.num_elements(); ++element_idx)
				169	{
				170	const Coordinates id = index2coord(in.shape(), element_idx);
				171
				172	apply_2d_spatial_filter(id, in, out_x, TensorShape(3U, 3U), sobel_x.data(), 1.f, border_mode, constant_border_value);
				173	apply_2d_spatial_filter(id, in, out_y, TensorShape(3U, 3U), sobel_y.data(), 1.f, border_mode, constant_border_value);
				174	}
				175	}
				176
				177	// Sobel 5x5
				178	template <typename T1, typename T2>
				179	void sobel_5x5(Tensor<T1> &in, Tensor<T2> &out_x, Tensor<T2> &out_y, BorderMode border_mode, uint8_t constant_border_value)
				180	{
				181	const std::array<int8_t, 25> sobel_x{ {
				182	-1, -2, 0, 2, 1,
				183	-4, -8, 0, 8, 4,
				184	-6, -12, 0, 12, 6,
				185	-4, -8, 0, 8, 4,
				186	-1, -2, 0, 2, 1
				187	} };
				188
				189	const std::array<int8_t, 25> sobel_y{ {
				190	-1, -4, -6, -4, -1,
				191	-2, -8, -12, -8, -2,
				192	0, 0, 0, 0, 0,
				193	2, 8, 12, 8, 2,
				194	1, 4, 6, 4, 1
				195	} };
				196
				197	for(int element_idx = 0; element_idx < in.num_elements(); ++element_idx)
				198	{
				199	const Coordinates id = index2coord(in.shape(), element_idx);
				200
				201	apply_2d_spatial_filter(id, in, out_x, TensorShape(5U, 5U), sobel_x.data(), 1.f, border_mode, constant_border_value);
				202	apply_2d_spatial_filter(id, in, out_y, TensorShape(5U, 5U), sobel_y.data(), 1.f, border_mode, constant_border_value);
				203	}
				204	}
				205
Giorgio Arena	fc2817d	2017-06-27 17:26:37 +0100	[diff] [blame]	206	// Sobel 7x7
				207	template <typename T1, typename T2>
				208	void sobel_7x7(Tensor<T1> &in, Tensor<T2> &out_x, Tensor<T2> &out_y, BorderMode border_mode, uint8_t constant_border_value)
				209	{
				210	const std::array<int8_t, 49> sobel_x{ {
				211	-1, -4, -5, 0, 5, 4, 1,
				212	-6, -24, -30, 0, 30, 24, 6,
				213	-15, -60, -75, 0, 75, 60, 15,
				214	-20, -80, -100, 0, 100, 80, 20,
				215	-15, -60, -75, 0, 75, 60, 15,
				216	-6, -24, -30, 0, 30, 24, 6,
				217	-1, -4, -5, 0, 5, 4, 1
				218	} };
				219
				220	const std::array<int8_t, 49> sobel_y{ {
				221	-1, -6, -15, -20, -15, -6, -1,
				222	-4, -24, -60, -80, -60, -24, -4,
				223	-5, -30, -75, -100, -75, -30, -5,
				224	0, 0, 0, 0, 0, 0, 0,
				225	5, 30, 75, 100, 75, 30, 5,
				226	4, 24, 60, 80, 60, 24, 4,
				227	1, 6, 15, 20, 15, 6, 1
				228	} };
				229
				230	for(int element_idx = 0; element_idx < in.num_elements(); ++element_idx)
				231	{
				232	const Coordinates id = index2coord(in.shape(), element_idx);
				233
				234	apply_2d_spatial_filter(id, in, out_x, TensorShape(7U, 7U), sobel_x.data(), 1.f, border_mode, constant_border_value);
				235	apply_2d_spatial_filter(id, in, out_y, TensorShape(7U, 7U), sobel_y.data(), 1.f, border_mode, constant_border_value);
				236	}
				237	}
				238
				239	template <typename T>
				240	void non_maxima_suppression_3x3(Tensor<T> &in, Tensor<T> &out, BorderMode border_mode)
				241	{
				242	for(int i = 0; i < in.num_elements(); ++i)
				243	{
				244	Coordinates coord = index2coord(in.shape(), i);
				245	int x = coord.x();
				246	int y = coord.y();
				247
				248	if(in[i] >= tensor_elem_at(in, Coordinates(x - 1, y - 1), border_mode, 0.f) && in[i] >= tensor_elem_at(in, Coordinates(x, y - 1), border_mode, 0.f)
				249	&& in[i] >= tensor_elem_at(in, Coordinates(x + 1, y - 1), border_mode, 0.f) && in[i] >= tensor_elem_at(in, Coordinates(x - 1, y), border_mode, 0.f)
				250	&& in[i] > tensor_elem_at(in, Coordinates(x + 1, y), border_mode, 0.f) && in[i] > tensor_elem_at(in, Coordinates(x - 1, y + 1), border_mode, 0.f)
				251	&& in[i] > tensor_elem_at(in, Coordinates(x, y + 1), border_mode, 0.f) && in[i] > tensor_elem_at(in, Coordinates(x + 1, y + 1), border_mode, 0.f))
				252	{
				253	out[i] = in[i];
				254	}
				255	else
				256	{
				257	out[i] = 0;
				258	}
				259	}
				260	}
				261
				262	// Harris corners
				263	template <typename T1, typename T2, typename T3>
				264	void harris_corners(Tensor<T1> &in, Tensor<T2> &Gx, Tensor<T2> &Gy, Tensor<T3> &candidates, Tensor<T3> &non_maxima, float threshold, float min_dist, float sensitivity,
				265	int32_t gradient_size, int32_t block_size, KeyPointArray &corners, BorderMode border_mode, uint8_t constant_border_value)
				266	{
				267	ARM_COMPUTE_ERROR_ON(block_size != 3 && block_size != 5 && block_size != 7);
				268
				269	ValidRegion valid_region = shape_to_valid_region(candidates.shape());
				270	float norm_factor = 0.f;
				271
				272	// Sobel
				273	switch(gradient_size)
				274	{
				275	case 3:
				276	sobel_3x3(in, Gx, Gy, border_mode, constant_border_value);
				277	norm_factor = 1.f / (4 * 255 * block_size);
				278	break;
				279	case 5:
				280	sobel_5x5(in, Gx, Gy, border_mode, constant_border_value);
				281	norm_factor = 1.f / (16 * 255 * block_size);
				282	break;
				283	case 7:
				284	sobel_7x7(in, Gx, Gy, border_mode, constant_border_value);
				285	norm_factor = 1.f / (64 * 255 * block_size);
				286	break;
				287	default:
				288	ARM_COMPUTE_ERROR("Gradient size not supported.");
				289	}
				290
				291	//Calculate scores
				292	for(int i = 0; i < in.num_elements(); ++i)
				293	{
				294	Coordinates in_coord = index2coord(in.shape(), i);
				295
				296	float Gx2 = 0;
				297	float Gy2 = 0;
				298	float Gxy = 0;
				299
				300	// Calculate Gx^2, Gy^2 and Gxy within the given window
				301	for(int y = in_coord.y() - block_size / 2; y <= in_coord.y() + block_size / 2; ++y)
				302	{
				303	for(int x = in_coord.x() - block_size / 2; x <= in_coord.x() + block_size / 2; ++x)
				304	{
				305	Coordinates block_coord(x, y);
				306
				307	float norm_gx = tensor_elem_at(Gx, block_coord, border_mode, static_cast<T2>(constant_border_value)) * norm_factor;
				308	float norm_gy = tensor_elem_at(Gy, block_coord, border_mode, static_cast<T2>(constant_border_value)) * norm_factor;
				309
				310	Gx2 += std::pow(norm_gx, 2);
				311	Gy2 += std::pow(norm_gy, 2);
				312	Gxy += norm_gx * norm_gy;
				313	}
				314	}
				315
				316	float trace2 = std::pow(Gx2 + Gy2, 2);
				317	float det = Gx2 * Gy2 - std::pow(Gxy, 2);
				318	float response = det - sensitivity * trace2;
				319
				320	if(response > threshold)
				321	{
				322	candidates[i] = response;
				323	}
				324	else
				325	{
				326	candidates[i] = 0.f;
				327	}
				328	}
				329
				330	// Update valid region and remove candidates on borders for border_mode == UNDEFINED
				331	if(border_mode == BorderMode::UNDEFINED)
				332	{
				333	valid_region = shape_to_valid_region(candidates.shape(), true, BorderSize((gradient_size / 2) + (block_size / 2)));
				334
				335	for(int i = 0; i < candidates.num_elements(); ++i)
				336	{
				337	if(!is_in_valid_region(valid_region, index2coord(candidates.shape(), i)))
				338	{
				339	candidates[i] = 0.f;
				340	}
				341	}
				342	}
				343
				344	// Suppress non-maxima candidates
				345	non_maxima_suppression_3x3(candidates, non_maxima, border_mode != BorderMode::UNDEFINED ? BorderMode::CONSTANT : BorderMode::UNDEFINED);
				346	if(border_mode == BorderMode::UNDEFINED)
				347	{
				348	valid_region = shape_to_valid_region(non_maxima.shape(), true, BorderSize((gradient_size / 2) + (block_size / 2) + 1));
				349	}
				350
				351	// Create vector of candidate corners
				352	KeyPointArray candidates_vector(corners.max_num_values());
				353	for(int i = 0; i < non_maxima.num_elements(); ++i)
				354	{
				355	Coordinates coord = index2coord(non_maxima.shape(), i);
				356
				357	if(non_maxima[i] != 0.f && is_in_valid_region(valid_region, coord))
				358	{
				359	KeyPoint corner;
				360	corner.x = coord.x();
				361	corner.y = coord.y();
				362	corner.tracking_status = 1;
				363	corner.strength = non_maxima[i];
				364
				365	corner.scale = 0.f;
				366	corner.orientation = 0.f;
				367	corner.error = 0.f;
				368
				369	candidates_vector.push_back(corner);
				370	}
				371	}
				372
				373	// If there are any candidates, sort them by strength and add them to the output corners vector if there are no stronger corners within the given euclidean radius
				374	if(candidates_vector.num_values() > 0)
				375	{
				376	std::sort(candidates_vector.buffer(), candidates_vector.buffer() + candidates_vector.num_values(), [](KeyPoint a, KeyPoint b)
				377	{
				378	return a.strength > b.strength;
				379	});
				380	corners.push_back(candidates_vector.at(0));
				381
				382	for(size_t j = 0; j < candidates_vector.num_values(); ++j)
				383	{
				384	bool found = false;
				385	int32_t x = candidates_vector.at(j).x;
				386	int32_t y = candidates_vector.at(j).y;
				387
				388	for(size_t i = 0; i < corners.num_values(); ++i)
				389	{
				390	int32_t corners_x = corners.at(i).x;
				391	int32_t corners_y = corners.at(i).y;
				392
				393	// Euclidean distance
				394	if(std::sqrt((std::pow(x - corners_x, 2) + std::pow(y - corners_y, 2))) < min_dist)
				395	{
				396	found = true;
				397	}
				398	}
				399
				400	// If no stronger corners within the given euclidean radius
				401	if(!found)
				402	{
				403	corners.push_back(candidates_vector.at(j));
				404	}
				405	}
				406	}
				407	}
				408
Michele Di Giorgio	ef4b4ae	2017-07-04 17:19:43 +0100	[diff] [blame]	409	template <typename T>
				410	void compute_min_max(const Tensor<T> &in, void min, void max)
Giorgio Arena	2ca209e	2017-06-13 15:49:37 +0100	[diff] [blame]	411	{
Michele Di Giorgio	ef4b4ae	2017-07-04 17:19:43 +0100	[diff] [blame]	412	using type = typename std::conditional<std::is_same<T, float>::value, float, int32_t>::type;
Giorgio Arena	2ca209e	2017-06-13 15:49:37 +0100	[diff] [blame]	413
Michele Di Giorgio	ef4b4ae	2017-07-04 17:19:43 +0100	[diff] [blame]	414	// Set min and max to first pixel
				415	type tmp_min = static_cast<type>(in[0]);
				416	type tmp_max = static_cast<type>(in[0]);
Giorgio Arena	2ca209e	2017-06-13 15:49:37 +0100	[diff] [blame]	417
				418	// Look for min and max values
				419	for(int i = 1; i < in.num_elements(); ++i)
				420	{
Michele Di Giorgio	ef4b4ae	2017-07-04 17:19:43 +0100	[diff] [blame]	421	if(static_cast<type>(in[i]) < tmp_min)
Giorgio Arena	2ca209e	2017-06-13 15:49:37 +0100	[diff] [blame]	422	{
Michele Di Giorgio	ef4b4ae	2017-07-04 17:19:43 +0100	[diff] [blame]	423	tmp_min = static_cast<type>(in[i]);
Giorgio Arena	2ca209e	2017-06-13 15:49:37 +0100	[diff] [blame]	424	}
Michele Di Giorgio	ef4b4ae	2017-07-04 17:19:43 +0100	[diff] [blame]	425	if(static_cast<type>(in[i]) > tmp_max)
Giorgio Arena	2ca209e	2017-06-13 15:49:37 +0100	[diff] [blame]	426	{
Michele Di Giorgio	ef4b4ae	2017-07-04 17:19:43 +0100	[diff] [blame]	427	tmp_max = static_cast<type>(in[i]);
Giorgio Arena	2ca209e	2017-06-13 15:49:37 +0100	[diff] [blame]	428	}
				429	}
				430
Michele Di Giorgio	ef4b4ae	2017-07-04 17:19:43 +0100	[diff] [blame]	431	static_cast<type >(min) = tmp_min;
				432	static_cast<type >(max) = tmp_max;
				433	}
				434
				435	// Min max location
				436	template <typename T1>
				437	void min_max_location(const Tensor<T1> &in, void min, void max, IArray<Coordinates2D> &min_loc, IArray<Coordinates2D> &max_loc, uint32_t &min_count, uint32_t &max_count)
				438	{
				439	const size_t width = in.shape().x();
				440
				441	compute_min_max(in, min, max);
				442
				443	using type = typename std::conditional<std::is_same<T1, float>::value, float, int32_t>::type;
				444
				445	type min_value = static_cast<type >(min);
				446	type max_value = static_cast<type >(max);
				447
				448	min_count = 0;
				449	max_count = 0;
Giorgio Arena	2ca209e	2017-06-13 15:49:37 +0100	[diff] [blame]	450	for(int i = 0; i < in.num_elements(); ++i)
				451	{
Michele Di Giorgio	ef4b4ae	2017-07-04 17:19:43 +0100	[diff] [blame]	452	if(static_cast<type>(in[i]) == min_value)
Giorgio Arena	2ca209e	2017-06-13 15:49:37 +0100	[diff] [blame]	453	{
				454	Coordinates2D min_coord;
				455	min_coord.x = static_cast<int32_t>(i % width);
				456	min_coord.y = static_cast<int32_t>(i / width);
				457
				458	min_loc.push_back(min_coord);
				459
				460	min_count++;
				461	}
Michele Di Giorgio	ef4b4ae	2017-07-04 17:19:43 +0100	[diff] [blame]	462	if(static_cast<type>(in[i]) == max_value)
Giorgio Arena	2ca209e	2017-06-13 15:49:37 +0100	[diff] [blame]	463	{
				464	Coordinates2D max_coord;
				465	max_coord.x = static_cast<int32_t>(i % width);
				466	max_coord.y = static_cast<int32_t>(i / width);
				467
				468	max_loc.push_back(max_coord);
				469
				470	max_count++;
				471	}
				472	}
				473	}
				474
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	475	// Integral Image
				476	void integral_image(const Tensor<uint8_t> &in, Tensor<uint32_t> &out)
				477	{
				478	// Length of dimensions
				479	const size_t width = in.shape().x();
				480	const size_t height = in.shape().y();
				481	const size_t depth = in.shape().z() * in.shape()[3] * in.shape()[4] * in.shape()[5];
				482
				483	const size_t image_size = width * height;
				484
				485	for(size_t z = 0; z < depth; ++z)
				486	{
				487	size_t current_image = z * image_size;
				488
				489	//First element of each image
				490	out[current_image] = in[current_image];
				491
				492	// First row of each image (add only pixel on the left)
				493	for(size_t x = 1; x < width; ++x)
				494	{
				495	out[current_image + x] = static_cast<uint32_t>(in[current_image + x]) + out[current_image + x - 1];
				496	}
				497
				498	// Subsequent rows
				499	for(size_t y = 1; y < height; ++y)
				500	{
				501	size_t current_row = current_image + (width * y);
				502
				503	// First element of each row (add only pixel up)
				504	out[current_row] = static_cast<uint32_t>(in[current_row]) + out[current_row - width];
				505
				506	// Following row elements
				507	for(size_t x = 1; x < width; ++x)
				508	{
				509	size_t current_pixel = current_row + x;
				510
				511	// out = in + up(out) + left(out) - up_left(out)
				512	out[current_pixel] = static_cast<uint32_t>(in[current_pixel]) + out[current_pixel - 1]
				513	+ out[current_pixel - width] - out[current_pixel - width - 1];
				514	}
				515	}
				516	}
				517	}
				518
				519	// Absolute difference
				520	template <typename T1, typename T2, typename T3>
				521	void absolute_difference(const Tensor<T1> &in1, const Tensor<T2> &in2, Tensor<T3> &out)
				522	{
				523	using intermediate_type = typename common_promoted_signed_type<T1, T2, T3>::intermediate_type;
				524
				525	for(int i = 0; i < in1.num_elements(); ++i)
				526	{
Moritz Pflanzer	e49e266	2017-07-21 15:55:28 +0100	[diff] [blame]	527	intermediate_type val(std::abs(static_cast<intermediate_type>(in1[i]) - static_cast<intermediate_type>(in2[i])));
				528	out[i] = saturate_cast<T3>(val);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	529	}
				530	}
				531
				532	// Accumulate
				533	template <typename T1, typename T2>
				534	void accumulate(const Tensor<T1> &in, Tensor<T2> &out)
				535	{
				536	using intermediate_type = typename common_promoted_signed_type<T1, T2>::intermediate_type;
				537
				538	for(int i = 0; i < in.num_elements(); ++i)
				539	{
				540	intermediate_type val = static_cast<intermediate_type>(out[i]) + static_cast<intermediate_type>(in[i]);
				541	out[i] = saturate_cast<T2>(val);
				542	}
				543	}
				544
				545	// Accumulate squared
				546	template <typename T1, typename T2>
				547	void accumulate_squared(const Tensor<T1> &in, Tensor<T2> &out, uint32_t shift)
				548	{
				549	if(shift > 15)
				550	{
				551	ARM_COMPUTE_ERROR("Shift in accumulate_squared must be within the range [0, 15]");
				552	}
				553	using intermediate_type = typename common_promoted_signed_type<T1, T2>::intermediate_type;
				554	intermediate_type denom = 1 << shift;
				555
				556	for(int i = 0; i < in.num_elements(); ++i)
				557	{
				558	intermediate_type val = static_cast<intermediate_type>(out[i]) + (static_cast<intermediate_type>(in[i]) * static_cast<intermediate_type>(in[i]) / denom);
				559	out[i] = saturate_cast<T2>(val);
				560	}
				561	}
				562
Isabella Gottardi	6203153	2017-07-04 11:21:28 +0100	[diff] [blame]	563	// Accumulate weighted total_size = init_auto_padding(tensor_shape, num_channels, type);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	564	template <typename T>
				565	void accumulate_weighted(const Tensor<T> &in, Tensor<T> &out, float alpha)
				566	{
				567	if(alpha < 0.f \|\| alpha > 1.f)
				568	{
				569	ARM_COMPUTE_ERROR("Weight (alpha) specified in accumulate_weighted must be within the range [0, 1]");
				570	}
				571	using intermediate_type = typename common_promoted_signed_type<T>::intermediate_type;
				572
				573	for(int i = 0; i < in.num_elements(); ++i)
				574	{
				575	double val = (1. - static_cast<double>(alpha)) * static_cast<intermediate_type>(out[i]) + static_cast<double>(alpha) * static_cast<intermediate_type>(in[i]);
				576	out[i] = static_cast<T>(val);
				577	}
				578	}
				579
SiCong Li	5a53664	2017-06-19 14:47:05 +0100	[diff] [blame]	580	// Gaussian3x3 filter
				581	template <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type>
				582	void gaussian3x3(const Tensor<T> &in, Tensor<T> &out, BorderMode border_mode, T constant_border_value)
				583	{
				584	const std::array<T, 9> filter{ { 1, 2, 1, 2, 4, 2, 1, 2, 1 } };
				585	const float scale = 1.f / 16.f;
				586	for(int element_idx = 0; element_idx < in.num_elements(); ++element_idx)
				587	{
				588	const Coordinates id = index2coord(in.shape(), element_idx);
				589	apply_2d_spatial_filter(id, in, out, TensorShape(3U, 3U), filter.data(), scale, border_mode, constant_border_value);
				590	}
				591	}
				592
SiCong Li	3eb263e	2017-06-19 15:31:43 +0100	[diff] [blame]	593	// Gaussian5x5 filter
				594	template <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type>
				595	void gaussian5x5(const Tensor<T> &in, Tensor<T> &out, BorderMode border_mode, T constant_border_value)
				596	{
				597	const std::array<T, 25> filter{ {
				598	1, 4, 6, 4, 1,
				599	4, 16, 24, 16, 4,
				600	6, 24, 36, 24, 6,
				601	4, 16, 24, 16, 4,
				602	1, 4, 6, 4, 1
				603	} };
				604	const float scale = 1.f / 256.f;
				605	for(int element_idx = 0; element_idx < in.num_elements(); ++element_idx)
				606	{
				607	const Coordinates id = index2coord(in.shape(), element_idx);
				608	apply_2d_spatial_filter(id, in, out, TensorShape(5U, 5U), filter.data(), scale, border_mode, constant_border_value);
				609	}
				610	}
				611
Isabella Gottardi	3b77e9d	2017-06-22 11:05:41 +0100	[diff] [blame]	612	// Non linear filter
				613	template <typename T>
				614	void non_linear_filter(const Tensor<T> &in, Tensor<T> &out, NonLinearFilterFunction function, unsigned int mask_size,
				615	MatrixPattern pattern, const uint8_t *mask, BorderMode border_mode, uint8_t constant_border_value)
				616	{
SiCong Li	7a03575	2017-06-28 15:27:02 +0100	[diff] [blame]	617	ARM_COMPUTE_ERROR_ON(pattern == MatrixPattern::OTHER && mask == nullptr);
Isabella Gottardi	3b77e9d	2017-06-22 11:05:41 +0100	[diff] [blame]	618
				619	using intermediate_type = typename common_promoted_signed_type<T>::intermediate_type;
				620
				621	const int sq_mask_size = mask_size * mask_size;
				622	const int half_mask_size = mask_size / 2;
				623	std::vector<intermediate_type> vals(sq_mask_size);
				624	intermediate_type current_value = 0;
				625
SiCong Li	7a03575	2017-06-28 15:27:02 +0100	[diff] [blame]	626	const ValidRegion valid_region = shape_to_valid_region(in.shape(), border_mode == BorderMode::UNDEFINED, BorderSize(half_mask_size));
Isabella Gottardi	3b77e9d	2017-06-22 11:05:41 +0100	[diff] [blame]	627
				628	for(int element_idx = 0, count = 0, index = 0; element_idx < in.num_elements(); ++element_idx, count = 0, index = 0)
				629	{
				630	Coordinates id = index2coord(in.shape(), element_idx);
				631	if(is_in_valid_region(valid_region, id))
				632	{
				633	int idx = id.x();
				634	int idy = id.y();
				635	for(int y = idy - half_mask_size; y <= idy + half_mask_size; ++y)
				636	{
				637	for(int x = idx - half_mask_size; x <= idx + half_mask_size; ++x, ++index)
				638	{
				639	id.set(0, x);
				640	id.set(1, y);
				641	current_value = tensor_elem_at(in, id, border_mode, constant_border_value);
				642
				643	if(mask[index] == 255)
				644	{
				645	vals[count] = static_cast<intermediate_type>(current_value);
				646	++count;
				647	}
				648	}
				649	}
				650	std::sort(vals.begin(), vals.begin() + count);
				651	switch(function)
				652	{
				653	case NonLinearFilterFunction::MIN:
				654	out[element_idx] = saturate_cast<T>(vals[0]);
				655	break;
				656	case NonLinearFilterFunction::MAX:
				657	out[element_idx] = saturate_cast<T>(vals[count - 1]);
				658	break;
				659	case NonLinearFilterFunction::MEDIAN:
				660	out[element_idx] = saturate_cast<T>(vals[count / 2]);
				661	break;
				662	default:
				663	ARM_COMPUTE_ERROR("Unsupported NonLinearFilter function.");
				664	}
				665	}
				666	}
				667	}
				668
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	669	// Pixel-wise multiplication
				670	template <typename T1, typename T2, typename T3>
				671	void pixel_wise_multiplication(const Tensor<T1> &in1, const Tensor<T2> &in2, Tensor<T3> &out, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy)
				672	{
				673	if(scale < 0)
				674	{
				675	ARM_COMPUTE_ERROR("Scale of pixel-wise multiplication must be non-negative");
				676	}
				677	using intermediate_type = typename common_promoted_signed_type<T1, T2, T3>::intermediate_type;
				678	for(int i = 0; i < in1.num_elements(); ++i)
				679	{
				680	double val = static_cast<intermediate_type>(in1[i]) * static_cast<intermediate_type>(in2[i]) * static_cast<double>(scale);
Pablo Tello	383deec	2017-06-23 10:40:05 +0100	[diff] [blame]	681	if(is_floating_point<T3>::value)
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	682	{
				683	out[i] = val;
				684	}
				685	else
				686	{
				687	double rounded_val = 0;
				688	switch(rounding_policy)
				689	{
				690	case(RoundingPolicy::TO_ZERO):
Moritz Pflanzer	d0ae8b8	2017-06-29 14:51:57 +0100	[diff] [blame]	691	rounded_val = support::cpp11::trunc(val);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	692	break;
				693	case(RoundingPolicy::TO_NEAREST_UP):
Moritz Pflanzer	d0ae8b8	2017-06-29 14:51:57 +0100	[diff] [blame]	694	rounded_val = round_half_up(val);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	695	break;
				696	case(RoundingPolicy::TO_NEAREST_EVEN):
Moritz Pflanzer	d0ae8b8	2017-06-29 14:51:57 +0100	[diff] [blame]	697	rounded_val = round_half_even(val);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	698	break;
				699	default:
				700	ARM_COMPUTE_ERROR("Unsupported rounding policy");
				701	}
				702	out[i] = (convert_policy == ConvertPolicy::SATURATE) ? saturate_cast<T3>(rounded_val) : static_cast<T3>(rounded_val);
				703	}
				704	}
				705	}
				706
				707	// Fixed-point Pixel-wise Multiplication
				708	template <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type>
Michele Di Giorgio	1b80b6c	2017-07-17 15:06:34 +0100	[diff] [blame]	709	void fixed_point_pixel_wise_multiplication(const Tensor<T> &in1, const Tensor<T> &in2, Tensor<T> &out, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy)
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	710	{
				711	using namespace fixed_point_arithmetic;
				712
				713	const int fixed_point_position = in1.fixed_point_position();
				714
				715	ARM_COMPUTE_ERROR_ON_MSG(in1.data_type() != in2.data_type() \|\| in1.data_type() != out.data_type(),
				716	"Tensors must all have the same DataType");
				717	ARM_COMPUTE_ERROR_ON_MSG(fixed_point_position != in2.fixed_point_position() \|\| fixed_point_position != out.fixed_point_position(),
				718	"Fixed-point position must be the same for both inputs and outputs");
				719
				720	// Validate fixed_point_position
				721	ARM_COMPUTE_ERROR_ON((in1.data_type() == DataType::QS8) && (fixed_point_position == 0 \|\| fixed_point_position > 7));
				722	ARM_COMPUTE_ERROR_ON((in1.data_type() == DataType::QS16) && (fixed_point_position == 0 \|\| fixed_point_position > 15));
				723
Michele Di Giorgio	1b80b6c	2017-07-17 15:06:34 +0100	[diff] [blame]	724	const fixed_point<T> fp_scale(scale, fixed_point_position);
				725	const bool is_sat = convert_policy == ConvertPolicy::SATURATE;
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	726
				727	for(int i = 0; i < in1.num_elements(); ++i)
				728	{
Michele Di Giorgio	1b80b6c	2017-07-17 15:06:34 +0100	[diff] [blame]	729	const fixed_point<T> val1(in1[i], fixed_point_position, true);
				730	fixed_point<T> res(in2[i], fixed_point_position, true);
				731	if(is_sat)
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	732	{
Michele Di Giorgio	1b80b6c	2017-07-17 15:06:34 +0100	[diff] [blame]	733	res = mul(mul(res, val1), fp_scale);
				734	}
				735	else
				736	{
				737	res = mul<OverflowPolicy::WRAP>(mul<OverflowPolicy::WRAP>(res, val1), fp_scale);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	738	}
				739	out[i] = res.raw();
				740	}
				741	}
				742
				743	// Threshold
				744	template <typename T>
				745	void threshold(const Tensor<T> &in, Tensor<T> &out, uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper)
				746	{
				747	switch(type)
				748	{
				749	case ThresholdType::BINARY:
				750	for(int i = 0; i < in.num_elements(); ++i)
				751	{
				752	out[i] = ((in[i] > threshold) ? true_value : false_value);
				753	}
				754	break;
				755	case ThresholdType::RANGE:
				756	for(int i = 0; i < in.num_elements(); ++i)
				757	{
				758	if(in[i] > upper)
				759	{
				760	out[i] = false_value;
				761	}
				762	else if(in[i] < threshold)
				763	{
				764	out[i] = false_value;
				765	}
				766	else
				767	{
				768	out[i] = true_value;
				769	}
				770	}
				771	break;
				772	default:
				773	ARM_COMPUTE_ERROR("Thresholding type not recognised");
				774	break;
				775	}
				776	}
				777
Isabella Gottardi	6203153	2017-07-04 11:21:28 +0100	[diff] [blame]	778	// Warp Perspective
				779	template <typename T>
				780	void warp_perspective(const Tensor<T> &in, Tensor<T> &out, Tensor<T> &valid_mask, const float *matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value)
				781	{
				782	// x0 = M00 * x + M01 * y + M02
				783	// y0 = M10 * x + M11 * y + M12
				784	// z0 = M20 * x + M21 * y + M22
				785	// xn = x0 / z0
				786	// yn = y0 / z0
				787	const float M00 = matrix[0];
				788	const float M10 = matrix[1];
				789	const float M20 = matrix[2];
				790	const float M01 = matrix[0 + 1 * 3];
				791	const float M11 = matrix[1 + 1 * 3];
				792	const float M21 = matrix[2 + 1 * 3];
				793	const float M02 = matrix[0 + 2 * 3];
				794	const float M12 = matrix[1 + 2 * 3];
				795	const float M22 = matrix[2 + 2 * 3];
				796
				797	const int width = in.shape().x();
				798	const int height = in.shape().y();
				799
				800	for(int element_idx = 0; element_idx < in.num_elements(); ++element_idx)
				801	{
				802	valid_mask[element_idx] = 1;
				803	Coordinates id = index2coord(in.shape(), element_idx);
				804	int idx = id.x();
				805	int idy = id.y();
				806	const float z0 = M20 * idx + M21 * idy + M22;
				807
				808	float x0 = (M00 * idx + M01 * idy + M02);
				809	float y0 = (M10 * idx + M11 * idy + M12);
				810
				811	float xn = x0 / z0;
				812	float yn = y0 / z0;
				813	id.set(0, static_cast<int>(std::floor(xn)));
				814	id.set(1, static_cast<int>(std::floor(yn)));
				815	if((0 <= yn) && (yn < height) && (0 <= xn) && (xn < width))
				816	{
				817	switch(policy)
				818	{
				819	case InterpolationPolicy::NEAREST_NEIGHBOR:
				820	out[element_idx] = tensor_elem_at(in, id, border_mode, constant_border_value);
				821	break;
				822	case InterpolationPolicy::BILINEAR:
				823	(valid_bilinear_policy(xn, yn, width, height, border_mode)) ? out[element_idx] = bilinear_policy(in, id, xn, yn, border_mode, constant_border_value) : valid_mask[element_idx] = 0;
				824	break;
				825	case InterpolationPolicy::AREA:
				826	default:
				827	ARM_COMPUTE_ERROR("Interpolation not supported");
				828	}
				829	}
				830	else
				831	{
				832	if(border_mode == BorderMode::UNDEFINED)
				833	{
				834	valid_mask[element_idx] = 0;
				835	}
				836	else
				837	{
				838	switch(policy)
				839	{
				840	case InterpolationPolicy::NEAREST_NEIGHBOR:
				841	if(border_mode == BorderMode::CONSTANT)
				842	{
				843	out[element_idx] = constant_border_value;
				844	}
				845	else if(border_mode == BorderMode::REPLICATE)
				846	{
				847	id.set(0, std::max(0, std::min(static_cast<int>(xn), width - 1)));
				848	id.set(1, std::max(0, std::min(static_cast<int>(yn), height - 1)));
				849	out[element_idx] = in[coord2index(in.shape(), id)];
				850	}
				851	break;
				852	case InterpolationPolicy::BILINEAR:
				853	out[element_idx] = bilinear_policy(in, id, xn, yn, border_mode, constant_border_value);
				854	break;
				855	case InterpolationPolicy::AREA:
				856	default:
				857	ARM_COMPUTE_ERROR("Interpolation not supported");
				858	}
				859	}
				860	}
				861	}
				862	}
				863
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	864	// Batch Normalization Layer for fixed point type
				865	template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type * = nullptr>
				866	void batch_normalization_layer(const Tensor<T> &in, Tensor<T> &out, const Tensor<T> &mean, const Tensor<T> &var, const Tensor<T> &beta, const Tensor<T> &gamma, float epsilon, int fixed_point_position)
				867	{
				868	const int cols = static_cast<int>(in.shape()[0]);
				869	const int rows = static_cast<int>(in.shape()[1]);
				870	const int depth = static_cast<int>(in.shape()[2]);
				871	int upper_dims = in.shape().total_size() / (cols * rows * depth);
				872
				873	for(int r = 0; r < upper_dims; ++r)
				874	{
				875	for(int i = 0; i < depth; ++i)
				876	{
				877	for(int k = 0; k < rows; ++k)
				878	{
				879	for(int l = 0; l < cols; ++l)
				880	{
				881	const int pos = l + k * cols + i * rows * cols + r * cols * rows * depth;
Michalis Spyrou	172e570	2017-06-26 14:18:47 +0100	[diff] [blame]	882	fixed_point_arithmetic::fixed_point<T> in_qs(in[pos], fixed_point_position, true);
				883	fixed_point_arithmetic::fixed_point<T> var_qs(var[i], fixed_point_position, true);
				884	fixed_point_arithmetic::fixed_point<T> mean_qs(mean[i], fixed_point_position, true);
				885	fixed_point_arithmetic::fixed_point<T> beta_qs(beta[i], fixed_point_position, true);
				886	fixed_point_arithmetic::fixed_point<T> gamma_qs(gamma[i], fixed_point_position, true);
				887	fixed_point_arithmetic::fixed_point<T> epsilon_qs(epsilon, fixed_point_position);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	888
Michalis Spyrou	172e570	2017-06-26 14:18:47 +0100	[diff] [blame]	889	auto denominator = fixed_point_arithmetic::inv_sqrt(var_qs + epsilon_qs);
				890	auto numerator = in_qs - mean_qs;
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	891	auto x_bar = numerator * denominator;
Michalis Spyrou	172e570	2017-06-26 14:18:47 +0100	[diff] [blame]	892	x_bar = beta_qs + x_bar * gamma_qs;
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	893	out[pos] = x_bar.raw();
				894	}
				895	}
				896	}
				897	}
				898	}
				899
				900	// Batch Normalization Layer for floating point type
Pablo Tello	383deec	2017-06-23 10:40:05 +0100	[diff] [blame]	901	template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type * = nullptr>
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	902	void batch_normalization_layer(const Tensor<T> &in, Tensor<T> &out, const Tensor<T> &mean, const Tensor<T> &var, const Tensor<T> &beta, const Tensor<T> &gamma, float epsilon, int fixed_point_position)
				903	{
				904	const int cols = static_cast<int>(in.shape()[0]);
				905	const int rows = static_cast<int>(in.shape()[1]);
				906	const int depth = static_cast<int>(in.shape()[2]);
				907	int upper_dims = in.shape().total_size() / (cols * rows * depth);
				908
				909	for(int r = 0; r < upper_dims; ++r)
				910	{
				911	for(int i = 0; i < depth; ++i)
				912	{
				913	for(int k = 0; k < rows; ++k)
				914	{
				915	for(int l = 0; l < cols; ++l)
				916	{
				917	const int pos = l + k * cols + i * rows * cols + r * cols * rows * depth;
				918	const float denominator = sqrt(var[i] + epsilon);
				919	const float numerator = in[pos] - mean[i];
				920	const float x_bar = numerator / denominator;
				921	out[pos] = beta[i] + x_bar * gamma[i];
				922	}
				923	}
				924	}
				925	}
				926	}
				927
Michalis Spyrou	bbd9fb9	2017-06-22 12:57:51 +0100	[diff] [blame]	928	// ROI Pooling layer
Georgios Pinitas	7b7858d	2017-06-21 16:44:24 +0100	[diff] [blame]	929	template <typename T>
				930	void roi_pooling_layer(const Tensor<T> &in, Tensor<T> &out, const std::vector<ROI> &rois, const ROIPoolingLayerInfo &pool_info)
				931	{
				932	const int num_rois = rois.size();
				933	const int width_in = in.shape().x();
				934	const int height_in = in.shape().y();
				935	const int fms = in.shape().z();
				936	const int volume_in = width_in * height_in * fms;
				937	const int pool_w = pool_info.pooled_width();
				938	const int pool_h = pool_info.pooled_height();
				939	const int volume_out = pool_w * pool_h * fms;
				940	const float roi_scale = pool_info.spatial_scale();
				941
				942	// Iterate through all rois
				943	for(int roi_idx = 0; roi_idx < num_rois; ++roi_idx)
				944	{
				945	// Get dimensions of current ROI
				946	const ROI &roi = rois[roi_idx];
				947
				948	int batch_id = roi.batch_idx;
				949	int roi_start_x = support::cpp11::round(roi.rect.x * roi_scale);
				950	int roi_start_y = support::cpp11::round(roi.rect.y * roi_scale);
				951	int roi_width = std::max(support::cpp11::round(roi.rect.width * roi_scale), 1.f);
				952	int roi_height = std::max(support::cpp11::round(roi.rect.height * roi_scale), 1.f);
				953
Georgios Pinitas	7b7858d	2017-06-21 16:44:24 +0100	[diff] [blame]	954	// Iterate through all channel
				955	for(int fm = 0; fm < fms; ++fm)
				956	{
				957	// Calculate each output pixel
				958	for(int py = 0; py < pool_h; ++py)
				959	{
				960	for(int px = 0; px < pool_w; ++px)
				961	{
SiCong Li	cfb6553	2017-09-12 19:06:28 +0100	[diff] [blame]	962	int region_start_x = static_cast<int>(std::floor((static_cast<float>(px) / pool_w) * roi_width));
				963	int region_end_x = static_cast<int>(std::floor((static_cast<float>(px + 1) / pool_w) * roi_width));
				964	int region_start_y = static_cast<int>(std::floor((static_cast<float>(py) / pool_h) * roi_height));
				965	int region_end_y = static_cast<int>(std::floor((static_cast<float>(py + 1) / pool_h) * roi_height));
Georgios Pinitas	7b7858d	2017-06-21 16:44:24 +0100	[diff] [blame]	966
				967	region_start_x = std::min(std::max(region_start_x + roi_start_x, 0), width_in);
				968	region_end_x = std::min(std::max(region_end_x + roi_start_x, 0), width_in);
				969	region_start_y = std::min(std::max(region_start_y + roi_start_y, 0), height_in);
				970	region_end_y = std::min(std::max(region_end_y + roi_start_y, 0), height_in);
				971
				972	// Iterate through each pixel in the pooling region
				973	if((region_end_x <= region_start_x) \|\| (region_end_y <= region_start_y))
				974	{
				975	out[roi_idx * volume_out + fm * pool_w * pool_h + py * pool_w + px] = 0;
				976	}
				977	else
				978	{
				979	T curr_max = std::numeric_limits<T>::lowest();
				980	for(int j = region_start_y; j < region_end_y; ++j)
				981	{
				982	for(int i = region_start_x; i < region_end_x; ++i)
				983	{
				984	const auto val = in[batch_id * volume_in + fm * width_in * height_in + j * width_in + i];
				985	curr_max = std::max(val, curr_max);
				986	}
				987	}
				988	out[roi_idx * volume_out + fm * pool_w * pool_h + py * pool_w + px] = curr_max;
				989	}
				990	}
				991	}
				992	}
				993	}
				994	}
				995
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	996	// Fixed point operations
				997	template <typename T>
				998	void fixed_point_operation(const Tensor<T> &in, Tensor<T> &out, FixedPointOp op)
				999	{
				1000	int p = in.fixed_point_position();
				1001	switch(op)
				1002	{
				1003	case FixedPointOp::EXP:
				1004	for(int i = 0; i < in.num_elements(); ++i)
				1005	{
				1006	out[i] = fixed_point_arithmetic::exp(fixed_point_arithmetic::fixed_point<T>(in[i], p, true)).raw();
				1007	}
				1008	break;
				1009	case FixedPointOp::LOG:
				1010	for(int i = 0; i < in.num_elements(); ++i)
				1011	{
				1012	out[i] = fixed_point_arithmetic::log(fixed_point_arithmetic::fixed_point<T>(in[i], p, true)).raw();
				1013	}
				1014	break;
				1015	case FixedPointOp::INV_SQRT:
				1016	for(int i = 0; i < in.num_elements(); ++i)
				1017	{
				1018	out[i] = fixed_point_arithmetic::inv_sqrt(fixed_point_arithmetic::fixed_point<T>(in[i], p, true)).raw();
				1019	}
				1020	break;
				1021	case FixedPointOp::RECIPROCAL:
				1022	for(int i = 0; i < in.num_elements(); ++i)
				1023	{
				1024	out[i] = fixed_point_arithmetic::div(fixed_point_arithmetic::fixed_point<T>(1, p), fixed_point_arithmetic::fixed_point<T>(in[i], p, true)).raw();
				1025	}
				1026	break;
				1027	default:
				1028	ARM_COMPUTE_ERROR("Fixed point operation not supported");
				1029	break;
				1030	}
				1031	}
				1032
				1033	// Tensor print
				1034	template <typename T>
				1035	void print(const Tensor<T> &in, std::ostream &out)
				1036	{
				1037	out << "\n";
				1038	for(int i = 0; i < in.num_elements(); ++i)
				1039	{
				1040	out << in[i] << " ";
				1041	}
				1042	out << "\n";
				1043	}
				1044	} // namespace tensor_operations
				1045	} // namespace validation
				1046	} // namespace test
				1047	} // namespace arm_compute
				1048
				1049	#endif /* __ARM_COMPUTE_TEST_TENSOR_OPERATIONS_H__ */