/*
 * Copyright (c) 2016-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "src/core/NEON/kernels/NELKTrackerKernel.h"

#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "src/core/AccessWindowStatic.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"

#include <arm_neon.h>
#include <cmath>

using namespace arm_compute;

/** Constants used for the Lucas-Kanade algorithm */
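// W_BITS is the number of fractional bits of the fixed-point bilinear weights: D0 == 2^W_BITS
// represents 1.0 and the four weights of every interpolation are rounded so that they sum to D0.
// FLT_SCALE converts the fixed-point integer accumulators back to floating point.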
constexpr int   W_BITS                = 14;
constexpr float D0                    = 1 << W_BITS;
constexpr float DETERMINANT_THRESHOLD = 1.0e-07f; // Threshold for the determinant. Used for lost tracking criteria
constexpr float EIGENVALUE_THRESHOLD  = 1.0e-04f; // Threshold for the minimum eigenvalue. Used for lost tracking criteria
constexpr float FLT_SCALE             = 1.0f / (1 << 20);

namespace
{
enum class BilinearInterpolation
{
    BILINEAR_OLD_NEW,
    BILINEAR_SCHARR
};

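// Round a fixed-point value to the nearest integer while dropping its n fractional bits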
template <typename T>
constexpr int INT_ROUND(T x, int n)
{
    return (x + (1 << (n - 1))) >> n;
}

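// Bilinearly interpolate a single pixel from its four neighbours at (xi, yi), using the
// fixed-point weights iw00..iw11, and round the result back by 'scale' fractional bits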
template <typename T>
inline int get_pixel(const ITensor *tensor, int xi, int yi, int iw00, int iw01, int iw10, int iw11, int scale)
{
    const auto px00 = *reinterpret_cast<const T *>(tensor->buffer() + tensor->info()->offset_element_in_bytes(Coordinates(xi, yi)));
    const auto px01 = *reinterpret_cast<const T *>(tensor->buffer() + tensor->info()->offset_element_in_bytes(Coordinates(xi + 1, yi)));
    const auto px10 = *reinterpret_cast<const T *>(tensor->buffer() + tensor->info()->offset_element_in_bytes(Coordinates(xi, yi + 1)));
    const auto px11 = *reinterpret_cast<const T *>(tensor->buffer() + tensor->info()->offset_element_in_bytes(Coordinates(xi + 1, yi + 1)));

    return INT_ROUND(px00 * iw00 + px01 * iw01 + px10 * iw10 + px11 * iw11, scale);
}

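// Vector version of the bilinear filter: interpolates four adjacent output pixels at once from
// two consecutive input rows, applying the rounding shift held in 'shift'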
inline int32x4_t compute_bilinear_interpolation(int16x8_t top_row, int16x8_t bottom_row, int16x4_t w00, int16x4_t w01, int16x4_t w10, int16x4_t w11, int32x4_t shift)
{
    // Get the left column of upper row
    const int16x4_t px00 = vget_low_s16(top_row);

    // Get the right column of upper row
    const int16x4_t px01 = vext_s16(px00, vget_high_s16(top_row), 1);

    // Get the left column of lower row
    const int16x4_t px10 = vget_low_s16(bottom_row);

    // Get the right column of lower row
    const int16x4_t px11 = vext_s16(px10, vget_high_s16(bottom_row), 1);

    // Apply the bilinear filter
    return vqrshlq_s32(vmull_s16(px00, w00) + vmull_s16(px01, w01) + vmull_s16(px10, w10) + vmull_s16(px11, w11), shift);
}
} // namespace

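// At the top pyramid level (the coarsest, processed first) the internal keypoints are initialised
// from the input keypoints, optionally using the caller's estimates, scaled to that level; at
// every other level the coordinates tracked so far are rescaled by 1 / _pyramid_scale.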
void NELKTrackerKernel::init_keypoints(int start, int end)
{
    if(_level == _num_levels - 1)
    {
        const float level_scale = pow(_pyramid_scale, _level);

        for(int i = start; i < end; ++i)
        {
            _old_points_internal->at(i).x               = _old_points->at(i).x * level_scale;
            _old_points_internal->at(i).y               = _old_points->at(i).y * level_scale;
            _old_points_internal->at(i).tracking_status = true;

            NELKInternalKeypoint keypoint_to_track;

            if(_use_initial_estimate)
            {
                keypoint_to_track.x               = _new_points_estimates->at(i).x * level_scale;
                keypoint_to_track.y               = _new_points_estimates->at(i).y * level_scale;
                keypoint_to_track.tracking_status = (_new_points_estimates->at(i).tracking_status == 1);
            }
            else
            {
                keypoint_to_track.x               = _old_points_internal->at(i).x;
                keypoint_to_track.y               = _old_points_internal->at(i).y;
                keypoint_to_track.tracking_status = true;
            }

            _new_points_internal->at(i) = keypoint_to_track;
        }
    }
    else
    {
        for(int i = start; i < end; ++i)
        {
            _old_points_internal->at(i).x /= _pyramid_scale;
            _old_points_internal->at(i).y /= _pyramid_scale;
            _new_points_internal->at(i).x /= _pyramid_scale;
            _new_points_internal->at(i).y /= _pyramid_scale;
        }
    }
}

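// Builds the 2x2 spatial gradient (structure) matrix of the window centred on the old keypoint:
//   A = | sum(Ix * Ix)   sum(Ix * Iy) |
//       | sum(Ix * Iy)   sum(Iy * Iy) |
// where Ix and Iy are the bilinearly interpolated Scharr gradients. The interpolated gradients are
// also stored in bilinear_ix / bilinear_iy so that compute_image_mismatch_vector can reuse them.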
std::tuple<int, int, int> NELKTrackerKernel::compute_spatial_gradient_matrix(const NELKInternalKeypoint &keypoint, int32_t *bilinear_ix, int32_t *bilinear_iy)
{
    int iA11 = 0;
    int iA12 = 0;
    int iA22 = 0;

    int32x4_t nA11 = vdupq_n_s32(0);
    int32x4_t nA12 = vdupq_n_s32(0);
    int32x4_t nA22 = vdupq_n_s32(0);

    float keypoint_int_x = 0;
    float keypoint_int_y = 0;

    const float wx = std::modf(keypoint.x, &keypoint_int_x);
    const float wy = std::modf(keypoint.y, &keypoint_int_y);

    const int iw00 = roundf((1.0f - wx) * (1.0f - wy) * D0);
    const int iw01 = roundf(wx * (1.0f - wy) * D0);
    const int iw10 = roundf((1.0f - wx) * wy * D0);
    const int iw11 = D0 - iw00 - iw01 - iw10;

    const int16x4_t nw00 = vdup_n_s16(iw00);
    const int16x4_t nw01 = vdup_n_s16(iw01);
    const int16x4_t nw10 = vdup_n_s16(iw10);
    const int16x4_t nw11 = vdup_n_s16(iw11);

    // Convert the stride from bytes to number of int16_t elements
    const size_t      row_stride = _old_scharr_gx->info()->strides_in_bytes()[1] / 2;
    const Coordinates top_left_window_corner(static_cast<int>(keypoint_int_x) - _window_dimension / 2, static_cast<int>(keypoint_int_y) - _window_dimension / 2);
    auto              idx = reinterpret_cast<const int16_t *>(_old_scharr_gx->buffer() + _old_scharr_gx->info()->offset_element_in_bytes(top_left_window_corner));
    auto              idy = reinterpret_cast<const int16_t *>(_old_scharr_gy->buffer() + _old_scharr_gy->info()->offset_element_in_bytes(top_left_window_corner));
    static const int32x4_t nshifter_scharr = vdupq_n_s32(-W_BITS);

    for(int ky = 0; ky < _window_dimension; ++ky, idx += row_stride, idy += row_stride)
    {
        int kx = 0;

        // Calculate elements in blocks of four as long as possible
        for(; kx <= _window_dimension - 4; kx += 4)
        {
            // Interpolation X
            const int16x8_t ndx_row1 = vld1q_s16(idx + kx);
            const int16x8_t ndx_row2 = vld1q_s16(idx + kx + row_stride);

            const int32x4_t nxval = compute_bilinear_interpolation(ndx_row1, ndx_row2, nw00, nw01, nw10, nw11, nshifter_scharr);

            // Interpolation Y
            const int16x8_t ndy_row1 = vld1q_s16(idy + kx);
            const int16x8_t ndy_row2 = vld1q_s16(idy + kx + row_stride);

            const int32x4_t nyval = compute_bilinear_interpolation(ndy_row1, ndy_row2, nw00, nw01, nw10, nw11, nshifter_scharr);

            // Store the intermediate data so that we don't need to recalculate it in a later stage
            vst1q_s32(bilinear_ix + kx + ky * _window_dimension, nxval);
            vst1q_s32(bilinear_iy + kx + ky * _window_dimension, nyval);

            // Accumulate Ix^2
            nA11 = vmlaq_s32(nA11, nxval, nxval);
            // Accumulate Ix * Iy
            nA12 = vmlaq_s32(nA12, nxval, nyval);
            // Accumulate Iy^2
            nA22 = vmlaq_s32(nA22, nyval, nyval);
        }

        // Calculate the leftover elements
        for(; kx < _window_dimension; ++kx)
        {
            const int32_t ixval = get_pixel<int16_t>(_old_scharr_gx, top_left_window_corner.x() + kx, top_left_window_corner.y() + ky,
                                                     iw00, iw01, iw10, iw11, W_BITS);
            const int32_t iyval = get_pixel<int16_t>(_old_scharr_gy, top_left_window_corner.x() + kx, top_left_window_corner.y() + ky,
                                                     iw00, iw01, iw10, iw11, W_BITS);

            iA11 += ixval * ixval;
            iA12 += ixval * iyval;
            iA22 += iyval * iyval;

            bilinear_ix[kx + ky * _window_dimension] = ixval;
            bilinear_iy[kx + ky * _window_dimension] = iyval;
        }
    }

    iA11 += vgetq_lane_s32(nA11, 0) + vgetq_lane_s32(nA11, 1) + vgetq_lane_s32(nA11, 2) + vgetq_lane_s32(nA11, 3);
    iA12 += vgetq_lane_s32(nA12, 0) + vgetq_lane_s32(nA12, 1) + vgetq_lane_s32(nA12, 2) + vgetq_lane_s32(nA12, 3);
    iA22 += vgetq_lane_s32(nA22, 0) + vgetq_lane_s32(nA22, 1) + vgetq_lane_s32(nA22, 2) + vgetq_lane_s32(nA22, 3);

    return std::make_tuple(iA11, iA12, iA22);
}

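// Builds the image mismatch vector of the window:
//   b = | sum(It * Ix) |
//       | sum(It * Iy) |
// where It is the difference between the bilinearly interpolated new and old images and Ix / Iy
// are the gradients cached by compute_spatial_gradient_matrix.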
std::pair<int, int> NELKTrackerKernel::compute_image_mismatch_vector(const NELKInternalKeypoint &old_keypoint, const NELKInternalKeypoint &new_keypoint, const int32_t *bilinear_ix,
                                                                     const int32_t *bilinear_iy)
{
    int ib1 = 0;
    int ib2 = 0;

    int32x4_t nb1 = vdupq_n_s32(0);
    int32x4_t nb2 = vdupq_n_s32(0);

    // Compute weights for the old keypoint
    float old_keypoint_int_x = 0;
    float old_keypoint_int_y = 0;

    const float old_wx = std::modf(old_keypoint.x, &old_keypoint_int_x);
    const float old_wy = std::modf(old_keypoint.y, &old_keypoint_int_y);

    const int iw00_old = roundf((1.0f - old_wx) * (1.0f - old_wy) * D0);
    const int iw01_old = roundf(old_wx * (1.0f - old_wy) * D0);
    const int iw10_old = roundf((1.0f - old_wx) * old_wy * D0);
    const int iw11_old = D0 - iw00_old - iw01_old - iw10_old;

    const int16x4_t nw00_old = vdup_n_s16(iw00_old);
    const int16x4_t nw01_old = vdup_n_s16(iw01_old);
    const int16x4_t nw10_old = vdup_n_s16(iw10_old);
    const int16x4_t nw11_old = vdup_n_s16(iw11_old);

    // Compute weights for the new keypoint
    float new_keypoint_int_x = 0;
    float new_keypoint_int_y = 0;

    const float new_wx = std::modf(new_keypoint.x, &new_keypoint_int_x);
    const float new_wy = std::modf(new_keypoint.y, &new_keypoint_int_y);

    const int iw00_new = roundf((1.0f - new_wx) * (1.0f - new_wy) * D0);
    const int iw01_new = roundf(new_wx * (1.0f - new_wy) * D0);
    const int iw10_new = roundf((1.0f - new_wx) * new_wy * D0);
    const int iw11_new = D0 - iw00_new - iw01_new - iw10_new;

    const int16x4_t nw00_new = vdup_n_s16(iw00_new);
    const int16x4_t nw01_new = vdup_n_s16(iw01_new);
    const int16x4_t nw10_new = vdup_n_s16(iw10_new);
    const int16x4_t nw11_new = vdup_n_s16(iw11_new);

    const int         row_stride = _input_new->info()->strides_in_bytes()[1];
    const Coordinates top_left_window_corner_old(static_cast<int>(old_keypoint_int_x) - _window_dimension / 2, static_cast<int>(old_keypoint_int_y) - _window_dimension / 2);
    const Coordinates top_left_window_corner_new(static_cast<int>(new_keypoint_int_x) - _window_dimension / 2, static_cast<int>(new_keypoint_int_y) - _window_dimension / 2);
    const uint8_t    *old_ptr = _input_old->buffer() + _input_old->info()->offset_element_in_bytes(top_left_window_corner_old);
    const uint8_t    *new_ptr = _input_new->buffer() + _input_new->info()->offset_element_in_bytes(top_left_window_corner_new);
    static const int32x4_t nshifter_tensor = vdupq_n_s32(-(W_BITS - 5));

    for(int ky = 0; ky < _window_dimension; ++ky, new_ptr += row_stride, old_ptr += row_stride)
    {
        int kx = 0;

        // Calculate elements in blocks of four as long as possible
        for(; kx <= _window_dimension - 4; kx += 4)
        {
            // Interpolation old tensor
            const int16x8_t nold_row1 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(old_ptr + kx)));
            const int16x8_t nold_row2 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(old_ptr + kx + row_stride)));

            const int32x4_t noldval = compute_bilinear_interpolation(nold_row1, nold_row2, nw00_old, nw01_old, nw10_old, nw11_old, nshifter_tensor);

            // Interpolation new tensor
            const int16x8_t nnew_row1 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(new_ptr + kx)));
            const int16x8_t nnew_row2 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(new_ptr + kx + row_stride)));

            const int32x4_t nnewval = compute_bilinear_interpolation(nnew_row1, nnew_row2, nw00_new, nw01_new, nw10_new, nw11_new, nshifter_tensor);

            // Calculate the It gradient, i.e. the pixelwise difference between the old and new tensor
            const int32x4_t diff = vsubq_s32(nnewval, noldval);

            // Load the Ix and Iy gradients computed in the previous stage
            const int32x4_t nxval = vld1q_s32(bilinear_ix + kx + ky * _window_dimension);
            const int32x4_t nyval = vld1q_s32(bilinear_iy + kx + ky * _window_dimension);

            // Calculate Ix * It and Iy * It, and accumulate the results
            nb1 = vmlaq_s32(nb1, diff, nxval);
            nb2 = vmlaq_s32(nb2, diff, nyval);
        }

        // Calculate the leftover elements
        for(; kx < _window_dimension; ++kx)
        {
            const int32_t ival = get_pixel<uint8_t>(_input_old, top_left_window_corner_old.x() + kx, top_left_window_corner_old.y() + ky,
                                                    iw00_old, iw01_old, iw10_old, iw11_old, W_BITS - 5);
            const int32_t jval = get_pixel<uint8_t>(_input_new, top_left_window_corner_new.x() + kx, top_left_window_corner_new.y() + ky,
                                                    iw00_new, iw01_new, iw10_new, iw11_new, W_BITS - 5);

            const int32_t diff = jval - ival;

            ib1 += diff * bilinear_ix[kx + ky * _window_dimension];
            ib2 += diff * bilinear_iy[kx + ky * _window_dimension];
        }
    }

    ib1 += vgetq_lane_s32(nb1, 0) + vgetq_lane_s32(nb1, 1) + vgetq_lane_s32(nb1, 2) + vgetq_lane_s32(nb1, 3);
    ib2 += vgetq_lane_s32(nb2, 0) + vgetq_lane_s32(nb2, 1) + vgetq_lane_s32(nb2, 2) + vgetq_lane_s32(nb2, 3);

    return std::make_pair(ib1, ib2);
}

NELKTrackerKernel::NELKTrackerKernel()
    : _input_old(nullptr), _input_new(nullptr), _old_scharr_gx(nullptr), _old_scharr_gy(nullptr), _new_points(nullptr), _new_points_estimates(nullptr), _old_points(nullptr), _old_points_internal(),
      _new_points_internal(), _termination(Termination::TERM_CRITERIA_EPSILON), _use_initial_estimate(false), _pyramid_scale(0.0f), _epsilon(0.0f), _num_iterations(0), _window_dimension(0), _level(0),
      _num_levels(0), _valid_region()
{
}

BorderSize NELKTrackerKernel::border_size() const
{
    return BorderSize(1);
}

void NELKTrackerKernel::configure(const ITensor *input_old, const ITensor *input_new, const ITensor *old_scharr_gx, const ITensor *old_scharr_gy,
                                  const IKeyPointArray *old_points, const IKeyPointArray *new_points_estimates, IKeyPointArray *new_points,
                                  INELKInternalKeypointArray *old_points_internal, INELKInternalKeypointArray *new_points_internal,
                                  Termination termination, bool use_initial_estimate, float epsilon, unsigned int num_iterations, size_t window_dimension,
                                  size_t level, size_t num_levels, float pyramid_scale)
{
    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input_old, 1, DataType::U8);
    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input_new, 1, DataType::U8);
    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(old_scharr_gx, 1, DataType::S16);
    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(old_scharr_gy, 1, DataType::S16);

    _input_old            = input_old;
    _input_new            = input_new;
    _old_scharr_gx        = old_scharr_gx;
    _old_scharr_gy        = old_scharr_gy;
    _old_points           = old_points;
    _new_points_estimates = new_points_estimates;
    _new_points           = new_points;
    _old_points_internal  = old_points_internal;
    _new_points_internal  = new_points_internal;
    _termination          = termination;
    _use_initial_estimate = use_initial_estimate;
    _epsilon              = epsilon;
    _window_dimension     = window_dimension;
    _level                = level;
    _num_levels           = num_levels;
    _pyramid_scale        = pyramid_scale;

    // Set maximum number of iterations used for convergence
    const size_t max_iterations = 1000;
    _num_iterations             = (termination == Termination::TERM_CRITERIA_EPSILON) ? max_iterations : num_iterations;

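    // The kernel window spans the keypoint array, so the work is split into ranges of keypoints;
    // the access windows cover the whole intersected valid region of the input tensors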
    Window window;
    window.set(Window::DimX, Window::Dimension(0, old_points->num_values()));
    window.set(Window::DimY, Window::Dimension(0, 1));

    _valid_region = intersect_valid_regions(
                        input_old->info()->valid_region(),
                        input_new->info()->valid_region(),
                        old_scharr_gx->info()->valid_region(),
                        old_scharr_gy->info()->valid_region());

    update_window_and_padding(window,
                              AccessWindowStatic(input_old->info(), _valid_region.start(0), _valid_region.start(1),
                                                 _valid_region.end(0), _valid_region.end(1)),
                              AccessWindowStatic(input_new->info(), _valid_region.start(0), _valid_region.start(1),
                                                 _valid_region.end(0), _valid_region.end(1)),
                              AccessWindowStatic(old_scharr_gx->info(), _valid_region.start(0), _valid_region.start(1),
                                                 _valid_region.end(0), _valid_region.end(1)),
                              AccessWindowStatic(old_scharr_gy->info(), _valid_region.start(0), _valid_region.start(1),
                                                 _valid_region.end(0), _valid_region.end(1)));

    INEKernel::configure(window);
}

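// For every keypoint assigned to this window: compute the spatial gradient matrix A once, then
// iterate computing the mismatch vector b and moving the estimate by delta = -A^-1 * b until the
// motion falls below epsilon, the deltas start oscillating, or the iteration limit is reached.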
void NELKTrackerKernel::run(const Window &window, const ThreadInfo &info)
{
    ARM_COMPUTE_UNUSED(info);
    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);

    ARM_COMPUTE_ERROR_ON(_input_old->buffer() == nullptr);
    ARM_COMPUTE_ERROR_ON(_input_new->buffer() == nullptr);
    ARM_COMPUTE_ERROR_ON(_old_scharr_gx->buffer() == nullptr);
    ARM_COMPUTE_ERROR_ON(_old_scharr_gy->buffer() == nullptr);

    const int list_end   = window.x().end();
    const int list_start = window.x().start();

    init_keypoints(list_start, list_end);

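    // Scratch buffers for the interpolated spatial gradients of the tracking window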
    const int            buffer_size = _window_dimension * _window_dimension;
    std::vector<int32_t> bilinear_ix(buffer_size);
    std::vector<int32_t> bilinear_iy(buffer_size);

    const int half_window = _window_dimension / 2;

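    // A keypoint cannot be tracked if its window falls outside the valid region of the input tensors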
    auto is_invalid_keypoint = [&](const NELKInternalKeypoint & keypoint)
    {
        const int x = std::floor(keypoint.x);
        const int y = std::floor(keypoint.y);

        return (x - half_window < _valid_region.start(0)) || (x + half_window >= _valid_region.end(0) - 1) || (y - half_window < _valid_region.start(1)) || (y + half_window >= _valid_region.end(1) - 1);
    };

    for(int list_indx = list_start; list_indx < list_end; ++list_indx)
    {
        NELKInternalKeypoint &old_keypoint = _old_points_internal->at(list_indx);
        NELKInternalKeypoint &new_keypoint = _new_points_internal->at(list_indx);

        if(!old_keypoint.tracking_status)
        {
            continue;
        }

        if(is_invalid_keypoint(old_keypoint))
        {
            if(_level == 0)
            {
                new_keypoint.tracking_status = false;
            }

            continue;
        }

        // Compute spatial gradient matrix
        int iA11 = 0;
        int iA12 = 0;
        int iA22 = 0;

        std::tie(iA11, iA12, iA22) = compute_spatial_gradient_matrix(old_keypoint, bilinear_ix.data(), bilinear_iy.data());

        const float A11 = iA11 * FLT_SCALE;
        const float A12 = iA12 * FLT_SCALE;
        const float A22 = iA22 * FLT_SCALE;

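        // A point can be tracked reliably only if both eigenvalues of A are large enough; for a
        // 2x2 symmetric matrix the smaller eigenvalue is (trace - sqrt(trace^2 - 4 * det)) / 2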
        // Calculate minimum eigenvalue
        const float sum_A11_A22  = A11 + A22;
        const float discriminant = sum_A11_A22 * sum_A11_A22 - 4.0f * (A11 * A22 - A12 * A12);
        // Divide by _window_dimension^2 to reduce the floating point accumulation error
        const float minimum_eigenvalue = (sum_A11_A22 - std::sqrt(discriminant)) / (2.0f * _window_dimension * _window_dimension);

        // Determinant
        const double D = A11 * A22 - A12 * A12;

        // Check if it is a good point to track
        if(minimum_eigenvalue < EIGENVALUE_THRESHOLD || D < DETERMINANT_THRESHOLD)
        {
            // Invalidate tracked point
            if(_level == 0)
            {
                new_keypoint.tracking_status = false;
            }

            continue;
        }

        float prev_delta_x = 0.0f;
        float prev_delta_y = 0.0f;

        for(unsigned int j = 0; j < _num_iterations; ++j)
        {
            if(is_invalid_keypoint(new_keypoint))
            {
                if(_level == 0)
                {
                    new_keypoint.tracking_status = false;
                }

                break;
            }

            // Compute image mismatch vector
            int ib1 = 0;
            int ib2 = 0;

            std::tie(ib1, ib2) = compute_image_mismatch_vector(old_keypoint, new_keypoint, bilinear_ix.data(), bilinear_iy.data());

            double b1 = ib1 * FLT_SCALE;
            double b2 = ib2 * FLT_SCALE;

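            // With A = | A11 A12 ; A12 A22 | and b = (b1, b2), the 2x2 system is solved in closed
            // form using the determinant D computed above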
            // Compute motion vector -> A^-1 * -b
            const float delta_x = (A12 * b2 - A22 * b1) / D;
            const float delta_y = (A12 * b1 - A11 * b2) / D;

            // Update the new position
            new_keypoint.x += delta_x;
            new_keypoint.y += delta_y;

            const float mag2 = delta_x * delta_x + delta_y * delta_y;

            // Check if the termination criterion is EPSILON and if it is satisfied
            if(mag2 <= _epsilon && (_termination == Termination::TERM_CRITERIA_EPSILON || _termination == Termination::TERM_CRITERIA_BOTH))
            {
                break;
            }

            // Check for oscillation against the previous delta: if the current step cancels the
            // previous one, scale back the last step and stop iterating
            if(j > 0 && std::fabs(delta_x + prev_delta_x) < 0.01f && std::fabs(delta_y + prev_delta_y) < 0.01f)
            {
                new_keypoint.x -= delta_x * _pyramid_scale;
                new_keypoint.y -= delta_y * _pyramid_scale;
                break;
            }

            prev_delta_x = delta_x;
            prev_delta_y = delta_y;
        }
    }

    if(_level == 0)
    {
        for(int list_indx = list_start; list_indx < list_end; ++list_indx)
        {
            const NELKInternalKeypoint &new_keypoint = _new_points_internal->at(list_indx);

            _new_points->at(list_indx).x               = roundf(new_keypoint.x);
            _new_points->at(list_indx).y               = roundf(new_keypoint.y);
            _new_points->at(list_indx).tracking_status = new_keypoint.tracking_status ? 1 : 0;
        }
    }
}