COMPMID-481: Add thread info parameter
Change-Id: Iebb50a88d017445b6b37a86563ebd4abd86c5cf5
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/86788
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
diff --git a/src/runtime/CL/functions/CLHOGMultiDetection.cpp b/src/runtime/CL/functions/CLHOGMultiDetection.cpp
index 6def2de..9eed355 100644
--- a/src/runtime/CL/functions/CLHOGMultiDetection.cpp
+++ b/src/runtime/CL/functions/CLHOGMultiDetection.cpp
@@ -29,6 +29,7 @@
#include "arm_compute/runtime/CL/CLArray.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/Scheduler.h"
#include "support/ToolchainSupport.h"
using namespace arm_compute;
@@ -246,7 +247,7 @@
{
// Map detection windows array before computing non maxima suppression
_detection_windows->map(CLScheduler::get().queue(), true);
- _non_maxima_kernel->run(_non_maxima_kernel->window());
+ Scheduler::get().schedule(_non_maxima_kernel.get(), Window::DimY);
_detection_windows->unmap(CLScheduler::get().queue());
}
}
diff --git a/src/runtime/CL/functions/CLHarrisCorners.cpp b/src/runtime/CL/functions/CLHarrisCorners.cpp
index 8f9fcdc..2140240 100644
--- a/src/runtime/CL/functions/CLHarrisCorners.cpp
+++ b/src/runtime/CL/functions/CLHarrisCorners.cpp
@@ -165,6 +165,6 @@
_nonmax.unmap();
_corners->map(CLScheduler::get().queue(), true);
- _sort_euclidean.run(_sort_euclidean.window());
+ Scheduler::get().schedule(&_sort_euclidean, Window::DimY);
_corners->unmap(CLScheduler::get().queue());
}
diff --git a/src/runtime/CPP/CPPScheduler.cpp b/src/runtime/CPP/CPPScheduler.cpp
index 2a321a1..9cc3f03 100644
--- a/src/runtime/CPP/CPPScheduler.cpp
+++ b/src/runtime/CPP/CPPScheduler.cpp
@@ -52,7 +52,7 @@
* This function will return as soon as the kernel has been sent to the worker thread.
* wait() needs to be called to ensure the execution is complete.
*/
- void start(ICPPKernel *kernel, const Window &window);
+ void start(ICPPKernel *kernel, const Window &window, const ThreadInfo &info);
/** Wait for the current kernel execution to complete
*/
void wait();
@@ -64,13 +64,14 @@
std::thread _thread;
ICPPKernel *_kernel{ nullptr };
Window _window;
+ ThreadInfo _info;
sem_t _wait_for_work;
sem_t _job_complete;
std::exception_ptr _current_exception;
};
Thread::Thread()
- : _thread(), _window(), _wait_for_work(), _job_complete(), _current_exception(nullptr)
+ : _thread(), _window(), _info(), _wait_for_work(), _job_complete(), _current_exception(nullptr)
{
int ret = sem_init(&_wait_for_work, 0, 0);
ARM_COMPUTE_ERROR_ON(ret < 0);
@@ -87,7 +88,7 @@
{
ARM_COMPUTE_ERROR_ON(!_thread.joinable());
- start(nullptr, Window());
+ start(nullptr, Window(), ThreadInfo());
_thread.join();
int ret = sem_destroy(&_wait_for_work);
@@ -99,10 +100,11 @@
ARM_COMPUTE_UNUSED(ret);
}
-void Thread::start(ICPPKernel *kernel, const Window &window)
+void Thread::start(ICPPKernel *kernel, const Window &window, const ThreadInfo &info)
{
_kernel = kernel;
_window = window;
+ _info = info;
int ret = sem_post(&_wait_for_work);
ARM_COMPUTE_UNUSED(ret);
ARM_COMPUTE_ERROR_ON(ret < 0);
@@ -133,7 +135,7 @@
try
{
_window.validate();
- _kernel->run(_window);
+ _kernel->run(_window, _info);
}
catch(...)
{
@@ -163,8 +165,7 @@
CPPScheduler::CPPScheduler()
: _num_threads(std::thread::hardware_concurrency()),
- _threads(std::unique_ptr<Thread[], void(*)(Thread *)>(new Thread[std::thread::hardware_concurrency() - 1], delete_threads)),
- _target(CPUTarget::INTRINSICS)
+ _threads(std::unique_ptr<Thread[], void(*)(Thread *)>(new Thread[std::thread::hardware_concurrency() - 1], delete_threads))
{
}
@@ -179,50 +180,42 @@
return _num_threads;
}
-void CPPScheduler::set_target(CPUTarget target)
-{
- _target = target;
-}
-
-CPUTarget CPPScheduler::target() const
-{
- return _target;
-}
-
void CPPScheduler::schedule(ICPPKernel *kernel, unsigned int split_dimension)
{
ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel");
/** [Scheduler example] */
+ ThreadInfo info;
+ info.cpu = _target;
+
const Window &max_window = kernel->window();
const unsigned int num_iterations = max_window.num_iterations(split_dimension);
- const unsigned int num_threads = std::min(num_iterations, _num_threads);
+ info.num_threads = std::min(num_iterations, _num_threads);
- if(!kernel->is_parallelisable() || 1 == num_threads)
+ if(!kernel->is_parallelisable() || info.num_threads == 1)
{
- kernel->run(max_window);
+ kernel->run(max_window, info);
}
else
{
- for(unsigned int t = 0; t < num_threads; ++t)
+ for(int t = 0; t < info.num_threads; ++t)
{
- Window win = max_window.split_window(split_dimension, t, num_threads);
- win.set_thread_id(t);
- win.set_num_threads(num_threads);
+ Window win = max_window.split_window(split_dimension, t, info.num_threads);
+ info.thread_id = t;
- if(t != num_threads - 1)
+ if(t != info.num_threads - 1)
{
- _threads[t].start(kernel, win);
+ _threads[t].start(kernel, win, info);
}
else
{
- kernel->run(win);
+ kernel->run(win, info);
}
}
try
{
- for(unsigned int t = 1; t < num_threads; ++t)
+ for(int t = 1; t < info.num_threads; ++t)
{
_threads[t - 1].wait();
}
diff --git a/src/runtime/CPP/SingleThreadScheduler.cpp b/src/runtime/CPP/SingleThreadScheduler.cpp
index f086813..4e46a59 100644
--- a/src/runtime/CPP/SingleThreadScheduler.cpp
+++ b/src/runtime/CPP/SingleThreadScheduler.cpp
@@ -38,12 +38,15 @@
void SingleThreadScheduler::set_num_threads(unsigned int num_threads)
{
ARM_COMPUTE_UNUSED(num_threads);
+ ARM_COMPUTE_ERROR_ON(num_threads != 1);
}
void SingleThreadScheduler::schedule(ICPPKernel *kernel, unsigned int split_dimension)
{
ARM_COMPUTE_UNUSED(split_dimension);
- kernel->run(kernel->window());
+ ThreadInfo info;
+ info.cpu = _target;
+ kernel->run(kernel->window(), info);
}
unsigned int SingleThreadScheduler::num_threads() const
diff --git a/src/runtime/NEON/INESimpleFunction.cpp b/src/runtime/NEON/INESimpleFunction.cpp
index a748a1e..23d9872 100644
--- a/src/runtime/NEON/INESimpleFunction.cpp
+++ b/src/runtime/NEON/INESimpleFunction.cpp
@@ -35,6 +35,6 @@
void INESimpleFunction::run()
{
- _border_handler.run(_border_handler.window());
+ NEScheduler::get().schedule(&_border_handler, Window::DimZ);
NEScheduler::get().schedule(_kernel.get(), Window::DimY);
}
diff --git a/src/runtime/NEON/functions/NECannyEdge.cpp b/src/runtime/NEON/functions/NECannyEdge.cpp
index ca8877e..318cea2 100644
--- a/src/runtime/NEON/functions/NECannyEdge.cpp
+++ b/src/runtime/NEON/functions/NECannyEdge.cpp
@@ -161,7 +161,7 @@
_sobel->run();
// Fill border before non-maxima suppression. Nop for border mode undefined.
- _border_mag_gradient.run(_border_mag_gradient.window());
+ NEScheduler::get().schedule(&_border_mag_gradient, Window::DimZ);
// Run gradient
NEScheduler::get().schedule(_gradient.get(), Window::DimY);
@@ -173,8 +173,8 @@
memset(_output->buffer(), 0, _output->info()->total_size());
// Fill border before edge trace
- _border_edge_trace.run(_border_edge_trace.window());
+ NEScheduler::get().schedule(&_border_edge_trace, Window::DimZ);
// Run edge tracing
- _edge_trace.run(_edge_trace.window());
+ NEScheduler::get().schedule(&_edge_trace, Window::DimY);
}
diff --git a/src/runtime/NEON/functions/NEConvolution.cpp b/src/runtime/NEON/functions/NEConvolution.cpp
index 4ad6450..249274b 100644
--- a/src/runtime/NEON/functions/NEConvolution.cpp
+++ b/src/runtime/NEON/functions/NEConvolution.cpp
@@ -94,7 +94,7 @@
template <unsigned int matrix_size>
void NEConvolutionSquare<matrix_size>::run()
{
- _border_handler.run(_border_handler.window());
+ NEScheduler::get().schedule(&_border_handler, Window::DimZ);
if(_is_separable)
{
diff --git a/src/runtime/NEON/functions/NEDerivative.cpp b/src/runtime/NEON/functions/NEDerivative.cpp
index c50db14..8118030 100644
--- a/src/runtime/NEON/functions/NEDerivative.cpp
+++ b/src/runtime/NEON/functions/NEDerivative.cpp
@@ -47,6 +47,6 @@
void NEDerivative::run()
{
- _border_handler.run(_border_handler.window());
+ NEScheduler::get().schedule(&_border_handler, Window::DimZ);
NEScheduler::get().schedule(&_kernel, Window::DimY);
}
diff --git a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
index 2e3a683..810efe5 100644
--- a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
@@ -85,7 +85,7 @@
void NEDirectConvolutionLayer::run()
{
- _input_border_handler.run(_input_border_handler.window());
+ NEScheduler::get().schedule(&_input_border_handler, Window::DimZ);
NEScheduler::get().schedule(&_conv_kernel, Window::DimZ);
NEScheduler::get().schedule(&_accumulate_bias_kernel, Window::DimY);
diff --git a/src/runtime/NEON/functions/NEEqualizeHistogram.cpp b/src/runtime/NEON/functions/NEEqualizeHistogram.cpp
index f6ec677..70b93ca 100644
--- a/src/runtime/NEON/functions/NEEqualizeHistogram.cpp
+++ b/src/runtime/NEON/functions/NEEqualizeHistogram.cpp
@@ -55,7 +55,7 @@
NEScheduler::get().schedule(&_histogram_kernel, Window::DimY);
// Calculate cumulative distribution of histogram and create LUT.
- _cd_histogram_kernel.run(_cd_histogram_kernel.window());
+ NEScheduler::get().schedule(&_cd_histogram_kernel, Window::DimY);
// Map input to output using created LUT.
NEScheduler::get().schedule(&_map_histogram_kernel, Window::DimY);
diff --git a/src/runtime/NEON/functions/NEFastCorners.cpp b/src/runtime/NEON/functions/NEFastCorners.cpp
index 33a58f1..265041f 100644
--- a/src/runtime/NEON/functions/NEFastCorners.cpp
+++ b/src/runtime/NEON/functions/NEFastCorners.cpp
@@ -88,7 +88,7 @@
void NEFastCorners::run()
{
- _border_handler.run(_border_handler.window());
+ NEScheduler::get().schedule(&_border_handler, Window::DimZ);
NEScheduler::get().schedule(&_fast_corners_kernel, Window::DimY);
diff --git a/src/runtime/NEON/functions/NEGaussian5x5.cpp b/src/runtime/NEON/functions/NEGaussian5x5.cpp
index 69639d0..a1ce985 100644
--- a/src/runtime/NEON/functions/NEGaussian5x5.cpp
+++ b/src/runtime/NEON/functions/NEGaussian5x5.cpp
@@ -54,7 +54,7 @@
void NEGaussian5x5::run()
{
- _border_handler.run(_border_handler.window());
+ NEScheduler::get().schedule(&_border_handler, Window::DimZ);
NEScheduler::get().schedule(&_kernel_hor, Window::DimY);
NEScheduler::get().schedule(&_kernel_vert, Window::DimY);
}
diff --git a/src/runtime/NEON/functions/NEGaussianPyramid.cpp b/src/runtime/NEON/functions/NEGaussianPyramid.cpp
index e857aab..90bd584 100644
--- a/src/runtime/NEON/functions/NEGaussianPyramid.cpp
+++ b/src/runtime/NEON/functions/NEGaussianPyramid.cpp
@@ -109,7 +109,7 @@
for(unsigned int i = 0; i < num_levels - 1; ++i)
{
- _border_handler[i].run(_border_handler[i].window());
+ NEScheduler::get().schedule(_border_handler.get() + i, Window::DimZ);
NEScheduler::get().schedule(_horizontal_reduction.get() + i, Window::DimY);
NEScheduler::get().schedule(_vertical_reduction.get() + i, Window::DimY);
}
diff --git a/src/runtime/NEON/functions/NEHOGMultiDetection.cpp b/src/runtime/NEON/functions/NEHOGMultiDetection.cpp
index 8b3d014..1a038a2 100644
--- a/src/runtime/NEON/functions/NEHOGMultiDetection.cpp
+++ b/src/runtime/NEON/functions/NEHOGMultiDetection.cpp
@@ -239,6 +239,6 @@
// Run non-maxima suppression kernel if enabled
if(_non_maxima_suppression)
{
- _non_maxima_kernel->run(_non_maxima_kernel->window());
+ NEScheduler::get().schedule(_non_maxima_kernel.get(), Window::DimY);
}
}
diff --git a/src/runtime/NEON/functions/NEHarrisCorners.cpp b/src/runtime/NEON/functions/NEHarrisCorners.cpp
index 24b2bcb..7ec681d 100644
--- a/src/runtime/NEON/functions/NEHarrisCorners.cpp
+++ b/src/runtime/NEON/functions/NEHarrisCorners.cpp
@@ -207,8 +207,8 @@
_sobel->run();
// Fill border before harris score kernel
- _border_gx.run(_border_gx.window());
- _border_gy.run(_border_gy.window());
+ NEScheduler::get().schedule(&_border_gx, Window::DimZ);
+ NEScheduler::get().schedule(&_border_gy, Window::DimZ);
// Run harris score kernel
NEScheduler::get().schedule(_harris_score.get(), Window::DimY);
@@ -220,5 +220,5 @@
NEScheduler::get().schedule(&_candidates, Window::DimY);
// Run sort & euclidean distance
- _sort_euclidean.run(_sort_euclidean.window());
+ NEScheduler::get().schedule(&_sort_euclidean, Window::DimY);
}
diff --git a/src/runtime/NEON/functions/NEMeanStdDev.cpp b/src/runtime/NEON/functions/NEMeanStdDev.cpp
index ab8e72b..2304bc8 100644
--- a/src/runtime/NEON/functions/NEMeanStdDev.cpp
+++ b/src/runtime/NEON/functions/NEMeanStdDev.cpp
@@ -43,6 +43,6 @@
_global_sum = 0;
_global_sum_squared = 0;
- _fill_border_kernel.run(_fill_border_kernel.window());
+ NEScheduler::get().schedule(&_fill_border_kernel, Window::DimZ);
NEScheduler::get().schedule(&_mean_stddev_kernel, Window::DimY);
}
diff --git a/src/runtime/NEON/functions/NESobel5x5.cpp b/src/runtime/NEON/functions/NESobel5x5.cpp
index 8967a22..305d211 100644
--- a/src/runtime/NEON/functions/NESobel5x5.cpp
+++ b/src/runtime/NEON/functions/NESobel5x5.cpp
@@ -75,7 +75,7 @@
void NESobel5x5::run()
{
- _border_handler.run(_border_handler.window());
+ NEScheduler::get().schedule(&_border_handler, Window::DimZ);
NEScheduler::get().schedule(&_sobel_hor, Window::DimY);
NEScheduler::get().schedule(&_sobel_vert, Window::DimY);
}
diff --git a/src/runtime/NEON/functions/NESobel7x7.cpp b/src/runtime/NEON/functions/NESobel7x7.cpp
index f628da9..57fe028 100644
--- a/src/runtime/NEON/functions/NESobel7x7.cpp
+++ b/src/runtime/NEON/functions/NESobel7x7.cpp
@@ -75,7 +75,7 @@
void NESobel7x7::run()
{
- _border_handler.run(_border_handler.window());
+ NEScheduler::get().schedule(&_border_handler, Window::DimZ);
NEScheduler::get().schedule(&_sobel_hor, Window::DimY);
NEScheduler::get().schedule(&_sobel_vert, Window::DimY);
}