COMPMID-443 Collapse higher dimensions for the pooling layer and the normalization layer
Change-Id: Icd08eefbd938c11c77dc4264af1fa3664fb336bc
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/80568
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
diff --git a/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h
index ca9034b..132fcc4 100644
--- a/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h
@@ -66,6 +66,7 @@
const ICLTensor *_squared_input;
ICLTensor *_output;
BorderSize _border_size;
+ bool _is_in_map;
};
}
#endif /*__ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H__ */
diff --git a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
index b2bcb92..1afd76a 100644
--- a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
@@ -35,7 +35,7 @@
using namespace arm_compute;
CLNormalizationLayerKernel::CLNormalizationLayerKernel()
- : _input(nullptr), _squared_input(nullptr), _output(nullptr), _border_size(0)
+ : _input(nullptr), _squared_input(nullptr), _output(nullptr), _border_size(0), _is_in_map(false)
{
}
@@ -65,8 +65,8 @@
_squared_input = squared_input;
_output = output;
- const bool is_in_map = (norm_info.type() == NormType::IN_MAP_1D);
- const unsigned int border_width = is_in_map ? std::min(norm_info.norm_size() / 2, 3U) : 0;
+ _is_in_map = (norm_info.type() == NormType::IN_MAP_1D);
+ const unsigned int border_width = _is_in_map ? std::min(norm_info.norm_size() / 2, 3U) : 0;
_border_size = BorderSize(0, border_width);
// Create kernel
@@ -81,7 +81,7 @@
_kernel.setArg<cl_uint>(idx++, norm_info.norm_size() / 2);
// Configure kernel window
- const unsigned int num_elems_processed_per_iteration = (is_in_map) ? 4 : 1;
+ const unsigned int num_elems_processed_per_iteration = (_is_in_map) ? 4 : 1;
const unsigned int num_elems_read_per_iteration = num_elems_processed_per_iteration + 2 * (norm_info.norm_size() / 2);
Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
@@ -102,7 +102,9 @@
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
- Window slice = window.first_slice_window_3D();
+ const int collapsed_dimension = _is_in_map ? Window::DimZ : 4;
+ Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), collapsed_dimension);
+ Window slice = window_collapsed.first_slice_window_3D();
do
{
@@ -112,5 +114,5 @@
add_3D_tensor_argument(idx, _output, slice);
enqueue(queue, *this, slice);
}
- while(window.slide_window_slice_3D(slice));
+ while(window_collapsed.slide_window_slice_3D(slice));
}
diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
index 3777e3b..ca75fd5 100644
--- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
@@ -167,7 +167,8 @@
std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info().pad();
std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
- Window slice = window.first_slice_window_3D();
+ Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ);
+ Window slice = window_collapsed.first_slice_window_3D();
do
{
@@ -182,5 +183,5 @@
add_3D_tensor_argument(idx, _output, slice);
enqueue(queue, *this, slice);
}
- while(window.slide_window_slice_3D(slice));
+ while(window_collapsed.slide_window_slice_3D(slice));
}