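COMPMID-415 - Fixed bug in CLDepthConcatenateKernel

configure() now stores the requested depth offset in the new
_depth_offset member. The byte offset into the output tensor and the
"offsets" kernel argument are computed and set in run() instead of
configure().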

Change-Id: Ieedb714cb3666504c175aa488505e0485778c589
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/86705
Reviewed-by: Moritz Pflanzer <moritz.pflanzer@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
diff --git a/arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h b/arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h
index e85e0ec..ac108a4 100644
--- a/arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h
+++ b/arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h
@@ -71,6 +71,7 @@
     ICLTensor       *_output;
     int              _top_bottom;
     int              _left_right;
+    unsigned int     _depth_offset;
 };
 }
 #endif /* __ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H__ */
diff --git a/src/core/CL/kernels/CLDepthConcatenateKernel.cpp b/src/core/CL/kernels/CLDepthConcatenateKernel.cpp
index 6a699ae..edfbf82 100644
--- a/src/core/CL/kernels/CLDepthConcatenateKernel.cpp
+++ b/src/core/CL/kernels/CLDepthConcatenateKernel.cpp
@@ -42,7 +42,7 @@
 using namespace arm_compute;
 
 CLDepthConcatenateKernel::CLDepthConcatenateKernel()
-    : _input(nullptr), _output(nullptr), _top_bottom(0), _left_right(0)
+    : _input(nullptr), _output(nullptr), _top_bottom(0), _left_right(0), _depth_offset(0)
 {
 }
 
@@ -75,8 +75,9 @@
     ARM_COMPUTE_ERROR_ON((output->info()->dimension(0) - input->info()->dimension(0)) % 2);
     ARM_COMPUTE_ERROR_ON((output->info()->dimension(1) - input->info()->dimension(1)) % 2);
 
-    _input  = input;
-    _output = output;
+    _input        = input;
+    _output       = output;
+    _depth_offset = depth_offset;
 
     // Add build options
     auto                  config = configs_map.find(static_cast<int>(input->info()->element_size()));
@@ -91,8 +92,6 @@
     _left_right = (output->info()->dimension(0) - input->info()->dimension(0)) / 2;
     _top_bottom = (output->info()->dimension(1) - input->info()->dimension(1)) / 2;
 
-    const int offset_to_first_elements_in_bytes = depth_offset * output->info()->strides_in_bytes()[2];
-
     const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size();
     const unsigned int num_elems_read_per_iteration      = 16 / input->info()->element_size();
     const unsigned int num_rows_read_per_iteration       = 1;
@@ -106,6 +105,18 @@
     update_window_and_padding(win, input_access, output_access);
     output_access.set_valid_region(win, ValidRegion(Coordinates(0, 0), output->info()->tensor_shape()));
 
+    ICLKernel::configure(win);
+}
+
+void CLDepthConcatenateKernel::run(const Window &window, cl::CommandQueue &queue)
+{
+    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+
+    Window slice = window.first_slice_window_3D();
+
+    const int offset_to_first_elements_in_bytes = _depth_offset * _output->info()->strides_in_bytes()[2];
+
     unsigned int  idx = 2 * num_arguments_per_3D_tensor(); // Skip the input and output parameters
     const cl_int3 offsets =
     {
@@ -117,16 +128,6 @@
     };
     _kernel.setArg<cl_int3>(idx, offsets);
 
-    ICLKernel::configure(win);
-}
-
-void CLDepthConcatenateKernel::run(const Window &window, cl::CommandQueue &queue)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
-    Window slice = window.first_slice_window_3D();
-
     do
     {
         unsigned int idx = 0;