APPBROWSER-340: Implement NormalizePlanarYUV operator

Change-Id: I943fe27ed83e6c499d8f9919c7083f39f6f25acc
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/113211
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Stephen Li <stephen.li@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
diff --git a/src/core/GLES_COMPUTE/GCKernelLibrary.cpp b/src/core/GLES_COMPUTE/GCKernelLibrary.cpp
index e8c6ad2..484c8aa 100644
--- a/src/core/GLES_COMPUTE/GCKernelLibrary.cpp
+++ b/src/core/GLES_COMPUTE/GCKernelLibrary.cpp
@@ -220,6 +220,7 @@
     { "batchnormalization_layer", "batchnormalization_layer.cs" },
     { "concatenate_depth", "concatenate.cs" },
     { "dropout", "dropout.cs" },
+    { "normalize_planar_yuv_layer", "normalize_planar_yuv_layer.cs" },
 };
 
 const std::map<std::string, std::string> GCKernelLibrary::_program_source_map =
@@ -289,6 +290,10 @@
         "dropout.cs",
 #include "./cs_shaders/dropout.csembed"
     },
+    {
+        "normalize_planar_yuv_layer.cs",
+#include "./cs_shaders/normalize_planar_yuv_layer.csembed"
+    },
 #endif /* EMBEDDED_KERNELS */
 };
 
diff --git a/src/core/GLES_COMPUTE/cs_shaders/normalize_planar_yuv_layer.cs b/src/core/GLES_COMPUTE/cs_shaders/normalize_planar_yuv_layer.cs
new file mode 100644
index 0000000..18a9af7
--- /dev/null
+++ b/src/core/GLES_COMPUTE/cs_shaders/normalize_planar_yuv_layer.cs
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in;
+
+#include "helpers_cs.h"
+
+precision mediump float;
+
+/** Normalize every source value as (src - mean) / sd, using the mean/sd entry selected by the z invocation index.
+ *
+ * @param[in]  src_ptr    Pointer to the source tensor. Supported data types: F16
+ * @param[in]  src_attrs  The attributes of the source tensor
+ * @param[out] dst_ptr    Pointer to the destination tensor. Supported data types: same as @p src_ptr
+ * @param[in]  dst_attrs  The attributes of the destination tensor
+ * @param[in]  mean_ptr   Pointer to the mean tensor. Supported data types: same as @p src_ptr
+ * @param[in]  mean_attrs The attributes of the mean tensor
+ * @param[in]  sd_ptr     Pointer to the standard deviation (sd) tensor. Supported data types: same as @p src_ptr
+ * @param[in]  sd_attrs   The attributes of the sd tensor
+ */
+SHADER_PARAMS_DECLARATION
+{
+    Tensor3DAttributes src_attrs;
+    Tensor3DAttributes dst_attrs;
+    VectorAttributes   mean_attrs;
+    VectorAttributes   sd_attrs;
+};
+
+TENSOR_DECLARATION(1, srcBuffer, uvec2, src_ptr, src_shift, 3, readonly); // source: each element is unpacked into 4 half values in main()
+TENSOR_DECLARATION(2, dstBuffer, uvec2, dst_ptr, dst_shift, 3, writeonly); // destination: 4 half values are packed per store in main()
+TENSOR_DECLARATION(3, meanBuffer, uvec2, mean_ptr, mean_shift, 3, readonly); // mean values, fetched four at a time in main()
+TENSOR_DECLARATION(4, sdBuffer, uvec2, sd_ptr, sd_shift, 3, readonly); // sd values, fetched four at a time in main()
+
+void main(void)
+{
+    Tensor3DIterator src_iter  = CONVERT_TO_TENSOR3D_ITERATOR(src_attrs, src_shift);
+    Tensor3DIterator dst_iter  = CONVERT_TO_TENSOR3D_ITERATOR(dst_attrs, dst_shift);
+    VectorIterator   mean_iter = CONVERT_TO_VECTOR_ITERATOR(mean_attrs, mean_shift);
+    VectorIterator   sd_iter   = CONVERT_TO_VECTOR_ITERATOR(sd_attrs, sd_shift);
+    // Computes (src - mean) / sd for the four half values of the current item and stores the result.
+    vec4 unpacked_s[3]; // [0] = source values, [1] = mean values, [2] = sd values
+    vec4 tmp;
+    vec4 result;
+    // The z invocation index is used both as the offset into mean/sd and to pick the component below.
+    uint current_slice = gl_GlobalInvocationID.z;
+    unpacked_s[0]      = LOAD_UNPACK4_CURRENT_ITEM_HALF(src_ptr, src_iter);
+    unpacked_s[1]      = LOAD_UNPACK4_HALF(mean_ptr, TENSOR_OFFSET_ADVANCE_IN_BYTES(mean_iter, current_slice * mean_attrs.stride_x));
+    unpacked_s[2]      = LOAD_UNPACK4_HALF(sd_ptr, TENSOR_OFFSET_ADVANCE_IN_BYTES(sd_iter, current_slice * sd_attrs.stride_x));
+    // NOTE(review): presumably the loads above return the aligned group of 4 containing current_slice - confirm in helpers_cs.h.
+    if((current_slice % uint(4)) == uint(0)) // slice is the 1st entry of its group: use component x
+    {
+        tmp    = unpacked_s[0] - unpacked_s[1].x;
+        result = tmp / unpacked_s[2].x;
+
+        STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, result);
+    }
+    else if((current_slice % uint(4)) == uint(1)) // 2nd entry: use component y
+    {
+        tmp    = unpacked_s[0] - unpacked_s[1].y;
+        result = tmp / unpacked_s[2].y;
+
+        STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, result);
+    }
+    else if((current_slice % uint(4)) == uint(2)) // 3rd entry: use component z
+    {
+        tmp    = unpacked_s[0] - unpacked_s[1].z;
+        result = tmp / unpacked_s[2].z;
+
+        STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, result);
+    }
+    else // remainder 3, i.e. 4th entry: use component w
+    {
+        tmp    = unpacked_s[0] - unpacked_s[1].w;
+        result = tmp / unpacked_s[2].w;
+
+        STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, result);
+    }
+}
diff --git a/src/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.cpp
new file mode 100644
index 0000000..bc9c7eb
--- /dev/null
+++ b/src/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.cpp
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.h"
+
+#include "arm_compute/core/AccessWindowStatic.h"
+#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
+#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
+#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Window.h"
+
+#include "support/ToolchainSupport.h"
+
+using namespace arm_compute;
+
+GCNormalizePlanarYUVLayerKernel::GCNormalizePlanarYUVLayerKernel()
+    : _input(nullptr), _output(nullptr), _mean(nullptr), _sd(nullptr) // all tensor pointers are assigned in configure()
+{
+}
+
+void GCNormalizePlanarYUVLayerKernel::configure(const IGCTensor *input, IGCTensor *output, const IGCTensor *mean, const IGCTensor *sd)
+{
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(output);
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, mean, sd);
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(mean, sd);
+    ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) != mean->info()->dimension(0));
+
+    _input  = input;
+    _output = output;
+    _mean   = mean;
+    _sd     = sd;
+
+    // Four elements are handled per window step along X
+    const unsigned int elems_per_step = 4;
+
+    // The shader is built with a 1x1x1 local work-group size
+    std::set<std::string> shader_defines;
+    shader_defines.emplace("#define LOCAL_SIZE_X 1");
+    shader_defines.emplace("#define LOCAL_SIZE_Y 1");
+    shader_defines.emplace("#define LOCAL_SIZE_Z 1");
+
+    _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("normalize_planar_yuv_layer", shader_defines));
+
+    // mean/sd are accessed over their full extent, with the X end rounded up to a
+    // multiple of the step so that whole groups of four elements are covered
+    const int mean_end_x = ceil_to_multiple(mean->info()->dimension(0), elems_per_step);
+    const int sd_end_x   = ceil_to_multiple(sd->info()->dimension(0), elems_per_step);
+
+    Window                 kernel_window = calculate_max_window(*input->info(), Steps(elems_per_step));
+    AccessWindowHorizontal input_access(input->info(), 0, elems_per_step);
+    AccessWindowHorizontal output_access(output->info(), 0, elems_per_step);
+    AccessWindowStatic     mean_access(mean->info(), 0, 0, mean_end_x, mean->info()->dimension(1));
+    AccessWindowStatic     sd_access(sd->info(), 0, 0, sd_end_x, sd->info()->dimension(1));
+
+    update_window_and_padding(kernel_window, input_access, output_access, mean_access, sd_access);
+    output_access.set_valid_region(kernel_window, input->info()->valid_region());
+    IGCKernel::configure(kernel_window);
+}
+
+void GCNormalizePlanarYUVLayerKernel::run(const Window &window)
+{
+    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
+    // Activate the shader program, then dispatch it once per 3D slice of the window.
+    _kernel.use();
+
+    Window slice = window.first_slice_window_3D();
+
+    // Mean and sd are bound once, outside the slice loop, through a 1D window whose
+    // X dimension is collapsed to (0, 0, 0); the shader offsets into them itself.
+    Window slice_in = window.first_slice_window_1D();
+    slice_in.set(Window::DimX, Window::Dimension(0, 0, 0));
+
+    unsigned int idx = 2 * num_arguments_per_3D_tensor(); // mean/sd arguments follow the two 3D tensors
+    add_1D_tensor_argument(idx, _mean, 3, slice_in);
+    add_1D_tensor_argument(idx, _sd, 4, slice_in);
+
+    do
+    {
+        idx = 0; // input/output arguments are rewritten for every slice
+        add_3D_tensor_argument(idx, _input, 1, slice);
+        add_3D_tensor_argument(idx, _output, 2, slice);
+
+        _kernel.update_shader_params();
+
+        enqueue(*this, slice);
+    }
+    while(window.slide_window_slice_3D(slice));
+}
diff --git a/src/runtime/GLES_COMPUTE/functions/GCNormalizePlanarYUVLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCNormalizePlanarYUVLayer.cpp
new file mode 100755
index 0000000..de04a28
--- /dev/null
+++ b/src/runtime/GLES_COMPUTE/functions/GCNormalizePlanarYUVLayer.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCNormalizePlanarYUVLayer.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
+
+using namespace arm_compute;
+
+GCNormalizePlanarYUVLayer::GCNormalizePlanarYUVLayer()
+    : _norm_kernel() // the kernel is only set up later, by configure()
+{
+}
+
+void GCNormalizePlanarYUVLayer::configure(const IGCTensor *input, IGCTensor *output, const IGCTensor *mean, const IGCTensor *sd)
+{
+    _norm_kernel.configure(input, output, mean, sd); // all arguments are forwarded unchanged to the kernel
+}
+
+void GCNormalizePlanarYUVLayer::run()
+{
+    GCScheduler::get().enqueue(_norm_kernel, true); // NOTE(review): 'true' is presumably the flush flag - confirm against GCScheduler::enqueue
+}