COMPMID-477 - Optimized CLNormalizationLayer
The CLPixelWiseMultiplication step has been removed from the function; CLNormalizationLayerKernel::configure() no longer takes a separate squared_input tensor.

Change-Id: Ibe7edd7921d5cef6ff68fdeeca89771129a8eaea
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/84459
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
diff --git a/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h
index 5eedc31..5f8c9c9 100644
--- a/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h
@@ -48,14 +48,12 @@
 
     /** Set the input and output tensors.
      *
-     * @param[in]  input         Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
-     *                           and an optional 4th dimension for batch of inputs. Data types supported: QS8/QS16/F16/F32.
-     * @param[in]  squared_input Source with each element has been squared. 3 lower dims represent a single input with dimensions [width, height, IFM].
-     *                           Data types supported: same as @p input.
-     * @param[out] output        Destination tensor. Output will have the same number of dimensions as input. Data types supported: same as @p input.
-     * @param[in]  norm_info     Normalization layer information like the normalization type, normalization size and other parameters.
+     * @param[in]  input     Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
+     *                       and an optional 4th dimension for batch of inputs. Data types supported: QS8/QS16/F16/F32.
+     * @param[out] output    Destination tensor. Output will have the same number of dimensions as input. Data types supported: same as @p input.
+     * @param[in]  norm_info Normalization layer information like the normalization type, normalization size and other parameters.
      */
-    void configure(const ICLTensor *input, const ICLTensor *squared_input, ICLTensor *output, NormalizationLayerInfo norm_info);
+    void configure(const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
@@ -63,7 +61,6 @@
 
 private:
     const ICLTensor *_input;
-    const ICLTensor *_squared_input;
     ICLTensor       *_output;
     BorderSize       _border_size;
     bool             _is_in_map;