COMPMID-1959: Implements 2D FFT on OpenCL

Change-Id: I73cf3984a5463acc854c8a59dc2bd9a5234cd99c
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/936
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
diff --git a/arm_compute/core/CL/CLKernels.h b/arm_compute/core/CL/CLKernels.h
index e3ffcd0..5749871 100644
--- a/arm_compute/core/CL/CLKernels.h
+++ b/arm_compute/core/CL/CLKernels.h
@@ -66,6 +66,7 @@
 #include "arm_compute/core/CL/kernels/CLErodeKernel.h"
 #include "arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h"
 #include "arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h"
+#include "arm_compute/core/CL/kernels/CLFFTScaleKernel.h"
 #include "arm_compute/core/CL/kernels/CLFastCornersKernel.h"
 #include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
 #include "arm_compute/core/CL/kernels/CLFlattenLayerKernel.h"
diff --git a/arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h b/arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h
index 10652cd..3082cb1 100644
--- a/arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h
+++ b/arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h
@@ -26,6 +26,8 @@
 
 #include "arm_compute/core/CL/ICLKernel.h"
 
+#include "arm_compute/core/KernelDescriptors.h"
+
 namespace arm_compute
 {
 // Forward declarations
@@ -52,19 +54,19 @@
      * @param[in]  input  Source tensor. Data types supported: F32.
      * @param[out] output Destination tensor. Data type supported: same as @p input
      * @param[in]  idx    Digit reverse index tensor. Data type supported: U32
-     * @param[in]  axis   Axis to perform digit reverse on.
+     * @param[in]  config Kernel configuration.
      */
-    void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, unsigned int axis);
+    void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config);
     /** Static function to check if given info will lead to a valid configuration of @ref CLFFTDigitReverseKernel
      *
      * @param[in] input  Source tensor info. Data types supported: F32.
      * @param[in] output Destination tensor info. Data type supported: same as @p input
      * @param[in] idx    Digit reverse index tensor info. Data type supported: U32
-     * @param[in] axis   Axis to perform digit reverse on.
+     * @param[in] config Kernel configuration.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, unsigned int axis);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, const FFTDigitReverseKernelInfo &config);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h b/arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h
index 9de775e..16fa390 100644
--- a/arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h
+++ b/arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h
@@ -59,7 +59,7 @@
      * @param[out]    output Destination tensor. Can be nullptr. Data type supported: same as @p input
      * @param[in]     config FFT descriptor metadata.
      */
-    void configure(ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelDescriptor &config);
+    void configure(ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config);
     /** Static function to check if given info will lead to a valid configuration of @ref CLFFTRadixStageKernel
      *
      * @param[in] input  Source tensor info. Data types supported: F32.
@@ -68,7 +68,7 @@
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTRadixStageKernelDescriptor &config);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTRadixStageKernelInfo &config);
     /** Returns the radix that are support by the FFT kernel
      *
      * @return A set of supported radix
diff --git a/arm_compute/core/CL/kernels/CLFFTScaleKernel.h b/arm_compute/core/CL/kernels/CLFFTScaleKernel.h
new file mode 100644
index 0000000..39ecac4
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLFFTScaleKernel.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLFFTSCALEKERNEL_H__
+#define __ARM_COMPUTE_CLFFTSCALEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+#include "arm_compute/core/KernelDescriptors.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ICLTensor;
+
+/** Interface for the inverse FFT scale kernel. */
+class CLFFTScaleKernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLFFTScaleKernel();
+    /** Prevent instances of this class from being copied (as this class holds raw tensor pointers) */
+    CLFFTScaleKernel(const CLFFTScaleKernel &) = delete;
+    /** Prevent instances of this class from being copied (as this class holds raw tensor pointers) */
+    CLFFTScaleKernel &operator=(const CLFFTScaleKernel &) = delete;
+    /** Default move constructor */
+    CLFFTScaleKernel(CLFFTScaleKernel &&) = default;
+    /** Default move assignment operator */
+    CLFFTScaleKernel &operator=(CLFFTScaleKernel &&) = default;
+    /** Default destructor */
+    ~CLFFTScaleKernel() = default;
+    /** Set the input and output tensors and the kernel configuration.
+     *
+     * @param[in,out] input  Source tensor. Data types supported: F32.
+     * @param[out]    output Destination tensor. Data type supported: same as @p input
+     * @param[in]     config Kernel configuration (scale factor and conjugate flag, see @ref FFTScaleKernelInfo)
+     */
+    void configure(ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config);
+    /** Static function to check if given info will lead to a valid configuration of @ref CLFFTScaleKernel
+     *
+     * @param[in] input  Source tensor info. Data types supported: F32.
+     * @param[in] output Destination tensor info. Data type supported: same as @p input
+     * @param[in] config Kernel configuration (scale factor and conjugate flag, see @ref FFTScaleKernelInfo)
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTScaleKernelInfo &config);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+    ICLTensor *_input;
+    ICLTensor *_output;
+    bool       _run_in_place;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLFFTSCALEKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
index b835aa7..804182b 100644
--- a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
+++ b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -29,11 +29,10 @@
 
 namespace arm_compute
 {
+// Forward declarations
 class ICLTensor;
 
-/** Interface for the pixelwise multiplication kernel.
- *
- */
+/** Interface for the pixelwise multiplication kernel. */
 class CLPixelWiseMultiplicationKernel : public ICLKernel
 {
 public:
@@ -83,5 +82,46 @@
     const ICLTensor *_input2;
     ICLTensor       *_output;
 };
+
+/** Interface for the complex pixelwise multiplication kernel. */
+class CLComplexPixelWiseMultiplicationKernel : public ICLKernel
+{
+public:
+    /** Default constructor. */
+    CLComplexPixelWiseMultiplicationKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLComplexPixelWiseMultiplicationKernel(const CLComplexPixelWiseMultiplicationKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLComplexPixelWiseMultiplicationKernel &operator=(const CLComplexPixelWiseMultiplicationKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLComplexPixelWiseMultiplicationKernel(CLComplexPixelWiseMultiplicationKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLComplexPixelWiseMultiplicationKernel &operator=(CLComplexPixelWiseMultiplicationKernel &&) = default;
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in]  input1 An input tensor. Data types supported: F32. Number of channels supported: 2.
+     * @param[in]  input2 An input tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+     * @param[out] output The output tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+     */
+    void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+    /** Static function to check if given info will lead to a valid configuration of @ref CLComplexPixelWiseMultiplicationKernel
+     *
+     * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2.
+     * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+     * @param[in] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+    BorderSize border_size() const override;
+
+private:
+    const ICLTensor *_input1;
+    const ICLTensor *_input2;
+    ICLTensor       *_output;
+};
 } // namespace arm_compute
 #endif /*__ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H__ */
diff --git a/arm_compute/core/KernelDescriptors.h b/arm_compute/core/KernelDescriptors.h
index 186dbfb..83131f4 100644
--- a/arm_compute/core/KernelDescriptors.h
+++ b/arm_compute/core/KernelDescriptors.h
@@ -26,10 +26,24 @@
 
 namespace arm_compute
 {
-/** Descriptor used by the FFT core kernels */
-struct FFTRadixStageKernelDescriptor
+/** Descriptor for FFT scale kernels */
+struct FFTScaleKernelInfo
 {
-    unsigned int axis{ 0 };               /**< Axis to run the FFT on. */
+    float scale{ 0.f };      /**< Scale factor to apply. */
+    bool  conjugate{ true }; /**< Flag to conjugate the output. */
+};
+
+/** Descriptor for FFT digit reverse kernels */
+struct FFTDigitReverseKernelInfo
+{
+    unsigned int axis{ 0 };          /**< Axis to perform the digit reverse on. */
+    bool         conjugate{ false }; /**< Flag to conjugate the output. */
+};
+
+/** Descriptor used by the FFT core kernels */
+struct FFTRadixStageKernelInfo
+{
+    unsigned int axis{ 0 };               /**< Axis to run the kernel on. */
     unsigned int radix{ 0 };              /**< Radix to use. */
     unsigned int Nx{ 0 };                 /**< Nx coefficient. */
     bool         is_first_stage{ false }; /**< Flags if the FFT kernels is the first stage of a decomposed FFT. */
diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h
index f102184..a4fcdc2 100644
--- a/arm_compute/runtime/CL/CLFunctions.h
+++ b/arm_compute/runtime/CL/CLFunctions.h
@@ -67,6 +67,8 @@
 #include "arm_compute/runtime/CL/functions/CLEqualizeHistogram.h"
 #include "arm_compute/runtime/CL/functions/CLErode.h"
 #include "arm_compute/runtime/CL/functions/CLFFT1D.h"
+#include "arm_compute/runtime/CL/functions/CLFFT2D.h"
+#include "arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h"
 #include "arm_compute/runtime/CL/functions/CLFastCorners.h"
 #include "arm_compute/runtime/CL/functions/CLFillBorder.h"
 #include "arm_compute/runtime/CL/functions/CLFlattenLayer.h"
diff --git a/arm_compute/runtime/CL/functions/CLFFT1D.h b/arm_compute/runtime/CL/functions/CLFFT1D.h
index 1612cf7..029023c 100644
--- a/arm_compute/runtime/CL/functions/CLFFT1D.h
+++ b/arm_compute/runtime/CL/functions/CLFFT1D.h
@@ -28,6 +28,7 @@
 
 #include "arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h"
 #include "arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h"
+#include "arm_compute/core/CL/kernels/CLFFTScaleKernel.h"
 #include "arm_compute/runtime/CL/CLMemoryGroup.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/FunctionDescriptors.h"
@@ -39,8 +40,9 @@
 
 /** Basic function to execute one dimensional FFT. This function calls the following OpenCL kernels:
  *
- * -# @ref CLFFTDigitReverseKernel Performs digit reverse
- * -# @ref CLFFTRadixStageKernel   A list of FFT kernels depending on the radix decomposition
+ * -# @ref CLFFTDigitReverseKernel Performs digit reverse.
+ * -# @ref CLFFTRadixStageKernel   A list of FFT kernels depending on the radix decomposition.
+ * -# @ref CLFFTScaleKernel        Performs output scaling in case of an inverse FFT.
  */
 class CLFFT1D : public IFunction
 {
@@ -69,11 +71,13 @@
 
 protected:
     CLMemoryGroup                            _memory_group;
-    CLTensor                                 _digit_reversed_input;
-    CLTensor                                 _digit_reverse_indices;
     CLFFTDigitReverseKernel                  _digit_reverse_kernel;
     std::unique_ptr<CLFFTRadixStageKernel[]> _fft_kernels;
+    CLFFTScaleKernel                         _scale_kernel;
+    CLTensor                                 _digit_reversed_input;
+    CLTensor                                 _digit_reverse_indices;
     unsigned int                             _num_ffts;
+    bool                                     _run_scale;
 };
 } // namespace arm_compute
 #endif /*__ARM_COMPUTE_CLFFT1D_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLFFT2D.h b/arm_compute/runtime/CL/functions/CLFFT2D.h
new file mode 100644
index 0000000..a0673ec
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLFFT2D.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLFFT2D_H__
+#define __ARM_COMPUTE_CLFFT2D_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLFFT1D.h"
+#include "arm_compute/runtime/FunctionDescriptors.h"
+
+namespace arm_compute
+{
+// Forward declaration
+class ICLTensor;
+
+/** Basic function to execute two dimensional FFT. This function calls the following OpenCL functions:
+ *
+ * -# @ref CLFFT1D 1D FFT is performed on the first given axis
+ * -# @ref CLFFT1D 1D FFT is performed on the second given axis
+ */
+class CLFFT2D : public IFunction
+{
+public:
+    /** Default constructor. The memory manager argument is optional and defaults to nullptr. */
+    CLFFT2D(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+    /** Initialise the function's source and destination.
+     *
+     * @param[in]  input  Source tensor. Data types supported: F32.
+     * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
+     * @param[in]  config FFT related configuration (axes and direction, see @ref FFT2DInfo)
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, const FFT2DInfo &config);
+    /** Static function to check if given info will lead to a valid configuration of @ref CLFFT2D.
+     *
+     * @param[in] input  Source tensor info. Data types supported: F32.
+     * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input.
+     * @param[in] config FFT related configuration (axes and direction, see @ref FFT2DInfo)
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFT2DInfo &config);
+
+    // Inherited methods overridden:
+    void run() override;
+
+protected:
+    CLMemoryGroup _memory_group;
+    CLFFT1D       _first_pass_func;
+    CLFFT1D       _second_pass_func;
+    CLTensor      _first_pass_tensor;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLFFT2D_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h
new file mode 100644
index 0000000..0fd2cf3
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLFFTCONVOLUTIONLAYER_H__
+#define __ARM_COMPUTE_CLFFTCONVOLUTIONLAYER_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
+#include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"
+#include "arm_compute/runtime/CL/functions/CLFFT2D.h"
+#include "arm_compute/runtime/CL/functions/CLPadLayer.h"
+#include "arm_compute/runtime/CL/functions/CLPermute.h"
+#include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h"
+#include "arm_compute/runtime/CL/functions/CLReductionOperation.h"
+#include "arm_compute/runtime/CL/functions/CLReshapeLayer.h"
+#include "arm_compute/runtime/CL/functions/CLReverse.h"
+#include "arm_compute/runtime/CL/functions/CLSlice.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ICLTensor;
+
+/** Basic function to execute FFT-based convolution on OpenCL. This function calls the following OpenCL functions/kernels:
+ *
+ *  -# @ref CLPermute                        Permute input if NHWC (only NCHW is supported).
+ *  -# @ref CLPadLayer                       Pad input.
+ *  -# @ref CLFFT2D                          Forward transform to the frequency domain.
+ *  -# @ref CLComplexPixelWiseMultiplication Complex element-wise product of input and the weights.
+ *  -# @ref CLReductionOperation             Reduction across channels.
+ *  -# @ref CLFFT2D                          Inverse transform back to the time domain.
+ *  -# @ref CLSlice                          Extract valid output.
+ *  -# @ref CLArithmeticAddition             Add bias.
+ *  -# @ref CLActivationLayer                Perform activation.
+ *  -# @ref CLPermute                        Permute output if NHWC (only NCHW is supported).
+ */
+class CLFFTConvolutionLayer : public IFunction
+{
+public:
+    /** Default constructor. The memory manager argument is optional and defaults to nullptr. */
+    CLFFTConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLFFTConvolutionLayer(const CLFFTConvolutionLayer &) = delete;
+    /** Default move constructor */
+    CLFFTConvolutionLayer(CLFFTConvolutionLayer &&) = default;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLFFTConvolutionLayer &operator=(const CLFFTConvolutionLayer &) = delete;
+    /** Default move assignment operator */
+    CLFFTConvolutionLayer &operator=(CLFFTConvolutionLayer &&) = default;
+    /** Set the input and output tensors.
+     *
+     * @note This function only works with square kernels (of any size) and unit strides, for both NCHW and NHWC data layouts
+     *
+     * @param[in]  input     Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+     *                       while every optional dimension from 4 and above represent a batch of inputs.
+     *                       Data types supported: F32.
+     * @param[in]  weights   Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
+     * @param[in]  biases    Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input
+     * @param[out] output    Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+     *                       Data types supported: Same as @p input.
+     * @param[in]  conv_info Contains padding and stride information described in @ref PadStrideInfo.
+     * @param[in]  act_info  (Optional) Activation layer information in case of a fused activation.
+     */
+    void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+                   const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Static function to check if given info will lead to a valid configuration of @ref CLFFTConvolutionLayer
+     *
+     * @note This function only works with square kernels (of any size) and unit strides, for both NCHW and NHWC data layouts
+     *
+     * @param[in]  input     Source tensor info. 3 lower dimensions represent a single input [width, height, IFM],
+     *                       while every optional dimension from 4 and above represent a batch of inputs.
+     *                       Data types supported: F32.
+     * @param[in]  weights   Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
+     * @param[in]  biases    Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input
+     * @param[in]  output    Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+     *                       Data types supported: Same as @p input.
+     * @param[in]  conv_info Contains padding and stride information described in @ref PadStrideInfo.
+     * @param[in]  act_info  (Optional) Activation layer information in case of a fused activation.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
+                           const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+    // Inherited methods overridden:
+    void run() override;
+    void prepare() override;
+
+private:
+    CLMemoryGroup                    _memory_group;
+    CLReverse                        _flip_weights_func;
+    CLPermute                        _permute_input_func;
+    CLPermute                        _permute_output_func;
+    CLPermute                        _permute_weights_func;
+    CLPermute                        _permute_bias_func;
+    CLPadLayer                       _pad_input_func;
+    CLPadLayer                       _pad_weights_func;
+    CLFFT2D                          _transform_input_func;
+    CLFFT2D                          _transform_weights_func;
+    CLFFT2D                          _itransform_output_func;
+    CLComplexPixelWiseMultiplication _prod_func;
+    CLReductionOperation             _reduce_func;
+    CLSlice                          _extract_output_func;
+    CLArithmeticAddition             _bias_add_func;
+    CLActivationLayer                _activation_layer_func;
+
+    CLTensor _permuted_input;
+    CLTensor _permuted_weights;
+    CLTensor _permuted_bias;
+    CLTensor _permuted_output;
+    CLTensor _padded_input;
+    CLTensor _padded_weights;
+    CLTensor _flip_axis;
+    CLTensor _flipped_weights;
+    CLTensor _transformed_input;
+    CLTensor _transformed_weights;
+    CLTensor _input_weights_product;
+    CLTensor _output_product;
+    CLTensor _output_reduced;
+    CLTensor _itransformed_output;
+    CLTensor _reshaped_output;
+    CLTensor _bias_output;
+
+    const ICLTensor *_original_weights;
+    const ICLTensor *_original_bias;
+    bool             _is_activationlayer_enabled;
+    bool             _needs_permute;
+    bool             _has_bias;
+    bool             _is_prepared;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLFFTCONVOLUTIONLAYER_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
index a59fb4a..0fa40a7 100644
--- a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
+++ b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -29,6 +29,7 @@
 
 namespace arm_compute
 {
+// Forward declaration
 class ICLTensor;
 
 /** Basic function to run @ref CLPixelWiseMultiplicationKernel. */
@@ -64,5 +65,27 @@
     static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale,
                            ConvertPolicy overflow_policy, RoundingPolicy rounding_policy);
 };
-}
+
+/** Basic function to run @ref CLComplexPixelWiseMultiplicationKernel. */
+class CLComplexPixelWiseMultiplication : public ICLSimpleFunction
+{
+public:
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in, out] input1 An input tensor. Data types supported: F32. Number of channels supported: 2.
+     *                        The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[in, out] input2 An input tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+     *                        The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[out]     output The output tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+     */
+    void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output);
+    /** Static function to check if given info will lead to a valid configuration of @ref CLComplexPixelWiseMultiplication
+     *
+     * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2.
+     * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+     * @param[in] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+     */
+    static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+};
+} // namespace arm_compute
 #endif /*__ARM_COMPUTE_CLPIXELWISEMULTIPLICATION_H__ */
diff --git a/arm_compute/runtime/FunctionDescriptors.h b/arm_compute/runtime/FunctionDescriptors.h
index 7ff2501..f9b16e4 100644
--- a/arm_compute/runtime/FunctionDescriptors.h
+++ b/arm_compute/runtime/FunctionDescriptors.h
@@ -24,12 +24,29 @@
 #ifndef __ARM_COMPUTE_RUNTIME_FUNCTION_DESCRIPTORS_H__
 #define __ARM_COMPUTE_RUNTIME_FUNCTION_DESCRIPTORS_H__
 
+#include <utility>
+
 namespace arm_compute
 {
-/** Descriptor used by the FFT1d function */
+/** FFT direction to use */
+enum class FFTDirection
+{
+    Forward, /**< Forward FFT */
+    Inverse /**< Inverse FFT */
+};
+
+/** Descriptor used by the FFT1D function */
 struct FFT1DInfo
 {
-    unsigned int axis{ 0 }; /**< Axis to run the FFT on. */
+    unsigned int axis{ 0 };                          /**< Axis to run the FFT on. */
+    FFTDirection direction{ FFTDirection::Forward }; /**< Direction of the FFT. */
+};
+
+/** Descriptor used by the FFT2D function */
+struct FFT2DInfo
+{
+    std::pair<unsigned int, unsigned int> axes{ 0, 1 }; /**< Axes to run on. If same, multiple transforms are performed on a single axis. */
+    FFTDirection direction{ FFTDirection::Forward };    /**< Direction of the FFT. */
 };
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_RUNTIME_FUNCTION_DESCRIPTORS_H__ */