COMPMID-344 Updated doxygen

Change-Id: I32f7b84daa560e460b77216add529c8fa8b327ae
diff --git a/arm_compute/runtime/CL/CLArray.h b/arm_compute/runtime/CL/CLArray.h
new file mode 100644
index 0000000..f4c2ef0
--- /dev/null
+++ b/arm_compute/runtime/CL/CLArray.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLARRAY_H__
+#define __ARM_COMPUTE_CLARRAY_H__
+
+#include "arm_compute/core/CL/ICLArray.h"
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+namespace arm_compute
+{
+/** CLArray implementation: ICLArray<T> backed by an OpenCL buffer created at construction on the CLScheduler's context. */
+template <class T>
+class CLArray : public ICLArray<T>
+{
+public:
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLArray(const CLArray &) = delete;
+    /** Prevent instances of this class from being copy assigned (As this class contains pointers) */
+    const CLArray &operator=(const CLArray &) = delete;
+    /** Constructor: initializes an array which can contain up to max_num_values values.
+     * The backing OpenCL buffer is created with CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE and sized to max_num_values * sizeof(T) bytes.
+     * @param[in] max_num_values Maximum number of values the array will be able to store
+     */
+    CLArray(size_t max_num_values)
+        : ICLArray<T>(max_num_values), _buffer(cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, max_num_values * sizeof(T)))
+    {
+    }
+    /** Enqueue a map operation of the allocated buffer on the CLScheduler's queue.
+     *
+     * @param[in] blocking If true, then the mapping will be ready to use by the time
+     *                     this method returns, else it is the caller's responsibility
+     *                     to flush the queue and wait for the mapping operation to have completed.
+     */
+    void map(bool blocking = true)
+    {
+        ICLArray<T>::map(CLScheduler::get().queue(), blocking);
+    }
+    using ICLArray<T>::map; // Keep the inherited map(queue, blocking) overload visible alongside map(bool)
+    /** Enqueue an unmap operation of the allocated and mapped buffer on the CLScheduler's queue.
+     *
+     * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+     *       the memory is accessed by the device.
+     */
+    void unmap()
+    {
+        ICLArray<T>::unmap(CLScheduler::get().queue());
+    }
+    using ICLArray<T>::unmap; // Keep the inherited unmap(queue) overload visible alongside unmap()
+
+    // Inherited methods overridden:
+    const cl::Buffer &cl_buffer() const override
+    {
+        return _buffer;
+    }
+
+protected:
+    // Inherited methods overridden (do_map maps the full max_num_values() * sizeof(T) bytes for reading and writing):
+    uint8_t *do_map(cl::CommandQueue &q, bool blocking) override
+    {
+        ARM_COMPUTE_ERROR_ON(nullptr == _buffer.get());
+        return static_cast<uint8_t *>(q.enqueueMapBuffer(_buffer, blocking ? CL_TRUE : CL_FALSE, CL_MAP_READ | CL_MAP_WRITE, 0, this->max_num_values() * sizeof(T)));
+    }
+    void do_unmap(cl::CommandQueue &q, uint8_t *mapping) override
+    {
+        ARM_COMPUTE_ERROR_ON(nullptr == _buffer.get());
+        q.enqueueUnmapMemObject(_buffer, mapping);
+    }
+
+private:
+    cl::Buffer _buffer; /**< OpenCL buffer backing the array (max_num_values * sizeof(T) bytes) */
+};
+
+using CLKeyPointArray        = CLArray<KeyPoint>; /**< OpenCL array of KeyPoint values */
+using CLCoordinates2DArray   = CLArray<Coordinates2D>; /**< OpenCL array of Coordinates2D values */
+using CLDetectionWindowArray = CLArray<DetectionWindow>; /**< OpenCL array of DetectionWindow values */
+using CLSize2DArray          = CLArray<Size2D>; /**< OpenCL array of Size2D values */
+using CLUInt8Array           = CLArray<cl_uchar>; /**< OpenCL array of cl_uchar values */
+using CLUInt16Array          = CLArray<cl_ushort>; /**< OpenCL array of cl_ushort values */
+using CLUInt32Array          = CLArray<cl_uint>; /**< OpenCL array of cl_uint values */
+using CLInt16Array           = CLArray<cl_short>; /**< OpenCL array of cl_short values */
+using CLInt32Array           = CLArray<cl_int>; /**< OpenCL array of cl_int values */
+using CLFloatArray           = CLArray<cl_float>; /**< OpenCL array of cl_float values */
+}
+#endif /* __ARM_COMPUTE_CLARRAY_H__ */
diff --git a/arm_compute/runtime/CL/CLDistribution1D.h b/arm_compute/runtime/CL/CLDistribution1D.h
new file mode 100644
index 0000000..55dd124
--- /dev/null
+++ b/arm_compute/runtime/CL/CLDistribution1D.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDISTRIBUTION1D_H__
+#define __ARM_COMPUTE_CLDISTRIBUTION1D_H__
+
+#include "arm_compute/core/CL/ICLDistribution1D.h"
+#include "arm_compute/core/CL/OpenCL.h"
+
+#include <cstddef>
+#include <cstdint>
+
+namespace arm_compute
+{
+/** OpenCL implementation of the 1D distribution interface (ICLDistribution1D) */
+class CLDistribution1D : public ICLDistribution1D
+{
+public:
+    /** Constructor: Creates a 1D CLDistribution of a consecutive interval [offset, offset + range - 1]
+     *               defined by a start offset and valid range, divided equally into num_bins parts.
+     *
+     * @param[in] num_bins The number of bins the distribution is divided in.
+     * @param[in] offset   The start of the values to use.
+     * @param[in] range    The total number of the consecutive values of the distribution interval.
+     */
+    CLDistribution1D(size_t num_bins, int32_t offset, uint32_t range);
+    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    CLDistribution1D(const CLDistribution1D &) = delete;
+    /** Prevent instances of this class from being copy assigned (As this class contains pointers). */
+    CLDistribution1D &operator=(const CLDistribution1D &) = delete;
+    /** Enqueue a map operation of the allocated buffer.
+     *
+     * @param[in] blocking If true, then the mapping will be ready to use by the time
+     *                     this method returns, else it is the caller's responsibility
+     *                     to flush the queue and wait for the mapping operation to have completed.
+     */
+    void map(bool blocking = true);
+    using ICLDistribution1D::map; // Also expose the map() overloads inherited from ICLDistribution1D
+    /** Enqueue an unmap operation of the allocated and mapped buffer.
+     *
+     * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+     *       the memory is accessed by the device.
+     */
+    void unmap();
+    using ICLDistribution1D::unmap; // Also expose the unmap() overloads inherited from ICLDistribution1D
+
+    // Inherited methods overridden:
+    cl::Buffer &cl_buffer() override;
+
+protected:
+    // Inherited methods overridden:
+    uint32_t *do_map(cl::CommandQueue &q, bool blocking) override;
+    void do_unmap(cl::CommandQueue &q) override;
+
+private:
+    cl::Buffer _mem;
+};
+}
+#endif /* __ARM_COMPUTE_CLDISTRIBUTION1D_H__ */
diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h
new file mode 100644
index 0000000..82929ba
--- /dev/null
+++ b/arm_compute/runtime/CL/CLFunctions.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLFUNCTIONS_H__
+#define __ARM_COMPUTE_CLFUNCTIONS_H__
+
+/* Header regrouping all the CL functions */
+#include "arm_compute/runtime/CL/functions/CLAbsoluteDifference.h"
+#include "arm_compute/runtime/CL/functions/CLAccumulate.h"
+#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
+#include "arm_compute/runtime/CL/functions/CLArithmeticAddition.h"
+#include "arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h"
+#include "arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h"
+#include "arm_compute/runtime/CL/functions/CLBitwiseAnd.h"
+#include "arm_compute/runtime/CL/functions/CLBitwiseNot.h"
+#include "arm_compute/runtime/CL/functions/CLBitwiseOr.h"
+#include "arm_compute/runtime/CL/functions/CLBitwiseXor.h"
+#include "arm_compute/runtime/CL/functions/CLBox3x3.h"
+#include "arm_compute/runtime/CL/functions/CLCannyEdge.h"
+#include "arm_compute/runtime/CL/functions/CLChannelCombine.h"
+#include "arm_compute/runtime/CL/functions/CLChannelExtract.h"
+#include "arm_compute/runtime/CL/functions/CLColorConvert.h"
+#include "arm_compute/runtime/CL/functions/CLConvolution.h"
+#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
+#include "arm_compute/runtime/CL/functions/CLDepthConcatenate.h"
+#include "arm_compute/runtime/CL/functions/CLDepthConvert.h"
+#include "arm_compute/runtime/CL/functions/CLDerivative.h"
+#include "arm_compute/runtime/CL/functions/CLDilate.h"
+#include "arm_compute/runtime/CL/functions/CLEqualizeHistogram.h"
+#include "arm_compute/runtime/CL/functions/CLErode.h"
+#include "arm_compute/runtime/CL/functions/CLFastCorners.h"
+#include "arm_compute/runtime/CL/functions/CLFillBorder.h"
+#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"
+#include "arm_compute/runtime/CL/functions/CLGEMM.h"
+#include "arm_compute/runtime/CL/functions/CLGEMMInterleave4x4.h"
+#include "arm_compute/runtime/CL/functions/CLGEMMLowp.h"
+#include "arm_compute/runtime/CL/functions/CLGaussian3x3.h"
+#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h"
+#include "arm_compute/runtime/CL/functions/CLGaussianPyramid.h"
+#include "arm_compute/runtime/CL/functions/CLHOGDescriptor.h"
+#include "arm_compute/runtime/CL/functions/CLHOGDetector.h"
+#include "arm_compute/runtime/CL/functions/CLHOGGradient.h"
+#include "arm_compute/runtime/CL/functions/CLHOGMultiDetection.h"
+#include "arm_compute/runtime/CL/functions/CLHarrisCorners.h"
+#include "arm_compute/runtime/CL/functions/CLHistogram.h"
+#include "arm_compute/runtime/CL/functions/CLIntegralImage.h"
+#include "arm_compute/runtime/CL/functions/CLLaplacianPyramid.h"
+#include "arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h"
+#include "arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h"
+#include "arm_compute/runtime/CL/functions/CLMagnitude.h"
+#include "arm_compute/runtime/CL/functions/CLMeanStdDev.h"
+#include "arm_compute/runtime/CL/functions/CLMedian3x3.h"
+#include "arm_compute/runtime/CL/functions/CLMinMaxLocation.h"
+#include "arm_compute/runtime/CL/functions/CLNonLinearFilter.h"
+#include "arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h"
+#include "arm_compute/runtime/CL/functions/CLNormalizationLayer.h"
+#include "arm_compute/runtime/CL/functions/CLOpticalFlow.h"
+#include "arm_compute/runtime/CL/functions/CLPhase.h"
+#include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h"
+#include "arm_compute/runtime/CL/functions/CLPoolingLayer.h"
+#include "arm_compute/runtime/CL/functions/CLRemap.h"
+#include "arm_compute/runtime/CL/functions/CLScale.h"
+#include "arm_compute/runtime/CL/functions/CLScharr3x3.h"
+#include "arm_compute/runtime/CL/functions/CLSobel3x3.h"
+#include "arm_compute/runtime/CL/functions/CLSobel5x5.h"
+#include "arm_compute/runtime/CL/functions/CLSobel7x7.h"
+#include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h"
+#include "arm_compute/runtime/CL/functions/CLTableLookup.h"
+#include "arm_compute/runtime/CL/functions/CLThreshold.h"
+#include "arm_compute/runtime/CL/functions/CLTranspose.h"
+#include "arm_compute/runtime/CL/functions/CLWarpAffine.h"
+#include "arm_compute/runtime/CL/functions/CLWarpPerspective.h"
+
+#endif /* __ARM_COMPUTE_CLFUNCTIONS_H__ */
diff --git a/arm_compute/runtime/CL/CLHOG.h b/arm_compute/runtime/CL/CLHOG.h
new file mode 100644
index 0000000..9b4a303
--- /dev/null
+++ b/arm_compute/runtime/CL/CLHOG.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHOG_H__
+#define __ARM_COMPUTE_CLHOG_H__
+
+#include "arm_compute/core/CL/ICLHOG.h"
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/HOGInfo.h"
+#include "arm_compute/core/Types.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+/** OpenCL implementation of HOG data-object */
+class CLHOG : public ICLHOG
+{
+public:
+    /** Default constructor */
+    CLHOG();
+    /** Allocate the HOG descriptor using the given HOG's metadata
+     *
+     * @param[in] input HOG's metadata used to allocate the HOG descriptor
+     */
+    void init(const HOGInfo &input);
+
+    /** Enqueue a map operation of the allocated buffer.
+     *
+     * @param[in] blocking If true, then the mapping will be ready to use by the time
+     *                     this method returns, else it is the caller's responsibility
+     *                     to flush the queue and wait for the mapping operation to have completed.
+     */
+    void map(bool blocking = true);
+    using ICLHOG::map; // Also expose the map() overloads inherited from ICLHOG
+
+    /** Enqueue an unmap operation of the allocated and mapped buffer.
+     *
+     * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+     *       the memory is accessed by the device.
+     */
+    void unmap();
+    using ICLHOG::unmap; // Also expose the unmap() overloads inherited from ICLHOG
+
+    // Inherited methods overridden:
+    void              free() override;
+    const HOGInfo    *info() const override;
+    const cl::Buffer &cl_buffer() const override;
+
+protected:
+    // Inherited methods overridden:
+    uint8_t *do_map(cl::CommandQueue &q, bool blocking) override;
+    void do_unmap(cl::CommandQueue &q) override;
+
+private:
+    HOGInfo    _info; /**< HOG's metadata */
+    cl::Buffer _buffer; /**< OpenCL buffer object */
+};
+}
+#endif /* __ARM_COMPUTE_CLHOG_H__ */
diff --git a/arm_compute/runtime/CL/CLLut.h b/arm_compute/runtime/CL/CLLut.h
new file mode 100644
index 0000000..9bac2b4
--- /dev/null
+++ b/arm_compute/runtime/CL/CLLut.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLLUT_H__
+#define __ARM_COMPUTE_CLLUT_H__
+
+#include "arm_compute/core/CL/ICLLut.h"
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLLutAllocator.h"
+
+#include <cstddef>
+#include <cstdint>
+
+namespace arm_compute
+{
+class ILutAllocator;
+
+/** Basic implementation of the OpenCL lut interface */
+class CLLut : public ICLLut
+{
+public:
+    /** Default constructor */
+    CLLut();
+    /** Constructor: initializes a LUT which can contain num_elements values of data_type type.
+     *
+     * @param[in] num_elements Number of elements of the LUT.
+     * @param[in] data_type    Data type of each element.
+     */
+    CLLut(size_t num_elements, DataType data_type);
+    /** Return a pointer to the lut's allocator
+     *
+     * @return A pointer to the lut's allocator
+     */
+    ILutAllocator *allocator();
+    /** Enqueue a map operation of the allocated buffer.
+     *
+     * @param[in] blocking If true, then the mapping will be ready to use by the time
+     *                     this method returns, else it is the caller's responsibility
+     *                     to flush the queue and wait for the mapping operation to have completed.
+     */
+    void map(bool blocking = true);
+    using ICLLut::map; // Also expose the map() overloads inherited from ICLLut
+    /** Enqueue an unmap operation of the allocated and mapped buffer.
+     *
+     * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+     *       the memory is accessed by the device.
+     */
+    void unmap();
+    using ICLLut::unmap; // Also expose the unmap() overloads inherited from ICLLut
+
+    // Inherited methods overridden:
+    size_t            num_elements() const override;
+    uint32_t          index_offset() const override;
+    size_t            size_in_bytes() const override;
+    DataType          type() const override;
+    const cl::Buffer &cl_buffer() const override;
+    void              clear() override;
+
+protected:
+    // Inherited methods overridden:
+    uint8_t *do_map(cl::CommandQueue &q, bool blocking) override;
+    void do_unmap(cl::CommandQueue &q) override;
+
+private:
+    CLLutAllocator _allocator; /**< Instance of the OpenCL lut allocator */
+};
+}
+#endif /* __ARM_COMPUTE_CLLUT_H__ */
diff --git a/arm_compute/runtime/CL/CLLutAllocator.h b/arm_compute/runtime/CL/CLLutAllocator.h
new file mode 100644
index 0000000..4648ffb
--- /dev/null
+++ b/arm_compute/runtime/CL/CLLutAllocator.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLLUTALLOCATOR_H__
+#define __ARM_COMPUTE_CLLUTALLOCATOR_H__
+
+#include "arm_compute/runtime/ILutAllocator.h"
+
+#include "arm_compute/core/CL/OpenCL.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+/** Basic implementation of a CL memory LUT allocator. */
+class CLLutAllocator : public ILutAllocator
+{
+public:
+    /** Default constructor. */
+    CLLutAllocator();
+    /** Default destructor. */
+    ~CLLutAllocator() = default;
+    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    CLLutAllocator(const CLLutAllocator &) = delete;
+    /** Prevent instances of this class from being copy assigned (As this class contains pointers). */
+    const CLLutAllocator &operator=(const CLLutAllocator &) = delete;
+    /** Return the pointer to the mapped data. */
+    uint8_t *data();
+    /** Return a reference to the CL data (the OpenCL buffer object). */
+    const cl::Buffer &cl_data() const;
+    /** Enqueue a map operation of the allocated buffer on the given queue.
+     *
+     * @param[in,out] q        The CL command queue to use for the mapping operation.
+     * @param[in]     blocking If true, then the mapping will be ready to use by the time
+     *                         this method returns, else it is the caller's responsibility
+     *                         to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
+     *
+     * @return The mapping address.
+     */
+    uint8_t *map(cl::CommandQueue &q, bool blocking);
+    /** Enqueue an unmap operation of the allocated buffer on the given queue.
+     *
+     * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+     *       the memory is accessed by the device.
+     *
+     * @param[in,out] q       The CL command queue to use for the unmapping operation.
+     * @param[in]     mapping The cpu mapping to unmap.
+     */
+    void unmap(cl::CommandQueue &q, uint8_t *mapping);
+
+protected:
+    /** Allocate num_elements() * sizeof(type()) of OpenCL memory. */
+    void allocate() override;
+    /** Call map() on the OpenCL buffer.
+     *
+     * @return A pointer to the beginning of the LUT's allocation.
+     */
+    uint8_t *lock() override;
+    /** Call unmap() on the OpenCL buffer. */
+    void unlock() override;
+
+private:
+    cl::Buffer _buffer;  /**< OpenCL buffer containing the LUT data. */
+    uint8_t   *_mapping; /**< Pointer to the CPU mapping of the OpenCL buffer. */
+};
+}
+
+#endif /* __ARM_COMPUTE_CLLUTALLOCATOR_H__ */
diff --git a/arm_compute/runtime/CL/CLMultiHOG.h b/arm_compute/runtime/CL/CLMultiHOG.h
new file mode 100644
index 0000000..17bb4e0
--- /dev/null
+++ b/arm_compute/runtime/CL/CLMultiHOG.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLMULTIHOG_H__
+#define __ARM_COMPUTE_CLMULTIHOG_H__
+
+#include "arm_compute/core/CL/ICLMultiHOG.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLHOG.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+/** Basic implementation of the CL multi HOG data-objects */
+class CLMultiHOG : public ICLMultiHOG
+{
+public:
+    /** Constructor
+     *
+     * @param[in] num_models Number of HOG data objects to contain
+     *
+     */
+    CLMultiHOG(size_t num_models);
+
+    // Inherited methods overridden:
+    size_t  num_models() const override;
+    ICLHOG *cl_model(size_t index) override;
+    const ICLHOG *cl_model(size_t index) const override;
+
+private:
+    size_t                   _num_models;
+    std::unique_ptr<CLHOG[]> _model; /**< Owning pointer to an array of CLHOG data-objects */
+};
+}
+#endif /* __ARM_COMPUTE_CLMULTIHOG_H__ */
diff --git a/arm_compute/runtime/CL/CLMultiImage.h b/arm_compute/runtime/CL/CLMultiImage.h
new file mode 100644
index 0000000..f70929d
--- /dev/null
+++ b/arm_compute/runtime/CL/CLMultiImage.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLMULTIIMAGE_H__
+#define __ARM_COMPUTE_CLMULTIIMAGE_H__
+
+#include "arm_compute/core/CL/ICLMultiImage.h"
+#include "arm_compute/core/MultiImageInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+#include <array>
+
+namespace arm_compute
+{
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Basic implementation of the CL multi-planar image interface */
+class CLMultiImage : public ICLMultiImage
+{
+public:
+    /** Constructor */
+    CLMultiImage();
+    /** Init the multi-planar image
+     *
+     *  @param[in] width  Width of the whole image
+     *  @param[in] height Height of the whole image
+     *  @param[in] format Format of the whole image
+     */
+    void init(unsigned int width, unsigned int height, Format format);
+    /** Init the multi-planar image
+     *
+     * @note Uses conservative padding strategy which fits all kernels.
+     *
+     *  @param[in] width  Width of the whole image
+     *  @param[in] height Height of the whole image
+     *  @param[in] format Format of the whole image
+     */
+    void init_auto_padding(unsigned int width, unsigned int height, Format format);
+    /** Allocate a previously initialised multi image
+     *
+     * @note The multi image must not already be allocated when calling this function.
+     *
+     */
+    void allocate();
+
+    // Inherited methods overridden:
+    const MultiImageInfo *info() const override;
+    CLImage *cl_plane(unsigned int index) override;
+    const CLImage *cl_plane(unsigned int index) const override;
+
+private:
+    /** Init the multi-planar image
+     *
+     *  @param[in] width        Width of the whole image
+     *  @param[in] height       Height of the whole image
+     *  @param[in] format       Format of the whole image
+     *  @param[in] auto_padding Specifies whether the image uses auto padding
+     */
+    void internal_init(unsigned int width, unsigned int height, Format format, bool auto_padding);
+
+    MultiImageInfo _info;          /**< Instance of the multi-planar image's meta data */
+    std::array<CLImage, 3> _plane; /**< Instances of CLImage holding the planes' information */
+};
+}
+#endif /* __ARM_COMPUTE_CLMULTIIMAGE_H__ */
diff --git a/arm_compute/runtime/CL/CLPyramid.h b/arm_compute/runtime/CL/CLPyramid.h
new file mode 100644
index 0000000..5e0afb3
--- /dev/null
+++ b/arm_compute/runtime/CL/CLPyramid.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLPYRAMID_H__
+#define __ARM_COMPUTE_CLPYRAMID_H__
+
+#include "arm_compute/core/IPyramid.h"
+#include "arm_compute/core/PyramidInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+#include <cstddef>
+#include <memory>
+
+namespace arm_compute
+{
+class CLTensor;
+
+/** Basic implementation of the OpenCL pyramid interface */
+class CLPyramid : public IPyramid
+{
+public:
+    /** Default constructor */
+    CLPyramid();
+    /** Initialize pyramid data-object using the given Pyramid's metadata
+     *
+     * @param[in] info Pyramid's metadata
+     */
+    void init(const PyramidInfo &info);
+
+    /** Initialize pyramid data-object using the given Pyramid's metadata
+     *
+     * @note Uses a conservative padding strategy which fits all kernels.
+     *
+     * @param[in] info Pyramid's metadata
+     */
+    void init_auto_padding(const PyramidInfo &info);
+
+    /** Allocate the planes in the pyramid
+     *
+     * @note The pyramid must not already be allocated when calling this function.
+     *
+     */
+    void allocate();
+
+    // Inherited methods overridden:
+    const PyramidInfo *info() const override;
+    CLTensor *get_pyramid_level(size_t index) const override;
+
+private:
+    /** Initialize pyramid data-object using the given Pyramid's metadata
+     *
+     * @param[in] info         Pyramid's metadata
+     * @param[in] auto_padding Specifies whether the images in the pyramid use auto padding
+     */
+    void internal_init(const PyramidInfo &info, bool auto_padding);
+
+    PyramidInfo                 _info;    /**< Instance of the pyramid's metadata */
+    std::unique_ptr<CLTensor[]> _pyramid; /**< Owned array of tensors; presumably one per pyramid level (TODO confirm) */
+};
+}
+#endif /*__ARM_COMPUTE_CLPYRAMID_H__ */
diff --git a/arm_compute/runtime/CL/CLScheduler.h b/arm_compute/runtime/CL/CLScheduler.h
new file mode 100644
index 0000000..8e80259
--- /dev/null
+++ b/arm_compute/runtime/CL/CLScheduler.h
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLSCHEDULER_H__
+#define __ARM_COMPUTE_CLSCHEDULER_H__
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/CLTypes.h"
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLKernel;
+
+/** Provides global access to a CL context and command queue. */
+class CLScheduler
+{
+private:
+    /** Constructor (private: the scheduler is accessed through get()) */
+    CLScheduler();
+
+public:
+    /** Access the scheduler singleton.
+     *
+     * @return The scheduler
+     */
+    static CLScheduler &get();
+    /** Initialises the context and command queue used by the scheduler to default values
+     *  and sets a default device and kernel path for the @ref CLKernelLibrary.
+     */
+    void default_init()
+    {
+        CLKernelLibrary::get().init("./cl_kernels/", cl::Context::getDefault(), cl::Device::getDefault());
+        init(cl::Context::getDefault(), cl::CommandQueue::getDefault(), cl::Device::getDefault());
+    }
+    /** Schedule the execution of the passed kernel if possible.
+     *
+     * @param[in] kernel Kernel to execute.
+     * @param[in] flush  (Optional) Specifies if the command queue will be flushed after running the kernel.
+     */
+    void enqueue(ICLKernel &kernel, bool flush = true);
+
+    /** Initialises the context, command queue and GPU target to be used by the scheduler.
+     *
+     * @param[in] context A CL context.
+     * @param[in] queue   A CL command queue.
+     * @param[in] device  A CL device, used to determine the GPU target.
+     */
+    void init(cl::Context context = cl::Context::getDefault(), cl::CommandQueue queue = cl::CommandQueue::getDefault(),
+              cl::Device device = cl::Device::getDefault())
+    {
+        _context = std::move(context);
+        _queue   = std::move(queue);
+        _target  = get_target_from_device(device);
+    }
+
+    /** Accessor for the associated CL context.
+     *
+     * @return A reference to the CL context.
+     */
+    cl::Context &context()
+    {
+        return _context;
+    }
+
+    /** Accessor to set the CL context to be used by the scheduler.
+     *
+     * @param[in] context A CL context.
+     */
+    void set_context(cl::Context context)
+    {
+        _context = std::move(context);
+    }
+
+    /** Accessor for the associated CL command queue.
+     *
+     * @return A reference to the CL command queue.
+     */
+    cl::CommandQueue &queue()
+    {
+        return _queue;
+    }
+
+    /** Get the target GPU.
+     *
+     * @return The target GPU.
+     */
+    GPUTarget target() const
+    {
+        return _target;
+    }
+
+    /** Accessor to set the CL command queue to be used by the scheduler.
+     *
+     * @param[in] queue A CL command queue.
+     */
+    void set_queue(cl::CommandQueue queue)
+    {
+        _queue = std::move(queue);
+    }
+
+    /** Accessor to set target GPU to be used by the scheduler.
+     *
+     * @param[in] target The target GPU.
+     */
+    void set_target(GPUTarget target)
+    {
+        _target = target;
+    }
+
+    /** Blocks until all commands in the associated command queue have finished. */
+    void sync()
+    {
+        _queue.finish();
+    }
+
+    /** Enqueues a marker into the associated command queue and returns the event.
+     *
+     * @return An event that can be waited on to block the executing thread.
+     */
+    cl::Event enqueue_sync_event()
+    {
+        cl::Event event;
+        _queue.enqueueMarker(&event);
+
+        return event;
+    }
+
+private:
+    cl::Context      _context; /**< CL context used by the scheduler */
+    cl::CommandQueue _queue;   /**< CL command queue used by the scheduler */
+    GPUTarget        _target;  /**< Target GPU */
+};
+}
+#endif /* __ARM_COMPUTE_CLSCHEDULER_H__ */
diff --git a/arm_compute/runtime/CL/CLSubTensor.h b/arm_compute/runtime/CL/CLSubTensor.h
new file mode 100644
index 0000000..4bab164
--- /dev/null
+++ b/arm_compute/runtime/CL/CLSubTensor.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLSUBTENSOR_H__
+#define __ARM_COMPUTE_CLSUBTENSOR_H__
+
+#include "arm_compute/core/SubTensorInfo.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensorInfo;
+
+/** Basic implementation of the OpenCL sub-tensor interface */
+class CLSubTensor : public ICLTensor
+{
+public:
+    /** Constructor
+     *
+     * @param[in] parent       Parent tensor
+     * @param[in] tensor_shape Shape of the subtensor
+     * @param[in] coords       Coordinates of the first subtensor element inside the parent tensor.
+     */
+    CLSubTensor(ICLTensor *parent, const TensorShape &tensor_shape, const Coordinates &coords);
+    /** Default destructor */
+    ~CLSubTensor() = default;
+    /** Prevent instances of this class from being copy constructed */
+    CLSubTensor(const CLSubTensor &) = delete;
+    /** Prevent instances of this class from being copy assigned */
+    CLSubTensor &operator=(const CLSubTensor &) = delete;
+    /** Allow instances of this class to be move constructed */
+    CLSubTensor(CLSubTensor &&) = default;
+    /** Allow instances of this class to be move assigned */
+    CLSubTensor &operator=(CLSubTensor &&) = default;
+
+    /** Enqueue a map operation of the allocated buffer.
+     *
+     * @note Mapping a subtensor will lead to the mapping of the whole parent tensor for now.
+     *
+     * @param[in] blocking If true, then the mapping will be ready to use by the time
+     *                     this method returns, else it is the caller's responsibility
+     *                     to flush the queue and wait for the mapping operation to have completed.
+     */
+    void map(bool blocking = true);
+    using ICLTensor::map; // Keep the inherited map() visible; the declaration above would otherwise hide it
+    /** Enqueue an unmap operation of the allocated and mapped buffer.
+     *
+     * @note Unmapping a subtensor will lead to the unmapping of the whole parent tensor for now.
+     *
+     * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+     *       the memory is accessed by the device.
+     */
+    void unmap();
+    using ICLTensor::unmap; // Keep the inherited unmap() visible; the declaration above would otherwise hide it
+
+    /** Return the parent tensor of the subtensor
+     *
+     * @return Parent tensor
+     */
+    ICLTensor *parent();
+
+    // Inherited methods overridden:
+    ITensorInfo      *info() const override;
+    ITensorInfo      *info() override;
+    const cl::Buffer &cl_buffer() const override;
+
+protected:
+    // Inherited methods overridden:
+    uint8_t *do_map(cl::CommandQueue &q, bool blocking) override;
+    void do_unmap(cl::CommandQueue &q) override;
+
+private:
+    ICLTensor            *_parent; /**< Parent tensor of the subtensor */
+    mutable SubTensorInfo _info;   /**< Metadata of the subtensor */
+};
+}
+#endif /*__ARM_COMPUTE_CLSUBTENSOR_H__ */
diff --git a/arm_compute/runtime/CL/CLTensor.h b/arm_compute/runtime/CL/CLTensor.h
new file mode 100644
index 0000000..2c685d1
--- /dev/null
+++ b/arm_compute/runtime/CL/CLTensor.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLTENSOR_H__
+#define __ARM_COMPUTE_CLTENSOR_H__
+
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensorAllocator;
+class ITensorInfo;
+
+/** Basic implementation of the OpenCL tensor interface */
+class CLTensor : public ICLTensor
+{
+public:
+    /** Default constructor */
+    CLTensor();
+    /** Return a pointer to the tensor's allocator
+     *
+     * @return A pointer to the tensor's allocator
+     */
+    ITensorAllocator *allocator();
+    /** Enqueue a map operation of the allocated buffer.
+     *
+     * @param[in] blocking If true, then the mapping will be ready to use by the time
+     *                     this method returns, else it is the caller's responsibility
+     *                     to flush the queue and wait for the mapping operation to have completed.
+     */
+    void map(bool blocking = true);
+    using ICLTensor::map; // Keep the inherited map() visible; the declaration above would otherwise hide it
+    /** Enqueue an unmap operation of the allocated and mapped buffer.
+     *
+     * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+     *       the memory is accessed by the device.
+     */
+    void unmap();
+    using ICLTensor::unmap; // Keep the inherited unmap() visible; the declaration above would otherwise hide it
+
+    // Inherited methods overridden:
+    TensorInfo       *info() const override;
+    TensorInfo       *info() override;
+    const cl::Buffer &cl_buffer() const override;
+
+protected:
+    // Inherited methods overridden:
+    uint8_t *do_map(cl::CommandQueue &q, bool blocking) override;
+    void do_unmap(cl::CommandQueue &q) override;
+
+private:
+    mutable CLTensorAllocator _allocator; /**< Instance of the OpenCL tensor allocator */
+};
+
+using CLImage = CLTensor; // An OpenCL image is represented by a CLTensor
+}
+#endif /*__ARM_COMPUTE_CLTENSOR_H__ */
diff --git a/arm_compute/runtime/CL/CLTensorAllocator.h b/arm_compute/runtime/CL/CLTensorAllocator.h
new file mode 100644
index 0000000..ed371e0
--- /dev/null
+++ b/arm_compute/runtime/CL/CLTensorAllocator.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLTENSORALLOCATOR_H__
+#define __ARM_COMPUTE_CLTENSORALLOCATOR_H__
+
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/runtime/ITensorAllocator.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+/** Basic implementation of a CL memory tensor allocator. */
+class CLTensorAllocator : public ITensorAllocator
+{
+public:
+    /** Default constructor. */
+    CLTensorAllocator();
+    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    CLTensorAllocator(const CLTensorAllocator &) = delete;
+    /** Prevent instances of this class from being copy assigned (As this class contains pointers). */
+    CLTensorAllocator &operator=(const CLTensorAllocator &) = delete;
+    /** Allow instances of this class to be move constructed */
+    CLTensorAllocator(CLTensorAllocator &&) = default;
+    /** Allow instances of this class to be move assigned */
+    CLTensorAllocator &operator=(CLTensorAllocator &&) = default;
+    /** Default destructor */
+    ~CLTensorAllocator() = default;
+
+    /** Return the pointer to the mapped data. */
+    uint8_t *data();
+    /** Return the CL data as a constant reference to a cl::Buffer. */
+    const cl::Buffer &cl_data() const;
+    /** Enqueue a map operation of the allocated buffer on the given queue.
+     *
+     * @param[in,out] q        The CL command queue to use for the mapping operation.
+     * @param[in]     blocking If true, then the mapping will be ready to use by the time
+     *                         this method returns, else it is the caller's responsibility
+     *                         to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
+     *
+     * @return The mapping address.
+     */
+    uint8_t *map(cl::CommandQueue &q, bool blocking);
+    /** Enqueue an unmap operation of the allocated buffer on the given queue.
+     *
+     * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+     *       the memory is accessed by the device.
+     *
+     * @param[in,out] q       The CL command queue to use for the unmapping operation.
+     * @param[in]     mapping The cpu mapping to unmap.
+     */
+    void unmap(cl::CommandQueue &q, uint8_t *mapping);
+
+    /** Allocate size specified by TensorInfo of OpenCL memory.
+     *
+     * @note The tensor must not already be allocated when calling this function.
+     *
+     */
+    void allocate() override;
+
+    /** Free allocated OpenCL memory.
+     *
+     * @note The tensor must have been allocated when calling this function.
+     *
+     */
+    void free() override;
+
+protected:
+    /** Call map() on the OpenCL buffer.
+     *
+     * @return A pointer to the beginning of the tensor's allocation.
+     */
+    uint8_t *lock() override;
+    /** Call unmap() on the OpenCL buffer. */
+    void unlock() override;
+
+private:
+    cl::Buffer _buffer;  /**< OpenCL buffer containing the tensor data. */
+    uint8_t   *_mapping; /**< Pointer to the CPU mapping of the OpenCL buffer. */
+};
+}
+#endif /* __ARM_COMPUTE_CLTENSORALLOCATOR_H__ */
diff --git a/arm_compute/runtime/CL/ICLSimpleFunction.h b/arm_compute/runtime/CL/ICLSimpleFunction.h
new file mode 100644
index 0000000..130c58a
--- /dev/null
+++ b/arm_compute/runtime/CL/ICLSimpleFunction.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_ICLSIMPLEFUNCTION_H__
+#define __ARM_COMPUTE_ICLSIMPLEFUNCTION_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+/** Basic interface for functions which have a single OpenCL kernel */
+class ICLSimpleFunction : public IFunction
+{
+public:
+    /** Default constructor */
+    ICLSimpleFunction();
+
+    // Inherited methods overridden (final: derived functions cannot override run()):
+    void run() override final;
+
+protected:
+    std::unique_ptr<ICLKernel> _kernel;         /**< Kernel to run */
+    CLFillBorderKernel         _border_handler; /**< Kernel to handle borders */
+};
+}
+#endif /*__ARM_COMPUTE_ICLSIMPLEFUNCTION_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h b/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h
new file mode 100644
index 0000000..40ee396
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLABSOLUTEDIFFERENCE_H__
+#define __ARM_COMPUTE_CLABSOLUTEDIFFERENCE_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLAbsoluteDifferenceKernel
+ *
+ * @note The tensor data types for the inputs must be U8 or S16.
+ * @note The function also calculates the absolute difference when the two inputs have different tensor data types.
+ */
+class CLAbsoluteDifference : public ICLSimpleFunction
+{
+public:
+    /** Initialise the function's inputs and output.
+     *
+     * @param[in]  input1 First input tensor. Data types supported: U8, S16
+     * @param[in]  input2 Second input tensor. Data types supported: U8, S16
+     * @param[out] output Output tensor. Data types supported: U8, S16
+     */
+    void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+};
+}
+#endif /* __ARM_COMPUTE_CLABSOLUTEDIFFERENCE_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLAccumulate.h b/arm_compute/runtime/CL/functions/CLAccumulate.h
new file mode 100644
index 0000000..51f6df9
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLAccumulate.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLACCUMULATE_H__
+#define __ARM_COMPUTE_CLACCUMULATE_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLAccumulateKernel */
+class CLAccumulate : public ICLSimpleFunction
+{
+public:
+    /** Set the input and accumulation tensors.
+     *
+     * @param[in]  input Source tensor. Data types supported: U8.
+     * @param[out] accum Destination (accumulation) tensor. Data types supported: S16.
+     */
+    void configure(const ICLTensor *input, ICLTensor *accum);
+};
+
+/** Basic function to run @ref CLAccumulateWeightedKernel */
+class CLAccumulateWeighted : public ICLSimpleFunction
+{
+public:
+    /** Set the input and accumulation tensors, and the scale value.
+     *
+     * @param[in]     input Source tensor. Data types supported: U8.
+     * @param[in]     alpha The input scalar value, in the range [0, 1.0]. Data types supported: F32.
+     * @param[in,out] accum Accumulated tensor. Data types supported: U8.
+     */
+    void configure(const ICLTensor *input, float alpha, ICLTensor *accum);
+};
+
+/** Basic function to run @ref CLAccumulateSquaredKernel */
+class CLAccumulateSquared : public ICLSimpleFunction
+{
+public:
+    /** Set the input and accumulation tensors and the shift value.
+     *
+     * @param[in]     input Source tensor. Data types supported: U8.
+     * @param[in]     shift The input shift value, in the range [0, 15]. Data types supported: U32.
+     * @param[in,out] accum Accumulated tensor. Data types supported: S16.
+     */
+    void configure(const ICLTensor *input, uint32_t shift, ICLTensor *accum);
+};
+}
+#endif /*__ARM_COMPUTE_CLACCUMULATE_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLActivationLayer.h b/arm_compute/runtime/CL/functions/CLActivationLayer.h
new file mode 100644
index 0000000..6468c99
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLActivationLayer.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLACTIVATIONLAYER_H__
+#define __ARM_COMPUTE_CLACTIVATIONLAYER_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLActivationLayerKernel
+ *
+ * @note The function simulates an activation layer with the specified activation function.
+ */
+class CLActivationLayer : public ICLSimpleFunction
+{
+public:
+    /** Set the input and output tensors and the activation layer parameters.
+     *
+     * @param[in]  input    Source tensor. Data types supported: F16, F32, U16, S16.
+     * @param[out] output   Destination tensor. Data type should match the input data type.
+     * @param[in]  act_info Activation layer parameters.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info);
+};
+}
+#endif /* __ARM_COMPUTE_CLACTIVATIONLAYER_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLArithmeticAddition.h b/arm_compute/runtime/CL/functions/CLArithmeticAddition.h
new file mode 100644
index 0000000..feadf39
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLArithmeticAddition.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLARITHMETICADDITION_H__
+#define __ARM_COMPUTE_CLARITHMETICADDITION_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLArithmeticAdditionKernel
+ *
+ * @note The tensor data type for the inputs must be U8, S16, F16, F32.
+ * @note The function performs an arithmetic addition between two tensors.
+ */
+class CLArithmeticAddition : public ICLSimpleFunction
+{
+public:
+    /** Initialise the kernel's inputs, output and conversion policy.
+     *
+     * @param[in]  input1 First tensor input. Data types supported: U8, S16, F16, F32.
+     * @param[in]  input2 Second tensor input. Data types supported: U8, S16, F16, F32.
+     * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), S16, F16, F32.
+     * @param[in]  policy Policy to use to handle overflow.
+     */
+    void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ConvertPolicy policy);
+};
+}
+#endif /* __ARM_COMPUTE_CLARITHMETICADDITION_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h b/arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h
new file mode 100644
index 0000000..d7bb211
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLARITHMETICSUBTRACTION_H__
+#define __ARM_COMPUTE_CLARITHMETICSUBTRACTION_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLArithmeticSubtractionKernel.
+ *
+ * @note The tensor data type for the inputs must be U8, S16, F16 or F32.
+ * @note The function performs an arithmetic subtraction between two tensors.
+ */
+class CLArithmeticSubtraction : public ICLSimpleFunction
+{
+public:
+    /** Initialise the kernel's inputs, output and conversion policy.
+     *
+     * @param[in]  input1 First tensor input. Data types supported: U8, S16, F16, F32.
+     * @param[in]  input2 Second tensor input. Data types supported: U8, S16, F16, F32.
+     * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), S16, F16, F32.
+     * @param[in]  policy Policy to use to handle overflow.
+     */
+    void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ConvertPolicy policy);
+};
+}
+#endif /* __ARM_COMPUTE_CLARITHMETICSUBTRACTION_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
new file mode 100644
index 0000000..d766d1c
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLBATCHNORMALIZATIONLAYER_H__
+#define __ARM_COMPUTE_CLBATCHNORMALIZATIONLAYER_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLBatchNormalizationLayerKernel and simulate a batch normalization layer.
+ *
+ * Batch normalization is calculated by:
+ * @f[ out_i = \gamma * (\frac{in_i - \mu_{B}}{\sqrt{\sigma^2_{B} + \epsilon}}) + \beta \equiv BN_{\gamma,\beta}(in_i) @f]
+ *
+ */
+class CLBatchNormalizationLayer : public IFunction
+{
+public:
+    /** Default constructor */
+    CLBatchNormalizationLayer();
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input   Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM].
+     *                     The rest are optional and used for representing batches. Data types supported: F32.
+     * @param[out] output  Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
+     * @param[in]  mean    Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+     * @param[in]  var     Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+     * @param[in]  beta    Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+     * @param[in]  gamma   Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+     * @param[in]  epsilon Small value to avoid division by zero.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma, float epsilon);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    CLBatchNormalizationLayerKernel _norm_kernel; /**< BatchNormalization layer kernel to run */
+};
+}
+#endif /* __ARM_COMPUTE_CLBATCHNORMALIZATIONLAYER_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLBitwiseAnd.h b/arm_compute/runtime/CL/functions/CLBitwiseAnd.h
new file mode 100644
index 0000000..a4a523b
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLBitwiseAnd.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLBITWISEAND_H__
+#define __ARM_COMPUTE_CLBITWISEAND_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLBitwiseAndKernel.
+ *
+ * @note The tensor data type for the inputs must be U8.
+ * @note The function performs a bitwise AND operation using the two input tensors.
+ */
+class CLBitwiseAnd : public ICLSimpleFunction
+{
+public:
+    /** Initialize the function's inputs and output.
+     *
+     * @param[in]  input1 First input tensor. Data types supported: U8.
+     * @param[in]  input2 Second input tensor. Data types supported: U8.
+     * @param[out] output Output tensor. Data types supported: U8.
+     */
+    void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+};
+}
+#endif /* __ARM_COMPUTE_CLBITWISEAND_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLBitwiseNot.h b/arm_compute/runtime/CL/functions/CLBitwiseNot.h
new file mode 100644
index 0000000..0ff16af
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLBitwiseNot.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLBITWISENOT_H__
+#define __ARM_COMPUTE_CLBITWISENOT_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLBitwiseNotKernel.
+ *
+ * @note The tensor data type for the input must be U8.
+ * @note The function performs a bitwise NOT operation on the input tensor.
+ */
+class CLBitwiseNot : public ICLSimpleFunction
+{
+public:
+    /** Initialize the function's input and output.
+     *
+     * @param[in]  input  Input tensor. Data types supported: U8.
+     * @param[out] output Output tensor. Data types supported: U8.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output);
+};
+}
+#endif /* __ARM_COMPUTE_CLBITWISENOT_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLBitwiseOr.h b/arm_compute/runtime/CL/functions/CLBitwiseOr.h
new file mode 100644
index 0000000..880c476
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLBitwiseOr.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLBITWISEOR_H__
+#define __ARM_COMPUTE_CLBITWISEOR_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLBitwiseOrKernel.
+ *
+ * @note The tensor data type for the inputs must be U8.
+ * @note The function performs a bitwise OR operation using the two input tensors.
+ */
+class CLBitwiseOr : public ICLSimpleFunction
+{
+public:
+    /** Initialize the function's inputs and output.
+     *
+     * @param[in]  input1 First input tensor. Data types supported: U8.
+     * @param[in]  input2 Second input tensor. Data types supported: U8.
+     * @param[out] output Output tensor. Data types supported: U8.
+     */
+    void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+};
+}
+#endif /* __ARM_COMPUTE_CLBITWISEOR_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLBitwiseXor.h b/arm_compute/runtime/CL/functions/CLBitwiseXor.h
new file mode 100644
index 0000000..772dec2
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLBitwiseXor.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLBITWISEXOR_H__
+#define __ARM_COMPUTE_CLBITWISEXOR_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLBitwiseXorKernel.
+ *
+ * @note The tensor data type for the inputs must be U8.
+ * @note The function performs a bitwise XOR operation using the two input tensors.
+ */
+class CLBitwiseXor : public ICLSimpleFunction
+{
+public:
+    /** Initialize the function's inputs and output.
+     *
+     * @param[in]  input1 First input tensor. Data types supported: U8.
+     * @param[in]  input2 Second input tensor. Data types supported: U8.
+     * @param[out] output Output tensor. Data types supported: U8.
+     */
+    void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+};
+}
+#endif /* __ARM_COMPUTE_CLBITWISEXOR_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLBox3x3.h b/arm_compute/runtime/CL/functions/CLBox3x3.h
new file mode 100644
index 0000000..5e51c1a
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLBox3x3.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLBOX3X3_H__
+#define __ARM_COMPUTE_CLBOX3X3_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute box filter 3x3. This function calls the following OpenCL kernels:
+ *
+ *  -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ *  -# @ref CLBox3x3Kernel
+ *
+ */
+class CLBox3x3 : public ICLSimpleFunction
+{
+public:
+    /** Initialise the function's source, destination and border mode.
+     *
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if @p border_mode is set to CONSTANT.
+     */
+    void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+}
+#endif /*__ARM_COMPUTE_CLBOX3X3_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLCannyEdge.h b/arm_compute/runtime/CL/functions/CLCannyEdge.h
new file mode 100644
index 0000000..e5a82b2
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLCannyEdge.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLCANNYEDGE_H__
+#define __ARM_COMPUTE_CLCANNYEDGE_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/CL/kernels/CLCannyEdgeKernel.h"
+#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute canny edge on OpenCL. This function calls the following OpenCL kernels and functions:
+ *
+ * -# @ref CLFillBorderKernel (if border_mode == REPLICATE or border_mode == CONSTANT)
+ * -# @ref CLSobel3x3 (if gradient_size == 3) or @ref CLSobel5x5 (if gradient_size == 5) or @ref CLSobel7x7 (if gradient_size == 7)
+ * -# @ref CLGradientKernel
+ * -# @ref CLEdgeNonMaxSuppressionKernel
+ * -# @ref CLEdgeTraceKernel
+ *
+ */
+class CLCannyEdge : public IFunction
+{
+public:
+    /** Constructor */
+    CLCannyEdge();
+    /** Initialise the function's source, destination, thresholds, gradient size, normalization type and border mode.
+     *
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8.
+     * @param[in]     upper_thr             Upper threshold used for the hysteresis.
+     * @param[in]     lower_thr             Lower threshold used for the hysteresis.
+     * @param[in]     gradient_size         Gradient size (3, 5 or 7).
+     * @param[in]     norm_type             Normalization type. If 1, L1-Norm; otherwise L2-Norm.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if @p border_mode is set to CONSTANT.
+     */
+    void configure(ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type,
+                   BorderMode border_mode, uint8_t constant_border_value = 0);
+
+    // Inherited methods overridden:
+    virtual void run() override;
+
+private:
+    std::unique_ptr<IFunction>    _sobel;                                           /**< Pointer to Sobel function. */
+    CLGradientKernel              _gradient;                                        /**< Gradient kernel. */
+    CLFillBorderKernel            _border_mag_gradient;                             /**< Fill border on magnitude tensor kernel. */
+    CLEdgeNonMaxSuppressionKernel _non_max_suppr;                                   /**< Non-Maxima suppression kernel. */
+    CLEdgeTraceKernel             _edge_trace;                                      /**< Edge tracing kernel. */
+    CLImage                       _gx;                                              /**< Source tensor - Gx component. */
+    CLImage                       _gy;                                              /**< Source tensor - Gy component. */
+    CLImage                       _mag;                                             /**< Source tensor - Magnitude. */
+    CLImage                       _phase;                                           /**< Source tensor - Phase. */
+    CLImage                       _nonmax;                                          /**< Source tensor - Non-Maxima suppressed. */
+    CLImage                       _visited, _recorded, _l1_list_counter, _l1_stack; /**< Temporary tensors. */
+};
+}
+
+#endif /* __ARM_COMPUTE_CLCANNYEDGE_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLChannelCombine.h b/arm_compute/runtime/CL/functions/CLChannelCombine.h
new file mode 100644
index 0000000..337e6b4
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLChannelCombine.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLCHANNELCOMBINE_H__
+#define __ARM_COMPUTE_CLCHANNELCOMBINE_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLMultiImage;
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Basic function to run @ref CLChannelCombineKernel to perform channel combination. */
+class CLChannelCombine : public ICLSimpleFunction
+{
+public:
+    /** Initialize the function's inputs and output for a single-planar output tensor.
+     *
+     * @param[in]  plane0 The 2D plane that forms channel 0. Must be of U8 format.
+     * @param[in]  plane1 The 2D plane that forms channel 1. Must be of U8 format.
+     * @param[in]  plane2 The 2D plane that forms channel 2. Must be of U8 format.
+     * @param[in]  plane3 The 2D plane that forms channel 3. Must be of U8 format.
+     * @param[out] output The single planar output tensor.
+     */
+    void configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output);
+    /** Initialize the function's inputs and output for a multi-planar output image.
+     *
+     * @param[in]  plane0 The 2D plane that forms channel 0. Must be of U8 format.
+     * @param[in]  plane1 The 2D plane that forms channel 1. Must be of U8 format.
+     * @param[in]  plane2 The 2D plane that forms channel 2. Must be of U8 format.
+     * @param[out] output The multi planar output image.
+     */
+    void configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output);
+};
+}
+#endif /*__ARM_COMPUTE_CLCHANNELCOMBINE_H__*/
diff --git a/arm_compute/runtime/CL/functions/CLChannelExtract.h b/arm_compute/runtime/CL/functions/CLChannelExtract.h
new file mode 100644
index 0000000..1753374
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLChannelExtract.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLCHANNELEXTRACT_H__
+#define __ARM_COMPUTE_CLCHANNELEXTRACT_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLMultiImage;
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Basic function to run @ref CLChannelExtractKernel to perform channel extraction. */
+class CLChannelExtract : public ICLSimpleFunction
+{
+public:
+    /** Initialize the function's source and destination for a single-planar input tensor.
+     *
+     * @param[in]  input   The input tensor to extract the channel from. Formats supported: Any single planar.
+     * @param[in]  channel The channel to extract.
+     * @param[out] output  The extracted channel. Must be of U8 format.
+     */
+    void configure(const ICLTensor *input, Channel channel, ICLTensor *output);
+    /** Initialize the function's source and destination for a multi-planar input image.
+     *
+     * @param[in]  input   The multi-planar input image to extract channel from.
+     * @param[in]  channel The channel to extract.
+     * @param[out] output  The extracted 2D channel. Must be of U8 format.
+     */
+    void configure(const ICLMultiImage *input, Channel channel, ICLImage *output);
+};
+}
+#endif /*__ARM_COMPUTE_CLCHANNELEXTRACT_H__*/
diff --git a/arm_compute/runtime/CL/functions/CLColorConvert.h b/arm_compute/runtime/CL/functions/CLColorConvert.h
new file mode 100644
index 0000000..12457a0
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLColorConvert.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLCOLORCONVERT_H__
+#define __ARM_COMPUTE_CLCOLORCONVERT_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLMultiImage;
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Basic function to run @ref CLColorConvertKernel.
+ *
+ * @note The function performs color conversion between images.
+ */
+class CLColorConvert : public ICLSimpleFunction
+{
+public:
+    /** Initialize the function's source and destination (single-planar to single-planar).
+     *
+     * @param[in]  input  The input single-planar tensor from which to convert.
+     * @param[out] output The converted single-planar output tensor.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output);
+    /** Initialize the function's source and destination (multi-planar to single-planar).
+     *
+     * @param[in]  input  The multi-planar input image from which to convert.
+     * @param[out] output The converted single-planar output image.
+     */
+    void configure(const ICLMultiImage *input, ICLImage *output);
+    /** Initialize the function's source and destination (single-planar to multi-planar).
+     *
+     * @param[in]  input  The single-planar input image from which to convert.
+     * @param[out] output The converted multi-planar output image.
+     */
+    void configure(const ICLImage *input, ICLMultiImage *output);
+    /** Initialize the function's source and destination (multi-planar to multi-planar).
+     *
+     * @param[in]  input  The multi-planar input image from which to convert.
+     * @param[out] output The converted multi-planar output image.
+     */
+    void configure(const ICLMultiImage *input, ICLMultiImage *output);
+};
+}
+#endif /* __ARM_COMPUTE_CLCOLORCONVERT_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLConvolution.h b/arm_compute/runtime/CL/functions/CLConvolution.h
new file mode 100644
index 0000000..f526f6f
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLConvolution.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLCONVOLUTION_H__
+#define __ARM_COMPUTE_CLCONVOLUTION_H__
+
+#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h"
+#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute convolution of size 3x3. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref CLConvolution3x3Kernel
+ *
+ */
+class CLConvolution3x3 : public ICLSimpleFunction
+{
+public:
+    /** Initialize the function's source, destination, conv and border_mode.
+     *
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8 or S16.
+     * @param[in]     conv                  3x3 S16 coefficients structured as a row-major 2D array in a linear buffer.
+     * @param[in]     scale                 Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+
+/** Basic function to execute square convolution. Currently it supports 5x5, 7x7, 9x9. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref CLConvolutionKernel or<br/>
+ *    @ref CLSeparableConvolutionHorKernel and @ref CLSeparableConvolutionVertKernel (if convolution matrix is separable)
+ *
+ */
+template <unsigned int matrix_size>
+class CLConvolutionSquare : public IFunction
+{
+public:
+    /** Default constructor */
+    CLConvolutionSquare();
+    /** Initialize the function's source, destination, conv and border_mode.
+     *
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8 or S16.
+     * @param[in]     conv                  matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer.
+     * @param[in]     scale                 Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    CLTensor                                      _tmp;            /**< temporary buffer for output of horizontal pass */
+    bool                                          _is_separable;   /**< true if the convolution can be separated */
+    CLSeparableConvolutionHorKernel<matrix_size>  _kernel_hor;     /**< kernel for horizontal pass of separated convolution */
+    CLSeparableConvolutionVertKernel<matrix_size> _kernel_vert;    /**< kernel for vertical pass of separated convolution */
+    CLConvolutionKernel<matrix_size>              _kernel;         /**< kernel for non-separated convolution */
+    CLFillBorderKernel                            _border_handler; /**< kernel for border handling */
+};
+
+/** Basic function to run 5x5 convolution (@ref CLConvolutionSquare instantiated with matrix_size = 5). */
+using CLConvolution5x5 = CLConvolutionSquare<5>;
+/** Basic function to run 7x7 convolution (@ref CLConvolutionSquare instantiated with matrix_size = 7). */
+using CLConvolution7x7 = CLConvolutionSquare<7>;
+/** Basic function to run 9x9 convolution (@ref CLConvolutionSquare instantiated with matrix_size = 9). */
+using CLConvolution9x9 = CLConvolutionSquare<9>;
+
+/** Basic function to execute non-square convolution. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref CLConvolutionRectangleKernel
+ *
+ * @note Convolution rectangle should have dimensions of 3, 5, 7 or 9
+ */
+class CLConvolutionRectangle : public ICLSimpleFunction
+{
+public:
+    /** Initialize the function's source, destination, conv and border_mode.
+     *
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8 or S16.
+     * @param[in]     conv                  @p rows x @p cols S16 coefficients structured as a row-major 2D array in a linear buffer.
+     * @param[in]     rows                  Rows of convolution kernel.
+     * @param[in]     cols                  Columns of convolution kernel.
+     * @param[in]     scale                 Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+}
+#endif /*__ARM_COMPUTE_CLCONVOLUTION_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
new file mode 100644
index 0000000..6a40396
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLCONVOLUTIONLAYER_H__
+#define __ARM_COMPUTE_CLCONVOLUTIONLAYER_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
+#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h"
+#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
+#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Function to reshape and transpose the weights. This function calls the following kernels:
+ * -# @ref CLWeightsReshapeKernel
+ * -# @ref CLGEMMTranspose1xWKernel
+ */
+class CLConvolutionLayerReshapeWeights : public IFunction
+{
+public:
+    /** Constructor */
+    CLConvolutionLayerReshapeWeights();
+    /** Set the input and output tensors.
+     *
+     * @param[in]  weights      Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: F32.
+     * @param[in]  biases       Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
+     * @param[out] output       Destination tensor. Data types supported: Same as @p weights.
+     * @param[in]  transpose1xW True if the weights are to undergo a 1xW transposition after reshaping (in case of GEMM operation),
+     *                          false otherwise.
+     */
+    void configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, bool transpose1xW);
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    CLConvolutionLayerWeightsReshapeKernel _weights_reshape_kernel;
+    CLGEMMTranspose1xWKernel               _weights_transposed_kernel;
+    CLTensor                               _weights_reshaped;
+    bool                                   _transpose1xW;
+};
+
+/** Basic function to compute the convolution layer. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLConvolutionLayerWeightsReshapeKernel (executed only once for each configuration)
+ * -# @ref CLGEMMTranspose1xWKernel               (executed only once for each configuration)
+ * -# @ref CLIm2ColKernel
+ * -# @ref CLGEMMInterleave4x4Kernel
+ * -# @ref CLGEMMMatrixMultiplyKernel
+ * -# @ref CLCol2ImKernel
+ */
+class CLConvolutionLayer : public IFunction
+{
+public:
+    /** Default constructor */
+    CLConvolutionLayer();
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input        Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+     *                          while every optional dimension from 4 and above represents a batch of inputs.
+     *                          Data types supported: F16, F32.
+     * @param[in]  weights      Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
+     * @param[in]  biases       Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[out] output       Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+     *                          Data types supported: Same as @p input.
+     * @param[in]  conv_info    Contains padding and stride information described in @ref PadStrideInfo.
+     * @param[in]  weights_info Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. If this is not part of the fully connected layer the weights
+     *                          tensor has also been transposed with CLGEMMTranspose1xWKernel. Data type supported: Same as @p input.
+     */
+    void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo());
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    CLConvolutionLayerReshapeWeights _reshape_weights;
+    CLIm2ColKernel                   _input_im2col_kernel;
+    CLGEMMInterleave4x4Kernel        _input_interleave_kernel;
+    CLGEMMMatrixMultiplyKernel       _mm_kernel;
+    CLCol2ImKernel                   _output_col2im_kernel;
+    CLTensor                         _input_im2col_reshaped;
+    CLTensor                         _input_interleaved_reshaped;
+    CLTensor                         _weights_reshaped;
+    CLTensor                         _weights_transposed;
+    CLTensor                         _gemm_output;
+    bool                             _has_bias;
+    bool                             _is_fully_connected_convolution;
+    bool                             _are_weights_reshaped;
+};
+}
+#endif /* __ARM_COMPUTE_CLCONVOLUTIONLAYER_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLDepthConcatenate.h b/arm_compute/runtime/CL/functions/CLDepthConcatenate.h
new file mode 100644
index 0000000..3199936
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLDepthConcatenate.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDEPTHCONCATENATE_H__
+#define __ARM_COMPUTE_CLDEPTHCONCATENATE_H__
+
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Window.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
+#include <vector>
+
+namespace arm_compute
+{
+class ICLTensor;
+class CLDepthConcatenateKernel;
+class CLFillBorderKernel;
+
+/** Basic function to concatenate tensors along the z axis. This function calls the following kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if input's lowest two dimensions are smaller than respective output's dimensions)
+ * -# @ref CLDepthConcatenateKernel
+ *
+ */
+class CLDepthConcatenate : public IFunction
+{
+public:
+    /** Default constructor */
+    CLDepthConcatenate();
+    /** Initialise the kernel's inputs vector and output.
+     *
+     * @param[in,out] inputs_vector The vector containing all the tensors to concatenate. Data types supported: F32.
+     * @param[out]    output        Output tensor. Data types supported: F32.
+     */
+    void configure(std::vector<ICLTensor *> inputs_vector, ICLTensor *output);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    std::vector<ICLTensor *>                    _inputs_vector;
+    std::unique_ptr<CLDepthConcatenateKernel[]> _concat_kernels_vector;
+    std::unique_ptr<CLFillBorderKernel[]>       _border_handlers_vector;
+    unsigned int                                _num_inputs;
+};
+}
+#endif /* __ARM_COMPUTE_CLDEPTHCONCATENATE_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLDepthConvert.h b/arm_compute/runtime/CL/functions/CLDepthConvert.h
new file mode 100644
index 0000000..f110276
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLDepthConvert.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDEPTHCONVERT_H__
+#define __ARM_COMPUTE_CLDEPTHCONVERT_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLDepthConvertKernel. */
+class CLDepthConvert : public ICLSimpleFunction
+{
+public:
+    /** Initialize the function's source and destination.
+     *
+     * Input data type must be different from output data type.
+     *
+     * Valid conversions Input -> Output :
+     *
+     *   - U8 -> U16, S16, U32, S32
+     *   - U16 -> U8, U32, S32
+     *   - S16 -> U8, U32, S32
+     *   - U32 -> U8, U16, S16
+     *   - S32 -> U8, U16, S16
+     *
+     * @param[in]  input  The input tensor to convert. Data types supported: U8, U16, S16, U32 or S32.
+     * @param[out] output The output tensor. Data types supported: U8, U16, S16, U32 or S32.
+     * @param[in]  policy Conversion policy.
+     * @param[in]  shift  Value for down/up conversions. Must be 0 <= shift < 8.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift);
+};
+}
+#endif /*__ARM_COMPUTE_CLDEPTHCONVERT_H__*/
diff --git a/arm_compute/runtime/CL/functions/CLDerivative.h b/arm_compute/runtime/CL/functions/CLDerivative.h
new file mode 100644
index 0000000..05033e8
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLDerivative.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDERIVATIVE_H__
+#define __ARM_COMPUTE_CLDERIVATIVE_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute first order derivative operator. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref CLDerivativeKernel
+ *
+ */
+class CLDerivative : public ICLSimpleFunction
+{
+public:
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @note At least one of output_x or output_y must not be NULL.
+     *
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output_x              (Optional) Destination tensor. Derivative along the X direction. Data types supported: S16.
+     * @param[out]    output_y              (Optional) Destination tensor. Derivative along the Y direction. Data types supported: S16.
+     * @param[in]     border_mode           Border mode to use.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     *
+     */
+    void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+}
+#endif /* __ARM_COMPUTE_CLDERIVATIVE_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLDilate.h b/arm_compute/runtime/CL/functions/CLDilate.h
new file mode 100644
index 0000000..8534139
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLDilate.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDILATE_H__
+#define __ARM_COMPUTE_CLDILATE_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute dilate. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref CLDilateKernel
+ *
+ */
+class CLDilate : public ICLSimpleFunction
+{
+public:
+    /** Initialise the kernel's inputs, output and border mode.
+     *
+     * @param[in,out] input                 First tensor input. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Output tensor. Data types supported: U8.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value Constant value to use for borders if border_mode is set to CONSTANT. Must always be passed: the signature has no default.
+     */
+    void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value);
+};
+}
+#endif /*__ARM_COMPUTE_CLDILATE_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h b/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h
new file mode 100644
index 0000000..d718275
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLEQUALIZEHISTOGRAM_H__
+#define __ARM_COMPUTE_CLEQUALIZEHISTOGRAM_H__
+
+#include "arm_compute/core/CL/kernels/CLHistogramKernel.h"
+#include "arm_compute/core/CL/kernels/CLTableLookupKernel.h"
+#include "arm_compute/runtime/CL/CLDistribution1D.h"
+#include "arm_compute/runtime/CL/CLLut.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Basic function to execute histogram equalization. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLHistogramKernel
+ * -# @ref CLTableLookupKernel
+ *
+ */
+class CLEqualizeHistogram : public IFunction
+{
+public:
+    /** Default Constructor. */
+    CLEqualizeHistogram();
+    /** Initialise the kernel's inputs.
+     *
+     * @param[in]  input  Input image. Data types supported: U8.
+     * @param[out] output Output of same data type with equalized brightness and contrast.
+     */
+    void configure(const ICLImage *input, ICLImage *output);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    CLHistogramKernel       _histogram_kernel;        /**< Kernel that calculates the histogram of input. */
+    CLHistogramBorderKernel _border_histogram_kernel; /**< Kernel that calculates the histogram on the borders. */
+    CLTableLookupKernel     _map_histogram_kernel;    /**< Kernel that maps the input to output using the lut. */
+    CLDistribution1D        _hist;                    /**< Distribution that holds the histogram of the input image. */
+    CLDistribution1D        _cum_dist;                /**< Distribution that holds the cumulative distribution of the input histogram. */
+    CLLut                   _cd_lut;                  /**< Holds the equalization lookup table. */
+    static const uint32_t   max_range = 256;          /**< Histogram range of the internal histograms. */
+    static const uint32_t   nr_bins   = 256;          /**< Histogram bins of the internal histograms. */
+};
+}
+#endif /*__ARM_COMPUTE_CLEQUALIZEHISTOGRAM_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLErode.h b/arm_compute/runtime/CL/functions/CLErode.h
new file mode 100644
index 0000000..cd2f551
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLErode.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLERODE_H__
+#define __ARM_COMPUTE_CLERODE_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute erode. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref CLErodeKernel
+ *
+ */
+class CLErode : public ICLSimpleFunction
+{
+public:
+    /** Initialise the kernel's inputs, output and border mode.
+     *
+     * @param[in,out] input                 First tensor input. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Output tensor. Data types supported: U8.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value Constant value to use for borders if border_mode is set to CONSTANT. Must always be passed: the signature has no default.
+     */
+    void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value);
+};
+}
+#endif /*__ARM_COMPUTE_CLERODE_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLFastCorners.h b/arm_compute/runtime/CL/functions/CLFastCorners.h
new file mode 100644
index 0000000..79d82af
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLFastCorners.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLFASTCORNERS_H__
+#define __ARM_COMPUTE_CLFASTCORNERS_H__
+
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/CL/kernels/CLFastCornersKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Window.h"
+#include "arm_compute/runtime/CL/CLArray.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Basic function to execute fast corners. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLFastCornersKernel
+ * -# @ref CLNonMaximaSuppression3x3Kernel (executed if nonmax_suppression == true)
+ * -# @ref CLCopyToArrayKernel
+ *
+ */
+class CLFastCorners : public IFunction
+{
+public:
+    /** Constructor */
+    CLFastCorners();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLFastCorners(const CLFastCorners &) = delete;
+    /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */
+    const CLFastCorners &operator=(const CLFastCorners &) = delete;
+    /** Initialize the function's source image, detection parameters, output corner array and border mode.
+     *
+     * @param[in]     input                 Source image. Data types supported: U8.
+     * @param[in]     threshold             Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3.
+     * @param[in]     nonmax_suppression    If true, non-maximum suppression is applied to detected corners before being placed in the array.
+     * @param[out]    corners               Array of keypoints to store the results.
+     * @param[in,out] num_corners           Record number of corners in the array
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const ICLImage *input, float threshold, bool nonmax_suppression, CLKeyPointArray *corners, unsigned int *num_corners,
+                   BorderMode border_mode, uint8_t constant_border_value = 0);
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    CLFastCornersKernel       _fast_corners_kernel;
+    CLNonMaximaSuppression3x3 _suppr_func;
+    CLCopyToArrayKernel       _copy_array_kernel;
+    CLImage                   _output;
+    CLImage                   _suppr;
+    Window                    _win;
+    bool                      _non_max;
+    unsigned int             *_num_corners;
+    cl::Buffer                _num_buffer;
+    CLKeyPointArray          *_corners;
+    uint8_t                   _constant_border_value;
+};
+}
+#endif /*__ARM_COMPUTE_CLFASTCORNERS_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLFillBorder.h b/arm_compute/runtime/CL/functions/CLFillBorder.h
new file mode 100644
index 0000000..b485547
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLFillBorder.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLFILLBORDER_H__
+#define __ARM_COMPUTE_CLFILLBORDER_H__
+
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLFillBorderKernel */
+class CLFillBorder : public ICLSimpleFunction
+{
+public:
+    /** Initialize the function's tensor, border width, border mode and constant border value.
+     *
+     * @param[in,out] tensor                Source tensor. Data types supported: U8, S16
+     * @param[in]     border_width          The border width
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ICLTensor *tensor, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
+};
+}
+#endif /*__ARM_COMPUTE_CLFILLBORDER_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
new file mode 100644
index 0000000..826f445
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLFULLYCONNECTEDLAYER_H__
+#define __ARM_COMPUTE_CLFULLYCONNECTEDLAYER_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h"
+#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
+#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+namespace arm_compute
+{
+/** Basic function to reshape the weights of Fully Connected layer with OpenCL. This function calls the following kernels:
+ *
+ *  -# @ref CLTransposeKernel        (if @p transpose_weights is set to true)
+ *  -# @ref CLGEMMTranspose1xWKernel (if @p is_batched_fc_layer is set to true)
+ *
+ * @note  The fully connected layer accepts "weights" tensors only with 2 dimensions.
+ */
+class CLFullyConnectedLayerReshapeWeights : public IFunction
+{
+public:
+    /** Constructor */
+    CLFullyConnectedLayerReshapeWeights();
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input               Weights tensor. The weights must be 2 dimensional. Data types supported: QS8/F32.
+     * @param[out] output              Destination tensor. Data type supported: Same as @p input.
+     * @param[in]  transpose_weights   True if the weights must be transposed.
+     * @param[in]  is_batched_fc_layer True if it is a batched fully connected layer
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, bool transpose_weights, bool is_batched_fc_layer);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    CLTransposeKernel        _transpose_kernel;
+    CLGEMMTranspose1xWKernel _transpose1xW_kernel;
+    CLTensor                 _transpose_output;
+    bool                     _transpose_weights;
+    bool                     _is_batched_fc_layer;
+};
+
+/** Basic function to compute a Fully Connected layer on OpenCL. This function calls the following OpenCL kernels and functions:
+ *
+ *  -# @ref CLIm2ColKernel (called when the input comes from a convolutional layer)
+ *  -# @ref CLFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped is set to false) (called once)
+ *  -# @ref CLGEMMInterleave4x4Kernel (called if we have a multi-batch input)
+ *  -# @ref CLGEMMMatrixMultiplyKernel
+ *  -# @ref CLGEMMMatrixAccumulateBiasesKernel (if @p biases is not equal to nullptr)
+ *
+ * @note  The fully connected layer accepts "weights" tensors only with 2 dimensions.
+ */
+class CLFullyConnectedLayer : public IFunction
+{
+public:
+    /** Constructor */
+    CLFullyConnectedLayer();
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input                Source tensor. Data type supported: F16/F32.
+     * @param[in]  weights              Weights tensor. The weights must be 2 dimensional. Data type supported: Same as @p input
+     * @param[in]  biases               Bias tensor. It can be nullptr. Data type supported: Same as @p input.
+     * @param[out] output               Destination tensor. Data type supported: Same as @p input.
+     * @param[in]  transpose_weights    (Optional) Transpose weights if true. Defaults to true.
+     * @param[in]  are_weights_reshaped (Optional) Reshape the weights tensor if false. Defaults to false.
+     */
+    void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, bool transpose_weights = true, bool are_weights_reshaped = false);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    void configure_fc_fc_wb(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output);
+    void configure_fc_fc_nb(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output);
+    void configure_conv_fc_wb(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output);
+    void configure_conv_fc_nb(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output);
+
+    CLIm2ColKernel                      _im2col_kernel;
+    CLFullyConnectedLayerReshapeWeights _reshape_weights_kernel;
+    CLGEMMInterleave4x4Kernel           _interleave4x4_kernel;
+    CLGEMMMatrixMultiplyKernel          _mm_kernel;
+    CLGEMMMatrixAccumulateBiasesKernel  _accumulate_biases_kernel;
+    CLTensor                            _im2col_output;
+    CLTensor                            _interleave4x4_output;
+    CLTensor                            _reshape_weights_output;
+    bool                                _are_weights_reshaped;
+    bool                                _is_fc_after_conv;
+    bool                                _is_batched_fc_layer;
+    bool                                _accumulate_biases;
+};
+}
+#endif /* __ARM_COMPUTE_CLFULLYCONNECTEDLAYER_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h
new file mode 100644
index 0000000..043b2b8
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLGEMM.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLGEMM_H__
+#define __ARM_COMPUTE_CLGEMM_H__
+
+#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute GEMM on OpenCL. Data types supported: F32, F16. This function calls the following OpenCL kernels:
+ *
+ *  -# @ref CLGEMMInterleave4x4Kernel (if the output tensor is a matrix)
+ *  -# @ref CLGEMMTranspose1xWKernel (if the output tensor is a matrix)
+ *  -# @ref CLGEMMMatrixMultiplyKernel
+ *  -# @ref CLGEMMMatrixAdditionKernel (if c != nullptr and beta != 0.0)
+ *
+ */
+class CLGEMM : public IFunction
+{
+public:
+    /** Default constructor. */
+    CLGEMM();
+    /** Initialise the kernel's inputs and output
+     *
+     * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C].
+     *
+     * @note All tensors must have the same data type. Data types supported: F32, F16
+     *
+     * @note Whilst the first input tensor can be a vector, the second input tensor must be at least a matrix
+     *
+     * @param[in]  a      First input tensor  (Matrix or Vector A). Data types supported: F32, F16
+     * @param[in]  b      Second input tensor (Matrix B). Data type supported: same as @p a.
+     * @param[in]  c      Third input tensor  (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a.
+     * @param[out] output Output tensor. Data type supported: same as @p a
+     * @param[in]  alpha  Weight of the matrix product (scalar factor applied to A * B).
+     * @param[in]  beta   Weight of matrix C (scalar factor applied to C).
+     */
+    void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    CLGEMMInterleave4x4Kernel  _interleave_kernel;
+    CLGEMMTranspose1xWKernel   _transpose_kernel;
+    CLGEMMMatrixMultiplyKernel _mm_kernel;
+    CLGEMMMatrixAdditionKernel _ma_kernel;
+    CLTensor                   _tmp_a;
+    CLTensor                   _tmp_b;
+    bool                       _run_vector_matrix_multiplication;
+    bool                       _run_addition;
+};
+}
+
+#endif /* __ARM_COMPUTE_CLGEMM_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLGEMMInterleave4x4.h b/arm_compute/runtime/CL/functions/CLGEMMInterleave4x4.h
new file mode 100644
index 0000000..b80136b
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLGEMMInterleave4x4.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLGEMMINTERLEAVE4X4_H__
+#define __ARM_COMPUTE_CLGEMMINTERLEAVE4X4_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute CLGEMMInterleave4x4Kernel. This function calls the following OpenCL kernel:
+ *
+ *  -# @ref CLGEMMInterleave4x4Kernel
+ *
+ */
+class CLGEMMInterleave4x4 : public ICLSimpleFunction
+{
+public:
+    /** Initialise the kernel's input and output
+     *
+     * @param[in]  input  Input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32
+     * @param[out] output Output tensor. Data type supported: same as @p input
+     */
+    void configure(const ICLTensor *input, ICLTensor *output);
+};
+}
+
+#endif /* __ARM_COMPUTE_CLGEMMINTERLEAVE4X4_H__ */
\ No newline at end of file
diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowp.h b/arm_compute/runtime/CL/functions/CLGEMMLowp.h
new file mode 100644
index 0000000..da8883c
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLGEMMLowp.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLGEMMLOWP_H__
+#define __ARM_COMPUTE_CLGEMMLOWP_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute GEMMLowp on OpenCL. This function calls the following OpenCL kernels:
+ *
+ *  -# @ref CLGEMMInterleave4x4Kernel
+ *  -# @ref CLGEMMTranspose1xWKernel
+ *  -# @ref CLGEMMLowpMatrixMultiplyKernel
+ *
+ */
+class CLGEMMLowp : public IFunction
+{
+public:
+    /** Constructor */
+    CLGEMMLowp();
+    /** Initialise the kernel's inputs, output
+     *
+     * @note GEMM_LOWP:  low precision matrix multiply kernel
+     *  This kernel performs the following computation:
+     *
+     *  -# Convert a values from uint8 to int32 and add a_offset to each of them.
+     *  -# Convert b values from uint8 to int32 and add b_offset to each of them.
+     *  -# Compute the int32 matrix product of the resulting a * b.
+     *  -# Add output_offset to each entry of the result.
+     *  -# Multiply each entry of the result by output_mult_int and round to the nearest integer
+     *  -# Clamp the resulting int32 values to the [0..255] range and cast to uint8.
+     *
+     * @param[in]  a               First input tensor  (Matrix A). Data types supported: U8.
+     * @param[in]  b               Second input tensor (Matrix B). Data types supported: same as @p a.
+     * @param[out] output          Output tensor. Data types supported: same as @p a.
+     * @param[in]  a_offset        Offset to be added to each element of the matrix A.
+     * @param[in]  b_offset        Offset to be added to each element of the matrix B.
+     * @param[in]  output_offset   Offset to be added to each element of the output matrix
+     * @param[in]  output_mult_int Multiplied with each element of the output matrix
+     * @param[in]  shift           Number of bits to shift right the result.
+     */
+    void configure(const ICLTensor *a, const ICLTensor *b, ICLTensor *output, int32_t a_offset, int32_t b_offset, int32_t output_offset, int32_t output_mult_int, int32_t shift);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    CLGEMMInterleave4x4Kernel      _interleave_kernel;
+    CLGEMMTranspose1xWKernel       _transpose_kernel;
+    CLGEMMLowpMatrixMultiplyKernel _mm_kernel;
+    CLTensor                       _tmp_a;
+    CLTensor                       _tmp_b;
+};
+}
+#endif /*__ARM_COMPUTE_CLGEMMLOWP_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLGaussian3x3.h b/arm_compute/runtime/CL/functions/CLGaussian3x3.h
new file mode 100644
index 0000000..f8223bc
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLGaussian3x3.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLGAUSSIAN3X3_H__
+#define __ARM_COMPUTE_CLGAUSSIAN3X3_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute gaussian filter 3x3. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref CLGaussian3x3Kernel
+ *
+ */
+class CLGaussian3x3 : public ICLSimpleFunction
+{
+public:
+    /** Initialise the function's source, destination and border mode.
+     *
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+}
+#endif /*__ARM_COMPUTE_CLGAUSSIAN3X3_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLGaussian5x5.h b/arm_compute/runtime/CL/functions/CLGaussian5x5.h
new file mode 100644
index 0000000..148b9a9
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLGaussian5x5.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLGAUSSIAN5X5_H__
+#define __ARM_COMPUTE_CLGAUSSIAN5X5_H__
+
+#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+#include "arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute gaussian filter 5x5. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref CLGaussian5x5HorKernel
+ * -# @ref CLGaussian5x5VertKernel
+ *
+ */
+class CLGaussian5x5 : public IFunction
+{
+public:
+    /** Default Constructor. */
+    CLGaussian5x5();
+    /** Initialise the function's source, destination and border mode.
+     *
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
+
+    // Inherited methods overridden:
+    void run() override;
+
+protected:
+    CLGaussian5x5HorKernel  _kernel_hor;     /**< Horizontal pass kernel */
+    CLGaussian5x5VertKernel _kernel_vert;    /**< Vertical pass kernel */
+    CLFillBorderKernel      _border_handler; /**< Kernel to handle image borders */
+    CLImage                 _tmp;            /**< Temporary buffer */
+};
+}
+#endif /*__ARM_COMPUTE_CLGAUSSIAN5X5_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLGaussianPyramid.h b/arm_compute/runtime/CL/functions/CLGaussianPyramid.h
new file mode 100644
index 0000000..9793519
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLGaussianPyramid.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLGAUSSIANPYRAMID_H__
+#define __ARM_COMPUTE_CLGAUSSIANPYRAMID_H__
+
+#include "arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h"
+
+#include "arm_compute/core/CL/kernels/CLScaleKernel.h"
+#include "arm_compute/core/IPyramid.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLPyramid.h"
+#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <cstdint>
+#include <memory>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Common abstract interface for all Gaussian pyramid functions (configure() is pure virtual)
+ */
+class CLGaussianPyramid : public IFunction
+{
+public:
+    /** Constructor */
+    CLGaussianPyramid();
+    /** Prevent instances of this class from being copy-constructed (As this class contains pointers) */
+    CLGaussianPyramid(const CLGaussianPyramid &) = delete;
+    /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */
+    CLGaussianPyramid &operator=(const CLGaussianPyramid &) = delete;
+    /** Allow instances of this class to be move-constructed */
+    CLGaussianPyramid(CLGaussianPyramid &&) = default;
+    /** Allow instances of this class to be move-assigned */
+    CLGaussianPyramid &operator=(CLGaussianPyramid &&) = default;
+    /** Default destructor (virtual, as this class is used as a polymorphic base) */
+    virtual ~CLGaussianPyramid() = default;
+    /** Initialise the function's source, destination pyramid and border mode.
+     *
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    pyramid               Destination pyramid tensors. Data types supported at each level: U8.
+     * @param[in]     border_mode           Border mode to use.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     * @note The overrides declared by the derived classes in this header do not repeat the default value of @p constant_border_value.
+     */
+    virtual void configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value = 0) = 0;
+
+protected:
+    ICLTensor *_input;   /**< Raw tensor pointer (presumably the configure() input; confirm in the implementation) */
+    CLPyramid *_pyramid; /**< Raw pyramid pointer (presumably the configure() output pyramid; confirm in the implementation) */
+    CLPyramid  _tmp;     /**< Pyramid held by value (name suggests temporary storage; confirm in the implementation) */
+};
+
+/** Basic function to execute a Gaussian pyramid with HALF scale factor. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref CLGaussianPyramidHorKernel
+ * -# @ref CLGaussianPyramidVertKernel
+ */
+class CLGaussianPyramidHalf : public CLGaussianPyramid
+{
+public:
+    /** Constructor */
+    CLGaussianPyramidHalf();
+
+    // Inherited methods overridden (note: configure() is redeclared here without a default for constant_border_value):
+    void configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override;
+    void run() override;
+
+private:
+    std::unique_ptr<CLFillBorderKernel[]>          _border_handler;       /**< Owned array of border fill kernels */
+    std::unique_ptr<CLGaussianPyramidHorKernel[]>  _horizontal_reduction; /**< Owned array of horizontal pass kernels */
+    std::unique_ptr<CLGaussianPyramidVertKernel[]> _vertical_reduction;   /**< Owned array of vertical pass kernels */
+};
+
+/** Basic function to execute a Gaussian pyramid with ORB scale factor. This function calls the following OpenCL kernels and functions:
+ *
+ * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref CLGaussian5x5
+ * -# @ref CLScaleKernel
+ */
+class CLGaussianPyramidOrb : public CLGaussianPyramid
+{
+public:
+    /** Constructor */
+    CLGaussianPyramidOrb();
+
+    // Inherited methods overridden (note: configure() is redeclared here without a default for constant_border_value):
+    void configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override;
+    void run() override;
+
+private:
+    std::unique_ptr<CLGaussian5x5[]> _gauss5x5;      /**< Owned array of Gaussian 5x5 functions */
+    std::unique_ptr<CLScaleKernel[]> _scale_nearest; /**< Owned array of scale kernels (name suggests nearest-neighbour interpolation; confirm in the implementation) */
+};
+}
+#endif /*__ARM_COMPUTE_CLGAUSSIANPYRAMID_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLHOGDescriptor.h b/arm_compute/runtime/CL/functions/CLHOGDescriptor.h
new file mode 100644
index 0000000..cdb23bf
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLHOGDescriptor.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHOGDESCRIPTOR_H__
+#define __ARM_COMPUTE_CLHOGDESCRIPTOR_H__
+
+#include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLHOGGradient.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+class IHOG;
+/** Basic function to calculate HOG descriptor. This function calls the following OpenCL kernels and functions:
+ *
+ * -# @ref CLHOGGradient
+ * -# @ref CLHOGOrientationBinningKernel
+ * -# @ref CLHOGBlockNormalizationKernel
+ *
+ */
+class CLHOGDescriptor : public IFunction
+{
+public:
+    /** Default constructor */
+    CLHOGDescriptor();
+    /** Initialise the function's source, destination, HOG data-object and border mode
+     *
+     * @param[in,out] input                 Input tensor. Data type supported: U8
+     *                                      (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Output tensor which stores the HOG descriptor. Data type supported: F32. The number of channels is equal to the number of histogram bins per block
+     * @param[in]     hog                   HOG data object which describes the HOG descriptor
+     * @param[in]     border_mode           Border mode to use.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ICLTensor *input, ICLTensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value = 0);
+
+    // Inherited method overridden:
+    void run() override;
+
+private:
+    CLHOGGradient                 _gradient;   /**< HOG gradient function */
+    CLHOGOrientationBinningKernel _orient_bin; /**< Orientation binning kernel */
+    CLHOGBlockNormalizationKernel _block_norm; /**< Block normalization kernel */
+    CLTensor                      _mag;        /**< Internal tensor (name suggests gradient magnitude; confirm in the implementation) */
+    CLTensor                      _phase;      /**< Internal tensor (name suggests gradient phase; confirm in the implementation) */
+    CLTensor                      _hog_space;  /**< Internal tensor (name suggests the HOG space; confirm in the implementation) */
+};
+}
+
+#endif /* __ARM_COMPUTE_CLHOGDESCRIPTOR_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLHOGDetector.h b/arm_compute/runtime/CL/functions/CLHOGDetector.h
new file mode 100644
index 0000000..0b4fad7
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLHOGDetector.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHOGDETECTOR_H__
+#define __ARM_COMPUTE_CLHOGDETECTOR_H__
+
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/CL/kernels/CLHOGDetectorKernel.h"
+#include "arm_compute/core/IHOG.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+/** Basic function to execute HOG detector based on linear SVM. This function calls the following OpenCL kernel:
+ *
+ * -# @ref CLHOGDetectorKernel
+ *
+ */
+class CLHOGDetector : public IFunction
+{
+public:
+    /** Default constructor */
+    CLHOGDetector();
+    /** Prevent instances of this class from being copy-constructed (As this class contains pointers) */
+    CLHOGDetector(const CLHOGDetector &) = delete;
+    /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */
+    CLHOGDetector &operator=(const CLHOGDetector &) = delete;
+    /** Allow instances of this class to be move-constructed */
+    CLHOGDetector(CLHOGDetector &&) = default;
+    /** Allow instances of this class to be move-assigned */
+    CLHOGDetector &operator=(CLHOGDetector &&) = default;
+    /** Default destructor */
+    ~CLHOGDetector() = default;
+    /** Initialise the function's input, HOG data object, output detection windows, detection window stride, threshold and index class
+     *
+     * @attention The function does not reset the number of values in @ref ICLDetectionWindowArray so it is the caller's responsibility to clear it.
+     *
+     * @param[in]  input                   Input tensor. It is the output of @ref CLHOGDescriptor. Data type supported: F32
+     * @param[in]  hog                     HOG data-object that describes the HOG descriptor
+     * @param[out] detection_windows       Array of @ref DetectionWindow used to store the detected objects
+     * @param[in]  detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions.
+     *                                     It must be a multiple of the block stride stored in hog
+     * @param[in]  threshold               (Optional) Threshold for the distance between features and SVM classifying plane
+     * @param[in]  idx_class               (Optional) Index of the class used for evaluating which class the detection window belongs to
+     */
+    void configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, size_t idx_class = 0);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    CLHOGDetectorKernel      _hog_detector_kernel;   /**< HOG detector kernel */
+    ICLDetectionWindowArray *_detection_windows;     /**< Raw pointer to a detection window array (presumably the one given to configure(); confirm in the implementation) */
+    cl::Buffer               _num_detection_windows; /**< OpenCL buffer (name suggests it holds the number of detection windows; confirm in the implementation) */
+};
+}
+
+#endif /* __ARM_COMPUTE_CLHOGDETECTOR_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLHOGGradient.h b/arm_compute/runtime/CL/functions/CLHOGGradient.h
new file mode 100644
index 0000000..e74a684
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLHOGGradient.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHOGGRADIENT_H__
+#define __ARM_COMPUTE_CLHOGGRADIENT_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLDerivative.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+/** Basic function to calculate the gradient for HOG. This function calls the following OpenCL kernels and functions:
+ *
+ * -# @ref CLDerivative
+ * -# @ref CLMagnitudePhaseKernel
+ *
+ */
+class CLHOGGradient : public IFunction
+{
+public:
+    /** Default constructor */
+    CLHOGGradient();
+    /** Initialise the function's source, destinations, phase type and border mode
+     *
+     * @param[in,out] input                 Input tensor. Data type supported: U8.
+     *                                      (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output_magnitude      Output tensor (magnitude). Data type supported: U16.
+     * @param[out]    output_phase          Output tensor (phase). Data type supported: U8
+     * @param[in]     phase_type            Type of @ref PhaseType
+     * @param[in]     border_mode           Border mode to use
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ICLTensor *input, ICLTensor *output_magnitude, ICLTensor *output_phase, PhaseType phase_type, BorderMode border_mode, uint8_t constant_border_value = 0);
+
+    // Inherited method overridden:
+    void run() override;
+
+private:
+    CLDerivative           _derivative; /**< Derivative function */
+    CLMagnitudePhaseKernel _mag_phase;  /**< Magnitude/phase kernel */
+    CLTensor               _gx;         /**< Internal tensor (name suggests the x derivative; confirm in the implementation) */
+    CLTensor               _gy;         /**< Internal tensor (name suggests the y derivative; confirm in the implementation) */
+};
+}
+#endif /*__ARM_COMPUTE_CLHOGGRADIENT_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h b/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h
new file mode 100644
index 0000000..3fe0fa9
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHOGMULTIDETECTION_H__
+#define __ARM_COMPUTE_CLHOGMULTIDETECTION_H__
+
+#include "arm_compute/core/CL/ICLArray.h"
+#include "arm_compute/core/CL/ICLMultiHOG.h"
+#include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h"
+#include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLHOGDetector.h"
+#include "arm_compute/runtime/CL/functions/CLHOGGradient.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+/** Basic function to detect multiple objects (or the same object at different scales) on the same input image using HOG. This function calls the following kernels and functions:
+ *
+ * -# @ref CLHOGGradient
+ * -# @ref CLHOGOrientationBinningKernel
+ * -# @ref CLHOGBlockNormalizationKernel
+ * -# @ref CLHOGDetector
+ * -# @ref CPPDetectionWindowNonMaximaSuppressionKernel (executed if non_maxima_suppression == true)
+ *
+ * @note This implementation works if all the HOG data-objects within the IMultiHOG container have the same:
+ *       -# Phase type
+ *       -# Normalization type
+ *       -# L2 hysteresis threshold if the normalization type is L2HYS_NORM
+ *
+ */
+class CLHOGMultiDetection : public IFunction
+{
+public:
+    /** Default constructor */
+    CLHOGMultiDetection();
+    /** Prevent instances of this class from being copy-constructed (As this class contains pointers) */
+    CLHOGMultiDetection(const CLHOGMultiDetection &) = delete;
+    /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */
+    CLHOGMultiDetection &operator=(const CLHOGMultiDetection &) = delete;
+    /** Initialise the function's source, destination, detection window strides, border mode, threshold and non-maxima suppression
+     *
+     * @param[in,out] input                    Input tensor. Data type supported: U8
+     *                                         (Written to only for @p border_mode != UNDEFINED)
+     * @param[in]     multi_hog                Container of multiple HOG data objects. Each HOG data object describes one HOG model to detect.
+     *                                         This container should store the HOG data-objects in descending or ascending cell_size width order.
+     *                                         This will help to understand if the HOG descriptor computation can be skipped for some HOG data-objects
+     * @param[out]    detection_windows        Array of @ref DetectionWindow used for locating the detected objects
+     * @param[in]     detection_window_strides Array of @ref Size2D used to specify the distance in pixels between 2 consecutive detection windows in x and y directions for each HOG data-object
+     *                                         The dimension of this array must be the same as multi_hog->num_models()
+     *                                         The i-th detection_window_stride of this array must be a multiple of the block_stride stored in the i-th multi_hog array
+     * @param[in]     border_mode              Border mode to use.
+     * @param[in]     constant_border_value    (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     * @param[in]     threshold                (Optional) Threshold for the distance between features and SVM classifying plane
+     * @param[in]     non_maxima_suppression   (Optional) Flag to specify whether the non-maxima suppression is required or not.
+     *                                         True if the non-maxima suppression stage has to be computed
+     * @param[in]     min_distance             (Optional) Radial Euclidean distance to use for the non-maxima suppression stage
+     *
+     */
+    void configure(ICLTensor *input, const ICLMultiHOG *multi_hog, ICLDetectionWindowArray *detection_windows, ICLSize2DArray *detection_window_strides, BorderMode border_mode,
+                   uint8_t constant_border_value = 0,
+                   float threshold = 0.0f, bool non_maxima_suppression = false, float min_distance = 1.0f);
+
+    // Inherited method overridden:
+    void run() override;
+
+private:
+    CLHOGGradient                                                 _gradient_kernel;
+    std::unique_ptr<CLHOGOrientationBinningKernel[]>              _orient_bin_kernel;
+    std::unique_ptr<CLHOGBlockNormalizationKernel[]>              _block_norm_kernel;
+    std::unique_ptr<CLHOGDetector[]>                              _hog_detect_kernel;
+    std::unique_ptr<CPPDetectionWindowNonMaximaSuppressionKernel> _non_maxima_kernel;
+    std::unique_ptr<CLTensor[]>                                   _hog_space;
+    std::unique_ptr<CLTensor[]>                                   _hog_norm_space;
+    ICLDetectionWindowArray                                      *_detection_windows;
+    CLTensor                                                      _mag;
+    CLTensor                                                      _phase;
+    bool                                                          _non_maxima_suppression;
+    size_t                                                        _num_orient_bin_kernel;
+    size_t                                                        _num_block_norm_kernel;
+    size_t                                                        _num_hog_detect_kernel;
+};
+}
+
+#endif /* __ARM_COMPUTE_CLHOGMULTIDETECTION_H__ */
\ No newline at end of file
diff --git a/arm_compute/runtime/CL/functions/CLHarrisCorners.h b/arm_compute/runtime/CL/functions/CLHarrisCorners.h
new file mode 100644
index 0000000..90da687
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLHarrisCorners.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHARRISCORNERS_H__
+#define __ARM_COMPUTE_CLHARRISCORNERS_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/CL/ICLArray.h"
+#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+#include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h"
+#include "arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h"
+#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+#include <cstdint>
+
+#include <memory>
+
+namespace arm_compute
+{
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Basic function to execute harris corners detection. This function calls the following CL and CPP kernels and functions:
+ *
+ * @note Requires CPU support for the kernels: CPPCornerCandidatesKernel and CPPSortEuclideanDistanceKernel.
+ *
+ * -# @ref CLSobel3x3 (if gradient_size == 3) or<br/>
+ *    @ref CLSobel5x5 (if gradient_size == 5) or<br/>
+ *    @ref CLSobel7x7 (if gradient_size == 7)
+ * -# @ref CLFillBorderKernel
+ * -# @ref CLHarrisScoreKernel
+ * -# @ref CLNonMaximaSuppression3x3Kernel
+ * -# @ref CPPCornerCandidatesKernel
+ * -# @ref CPPSortEuclideanDistanceKernel
+ */
+class CLHarrisCorners : public IFunction
+{
+public:
+    /** Constructor */
+    CLHarrisCorners();
+    /** Prevent instances of this class from being copy-constructed (As this class contains pointers) */
+    CLHarrisCorners(const CLHarrisCorners &) = delete;
+    /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */
+    CLHarrisCorners &operator=(const CLHarrisCorners &) = delete;
+    /** Initialize the function's source, detection parameters, output corners array and border mode.
+     *
+     * @param[in,out] input                 Source image. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[in]     threshold             Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel).
+     * @param[in]     min_dist              Radial Euclidean distance for the euclidean distance stage.
+     * @param[in]     sensitivity           Sensitivity threshold k from the Harris-Stephens equation
+     * @param[in]     gradient_size         The gradient window size to use on the input. The implementation supports 3, 5, and 7
+     * @param[in]     block_size            The block window size used to compute the Harris Corner score. The implementation supports 3, 5, and 7.
+     * @param[out]    corners               Array of keypoints to store the results.
+     * @param[in]     border_mode           Border mode to use
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ICLImage *input, float threshold, float min_dist, float sensitivity,
+                   int32_t gradient_size, int32_t block_size, ICLKeyPointArray *corners,
+                   BorderMode border_mode, uint8_t constant_border_value = 0);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    std::unique_ptr<IFunction>          _sobel;                 /**< Sobel function */
+    CLHarrisScoreKernel                 _harris_score;          /**< Harris score kernel */
+    CLNonMaximaSuppression3x3Kernel     _non_max_suppr;         /**< Non-maxima suppression kernel */
+    CPPCornerCandidatesKernel           _candidates;            /**< Corner candidates kernel */
+    CPPSortEuclideanDistanceKernel      _sort_euclidean;        /**< Sort by euclidean distance kernel */
+    CLFillBorderKernel                  _border_gx;             /**< Border handler before running harris score */
+    CLFillBorderKernel                  _border_gy;             /**< Border handler before running harris score */
+    CLImage                             _gx;                    /**< Source image - Gx component */
+    CLImage                             _gy;                    /**< Source image - Gy component */
+    CLImage                             _score;                 /**< Source image - Harris score */
+    CLImage                             _nonmax;                /**< Source image - Non-Maxima suppressed image */
+    std::unique_ptr<InternalKeypoint[]> _corners_list;          /**< Array of InternalKeypoint. It stores the potential corner candidates */
+    int32_t                             _num_corner_candidates; /**< Number of potential corner candidates */
+    ICLKeyPointArray                   *_corners;               /**< Output corners array */
+};
+}
+#endif /*__ARM_COMPUTE_CLHARRISCORNERS_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLHistogram.h b/arm_compute/runtime/CL/functions/CLHistogram.h
new file mode 100644
index 0000000..455b618
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLHistogram.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHISTOGRAM_H__
+#define __ARM_COMPUTE_CLHISTOGRAM_H__
+
+#include "arm_compute/core/CL/kernels/CLHistogramKernel.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+class ICLDistribution1D;
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Basic function to execute histogram. This function calls the following OpenCL kernels:
+ *
+ *  -# @ref CLHistogramKernel
+ *  -# @ref CLHistogramBorderKernel
+ *
+ */
+class CLHistogram : public IFunction
+{
+public:
+    /**
+     * Default constructor
+     */
+    CLHistogram();
+    /** Prevent instances of this class from being copy-constructed (As this class contains pointers) */
+    CLHistogram(const CLHistogram &) = delete;
+    /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */
+    CLHistogram &operator=(const CLHistogram &) = delete;
+    /** Initialize the function
+     *
+     * @param[in]  input  Source image. Data types supported: U8
+     * @param[out] output Output distribution.
+     */
+    void configure(const ICLImage *input, ICLDistribution1D *output);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    CLHistogramKernel       _kernel;        /**< kernel to run */
+    CLHistogramBorderKernel _kernel_border; /**< Border kernel to run */
+};
+}
+#endif /*__ARM_COMPUTE_CLHISTOGRAM_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLIntegralImage.h b/arm_compute/runtime/CL/functions/CLIntegralImage.h
new file mode 100644
index 0000000..25fc549
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLIntegralImage.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLINTEGRALIMAGE_H__
+#define __ARM_COMPUTE_CLINTEGRALIMAGE_H__
+
+#include "arm_compute/core/CL/kernels/CLIntegralImageKernel.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute integral image. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLIntegralImageHorKernel
+ * -# @ref CLIntegralImageVertKernel
+ *
+ */
+class CLIntegralImage : public IFunction
+{
+public:
+    /** Default Constructor. */
+    CLIntegralImage();
+    /** Initialise the function's source and destination.
+     *
+     * @param[in]  input  Source tensor. Data types supported: U8.
+     * @param[out] output Destination tensor. Data types supported: U32.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output);
+
+    // Inherited methods overridden:
+    void run() override;
+
+protected:
+    CLIntegralImageHorKernel  _integral_hor;  /**< Integral Image Horizontal kernel */
+    CLIntegralImageVertKernel _integral_vert; /**< Integral Image Vertical kernel */
+};
+}
+#endif /*__ARM_COMPUTE_CLINTEGRALIMAGE_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLLaplacianPyramid.h b/arm_compute/runtime/CL/functions/CLLaplacianPyramid.h
new file mode 100644
index 0000000..0c6708a
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLLaplacianPyramid.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLLAPLACIANPYRAMID_H__
+#define __ARM_COMPUTE_CLLAPLACIANPYRAMID_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLPyramid.h"
+#include "arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h"
+#include "arm_compute/runtime/CL/functions/CLDepthConvert.h"
+#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h"
+#include "arm_compute/runtime/CL/functions/CLGaussianPyramid.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute laplacian pyramid. This function calls the following OpenCL kernels and functions:
+ *
+ * -# @ref CLGaussianPyramidHalf
+ * -# @ref CLGaussian5x5
+ * -# @ref CLArithmeticSubtraction
+ *
+ *  First a Gaussian pyramid is created. Then, for each level i, the corresponding tensor I(i) is blurred with the Gaussian 5x5 filter, and the
+ *  difference between the two tensors is the corresponding level L(i) of the Laplacian pyramid.
+ *  L(i) = I(i) - Gaussian5x5(I(i))
+ *  Level 0 always has the same first two dimensions as the input tensor.
+ */
+class CLLaplacianPyramid : public IFunction
+{
+public:
+    /** Constructor */
+    CLLaplacianPyramid();
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @param[in]  input                 Source tensor. Data types supported: U8.
+     * @param[out] pyramid               Destination pyramid tensors. Data types supported at each level: S16.
+     * @param[out] output                The lowest resolution tensor necessary to reconstruct the input tensor from the pyramid. Data types supported: S16.
+     *                                   The first two dimensions of this tensor must match the first two dimensions of the tensor in the last level of the pyramid, that is:
+     *                                   output.width = input.width / pow(2,pyramid_levels-1) and output.height = input.height / pow(2,pyramid_levels-1)
+     * @param[in]  border_mode           Border mode to use.
+     * @param[in]  constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. Defaults to 0.
+     *
+     */
+    void configure(ICLTensor *input, CLPyramid *pyramid, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    size_t                                     _num_levels;
+    CLGaussianPyramidHalf                      _gaussian_pyr_function;
+    std::unique_ptr<CLGaussian5x5[]>           _convf;
+    std::unique_ptr<CLArithmeticSubtraction[]> _subf;
+    CLDepthConvert                             _depth_function;
+    CLPyramid                                  _gauss_pyr;
+    CLPyramid                                  _conv_pyr;
+};
+}
+#endif /*__ARM_COMPUTE_CLLAPLACIANPYRAMID_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h b/arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h
new file mode 100644
index 0000000..4bc7eb6
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLLAPLACIANRECONSTRUCT_H__
+#define __ARM_COMPUTE_CLLAPLACIANRECONSTRUCT_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLPyramid.h"
+#include "arm_compute/runtime/CL/functions/CLArithmeticAddition.h"
+#include "arm_compute/runtime/CL/functions/CLDepthConvert.h"
+#include "arm_compute/runtime/CL/functions/CLScale.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <cstdint>
+#include <memory>
+
+namespace arm_compute
+{
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Basic function to execute laplacian reconstruction. This function calls the following OpenCL kernels and functions:
+ *
+ * -# @ref CLArithmeticAddition
+ * -# @ref CLScale
+ * -# @ref CLDepthConvert
+ *
+ * This function reconstructs the original image from a Laplacian Image Pyramid.
+ *
+ *  The input image is added to the last level of the Laplacian pyramid L(n-1); the resulting image is upsampled to the
+ *  resolution of the next pyramid level.
+ *
+ *  I(n-2) = upsample(input + L(n-1))
+ *
+ *  For each pyramid level i, except i=0 and i=n-1:
+ *  I(i-1) = upsample(I(i) + L(i))
+ *
+ *  output = I(0) + L(0)
+ */
+class CLLaplacianReconstruct : public IFunction
+{
+public:
+    /** Constructor */
+    CLLaplacianReconstruct();
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * The Output image must have the same size as the first level of the pyramid.
+     * The Input image must have the same size as the last level of the pyramid.
+     *
+     * The idea is to reconstruct the original hi-res image from a low-res representation of it and the laplacian pyramid.
+     *
+     * @param[in]  pyramid               Laplacian pyramid tensors. Data types supported at each level: S16.
+     * @param[in]  input                 Source tensor. Data types supported: S16.
+     * @param[out] output                Output tensor. Data types supported: U8.
+     * @param[in]  border_mode           Border mode to use for the convolution.
+     * @param[in]  constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. Defaults to 0.
+     *
+     */
+    void configure(const CLPyramid *pyramid, const ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    CLPyramid                               _tmp_pyr;
+    std::unique_ptr<CLArithmeticAddition[]> _addf;
+    std::unique_ptr<CLScale[]>              _scalef;
+    CLDepthConvert                          _depthf;
+};
+}
+#endif /*__ARM_COMPUTE_CLLAPLACIANRECONSTRUCT_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h
new file mode 100644
index 0000000..b4e4691
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLLOCALLYCONNECTEDLAYER_H__
+#define __ARM_COMPUTE_CLLOCALLYCONNECTEDLAYER_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
+#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
+#include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h"
+#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to compute the locally connected layer. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLLocallyConnectedLayerWeightsReshapeKernel (executed only once for each configuration)
+ * -# @ref CLIm2ColKernel
+ * -# @ref CLLocallyConnectedMatrixMultiplyKernel
+ * -# @ref CLCol2ImKernel
+ */
+class CLLocallyConnectedLayer : public IFunction
+{
+public:
+    /** Default constructor */
+    CLLocallyConnectedLayer();
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input     Source tensor. The 3 lower dimensions represent a single input [width, height, IFM],
+     *                       while every optional dimension from 4 and above represents a batch of inputs.
+     *                       Data types supported: F32.
+     * @param[in]  weights   Weights tensor. Weights are a 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches]. Data type supported: Same as @p input.
+     * @param[in]  biases    Biases tensor. Shared biases supported. Biases are a 2D tensor with dimensions [OFM, num_patches]. Data type supported: Same as @p input.
+     * @param[out] output    Destination tensor. The 3 lower dimensions represent a single output [width, height, OFM], while the rest represent a batch of outputs.
+     *                       Data types supported: Same as @p input.
+     * @param[in]  conv_info Contains padding and stride information described in @ref PadStrideInfo.
+     */
+    void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    CLIm2ColKernel                              _input_im2col_kernel;
+    CLLocallyConnectedLayerWeightsReshapeKernel _weights_reshape_kernel;
+    CLLocallyConnectedMatrixMultiplyKernel      _mm_kernel;
+    CLCol2ImKernel                              _output_col2im_kernel;
+    CLTensor                                    _input_im2col_reshaped;
+    CLTensor                                    _weights_reshaped;
+    CLTensor                                    _gemm_output;
+    bool                                        _is_first_run;
+};
+}
+#endif /* __ARM_COMPUTE_CLLOCALLYCONNECTEDLAYER_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLMagnitude.h b/arm_compute/runtime/CL/functions/CLMagnitude.h
new file mode 100644
index 0000000..dc5f913
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLMagnitude.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLMAGNITUDE_H__
+#define __ARM_COMPUTE_CLMAGNITUDE_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLMagnitudePhaseKernel. */
+class CLMagnitude : public ICLSimpleFunction
+{
+public:
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in]  input1   First tensor input. Data types supported: S16.
+     * @param[in]  input2   Second tensor input. Data types supported: S16.
+     * @param[out] output   Output tensor. Data types supported: S16.
+     * @param[in]  mag_type (Optional) Magnitude calculation type. Default: L2NORM.
+     */
+    void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, MagnitudeType mag_type = MagnitudeType::L2NORM);
+};
+}
+#endif /*__ARM_COMPUTE_CLMAGNITUDE_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLMeanStdDev.h b/arm_compute/runtime/CL/functions/CLMeanStdDev.h
new file mode 100644
index 0000000..e33bcdd
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLMeanStdDev.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLMEANSTDDEV_H__
+#define __ARM_COMPUTE_CLMEANSTDDEV_H__
+
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+/** Basic function to execute mean and standard deviation by calling @ref CLMeanStdDevKernel */
+class CLMeanStdDev : public IFunction
+{
+public:
+    /** Default Constructor. */
+    CLMeanStdDev();
+    /** Initialise the kernel's inputs and outputs.
+     *
+     * @param[in]  input  Input image. Data types supported: U8.
+     * @param[out] mean   Output average pixel value.
+     * @param[out] stddev (Optional) Output standard deviation of pixel values. Default: nullptr.
+     */
+    void configure(const ICLImage *input, float *mean, float *stddev = nullptr);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    CLMeanStdDevKernel _mean_stddev_kernel; /**< Kernel that performs the mean and standard deviation calculation. */
+    cl::Buffer         _global_sum;         /**< Variable that holds the global sum among calls in order to ease reduction */
+    cl::Buffer         _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */
+};
+}
+#endif /*__ARM_COMPUTE_CLMEANSTDDEV_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLMedian3x3.h b/arm_compute/runtime/CL/functions/CLMedian3x3.h
new file mode 100644
index 0000000..af84ba7
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLMedian3x3.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLMEDIAN3X3_H__
+#define __ARM_COMPUTE_CLMEDIAN3X3_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute median filter. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref CLMedian3x3Kernel
+ *
+ */
+class CLMedian3x3 : public ICLSimpleFunction
+{
+public:
+    /** Initialise the function's source, destination and border mode.
+     *
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. Defaults to 0.
+     */
+    void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+}
+#endif /*__ARM_COMPUTE_CLMEDIAN3X3_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLMinMaxLocation.h b/arm_compute/runtime/CL/functions/CLMinMaxLocation.h
new file mode 100644
index 0000000..84fd675
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLMinMaxLocation.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLMINMAXLOCATION_H__
+#define __ARM_COMPUTE_CLMINMAXLOCATION_H__
+
+#include "arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h"
+#include "arm_compute/runtime/CL/CLArray.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Basic function to execute min and max location. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLMinMaxKernel
+ * -# @ref CLMinMaxLocationKernel
+ */
+class CLMinMaxLocation : public IFunction
+{
+public:
+    /** Constructor */
+    CLMinMaxLocation();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLMinMaxLocation(const CLMinMaxLocation &) = delete;
+    /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */
+    CLMinMaxLocation &operator=(const CLMinMaxLocation &) = delete;
+    /** Allow instances of this class to be move-constructed */
+    CLMinMaxLocation(CLMinMaxLocation &&) = default;
+    /** Allow instances of this class to be move-assigned */
+    CLMinMaxLocation &operator=(CLMinMaxLocation &&) = default;
+    /** Initialise the kernel's inputs and outputs.
+     *
+     * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size.
+     *
+     * @param[in]  input     Input image. Data types supported: U8 or S16.
+     * @param[out] min       Minimum value of image.
+     * @param[out] max       Maximum value of image.
+     * @param[out] min_loc   (Optional) Array of Coordinates2D used to store minimum value locations. Default: nullptr.
+     * @param[out] max_loc   (Optional) Array of Coordinates2D used to store maximum value locations. Default: nullptr.
+     * @param[out] min_count (Optional) Number of minimum value encounters. Default: nullptr.
+     * @param[out] max_count (Optional) Number of maximum value encounters. Default: nullptr.
+     */
+    void configure(const ICLImage *input, int32_t *min, int32_t *max,
+                   CLCoordinates2DArray *min_loc = nullptr, CLCoordinates2DArray *max_loc = nullptr,
+                   uint32_t *min_count = nullptr, uint32_t *max_count = nullptr);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    CLMinMaxKernel         _min_max_kernel;     /**< Kernel that performs min/max */
+    CLMinMaxLocationKernel _min_max_loc_kernel; /**< Kernel that counts min/max occurrences and identifies their positions */
+    cl::Buffer             _min_max_vals;       /**< Buffer to collect min, max values */
+    cl::Buffer             _min_max_count_vals; /**< Buffer to collect min, max occurrence counts */
+    int32_t               *_min;                /**< Minimum value. */
+    int32_t               *_max;                /**< Maximum value. */
+    uint32_t              *_min_count;          /**< Minimum value occurrences. */
+    uint32_t              *_max_count;          /**< Maximum value occurrences. */
+    CLCoordinates2DArray *_min_loc;             /**< Minimum value occurrences coordinates. */
+    CLCoordinates2DArray *_max_loc;             /**< Maximum value occurrences coordinates. */
+};
+}
+#endif /*__ARM_COMPUTE_CLMINMAXLOCATION_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLNonLinearFilter.h b/arm_compute/runtime/CL/functions/CLNonLinearFilter.h
new file mode 100644
index 0000000..9eee33e
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLNonLinearFilter.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLNONLINEARFILTER_H__
+#define __ARM_COMPUTE_CLNONLINEARFILTER_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute non linear filter. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref CLNonLinearFilterKernel
+ *
+ * @note Supported mask dimensions: squares of sizes 3 and 5
+ */
+class CLNonLinearFilter : public ICLSimpleFunction
+{
+public:
+    /** Initialize the function's source, destination, filter function, mask and border mode.
+     *
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8
+     * @param[in]     function              Non linear function to perform
+     * @param[in]     mask_size             Mask size. Supported sizes: 3, 5
+     * @param[in]     pattern               Mask pattern
+     * @param[in]     mask                  The given mask. Will be used only if pattern is specified to PATTERN_OTHER
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. Defaults to 0.
+     */
+    void configure(ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
+                   BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+}
+#endif /*__ARM_COMPUTE_CLNONLINEARFILTER_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h b/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h
new file mode 100644
index 0000000..7adced4
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLNONMAXIMASUPPRESSION3X3_H__
+#define __ARM_COMPUTE_CLNONMAXIMASUPPRESSION3X3_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute non-maxima suppression over a 3x3 window. This function calls the following CL kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref CLNonMaximaSuppression3x3Kernel
+ */
+class CLNonMaximaSuppression3x3 : public ICLSimpleFunction
+{
+public:
+    /** Initialise the function's source, destination and border mode.
+     *
+     * @note The implementation supports just 2 border modes: UNDEFINED and CONSTANT.
+     *       The constant value used with CONSTANT border mode is 0.
+     *
+     * @param[in,out] input       Source tensor. Data types supported: U8, F32. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output      Destination for the 3x3 non-maxima suppression. Data types supported: same as @p input.
+     * @param[in]     border_mode Border mode to use for non-maxima suppression.
+     *                            The implementation supports just 2 border modes: UNDEFINED and CONSTANT.
+     */
+    void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode);
+};
+}
+#endif /* __ARM_COMPUTE_CLNONMAXIMASUPPRESSION3X3_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h
new file mode 100644
index 0000000..a4dae85
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLNORMALIZATIONLAYER_H__
+#define __ARM_COMPUTE_CLNORMALIZATIONLAYER_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+#include "arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h"
+#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to simulate a normalization layer. This function calls the following CL kernels:
+ *
+ * -# @ref CLPixelWiseMultiplicationKernel
+ * -# @ref CLFillBorderKernel
+ * -# @ref CLNormalizationLayerKernel
+ *
+ */
+class CLNormalizationLayer : public IFunction
+{
+public:
+    /** Default constructor */
+    CLNormalizationLayer();
+    /** Set the input and output tensors and the normalization parameters.
+     *
+     * @param[in]  input     Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
+     *                       and an optional 4th dimension for batch of inputs. Data types supported: F16, F32. Number of channels must be 1.
+     * @param[out] output    Destination tensor. Dimensions, data type and number of channels must match the input ones.
+     * @param[in]  norm_info Normalization layer information like the normalization type, normalization size and other parameters.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    CLTensor                        _squared_input;   /**< The intermediate buffer which stores the results of squaring the input */
+    CLNormalizationLayerKernel      _norm_kernel;     /**< Normalization layer kernel to run */
+    CLPixelWiseMultiplicationKernel _multiply_kernel; /**< Pixel-wise multiplication kernel to run */
+    CLFillBorderKernel              _border_handler;  /**< Kernel to handle borders */
+};
+}
+#endif /* __ARM_COMPUTE_CLNORMALIZATIONLAYER_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLOpticalFlow.h b/arm_compute/runtime/CL/functions/CLOpticalFlow.h
new file mode 100644
index 0000000..ca3f861
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLOpticalFlow.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLOPTICALFLOW_H__
+#define __ARM_COMPUTE_CLOPTICALFLOW_H__
+
+#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h"
+
+#include "arm_compute/core/IArray.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLArray.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLScharr3x3.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+
+namespace arm_compute
+{
+class CLPyramid;
+
+using CLLKInternalKeypointArray = CLArray<CLLKInternalKeypoint>; /**< CLArray of CLLKInternalKeypoint elements */
+using CLCoefficientTableArray   = CLArray<CLCoefficientTable>; /**< CLArray of CLCoefficientTable elements */
+using CLOldValueArray           = CLArray<CLOldValue>; /**< CLArray of CLOldValue elements */
+
+/** Basic function to execute optical flow. This function calls the following OpenCL kernels and functions:
+ *
+ * -# @ref CLScharr3x3
+ * -# @ref CLLKTrackerInitKernel
+ * -# @ref CLLKTrackerStage0Kernel
+ * -# @ref CLLKTrackerStage1Kernel
+ * -# @ref CLLKTrackerFinalizeKernel
+ */
+class CLOpticalFlow : public IFunction
+{
+public:
+    /** Default constructor */
+    CLOpticalFlow();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLOpticalFlow(const CLOpticalFlow &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLOpticalFlow &operator=(const CLOpticalFlow &) = delete;
+    /** Allow instances of this class to be moved */
+    CLOpticalFlow(CLOpticalFlow &&) = default;
+    /** Allow instances of this class to be moved */
+    CLOpticalFlow &operator=(CLOpticalFlow &&) = default;
+    /** Initialise the function's inputs and output
+     *
+     * @param[in]  old_pyramid           Pointer to the pyramid for the old tensor. Data types supported: U8
+     * @param[in]  new_pyramid           Pointer to the pyramid for the new tensor. Data types supported: U8
+     * @param[in]  old_points            Pointer to the ICLKeyPointArray storing the old key points
+     * @param[in]  new_points_estimates  Pointer to the ICLKeyPointArray storing the estimates of the new key points
+     * @param[out] new_points            Pointer to the ICLKeyPointArray storing the new key points
+     * @param[in]  termination           The criteria to terminate the search of each keypoint.
+     * @param[in]  epsilon               The error for terminating the algorithm
+     * @param[in]  num_iterations        The maximum number of iterations before terminating the algorithm
+     * @param[in]  window_dimension      The size of the window on which to perform the algorithm
+     * @param[in]  use_initial_estimate  The flag to indicate whether the initial estimated position should be used
+     * @param[in]  border_mode           The border mode applied at scharr kernel stage
+     * @param[in]  constant_border_value (Optional) Constant value to use for borders if @p border_mode is set to CONSTANT
+     *
+     */
+    void configure(const CLPyramid *old_pyramid, const CLPyramid *new_pyramid,
+                   const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, ICLKeyPointArray *new_points,
+                   Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, bool use_initial_estimate,
+                   BorderMode border_mode, uint8_t constant_border_value = 0);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    std::unique_ptr<CLLKTrackerInitKernel[]>   _tracker_init_kernel;
+    std::unique_ptr<CLLKTrackerStage0Kernel[]> _tracker_stage0_kernel;
+    std::unique_ptr<CLLKTrackerStage1Kernel[]> _tracker_stage1_kernel;
+    CLLKTrackerFinalizeKernel                  _tracker_finalize_kernel;
+    std::unique_ptr<CLScharr3x3[]>             _func_scharr;
+    std::unique_ptr<CLTensor[]>                _scharr_gx;
+    std::unique_ptr<CLTensor[]>                _scharr_gy;
+    const ICLKeyPointArray                    *_old_points;
+    const ICLKeyPointArray                    *_new_points_estimates;
+    ICLKeyPointArray                          *_new_points;
+    std::unique_ptr<CLLKInternalKeypointArray> _old_points_internal;
+    std::unique_ptr<CLLKInternalKeypointArray> _new_points_internal;
+    std::unique_ptr<CLCoefficientTableArray>   _coefficient_table;
+    std::unique_ptr<CLOldValueArray>           _old_values;
+    size_t                                     _num_levels;
+};
+}
+#endif /*__ARM_COMPUTE_CLOPTICALFLOW_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLPhase.h b/arm_compute/runtime/CL/functions/CLPhase.h
new file mode 100644
index 0000000..7cdfab1
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLPhase.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLPHASE_H__
+#define __ARM_COMPUTE_CLPHASE_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute a @ref CLMagnitudePhaseKernel. */
+class CLPhase : public ICLSimpleFunction
+{
+public:
+    /** Initialise the kernel's inputs, output and phase type.
+     *
+     * @param[in]  input1     First tensor input. Data types supported: S16.
+     * @param[in]  input2     Second tensor input. Data types supported: S16.
+     * @param[out] output     Output tensor. Data types supported: U8.
+     * @param[in]  phase_type (Optional) Phase calculation type. Default: SIGNED.
+     */
+    void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, PhaseType phase_type = PhaseType::SIGNED);
+};
+}
+#endif /*__ARM_COMPUTE_CLPHASE_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
new file mode 100644
index 0000000..71754fc
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLPIXELWISEMULTIPLICATION_H__
+#define __ARM_COMPUTE_CLPIXELWISEMULTIPLICATION_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLPixelWiseMultiplicationKernel. */
+class CLPixelWiseMultiplication : public ICLSimpleFunction
+{
+public:
+    /** Initialise the kernel's inputs, output, scale, overflow policy and rounding policy.
+     *
+     * @param[in]  input1          First tensor input. Data types supported: U8, S16, F16 or F32.
+     * @param[in]  input2          Second tensor input. Data types supported: U8, S16, F16 or F32.
+     * @param[out] output          Output tensor. Data types supported: U8 (only if both inputs are U8), S16, F16 or F32.
+     * @param[in]  scale           Scale to apply after multiplication. Must be positive.
+     * @param[in]  overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
+     * @param[in]  rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
+     */
+    void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale,
+                   ConvertPolicy overflow_policy, RoundingPolicy rounding_policy);
+};
+}
+#endif /*__ARM_COMPUTE_CLPIXELWISEMULTIPLICATION_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLPoolingLayer.h b/arm_compute/runtime/CL/functions/CLPoolingLayer.h
new file mode 100644
index 0000000..f92860e
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLPoolingLayer.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLPOOLINGLAYER_H__
+#define __ARM_COMPUTE_CLPOOLINGLAYER_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if padding size is different from zero)
+ * -# @ref CLPoolingLayerKernel
+ */
+class CLPoolingLayer : public ICLSimpleFunction
+{
+public:
+    /** Set the input and output tensors and the pooling information.
+     *
+     * @param[in,out] input     Source tensor. (Written to only when padding != 0) Data types supported: F16, F32.
+     * @param[out]    output    Destination tensor. Data types supported: same as @p input.
+     * @param[in]     pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
+     */
+    void configure(ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info);
+};
+}
+#endif /* __ARM_COMPUTE_CLPOOLINGLAYER_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLRemap.h b/arm_compute/runtime/CL/functions/CLRemap.h
new file mode 100644
index 0000000..4cb2be9
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLRemap.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLREMAP_H__
+#define __ARM_COMPUTE_CLREMAP_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute remap. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref CLRemapKernel
+ */
+class CLRemap : public ICLSimpleFunction
+{
+public:
+    /** Initialise the function's source, coordinate maps, destination, interpolation policy and border mode.
+     *
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[in]     map_x                 Map for X coordinates. Data types supported: F32.
+     * @param[in]     map_y                 Map for Y coordinates. Data types supported: F32.
+     * @param[out]    output                Output tensor. Data types supported: U8.
+     * @param[in]     policy                Interpolation policy to use. Only NEAREST and BILINEAR are supported.
+     * @param[in]     border_mode           Border mode to use on the input tensor.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if @p border_mode is set to CONSTANT.
+     *
+     */
+    void configure(ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output,
+                   InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+}
+#endif /*__ARM_COMPUTE_CLREMAP_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLScale.h b/arm_compute/runtime/CL/functions/CLScale.h
new file mode 100644
index 0000000..c2438dd
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLScale.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLSCALE_H__
+#define __ARM_COMPUTE_CLSCALE_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLScaleKernel */
+class CLScale : public ICLSimpleFunction
+{
+public:
+    /** Initialise the function's source, destination, interpolation policy and border mode.
+     *
+     * @param[in,out] input                 Source tensor. Data types supported: U8, S16. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8, S16 (Must be the same as the input tensor).
+     *                                      All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
+     * @param[in]     policy                The interpolation policy to use.
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if @p border_mode is set to CONSTANT.
+     */
+    void configure(ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+}
+#endif /*__ARM_COMPUTE_CLSCALE_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLScharr3x3.h b/arm_compute/runtime/CL/functions/CLScharr3x3.h
new file mode 100644
index 0000000..3ea0b84
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLScharr3x3.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLSCHARR3X3_H__
+#define __ARM_COMPUTE_CLSCHARR3X3_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute a Scharr 3x3 filter. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref CLScharr3x3Kernel
+ *
+ */
+class CLScharr3x3 : public ICLSimpleFunction
+{
+public:
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @note At least one of @p output_x or @p output_y must not be NULL.
+     *
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output_x              (Optional) Destination for the Scharr 3x3 convolution along the X axis. Data types supported: S16.
+     * @param[out]    output_y              (Optional) Destination for the Scharr 3x3 convolution along the Y axis. Data types supported: S16.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if @p border_mode is set to CONSTANT.
+     */
+    void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+}
+#endif /*__ARM_COMPUTE_CLSCHARR3X3_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLSobel3x3.h b/arm_compute/runtime/CL/functions/CLSobel3x3.h
new file mode 100644
index 0000000..7a4f47d
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLSobel3x3.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLSOBEL3X3_H__
+#define __ARM_COMPUTE_CLSOBEL3X3_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute a Sobel 3x3 filter. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref CLSobel3x3Kernel
+ *
+ */
+class CLSobel3x3 : public ICLSimpleFunction
+{
+public:
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @note At least one of @p output_x or @p output_y must not be NULL.
+     *
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output_x              (Optional) Destination for the Sobel 3x3 convolution along the X axis. Data types supported: S16.
+     * @param[out]    output_y              (Optional) Destination for the Sobel 3x3 convolution along the Y axis. Data types supported: S16.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if @p border_mode is set to CONSTANT.
+     */
+    void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+}
+#endif /*__ARM_COMPUTE_CLSOBEL3X3_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLSobel5x5.h b/arm_compute/runtime/CL/functions/CLSobel5x5.h
new file mode 100644
index 0000000..ad1f72f
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLSobel5x5.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLSOBEL5X5_H__
+#define __ARM_COMPUTE_CLSOBEL5X5_H__
+
+#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+#include "arm_compute/core/CL/kernels/CLSobel5x5Kernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute Sobel 5x5 filter. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref CLSobel5x5HorKernel
+ * -# @ref CLSobel5x5VertKernel
+ *
+ */
+class CLSobel5x5 : public IFunction
+{
+public:
+    /** Default constructor. */
+    CLSobel5x5();
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @note At least one of @p output_x or @p output_y must not be NULL.
+     *
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output_x              (Optional) Destination for the Sobel 5x5 convolution along the X axis. Data types supported: S16.
+     * @param[out]    output_y              (Optional) Destination for the Sobel 5x5 convolution along the Y axis. Data types supported: S16.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if @p border_mode is set to CONSTANT. Defaults to 0.
+     */
+    void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
+
+    // Inherited methods overridden:
+    void run() override;
+
+protected:
+    CLSobel5x5HorKernel  _sobel_hor;      /**< Sobel horizontal 5x5 kernel */
+    CLSobel5x5VertKernel _sobel_vert;     /**< Sobel vertical 5x5 kernel */
+    CLFillBorderKernel   _border_handler; /**< Kernel to handle image borders */
+    CLImage              _tmp_x;          /**< Temporary buffer for Sobel X */
+    CLImage              _tmp_y;          /**< Temporary buffer for Sobel Y */
+};
+}
+#endif /*__ARM_COMPUTE_CLSOBEL5X5_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLSobel7x7.h b/arm_compute/runtime/CL/functions/CLSobel7x7.h
new file mode 100644
index 0000000..1a3fe1a
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLSobel7x7.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLSOBEL7X7_H__
+#define __ARM_COMPUTE_CLSOBEL7X7_H__
+
+#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+#include "arm_compute/core/CL/kernels/CLSobel7x7Kernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute Sobel 7x7 filter. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref CLSobel7x7HorKernel
+ * -# @ref CLSobel7x7VertKernel
+ *
+ */
+class CLSobel7x7 : public IFunction
+{
+public:
+    /** Default constructor. */
+    CLSobel7x7();
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @note At least one of @p output_x or @p output_y must not be NULL.
+     *
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output_x              (Optional) Destination for the Sobel 7x7 convolution along the X axis. Data types supported: S32.
+     * @param[out]    output_y              (Optional) Destination for the Sobel 7x7 convolution along the Y axis. Data types supported: S32.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if @p border_mode is set to CONSTANT. Defaults to 0.
+     */
+    void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
+
+    // Inherited methods overridden:
+    void run() override;
+
+protected:
+    CLSobel7x7HorKernel  _sobel_hor;      /**< Sobel horizontal 7x7 kernel */
+    CLSobel7x7VertKernel _sobel_vert;     /**< Sobel vertical 7x7 kernel */
+    CLFillBorderKernel   _border_handler; /**< Kernel to handle image borders */
+    CLImage              _tmp_x;          /**< Temporary buffer for Sobel X */
+    CLImage              _tmp_y;          /**< Temporary buffer for Sobel Y */
+};
+}
+#endif /*__ARM_COMPUTE_CLSOBEL7X7_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
new file mode 100644
index 0000000..42cfc06
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLSOFTMAXLAYER_H__
+#define __ARM_COMPUTE_CLSOFTMAXLAYER_H__
+
+#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to compute a SoftmaxLayer.
+ *
+ * Softmax is calculated by:
+ * @f[ out = exp(x - max(x)) / sum(exp(x - max(x))) @f]
+ *
+ * This function runs the following kernels:
+ * -# @ref CLLogits1DMaxKernel
+ * -# @ref CLLogits1DShiftExpSumKernel
+ * -# @ref CLLogits1DNormKernel
+ */
+class CLSoftmaxLayer : public IFunction
+{
+public:
+    /** Default constructor. */
+    CLSoftmaxLayer();
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input  Source tensor. Data types supported: F16, F32. Number of channels must be 1.
+     * @param[out] output Destination tensor. Data type and number of channels must match @p input.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    CLLogits1DMaxKernel         _max_kernel;           /**< Logits 1D max kernel */
+    CLLogits1DShiftExpSumKernel _shift_exp_sum_kernel; /**< Logits 1D shift, exp and sum kernel */
+    CLLogits1DNormKernel        _norm_kernel;          /**< Logits 1D norm kernel */
+    CLTensor                    _max;                  /**< Internal tensor; presumably holds max(x) - confirm in the implementation */
+    CLTensor                    _sum;                  /**< Internal tensor; presumably holds sum(exp(x - max(x))) - confirm in the implementation */
+    CLTensor                    _tmp;                  /**< Internal temporary tensor; exact contents are not visible in this header */
+};
+}
+#endif /* __ARM_COMPUTE_CLSOFTMAXLAYER_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLTableLookup.h b/arm_compute/runtime/CL/functions/CLTableLookup.h
new file mode 100644
index 0000000..ebe6593
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLTableLookup.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLTABLELOOKUP_H__
+#define __ARM_COMPUTE_CLTABLELOOKUP_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+class ICLLut;
+
+/** Basic function to run @ref CLTableLookupKernel. */
+class CLTableLookup : public ICLSimpleFunction
+{
+public:
+    /** Initialise the function's input tensor, lookup table and output tensor.
+     *
+     * @param[in]  input  Input tensor. Data types supported: U8 and S16.
+     * @param[in]  lut    Input lookup table. Data types supported: U8 and S16.
+     * @param[out] output Output tensor. Data types supported: U8 and S16.
+     */
+    void configure(const ICLTensor *input, const ICLLut *lut, ICLTensor *output);
+};
+}
+#endif /*__ARM_COMPUTE_CLTABLELOOKUP_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLThreshold.h b/arm_compute/runtime/CL/functions/CLThreshold.h
new file mode 100644
index 0000000..14c0578
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLThreshold.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLTHRESHOLD_H__
+#define __ARM_COMPUTE_CLTHRESHOLD_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLThresholdKernel. */
+class CLThreshold : public ICLSimpleFunction
+{
+public:
+    /** Initialise the function's source, destination, thresholds and threshold type.
+     *
+     * @param[in]  input       Input tensor. Data types supported: U8.
+     * @param[out] output      Output tensor. Data types supported: U8.
+     * @param[in]  threshold   Threshold. If upper threshold is specified, this will be used as the lower threshold.
+     * @param[in]  false_value (Optional) Value to assign when the condition is false. Defaults to 0.
+     * @param[in]  true_value  (Optional) Value to assign when the condition is true. Defaults to 0.
+     * @param[in]  type        (Optional) Thresholding type. Can either be BINARY or RANGE. Defaults to BINARY.
+     * @param[in]  upper       (Optional) Upper threshold. Only used with RANGE thresholding. Defaults to 0.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, uint8_t threshold,
+                   uint8_t false_value = 0, uint8_t true_value = 0,
+                   ThresholdType type = ThresholdType::BINARY, uint8_t upper = 0);
+};
+}
+#endif /*__ARM_COMPUTE_CLTHRESHOLD_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLTranspose.h b/arm_compute/runtime/CL/functions/CLTranspose.h
new file mode 100644
index 0000000..9b57fe0
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLTranspose.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLTRANSPOSE_H__
+#define __ARM_COMPUTE_CLTRANSPOSE_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to transpose a matrix on OpenCL. This function calls the following OpenCL kernel:
+ *
+ *  -# @ref CLTransposeKernel
+ *
+ */
+class CLTranspose : public ICLSimpleFunction
+{
+public:
+    /** Initialise the function's input and output tensors.
+     *
+     * @param[in]  input  Input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32.
+     * @param[out] output Output tensor. Data types supported: Same as @p input.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output);
+};
+}
+
+#endif /* __ARM_COMPUTE_CLTRANSPOSE_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLWarpAffine.h b/arm_compute/runtime/CL/functions/CLWarpAffine.h
new file mode 100644
index 0000000..aeab3f7
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLWarpAffine.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLWARPAFFINE_H__
+#define __ARM_COMPUTE_CLWARPAFFINE_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLWarpAffineKernel for AFFINE transformation. */
+class CLWarpAffine : public ICLSimpleFunction
+{
+public:
+    /** Initialize the function's source, destination, interpolation policy and border_mode.
+     *
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8.
+     * @param[in]     matrix                The affine matrix. Must be 2x3 of type float.
+     * @param[in]     policy                The interpolation type.
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if @p border_mode is set to CONSTANT. Defaults to 0.
+     */
+    void configure(ICLTensor *input, ICLTensor *output, const float *matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+}
+#endif /*__ARM_COMPUTE_CLWARPAFFINE_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLWarpPerspective.h b/arm_compute/runtime/CL/functions/CLWarpPerspective.h
new file mode 100644
index 0000000..8023701
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLWarpPerspective.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLWARPPERSPECTIVE_H__
+#define __ARM_COMPUTE_CLWARPPERSPECTIVE_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLWarpPerspectiveKernel for PERSPECTIVE transformation. */
+class CLWarpPerspective : public ICLSimpleFunction
+{
+public:
+    /** Initialize the function's source, destination, interpolation policy and border_mode.
+     *
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8.
+     * @param[in]     matrix                The perspective matrix. Must be 3x3 of type float.
+     * @param[in]     policy                The interpolation type.
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if @p border_mode is set to CONSTANT. Defaults to 0.
+     */
+    void configure(ICLTensor *input, ICLTensor *output, const float *matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+}
+#endif /*__ARM_COMPUTE_CLWARPPERSPECTIVE_H__ */