COMPMID-761: Add CL/NEON Convolution benchmark tests

Change-Id: I684baff3bfdff2244e04facd2d85d84609b7caff
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/134769
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
diff --git a/tests/Utils.h b/tests/Utils.h
index d0eebb1..278af41 100644
--- a/tests/Utils.h
+++ b/tests/Utils.h
@@ -596,6 +596,61 @@
     return pyramid;
 }
 
+/** Fill a convolution matrix with random coefficients.
+ *
+ * @param[out] conv   The convolution matrix to fill; width * height elements are written.
+ * @param[in]  width  The width of the convolution matrix.
+ * @param[in]  height The height of the convolution matrix.
+ * @param[in]  seed   The seed of the random number generator.
+ */
+inline void init_conv(int16_t *conv, unsigned int width, unsigned int height, std::random_device::result_type seed)
+{
+    std::uniform_int_distribution<int16_t> coefficient(-32768, 32767);
+    std::mt19937                           engine(seed);
+
+    for(int16_t *it = conv, *const end = conv + width * height; it != end; ++it)
+    {
+        *it = coefficient(engine);
+    }
+}
+
+/** Initialize a separable convolution matrix.
+ *
+ * @param[out] conv   Row-major width x height matrix, set to the outer product of a random column and row vector.
+ * @param[in]  width  The width of the convolution matrix (length of the row vector).
+ * @param[in]  height The height of the convolution matrix (length of the column vector).
+ * @param[in]  seed   The random seed to be used.
+ */
+inline void init_separable_conv(int16_t *conv, unsigned int width, unsigned int height, std::random_device::result_type seed)
+{
+    std::mt19937 gen(seed);
+    // Set it between -128 and 127 to ensure the matrix does not overflow
+    std::uniform_int_distribution<int16_t> distribution_int16(-128, 127);
+
+    // Element 0 of each vector stays 1; std::vector replaces the non-standard variable-length arrays
+    std::vector<int16_t> conv_row(width, 1);
+    std::vector<int16_t> conv_col(height, 1);
+
+    for(unsigned int i = 1; i < width; ++i)
+    {
+        conv_row[i] = distribution_int16(gen);
+    }
+
+    for(unsigned int i = 1; i < height; ++i)
+    {
+        conv_col[i] = distribution_int16(gen);
+    }
+
+    // Outer product: element (y, x) is conv_col[y] * conv_row[x], so rows run over height and columns over width
+    for(unsigned int y = 0; y < height; ++y)
+    {
+        for(unsigned int x = 0; x < width; ++x)
+        {
+            conv[y * width + x] = conv_col[y] * conv_row[x];
+        }
+    }
+}
+
 /** Create a vector of random ROIs.
  *
  * @param[in] shape     The shape of the input tensor.
diff --git a/tests/benchmark/CL/Convolution.cpp b/tests/benchmark/CL/Convolution.cpp
new file mode 100644
index 0000000..04622aa
--- /dev/null
+++ b/tests/benchmark/CL/Convolution.cpp
@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLConvolution.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/benchmark/fixtures/ConvolutionFixture.h"
+#include "tests/datasets/BorderModeDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "utils/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+namespace
+{
+// *INDENT-OFF*
+// clang-format off
+
+#define CONVOLUTION_SQUARE_DATA_TEST_CASE(TEST_NAME, MODE, SHAPES, DT, FILTER_SIZE)                    \
+    REGISTER_FIXTURE_DATA_TEST_CASE(TEST_NAME, CLConvolutionFixture, framework::DatasetMode::MODE,     \
+                                combine(combine(combine(                                               \
+                                datasets::SHAPES,                                                      \
+                                framework::dataset::make("DataType", DataType::DT)),                   \
+                                datasets::BorderModes()),                                              \
+                                framework::dataset::make("FilterSize", { FILTER_SIZE })));
+// Rectangle: width and height datasets carry distinct names so both parameters can be told apart
+#define CONVOLUTION_RECTANGLE_DATA_TEST_CASE(TEST_NAME, MODE, SHAPES, DT)                              \
+    REGISTER_FIXTURE_DATA_TEST_CASE(TEST_NAME, CLConvolutionFixture, framework::DatasetMode::MODE,     \
+                                combine(combine(combine(combine(                                       \
+                                datasets::SHAPES,                                                      \
+                                framework::dataset::make("DataType", DataType::DT)),                   \
+                                datasets::BorderModes()),                                              \
+                                framework::dataset::make("FilterWidth", { 3, 5, 7, 9 })),              \
+                                framework::dataset::make("FilterHeight", { 3, 5, 7, 9 })));
+// Separable: registered with exactly the same datasets as the square test cases
+#define CONVOLUTION_SEPARABLE_DATA_TEST_CASE(TEST_NAME, MODE, SHAPES, DT, FILTER_SIZE)                 \
+    CONVOLUTION_SQUARE_DATA_TEST_CASE(TEST_NAME, MODE, SHAPES, DT, FILTER_SIZE)
+
+// clang-format on
+// *INDENT-ON*
+
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(CustomConvolution)
+
+TEST_SUITE(Square3x3)
+
+using CLConvolutionFixture = ConvolutionSquareFixture<CLTensor, CLConvolution3x3, CLAccessor>;
+
+TEST_SUITE(U8)
+CONVOLUTION_SQUARE_DATA_TEST_CASE(RunSmall, PRECOMMIT, SmallShapes(), U8, 3)
+CONVOLUTION_SQUARE_DATA_TEST_CASE(RunLarge, NIGHTLY, LargeShapes(), U8, 3)
+TEST_SUITE_END() // U8
+
+TEST_SUITE(S16)
+CONVOLUTION_SQUARE_DATA_TEST_CASE(RunSmall, PRECOMMIT, SmallShapes(), S16, 3)
+CONVOLUTION_SQUARE_DATA_TEST_CASE(RunLarge, NIGHTLY, LargeShapes(), S16, 3)
+TEST_SUITE_END() // S16
+
+TEST_SUITE_END() // Square3x3
+
+TEST_SUITE(Square5x5)
+
+using CLConvolutionFixture = ConvolutionSquareFixture<CLTensor, CLConvolution5x5, CLAccessor>;
+
+TEST_SUITE(U8)
+CONVOLUTION_SQUARE_DATA_TEST_CASE(RunSmall, PRECOMMIT, SmallShapes(), U8, 5)
+CONVOLUTION_SQUARE_DATA_TEST_CASE(RunLarge, NIGHTLY, LargeShapes(), U8, 5)
+TEST_SUITE_END() // U8
+
+TEST_SUITE(S16)
+CONVOLUTION_SQUARE_DATA_TEST_CASE(RunSmall, PRECOMMIT, SmallShapes(), S16, 5)
+CONVOLUTION_SQUARE_DATA_TEST_CASE(RunLarge, NIGHTLY, LargeShapes(), S16, 5)
+TEST_SUITE_END() // S16
+
+TEST_SUITE_END() // Square5x5
+
+TEST_SUITE(Square7x7)
+
+using CLConvolutionFixture = ConvolutionSquareFixture<CLTensor, CLConvolution7x7, CLAccessor>;
+
+TEST_SUITE(U8)
+CONVOLUTION_SQUARE_DATA_TEST_CASE(RunSmall, PRECOMMIT, SmallShapes(), U8, 7)
+CONVOLUTION_SQUARE_DATA_TEST_CASE(RunLarge, NIGHTLY, LargeShapes(), U8, 7)
+TEST_SUITE_END() // U8
+
+TEST_SUITE(S16)
+CONVOLUTION_SQUARE_DATA_TEST_CASE(RunSmall, PRECOMMIT, SmallShapes(), S16, 7)
+CONVOLUTION_SQUARE_DATA_TEST_CASE(RunLarge, NIGHTLY, LargeShapes(), S16, 7)
+TEST_SUITE_END() // S16
+
+TEST_SUITE_END() // Square7x7
+
+TEST_SUITE(Square9x9)
+
+using CLConvolutionFixture = ConvolutionSquareFixture<CLTensor, CLConvolution9x9, CLAccessor>;
+
+TEST_SUITE(U8)
+CONVOLUTION_SQUARE_DATA_TEST_CASE(RunSmall, PRECOMMIT, SmallShapes(), U8, 9)
+CONVOLUTION_SQUARE_DATA_TEST_CASE(RunLarge, NIGHTLY, LargeShapes(), U8, 9)
+TEST_SUITE_END() // U8
+
+TEST_SUITE(S16)
+CONVOLUTION_SQUARE_DATA_TEST_CASE(RunSmall, PRECOMMIT, SmallShapes(), S16, 9)
+CONVOLUTION_SQUARE_DATA_TEST_CASE(RunLarge, NIGHTLY, LargeShapes(), S16, 9)
+TEST_SUITE_END() // S16
+
+TEST_SUITE_END() // Square9x9
+
+TEST_SUITE(Rectangle)
+
+using CLConvolutionFixture = ConvolutionRectangleFixture<CLTensor, CLConvolutionRectangle, CLAccessor>;
+
+TEST_SUITE(U8)
+CONVOLUTION_RECTANGLE_DATA_TEST_CASE(RunSmall, PRECOMMIT, SmallShapes(), U8)
+CONVOLUTION_RECTANGLE_DATA_TEST_CASE(RunLarge, NIGHTLY, LargeShapes(), U8)
+TEST_SUITE_END() // U8
+
+TEST_SUITE(S16)
+CONVOLUTION_RECTANGLE_DATA_TEST_CASE(RunSmall, PRECOMMIT, SmallShapes(), S16)
+CONVOLUTION_RECTANGLE_DATA_TEST_CASE(RunLarge, NIGHTLY, LargeShapes(), S16)
+TEST_SUITE_END() // S16
+
+TEST_SUITE_END() // Rectangle
+
+TEST_SUITE(Separable5x5)
+
+using CLConvolutionFixture = ConvolutionSeperableFixture<CLTensor, CLConvolution5x5, CLAccessor>;
+
+TEST_SUITE(U8)
+CONVOLUTION_SEPARABLE_DATA_TEST_CASE(RunSmall, PRECOMMIT, SmallShapes(), U8, 5)
+CONVOLUTION_SEPARABLE_DATA_TEST_CASE(RunLarge, NIGHTLY, LargeShapes(), U8, 5)
+TEST_SUITE_END() // U8
+
+TEST_SUITE(S16)
+CONVOLUTION_SEPARABLE_DATA_TEST_CASE(RunSmall, PRECOMMIT, SmallShapes(), S16, 5)
+CONVOLUTION_SEPARABLE_DATA_TEST_CASE(RunLarge, NIGHTLY, LargeShapes(), S16, 5)
+TEST_SUITE_END() // S16
+
+TEST_SUITE_END() // Separable5x5
+
+TEST_SUITE(Separable7x7)
+
+using CLConvolutionFixture = ConvolutionSeperableFixture<CLTensor, CLConvolution7x7, CLAccessor>;
+
+TEST_SUITE(U8)
+CONVOLUTION_SEPARABLE_DATA_TEST_CASE(RunSmall, PRECOMMIT, SmallShapes(), U8, 7)
+CONVOLUTION_SEPARABLE_DATA_TEST_CASE(RunLarge, NIGHTLY, LargeShapes(), U8, 7)
+TEST_SUITE_END() // U8
+
+TEST_SUITE(S16)
+CONVOLUTION_SEPARABLE_DATA_TEST_CASE(RunSmall, PRECOMMIT, SmallShapes(), S16, 7)
+CONVOLUTION_SEPARABLE_DATA_TEST_CASE(RunLarge, NIGHTLY, LargeShapes(), S16, 7)
+TEST_SUITE_END() // S16
+
+TEST_SUITE_END() // Separable7x7
+
+TEST_SUITE(Separable9x9)
+
+using CLConvolutionFixture = ConvolutionSeperableFixture<CLTensor, CLConvolution9x9, CLAccessor>;
+
+TEST_SUITE(U8)
+CONVOLUTION_SEPARABLE_DATA_TEST_CASE(RunSmall, PRECOMMIT, SmallShapes(), U8, 9)
+CONVOLUTION_SEPARABLE_DATA_TEST_CASE(RunLarge, NIGHTLY, LargeShapes(), U8, 9)
+TEST_SUITE_END() // U8
+
+TEST_SUITE(S16)
+CONVOLUTION_SEPARABLE_DATA_TEST_CASE(RunSmall, PRECOMMIT, SmallShapes(), S16, 9)
+CONVOLUTION_SEPARABLE_DATA_TEST_CASE(RunLarge, NIGHTLY, LargeShapes(), S16, 9)
+TEST_SUITE_END() // S16
+
+TEST_SUITE_END() // Separable9x9
+
+TEST_SUITE_END() // CustomConvolution
+TEST_SUITE_END() // CL
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/benchmark/NEON/Convolution.cpp b/tests/benchmark/NEON/Convolution.cpp
new file mode 100644
index 0000000..29ff60a
--- /dev/null
+++ b/tests/benchmark/NEON/Convolution.cpp
@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEConvolution.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/benchmark/fixtures/ConvolutionFixture.h"
+#include "tests/datasets/BorderModeDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "utils/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+namespace
+{
+// *INDENT-OFF*
+// clang-format off
+
+#define CONVOLUTION_SQUARE_DATA_TEST_CASE(TEST_NAME, MODE, SHAPES, DT, FILTER_SIZE)                    \
+    REGISTER_FIXTURE_DATA_TEST_CASE(TEST_NAME, NEConvolutionFixture, framework::DatasetMode::MODE,     \
+                                combine(combine(combine(                                               \
+                                datasets::SHAPES,                                                      \
+                                framework::dataset::make("DataType", DataType::DT)),                   \
+                                datasets::BorderModes()),                                              \
+                                framework::dataset::make("FilterSize", { FILTER_SIZE })));
+// Rectangle: width and height datasets carry distinct names so both parameters can be told apart
+#define CONVOLUTION_RECTANGLE_DATA_TEST_CASE(TEST_NAME, MODE, SHAPES, DT)                              \
+    REGISTER_FIXTURE_DATA_TEST_CASE(TEST_NAME, NEConvolutionFixture, framework::DatasetMode::MODE,     \
+                                combine(combine(combine(combine(                                       \
+                                datasets::SHAPES,                                                      \
+                                framework::dataset::make("DataType", DataType::DT)),                   \
+                                datasets::BorderModes()),                                              \
+                                framework::dataset::make("FilterWidth", { 3, 5, 7, 9 })),              \
+                                framework::dataset::make("FilterHeight", { 3, 5, 7, 9 })));
+// Separable: registered with exactly the same datasets as the square test cases
+#define CONVOLUTION_SEPARABLE_DATA_TEST_CASE(TEST_NAME, MODE, SHAPES, DT, FILTER_SIZE)                 \
+    CONVOLUTION_SQUARE_DATA_TEST_CASE(TEST_NAME, MODE, SHAPES, DT, FILTER_SIZE)
+
+// clang-format on
+// *INDENT-ON*
+
+} // namespace
+
+TEST_SUITE(NEON)
+TEST_SUITE(CustomConvolution)
+
+TEST_SUITE(Square3x3)
+
+using NEConvolutionFixture = ConvolutionSquareFixture<Tensor, NEConvolution3x3, Accessor>;
+
+TEST_SUITE(U8)
+CONVOLUTION_SQUARE_DATA_TEST_CASE(RunSmall, PRECOMMIT, SmallShapes(), U8, 3)
+CONVOLUTION_SQUARE_DATA_TEST_CASE(RunLarge, NIGHTLY, LargeShapes(), U8, 3)
+TEST_SUITE_END() // U8
+
+TEST_SUITE(S16)
+CONVOLUTION_SQUARE_DATA_TEST_CASE(RunSmall, PRECOMMIT, SmallShapes(), S16, 3)
+CONVOLUTION_SQUARE_DATA_TEST_CASE(RunLarge, NIGHTLY, LargeShapes(), S16, 3)
+TEST_SUITE_END() // S16
+
+TEST_SUITE_END() // Square3x3
+
+TEST_SUITE(Square5x5)
+
+using NEConvolutionFixture = ConvolutionSquareFixture<Tensor, NEConvolution5x5, Accessor>;
+
+TEST_SUITE(U8)
+CONVOLUTION_SQUARE_DATA_TEST_CASE(RunSmall, PRECOMMIT, SmallShapes(), U8, 5)
+CONVOLUTION_SQUARE_DATA_TEST_CASE(RunLarge, NIGHTLY, LargeShapes(), U8, 5)
+TEST_SUITE_END() // U8
+
+TEST_SUITE(S16)
+CONVOLUTION_SQUARE_DATA_TEST_CASE(RunSmall, PRECOMMIT, SmallShapes(), S16, 5)
+CONVOLUTION_SQUARE_DATA_TEST_CASE(RunLarge, NIGHTLY, LargeShapes(), S16, 5)
+TEST_SUITE_END() // S16
+
+TEST_SUITE_END() // Square5x5
+
+TEST_SUITE(Square7x7)
+
+using NEConvolutionFixture = ConvolutionSquareFixture<Tensor, NEConvolution7x7, Accessor>;
+
+TEST_SUITE(U8)
+CONVOLUTION_SQUARE_DATA_TEST_CASE(RunSmall, PRECOMMIT, SmallShapes(), U8, 7)
+CONVOLUTION_SQUARE_DATA_TEST_CASE(RunLarge, NIGHTLY, LargeShapes(), U8, 7)
+TEST_SUITE_END() // U8
+
+TEST_SUITE(S16)
+CONVOLUTION_SQUARE_DATA_TEST_CASE(RunSmall, PRECOMMIT, SmallShapes(), S16, 7)
+CONVOLUTION_SQUARE_DATA_TEST_CASE(RunLarge, NIGHTLY, LargeShapes(), S16, 7)
+TEST_SUITE_END() // S16
+
+TEST_SUITE_END() // Square7x7
+
+TEST_SUITE(Square9x9)
+
+using NEConvolutionFixture = ConvolutionSquareFixture<Tensor, NEConvolution9x9, Accessor>;
+
+TEST_SUITE(U8)
+CONVOLUTION_SQUARE_DATA_TEST_CASE(RunSmall, PRECOMMIT, SmallShapes(), U8, 9)
+CONVOLUTION_SQUARE_DATA_TEST_CASE(RunLarge, NIGHTLY, LargeShapes(), U8, 9)
+TEST_SUITE_END() // U8
+
+TEST_SUITE(S16)
+CONVOLUTION_SQUARE_DATA_TEST_CASE(RunSmall, PRECOMMIT, SmallShapes(), S16, 9)
+CONVOLUTION_SQUARE_DATA_TEST_CASE(RunLarge, NIGHTLY, LargeShapes(), S16, 9)
+TEST_SUITE_END() // S16
+
+TEST_SUITE_END() // Square9x9
+
+TEST_SUITE(Rectangle)
+
+using NEConvolutionFixture = ConvolutionRectangleFixture<Tensor, NEConvolutionRectangle, Accessor>;
+
+TEST_SUITE(U8)
+CONVOLUTION_RECTANGLE_DATA_TEST_CASE(RunSmall, PRECOMMIT, SmallShapes(), U8)
+CONVOLUTION_RECTANGLE_DATA_TEST_CASE(RunLarge, NIGHTLY, LargeShapes(), U8)
+TEST_SUITE_END() // U8
+
+TEST_SUITE(S16)
+CONVOLUTION_RECTANGLE_DATA_TEST_CASE(RunSmall, PRECOMMIT, SmallShapes(), S16)
+CONVOLUTION_RECTANGLE_DATA_TEST_CASE(RunLarge, NIGHTLY, LargeShapes(), S16)
+TEST_SUITE_END() // S16
+
+TEST_SUITE_END() // Rectangle
+
+TEST_SUITE(Separable5x5)
+
+using NEConvolutionFixture = ConvolutionSeperableFixture<Tensor, NEConvolution5x5, Accessor>;
+
+TEST_SUITE(U8)
+CONVOLUTION_SEPARABLE_DATA_TEST_CASE(RunSmall, PRECOMMIT, SmallShapes(), U8, 5)
+CONVOLUTION_SEPARABLE_DATA_TEST_CASE(RunLarge, NIGHTLY, LargeShapes(), U8, 5)
+TEST_SUITE_END() // U8
+
+TEST_SUITE(S16)
+CONVOLUTION_SEPARABLE_DATA_TEST_CASE(RunSmall, PRECOMMIT, SmallShapes(), S16, 5)
+CONVOLUTION_SEPARABLE_DATA_TEST_CASE(RunLarge, NIGHTLY, LargeShapes(), S16, 5)
+TEST_SUITE_END() // S16
+
+TEST_SUITE_END() // Separable5x5
+
+TEST_SUITE(Separable7x7)
+
+using NEConvolutionFixture = ConvolutionSeperableFixture<Tensor, NEConvolution7x7, Accessor>;
+
+TEST_SUITE(U8)
+CONVOLUTION_SEPARABLE_DATA_TEST_CASE(RunSmall, PRECOMMIT, SmallShapes(), U8, 7)
+CONVOLUTION_SEPARABLE_DATA_TEST_CASE(RunLarge, NIGHTLY, LargeShapes(), U8, 7)
+TEST_SUITE_END() // U8
+
+TEST_SUITE(S16)
+CONVOLUTION_SEPARABLE_DATA_TEST_CASE(RunSmall, PRECOMMIT, SmallShapes(), S16, 7)
+CONVOLUTION_SEPARABLE_DATA_TEST_CASE(RunLarge, NIGHTLY, LargeShapes(), S16, 7)
+TEST_SUITE_END() // S16
+
+TEST_SUITE_END() // Separable7x7
+
+TEST_SUITE(Separable9x9)
+
+using NEConvolutionFixture = ConvolutionSeperableFixture<Tensor, NEConvolution9x9, Accessor>;
+
+TEST_SUITE(U8)
+CONVOLUTION_SEPARABLE_DATA_TEST_CASE(RunSmall, PRECOMMIT, SmallShapes(), U8, 9)
+CONVOLUTION_SEPARABLE_DATA_TEST_CASE(RunLarge, NIGHTLY, LargeShapes(), U8, 9)
+TEST_SUITE_END() // U8
+
+TEST_SUITE(S16)
+CONVOLUTION_SEPARABLE_DATA_TEST_CASE(RunSmall, PRECOMMIT, SmallShapes(), S16, 9)
+CONVOLUTION_SEPARABLE_DATA_TEST_CASE(RunLarge, NIGHTLY, LargeShapes(), S16, 9)
+TEST_SUITE_END() // S16
+
+TEST_SUITE_END() // Separable9x9
+
+TEST_SUITE_END() // CustomConvolution
+TEST_SUITE_END() // NEON
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/benchmark/fixtures/ConvolutionFixture.h b/tests/benchmark/fixtures/ConvolutionFixture.h
new file mode 100644
index 0000000..3f9c2a4
--- /dev/null
+++ b/tests/benchmark/fixtures/ConvolutionFixture.h
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_CONVOLUTIONFIXTURE
+#define ARM_COMPUTE_TEST_CONVOLUTIONFIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/Globals.h"
+#include "tests/Utils.h"
+#include "tests/framework/Fixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+/** Parent fixture that can be used for NEON and CL */
+template <typename TensorType, typename Function, typename Accessor>
+class ConvolutionFixture : public framework::Fixture
+{
+public:
+    /** Build the matrix (separable if @p is_separable) and the tensors, then configure the function; @p width and @p height must be 3, 5, 7 or 9. */
+    template <typename...>
+    void setup(TensorShape src_shape, DataType output_data_type, BorderMode border_mode, unsigned int width, unsigned int height, bool is_separable = false)
+    {
+        std::mt19937  gen(library->seed());
+        const uint8_t constant_border_value = 0;
+
+        // Generate random scale value between 1 and 255 (uint32_t: std::uniform_int_distribution is undefined for 8-bit types).
+        std::uniform_int_distribution<uint32_t> distribution_scale(1, 255);
+        const uint32_t                          scale = distribution_scale(gen);
+
+        ARM_COMPUTE_ERROR_ON(3 != width && 5 != width && 7 != width && 9 != width);
+        ARM_COMPUTE_ERROR_ON(3 != height && 5 != height && 7 != height && 9 != height);
+
+        _width  = width;
+        _height = height;
+
+        std::vector<int16_t> conv(width * height);
+        if(is_separable)
+        {
+            init_separable_conv(conv.data(), width, height, seed);
+        }
+        else
+        {
+            init_conv(conv.data(), width, height, seed);
+        }
+
+        // Create tensors
+        src = create_tensor<TensorType>(src_shape, DataType::U8);
+        dst = create_tensor<TensorType>(src_shape, output_data_type);
+
+        // Configure function
+        configure_target(convolution_func, src, dst, conv.data(), scale, border_mode, constant_border_value);
+
+        // Allocate tensors
+        src.allocator()->allocate();
+        dst.allocator()->allocate();
+
+        // Fill tensors
+        library->fill_tensor_uniform(Accessor(src), 0);
+        library->fill_tensor_uniform(Accessor(dst), 1);
+    }
+
+    void run()
+    {
+        convolution_func.run();
+    }
+
+    void sync()
+    {
+        sync_if_necessary<TensorType>();
+        sync_tensor_if_necessary<TensorType>(dst);
+    }
+
+protected:
+    /** Configure @p func for the given tensors and matrix; each child fixture forwards to its own configure() signature. */
+    virtual void configure_target(Function &func, TensorType &src, TensorType &dst, const int16_t *conv, uint32_t scale,
+                                  BorderMode border_mode, uint8_t border_value) = 0;
+
+    unsigned int _width{};
+    unsigned int _height{};
+    Function     convolution_func{};
+
+private:
+    const std::random_device::result_type seed = 0;
+    TensorType                            src{};
+    TensorType                            dst{};
+};
+
+/** Child fixture used for square convolutions: the single filter size is passed to the parent as both width and height */
+template <typename TensorType, typename Function, typename Accessor>
+class ConvolutionSquareFixture : public ConvolutionFixture<TensorType, Function, Accessor>
+{
+public:
+    template <typename...>
+    void setup(TensorShape src_shape, DataType output_data_type, BorderMode border_mode, unsigned int width)
+    {
+        ConvolutionFixture<TensorType, Function, Accessor>::setup(src_shape, output_data_type, border_mode, width, width);
+    }
+
+protected:
+    void configure_target(Function &func, TensorType &src, TensorType &dst, const int16_t *conv, uint32_t scale,
+                          BorderMode border_mode, uint8_t constant_border_value) override
+    {
+        func.configure(&src, &dst, conv, scale, border_mode, constant_border_value);
+    }
+};
+
+/** Child fixture used for rectangular convolutions: width and height are independent and both are forwarded to configure() */
+template <typename TensorType, typename Function, typename Accessor>
+class ConvolutionRectangleFixture : public ConvolutionFixture<TensorType, Function, Accessor>
+{
+public:
+    template <typename...>
+    void setup(TensorShape src_shape, DataType output_data_type, BorderMode border_mode, unsigned int width, unsigned int height)
+    {
+        ConvolutionFixture<TensorType, Function, Accessor>::setup(src_shape, output_data_type, border_mode, width, height);
+    }
+
+protected:
+    void configure_target(Function &func, TensorType &src, TensorType &dst, const int16_t *conv, uint32_t scale,
+                          BorderMode border_mode, uint8_t constant_border_value) override
+    {
+        func.configure(&src, &dst, conv, this->_width, this->_height, scale, border_mode, constant_border_value);
+    }
+};
+
+/** Child fixture used for separable convolutions: a square matrix generated by the parent with is_separable set to true */
+template <typename TensorType, typename Function, typename Accessor>
+class ConvolutionSeperableFixture : public ConvolutionFixture<TensorType, Function, Accessor>
+{
+public:
+    template <typename...>
+    void setup(TensorShape src_shape, DataType output_data_type, BorderMode border_mode, unsigned int width)
+    {
+        ConvolutionFixture<TensorType, Function, Accessor>::setup(src_shape, output_data_type, border_mode, width, width, true);
+    }
+
+protected:
+    void configure_target(Function &func, TensorType &src, TensorType &dst, const int16_t *conv, uint32_t scale,
+                          BorderMode border_mode, uint8_t constant_border_value) override
+    {
+        func.configure(&src, &dst, conv, scale, border_mode, constant_border_value);
+    }
+};
+
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_CONVOLUTIONFIXTURE */
diff --git a/tests/validation/fixtures/ConvolutionFixture.h b/tests/validation/fixtures/ConvolutionFixture.h
index 8ebb924..741ebe5 100644
--- a/tests/validation/fixtures/ConvolutionFixture.h
+++ b/tests/validation/fixtures/ConvolutionFixture.h
@@ -66,56 +66,17 @@
 
         if(is_separable)
         {
-            create_separable_conv(conv.data());
+            init_separable_conv(conv.data(), width, height, library->seed());
         }
         else
         {
-            create_conv(conv.data());
+            init_conv(conv.data(), width, height, library->seed());
         }
 
         _target    = compute_target(shape, output_data_type, conv.data(), scale, border_mode, constant_border_value);
         _reference = compute_reference(shape, output_data_type, conv.data(), scale, border_mode, constant_border_value);
     }
 
-    void
-    create_conv(int16_t *conv)
-    {
-        std::mt19937                           gen(library->seed());
-        std::uniform_int_distribution<int16_t> distribution_int16(-32768, 32767);
-
-        for(unsigned int i = 0; i < _width * _height; ++i)
-        {
-            conv[i] = distribution_int16(gen);
-        }
-    }
-
-    void
-    create_separable_conv(int16_t *conv)
-    {
-        std::mt19937 gen(library->seed());
-        // Set it between -128 and 127 to ensure the matrix does not overflow
-        std::uniform_int_distribution<int16_t> distribution_int16(-128, 127);
-
-        int16_t conv_row[_width];
-        int16_t conv_col[_height];
-
-        conv_row[0] = conv_col[0] = 1;
-        for(unsigned int i = 1; i < _width; ++i)
-        {
-            conv_row[i] = distribution_int16(gen);
-            conv_col[i] = distribution_int16(gen);
-        }
-
-        // Multiply two matrices
-        for(unsigned int i = 0; i < _width; ++i)
-        {
-            for(unsigned int j = 0; j < _height; ++j)
-            {
-                conv[i * _width + j] = conv_col[i] * conv_row[j];
-            }
-        }
-    }
-
     template <typename U>
     void fill(U &&tensor, int i)
     {