COMPMID-592: Ported GEMMInterleave4x4 tests.

Change-Id: I161fa095b2b8a719bb152366294f62c1aad073ce
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/110463
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
diff --git a/tests/NEON/Helper.h b/tests/NEON/Helper.h
index 8bd11cc..93f2010 100644
--- a/tests/NEON/Helper.h
+++ b/tests/NEON/Helper.h
@@ -64,6 +64,21 @@
     }
 };
 
+// As above, but this also sets up a zero-valued constant border of size bordersize on the first argument given to configure()
+template <typename K, int bordersize>
+class NESynthetizeFunctionWithZeroConstantBorder : public INESimpleFunction
+{
+public:
+    template <typename T, typename... Args>
+    void configure(T first, Args &&... args)
+    {
+        auto kernel = arm_compute::support::cpp14::make_unique<K>();
+        kernel->configure(first, std::forward<Args>(args)...);
+        _kernel = std::move(kernel);
+        _border_handler.configure(first, BorderSize(bordersize), BorderMode::CONSTANT, PixelValue(0));
+    }
+};
+
 } // namespace test
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_TEST_NEON_HELPER_H__ */
diff --git a/tests/validation/CPP/GEMMInterleave4x4.h b/tests/validation/CPP/GEMMInterleave4x4.h
new file mode 100644
index 0000000..e6b09af
--- /dev/null
+++ b/tests/validation/CPP/GEMMInterleave4x4.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "GEMM.h"
+
+#include "arm_compute/core/Types.h"
+#include "tests/validation/FixedPoint.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+/** Reference 4x4 interleave: each group of four consecutive rows of @p in becomes one row of @p out,
+ *  stored column by column (four values per input column). Rows missing from a trailing, incomplete
+ *  group are written as zeros.
+ *
+ * @tparam T Element type; it is initialised from 0 and copied with memcpy, so it must support both.
+ *
+ * @param[in]  in  Input matrix; shape().x() is read as its column count and shape().y() as its row count.
+ * @param[out] out Destination; shape().x() is read as its row stride (expected to be >= 4 * columns).
+ *
+ * @return A copy of @p out once it has been filled.
+ */
+template <typename T>
+SimpleTensor<T> gemm_interleave_4x4(const SimpleTensor<T> &in, SimpleTensor<T> &out)
+{
+    const T      *mtx_in     = reinterpret_cast<const T *>(in.data());
+    T            *mtx_ref    = reinterpret_cast<T *>(out.data());
+    const int32_t in_rows    = in.shape().y();
+    const int32_t in_cols    = in.shape().x();
+    const int32_t out_stride = out.shape().x();
+
+    // One loop covers complete and incomplete groups of rows; only the last group can hold fewer
+    // than four rows.
+    for(int32_t y = 0; y < in_rows; y += 4)
+    {
+        const int32_t rows_left  = in_rows - y;
+        const int32_t group_rows = (rows_left < 4) ? rows_left : 4;
+
+        // Offsets use integer arithmetic: y is a multiple of 4, so y / 4 is exactly the output row
+        // and no float conversion (or std::ceil) is involved in indexing the buffers.
+        const T *in_ptr  = &mtx_in[static_cast<size_t>(y) * in_cols];
+        T       *out_ptr = &mtx_ref[static_cast<size_t>(y / 4) * out_stride];
+
+        for(int32_t x = 0; x < in_cols; x++)
+        {
+            // Rows past the end of the input keep this zero initialisation
+            T tmp[4] = { 0, 0, 0, 0 };
+
+            for(int32_t k = 0; k < group_rows; k++)
+            {
+                tmp[k] = in_ptr[static_cast<size_t>(k) * in_cols + x];
+            }
+
+            // The four interleaved values of column x land at offset 4 * x of the output row
+            memcpy(&out_ptr[static_cast<size_t>(x) * 4], tmp, sizeof(T) * 4);
+        }
+    }
+
+    return out;
+}
+
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/GEMM.cpp b/tests/validation/NEON/GEMM.cpp
index 129544e..57e8ce7 100644
--- a/tests/validation/NEON/GEMM.cpp
+++ b/tests/validation/NEON/GEMM.cpp
@@ -21,11 +21,13 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
+#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/NEON/functions/NEGEMM.h"
 #include "arm_compute/runtime/Tensor.h"
 #include "arm_compute/runtime/TensorAllocator.h"
 #include "tests/NEON/Accessor.h"
+#include "tests/NEON/Helper.h"
 #include "tests/PaddingCalculator.h"
 #include "tests/datasets/LargeGEMMDataset.h"
 #include "tests/datasets/SmallGEMMDataset.h"
@@ -34,6 +36,7 @@
 #include "tests/framework/datasets/Datasets.h"
 #include "tests/validation/Validation.h"
 #include "tests/validation/fixtures/GEMMFixture.h"
+#include "tests/validation/fixtures/GEMMInterleave4x4Fixture.h"
 
 namespace arm_compute
 {
@@ -56,11 +59,52 @@
     DataType::QS8,
     DataType::QS16,
 });
+
+const auto data_interleave = framework::dataset::make("M", 8, 12) * framework::dataset::make("N", 8, 12); // "M" and "N" values; the INTERLEAVE_4X4 fixtures below receive them (presumably in this order) as the input's x and y sizes
 } // namespace
 
 TEST_SUITE(NEON)
 TEST_SUITE(GEMM)
 
+TEST_SUITE(INTERLEAVE_4X4)
+using NEGEMMInterleave4x4 = NESynthetizeFunctionWithZeroConstantBorder<NEGEMMInterleave4x4Kernel, 4>; // kernel wrapped as a runnable function; the wrapper's bordersize argument is 4
+
+TEST_SUITE(FP32)
+using NEGEMMInterleave4x4Fixture = GEMMInterleave4x4ValidationFixture<Tensor, Accessor, NEGEMMInterleave4x4, float>;
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * framework::dataset::make("DataType", DataType::F32))
+{
+    // Validate the function's output against the reference result computed by the fixture
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QS8)
+using NEGEMMInterleave4x4Fixture = GEMMInterleave4x4ValidationFixedPointFixture<Tensor, Accessor, NEGEMMInterleave4x4, int8_t>;
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave *
+                       framework::dataset::make("DataType", DataType::QS8)
+                       * framework::dataset::make("FractionalBits", 1, 7))
+{
+    // Validate the function's output against the reference result computed by the fixture
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() // QS8
+
+TEST_SUITE(QS16)
+using NEGEMMInterleave4x4Fixture = GEMMInterleave4x4ValidationFixedPointFixture<Tensor, Accessor, NEGEMMInterleave4x4, int16_t>;
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave *
+                       framework::dataset::make("DataType", DataType::QS16)
+                       * framework::dataset::make("FractionalBits", 1, 14))
+{
+    // Validate the function's output against the reference result computed by the fixture
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() // QS16
+
+TEST_SUITE_END() // Quantized
+
+TEST_SUITE_END() // INTERLEAVE_4X4
+
 DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallGEMMDataset(), datasets::LargeGEMMDataset()), CNNDataTypes),
                shape_a, shape_b, shape_c, output_shape, alpha, beta, data_type)
 {
diff --git a/tests/validation/fixtures/GEMMInterleave4x4Fixture.h b/tests/validation/fixtures/GEMMInterleave4x4Fixture.h
new file mode 100644
index 0000000..b5e3eb6
--- /dev/null
+++ b/tests/validation/fixtures/GEMMInterleave4x4Fixture.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_GEMM_INTERLEAVE_4X4_FIXTURE
+#define ARM_COMPUTE_TEST_GEMM_INTERLEAVE_4X4_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/CPP/GEMMInterleave4x4.h"
+#include "tests/validation/Helpers.h"
+
+#include <random>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+/** Validation fixture: setup() runs FunctionType on an (x, y) input tensor and computes the matching reference result. */
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class GEMMInterleave4x4ValidationFixedPointFixture : public framework::Fixture
+{
+public:
+    /** Fills _target and _reference; the output shape (4 * x, ceil(y / 4)) is computed in exact integer arithmetic. */
+    template <typename...>
+    void setup(size_t x, size_t y, DataType data_type, int fractional_bits)
+    {
+        _fractional_bits = fractional_bits;
+        _data_type       = data_type;
+        const TensorShape shape_a(x, y);
+        const TensorShape shape_b(x * 4, (y + 3) / 4);
+        _target    = compute_target(shape_a, shape_b, data_type, fractional_bits);
+        _reference = compute_reference(shape_a, shape_b, data_type, fractional_bits);
+    }
+
+protected:
+    /** Fills @p tensor through the assets library: F16/F32 use a uniform real distribution built from (-1, 1), any other type uses fill_tensor_uniform. */
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        const DataType dt = tensor.data_type();
+        if(dt == DataType::F16 || dt == DataType::F32)
+        {
+            std::uniform_real_distribution<> distribution(-1.f, 1.f);
+            library->fill(tensor, distribution, i);
+        }
+        else
+        {
+            library->fill_tensor_uniform(tensor, i);
+        }
+    }
+
+    /** Configures FunctionType on new tensors, allocates and fills them, runs the function and returns the output tensor. */
+    TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, DataType data_type, int fixed_point_position)
+    {
+        // Create tensors
+        TensorType a = create_tensor<TensorType>(shape_a, data_type, 1, fixed_point_position);
+        TensorType b = create_tensor<TensorType>(shape_b, data_type, 1, fixed_point_position);
+
+        // Create and configure function; the tensors are expected to still be resizable afterwards
+        FunctionType f;
+        f.configure(&a, &b);
+        ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors; once allocated they are expected to be no longer resizable
+        a.allocator()->allocate();
+        b.allocator()->allocate();
+        ARM_COMPUTE_EXPECT(!a.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!b.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(a), 0);
+        fill(AccessorType(b), 0);
+
+        // Run the interleave function
+        f.run();
+        return b;
+    }
+
+    /** Builds reference tensors filled the same way and returns the result of reference::gemm_interleave_4x4. */
+    SimpleTensor<T> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, DataType data_type, int fixed_point_position)
+    {
+        // Create reference
+        SimpleTensor<T> a{ shape_a, data_type, 1, fixed_point_position };
+        SimpleTensor<T> b{ shape_b, data_type, 1, fixed_point_position };
+
+        // Fill reference
+        fill(a, 0);
+        fill(b, 0);
+
+        return reference::gemm_interleave_4x4<T>(a, b);
+    }
+
+    TensorType      _target{};
+    SimpleTensor<T> _reference{};
+    int             _fractional_bits{};
+    DataType        _data_type{};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class GEMMInterleave4x4ValidationFixture : public GEMMInterleave4x4ValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T> // same fixture, but setup() takes no fractional bits argument
+{
+public:
+    template <typename...>
+    void setup(size_t x, size_t y, DataType data_type) // x, y: sizes forwarded unchanged to the base class setup()
+    {
+        GEMMInterleave4x4ValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>::setup(x, y, data_type, 0); // 0 is passed as fractional_bits
+    }
+};
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_GEMM_INTERLEAVE_4X4_FIXTURE */