COMPMID-3320: Add cl_image support for GEMMReshaped T_NT
COMPMID-3321: Add cl_image support for GEMMReshaped NT_T

- Added support for cl_image in CLGEMMMatrixMultiplyReshapedKernel (both
  NT and T kernels)
- Extended the tests for the validating rhs_info.export_to_cl_image =
  true
- Added utility macros in OpenCL to load data from a OpenCL image object
- Updated doxygen documentation in CLGEMMMatrixMultiplyReshapedKernel.h
- Updated doxygen documentation in CLGEMMReshapeRHSMatrixKernel.h

Change-Id: I953b10e4ef205d1b76dcbc366e5a91fd5a8e1d5c
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3329
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
diff --git a/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp b/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp
index 833a924..b5b9607 100644
--- a/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp
+++ b/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp
@@ -147,6 +147,12 @@
 /** K0 values to test - Nightly */
 const auto k0_values_nightly = framework::dataset::make("K0", { 2, 3, 4, 8 });
 
+/** N0 values to test with export to OpenCL image object - Nightly */
+const auto n0_export_to_cl_image_values_nightly = framework::dataset::make("N0", { 4, 8, 16 });
+
+/** K0 values to test with export to OpenCL image object - Nightly */
+const auto k0_export_to_cl_image_values_nightly = framework::dataset::make("K0", { 4, 8, 16 });
+
 /** V0 values to test - Nightly */
 const auto v0_values_nightly = framework::dataset::make("V0", 1, 4);
 
@@ -224,14 +230,14 @@
 
                                 })),
                framework::dataset::make("RHSMInfo",{
-                                                          GEMMRHSMatrixInfo(4,4,1,true,true),
-                                                          GEMMRHSMatrixInfo(4,4,1, true,true),
-                                                          GEMMRHSMatrixInfo(4,4,1,true,true),
-                                                          GEMMRHSMatrixInfo(2,2,1,true,false),
-                                                          GEMMRHSMatrixInfo(2,2,1,true,false),
-                                                          GEMMRHSMatrixInfo(4,4,1,true,true),
-                                                          GEMMRHSMatrixInfo(4,4,1,true,true),
-                                                          GEMMRHSMatrixInfo(4,4,2,true,false),
+                                                          GEMMRHSMatrixInfo(4,4,1,true,true,false),
+                                                          GEMMRHSMatrixInfo(4,4,1,true,true,false),
+                                                          GEMMRHSMatrixInfo(4,4,1,true,true,false),
+                                                          GEMMRHSMatrixInfo(2,2,1,true,false,false),
+                                                          GEMMRHSMatrixInfo(2,2,1,true,false,false),
+                                                          GEMMRHSMatrixInfo(4,4,1,true,true,false),
+                                                          GEMMRHSMatrixInfo(4,4,1,true,true,false),
+                                                          GEMMRHSMatrixInfo(4,4,2,true,false,false),
 
 
                            })),
@@ -248,7 +254,7 @@
                                                                      1   /**< Multiplication factor for the width of the 1xW transposed block */,
                                                                      1   /**< Multiplication factor for the height of the 4x4 interleaved block */,
                                                                      GEMMLHSMatrixInfo(4,4,1,false,true),
-                                                                     GEMMRHSMatrixInfo(4,4,1,true,true),
+                                                                     GEMMRHSMatrixInfo(4,4,1,true,true,false),
                                                                      0  /**< Offset to be added to each element of the matrix A */,
                                                                      0 /**< Offset to be added to each element of the matrix B */),
 
@@ -262,7 +268,7 @@
                                                                      1   /**< Multiplication factor for the width of the 1xW transposed block */,
                                                                      1   /**< Multiplication factor for the height of the 4x4 interleaved block */,
                                                                      GEMMLHSMatrixInfo(4,4,1,false,true),
-                                                                     GEMMRHSMatrixInfo(4,4,1,true,true),
+                                                                     GEMMRHSMatrixInfo(4,4,1,true,true,false),
                                                                      0  /**< Offset to be added to each element of the matrix A */,
                                                                      0 /**< Offset to be added to each element of the matrix B */),
                                                             GEMMKernelInfo(),
@@ -279,7 +285,7 @@
                                                                      1   /**< Multiplication factor for the width of the 1xW transposed block */,
                                                                      1   /**< Multiplication factor for the height of the 4x4 interleaved block */,
                                                                      GEMMLHSMatrixInfo(4,4,1,false,true),
-                                                                     GEMMRHSMatrixInfo(4,4,1,true,true),
+                                                                     GEMMRHSMatrixInfo(4,4,1,true,true,false),
                                                                      0  /**< Offset to be added to each element of the matrix A */,
                                                                      0 /**< Offset to be added to each element of the matrix B */),
 
@@ -294,7 +300,7 @@
                                                                      1   /**< Multiplication factor for the width of the 1xW transposed block */,
                                                                      1   /**< Multiplication factor for the height of the 4x4 interleaved block */,
                                                                      GEMMLHSMatrixInfo(4,4,1,false,true),
-                                                                     GEMMRHSMatrixInfo(4,4,1,true,true),
+                                                                     GEMMRHSMatrixInfo(4,4,1,true,true,false),
                                                                      0  /**< Offset to be added to each element of the matrix A */,
                                                                      0 /**< Offset to be added to each element of the matrix B */),
 
@@ -308,7 +314,7 @@
                                                                      1   /**< Multiplication factor for the width of the 1xW transposed block */,
                                                                      1   /**< Multiplication factor for the height of the 4x4 interleaved block */,
                                                                      GEMMLHSMatrixInfo(4,4,1,false,true),
-                                                                     GEMMRHSMatrixInfo(4,4,2,true,false),
+                                                                     GEMMRHSMatrixInfo(4,4,2,true,false,false),
                                                                      0  /**< Offset to be added to each element of the matrix A */,
                                                                      0 /**< Offset to be added to each element of the matrix B */),
                                                     })),
@@ -327,7 +333,7 @@
 TEST_SUITE(FP32)
 
 FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedFixture<float>, framework::DatasetMode::ALL,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                                                                    m_values,
                                                                    n_values),
                                                                    k_values),
@@ -339,6 +345,7 @@
                                                                    h0_values_precommit),
                                                                    i_values_lhs),
                                                                    i_values_rhs),
+                                                                   framework::dataset::make("export_to_cl_image_rhs", false)),
                                                                    framework::dataset::make("DataType", DataType::F32)),
                                                                    a_values_precommit),
                                                                    beta_values_precommit),
@@ -351,7 +358,7 @@
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyReshapedFixture<float>, framework::DatasetMode::DISABLED,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                                                                    m_values,
                                                                    n_values),
                                                                    k_values),
@@ -363,6 +370,7 @@
                                                                    h0_values_nightly),
                                                                    i_values_lhs),
                                                                    i_values_rhs),
+                                                                   framework::dataset::make("export_to_cl_image_rhs", false)),
                                                                    framework::dataset::make("DataType", DataType::F32)),
                                                                    a_values_nightly),
                                                                    beta_values_nightly),
@@ -375,7 +383,7 @@
 }
 
 FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DFixture<float>, framework::DatasetMode::ALL,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                                                                    m_w_values,
                                                                    m_h_values),
                                                                    n_values),
@@ -388,6 +396,7 @@
                                                                    h0_values_precommit),
                                                                    i_values_lhs),
                                                                    i_values_rhs),
+                                                                   framework::dataset::make("export_to_cl_image_rhs", false)),
                                                                    framework::dataset::make("DataType", DataType::F32)),
                                                                    a_values_precommit),
                                                                    beta_values_precommit),
@@ -399,7 +408,7 @@
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DFixture<float>, framework::DatasetMode::DISABLED,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                                                                    m_w_values,
                                                                    m_h_values),
                                                                    n_values),
@@ -412,6 +421,7 @@
                                                                    h0_values_nightly),
                                                                    i_values_lhs),
                                                                    i_values_rhs),
+                                                                   framework::dataset::make("export_to_cl_image_rhs", false)),
                                                                    framework::dataset::make("DataType", DataType::F32)),
                                                                    a_values_nightly),
                                                                    beta_values_nightly),
@@ -421,12 +431,137 @@
     // Validate output
     validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
 }
-TEST_SUITE_END() // FP32
+TEST_SUITE(ExportToCLImage)
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(
+               framework::dataset::make("Input0Info", { TensorInfo(TensorShape(256U, 16U, 2U), 1, DataType::F32),  // OK or incorrect if cl_khr_image2d_from_buffer not supported
+                                                        TensorInfo(TensorShape(256U, 16U, 2U), 1, DataType::F32),  // OK or incorrect if cl_khr_image2d_from_buffer not supported
+                                                        TensorInfo(TensorShape(256U, 16U, 2U), 1, DataType::F32),  // OK or incorrect if cl_khr_image2d_from_buffer not supported
+                                                        TensorInfo(TensorShape(256U, 16U, 2U), 1, DataType::F32),  // Incorrect k0
+                                                        TensorInfo(TensorShape(256U, 16U, 2U), 1, DataType::F32),  // Incorrect n0
 
-TEST_SUITE(FP16)
+                                                      }),
+               framework::dataset::make("Input1Info",{ TensorInfo(TensorShape(256U, 16U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(256U, 16U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(512U, 8U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(256U, 16U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(128U, 32U, 2U), 1, DataType::F32),
 
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedFixture<half>, framework::DatasetMode::ALL,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                      })),
+               framework::dataset::make("Input2Info", { TensorInfo(TensorShape(64U), 1, DataType::F32),
+                                                        TensorInfo(TensorShape(64U), 1, DataType::F32),
+                                                        TensorInfo(TensorShape(64U), 1, DataType::F32),
+                                                        TensorInfo(TensorShape(64U), 1, DataType::F32),
+                                                        TensorInfo(TensorShape(64U), 1, DataType::F32),
+
+                                                      })),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(64U, 64U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(64U, 64U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(64U, 64U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(64U, 64U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(64U, 64U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(64U, 64U, 2U), 1, DataType::F32),
+
+                           })),
+               framework::dataset::make("LHSMInfo",{
+                                                          GEMMLHSMatrixInfo(4, 4, 1, false, true),
+                                                          GEMMLHSMatrixInfo(4, 8, 1, false, true),
+                                                          GEMMLHSMatrixInfo(4, 4, 1, false, true),
+                                                          GEMMLHSMatrixInfo(4, 2, 1, false, false),
+                                                          GEMMLHSMatrixInfo(4, 4, 1, false, false),
+
+                                })),
+               framework::dataset::make("RHSMInfo",{
+                                                          GEMMRHSMatrixInfo(4, 4, 1, true, true, true),
+                                                          GEMMRHSMatrixInfo(4, 8, 1, true, true, true),
+                                                          GEMMRHSMatrixInfo(8, 4, 1, true, true, true),
+                                                          GEMMRHSMatrixInfo(4, 2, 1, true, false, true),
+                                                          GEMMRHSMatrixInfo(2, 4, 1, true, false, true),
+                           })),
+               framework::dataset::make("GEMMInfo",{GEMMKernelInfo( 64 /**<M Number of LHS rows*/,
+                                                                    64 /**<N Number of RHS columns*/,
+                                                                    64 /**<K Number of LHS columns or RHS rows */, 0 /**< Depth of the output tensor in case is reinterpreted as 3D */,
+                                                             false /**< reinterpret the input as 3D */,
+                                                             true  /**< Flag used to broadcast the bias addition */,
+                                                             false /**< wider accumm */,
+                                                           ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
+                                                             1   /**< Multiplication factor for the width of the 1xW transposed block */,
+                                                             1   /**< Multiplication factor for the height of the 4x4 interleaved block */,
+                                                             GEMMLHSMatrixInfo(),
+                                                             GEMMRHSMatrixInfo(),
+                                                             0  /**< Offset to be added to each element of the matrix A */,
+                                                             0 /**< Offset to be added to each element of the matrix B */),
+                                                    GEMMKernelInfo( 64 /**<M Number of LHS rows*/,
+                                                                    64 /**<N Number of RHS columns*/,
+                                                                    64 /**<K Number of LHS columns or RHS rows */, 0 /**< Depth of the output tensor in case is reinterpreted as 3D */,
+                                                             false /**< reinterpret the input as 3D */,
+                                                             true  /**< Flag used to broadcast the bias addition */,
+                                                             false /**< wider accumm */,
+                                                           ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
+                                                             1   /**< Multiplication factor for the width of the 1xW transposed block */,
+                                                             1   /**< Multiplication factor for the height of the 4x4 interleaved block */,
+                                                             GEMMLHSMatrixInfo(),
+                                                             GEMMRHSMatrixInfo(),
+                                                             0  /**< Offset to be added to each element of the matrix A */,
+                                                             0 /**< Offset to be added to each element of the matrix B */),
+                                                    GEMMKernelInfo( 64 /**<M Number of LHS rows*/,
+                                                                    64 /**<N Number of RHS columns*/,
+                                                                    64 /**<K Number of LHS columns or RHS rows */, 0 /**< Depth of the output tensor in case is reinterpreted as 3D */,
+                                                             false /**< reinterpret the input as 3D */,
+                                                             true  /**< Flag used to broadcast the bias addition */,
+                                                             false /**< wider accumm */,
+                                                           ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
+                                                             1   /**< Multiplication factor for the width of the 1xW transposed block */,
+                                                             1   /**< Multiplication factor for the height of the 4x4 interleaved block */,
+                                                             GEMMLHSMatrixInfo(),
+                                                             GEMMRHSMatrixInfo(),
+                                                             0  /**< Offset to be added to each element of the matrix A */,
+                                                             0 /**< Offset to be added to each element of the matrix B */),
+
+                                                    GEMMKernelInfo( 64 /**<M Number of LHS rows*/,
+                                                                    64 /**<N Number of RHS columns*/,
+                                                                    64 /**<K Number of LHS columns or RHS rows */, 0 /**< Depth of the output tensor in case is reinterpreted as 3D */,
+                                                             false /**< reinterpret the input as 3D */,
+                                                             true  /**< Flag used to broadcast the bias addition */,
+                                                             false /**< wider accumm */,
+                                                           ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
+                                                             1   /**< Multiplication factor for the width of the 1xW transposed block */,
+                                                             1   /**< Multiplication factor for the height of the 4x4 interleaved block */,
+                                                             GEMMLHSMatrixInfo(),
+                                                             GEMMRHSMatrixInfo(),
+                                                             0  /**< Offset to be added to each element of the matrix A */,
+                                                             0 /**< Offset to be added to each element of the matrix B */),
+                                                    GEMMKernelInfo( 64 /**<M Number of LHS rows*/,
+                                                                    64 /**<N Number of RHS columns*/,
+                                                                    64 /**<K Number of LHS columns or RHS rows */, 0 /**< Depth of the output tensor in case is reinterpreted as 3D */,
+                                                             false /**< reinterpret the input as 3D */,
+                                                             true  /**< Flag used to broadcast the bias addition */,
+                                                             false /**< wider accumm */,
+                                                           ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
+                                                             1   /**< Multiplication factor for the width of the 1xW transposed block */,
+                                                             1   /**< Multiplication factor for the height of the 4x4 interleaved block */,
+                                                             GEMMLHSMatrixInfo(),
+                                                             GEMMRHSMatrixInfo(),
+                                                             0  /**< Offset to be added to each element of the matrix A */,
+                                                             0 /**< Offset to be added to each element of the matrix B */)
+                                                    })),
+               framework::dataset::make("Expected", { true,
+                                                      true,
+                                                      true,
+                                                      false,
+                                                      false})),
+                    input0_info ,input1_info, input2_info, output_info, lhs_info, rhs_info, gemm_info, expected)
+{
+   ARM_COMPUTE_EXPECT(bool(CLGEMMMatrixMultiplyReshapedKernel::validate(&input0_info.clone()->set_is_resizable(true),
+                                                          &input1_info.clone()->set_is_resizable(true),
+                                                          &input2_info.clone()->set_is_resizable(true),
+                                                          &output_info.clone()->set_is_resizable(true),1.f,1.f,
+                                                          lhs_info,
+                                                          rhs_info,
+                                                          gemm_info)) == (expected && image2d_from_buffer_supported(CLKernelLibrary::get().get_device())), framework::LogLevel::ERRORS);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedFixture<float>, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                                                                    m_values,
                                                                    n_values),
                                                                    k_values),
@@ -438,6 +573,144 @@
                                                                    h0_values_precommit),
                                                                    i_values_lhs),
                                                                    i_values_rhs),
+                                                                   framework::dataset::make("export_to_cl_image_rhs", true)),
+                                                                   framework::dataset::make("DataType", DataType::F32)),
+                                                                   a_values_precommit),
+                                                                   beta_values_precommit),
+                                                                   broadcast_bias_values),
+                                                                   lhs_transpose_values),
+                                                                   act_values))
+{
+    // Validate output only if the target platform supports the OpenCL cl_khr_image2d_from_buffer extension
+    if(image2d_from_buffer_supported(CLKernelLibrary::get().get_device()))
+    {
+        validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
+
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyReshapedFixture<float>, framework::DatasetMode::NIGHTLY,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                   m_values,
+                                                                   n_values),
+                                                                   k_values),
+                                                                   b_values),
+                                                                   m0_values_nightly),
+                                                                   n0_export_to_cl_image_values_nightly),
+                                                                   k0_export_to_cl_image_values_nightly),
+                                                                   v0_values_nightly),
+                                                                   h0_values_nightly),
+                                                                   i_values_lhs),
+                                                                   i_values_rhs),
+                                                                   framework::dataset::make("export_to_cl_image_rhs", true)),
+                                                                   framework::dataset::make("DataType", DataType::F32)),
+                                                                   a_values_nightly),
+                                                                   beta_values_nightly),
+                                                                   broadcast_bias_values),
+                                                                   lhs_transpose_values),
+                                                                   act_values))
+{
+    // Validate output only if the target platform supports the OpenCL cl_khr_image2d_from_buffer extension
+    if(image2d_from_buffer_supported(CLKernelLibrary::get().get_device()))
+    {
+        validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DFixture<float>, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                   m_w_values,
+                                                                   m_h_values),
+                                                                   n_values),
+                                                                   k_values),
+                                                                   b_values),
+                                                                   m0_values_precommit),
+                                                                   n0_values_precommit),
+                                                                   k0_values_precommit),
+                                                                   v0_values_precommit),
+                                                                   h0_values_precommit),
+                                                                   i_values_lhs),
+                                                                   i_values_rhs),
+                                                                   framework::dataset::make("export_to_cl_image_rhs", true)),
+                                                                   framework::dataset::make("DataType", DataType::F32)),
+                                                                   a_values_precommit),
+                                                                   beta_values_precommit),
+                                                                   lhs_transpose_values),
+                                                                   act_values))
+{
+    // Validate output only if the target platform supports the OpenCL cl_khr_image2d_from_buffer extension
+    if(image2d_from_buffer_supported(CLKernelLibrary::get().get_device()))
+    {
+        validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DFixture<float>, framework::DatasetMode::NIGHTLY,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                   m_w_values,
+                                                                   m_h_values),
+                                                                   n_values),
+                                                                   k_values),
+                                                                   b_values),
+                                                                   m0_values_nightly),
+                                                                   n0_export_to_cl_image_values_nightly),
+                                                                   k0_export_to_cl_image_values_nightly),
+                                                                   v0_values_nightly),
+                                                                   h0_values_nightly),
+                                                                   i_values_lhs),
+                                                                   i_values_rhs),
+                                                                   framework::dataset::make("export_to_cl_image_rhs", true)),
+                                                                   framework::dataset::make("DataType", DataType::F32)),
+                                                                   a_values_nightly),
+                                                                   beta_values_nightly),
+                                                                   lhs_transpose_values),
+                                                                   act_values))
+{
+    // Validate output only if the target platform supports the OpenCL cl_khr_image2d_from_buffer extension
+    if(image2d_from_buffer_supported(CLKernelLibrary::get().get_device()))
+    {
+        validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
+}
+TEST_SUITE_END() // ExportToCLImage
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(FP16)
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedFixture<half>, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                   m_values,
+                                                                   n_values),
+                                                                   k_values),
+                                                                   b_values),
+                                                                   m0_values_precommit),
+                                                                   n0_values_precommit),
+                                                                   k0_values_precommit),
+                                                                   v0_values_precommit),
+                                                                   h0_values_precommit),
+                                                                   i_values_lhs),
+                                                                   i_values_rhs),
+                                                                   framework::dataset::make("export_to_cl_image_rhs", false)),
                                                                    framework::dataset::make("DataType", DataType::F16)),
                                                                    a_values_precommit),
                                                                    beta_values_precommit),
@@ -450,7 +723,7 @@
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyReshapedFixture<half>, framework::DatasetMode::DISABLED,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                                                                    m_values,
                                                                    n_values),
                                                                    k_values),
@@ -462,6 +735,7 @@
                                                                    h0_values_nightly),
                                                                    i_values_lhs),
                                                                    i_values_rhs),
+                                                                   framework::dataset::make("export_to_cl_image_rhs", false)),
                                                                    framework::dataset::make("DataType", DataType::F16)),
                                                                    a_values_nightly),
                                                                    beta_values_nightly),
@@ -474,7 +748,7 @@
 }
 
 FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DFixture<half>, framework::DatasetMode::ALL,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                                                                    m_w_values,
                                                                    m_h_values),
                                                                    n_values),
@@ -487,6 +761,7 @@
                                                                    h0_values_precommit),
                                                                    i_values_lhs),
                                                                    i_values_rhs),
+                                                                   framework::dataset::make("export_to_cl_image_rhs", false)),
                                                                    framework::dataset::make("DataType", DataType::F16)),
                                                                    a_values_precommit),
                                                                    beta_values_precommit),
@@ -498,7 +773,7 @@
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DFixture<half>, framework::DatasetMode::DISABLED,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                                                                    m_w_values,
                                                                    m_h_values),
                                                                    n_values),
@@ -511,6 +786,7 @@
                                                                    h0_values_nightly),
                                                                    i_values_lhs),
                                                                    i_values_rhs),
+                                                                   framework::dataset::make("export_to_cl_image_rhs", false)),
                                                                    framework::dataset::make("DataType", DataType::F16)),
                                                                    a_values_nightly),
                                                                    beta_values_nightly),
@@ -525,7 +801,7 @@
 TEST_SUITE(MixedPrecision)
 
 FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedMixedPrecisionFixture<half>, framework::DatasetMode::ALL,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                                                                    m_values,
                                                                    n_values),
                                                                    k_values),
@@ -537,6 +813,7 @@
                                                                    h0_values_precommit),
                                                                    i_values_lhs),
                                                                    i_values_rhs),
+                                                                   framework::dataset::make("export_to_cl_image_rhs", false)),
                                                                    framework::dataset::make("DataType", DataType::F16)),
                                                                    a_values_precommit),
                                                                    beta_values_precommit),
@@ -549,7 +826,7 @@
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyReshapedMixedPrecisionFixture<half>, framework::DatasetMode::DISABLED,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                                                                    m_values,
                                                                    n_values),
                                                                    k_values),
@@ -561,6 +838,7 @@
                                                                    h0_values_nightly),
                                                                    i_values_lhs),
                                                                    i_values_rhs),
+                                                                   framework::dataset::make("export_to_cl_image_rhs", false)),
                                                                    framework::dataset::make("DataType", DataType::F16)),
                                                                    a_values_nightly),
                                                                    beta_values_nightly),
@@ -573,7 +851,7 @@
 }
 
 FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DMixedPrecisionFixture<half>, framework::DatasetMode::ALL,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                                                                    m_w_values,
                                                                    m_h_values),
                                                                    n_values),
@@ -586,6 +864,7 @@
                                                                    h0_values_precommit),
                                                                    i_values_lhs),
                                                                    i_values_rhs),
+                                                                   framework::dataset::make("export_to_cl_image_rhs", false)),
                                                                    framework::dataset::make("DataType", DataType::F16)),
                                                                    a_values_precommit),
                                                                    beta_values_precommit),
@@ -597,7 +876,7 @@
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DMixedPrecisionFixture<half>, framework::DatasetMode::DISABLED,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                                                                    m_w_values,
                                                                    m_h_values),
                                                                    n_values),
@@ -610,6 +889,7 @@
                                                                    h0_values_nightly),
                                                                    i_values_lhs),
                                                                    i_values_rhs),
+                                                                   framework::dataset::make("export_to_cl_image_rhs", false)),
                                                                    framework::dataset::make("DataType", DataType::F16)),
                                                                    a_values_nightly),
                                                                    beta_values_nightly),