Set up the framework to choose the default LWS (local work-group size)
Resolve COMPMID-4486
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Change-Id: Ib38b7943bd776a6d75d1da163908724c49eae73d
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5864
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/core/gpu/cl/kernels/ClActivationKernel.cpp b/src/core/gpu/cl/kernels/ClActivationKernel.cpp
index 17a8c64..e892d6a 100644
--- a/src/core/gpu/cl/kernels/ClActivationKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClActivationKernel.cpp
@@ -89,6 +89,7 @@
ClActivationKernel::ClActivationKernel()
: _run_in_place(false)
{
+ _type = CLKernelType::ELEMENTWISE;
}
void ClActivationKernel::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, ActivationLayerInfo act_info)
diff --git a/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp b/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp
index 26f5113..dbc628d 100644
--- a/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp
@@ -62,6 +62,7 @@
ClBatchConcatenateKernel::ClBatchConcatenateKernel()
: _batch_offset(0)
{
+ _type = CLKernelType::ELEMENTWISE;
}
void ClBatchConcatenateKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int batch_offset, ITensorInfo *dst)
diff --git a/src/core/gpu/cl/kernels/ClCastKernel.cpp b/src/core/gpu/cl/kernels/ClCastKernel.cpp
index 7a1d5c2..fac9ebe 100644
--- a/src/core/gpu/cl/kernels/ClCastKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClCastKernel.cpp
@@ -72,6 +72,11 @@
}
} // namespace
+ClCastKernel::ClCastKernel()
+{
+ _type = CLKernelType::ELEMENTWISE;
+}
+
void ClCastKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, ConvertPolicy policy)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
diff --git a/src/core/gpu/cl/kernels/ClCastKernel.h b/src/core/gpu/cl/kernels/ClCastKernel.h
index 451aa9c..6bf3cd9 100644
--- a/src/core/gpu/cl/kernels/ClCastKernel.h
+++ b/src/core/gpu/cl/kernels/ClCastKernel.h
@@ -41,7 +41,7 @@
class ClCastKernel : public IClKernel
{
public:
- ClCastKernel() = default;
+ ClCastKernel();
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClCastKernel);
/** Set the src and dst of the kernel.
*
diff --git a/src/core/gpu/cl/kernels/ClConvertFullyConnectedWeightsKernel.cpp b/src/core/gpu/cl/kernels/ClConvertFullyConnectedWeightsKernel.cpp
index 49f2f68..d1abd27 100644
--- a/src/core/gpu/cl/kernels/ClConvertFullyConnectedWeightsKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClConvertFullyConnectedWeightsKernel.cpp
@@ -40,6 +40,11 @@
{
namespace kernels
{
+ClConvertFullyConnectedWeightsKernel::ClConvertFullyConnectedWeightsKernel()
+{
+ _type = CLKernelType::ELEMENTWISE;
+}
+
void ClConvertFullyConnectedWeightsKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const TensorShape &original_src_shape,
DataLayout data_layout)
{
diff --git a/src/core/gpu/cl/kernels/ClConvertFullyConnectedWeightsKernel.h b/src/core/gpu/cl/kernels/ClConvertFullyConnectedWeightsKernel.h
index 11ab4d2..6f4f09d 100644
--- a/src/core/gpu/cl/kernels/ClConvertFullyConnectedWeightsKernel.h
+++ b/src/core/gpu/cl/kernels/ClConvertFullyConnectedWeightsKernel.h
@@ -47,7 +47,7 @@
class ClConvertFullyConnectedWeightsKernel : public IClKernel
{
public:
- ClConvertFullyConnectedWeightsKernel() = default;
+ ClConvertFullyConnectedWeightsKernel();
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClConvertFullyConnectedWeightsKernel);
/** Set the src and dst tensor.
*
diff --git a/src/core/gpu/cl/kernels/ClCopyKernel.cpp b/src/core/gpu/cl/kernels/ClCopyKernel.cpp
index d6c87f8..98c6f34 100644
--- a/src/core/gpu/cl/kernels/ClCopyKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClCopyKernel.cpp
@@ -68,6 +68,11 @@
} // namespace
+ClCopyKernel::ClCopyKernel()
+{
+ _type = CLKernelType::ELEMENTWISE;
+}
+
void ClCopyKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, Window *dst_window)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
diff --git a/src/core/gpu/cl/kernels/ClCopyKernel.h b/src/core/gpu/cl/kernels/ClCopyKernel.h
index b1b9672..f3eb0aa 100644
--- a/src/core/gpu/cl/kernels/ClCopyKernel.h
+++ b/src/core/gpu/cl/kernels/ClCopyKernel.h
@@ -38,7 +38,7 @@
class ClCopyKernel : public IClKernel
{
public:
- ClCopyKernel() = default;
+ ClCopyKernel();
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClCopyKernel);
/** Initialize the kernel's src, dst.
*
diff --git a/src/core/gpu/cl/kernels/ClCropKernel.cpp b/src/core/gpu/cl/kernels/ClCropKernel.cpp
index 1d322ee..ef2e48b 100644
--- a/src/core/gpu/cl/kernels/ClCropKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClCropKernel.cpp
@@ -46,6 +46,11 @@
configure(CLKernelLibrary::get().get_compile_context(), src, dst, start, end, batch_index, extrapolation_value, dst_window);
}
+ClCropKernel::ClCropKernel()
+{
+ _type = CLKernelType::ELEMENTWISE;
+}
+
void ClCropKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, Coordinates2D start, Coordinates2D end, uint32_t batch_index,
float extrapolation_value, Window *dst_window)
{
diff --git a/src/core/gpu/cl/kernels/ClCropKernel.h b/src/core/gpu/cl/kernels/ClCropKernel.h
index ec0f8e5..7120dbb 100644
--- a/src/core/gpu/cl/kernels/ClCropKernel.h
+++ b/src/core/gpu/cl/kernels/ClCropKernel.h
@@ -38,7 +38,7 @@
class ClCropKernel : public IClKernel
{
public:
- ClCropKernel() = default;
+ ClCropKernel();
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClCropKernel);
/** Configure kernel
*
diff --git a/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp b/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp
index 4039570..e3e384f 100644
--- a/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp
@@ -61,6 +61,7 @@
ClDepthConcatenateKernel::ClDepthConcatenateKernel()
: _depth_offset(0)
{
+ _type = CLKernelType::ELEMENTWISE;
}
void ClDepthConcatenateKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int depth_offset, ITensorInfo *dst)
diff --git a/src/core/gpu/cl/kernels/ClDequantizeKernel.cpp b/src/core/gpu/cl/kernels/ClDequantizeKernel.cpp
index f2758b7..d69da87 100644
--- a/src/core/gpu/cl/kernels/ClDequantizeKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClDequantizeKernel.cpp
@@ -61,6 +61,11 @@
}
} // namespace
+ClDequantizeKernel::ClDequantizeKernel()
+{
+ _type = CLKernelType::ELEMENTWISE;
+}
+
void ClDequantizeKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
diff --git a/src/core/gpu/cl/kernels/ClDequantizeKernel.h b/src/core/gpu/cl/kernels/ClDequantizeKernel.h
index 33e0164..2460674 100644
--- a/src/core/gpu/cl/kernels/ClDequantizeKernel.h
+++ b/src/core/gpu/cl/kernels/ClDequantizeKernel.h
@@ -39,7 +39,7 @@
{
public:
/** Default constructor */
- ClDequantizeKernel() = default;
+ ClDequantizeKernel();
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClDequantizeKernel);
/** Initialise the kernel's input and output
*
diff --git a/src/core/gpu/cl/kernels/ClDirectConv2dKernel.cpp b/src/core/gpu/cl/kernels/ClDirectConv2dKernel.cpp
index 94c4044..7b98671 100644
--- a/src/core/gpu/cl/kernels/ClDirectConv2dKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClDirectConv2dKernel.cpp
@@ -377,6 +377,11 @@
return _border_size;
}
+ClDirectConv2dKernel::ClDirectConv2dKernel()
+{
+ _type = CLKernelType::DIRECT;
+}
+
void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst,
const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
{
diff --git a/src/core/gpu/cl/kernels/ClDirectConv2dKernel.h b/src/core/gpu/cl/kernels/ClDirectConv2dKernel.h
index e76666f..b592a21 100644
--- a/src/core/gpu/cl/kernels/ClDirectConv2dKernel.h
+++ b/src/core/gpu/cl/kernels/ClDirectConv2dKernel.h
@@ -38,7 +38,7 @@
class ClDirectConv2dKernel : public IClKernel
{
public:
- ClDirectConv2dKernel() = default;
+ ClDirectConv2dKernel();
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClDirectConv2dKernel);
/** Set the src, weights, biases and dst tensors info.
*
diff --git a/src/core/gpu/cl/kernels/ClElementwiseKernel.cpp b/src/core/gpu/cl/kernels/ClElementwiseKernel.cpp
index 335ee9c..7bfdb9e 100644
--- a/src/core/gpu/cl/kernels/ClElementwiseKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClElementwiseKernel.cpp
@@ -98,7 +98,7 @@
return Status{};
}
-Status validate_arguments_divide_operation(const ITensorInfo* src1, const ITensorInfo* src2, const ITensorInfo* dst)
+Status validate_arguments_divide_operation(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src1, src2, dst);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src1);
@@ -271,6 +271,11 @@
}
} // namespace
+ClElementwiseKernel::ClElementwiseKernel()
+{
+ _type = CLKernelType::ELEMENTWISE;
+}
+
void ClElementwiseKernel::configure_common(ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst)
{
configure_common(CLKernelLibrary::get().get_compile_context(), src1, src2, dst);
diff --git a/src/core/gpu/cl/kernels/ClElementwiseKernel.h b/src/core/gpu/cl/kernels/ClElementwiseKernel.h
index 4ed8ae7..7f55151 100644
--- a/src/core/gpu/cl/kernels/ClElementwiseKernel.h
+++ b/src/core/gpu/cl/kernels/ClElementwiseKernel.h
@@ -45,7 +45,7 @@
{
public:
/** Default constructor */
- ClElementwiseKernel() = default;
+ ClElementwiseKernel();
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClElementwiseKernel);
// Inherited methods overridden:
@@ -89,9 +89,15 @@
ActivationLayerInfo _act_info{};
private:
- const ITensorInfo *_src1{ nullptr }; /**< Source tensor info 1 */
- const ITensorInfo *_src2{ nullptr }; /**< Source tensor info 2 */
- ITensorInfo *_dst{ nullptr }; /**< Destination tensor info */
+ const ITensorInfo *_src1
+ {
+ nullptr
+ }; /**< Source tensor info 1 */
+ const ITensorInfo *_src2
+ {
+ nullptr
+ }; /**< Source tensor info 2 */
+ ITensorInfo *_dst{ nullptr }; /**< Destination tensor info */
};
class ClLogicalBinaryKernel : public ClElementwiseKernel
diff --git a/src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.cpp b/src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.cpp
index 5cbb3f2..1525c0f 100644
--- a/src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.cpp
@@ -66,6 +66,11 @@
}
} // namespace
+ClElementWiseUnaryKernel::ClElementWiseUnaryKernel()
+{
+ _type = CLKernelType::ELEMENTWISE;
+}
+
void ClElementWiseUnaryKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const ElementWiseUnary &op)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
diff --git a/src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.h b/src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.h
index 7e5edef..225869b 100644
--- a/src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.h
+++ b/src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.h
@@ -38,7 +38,7 @@
class ClElementWiseUnaryKernel : public IClKernel
{
public:
- ClElementWiseUnaryKernel() = default;
+ ClElementWiseUnaryKernel();
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClElementWiseUnaryKernel);
/** Initialise the kernel's srcs, dst.
*
diff --git a/src/core/gpu/cl/kernels/ClFillKernel.cpp b/src/core/gpu/cl/kernels/ClFillKernel.cpp
index b194ee5..526a466 100644
--- a/src/core/gpu/cl/kernels/ClFillKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClFillKernel.cpp
@@ -42,6 +42,11 @@
{
namespace kernels
{
+ClFillKernel::ClFillKernel()
+{
+ _type = CLKernelType::ELEMENTWISE;
+}
+
void ClFillKernel::configure(ITensorInfo *tensor,
const PixelValue &constant_value,
Window *window)
diff --git a/src/core/gpu/cl/kernels/ClFillKernel.h b/src/core/gpu/cl/kernels/ClFillKernel.h
index b439eac..9542c20 100644
--- a/src/core/gpu/cl/kernels/ClFillKernel.h
+++ b/src/core/gpu/cl/kernels/ClFillKernel.h
@@ -38,7 +38,7 @@
class ClFillKernel : public IClKernel
{
public:
- ClFillKernel() = default;
+ ClFillKernel();
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClFillKernel);
/** Initialise the kernel's tensor and filling value
*
diff --git a/src/core/gpu/cl/kernels/ClFloorKernel.cpp b/src/core/gpu/cl/kernels/ClFloorKernel.cpp
index 7296d40..2047128 100644
--- a/src/core/gpu/cl/kernels/ClFloorKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClFloorKernel.cpp
@@ -61,6 +61,11 @@
}
} // namespace
+ClFloorKernel::ClFloorKernel()
+{
+ _type = CLKernelType::ELEMENTWISE;
+}
+
void ClFloorKernel::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
diff --git a/src/core/gpu/cl/kernels/ClFloorKernel.h b/src/core/gpu/cl/kernels/ClFloorKernel.h
index 646dfb3..3bc648b 100644
--- a/src/core/gpu/cl/kernels/ClFloorKernel.h
+++ b/src/core/gpu/cl/kernels/ClFloorKernel.h
@@ -38,7 +38,7 @@
class ClFloorKernel : public IClKernel
{
public:
- ClFloorKernel() = default;
+ ClFloorKernel();
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClFloorKernel);
/** Configure kernel for a given list of arguments
*
diff --git a/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyKernel.cpp b/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyKernel.cpp
index 817a105..6079644 100644
--- a/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyKernel.cpp
@@ -262,6 +262,11 @@
}
} // namespace
+ClGemmMatrixMultiplyKernel::ClGemmMatrixMultiplyKernel()
+{
+ _type = CLKernelType::GEMM;
+}
+
void ClGemmMatrixMultiplyKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src0, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, float alpha,
float beta,
bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info, bool fp_mixed_precision, const ActivationLayerInfo &activation_info)
diff --git a/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyKernel.h b/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyKernel.h
index c160133..c303f78 100644
--- a/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyKernel.h
+++ b/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyKernel.h
@@ -45,7 +45,7 @@
class ClGemmMatrixMultiplyKernel : public IClKernel
{
public:
- ClGemmMatrixMultiplyKernel() = default;
+ ClGemmMatrixMultiplyKernel();
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClGemmMatrixMultiplyKernel);
/** Initialise the kernel's input, output and alpha
*
diff --git a/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.cpp b/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.cpp
index 97d64c4..5ae55ab 100644
--- a/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.cpp
@@ -194,6 +194,11 @@
}
} // namespace
+ClGemmMatrixMultiplyNativeKernel::ClGemmMatrixMultiplyNativeKernel()
+{
+ _type = CLKernelType::GEMM;
+}
+
void ClGemmMatrixMultiplyNativeKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src0, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, float alpha,
float beta,
const GEMMLHSMatrixInfo &lhs_info,
diff --git a/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.h b/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.h
index 4770b18..c3bdc75 100644
--- a/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.h
+++ b/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.h
@@ -39,7 +39,7 @@
class ClGemmMatrixMultiplyNativeKernel : public IClKernel
{
public:
- ClGemmMatrixMultiplyNativeKernel() = default;
+ ClGemmMatrixMultiplyNativeKernel();
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClGemmMatrixMultiplyNativeKernel);
/** Initialise the kernel's input and dst.
*
diff --git a/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.cpp b/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.cpp
index 27409b6..591834f 100644
--- a/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.cpp
@@ -184,6 +184,11 @@
}
} // namespace
+ClGemmMatrixMultiplyReshapedKernel::ClGemmMatrixMultiplyReshapedKernel()
+{
+ _type = CLKernelType::GEMM;
+}
+
void ClGemmMatrixMultiplyReshapedKernel::configure(const CLCompileContext &compile_context,
ITensorInfo *src0, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, float alpha, float beta,
const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMKernelInfo &gemm_info)
diff --git a/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.h b/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.h
index ab648f1..b8ae4b9 100644
--- a/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.h
+++ b/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.h
@@ -45,7 +45,7 @@
class ClGemmMatrixMultiplyReshapedKernel : public IClKernel
{
public:
- ClGemmMatrixMultiplyReshapedKernel() = default;
+ ClGemmMatrixMultiplyReshapedKernel();
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClGemmMatrixMultiplyReshapedKernel);
/** Initialise the kernel's input and output.
*
diff --git a/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.cpp b/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.cpp
index 4eea2c6..32ee0f9 100644
--- a/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.cpp
@@ -181,6 +181,11 @@
}
} // namespace
+ClGemmMatrixMultiplyReshapedOnlyRhsKernel::ClGemmMatrixMultiplyReshapedOnlyRhsKernel()
+{
+ _type = CLKernelType::GEMM;
+}
+
void ClGemmMatrixMultiplyReshapedOnlyRhsKernel::configure(const CLCompileContext &compile_context,
ITensorInfo *src0, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, float alpha, float beta,
const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMKernelInfo &gemm_info)
diff --git a/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.h b/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.h
index ff6c391..3d6164e 100644
--- a/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.h
+++ b/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.h
@@ -43,7 +43,7 @@
class ClGemmMatrixMultiplyReshapedOnlyRhsKernel : public ICLKernel
{
public:
- ClGemmMatrixMultiplyReshapedOnlyRhsKernel() = default;
+ ClGemmMatrixMultiplyReshapedOnlyRhsKernel();
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClGemmMatrixMultiplyReshapedOnlyRhsKernel);
/** Initialise the kernel's input and output.
*
diff --git a/src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.cpp b/src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.cpp
index 98161ed..f92945e 100644
--- a/src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.cpp
@@ -111,6 +111,11 @@
}
} // namespace
+ClGemmReshapeLhsMatrixKernel::ClGemmReshapeLhsMatrixKernel()
+{
+ _type = CLKernelType::ELEMENTWISE;
+}
+
void ClGemmReshapeLhsMatrixKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
diff --git a/src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h b/src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h
index b830ba0..73d811f 100644
--- a/src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h
+++ b/src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h
@@ -41,7 +41,7 @@
class ClGemmReshapeLhsMatrixKernel : public ICLKernel
{
public:
- ClGemmReshapeLhsMatrixKernel() = default;
+ ClGemmReshapeLhsMatrixKernel();
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClGemmReshapeLhsMatrixKernel);
/** Initialise the kernel's input and output.
*
diff --git a/src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.cpp b/src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.cpp
index e1ef7c6..3a6f3c7 100644
--- a/src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.cpp
@@ -107,6 +107,11 @@
}
} // namespace
+ClGemmReshapeRhsMatrixKernel::ClGemmReshapeRhsMatrixKernel()
+{
+ _type = CLKernelType::ELEMENTWISE;
+}
+
void ClGemmReshapeRhsMatrixKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const GEMMRHSMatrixInfo &rhs_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
diff --git a/src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h b/src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h
index e877d87..27f80d3 100644
--- a/src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h
+++ b/src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h
@@ -40,7 +40,7 @@
class ClGemmReshapeRhsMatrixKernel : public ICLKernel
{
public:
- ClGemmReshapeRhsMatrixKernel() = default;
+ ClGemmReshapeRhsMatrixKernel();
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClGemmReshapeRhsMatrixKernel);
/** Initialise the kernel's input and output.
*
diff --git a/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp b/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp
index 4436e98..9ff30ee 100644
--- a/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp
@@ -64,6 +64,7 @@
ClHeightConcatenateKernel::ClHeightConcatenateKernel()
: _height_offset(0)
{
+ _type = CLKernelType::ELEMENTWISE;
}
Status ClHeightConcatenateKernel::validate(const ITensorInfo *src, unsigned int height_offset, const ITensorInfo *dst)
diff --git a/src/core/gpu/cl/kernels/ClMulKernel.cpp b/src/core/gpu/cl/kernels/ClMulKernel.cpp
index b8081bb..65f3bec 100644
--- a/src/core/gpu/cl/kernels/ClMulKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClMulKernel.cpp
@@ -92,6 +92,11 @@
}
} // namespace
+ClMulKernel::ClMulKernel()
+{
+ _type = CLKernelType::ELEMENTWISE;
+}
+
void ClMulKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, float scale,
ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info)
{
@@ -128,7 +133,7 @@
else
{
if(src1->element_size() == 4 || src2->element_size() == 4)
- {
+ {
// use 64 bit accumulator for 32-bit input
acc_type = "long";
}
@@ -316,6 +321,11 @@
}
} // namespace
+ClComplexMulKernel::ClComplexMulKernel()
+{
+ _type = CLKernelType::ELEMENTWISE;
+}
+
void ClComplexMulKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src1, src2, dst);
diff --git a/src/core/gpu/cl/kernels/ClMulKernel.h b/src/core/gpu/cl/kernels/ClMulKernel.h
index 44162f3..9c70301 100644
--- a/src/core/gpu/cl/kernels/ClMulKernel.h
+++ b/src/core/gpu/cl/kernels/ClMulKernel.h
@@ -39,7 +39,7 @@
{
public:
/** Default constructor */
- ClMulKernel() = default;
+ ClMulKernel();
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClMulKernel);
/** Initialise the kernel's src and dst.
*
@@ -88,7 +88,7 @@
{
public:
/** Default constructor */
- ClComplexMulKernel() = default;
+ ClComplexMulKernel();
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClComplexMulKernel);
/** Initialise the kernel's src and dst.
*
diff --git a/src/core/gpu/cl/kernels/ClPermuteKernel.cpp b/src/core/gpu/cl/kernels/ClPermuteKernel.cpp
index ffc1306..722bf45 100644
--- a/src/core/gpu/cl/kernels/ClPermuteKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClPermuteKernel.cpp
@@ -77,6 +77,11 @@
}
} // namespace
+ClPermuteKernel::ClPermuteKernel()
+{
+ _type = CLKernelType::ELEMENTWISE;
+}
+
void ClPermuteKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const PermutationVector &perm)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
diff --git a/src/core/gpu/cl/kernels/ClPermuteKernel.h b/src/core/gpu/cl/kernels/ClPermuteKernel.h
index b844214..326110a 100644
--- a/src/core/gpu/cl/kernels/ClPermuteKernel.h
+++ b/src/core/gpu/cl/kernels/ClPermuteKernel.h
@@ -42,7 +42,7 @@
{
public:
/** Default constructor */
- ClPermuteKernel() = default;
+ ClPermuteKernel();
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClPermuteKernel);
/** Set the src and dst of the kernel.
*
diff --git a/src/core/gpu/cl/kernels/ClPool2dKernel.cpp b/src/core/gpu/cl/kernels/ClPool2dKernel.cpp
index 0e15bff..9d5a24f 100644
--- a/src/core/gpu/cl/kernels/ClPool2dKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClPool2dKernel.cpp
@@ -205,6 +205,7 @@
ClPool2dKernel::ClPool2dKernel()
: _pool_info(), _data_layout(DataLayout::UNKNOWN), _border_size(0), _num_elems_processed_per_iteration(1)
{
+ _type = CLKernelType::POOL;
}
BorderSize ClPool2dKernel::border_size() const
diff --git a/src/core/gpu/cl/kernels/ClQuantizeKernel.cpp b/src/core/gpu/cl/kernels/ClQuantizeKernel.cpp
index 48d351d..7900489 100644
--- a/src/core/gpu/cl/kernels/ClQuantizeKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClQuantizeKernel.cpp
@@ -61,6 +61,11 @@
}
} // namespace
+ClQuantizeKernel::ClQuantizeKernel()
+{
+ _type = CLKernelType::ELEMENTWISE;
+}
+
void ClQuantizeKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
diff --git a/src/core/gpu/cl/kernels/ClQuantizeKernel.h b/src/core/gpu/cl/kernels/ClQuantizeKernel.h
index 8d37f33..cd97298 100644
--- a/src/core/gpu/cl/kernels/ClQuantizeKernel.h
+++ b/src/core/gpu/cl/kernels/ClQuantizeKernel.h
@@ -42,7 +42,7 @@
{
public:
/** Default constructor */
- ClQuantizeKernel() = default;
+ ClQuantizeKernel();
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClQuantizeKernel);
/** Set the input, output.
*
diff --git a/src/core/gpu/cl/kernels/ClReshapeKernel.cpp b/src/core/gpu/cl/kernels/ClReshapeKernel.cpp
index 923b9cb..fcda061 100644
--- a/src/core/gpu/cl/kernels/ClReshapeKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClReshapeKernel.cpp
@@ -62,6 +62,11 @@
}
} // namespace
+ClReshapeKernel::ClReshapeKernel()
+{
+ _type = CLKernelType::ELEMENTWISE;
+}
+
void ClReshapeKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
diff --git a/src/core/gpu/cl/kernels/ClReshapeKernel.h b/src/core/gpu/cl/kernels/ClReshapeKernel.h
index 0501b93..3cd8369 100644
--- a/src/core/gpu/cl/kernels/ClReshapeKernel.h
+++ b/src/core/gpu/cl/kernels/ClReshapeKernel.h
@@ -38,7 +38,7 @@
class ClReshapeKernel : public IClKernel
{
public:
- ClReshapeKernel() = default;
+ ClReshapeKernel();
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClReshapeKernel);
/** Set the src and dst of the kernel
*
diff --git a/src/core/gpu/cl/kernels/ClScaleKernel.cpp b/src/core/gpu/cl/kernels/ClScaleKernel.cpp
index 7fb5d2a..57ca331 100644
--- a/src/core/gpu/cl/kernels/ClScaleKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClScaleKernel.cpp
@@ -140,6 +140,11 @@
return Status{};
}
+ClScaleKernel::ClScaleKernel()
+{
+ _type = CLKernelType::ELEMENTWISE;
+}
+
void ClScaleKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const ScaleKernelInfo &info)
{
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst, info));
diff --git a/src/core/gpu/cl/kernels/ClScaleKernel.h b/src/core/gpu/cl/kernels/ClScaleKernel.h
index ad7632c..826c482 100644
--- a/src/core/gpu/cl/kernels/ClScaleKernel.h
+++ b/src/core/gpu/cl/kernels/ClScaleKernel.h
@@ -42,7 +42,7 @@
{
public:
/** Default constructor */
- ClScaleKernel() = default;
+ ClScaleKernel();
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClScaleKernel);
/** Initialise the kernel's inputs, output and interpolation policy
diff --git a/src/core/gpu/cl/kernels/ClSoftmaxKernel.cpp b/src/core/gpu/cl/kernels/ClSoftmaxKernel.cpp
index 000c9ad..1dd905d 100644
--- a/src/core/gpu/cl/kernels/ClSoftmaxKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClSoftmaxKernel.cpp
@@ -154,6 +154,11 @@
/**< Vector size in the parallel case (obtained through auto-tuning, enables the best memory access pattern for Bifrost) .*/
const unsigned int ClLogits1DMaxShiftExpSumKernel::_parallel_vector_size = 4;
+ClLogits1DMaxShiftExpSumKernel::ClLogits1DMaxShiftExpSumKernel()
+{
+ _type = CLKernelType::ELEMENTWISE;
+}
+
void ClLogits1DMaxShiftExpSumKernel::configure(const CLCompileContext &compile_context, const ITensorInfo &src, ITensorInfo &max, ITensorInfo &dst, ITensorInfo &sum, const SoftmaxKernelInfo &info)
{
auto padding_info = get_padding_info({ &src, &max, &dst, &sum });
@@ -273,6 +278,11 @@
while(window_collapsed.slide_window_slice_3D(slice));
}
+ClLogits1DNormKernel::ClLogits1DNormKernel()
+{
+ _type = CLKernelType::ELEMENTWISE;
+}
+
void ClLogits1DNormKernel::configure(const CLCompileContext &compile_context, const ITensorInfo &src, const ITensorInfo &sum, ITensorInfo &dst, const SoftmaxKernelInfo &info)
{
auto padding_info = get_padding_info({ &src, &dst, &sum });
diff --git a/src/core/gpu/cl/kernels/ClSoftmaxKernel.h b/src/core/gpu/cl/kernels/ClSoftmaxKernel.h
index af980ea..db1aca3 100644
--- a/src/core/gpu/cl/kernels/ClSoftmaxKernel.h
+++ b/src/core/gpu/cl/kernels/ClSoftmaxKernel.h
@@ -51,7 +51,7 @@
using ParallelReductionInfo = std::tuple<bool, unsigned int>;
/** Default constructor */
- ClLogits1DMaxShiftExpSumKernel() = default;
+ ClLogits1DMaxShiftExpSumKernel();
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClLogits1DMaxShiftExpSumKernel);
/** Configure the kernel using the given information about tensors
*
@@ -94,7 +94,7 @@
{
public:
/** Default constructor */
- ClLogits1DNormKernel() = default;
+ ClLogits1DNormKernel();
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClLogits1DNormKernel);
/** Set the input and output tensors.
diff --git a/src/core/gpu/cl/kernels/ClTransposeKernel.cpp b/src/core/gpu/cl/kernels/ClTransposeKernel.cpp
index 704d015..40bd4b0 100644
--- a/src/core/gpu/cl/kernels/ClTransposeKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClTransposeKernel.cpp
@@ -43,6 +43,11 @@
{
namespace kernels
{
+ClTransposeKernel::ClTransposeKernel()
+{
+ _type = CLKernelType::ELEMENTWISE;
+}
+
void ClTransposeKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
diff --git a/src/core/gpu/cl/kernels/ClTransposeKernel.h b/src/core/gpu/cl/kernels/ClTransposeKernel.h
index 21d4fd4..7d1226c 100644
--- a/src/core/gpu/cl/kernels/ClTransposeKernel.h
+++ b/src/core/gpu/cl/kernels/ClTransposeKernel.h
@@ -38,7 +38,7 @@
class ClTransposeKernel : public IClKernel
{
public:
- ClTransposeKernel() = default;
+ ClTransposeKernel();
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClTransposeKernel);
/** Set the src and dst of the kernel.
*
diff --git a/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp b/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp
index 9f97071..8607620 100644
--- a/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp
@@ -68,6 +68,11 @@
return Status{};
}
+ClWidthConcatenate2TensorsKernel::ClWidthConcatenate2TensorsKernel()
+{
+ _type = CLKernelType::ELEMENTWISE;
+}
+
void ClWidthConcatenate2TensorsKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src1, src2, dst);
diff --git a/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h b/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h
index ddade29..56202ba 100644
--- a/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h
+++ b/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h
@@ -41,7 +41,7 @@
{
public:
/** Default constructor */
- ClWidthConcatenate2TensorsKernel() = default;
+ ClWidthConcatenate2TensorsKernel();
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClWidthConcatenate2TensorsKernel);
/** Initialise the kernel's sources and destination
*
diff --git a/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp b/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp
index 281d190..edbc23c 100644
--- a/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp
@@ -66,6 +66,7 @@
ClWidthConcatenate4TensorsKernel::ClWidthConcatenate4TensorsKernel()
{
+ _type = CLKernelType::ELEMENTWISE;
}
Status ClWidthConcatenate4TensorsKernel::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *src3, const ITensorInfo *src4, const ITensorInfo *dst)
diff --git a/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp b/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp
index d188a52..5510c74 100644
--- a/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp
@@ -63,6 +63,7 @@
ClWidthConcatenateKernel::ClWidthConcatenateKernel()
{
+ _type = CLKernelType::ELEMENTWISE;
}
Status ClWidthConcatenateKernel::validate(const ITensorInfo *src, unsigned int width_offset, const ITensorInfo *dst)
diff --git a/src/core/gpu/cl/kernels/ClWinogradFilterTransformKernel.cpp b/src/core/gpu/cl/kernels/ClWinogradFilterTransformKernel.cpp
index 381b4bc..ae43fed 100644
--- a/src/core/gpu/cl/kernels/ClWinogradFilterTransformKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClWinogradFilterTransformKernel.cpp
@@ -91,6 +91,11 @@
}
} // namespace
+ClWinogradFilterTransformKernel::ClWinogradFilterTransformKernel()
+{
+ _type = CLKernelType::WINOGRAD;
+}
+
void ClWinogradFilterTransformKernel::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const WinogradInfo &winograd_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
diff --git a/src/core/gpu/cl/kernels/ClWinogradFilterTransformKernel.h b/src/core/gpu/cl/kernels/ClWinogradFilterTransformKernel.h
index 2bc2ceb..13200dc 100644
--- a/src/core/gpu/cl/kernels/ClWinogradFilterTransformKernel.h
+++ b/src/core/gpu/cl/kernels/ClWinogradFilterTransformKernel.h
@@ -40,7 +40,7 @@
{
public:
/** Default constructor */
- ClWinogradFilterTransformKernel() = default;
+ ClWinogradFilterTransformKernel();
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClWinogradFilterTransformKernel);
/** Set the input and output tensor.
*
diff --git a/src/core/gpu/cl/kernels/ClWinogradInputTransformKernel.cpp b/src/core/gpu/cl/kernels/ClWinogradInputTransformKernel.cpp
index 17f0eb9..62db228 100644
--- a/src/core/gpu/cl/kernels/ClWinogradInputTransformKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClWinogradInputTransformKernel.cpp
@@ -103,6 +103,7 @@
ClWinogradInputTransformKernel::ClWinogradInputTransformKernel()
: _border_size(0), _data_layout(DataLayout::UNKNOWN), _num_tiles_x(0), _num_tiles_y(0), _step_z(1)
{
+ _type = CLKernelType::WINOGRAD;
}
BorderSize ClWinogradInputTransformKernel::border_size() const
diff --git a/src/core/gpu/cl/kernels/ClWinogradOutputTransformKernel.cpp b/src/core/gpu/cl/kernels/ClWinogradOutputTransformKernel.cpp
index a6c0542..f6ade57 100644
--- a/src/core/gpu/cl/kernels/ClWinogradOutputTransformKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClWinogradOutputTransformKernel.cpp
@@ -122,6 +122,11 @@
}
} // namespace
+ClWinogradOutputTransformKernel::ClWinogradOutputTransformKernel()
+{
+ _type = CLKernelType::WINOGRAD;
+}
+
void ClWinogradOutputTransformKernel::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *bias, ITensorInfo *dst, const WinogradInfo &winograd_info,
const ActivationLayerInfo &act_info)
{
diff --git a/src/core/gpu/cl/kernels/ClWinogradOutputTransformKernel.h b/src/core/gpu/cl/kernels/ClWinogradOutputTransformKernel.h
index 48b27e6..2948d3f 100644
--- a/src/core/gpu/cl/kernels/ClWinogradOutputTransformKernel.h
+++ b/src/core/gpu/cl/kernels/ClWinogradOutputTransformKernel.h
@@ -40,7 +40,7 @@
{
public:
/** Default constructor */
- ClWinogradOutputTransformKernel() = default;
+ ClWinogradOutputTransformKernel();
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClWinogradOutputTransformKernel);
/** Set the input and output tensor.
*