COMPMID-927: Adding support for FP16 in CLDepthwiseConvolutionLayer3x3

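The validation step now also accepts F16 input, and a dedicated
"depthwise_convolution_3x3_f16" kernel is selected for F16 tensors, with the
number of elements written per iteration along x derived from the element
size (8 bytes per iteration, i.e. 4 half-precision values).

As a minimal standalone sketch of the read-window arithmetic used by the new
branch (the stride value, variable names and the program itself are
illustrative assumptions, not code taken from the library):

    #include <cstdint>
    #include <iostream>

    int main()
    {
        // For a 3x3 filter, producing N outputs per iteration along x at
        // horizontal stride s requires 3 + (N - 1) * s input elements.
        const unsigned int filter_width    = 3;                          // 3x3 depthwise filter
        const unsigned int conv_stride_x   = 1;                          // hypothetical stride
        const unsigned int elems_written_x = 8 / sizeof(std::uint16_t);  // 8 bytes -> 4 half values
        const unsigned int elems_read_x    = filter_width + (elems_written_x - 1) * conv_stride_x;

        std::cout << "written per iteration x: " << elems_written_x << "\n"   // 4
                  << "read per iteration x:    " << elems_read_x    << "\n";  // 6
        return 0;
    }
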
Change-Id: Ie5f299c7a7fbe3062cee22bb2b4ae5df818fe490
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/121178
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp
index 3613419..c7cee4c 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp
@@ -51,7 +51,7 @@
 
 void CLDepthwiseConvolutionLayer3x3Kernel::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
     ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != 3 || weights->info()->dimension(1) != 3);
 
@@ -134,7 +134,15 @@
     // Create kernel
     std::string kernel_name;
 
-    if(input->info()->data_type() == DataType::F32 && gpu_target == GPUTarget::BIFROST)
+    if(input->info()->data_type() == DataType::F16)
+    {
+        kernel_name                       = "depthwise_convolution_3x3_f16";
+        num_elems_written_per_iteration_x = 8 / data_size_from_type(input->info()->data_type());
+        num_elems_written_per_iteration_y = 1;
+        num_elems_read_per_iteration_x    = 3 + (num_elems_written_per_iteration_x - 1) * _conv_stride_x;
+        num_elems_read_per_iteration_y    = 3;
+    }
+    else if(input->info()->data_type() == DataType::F32 && gpu_target == GPUTarget::BIFROST)
     {
         if(_conv_stride_x == 1 && _conv_stride_y == 1)
         {