COMPMID-1304: NEDepthConvert : Add support for FP32 -> FP16 and FP16 -> FP32 + validate() function

Change-Id: I12e4696a454744f6d493ab3a53520d3acf3a1a26
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/145719
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
diff --git a/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h
index 77bb041..6840b1a 100644
--- a/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h
@@ -25,9 +25,6 @@
 #define __ARM_COMPUTE_DEPTHCONVERTKERNEL_H__
 
 #include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstdint>
 
 namespace arm_compute
 {
@@ -58,23 +55,34 @@
      *   - U8 -> U16, S16, S32
      *   - U16 -> U8, U32
      *   - S16 -> U8, S32
+     *   - F16 -> F32
+     *   - F32 -> F16
      *
-     * @param[in, out] input  The input tensor to convert (Written in case of in-place computation). Data types supported: U8/U16/S16.
-     * @param[out]     output The output tensor. Can be null in case of in-place computation. Data types supported: U8/U16/S16/U32/S32/F32.
-     * @param[in]      policy Conversion policy.
-     * @param[in]      shift  (Optional) Value for down/up conversions. Must be 0 <= shift < 8.
-     *                         In case of fixed point position conversion, it specifies the new fixed point position, if operation is in-place.
+     * @param[in]  input  The input tensor to convert. Data types supported: U8/U16/S16/F16/F32.
+     * @param[out] output The output tensor. Data types supported: U8/U16/S16/U32/S32/F16/F32.
+     * @param[in]  policy Conversion policy.
+     * @param[in]  shift  (Optional) Value for down/up conversions. Must be 0 <= shift < 8.
      */
-    void configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0);
+    void configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0);
+    /** Static function to check if given info will lead to a valid configuration of @ref NEDepthConvertLayerKernel
+     *
+     * @param[in] input  Source tensor info. Data types supported: U8/U16/S16/F16/F32.
+     * @param[in] output Destination tensor info. Data types supported: U8/U16/S16/U32/S32/F16/F32.
+     * @param[in] policy Conversion policy.
+     * @param[in] shift  (Optional) Value for down/up conversions. Must be 0 <= shift < 8.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift = 0);
 
     // Inherited methods overridden:
     void run(const Window &window, const ThreadInfo &info) override;
 
 private:
-    ITensor      *_input;
-    ITensor      *_output;
-    ConvertPolicy _policy;
-    uint32_t      _shift;
+    const ITensor *_input;
+    ITensor       *_output;
+    ConvertPolicy  _policy;
+    uint32_t       _shift;
 };
 } // namespace arm_compute
 #endif /*__ARM_COMPUTE_NEDEPTHCONVERTKERNEL_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
index eedadc2..1fdad30 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
@@ -49,13 +49,25 @@
      *    U8 -> U16, S16, S32
      *    U16 -> U8, U32
      *    S16 -> U8, S32
+     *    F16 -> F32
+     *    F32 -> F16
      *
-     * @param[in, out] input  The input tensor to convert (Written in case of in-place computation). Data types supported: U8/U16/S16/F32.
-     * @param[out]     output The output tensor. Can be null in case of in-place computation. Data types supported: U8/U16/S16/U32/S32/F32.
-     * @param[in]      policy Conversion policy.
-     * @param[in]      shift  (Optional) Value for down/up conversions. Must be 0 <= shift < 8.
+     * @param[in]  input  The input tensor to convert. Data types supported: U8/U16/S16/F16/F32.
+     * @param[out] output The output tensor. Data types supported: U8/U16/S16/U32/S32/F16/F32.
+     * @param[in]  policy Conversion policy.
+     * @param[in]  shift  (Optional) Value for down/up conversions. Must be 0 <= shift < 8.
      */
-    void configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0);
+    void configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0);
+    /** Static function to check if given info will lead to a valid configuration of @ref NEDepthConvertLayer
+     *
+     * @param[in] input  Source tensor info. Data types supported: U8/U16/S16/F16/F32.
+     * @param[in] output Destination tensor info. Data types supported: U8/U16/S16/U32/S32/F16/F32.
+     * @param[in] policy Conversion policy.
+     * @param[in] shift  (Optional) Value for down/up conversions. Must be 0 <= shift < 8.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift = 0);
 };
 }
 #endif /*__ARM_COMPUTE_NEDEPTHCONVERT_H__*/
diff --git a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp
index 8280b52..158f401 100644
--- a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp
@@ -23,6 +23,7 @@
  */
 #include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h"
 
+#include "arm_compute/core/CPP/Validate.h"
 #include "arm_compute/core/Error.h"
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/ITensor.h"
@@ -34,68 +35,90 @@
 
 using namespace arm_compute;
 
-namespace arm_compute
+namespace
 {
-class Coordinates;
-} // namespace arm_compute
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
+    ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(output);
+    ARM_COMPUTE_UNUSED(policy);
+    ARM_COMPUTE_RETURN_ERROR_ON(input == output);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::U16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON(shift >= 8);
+
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::U8 && (output->data_type() != DataType::S16 && output->data_type() != DataType::U16
+                                                                           && output->data_type() != DataType::S32),
+                                    "Only data_types supported [in] U8 -> [out] U16, S16, S32");
+
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::U16 && (output->data_type() != DataType::U8 && output->data_type() != DataType::U32),
+                                    "Only data_types supported [in] U16 ->  [out] U8, U32");
+
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::S16 && (output->data_type() != DataType::U8 && output->data_type() != DataType::S32),
+                                    "Only data_types supported [in] S16 ->  [out] U8, S32");
+
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::F16 && output->data_type() != DataType::F32,
+                                    "Only data_types supported [in] F16 ->  [out] F32");
+
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::F32 && output->data_type() != DataType::F16,
+                                    "Only data_types supported [in] F32 ->  [out] F16");
+
+    // Validate in case of configured output
+    if(output->total_size() > 0)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
+    }
+
+    return Status{};
+}
+
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
+{
+    constexpr unsigned int num_elems_processed_per_iteration = 16;
+
+    Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
+
+    AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
+    AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
+    bool                   window_changed = update_window_and_padding(win, input_access, output_access);
+    output_access.set_valid_region(win, output->valid_region());
+
+    Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
+    return std::make_pair(err, win);
+}
+} // namespace
 
 NEDepthConvertLayerKernel::NEDepthConvertLayerKernel()
     : _input(nullptr), _output(nullptr), _policy(), _shift(0)
 {
 }
 
-void NEDepthConvertLayerKernel::configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift)
+void NEDepthConvertLayerKernel::configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::U16);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+
+    // Auto initialize output shape if not initialized (We can only auto-configure the shape, datatype must be given)
+    set_shape_if_empty(*output->info(), input->info()->tensor_shape());
 
     _input  = input;
-    _output = input;
+    _output = output;
     _policy = policy;
     _shift  = shift;
 
-    if(output != nullptr)
-    {
-        // Auto initialize output shape if not initialized (We can only auto-configure the shape, datatype must be given)
-        set_shape_if_empty(*output->info(), input->info()->tensor_shape());
-
-        ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F32);
-        ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
-
-        // Set output
-        _output = output;
-    }
-
-    ARM_COMPUTE_ERROR_ON(shift >= 8);
-    ARM_COMPUTE_ERROR_ON(input == output && (data_size_from_type(input->info()->data_type()) != data_size_from_type(output->info()->data_type())));
-
-    ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U8 && (output->info()->data_type() != DataType::S16 && output->info()->data_type() != DataType::U16
-                                                                            && output->info()->data_type() != DataType::S32),
-                             "Only data_types supported [in] U8 -> [out] U16, S16, S32");
-
-    ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::U32),
-                             "Only data_types supported [in] U16 ->  [out] U8, U32");
-
-    ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::S16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::S32),
-                             "Only data_types supported [in] S16 ->  [out] U8, S32");
-
-    constexpr unsigned int num_elems_processed_per_iteration = 16;
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), policy, shift));
 
     // Configure kernel window
-    Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
+    auto win_config = validate_and_configure_window(input->info(), output->info());
+    ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+    ICPPKernel::configure(win_config.second);
+}
 
-    AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
-    if(output != nullptr)
-    {
-        AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-        update_window_and_padding(win, input_access, output_access);
-        output_access.set_valid_region(win, input->info()->valid_region());
-    }
-    else
-    {
-        // In-place computation
-        update_window_and_padding(win, input_access);
-    }
-    ICPPKernel::configure(win);
+Status NEDepthConvertLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift)
+{
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, policy, shift));
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get()).first);
+
+    return Status{};
 }
 
 void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info)
@@ -103,8 +126,7 @@
     ARM_COMPUTE_UNUSED(info);
     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-    ARM_COMPUTE_ERROR_ON(nullptr == _input);
-    ARM_COMPUTE_ERROR_ON(nullptr == _output);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(_input, _output);
     ARM_COMPUTE_ERROR_ON(_input == _output);
 
     Iterator input(_input, window);
@@ -341,6 +363,68 @@
             }
             break;
         }
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+        case DataType::F16:
+            switch(_output->info()->data_type())
+            {
+                case DataType::F32:
+                {
+                    const float32x4_t scale = vdupq_n_f32(1 << _shift);
+
+                    /* Up-conversion F16 -> F32 */
+                    execute_window_loop(window, [&](const Coordinates & id)
+                    {
+                        const float16x8x2_t texels =
+                        {
+                            {
+                                vld1q_f16(reinterpret_cast<float16_t *>(input.ptr())),
+                                vld1q_f16(reinterpret_cast<float16_t *>(input.ptr()) + 8)
+                            }
+                        };
+
+                        vst1q_f32(reinterpret_cast<float *>(output.ptr()), vmulq_f32(vcvt_f32_f16(vget_low_f16(texels.val[0])), scale));
+                        vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 4, vmulq_f32(vcvt_f32_f16(vget_high_f16(texels.val[0])), scale));
+                        vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 8, vmulq_f32(vcvt_f32_f16(vget_low_f16(texels.val[1])), scale));
+                        vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 12, vmulq_f32(vcvt_f32_f16(vget_high_f16(texels.val[1])), scale));
+                    },
+                    input, output);
+                    break;
+                }
+                default:
+                    ARM_COMPUTE_ERROR("Output data type not supported");
+            }
+            break;
+        case DataType::F32:
+            switch(_output->info()->data_type())
+            {
+                case DataType::F16:
+                {
+                    const float32x4_t scale = vdupq_n_f32(1.f / (1 << _shift));
+
+                    /* Down-conversion F32 -> F16 */
+                    execute_window_loop(window, [&](const Coordinates & id)
+                    {
+                        const float32x4x4_t texels =
+                        {
+                            {
+                                vmulq_f32(vld1q_f32(reinterpret_cast<float *>(input.ptr())), scale),
+                                vmulq_f32(vld1q_f32(reinterpret_cast<float *>(input.ptr()) + 4), scale),
+                                vmulq_f32(vld1q_f32(reinterpret_cast<float *>(input.ptr()) + 8), scale),
+                                vmulq_f32(vld1q_f32(reinterpret_cast<float *>(input.ptr()) + 12), scale)
+                            }
+                        };
+
+                        vst1q_f16(reinterpret_cast<float16_t *>(output.ptr()), vcombine_f16(vcvt_f16_f32(texels.val[0]), vcvt_f16_f32(texels.val[1])));
+                        vst1q_f16(reinterpret_cast<float16_t *>(output.ptr()) + 8, vcombine_f16(vcvt_f16_f32(texels.val[2]), vcvt_f16_f32(texels.val[3])));
+                    },
+                    input, output);
+                    break;
+                }
+                default:
+                    ARM_COMPUTE_ERROR("Output data type not supported");
+            }
+            break;
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
         default:
             ARM_COMPUTE_ERROR("Not supported");
     }
diff --git a/src/runtime/NEON/functions/NEDepthConvertLayer.cpp b/src/runtime/NEON/functions/NEDepthConvertLayer.cpp
index 9a75404..0041c1f 100644
--- a/src/runtime/NEON/functions/NEDepthConvertLayer.cpp
+++ b/src/runtime/NEON/functions/NEDepthConvertLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -30,9 +30,14 @@
 
 using namespace arm_compute;
 
-void NEDepthConvertLayer::configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift)
+void NEDepthConvertLayer::configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift)
 {
     auto k = arm_compute::support::cpp14::make_unique<NEDepthConvertLayerKernel>();
     k->configure(input, output, policy, shift);
     _kernel = std::move(k);
 }
+
+Status NEDepthConvertLayer::validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift)
+{
+    return NEDepthConvertLayerKernel::validate(input, output, policy, shift);
+}
diff --git a/tests/validation/NEON/DepthConvertLayer.cpp b/tests/validation/NEON/DepthConvertLayer.cpp
index 78070d0..40700f8 100644
--- a/tests/validation/NEON/DepthConvertLayer.cpp
+++ b/tests/validation/NEON/DepthConvertLayer.cpp
@@ -51,6 +51,8 @@
 const auto DepthConvertLayerU16toU32Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U32));
 const auto DepthConvertLayerS16toU8Dataset  = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8));
 const auto DepthConvertLayerS16toS32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32));
+const auto DepthConvertLayerF16toF32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F32));
+const auto DepthConvertLayerF32toF16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F16));
 const auto DepthConvertLayerShiftDataset    = framework::dataset::make("Shift", 0, 7);
 } // namespace
 
@@ -66,6 +68,10 @@
 using NEDepthConvertLayerToU8Fixture = DepthConvertLayerValidationFixture<Tensor, Accessor, NEDepthConvertLayer, T, uint8_t>;
 template <typename T>
 using NEDepthConvertLayerToU32Fixture = DepthConvertLayerValidationFixture<Tensor, Accessor, NEDepthConvertLayer, T, uint32_t>;
+template <typename T>
+using NEDepthConvertLayerToF16Fixture = DepthConvertLayerValidationFixture<Tensor, Accessor, NEDepthConvertLayer, T, half>;
+template <typename T>
+using NEDepthConvertLayerToF32Fixture = DepthConvertLayerValidationFixture<Tensor, Accessor, NEDepthConvertLayer, T, float>;
 
 TEST_SUITE(U8_to_U16)
 DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
@@ -342,6 +348,86 @@
 }
 TEST_SUITE_END()
 
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+TEST_SUITE(F16_to_F32)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                   DepthConvertLayerShiftDataset),
+               shape, policy, shift)
+{
+    // Create tensors
+    Tensor src = create_tensor<Tensor>(shape, DataType::F16, 1);
+    Tensor dst = create_tensor<Tensor>(shape, DataType::F32, 1);
+
+    // Create and Configure function
+    NEDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToF32Fixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerF16toF32Dataset),
+                                                                                                                   framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                   DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToF32Fixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerF16toF32Dataset),
+                                                                                                                 framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                 DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(F32_to_F16)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                   DepthConvertLayerShiftDataset),
+               shape, policy, shift)
+{
+    // Create tensors
+    Tensor src = create_tensor<Tensor>(shape, DataType::F32, 1);
+    Tensor dst = create_tensor<Tensor>(shape, DataType::F16, 1);
+
+    // Create and Configure function
+    NEDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToF16Fixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerF32toF16Dataset),
+                                                                                                                    framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                    DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToF16Fixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerF32toF16Dataset),
+                                                                                                                  framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                  DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+
 TEST_SUITE_END()
 TEST_SUITE_END()
 } // namespace validation