Remove deprecated support for BF16 in CpuCast

Resolves: [COMPMID-6212]

Signed-off-by: Omar Al Khatib <omar.alkhatib@arm.com>
Signed-off-by: Adnan AlSinan <adnan.alsinan@arm.com>
Change-Id: I29bbd9a3d96af462faf7f0ee13b9849f75e05356
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10319
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
diff --git a/Android.bp b/Android.bp
index d7d900c..a81bf87 100644
--- a/Android.bp
+++ b/Android.bp
@@ -481,7 +481,6 @@
         "src/cpu/kernels/boundingboxtransform/generic/neon/fp32.cpp",
         "src/cpu/kernels/boundingboxtransform/generic/neon/impl.cpp",
         "src/cpu/kernels/boundingboxtransform/generic/neon/qsymm16.cpp",
-        "src/cpu/kernels/cast/generic/neon/bfloat16.cpp",
         "src/cpu/kernels/cast/generic/neon/fp16.cpp",
         "src/cpu/kernels/crop/generic/neon/fp16.cpp",
         "src/cpu/kernels/crop/generic/neon/fp32.cpp",
diff --git a/docs/user_guide/release_version_and_change_log.dox b/docs/user_guide/release_version_and_change_log.dox
index 1a5add0..05a18c0 100644
--- a/docs/user_guide/release_version_and_change_log.dox
+++ b/docs/user_guide/release_version_and_change_log.dox
@@ -55,6 +55,7 @@
    - @ref experimental::dynamic_fusion::GpuCkwPool2d
  - Add new OpenCL™ kernels:
    - @ref opencl::kernels::ClMatMulLowpNativeMMULKernel support for QASYMM8 and QASYMM8_SIGNED, with batch support
+ - Remove deprecated support for Bfloat16 in @ref cpu::CpuCast.
 
 v23.08 Public major release
  - Deprecate the legacy 'libarm_compute_core' library. This library is an artifact of Compute Library's legacy library architecture and no longer serves any purpose.
diff --git a/filelist.json b/filelist.json
index 2a88aec..23ee9ca 100644
--- a/filelist.json
+++ b/filelist.json
@@ -1051,8 +1051,7 @@
           "common": [
             "src/cpu/operators/CpuCast.cpp",
             "src/cpu/kernels/CpuCastKernel.cpp",
-            "src/runtime/NEON/functions/NECast.cpp",
-            "src/cpu/kernels/cast/generic/neon/bfloat16.cpp"
+            "src/runtime/NEON/functions/NECast.cpp"
           ],
           "neon":{
             "fp16":["src/cpu/kernels/cast/generic/neon/fp16.cpp"]
diff --git a/src/BUILD.bazel b/src/BUILD.bazel
index ab0ea66..3b42839 100644
--- a/src/BUILD.bazel
+++ b/src/BUILD.bazel
@@ -732,7 +732,6 @@
 	"cpu/kernels/boundingboxtransform/generic/neon/fp32.cpp",
 	"cpu/kernels/boundingboxtransform/generic/neon/impl.cpp",
 	"cpu/kernels/boundingboxtransform/generic/neon/qsymm16.cpp",
-	"cpu/kernels/cast/generic/neon/bfloat16.cpp",
 	"cpu/kernels/cast/generic/neon/fp16.cpp",
 	"cpu/kernels/crop/generic/neon/fp16.cpp",
 	"cpu/kernels/crop/generic/neon/fp32.cpp",
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index e6ef5bf..0b3da44 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -724,7 +724,6 @@
 	cpu/kernels/boundingboxtransform/generic/neon/fp32.cpp
 	cpu/kernels/boundingboxtransform/generic/neon/impl.cpp
 	cpu/kernels/boundingboxtransform/generic/neon/qsymm16.cpp
-	cpu/kernels/cast/generic/neon/bfloat16.cpp
 	cpu/kernels/cast/generic/neon/fp16.cpp
 	cpu/kernels/crop/generic/neon/fp16.cpp
 	cpu/kernels/crop/generic/neon/fp32.cpp
diff --git a/src/cpu/kernels/CpuCastKernel.cpp b/src/cpu/kernels/CpuCastKernel.cpp
index d478328..764a1ec 100644
--- a/src/cpu/kernels/CpuCastKernel.cpp
+++ b/src/cpu/kernels/CpuCastKernel.cpp
@@ -75,46 +75,34 @@
         REGISTER_FP16_NEON(arm_compute::cpu::neon_fp32_to_fp16_cast)
     },
     {
-        "neon_fp32_to_bf16_cast",
-        [](const CastDataTypeISASelectorData & data) { return data.src_dt == DataType::F32 && data.dst_dt == DataType::BFLOAT16 && data.isa.bf16; },
-        REGISTER_BF16_NEON(arm_compute::cpu::neon_fp32_to_bfloat16_cast)
-    },
-    {
         "neon_s32_cast",
         [](const CastDataTypeISASelectorData & data) { return data.src_dt == DataType::S32 && data.dst_dt == DataType::F16 && data.isa.fp16; },
         REGISTER_FP16_NEON(arm_compute::cpu::neon_s32_to_fp16_cast)
     },
-    {
-        "neon_bf16_cast",
-        [](const CastDataTypeISASelectorData & data) { return data.src_dt == DataType::BFLOAT16 && data.dst_dt == DataType::F32 && data.isa.bf16; },
-        REGISTER_BF16_NEON(arm_compute::cpu::neon_bfloat16_to_fp32_cast)
-    },
 };
 
 Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, ConvertPolicy policy)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src);
     ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(dst);
-    ARM_COMPUTE_RETURN_ERROR_ON_CPU_BF16_UNSUPPORTED(src);
-    ARM_COMPUTE_RETURN_ERROR_ON_CPU_BF16_UNSUPPORTED(dst);
     ARM_COMPUTE_UNUSED(policy);
     ARM_COMPUTE_RETURN_ERROR_ON(src == dst);
 #ifdef __aarch64__
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::U8,
-                                                         DataType::S16, DataType::U16, DataType::BFLOAT16, DataType::F16,
+                                                         DataType::S16, DataType::U16, DataType::F16,
                                                          DataType::F32, DataType::S32, DataType::S64, DataType::U64);
 
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::U8,
-                                                         DataType::S16, DataType::U16, DataType::BFLOAT16, DataType::F16,
+                                                         DataType::S16, DataType::U16, DataType::F16,
                                                          DataType::U32, DataType::S32, DataType::F32, DataType::S64);
 
 #else  // __aarch64__
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::U8,
-                                                         DataType::S16, DataType::U16, DataType::BFLOAT16, DataType::F16,
+                                                         DataType::S16, DataType::U16, DataType::F16,
                                                          DataType::F32, DataType::S32);
 
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::U8,
-                                                         DataType::S16, DataType::U16, DataType::BFLOAT16, DataType::F16,
+                                                         DataType::S16, DataType::U16, DataType::F16,
                                                          DataType::U32, DataType::S32, DataType::F32);
 #endif // __aarch64__
 
@@ -136,18 +124,15 @@
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->data_type() == DataType::S16 && (dst->data_type() != DataType::QASYMM8_SIGNED && dst->data_type() != DataType::U8 && dst->data_type() != DataType::S32),
                                     "Only data_types supported [in] S16 ->  [out] U8, S32");
 
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->data_type() == DataType::BFLOAT16 && dst->data_type() != DataType::F32,
-                                    "Only data_types supported [in] BFLOAT16 ->  [out] F32");
-
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->data_type() == DataType::F16 && (dst->data_type() != DataType::QASYMM8_SIGNED && dst->data_type() != DataType::QASYMM8
                                                                           && dst->data_type() != DataType::U8
                                                                           && dst->data_type() != DataType::F32 && dst->data_type() != DataType::S32),
                                     "Only data_types supported [in] F16 ->  [out] QASYMM8, F32, S32, U8");
 
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->data_type() == DataType::F32 && (dst->data_type() != DataType::QASYMM8_SIGNED && dst->data_type() != DataType::QASYMM8
-                                                                          && dst->data_type() != DataType::F16 && dst->data_type() != DataType::BFLOAT16
+                                                                          && dst->data_type() != DataType::F16
                                                                           && dst->data_type() != DataType::S32 && dst->data_type() != DataType::U8),
-                                    "Only data_types supported [in] F32 ->  [out] QASYMM8, BFLOAT16, F16, S32, U8");
+                                    "Only data_types supported [in] F32 ->  [out] QASYMM8, F16, S32, U8");
 
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->data_type() == DataType::S32 && (dst->data_type() != DataType::QASYMM8_SIGNED && dst->data_type() != DataType::QASYMM8
                                                                           && dst->data_type() != DataType::F16
@@ -346,7 +331,7 @@
     Iterator src(_src, win);
     Iterator dst(_dst, win);
 
-    /*ukernel runs only when using fp16/bfloat16, so we validate it isn't a nullptr only before using it */
+    /* The ukernel is only used for fp16 conversions, so we validate that it isn't a nullptr only right before using it */
     const auto *uk = CpuCastKernel::get_implementation(CastDataTypeISASelectorData{ _src->info()->data_type(), _dst->info()->data_type(), CPUInfo::get().get_isa() });
 
     switch(_src->info()->data_type())
@@ -948,13 +933,6 @@
             }
             break;
         }
-        case DataType::BFLOAT16:
-        {
-            /* Up-conversion BFLOAT16 -> F32 */
-            ARM_COMPUTE_ERROR_ON(uk->ukernel == nullptr);
-            uk->ukernel(_src, _dst, info, _policy, window);
-            break;
-        }
         case DataType::F16:
         {
             /* conversion F16 -> any data type */
@@ -972,13 +950,6 @@
                     uk->ukernel(_src, _dst, info, _policy, window);
                     break;
                 }
-                case DataType::BFLOAT16:
-                {
-                    /* Down-conversion F32 -> BFLOAT16 */
-                    ARM_COMPUTE_ERROR_ON(uk->ukernel == nullptr);
-                    uk->ukernel(_src, _dst, info, _policy, window);
-                    break;
-                }
                 case DataType::S32:
                 {
                     /* Conversion F32 -> S32 */
diff --git a/src/cpu/kernels/CpuCastKernel.h b/src/cpu/kernels/CpuCastKernel.h
index d8e61e6..a7e6417 100644
--- a/src/cpu/kernels/CpuCastKernel.h
+++ b/src/cpu/kernels/CpuCastKernel.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_CPU_CAST_KERNEL_H
-#define ARM_COMPUTE_CPU_CAST_KERNEL_H
+#ifndef ACL_SRC_CPU_KERNELS_CPUCASTKERNEL_H
+#define ACL_SRC_CPU_KERNELS_CPUCASTKERNEL_H
 
 #include "src/core/common/Macros.h"
 #include "src/cpu/ICpuKernel.h"
@@ -54,19 +54,17 @@
      *   - U8             -> U16, S16, S32, F32, F16
      *   - U16            -> U8, U32
      *   - S16            -> QASYMM8_SIGNED, U8, S32
-     *   - BFLOAT16       -> F32
      *   - F16            -> QASYMM8_SIGNED, QASYMM8, F32, S32, U8
      *   - S32            -> QASYMM8_SIGNED, QASYMM8, F16, F32, U8
      *   - S64            -> F32
-     *   - F32            -> QASYMM8_SIGNED, QASYMM8, BFLOAT16, F16, S32, U8
+     *   - F32            -> QASYMM8_SIGNED, QASYMM8, F16, S32, U8
      *
-     * @param[in]  src    The src tensor to convert. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/S32/S64/BFLOAT16/F16/F32.
-     * @param[out] dst    The dst tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/U32/S32/S64/BFLOAT16/F16/F32.
+     * @param[in]  src    The src tensor to convert. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/S32/S64/F16/F32.
+     * @param[out] dst    The dst tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/U32/S32/S64/F16/F32.
      * @param[in]  policy Conversion policy.
      *
      * @note S64 is only supported in aarch64
      *
-     * @deprecated Support for BFLOAT16 will be removed in 23.05 release
      */
     void configure(const ITensorInfo *src, ITensorInfo *dst, ConvertPolicy policy);
     /** Static function to check if given info will lead to a valid configuration
@@ -96,4 +94,4 @@
 } // namespace kernels
 } // namespace cpu
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_CPU_CAST_KERNEL_H */
+#endif // ACL_SRC_CPU_KERNELS_CPUCASTKERNEL_H
diff --git a/src/cpu/kernels/cast/generic/neon/bfloat16.cpp b/src/cpu/kernels/cast/generic/neon/bfloat16.cpp
deleted file mode 100644
index d8e2756..0000000
--- a/src/cpu/kernels/cast/generic/neon/bfloat16.cpp
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * Copyright (c) 2016-2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#if defined(ARM_COMPUTE_ENABLE_BF16)
-
-#include "arm_compute/core/CPP/CPPTypes.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "src/core/NEON/wrapper/wrapper.h"
-#include "src/cpu/kernels/CpuCastKernel.h"
-#include "src/cpu/kernels/cast/list.h"
-#include "support/SaturateCast.h"
-
-namespace arm_compute
-{
-namespace cpu
-{
-void neon_fp32_to_bfloat16_cast(const ITensor *_src, ITensor *_dst, const ThreadInfo &info, ConvertPolicy _policy, const Window &window)
-{
-    ARM_COMPUTE_UNUSED(info);
-    ARM_COMPUTE_UNUSED(_policy);
-
-    const auto window_start_x = static_cast<int>(window.x().start());
-    const auto window_end_x   = static_cast<int>(window.x().end());
-    const int  window_step_x  = 16;
-
-    ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
-    ARM_COMPUTE_ERROR_ON(_src == _dst);
-
-    ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
-
-    Window win{ window };
-    win.set(Window::DimX, Window::Dimension(0, 1, 1));
-
-    Iterator src(_src, win);
-    Iterator dst(_dst, win);
-
-    /* Down-conversion F32 -> BFLOAT16 */
-    execute_window_loop(win, [&](const Coordinates &)
-    {
-        const auto src_ptr = reinterpret_cast<const float *>(src.ptr());
-        const auto dst_ptr = reinterpret_cast<bfloat16 *>(dst.ptr());
-
-        int x = window_start_x;
-        for(; x <= (window_end_x - window_step_x); x += window_step_x)
-        {
-            wrapper::vcvt_bf16_f32(reinterpret_cast<float *>(src.ptr()),
-                                   reinterpret_cast<uint16_t *>(dst.ptr()));
-            wrapper::vcvt_bf16_f32(reinterpret_cast<float *>(src.ptr()) + 8,
-                                   reinterpret_cast<uint16_t *>(dst.ptr()) + 8);
-        }
-
-        for(; x < window_end_x; ++x)
-        {
-            *(dst_ptr + x) = *(src_ptr + x);
-        }
-    },
-    src, dst);
-}
-
-void neon_bfloat16_to_fp32_cast(const ITensor *_src, ITensor *_dst, const ThreadInfo &info, ConvertPolicy _policy, const Window &window)
-{
-    ARM_COMPUTE_UNUSED(info);
-    ARM_COMPUTE_UNUSED(_policy);
-
-    const auto window_start_x = static_cast<int>(window.x().start());
-    const auto window_end_x   = static_cast<int>(window.x().end());
-    const int  window_step_x  = 16;
-
-    ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
-    ARM_COMPUTE_ERROR_ON(_src == _dst);
-
-    ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
-
-    Window win{ window };
-    win.set(Window::DimX, Window::Dimension(0, 1, 1));
-
-    Iterator src(_src, win);
-    Iterator dst(_dst, win);
-    switch(_dst->info()->data_type())
-    {
-        case DataType::F32:
-        {
-            /* Up-conversion BFLOAT16 -> F32 */
-            execute_window_loop(win, [&](const Coordinates &)
-            {
-                const auto src_ptr = reinterpret_cast<const bfloat16 *>(src.ptr());
-                const auto dst_ptr = reinterpret_cast<float *>(dst.ptr());
-
-                int x = window_start_x;
-                for(; x <= (window_end_x - window_step_x); x += window_step_x)
-                {
-                    const uint16x8x2_t texels =
-                    {
-                        {
-                            vld1q_u16(reinterpret_cast<uint16_t *>(src.ptr())),
-                            vld1q_u16(reinterpret_cast<uint16_t *>(src.ptr()) + 8)
-                        }
-                    };
-
-                    vst1q_f32(reinterpret_cast<float *>(dst.ptr()),
-                              vreinterpretq_f32_u32(vshlq_n_u32(vmovl_u16(vget_low_u16(texels.val[0])), 16)));
-                    vst1q_f32(reinterpret_cast<float *>(dst.ptr()) + 4,
-                              vreinterpretq_f32_u32(vshlq_n_u32(vmovl_u16(vget_high_u16(texels.val[0])), 16)));
-                    vst1q_f32(reinterpret_cast<float *>(dst.ptr()) + 8,
-                              vreinterpretq_f32_u32(vshlq_n_u32(vmovl_u16(vget_low_u16(texels.val[1])), 16)));
-                    vst1q_f32(reinterpret_cast<float *>(dst.ptr()) + 12,
-                              vreinterpretq_f32_u32(vshlq_n_u32(vmovl_u16(vget_high_u16(texels.val[1])), 16)));
-                }
-
-                for(; x < window_end_x; ++x)
-                {
-                    *(dst_ptr + x) = float(*(src_ptr + x));
-                }
-            },
-            src, dst);
-            break;
-        }
-        default:
-            ARM_COMPUTE_ERROR("dst data type unsupported");
-    }
-}
-
-} // namespace cpu
-} // namespace arm_compute
-
-#endif /* defined(ARM_COMPUTE_ENABLE_BF16) */
diff --git a/src/cpu/operators/CpuCast.h b/src/cpu/operators/CpuCast.h
index 356b033..1f4da6e 100644
--- a/src/cpu/operators/CpuCast.h
+++ b/src/cpu/operators/CpuCast.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_CPU_CAST_H
-#define ARM_COMPUTE_CPU_CAST_H
+#ifndef ACL_SRC_CPU_OPERATORS_CPUCAST_H
+#define ACL_SRC_CPU_OPERATORS_CPUCAST_H
 
 #include "src/cpu/ICpuOperator.h"
 
@@ -51,14 +51,13 @@
      * |S16            | QASYMM8_SIGNED, U8, S32                        |
      * |F16            | QASYMM8_SIGNED, QASYMM8, F32, S32, U8          |
      * |S32            | QASYMM8_SIGNED, QASYMM8, F16, F32, U8          |
-     * |F32            | QASYMM8_SIGNED, QASYMM8, BFLOAT16, F16, S32, U8|
+     * |F32            | QASYMM8_SIGNED, QASYMM8, F16, S32, U8          |
      * |S64            | F32                                            |
      *
      * @param[in]  src    The source tensor to convert. Data types supported: U8/S8/U16/S16/U32/S32/S64/F16/F32.
      * @param[out] dst    The destination tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
      * @param[in]  policy Conversion policy.
      *
-     * @deprecated Support for BFLOAT16 will be removed in 23.05 release
      *
      */
     void configure(const ITensorInfo *src, ITensorInfo *dst, ConvertPolicy policy);
@@ -72,4 +71,4 @@
 };
 } // namespace cpu
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_CPU_ACTIVATION_H */
+#endif // ACL_SRC_CPU_OPERATORS_CPUCAST_H
diff --git a/tests/validation/NEON/Cast.cpp b/tests/validation/NEON/Cast.cpp
index a1ddcc9..b565945 100644
--- a/tests/validation/NEON/Cast.cpp
+++ b/tests/validation/NEON/Cast.cpp
@@ -217,7 +217,6 @@
     DataType::S32,
     DataType::QASYMM8,
     DataType::QASYMM8_SIGNED,
-    DataType::BFLOAT16,
 })),
 cpu_ext, data_type)
 {
@@ -226,21 +225,9 @@
 
     cpuinfo::CpuIsaInfo cpu_isa{};
     cpu_isa.neon = (cpu_ext == "NEON");
+    cpu_isa.fp16 = true;
 
-    cpu_isa.bf16 = (data_type == DataType::BFLOAT16);
-
-    /* bf16 cast is different from all the others being converted to fp32 and not to fp16 */
-    if(cpu_isa.bf16)
-    {
-        cpu_isa.fp16  = false;
-        selected_impl = CpuCastKernel::get_implementation(CastDataTypeISASelectorData{ data_type, DataType::F32, cpu_isa }, cpu::KernelSelectionType::Preferred);
-    }
-    else
-    {
-        cpu_isa.fp16  = true;
-        selected_impl = CpuCastKernel::get_implementation(CastDataTypeISASelectorData{ data_type, DataType::F16, cpu_isa }, cpu::KernelSelectionType::Preferred);
-    }
-
+    selected_impl = CpuCastKernel::get_implementation(CastDataTypeISASelectorData{ data_type, DataType::F16, cpu_isa }, cpu::KernelSelectionType::Preferred);
     ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl);
 
     std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_cast";
@@ -254,7 +241,6 @@
                        framework::dataset::make("DataType",
 {
     DataType::F16,
-    DataType::BFLOAT16,
 })),
 cpu_ext, data_type)
 {
@@ -263,7 +249,6 @@
     cpuinfo::CpuIsaInfo cpu_isa{};
     cpu_isa.neon = (cpu_ext == "NEON");
     cpu_isa.fp16 = (data_type == DataType::F16);
-    cpu_isa.bf16 = (data_type == DataType::BFLOAT16);
 
     const auto *selected_impl = CpuCastKernel::get_implementation(CastDataTypeISASelectorData{ DataType::F32, data_type, cpu_isa }, cpu::KernelSelectionType::Preferred);