Adding GELU activation
OpenCL implementation uses built in erf.
NEON implementation requires new vectorized erf.
Uses the following approximation:
erf(x) = 1 - 1 / (1 + a1x + a2x^2 + a3x^3 + a4x^4)^4
a1 = 0.278393, a2 = 0.230389, a3 = 0.000972, a4 = 0.078108
From https://en.wikipedia.org/wiki/Error_function#Numerical_approximations
Signed-off-by: Murray Kornelsen <murray.kornelsen@mail.mcgill.ca>
Change-Id: I2d3964b2c26a4334166b17135f9104bc6324fad2
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7921
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Reviewed-by: Pablo Marquez Tello <pablo.tello@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Pablo Marquez Tello <pablo.tello@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/cpu/kernels/activation/generic/neon/impl.h b/src/cpu/kernels/activation/generic/neon/impl.h
index 2dd239e..35abcb5 100644
--- a/src/cpu/kernels/activation/generic/neon/impl.h
+++ b/src/cpu/kernels/activation/generic/neon/impl.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021 Arm Limited.
+ * Copyright (c) 2020-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -77,7 +77,9 @@
const auto const_0 = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{});
const auto const_6 = wrapper::vdup_n(static_cast<T>(6.f), ExactTagType{});
const auto const_3 = wrapper::vdup_n(static_cast<T>(3.f), ExactTagType{});
+ const auto const_inv_2 = wrapper::vdup_n(static_cast<T>(0.5f), ExactTagType{});
const auto const_inv_6 = wrapper::vdup_n(static_cast<T>(0.166666667f), ExactTagType{});
+ const auto const_inv_sqrt_2 = wrapper::vdup_n(static_cast<T>(0.70710678118f), ExactTagType{});
constexpr float soft_relu_thresh = 12.f;
const auto vsoft_relu_thresh = wrapper::vdup_n(static_cast<T>(soft_relu_thresh), ExactTagType{});
const auto va = wrapper::vdup_n(static_cast<T>(act_info.a()), ExactTagType{});
@@ -146,6 +148,9 @@
case ActivationLayerInfo::ActivationFunction::HARD_SWISH:
tmp = wrapper::vmul(vin, wrapper::vmul(const_inv_6, wrapper::vmin(const_6, wrapper::vmax(const_0, wrapper::vadd(vin, const_3)))));
break;
+ case ActivationLayerInfo::ActivationFunction::GELU:
+ tmp = wrapper::vmul(vin, wrapper::vmul(const_inv_2, wrapper::vadd(const_1, wrapper::verf(wrapper::vmul(vin, const_inv_sqrt_2)))));
+ break;
default:
ARM_COMPUTE_ERROR("Unsupported activation function");
}
@@ -200,6 +205,9 @@
case ActivationLayerInfo::ActivationFunction::HARD_SWISH:
tmp = in * ((std::min(std::max((in + 3), 0.0f), 6.0f)) * 0.166666667f);
break;
+ case ActivationLayerInfo::ActivationFunction::GELU:
+ tmp = in * static_cast<T>(0.5f * (1.0f + erff(static_cast<float>(in) / 1.41421356237f)));
+ break;
default:
ARM_COMPUTE_ERROR("Unsupported activation function");
}