COMPMID-412: Port PoolingLayer to use fixed point 16.
Change-Id: I2005de4c7c14526996309826d33a0ec8e732d2d5
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/78720
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Steven Niu <steven.niu@arm.com>
diff --git a/arm_compute/core/NEON/NEFixedPoint.inl b/arm_compute/core/NEON/NEFixedPoint.inl
index dd1066d..a5d9e76 100644
--- a/arm_compute/core/NEON/NEFixedPoint.inl
+++ b/arm_compute/core/NEON/NEFixedPoint.inl
@@ -25,8 +25,9 @@
namespace arm_compute
{
-/**< Exponent polynomial coefficients for 8 bit fixed point (8 elements)
- * Format is in Q0.7 for all elements */
+/** Exponent polynomial coefficients for 8 bit fixed point (8 elements)
+ * Format is in Q0.7 for all elements
+ */
static const std::array<qint8x8_t, 4> exp_tab_qs8 =
{
{
@@ -37,8 +38,9 @@
}
};
-/**< Exponent polynomial coefficients for 16 bit fixed point (4 elements)
- * Format is in Q0.15 for all elements */
+/** Exponent polynomial coefficients for 16 bit fixed point (4 elements)
+ * Format is in Q0.15 for all elements
+ */
static const std::array<qint16x4_t, 4> exp_tab_qs16 =
{
{
@@ -49,8 +51,9 @@
}
};
-/**< Exponent polynomial coefficients for 8 bit fixed point (16 elements)
- * Format is in Q0.7 for all elements */
+/** Exponent polynomial coefficients for 8 bit fixed point (16 elements)
+ * Format is in Q0.7 for all elements
+ */
static const std::array<qint8x16_t, 4> exp_tabq_qs8 =
{
{
@@ -61,8 +64,9 @@
}
};
-/**< Exponent polynomial coefficients for 16 bit fixed point (8 elements)
- * Format is in Q0.15 for all elements */
+/** Exponent polynomial coefficients for 16 bit fixed point (8 elements)
+ * Format is in Q0.15 for all elements
+ */
static const std::array<qint16x8_t, 4> exp_tabq_qs16 =
{
{
@@ -73,8 +77,9 @@
}
};
-/**< Logarithm polynomial coefficients for 8 bit fixed point (8 elements)
- * Format is in Q0.7 for all elements except the first one which is in Q1.6 */
+/** Logarithm polynomial coefficients for 8 bit fixed point (8 elements)
+ * Format is in Q0.7 for all elements except the first one which is in Q1.6
+ */
static const std::array<qint8x8_t, 4> log_tab_qs8 =
{
{
@@ -85,8 +90,9 @@
}
};
-/**< Logarithm polynomial coefficients for 16 bit fixed point (8 elements)
- * Format is in Q0.15 for all elements except the first one which is in Q1.14 */
+/** Logarithm polynomial coefficients for 16 bit fixed point (4 elements)
+ * Format is in Q0.15 for all elements except the first one which is in Q1.14
+ */
static const std::array<qint16x4_t, 4> log_tab_qs16 =
{
{
@@ -97,8 +103,9 @@
}
};
-/**< Logarithm polynomial coefficients for 8 bit fixed point (16 elements)
- * Format is in Q0.7 for all elements except the first one which is in Q1.6 */
+/** Logarithm polynomial coefficients for 8 bit fixed point (16 elements)
+ * Format is in Q0.7 for all elements except the first one which is in Q1.6
+ */
static const std::array<qint8x16_t, 4> log_tabq_qs8 =
{
{
@@ -109,8 +116,9 @@
}
};
-/**< Logarithm polynomial coefficients for 16 bit fixed point (8 elements)
- * Format is in Q0.15 for all elements except the first one which is in Q1.14 */
+/** Logarithm polynomial coefficients for 16 bit fixed point (8 elements)
+ * Format is in Q0.15 for all elements except the first one which is in Q1.14
+ */
static const std::array<qint16x8_t, 4> log_tabq_qs16 =
{
{
diff --git a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
index a5de811..8a938a7 100644
--- a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
@@ -48,7 +48,7 @@
~NEPoolingLayerKernel() = default;
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data types supported: QS8/F16/F32.
+ * @param[in] input Source tensor. Data types supported: QS8/QS16/F16/F32.
* @param[out] output Destination tensor. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
*/
@@ -81,6 +81,13 @@
*/
template <PoolingType pooling_type>
void pooling2_q8(const Window &window_input, const Window &window);
+ /** Function to perform 2x2 pooling for 16bit fixed point.
+ *
+ * @param[in] window_input Input region on which to execute the kernel.
+ * @param[in] window Output region on which to execute the kernel.
+ */
+ template <PoolingType pooling_type>
+ void pooling2_q16(const Window &window_input, const Window &window);
/** Function to perform 3x3 pooling.
*
* @param[in] window_input Input region on which to execute the kernel.
@@ -102,6 +109,13 @@
*/
template <PoolingType pooling_type>
void pooling3_q8(const Window &window_input, const Window &window);
+ /** Function to perform 3x3 pooling for 16bit fixed point.
+ *
+ * @param[in] window_input Input region on which to execute the kernel.
+ * @param[in] window Output region on which to execute the kernel.
+ */
+ template <PoolingType pooling_type>
+ void pooling3_q16(const Window &window_input, const Window &window);
/** Function to perform 7x7 pooling.
*
* @param[in] window_input Input region on which to execute the kernel.