blob: f1e23d43cf7d3a8edb397e183b4e830dee287273 [file] [log] [blame]
Sheri Zhang79144a62021-02-08 17:43:04 +00001/*
2 * Copyright (c) 2021 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#ifndef SRC_CORE_NEON_KERNELS_POOLING_LIST_H
25#define SRC_CORE_NEON_KERNELS_POOLING_LIST_H
26
27#include "arm_compute/core/Types.h"
28#include "arm_compute/core/utils/misc/Traits.h"
29#include "src/core/NEON/wrapper/wrapper.h"
Georgios Pinitascd060c42021-06-25 06:00:17 +010030#include "src/core/cpu/kernels/pool2d/neon/quantized.h"
Sheri Zhang79144a62021-02-08 17:43:04 +000031#include <arm_neon.h>
32
33namespace arm_compute
34{
35namespace cpu
36{
37#define DECLARE_POOLING_KERNEL(func_name) \
38 void func_name(const ITensor *src0, ITensor *dst0, ITensor *dst1, PoolingLayerInfo &, const Window &window_src, const Window &window)
39
40DECLARE_POOLING_KERNEL(poolingMxN_qasymm8_neon_nhwc);
41DECLARE_POOLING_KERNEL(poolingMxN_qasymm8_signed_neon_nhwc);
42DECLARE_POOLING_KERNEL(poolingMxN_fp16_neon_nhwc);
43DECLARE_POOLING_KERNEL(poolingMxN_fp32_neon_nhwc);
44
45#if defined(ENABLE_NCHW_KERNELS)
46
47#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
48DECLARE_POOLING_KERNEL(pooling2_fp16_neon_nchw);
49DECLARE_POOLING_KERNEL(pooling3_fp16_neon_nchw);
50DECLARE_POOLING_KERNEL(poolingMxN_fp16_neon_nchw);
51#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */
52
53DECLARE_POOLING_KERNEL(pooling2_fp32_neon_nchw);
54DECLARE_POOLING_KERNEL(pooling3_fp32_neon_nchw);
55DECLARE_POOLING_KERNEL(pooling7_fp32_neon_nchw);
56DECLARE_POOLING_KERNEL(poolingMxN_fp32_neon_nchw);
57#endif /* defined(ENABLE_NCHW_KERNELS) */
58
59#undef DECLARE_POOLING_KERNEL
60
61template <typename T>
Manuel Bottinica62c6f2021-03-23 11:50:34 +000062inline uint32_t offset_no_padding(uint32_t padded_offset, const Coordinates &id, const ITensorInfo &info, int pool_stride_x, int pool_stride_y, DataLayout data_layout)
Sheri Zhang79144a62021-02-08 17:43:04 +000063{
64 const int pad_left = info.padding().left;
65 const int pad_right = info.padding().right;
66 const int pad_top = info.padding().top;
67 const int pad_bottom = info.padding().bottom;
68 const int in_stride_y = static_cast<int>(info.strides_in_bytes().y());
69 const int in_stride_w = static_cast<int>(info.strides_in_bytes()[3]);
70 const int pad_horiz = pad_left + pad_right;
71 const int pad_vert = pad_top + pad_bottom;
72
Manuel Bottinica62c6f2021-03-23 11:50:34 +000073 if(data_layout == DataLayout::NCHW)
Sheri Zhang79144a62021-02-08 17:43:04 +000074 {
75 const uint32_t offset_base = padded_offset
76 - sizeof(T) * pad_horiz * id.y() * pool_stride_y /* subtract padding elems per row */
77 - pad_top * sizeof(T) /* top padding */
78 - sizeof(T) * pad_horiz * info.tensor_shape()[1] * id.z() - pad_vert * in_stride_y * id.z() /* for each Z plane there are height*pad_right padding elems */
79 - in_stride_w * id[3];
80
81 return offset_base;
82 }
83 else
84 {
85 const uint32_t offset_base = padded_offset
86 - sizeof(T) * pad_horiz * id.y() * pool_stride_x // subtract padding elems per row
87 - pad_top * sizeof(T) // top padding
88 - sizeof(T) * pad_horiz * info.tensor_shape()[1] * id.z() * pool_stride_y // for each Z plane there are width*pad_right padding elems
89 - in_stride_w * id[3];
90
91 return offset_base;
92 }
93}
94} // namespace cpu
95} // namespace arm_compute
96
97#endif // SRC_CORE_NEON_KERNELS_POOLING_LIST_H