blob: 35715a3764bf49ba42499e978d0040394a0e728a [file] [log] [blame]
/*
* Copyright (c) 2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/* arm_conv kernels share a lot of similarities in how they address input and
* output tensors. Consequently, this file contains common approaches to
* preparing these tensor descriptions. Generic (i.e., untyped) methods are
* contained within the `arm_conv::addressing` namespace, and typed wrappers
* are provided within an anonymous namespace within `arm_conv`. The various
* methods are described below.
*/
#include <cstddef>
namespace arm_conv {
namespace addressing {
/* Pointer array
 * -------------
 *
 * Constructs an array of pointers which point to an `array_rows` x
 * `array_cols` chunk of a tensor. The array of pointers will be written into
 * `dest`.
 *
 * `element_size` is the size of a single tensor element; the typed wrapper in
 * this file passes `sizeof(T)`, i.e. a size in bytes.
 *
 * `base_ptr` should point at the first VALID element of the chunk of tensor
 * (i.e., if there's one padded row, and one padded column, then `base_ptr`
 * should point at the element which will be at position (1, 1) in the array).
 * `ld_row` and `ld_col` are in bytes, and describe the strides over rows and
 * columns (respectively) of the NHWC-ordered tensor. `pad_buffer` should point
 * at a suitably sized (and initialised) area of memory which can be addressed
 * by elements of the array which represent padding.
 *
 * `pad_top` and `pad_left` describe the padding on the top and left of the
 * array, respectively, and `valid_rows` and `valid_cols` describe the number
 * of rows and columns between the element pointed to by `base_ptr` and the
 * edge of the image (that is, `valid_rows` may be greater than `array_rows`,
 * and likewise for the columns).
 */
void fill_pointer_array(
size_t element_size,
void **dest, unsigned int array_rows, unsigned int array_cols,
void *base_ptr, size_t ld_row, size_t ld_col,
void *pad_buffer,
unsigned int pad_top, unsigned int valid_rows,
unsigned int pad_left, unsigned int valid_cols
);
/* Interleaved multi-point pointer array
 * -------------------------------------
 *
 * For each point in an `output_rows` x `output_cols` array, constructs a
 * `kernel_rows` x `kernel_cols` array of pointers. The pointers are
 * interleaved thusly:
 *
 *   for ki in kernel_rows:
 *     for kj in kernel_cols:
 *       for oi in output_rows:
 *         for oj in output_cols:
 *           get pointer for point (oi*stride_rows + ki, oj*stride_cols + kj)
 *
 * `element_size` is the size of a single tensor element; the typed wrapper in
 * this file passes `sizeof(T)`, i.e. a size in bytes.
 *
 * Other arguments are as for `fill_pointer_array`.
 *
 * The name reflects that this is the form of addressing mode used by "generic"
 * depthwise and pooling kernels.
 */
void fill_pointer_array_generic_kernel(
size_t element_size,
void **dest,
unsigned int output_rows, unsigned int output_cols,
unsigned int kernel_rows, unsigned int kernel_cols,
unsigned int stride_rows, unsigned int stride_cols,
void *base_ptr, size_t ld_row, size_t ld_col,
void *pad_buffer,
unsigned int pad_top, unsigned int valid_rows,
unsigned int pad_left, unsigned int valid_cols
);
/* NCHW-patch addressed by row
 * ---------------------------
 *
 * Construct an array of pointers, each of which points at a row of an
 * NCHW-ordered patch of a tensor. Memory addressed by the pointers may be
 * outside of the original tensor, and should therefore not be written to
 * (modifications will be lost).
 *
 * `element_size` is the size of a single tensor element; the typed wrapper in
 * this file passes `sizeof(T)`, i.e. a size in bytes.
 *
 * `dest_row_pointers` should point at an array of `patch_rows` pointers, each
 * of which will point at a 1 x `patch_cols` NCHW-ordered sample of the source
 * tensor.
 *
 * `dest_patch` should point to an area of memory of
 * `element_size * patch_rows * patch_cols` bytes which can be written to by
 * this function to form samples of the source tensor.
 *
 * `src_ptr` should point at the first VALID element of the chunk of tensor
 * (i.e., if there's one padded row, and one padded column, then `src_ptr`
 * should point at the element which will be at position (1, 1) in the array).
 * `ld_row` and `ld_col` are in bytes, and describe the strides over rows and
 * columns (respectively) of the NHWC-ordered tensor. If `ld_col` ==
 * `element_size` then copies from the source tensor will be elided and source
 * data may be addressed directly.
 *
 * `pad_row` should point to a `patch_cols` array of (appropriately
 * initialised) padding values.
 *
 * Other arguments are as for `fill_pointer_array`.
 */
void fill_nchw_patch_array(
size_t element_size,
const void **dest_row_pointers, // Array of pointers to each row of the patch
void *dest_patch, // Pointer to space which can be used to construct the patch
unsigned int patch_rows, unsigned int patch_cols, // Patch size
const void *src_ptr, size_t ld_row, size_t ld_col, // Source tensor
const void *pad_row, // Pointer to a row of padding values
unsigned int pad_top, unsigned int valid_rows,
unsigned int pad_left, unsigned int valid_cols
);
/* Patch array for generic kernels
 * -------------------------------
 *
 * NOTE(review): this declaration has no description in this header. The
 * summary below is inferred from the parameter names, the inline comments and
 * the neighbouring declarations only; confirm it against the implementation
 * before relying on it.
 *
 * The signature combines the arguments of `fill_pointer_array_generic_kernel`
 * (`output_*`, `kernel_*`, `stride_*`) with those of `fill_nchw_patch_array`
 * (`dest_patch`, `src_ptr`, `ld_row`, `ld_col`, `pad_row`), so it presumably
 * builds pointers for a generic kernel out of a constructed patch rather than
 * pointing into the tensor directly.
 *
 * `element_size` is the size of a single tensor element; the typed wrapper in
 * this file passes `sizeof(T)`, i.e. a size in bytes.
 *
 * `dest_pointers` receives the pointers; per the inline comment there is one
 * per output row per kernel point. `dest_patch` is scratch space in which the
 * patch may be constructed. The required sizes of both areas are not stated
 * here -- TODO confirm with the implementation.
 *
 * `pad_row` points to a row of padding values. The remaining padding and
 * validity arguments presumably have the meaning given for
 * `fill_pointer_array`.
 */
void fill_patch_array_generic_kernel(
size_t element_size,
const void **dest_pointers, // Pointers: one per output row per kernel point
void *dest_patch, // Pointer to space which can be used to construct the patch
unsigned int output_rows, unsigned int output_cols,
unsigned int kernel_rows, unsigned int kernel_cols,
unsigned int stride_rows, unsigned int stride_cols,
const void *src_ptr, size_t ld_row, size_t ld_col, // Source tensor
const void *pad_row, // Pointer to a row of padding values
unsigned int pad_top, unsigned int valid_rows,
unsigned int pad_left, unsigned int valid_cols
);
} // namespace addressing
namespace {
/* Pointer array (typed wrapper)
 * -----------------------------
 *
 * Typed front-end for `addressing::fill_pointer_array`; see that function for
 * the meaning of each argument. The element size is taken from `sizeof(T)`
 * and every pointer is forwarded untyped. No copies are made: the memory
 * addressed by the resulting pointer array lies within the base tensor and
 * the padding buffer.
 */
template <typename T>
inline void fill_pointer_array(
  T **dest, unsigned int array_rows, unsigned int array_cols,
  T *base_ptr, size_t ld_row, size_t ld_col,
  T *pad_buffer,
  unsigned int pad_top, unsigned int valid_rows,
  unsigned int pad_left, unsigned int valid_cols
)
{
  // Erase the element type up front. The C-style casts also drop any
  // cv-qualification carried by `T`; a reinterpret_cast alone would not
  // compile for a const-qualified `T`.
  void **const untyped_dest = (void **) dest;
  void *const untyped_base = (void *) base_ptr;
  void *const untyped_pad = (void *) pad_buffer;

  addressing::fill_pointer_array(
    sizeof(T),
    untyped_dest, array_rows, array_cols,
    untyped_base, ld_row, ld_col,
    untyped_pad,
    pad_top, valid_rows,
    pad_left, valid_cols
  );
}
/* Interleaved multi-point pointer array (typed wrapper)
 * -----------------------------------------------------
 *
 * Typed front-end for `addressing::fill_pointer_array_generic_kernel`; see
 * that function for the meaning of each argument and for the interleaving
 * order. The element size is taken from `sizeof(T)` and every pointer is
 * forwarded untyped. No copies are made: the memory addressed by the
 * resulting pointer array lies within the base tensor and the padding buffer.
 */
template <typename T>
inline void fill_pointer_array_generic_kernel(
  T **dest,
  unsigned int output_rows, unsigned int output_cols,
  unsigned int kernel_rows, unsigned int kernel_cols,
  unsigned int stride_rows, unsigned int stride_cols,
  T *base_ptr, size_t ld_row, size_t ld_col,
  T *pad_buffer,
  unsigned int pad_top, unsigned int valid_rows,
  unsigned int pad_left, unsigned int valid_cols
)
{
  // Erase the element type up front. The C-style casts also drop any
  // cv-qualification carried by `T`; a reinterpret_cast alone would not
  // compile for a const-qualified `T`.
  void **const untyped_dest = (void **) dest;
  void *const untyped_base = (void *) base_ptr;
  void *const untyped_pad = (void *) pad_buffer;

  addressing::fill_pointer_array_generic_kernel(
    sizeof(T),
    untyped_dest,
    output_rows, output_cols,
    kernel_rows, kernel_cols,
    stride_rows, stride_cols,
    untyped_base, ld_row, ld_col,
    untyped_pad,
    pad_top, valid_rows,
    pad_left, valid_cols
  );
}
/* NCHW-patch addressed by row (typed wrapper)
 * -------------------------------------------
 *
 * Typed front-end for `addressing::fill_nchw_patch_array`; see that function
 * for the meaning of each argument. The element size is taken from
 * `sizeof(T)` and every pointer is forwarded untyped.
 */
template <typename T>
inline void fill_nchw_patch_array(
  const T **dest_row_pointers,  // Array of pointers to each row of the patch
  T *dest_patch,  // Pointer to space which can be used to construct the patch
  unsigned int patch_rows, unsigned int patch_cols,  // Patch size
  const T *src_ptr, size_t ld_row, size_t ld_col,  // Source tensor
  const T *pad_row,  // Pointer to a row of padding values
  unsigned int pad_top, unsigned int valid_rows,
  unsigned int pad_left, unsigned int valid_cols
)
{
  // Strip the element type from each pointer before handing over to the
  // untyped implementation.
  const void **const untyped_rows = reinterpret_cast<const void **>(dest_row_pointers);
  void *const untyped_patch = reinterpret_cast<void *>(dest_patch);
  const void *const untyped_src = reinterpret_cast<const void *>(src_ptr);
  const void *const untyped_pad_row = reinterpret_cast<const void *>(pad_row);

  addressing::fill_nchw_patch_array(
    sizeof(T),
    untyped_rows,
    untyped_patch,
    patch_rows, patch_cols,
    untyped_src, ld_row, ld_col,
    untyped_pad_row,
    pad_top, valid_rows,
    pad_left, valid_cols
  );
}
/* Patch array for generic kernels (typed wrapper)
 * -----------------------------------------------
 *
 * Typed front-end for `addressing::fill_patch_array_generic_kernel`. The
 * element size is taken from `sizeof(T)` and every pointer is forwarded
 * untyped; all other arguments are passed through unchanged.
 */
template <typename T>
inline void fill_patch_array_generic_kernel(
  const T **dest_pointers,  // Pointers: one per output row per kernel point
  T *dest_patch,  // Pointer to space which can be used to construct the patch
  unsigned int output_rows, unsigned int output_cols,
  unsigned int kernel_rows, unsigned int kernel_cols,
  unsigned int stride_rows, unsigned int stride_cols,
  const T *src_ptr, size_t ld_row, size_t ld_col,  // Source tensor
  const T *pad_row,  // Pointer to a row of padding values
  unsigned int pad_top, unsigned int valid_rows,
  unsigned int pad_left, unsigned int valid_cols
)
{
  // Strip the element type from each pointer before handing over to the
  // untyped implementation.
  const void **const untyped_pointers = reinterpret_cast<const void **>(dest_pointers);
  void *const untyped_patch = reinterpret_cast<void *>(dest_patch);
  const void *const untyped_src = reinterpret_cast<const void *>(src_ptr);
  const void *const untyped_pad_row = reinterpret_cast<const void *>(pad_row);

  addressing::fill_patch_array_generic_kernel(
    sizeof(T),
    untyped_pointers,
    untyped_patch,
    output_rows, output_cols,
    kernel_rows, kernel_cols,
    stride_rows, stride_cols,
    untyped_src, ld_row, ld_col,
    untyped_pad_row,
    pad_top, valid_rows,
    pad_left, valid_cols
  );
}
} // namespace {anonymous}
} // namespace arm_conv