// src/core/NEON/kernels/arm_conv/addressing.hpp - ml/ComputeLibrary - Gitiles

 /*
  * Copyright (c) 2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to
  * deal in the Software without restriction, including without limitation the
  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  * sell copies of the Software, and to permit persons to whom the Software is
  * furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice shall be included in all
  * copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */

 /* arm_conv kernels share a lot of similarities in how they address input and
  * output tensors. Consequently, this file contains common approaches to
  * preparing these tensor descriptions. Generic (i.e., untyped) methods are
  * contained within the `arm_conv::addressing` namespace, and typed wrappers
  * are provided within an anonymous namespace within `arm_conv`. The various
  * methods are described below.
  */

 #include <cstddef>

 namespace arm_conv {
 namespace addressing {

 /* Pointer array
  * -------------
  *
  * Fills `dest` with an `array_rows` x `array_cols` array of pointers, each of
  * which addresses one point of a chunk of a tensor.
  *
  * Arguments:
  *
  *  - `element_size`: size of one tensor element (the typed wrapper in this
  *    file supplies `sizeof(T)`).
  *  - `dest`: receives the array of pointers.
  *  - `array_rows`, `array_cols`: dimensions of the pointer array.
  *  - `base_ptr`: the first VALID element of the chunk of tensor; e.g., with
  *    one padded row and one padded column this is the element which ends up
  *    at position (1, 1) in the array.
  *  - `ld_row`, `ld_col`: strides, in bytes, over rows and columns
  *    (respectively) of the NHWC-ordered tensor.
  *  - `pad_buffer`: a suitably sized (and initialised) area of memory which
  *    can be addressed by the array elements that represent padding.
  *  - `pad_top`, `pad_left`: padding on the top and on the left of the array.
  *  - `valid_rows`, `valid_cols`: number of rows and columns between the
  *    element pointed to by `base_ptr` and the edge of the image; these may be
  *    greater than `array_rows` and `array_cols` respectively.
  */
 void fill_pointer_array(
   size_t element_size,
   void **dest,
   unsigned int array_rows,
   unsigned int array_cols,
   void *base_ptr,
   size_t ld_row,
   size_t ld_col,
   void *pad_buffer,
   unsigned int pad_top,
   unsigned int valid_rows,
   unsigned int pad_left,
   unsigned int valid_cols
 );

 /* Interleaved multi-point pointer array
  * -------------------------------------
  *
  * Builds a `kernel_rows` x `kernel_cols` array of pointers for every point of
  * an `output_rows` x `output_cols` array. The pointers are emitted in the
  * following (interleaved) order:
  *
  *   for ki in kernel_rows:
  *       for kj in kernel_cols:
  *           for oi in output_rows:
  *               for oj in output_cols:
  *                   get pointer for point (oi*stride_rows + ki, oj*stride_cols + kj)
  *
  * All remaining arguments have the same meaning as for `fill_pointer_array`.
  *
  * This is the addressing mode used by the "generic" depthwise and pooling
  * kernels, which is where the name comes from.
  */
 void fill_pointer_array_generic_kernel(
   size_t element_size,
   void **dest,
   unsigned int output_rows,
   unsigned int output_cols,
   unsigned int kernel_rows,
   unsigned int kernel_cols,
   unsigned int stride_rows,
   unsigned int stride_cols,
   void *base_ptr,
   size_t ld_row,
   size_t ld_col,
   void *pad_buffer,
   unsigned int pad_top,
   unsigned int valid_rows,
   unsigned int pad_left,
   unsigned int valid_cols
 );

 /* NCHW-patch addressed by row
  * ---------------------------
  *
  * Produces an array of pointers in which every entry addresses one row of an
  * NCHW-ordered patch of a tensor. The memory the pointers refer to may lie
  * outside of the original tensor, so it should not be written to
  * (modifications will be lost).
  *
  * Arguments:
  *
  *  - `dest_row_pointers`: a list of `patch_rows` pointers; each of them will
  *    point at a 1 x `patch_cols` NCHW-ordered sample of the source tensor.
  *  - `dest_patch`: an area of `element_size * patch_rows * patch_cols` bytes
  *    which this function may write to in order to form samples of the source
  *    tensor.
  *  - `src_ptr`: the first VALID element of the chunk of tensor; e.g., with
  *    one padded row and one padded column this is the element which ends up
  *    at position (1, 1) in the array.
  *  - `ld_row`, `ld_col`: strides, in bytes, over rows and columns
  *    (respectively) of the NHWC-ordered tensor. When `ld_col` ==
  *    `element_size` the copies from the source tensor are elided and the
  *    source data may be addressed directly.
  *  - `pad_row`: a `patch_cols` array of (appropriately initialised) padding
  *    values.
  *
  * All remaining arguments have the same meaning as for `fill_pointer_array`.
  */
 void fill_nchw_patch_array(
   size_t element_size,
   const void **dest_row_pointers,  // One pointer per row of the patch
   void *dest_patch,                // Scratch space in which the patch can be built
   unsigned int patch_rows,         // Patch size: rows
   unsigned int patch_cols,         // Patch size: columns
   const void *src_ptr,             // Source tensor
   size_t ld_row,
   size_t ld_col,
   const void *pad_row,             // A row of padding values
   unsigned int pad_top,
   unsigned int valid_rows,
   unsigned int pad_left,
   unsigned int valid_cols
 );

 /* Patch array for generic kernels
  * -------------------------------
  *
  * Untyped entry point taking a patch buffer, a row of padding values and the
  * output/kernel/stride geometry of a "generic" kernel.
  *
  *  - `dest_pointers`: receives the pointers; one per output row per kernel
  *    point.
  *  - `dest_patch`: space which can be used to construct the patch.
  *  - `src_ptr`, `ld_row`, `ld_col`: describe the source tensor.
  *  - `pad_row`: a row of padding values.
  *
  * NOTE(review): the parameter list combines those of `fill_nchw_patch_array`
  * and `fill_pointer_array_generic_kernel`, so the remaining arguments
  * presumably carry the same meaning as there - confirm against the
  * implementation.
  */
 void fill_patch_array_generic_kernel(
   size_t element_size,
   const void **dest_pointers,  // Pointers: one per output row per kernel point
   void *dest_patch,            // Pointer to space which can be used to construct the patch
   unsigned int output_rows,
   unsigned int output_cols,
   unsigned int kernel_rows,
   unsigned int kernel_cols,
   unsigned int stride_rows,
   unsigned int stride_cols,
   const void *src_ptr,         // Source tensor
   size_t ld_row,
   size_t ld_col,
   const void *pad_row,         // Pointer to a row of padding values
   unsigned int pad_top,
   unsigned int valid_rows,
   unsigned int pad_left,
   unsigned int valid_cols
 );

 }  // namespace addressing

 namespace {

 /* Pointer array (typed wrapper)
  * -----------------------------
  *
  * Forwards to `addressing::fill_pointer_array`, supplying `sizeof(T)` as the
  * element size and passing every other argument through unchanged. This
  * method makes no copies: the memory addressed by the pointer array lies
  * within the base tensor and the padding buffer.
  */
 template <typename T>
 inline void fill_pointer_array(
   T **dest,
   unsigned int array_rows,
   unsigned int array_cols,
   T *base_ptr,
   size_t ld_row,
   size_t ld_col,
   T *pad_buffer,
   unsigned int pad_top,
   unsigned int valid_rows,
   unsigned int pad_left,
   unsigned int valid_cols
 )
 {
   // Erase the element type. C-style casts are used on purpose: unlike
   // static_cast/reinterpret_cast they also accept a const-qualified `T`.
   void **const untyped_dest = (void **) dest;
   void *const untyped_base = (void *) base_ptr;
   void *const untyped_pad = (void *) pad_buffer;

   addressing::fill_pointer_array(
     sizeof(T),
     untyped_dest, array_rows, array_cols,
     untyped_base, ld_row, ld_col,
     untyped_pad,
     pad_top, valid_rows,
     pad_left, valid_cols
   );
 }


 /* Interleaved multi-point pointer array (typed wrapper)
  * -----------------------------------------------------
  *
  * Forwards to `addressing::fill_pointer_array_generic_kernel`, supplying
  * `sizeof(T)` as the element size and passing every other argument through
  * unchanged. This method makes no copies: the memory addressed by the pointer
  * array lies within the base tensor and the padding buffer.
  */
 template <typename T>
 inline void fill_pointer_array_generic_kernel(
   T **dest,
   unsigned int output_rows,
   unsigned int output_cols,
   unsigned int kernel_rows,
   unsigned int kernel_cols,
   unsigned int stride_rows,
   unsigned int stride_cols,
   T *base_ptr,
   size_t ld_row,
   size_t ld_col,
   T *pad_buffer,
   unsigned int pad_top,
   unsigned int valid_rows,
   unsigned int pad_left,
   unsigned int valid_cols
 )
 {
   // Erase the element type. C-style casts are used on purpose: unlike
   // static_cast/reinterpret_cast they also accept a const-qualified `T`.
   void **const untyped_dest = (void **) dest;
   void *const untyped_base = (void *) base_ptr;
   void *const untyped_pad = (void *) pad_buffer;

   addressing::fill_pointer_array_generic_kernel(
     sizeof(T),
     untyped_dest,
     output_rows, output_cols,
     kernel_rows, kernel_cols,
     stride_rows, stride_cols,
     untyped_base, ld_row, ld_col,
     untyped_pad,
     pad_top, valid_rows,
     pad_left, valid_cols
   );
 }

 /* NCHW-patch addressed by row (typed wrapper)
  * -------------------------------------------
  *
  * Forwards to `addressing::fill_nchw_patch_array`, supplying `sizeof(T)` as
  * the element size and passing every other argument through with its element
  * type erased.
  */
 template <typename T>
 inline void fill_nchw_patch_array(
   const T **dest_row_pointers,  // One pointer per row of the patch
   T *dest_patch,                // Scratch space in which the patch can be built
   unsigned int patch_rows,      // Patch size: rows
   unsigned int patch_cols,      // Patch size: columns
   const T *src_ptr,             // Source tensor
   size_t ld_row,
   size_t ld_col,
   const T *pad_row,             // A row of padding values
   unsigned int pad_top,
   unsigned int valid_rows,
   unsigned int pad_left,
   unsigned int valid_cols
 )
 {
   // Only the pointer-to-pointer needs a reinterpret_cast; the single-level
   // pointers convert to (const) void * directly.
   const void **const untyped_rows = reinterpret_cast<const void **>(dest_row_pointers);
   void *const untyped_patch = static_cast<void *>(dest_patch);
   const void *const untyped_src = static_cast<const void *>(src_ptr);
   const void *const untyped_pad = static_cast<const void *>(pad_row);

   addressing::fill_nchw_patch_array(
     sizeof(T),
     untyped_rows,
     untyped_patch,
     patch_rows, patch_cols,
     untyped_src, ld_row, ld_col,
     untyped_pad,
     pad_top, valid_rows,
     pad_left, valid_cols
   );
 }

 /* Patch array for generic kernels (typed wrapper)
  * -----------------------------------------------
  *
  * Forwards to `addressing::fill_patch_array_generic_kernel`, supplying
  * `sizeof(T)` as the element size and passing every other argument through
  * with its element type erased.
  */
 template <typename T>
 inline void fill_patch_array_generic_kernel(
   const T **dest_pointers,  // Pointers: one per output row per kernel point
   T *dest_patch,            // Pointer to space which can be used to construct the patch
   unsigned int output_rows,
   unsigned int output_cols,
   unsigned int kernel_rows,
   unsigned int kernel_cols,
   unsigned int stride_rows,
   unsigned int stride_cols,
   const T *src_ptr,         // Source tensor
   size_t ld_row,
   size_t ld_col,
   const T *pad_row,         // Pointer to a row of padding values
   unsigned int pad_top,
   unsigned int valid_rows,
   unsigned int pad_left,
   unsigned int valid_cols
 )
 {
   // Only the pointer-to-pointer needs a reinterpret_cast; the single-level
   // pointers convert to (const) void * directly.
   const void **const untyped_pointers = reinterpret_cast<const void **>(dest_pointers);
   void *const untyped_patch = static_cast<void *>(dest_patch);
   const void *const untyped_src = static_cast<const void *>(src_ptr);
   const void *const untyped_pad = static_cast<const void *>(pad_row);

   addressing::fill_patch_array_generic_kernel(
     sizeof(T),
     untyped_pointers,
     untyped_patch,
     output_rows, output_cols,
     kernel_rows, kernel_cols,
     stride_rows, stride_cols,
     untyped_src, ld_row, ld_col,
     untyped_pad,
     pad_top, valid_rows,
     pad_left, valid_cols
   );
 }

 }  // namespace {anonymous}
 }  // namespace arm_conv
	/*
	* Copyright (c) 2022 Arm Limited.
	*
	* SPDX-License-Identifier: MIT
	*
	* Permission is hereby granted, free of charge, to any person obtaining a copy
	* of this software and associated documentation files (the "Software"), to
	* deal in the Software without restriction, including without limitation the
	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
	* sell copies of the Software, and to permit persons to whom the Software is
	* furnished to do so, subject to the following conditions:
	*
	* The above copyright notice and this permission notice shall be included in all
	* copies or substantial portions of the Software.
	*
	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	* SOFTWARE.
	*/

	/* arm_conv kernels share a lot of similarities in how they address input and
	* output tensors. Consequently, this file contains common approaches to
	* preparing these tensor descriptions. Generic (i.e., untyped) methods are
	* contained within the `arm_conv::addressing` namespace, and typed wrappers
	* are provided within an anonymous namespace within `arm_conv`. The various
	* methods are described below.
	*/

	#include <cstddef>

	namespace arm_conv {
	namespace addressing {

	/* Pointer array
	 * -------------
	 *
	 * Fills `dest` with an `array_rows` x `array_cols` array of pointers, each of
	 * which addresses one point of a chunk of a tensor.
	 *
	 * Arguments:
	 *
	 *  - `element_size`: size of one tensor element (the typed wrapper in this
	 *    file supplies `sizeof(T)`).
	 *  - `dest`: receives the array of pointers.
	 *  - `array_rows`, `array_cols`: dimensions of the pointer array.
	 *  - `base_ptr`: the first VALID element of the chunk of tensor; e.g., with
	 *    one padded row and one padded column this is the element which ends up
	 *    at position (1, 1) in the array.
	 *  - `ld_row`, `ld_col`: strides, in bytes, over rows and columns
	 *    (respectively) of the NHWC-ordered tensor.
	 *  - `pad_buffer`: a suitably sized (and initialised) area of memory which
	 *    can be addressed by the array elements that represent padding.
	 *  - `pad_top`, `pad_left`: padding on the top and on the left of the array.
	 *  - `valid_rows`, `valid_cols`: number of rows and columns between the
	 *    element pointed to by `base_ptr` and the edge of the image; these may be
	 *    greater than `array_rows` and `array_cols` respectively.
	 */
	void fill_pointer_array(
	  size_t element_size,
	  void **dest,
	  unsigned int array_rows,
	  unsigned int array_cols,
	  void *base_ptr,
	  size_t ld_row,
	  size_t ld_col,
	  void *pad_buffer,
	  unsigned int pad_top,
	  unsigned int valid_rows,
	  unsigned int pad_left,
	  unsigned int valid_cols
	);

	/* Interleaved multi-point pointer array
	 * -------------------------------------
	 *
	 * Builds a `kernel_rows` x `kernel_cols` array of pointers for every point of
	 * an `output_rows` x `output_cols` array. The pointers are emitted in the
	 * following (interleaved) order:
	 *
	 *   for ki in kernel_rows:
	 *       for kj in kernel_cols:
	 *           for oi in output_rows:
	 *               for oj in output_cols:
	 *                   get pointer for point (oi*stride_rows + ki, oj*stride_cols + kj)
	 *
	 * All remaining arguments have the same meaning as for `fill_pointer_array`.
	 *
	 * This is the addressing mode used by the "generic" depthwise and pooling
	 * kernels, which is where the name comes from.
	 */
	void fill_pointer_array_generic_kernel(
	  size_t element_size,
	  void **dest,
	  unsigned int output_rows,
	  unsigned int output_cols,
	  unsigned int kernel_rows,
	  unsigned int kernel_cols,
	  unsigned int stride_rows,
	  unsigned int stride_cols,
	  void *base_ptr,
	  size_t ld_row,
	  size_t ld_col,
	  void *pad_buffer,
	  unsigned int pad_top,
	  unsigned int valid_rows,
	  unsigned int pad_left,
	  unsigned int valid_cols
	);

	/* NCHW-patch addressed by row
	 * ---------------------------
	 *
	 * Produces an array of pointers in which every entry addresses one row of an
	 * NCHW-ordered patch of a tensor. The memory the pointers refer to may lie
	 * outside of the original tensor, so it should not be written to
	 * (modifications will be lost).
	 *
	 * Arguments:
	 *
	 *  - `dest_row_pointers`: a list of `patch_rows` pointers; each of them will
	 *    point at a 1 x `patch_cols` NCHW-ordered sample of the source tensor.
	 *  - `dest_patch`: an area of `element_size * patch_rows * patch_cols` bytes
	 *    which this function may write to in order to form samples of the source
	 *    tensor.
	 *  - `src_ptr`: the first VALID element of the chunk of tensor; e.g., with
	 *    one padded row and one padded column this is the element which ends up
	 *    at position (1, 1) in the array.
	 *  - `ld_row`, `ld_col`: strides, in bytes, over rows and columns
	 *    (respectively) of the NHWC-ordered tensor. When `ld_col` ==
	 *    `element_size` the copies from the source tensor are elided and the
	 *    source data may be addressed directly.
	 *  - `pad_row`: a `patch_cols` array of (appropriately initialised) padding
	 *    values.
	 *
	 * All remaining arguments have the same meaning as for `fill_pointer_array`.
	 */
	void fill_nchw_patch_array(
	  size_t element_size,
	  const void **dest_row_pointers,  // One pointer per row of the patch
	  void *dest_patch,                // Scratch space in which the patch can be built
	  unsigned int patch_rows,         // Patch size: rows
	  unsigned int patch_cols,         // Patch size: columns
	  const void *src_ptr,             // Source tensor
	  size_t ld_row,
	  size_t ld_col,
	  const void *pad_row,             // A row of padding values
	  unsigned int pad_top,
	  unsigned int valid_rows,
	  unsigned int pad_left,
	  unsigned int valid_cols
	);

	/* Patch array for generic kernels
	 * -------------------------------
	 *
	 * Untyped entry point taking a patch buffer, a row of padding values and the
	 * output/kernel/stride geometry of a "generic" kernel.
	 *
	 *  - `dest_pointers`: receives the pointers; one per output row per kernel
	 *    point.
	 *  - `dest_patch`: space which can be used to construct the patch.
	 *  - `src_ptr`, `ld_row`, `ld_col`: describe the source tensor.
	 *  - `pad_row`: a row of padding values.
	 *
	 * NOTE(review): the parameter list combines those of `fill_nchw_patch_array`
	 * and `fill_pointer_array_generic_kernel`, so the remaining arguments
	 * presumably carry the same meaning as there - confirm against the
	 * implementation.
	 */
	void fill_patch_array_generic_kernel(
	  size_t element_size,
	  const void **dest_pointers,  // Pointers: one per output row per kernel point
	  void *dest_patch,            // Pointer to space which can be used to construct the patch
	  unsigned int output_rows,
	  unsigned int output_cols,
	  unsigned int kernel_rows,
	  unsigned int kernel_cols,
	  unsigned int stride_rows,
	  unsigned int stride_cols,
	  const void *src_ptr,         // Source tensor
	  size_t ld_row,
	  size_t ld_col,
	  const void *pad_row,         // Pointer to a row of padding values
	  unsigned int pad_top,
	  unsigned int valid_rows,
	  unsigned int pad_left,
	  unsigned int valid_cols
	);

	} // namespace addressing

	namespace {

	/* Pointer array (typed wrapper)
	 * -----------------------------
	 *
	 * Forwards to `addressing::fill_pointer_array`, supplying `sizeof(T)` as the
	 * element size and passing every other argument through unchanged. This
	 * method makes no copies: the memory addressed by the pointer array lies
	 * within the base tensor and the padding buffer.
	 */
	template <typename T>
	inline void fill_pointer_array(
	  T **dest,
	  unsigned int array_rows,
	  unsigned int array_cols,
	  T *base_ptr,
	  size_t ld_row,
	  size_t ld_col,
	  T *pad_buffer,
	  unsigned int pad_top,
	  unsigned int valid_rows,
	  unsigned int pad_left,
	  unsigned int valid_cols
	)
	{
	  // Erase the element type. C-style casts are used on purpose: unlike
	  // static_cast/reinterpret_cast they also accept a const-qualified `T`.
	  void **const untyped_dest = (void **) dest;
	  void *const untyped_base = (void *) base_ptr;
	  void *const untyped_pad = (void *) pad_buffer;

	  addressing::fill_pointer_array(
	    sizeof(T),
	    untyped_dest, array_rows, array_cols,
	    untyped_base, ld_row, ld_col,
	    untyped_pad,
	    pad_top, valid_rows,
	    pad_left, valid_cols
	  );
	}


	/* Interleaved multi-point pointer array (typed wrapper)
	 * -----------------------------------------------------
	 *
	 * Forwards to `addressing::fill_pointer_array_generic_kernel`, supplying
	 * `sizeof(T)` as the element size and passing every other argument through
	 * unchanged. This method makes no copies: the memory addressed by the pointer
	 * array lies within the base tensor and the padding buffer.
	 */
	template <typename T>
	inline void fill_pointer_array_generic_kernel(
	  T **dest,
	  unsigned int output_rows,
	  unsigned int output_cols,
	  unsigned int kernel_rows,
	  unsigned int kernel_cols,
	  unsigned int stride_rows,
	  unsigned int stride_cols,
	  T *base_ptr,
	  size_t ld_row,
	  size_t ld_col,
	  T *pad_buffer,
	  unsigned int pad_top,
	  unsigned int valid_rows,
	  unsigned int pad_left,
	  unsigned int valid_cols
	)
	{
	  // Erase the element type. C-style casts are used on purpose: unlike
	  // static_cast/reinterpret_cast they also accept a const-qualified `T`.
	  void **const untyped_dest = (void **) dest;
	  void *const untyped_base = (void *) base_ptr;
	  void *const untyped_pad = (void *) pad_buffer;

	  addressing::fill_pointer_array_generic_kernel(
	    sizeof(T),
	    untyped_dest,
	    output_rows, output_cols,
	    kernel_rows, kernel_cols,
	    stride_rows, stride_cols,
	    untyped_base, ld_row, ld_col,
	    untyped_pad,
	    pad_top, valid_rows,
	    pad_left, valid_cols
	  );
	}

	/* NCHW-patch addressed by row (typed wrapper)
	 * -------------------------------------------
	 *
	 * Forwards to `addressing::fill_nchw_patch_array`, supplying `sizeof(T)` as
	 * the element size and passing every other argument through with its element
	 * type erased.
	 */
	template <typename T>
	inline void fill_nchw_patch_array(
	  const T **dest_row_pointers,  // One pointer per row of the patch
	  T *dest_patch,                // Scratch space in which the patch can be built
	  unsigned int patch_rows,      // Patch size: rows
	  unsigned int patch_cols,      // Patch size: columns
	  const T *src_ptr,             // Source tensor
	  size_t ld_row,
	  size_t ld_col,
	  const T *pad_row,             // A row of padding values
	  unsigned int pad_top,
	  unsigned int valid_rows,
	  unsigned int pad_left,
	  unsigned int valid_cols
	)
	{
	  // Only the pointer-to-pointer needs a reinterpret_cast; the single-level
	  // pointers convert to (const) void * directly.
	  const void **const untyped_rows = reinterpret_cast<const void **>(dest_row_pointers);
	  void *const untyped_patch = static_cast<void *>(dest_patch);
	  const void *const untyped_src = static_cast<const void *>(src_ptr);
	  const void *const untyped_pad = static_cast<const void *>(pad_row);

	  addressing::fill_nchw_patch_array(
	    sizeof(T),
	    untyped_rows,
	    untyped_patch,
	    patch_rows, patch_cols,
	    untyped_src, ld_row, ld_col,
	    untyped_pad,
	    pad_top, valid_rows,
	    pad_left, valid_cols
	  );
	}

	/* Patch array for generic kernels (typed wrapper)
	 * -----------------------------------------------
	 *
	 * Forwards to `addressing::fill_patch_array_generic_kernel`, supplying
	 * `sizeof(T)` as the element size and passing every other argument through
	 * with its element type erased.
	 */
	template <typename T>
	inline void fill_patch_array_generic_kernel(
	  const T **dest_pointers,  // Pointers: one per output row per kernel point
	  T *dest_patch,            // Pointer to space which can be used to construct the patch
	  unsigned int output_rows,
	  unsigned int output_cols,
	  unsigned int kernel_rows,
	  unsigned int kernel_cols,
	  unsigned int stride_rows,
	  unsigned int stride_cols,
	  const T *src_ptr,         // Source tensor
	  size_t ld_row,
	  size_t ld_col,
	  const T *pad_row,         // Pointer to a row of padding values
	  unsigned int pad_top,
	  unsigned int valid_rows,
	  unsigned int pad_left,
	  unsigned int valid_cols
	)
	{
	  // Only the pointer-to-pointer needs a reinterpret_cast; the single-level
	  // pointers convert to (const) void * directly.
	  const void **const untyped_pointers = reinterpret_cast<const void **>(dest_pointers);
	  void *const untyped_patch = static_cast<void *>(dest_patch);
	  const void *const untyped_src = static_cast<const void *>(src_ptr);
	  const void *const untyped_pad = static_cast<const void *>(pad_row);

	  addressing::fill_patch_array_generic_kernel(
	    sizeof(T),
	    untyped_pointers,
	    untyped_patch,
	    output_rows, output_cols,
	    kernel_rows, kernel_cols,
	    stride_rows, stride_cols,
	    untyped_src, ld_row, ld_col,
	    untyped_pad,
	    pad_top, valid_rows,
	    pad_left, valid_cols
	  );
	}

	} // namespace {anonymous}
	} // namespace arm_conv