src/gpu/cl/operators/ClIndirectConv2d.cpp - ml/ComputeLibrary - Gitiles

 /*
  * Copyright (c) 2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to
  * deal in the Software without restriction, including without limitation the
  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  * sell copies of the Software, and to permit persons to whom the Software is
  * furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice shall be included in all
  * copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
 #include "src/gpu/cl/operators/ClIndirectConv2d.h"

 #include "arm_compute/core/KernelDescriptors.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"

 #include "src/common/utils/Log.h"
 #include "src/core/helpers/MemoryHelpers.h"
 #include "src/gpu/cl/kernels/ClIndirectConv2dAddressPrecalculationKernel.h"
 #include "src/gpu/cl/kernels/ClIndirectConv2dKernel.h"
 #include "src/gpu/cl/utils/ClAuxTensorHandler.h"
 #include "src/runtime/heuristics/indirect_conv/ClIndirectConvKernelConfig.h"
 #include "src/runtime/heuristics/indirect_conv/IClIndirectConvKernelConfig.h"

 using namespace arm_compute::cl_indirect_conv;

 namespace arm_compute
 {
 namespace opencl
 {
 using namespace arm_compute::experimental;

 namespace
 {
 DirectConvComputeKernelInfo
 config_indirect_convolution_nhwc(const ITensorInfo *src, const ITensorInfo *weights, const PadStrideInfo &conv_info)
 {
     // Get GPU target
     GPUTarget gpu_target = CLScheduler::get().target();

     std::unique_ptr<IClIndirectConvKernelConfig> t = ClIndirectConvKernelConfigurationFactory::create(gpu_target);

     return t->configure(src, weights, conv_info);
 }

 } // namespace

 void ClIndirectConv2d::configure(const CLCompileContext    &compile_context,
                                  ITensorInfo               *src,
                                  ITensorInfo               *weights,
                                  ITensorInfo               *biases,
                                  ITensorInfo               *dst,
                                  const PadStrideInfo       &conv_info,
                                  const ActivationLayerInfo &act_info)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(src);
     ARM_COMPUTE_LOG_PARAMS(src, weights, biases, dst, conv_info, act_info);

     // Reuse the direct convolution descriptor
     const DirectConvComputeKernelInfo desc = config_indirect_convolution_nhwc(src, weights, conv_info);

     // Configure indirect convolution kernels
     auto k0 = std::make_unique<kernels::ClIndirectConv2dAddressPrecalculationKernel>();
     auto k1 = std::make_unique<kernels::ClIndirectConv2dKernel>();

     k0->set_target(CLScheduler::get().target());
     k1->set_target(CLScheduler::get().target());

     k0->configure(compile_context, src, weights, &_indirect_buffer, conv_info, desc);
     k1->configure(compile_context, src, weights, biases, &_indirect_buffer, dst, conv_info, act_info, desc);

     _addr_precalculation_kernel = std::move(k0);
     _indirect_conv_kernel       = std::move(k1);
     _is_prepared                = false;

     // Tune kernels
     CLScheduler::get().tune_kernel_static(*_indirect_conv_kernel);

     // Request memory for the indirect buffer
     _aux_mem[IndirectBuffer] =
         MemoryInfo(offset_int_vec(IndirectBuffer), MemoryLifetime::Persistent, _indirect_buffer.total_size());
 }

 Status ClIndirectConv2d::validate(const ITensorInfo         *src,
                                   const ITensorInfo         *weights,
                                   const ITensorInfo         *biases,
                                   const ITensorInfo         *dst,
                                   const PadStrideInfo       &conv_info,
                                   const ActivationLayerInfo &act_info)
 {
     // Initialize the direct convolution descriptor
     const DirectConvComputeKernelInfo desc = config_indirect_convolution_nhwc(src, weights, conv_info);

     TensorShape ind_buffer_shape = misc::shape_calculator::compute_indirect_buffer_shape(
         src->tensor_shape(), src->data_layout(), weights->tensor_shape(), conv_info, desc);

     TensorInfo indirect_buffer(ind_buffer_shape, 1, DataType::S32);

     ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClIndirectConv2dAddressPrecalculationKernel::validate(
         src, weights, &indirect_buffer, conv_info, desc));
     ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClIndirectConv2dKernel::validate(src, weights, biases, &indirect_buffer, dst,
                                                                           conv_info, act_info, desc));

     return Status{};
 }

 void ClIndirectConv2d::run(ITensorPack &tensors)
 {
     CLAuxTensorHandler indirect_buffer(offset_int_vec(IndirectBuffer), _indirect_buffer, tensors, true);

     prepare(tensors);

     ITensorPack indirect_conv2d_pack(tensors);
     indirect_conv2d_pack.add_const_tensor(ACL_SRC_3, indirect_buffer.get());

     // Run indirect convolution
     CLScheduler::get().enqueue_op(*_indirect_conv_kernel, indirect_conv2d_pack, true);
 }

 void ClIndirectConv2d::prepare(ITensorPack &constants)
 {
     if (!_is_prepared)
     {
         ICLTensor *indirect_buffer_aux =
             utils::cast::polymorphic_downcast<ICLTensor *>(constants.get_tensor(offset_int_vec(IndirectBuffer)));
         ARM_COMPUTE_ERROR_ON(indirect_buffer_aux == nullptr);

         ARM_COMPUTE_LOG_INFO_WITH_FUNCNAME_ACL("Preparing indirect buffer");

         CLAuxTensorHandler indirect_buffer(_indirect_buffer, *indirect_buffer_aux);
         ARM_COMPUTE_ERROR_ON(indirect_buffer.get()->cl_buffer().get() == nullptr);

         ITensorPack indirect_buffer_pack{{ACL_DST, indirect_buffer.get()}};
         CLScheduler::get().enqueue_op(*_addr_precalculation_kernel, indirect_buffer_pack, true);

         _is_prepared = true;
     }
 }

 experimental::MemoryRequirements ClIndirectConv2d::workspace() const
 {
     return _aux_mem;
 }
 } // namespace opencl
 } // namespace arm_compute
	/*
	* Copyright (c) 2022 Arm Limited.
	*
	* SPDX-License-Identifier: MIT
	*
	* Permission is hereby granted, free of charge, to any person obtaining a copy
	* of this software and associated documentation files (the "Software"), to
	* deal in the Software without restriction, including without limitation the
	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
	* sell copies of the Software, and to permit persons to whom the Software is
	* furnished to do so, subject to the following conditions:
	*
	* The above copyright notice and this permission notice shall be included in all
	* copies or substantial portions of the Software.
	*
	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	* SOFTWARE.
	*/
	#include "src/gpu/cl/operators/ClIndirectConv2d.h"

	#include "arm_compute/core/KernelDescriptors.h"
	#include "arm_compute/core/Types.h"
	#include "arm_compute/core/utils/misc/ShapeCalculator.h"
	#include "arm_compute/runtime/CL/CLScheduler.h"

	#include "src/common/utils/Log.h"
	#include "src/core/helpers/MemoryHelpers.h"
	#include "src/gpu/cl/kernels/ClIndirectConv2dAddressPrecalculationKernel.h"
	#include "src/gpu/cl/kernels/ClIndirectConv2dKernel.h"
	#include "src/gpu/cl/utils/ClAuxTensorHandler.h"
	#include "src/runtime/heuristics/indirect_conv/ClIndirectConvKernelConfig.h"
	#include "src/runtime/heuristics/indirect_conv/IClIndirectConvKernelConfig.h"

	using namespace arm_compute::cl_indirect_conv;

	namespace arm_compute
	{
	namespace opencl
	{
	using namespace arm_compute::experimental;

	namespace
	{
	DirectConvComputeKernelInfo
	config_indirect_convolution_nhwc(const ITensorInfo src, const ITensorInfo weights, const PadStrideInfo &conv_info)
	{
	// Get GPU target
	GPUTarget gpu_target = CLScheduler::get().target();

	std::unique_ptr<IClIndirectConvKernelConfig> t = ClIndirectConvKernelConfigurationFactory::create(gpu_target);

	return t->configure(src, weights, conv_info);
	}

	} // namespace

	void ClIndirectConv2d::configure(const CLCompileContext &compile_context,
	ITensorInfo *src,
	ITensorInfo *weights,
	ITensorInfo *biases,
	ITensorInfo *dst,
	const PadStrideInfo &conv_info,
	const ActivationLayerInfo &act_info)
	{
	ARM_COMPUTE_ERROR_ON_NULLPTR(src);
	ARM_COMPUTE_LOG_PARAMS(src, weights, biases, dst, conv_info, act_info);

	// Reuse the direct convolution descriptor
	const DirectConvComputeKernelInfo desc = config_indirect_convolution_nhwc(src, weights, conv_info);

	// Configure indirect convolution kernels
	auto k0 = std::make_unique<kernels::ClIndirectConv2dAddressPrecalculationKernel>();
	auto k1 = std::make_unique<kernels::ClIndirectConv2dKernel>();

	k0->set_target(CLScheduler::get().target());
	k1->set_target(CLScheduler::get().target());

	k0->configure(compile_context, src, weights, &_indirect_buffer, conv_info, desc);
	k1->configure(compile_context, src, weights, biases, &_indirect_buffer, dst, conv_info, act_info, desc);

	_addr_precalculation_kernel = std::move(k0);
	_indirect_conv_kernel = std::move(k1);
	_is_prepared = false;

	// Tune kernels
	CLScheduler::get().tune_kernel_static(*_indirect_conv_kernel);

	// Request memory for the indirect buffer
	_aux_mem[IndirectBuffer] =
	MemoryInfo(offset_int_vec(IndirectBuffer), MemoryLifetime::Persistent, _indirect_buffer.total_size());
	}

	Status ClIndirectConv2d::validate(const ITensorInfo *src,
	const ITensorInfo *weights,
	const ITensorInfo *biases,
	const ITensorInfo *dst,
	const PadStrideInfo &conv_info,
	const ActivationLayerInfo &act_info)
	{
	// Initialize the direct convolution descriptor
	const DirectConvComputeKernelInfo desc = config_indirect_convolution_nhwc(src, weights, conv_info);

	TensorShape ind_buffer_shape = misc::shape_calculator::compute_indirect_buffer_shape(
	src->tensor_shape(), src->data_layout(), weights->tensor_shape(), conv_info, desc);

	TensorInfo indirect_buffer(ind_buffer_shape, 1, DataType::S32);

	ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClIndirectConv2dAddressPrecalculationKernel::validate(
	src, weights, &indirect_buffer, conv_info, desc));
	ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClIndirectConv2dKernel::validate(src, weights, biases, &indirect_buffer, dst,
	conv_info, act_info, desc));

	return Status{};
	}

	void ClIndirectConv2d::run(ITensorPack &tensors)
	{
	CLAuxTensorHandler indirect_buffer(offset_int_vec(IndirectBuffer), _indirect_buffer, tensors, true);

	prepare(tensors);

	ITensorPack indirect_conv2d_pack(tensors);
	indirect_conv2d_pack.add_const_tensor(ACL_SRC_3, indirect_buffer.get());

	// Run indirect convolution
	CLScheduler::get().enqueue_op(*_indirect_conv_kernel, indirect_conv2d_pack, true);
	}

	void ClIndirectConv2d::prepare(ITensorPack &constants)
	{
	if (!_is_prepared)
	{
	ICLTensor *indirect_buffer_aux =
	utils::cast::polymorphic_downcast<ICLTensor *>(constants.get_tensor(offset_int_vec(IndirectBuffer)));
	ARM_COMPUTE_ERROR_ON(indirect_buffer_aux == nullptr);

	ARM_COMPUTE_LOG_INFO_WITH_FUNCNAME_ACL("Preparing indirect buffer");

	CLAuxTensorHandler indirect_buffer(_indirect_buffer, *indirect_buffer_aux);
	ARM_COMPUTE_ERROR_ON(indirect_buffer.get()->cl_buffer().get() == nullptr);

	ITensorPack indirect_buffer_pack{{ACL_DST, indirect_buffer.get()}};
	CLScheduler::get().enqueue_op(*_addr_precalculation_kernel, indirect_buffer_pack, true);

	_is_prepared = true;
	}
	}

	experimental::MemoryRequirements ClIndirectConv2d::workspace() const
	{
	return _aux_mem;
	}
	} // namespace opencl
	} // namespace arm_compute