//
// Copyright © 2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
| 5 | |
| 6 | #include "GpuFsaBatchMatMul.hpp" |
| 7 | #include "UtilsGpuFsa.hpp" |
| 8 | |
| 9 | #include <aclCommon/ArmComputeTensorUtils.hpp> |
| 10 | |
| 11 | #include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h> |
| 12 | #include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h> |
| 13 | #include <arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMatMul.h> |
| 14 | #include <arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h> |
| 15 | |
| 16 | using namespace arm_compute::experimental::dynamic_fusion; |
| 17 | using namespace armnn::armcomputetensorutils; |
| 18 | |
| 19 | namespace armnn |
| 20 | { |
| 21 | |
| 22 | arm_compute::Status GpuFsaBatchMatMulValidate(const TensorInfo& input0, |
| 23 | const TensorInfo& input1, |
| 24 | const BatchMatMulDescriptor& descriptor) |
| 25 | { |
| 26 | // Create a new workload sketch, for validation purposes |
| 27 | auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context(); |
| 28 | auto workloadContext = GpuWorkloadContext(&compileCtx); |
| 29 | GpuWorkloadSketch sketch{ &workloadContext }; |
| 30 | |
| 31 | arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0, input0.GetNumDimensions()); |
| 32 | arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1, input1.GetNumDimensions()); |
| 33 | |
| 34 | aclInput0Info.set_are_values_constant(input0.IsConstant()); |
| 35 | aclInput1Info.set_are_values_constant(input1.IsConstant()); |
| 36 | |
| 37 | arm_compute::ITensorInfo* inputInfo0 = workloadContext.create_tensor_info(aclInput0Info); |
| 38 | arm_compute::ITensorInfo* inputInfo1 = workloadContext.create_tensor_info(aclInput1Info); |
| 39 | |
| 40 | MatMulAttributes matMulAttributes{}; |
| 41 | matMulAttributes.adj_lhs(descriptor.m_TransposeX); |
| 42 | matMulAttributes.adj_rhs(descriptor.m_TransposeY); |
| 43 | GpuMatMulSettings matmulSettings{}; |
| 44 | matmulSettings.m0(1); |
| 45 | matmulSettings.n0(1); |
| 46 | matmulSettings.k0(1); |
| 47 | |
| 48 | return GpuMatMul::validate_op(sketch, inputInfo0, inputInfo1, matMulAttributes, matmulSettings); |
| 49 | } |
| 50 | |
| 51 | void GpuFsaBatchMatMulCreateOp(GpuFsaPreCompiledBlob* blob, |
| 52 | const TensorInfo& input0, |
| 53 | const TensorInfo& input1, |
| 54 | const BatchMatMulDescriptor& descriptor) |
| 55 | { |
| 56 | GpuWorkloadSketch* sketch = blob->sketch.get(); |
| 57 | GpuWorkloadContext* workloadContext = blob->workloadContext.get(); |
| 58 | std::vector<arm_compute::ITensorInfo*> inputTensorInfos = {}; |
| 59 | std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {}; |
| 60 | |
| 61 | arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0, input0.GetNumDimensions()); |
| 62 | arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1, input1.GetNumDimensions()); |
| 63 | |
| 64 | aclInput0Info.set_are_values_constant(input0.IsConstant()); |
| 65 | aclInput1Info.set_are_values_constant(input1.IsConstant()); |
| 66 | |
| 67 | inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInput0Info)); |
| 68 | inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInput1Info)); |
| 69 | |
| 70 | MatMulAttributes matMulAttributes{}; |
| 71 | matMulAttributes.adj_lhs(descriptor.m_TransposeX); |
| 72 | matMulAttributes.adj_rhs(descriptor.m_TransposeY); |
| 73 | GpuMatMulSettings matmulSettings{}; |
| 74 | matmulSettings.m0(1); |
| 75 | matmulSettings.n0(1); |
| 76 | matmulSettings.k0(1); |
| 77 | |
| 78 | // Validate operator, check status and update reasonIfUnsupported |
| 79 | arm_compute::Status aclStatus = GpuMatMul::validate_op(*sketch, |
| 80 | inputTensorInfos[0], |
| 81 | inputTensorInfos[1], |
| 82 | matMulAttributes, |
| 83 | matmulSettings); |
| 84 | |
| 85 | const bool supported = aclStatus.error_code() == arm_compute::ErrorCode::OK; |
| 86 | if (!supported) |
| 87 | { |
| 88 | throw BackendCapabilityException("\"GpuFsa\" backend failed during elementwise binary add validation"); |
| 89 | } |
| 90 | |
| 91 | arm_compute::ITensorInfo* addOutputInfo = GpuMatMul::create_op(*sketch, |
| 92 | inputTensorInfos[0], |
| 93 | inputTensorInfos[1], |
| 94 | matMulAttributes, |
| 95 | matmulSettings); |
| 96 | |
| 97 | // Temporary fix until fusing attempt is make for GpuFsa backend and Output layer workload is created. |
| 98 | outputTensorInfos.emplace_back(workloadContext->create_tensor_info()); |
| 99 | GpuOutput::create_op(*sketch, addOutputInfo, outputTensorInfos[0]); |
| 100 | |
| 101 | // Store the TensorInfos within the blob as unique_ptrs to be used later |
| 102 | blob->inputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos); |
| 103 | blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos); |
| 104 | } |
| 105 | |
| 106 | } // namespace armnn |