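Avoid allocation of auxiliary memory in CpuGemmConvolution
Pass a copy of the caller's tensor pack to the inner GEMM run, setting only ACL_SRC_0 and ACL_DST on it, instead of building a new pack that contains just the source, weights, biases and destination tensors.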
Resolves: COMPMID-4690
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I2d44fd59fc66e2d3e80acffd1a130f6d3fab5c57
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5990
Tested-by: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/runtime/cpu/operators/CpuGemmConvolution.cpp b/src/runtime/cpu/operators/CpuGemmConvolution.cpp
index e7fae9d..6a78b0c 100644
--- a/src/runtime/cpu/operators/CpuGemmConvolution.cpp
+++ b/src/runtime/cpu/operators/CpuGemmConvolution.cpp
@@ -488,8 +488,6 @@
prepare(tensors);
auto src = tensors.get_const_tensor(ACL_SRC_0);
- auto weights = tensors.get_const_tensor(ACL_SRC_1);
- auto biases = tensors.get_const_tensor(ACL_SRC_2);
auto dst = tensors.get_tensor(ACL_DST);
auto gemm_input_to_use = src;
@@ -525,13 +523,9 @@
}
// Runs CpuGemm or CpuGemmLowpMatrixMultiplyCore functions
- ITensorPack pack_mm =
- {
- { TensorType::ACL_SRC_0, gemm_input_to_use },
- { TensorType::ACL_SRC_1, weights },
- { TensorType::ACL_SRC_2, biases },
- { TensorType::ACL_DST, gemm_output_to_use }
- };
+ ITensorPack pack_mm = tensors;
+ pack_mm.add_const_tensor(TensorType::ACL_SRC_0, gemm_input_to_use);
+ pack_mm.add_tensor(TensorType::ACL_DST, gemm_output_to_use);
if(_is_quantized)
{
// Run gemmlowp
diff --git a/src/runtime/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp b/src/runtime/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp
index 56eb4fb..8adf704 100644
--- a/src/runtime/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp
@@ -502,6 +502,7 @@
void CpuGemmLowpMatrixMultiplyCore::run(ITensorPack &tensors)
{
prepare(tensors);
+
auto a = tensors.get_const_tensor(TensorType::ACL_SRC_0);
auto b = tensors.get_const_tensor(TensorType::ACL_SRC_1);
auto c = tensors.get_const_tensor(TensorType::ACL_SRC_2);