Avoid over-allocation of temporary buffers within CpuWinogradConv2d

Resolves: COMPMID-4716

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: Ie036d2bb7a243301a62f089b3920ebee0f409190
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6028
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
diff --git a/src/runtime/cpu/utils/CpuAuxTensorHandler.h b/src/runtime/cpu/utils/CpuAuxTensorHandler.h
index 0d1c927..ae1cffb 100644
--- a/src/runtime/cpu/utils/CpuAuxTensorHandler.h
+++ b/src/runtime/cpu/utils/CpuAuxTensorHandler.h
@@ -28,6 +28,7 @@
 #include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/runtime/Tensor.h"
 
+#include "src/common/utils/Log.h"
 #include "support/Cast.h"
 
 namespace arm_compute
@@ -38,7 +39,7 @@
 class CpuAuxTensorHandler
 {
 public:
-    CpuAuxTensorHandler(int slot_id, TensorInfo &info, ITensorPack &pack, bool pack_inject = false)
+    CpuAuxTensorHandler(int slot_id, TensorInfo &info, ITensorPack &pack, bool pack_inject = false, bool bypass_alloc = false)
         : _tensor()
     {
         if(info.total_size() == 0)
@@ -50,7 +51,12 @@
         ITensor *packed_tensor = utils::cast::polymorphic_downcast<ITensor *>(pack.get_tensor(slot_id));
         if((packed_tensor == nullptr) || (info.total_size() > packed_tensor->info()->total_size()))
         {
-            _tensor.allocator()->allocate();
+            if(!bypass_alloc)
+            {
+                _tensor.allocator()->allocate();
+                ARM_COMPUTE_LOG_INFO_WITH_FUNCNAME_ACL("Allocating auxiliary tensor");
+            }
+
             if(pack_inject)
             {
                 pack.add_tensor(slot_id, &_tensor);