Ensure that correct transformed matrices are used in CpuFullyConnected Execution pack of CpuFullyConnected was altered explicitly with local objects that were getting out of scope. Leading to incorrect results or memory related issues. Track transformed weights and register the weights matrix explicitly during execution honoring the object lifetime scope. Resolves: COMPMID-4762, COMPMID-4764 Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com> Change-Id: I53449c377fb1cfccdf5e6f9505d963518748c318 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/349345 Tested-by: bsgcomp <bsgcomp@arm.com> Reviewed-by: Pablo Tello <pablo.tello@arm.com> Comments-Addressed: bsgcomp <bsgcomp@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6092 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Reviewed-by: SiCong Li <sicong.li@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>

commit: fa1db17a9ad7d35a77e76bf9c61ebc8756149ab2 [log] [tgz]
author: Georgios Pinitas <georgios.pinitas@arm.com> Thu Aug 12 06:28:09 2021 +0100
committer: SiCong Li <sicong.li@arm.com> Thu Aug 12 14:23:46 2021 +0000
tree: 2b6a64b43afea654f423c64ac96a21b9d31a45c2
parent: d761a3e3c153083cd3843fe686f27e3438c87d1c [diff] [blame]
diff --git a/src/runtime/cpu/operators/CpuFullyConnected.cpp b/src/runtime/cpu/operators/CpuFullyConnected.cpp
index e7808fb..eeabce0 100644
--- a/src/runtime/cpu/operators/CpuFullyConnected.cpp
+++ b/src/runtime/cpu/operators/CpuFullyConnected.cpp

@@ -150,9 +150,11 @@
       _flattened_src(),
       _converted_weights(),
       _reshaped_weights(),
+      _trans_weights(),
+      _trans_weights_idx(AuxTensorIdx::Count),
       _aux_mem(Count),
-      _are_weights_converted(false),
-      _are_weights_reshaped(false),
+      _needs_weights_conversion(false),
+      _needs_weights_reshape(false),
       _is_fc_after_conv(false),
       _is_quantized_asymmetric(false),
       _is_prepared(false)
@@ -230,11 +232,13 @@
                                                            dst,
                                                            fc_info));
 
-    _are_weights_converted   = true;
-    _are_weights_reshaped    = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
-    _is_fc_after_conv        = true;
-    _is_quantized_asymmetric = is_data_type_quantized_asymmetric(src->data_type());
-    _is_prepared             = false;
+    _needs_weights_conversion = false;
+    _needs_weights_reshape    = fc_info.transpose_weights ? !fc_info.are_weights_reshaped : false;
+    _needs_weights_reshape    = _needs_weights_reshape && !fc_info.retain_internal_weights;
+    _is_fc_after_conv         = true;
+    _is_quantized_asymmetric  = is_data_type_quantized_asymmetric(src->data_type());
+    _is_prepared              = false;
+    _trans_weights_idx        = AuxTensorIdx::Count;
 
     // With the Fully Connected layer we can have 4 different cases:
     //  1) Convolution layer -> Fully Connected layer without batches
@@ -258,12 +262,13 @@
     }
 
     // Reshape weights if needed
-    if(!_are_weights_reshaped)
+    if(_needs_weights_reshape)
     {
         // Reshape the weights
         _transpose_weights = std::make_unique<kernels::CpuTransposeKernel>();
         _transpose_weights->configure(weights, &_reshaped_weights);
-        weights_to_use = &_reshaped_weights;
+        weights_to_use     = &_reshaped_weights;
+        _trans_weights_idx = AuxTensorIdx::TransposedWeights;
     }
 
     // Convert weights if needed
@@ -276,8 +281,9 @@
                                     src->tensor_shape(),
                                     fc_info.weights_trained_layout);
 
-        weights_to_use         = &_converted_weights;
-        _are_weights_converted = false;
+        weights_to_use            = &_converted_weights;
+        _needs_weights_conversion = true;
+        _trans_weights_idx        = AuxTensorIdx::ConvertedWeights;
     }
 
     if(_is_fc_after_conv)
@@ -291,7 +297,11 @@
         configure_fc_fc(src, weights_to_use, biases, dst, fc_info.activation_info);
     }
 
-    _are_weights_reshaped = _are_weights_reshaped || fc_info.retain_internal_weights;
+    // Retain the tensorinfo with the weights to use
+    if(_needs_weights_reshape || _needs_weights_conversion)
+    {
+        _trans_weights = *weights_to_use;
+    }
 
     // Set auxiliary memory requirements
     auto gemm_mem_req = (_is_quantized_asymmetric) ? _mm_gemmlowp->workspace() : _mm_gemm->workspace();
@@ -308,7 +318,7 @@
     }
     else
     {
-        _aux_mem[TransposedWeights] = MemoryInfo(offset_int_vec(TransposedWeights), MemoryLifetime::Persistent, _reshaped_weights.total_size());
+        _aux_mem[TransposedWeights] = MemoryInfo(offset_int_vec(TransposedWeights), _needs_weights_conversion ? MemoryLifetime::Prepare : MemoryLifetime::Persistent, _reshaped_weights.total_size());
         _aux_mem[ConvertedWeights]  = MemoryInfo(offset_int_vec(ConvertedWeights), MemoryLifetime::Persistent, _converted_weights.total_size());
     }
     _aux_mem[FlattenedSrc] = MemoryInfo(offset_int_vec(FlattenedSrc), MemoryLifetime::Temporary, _flattened_src.total_size());
@@ -401,6 +411,7 @@
     auto src = tensors.get_const_tensor(ACL_SRC_0);
 
     CpuAuxTensorHandler flattened_src(offset_int_vec(FlattenedSrc), _flattened_src, tensors, false);
+    CpuAuxTensorHandler transformed_wei(offset_int_vec(_trans_weights_idx), _trans_weights, tensors, false);
 
     // Linearize src if it comes from a convolutional layer
     if(_is_fc_after_conv)
@@ -411,6 +422,10 @@
 
     ITensorPack gemm_pack = tensors;
     gemm_pack.add_const_tensor(ACL_SRC_0, (_is_fc_after_conv) ? flattened_src.get() : src);
+    if(_needs_weights_reshape || _needs_weights_conversion)
+    {
+        gemm_pack.add_const_tensor(ACL_SRC_1, transformed_wei.get());
+    }
 
     // Run matrix multiply
     if(_is_quantized_asymmetric)
@@ -436,7 +451,7 @@
         const ITensor *cur_weights = weights;
 
         // Reshape of the weights (happens only once)
-        if(!_are_weights_reshaped)
+        if(_needs_weights_reshape)
         {
             // Run reshape weights kernel and mark weights as unused
             ITensorPack transpose_pack{ { ACL_SRC, weights }, { ACL_DST, reshaped_weights.get() } };
@@ -444,32 +459,29 @@
 
             cur_weights->mark_as_unused();
             cur_weights = reshaped_weights.get();
-
-            _are_weights_reshaped = true;
         }
 
         // Convert weights if needed (happens only once)
-        if(!_are_weights_converted)
+        if(_needs_weights_conversion)
         {
             ITensorPack convert_pack{ { ACL_SRC, cur_weights }, { ACL_DST, converted_weights.get() } };
             _convert_weights->run(convert_pack);
 
             cur_weights->mark_as_unused();
             cur_weights = converted_weights.get();
-
-            _are_weights_converted = true;
         }
 
-        tensors.add_const_tensor(ACL_SRC_1, cur_weights);
+        ITensorPack gemm_pack = tensors;
+        gemm_pack.add_const_tensor(ACL_SRC_1, cur_weights);
 
         // Prepare GEMM prepare and release unused weights
         if(!_is_quantized_asymmetric)
         {
-            _mm_gemm->prepare(tensors);
+            _mm_gemm->prepare(gemm_pack);
         }
         else
         {
-            _mm_gemmlowp->prepare(tensors);
+            _mm_gemmlowp->prepare(gemm_pack);
         }
 
         _is_prepared = true;
commit	fa1db17a9ad7d35a77e76bf9c61ebc8756149ab2	[log] [tgz]
author	Georgios Pinitas <georgios.pinitas@arm.com>	Thu Aug 12 06:28:09 2021 +0100
committer	SiCong Li <sicong.li@arm.com>	Thu Aug 12 14:23:46 2021 +0000
tree	2b6a64b43afea654f423c64ac96a21b9d31a45c2
parent	d761a3e3c153083cd3843fe686f27e3438c87d1c [diff] [blame]