Handle Conv2d layer with implicit output padding in NHWC. Corner cases exist when output top/bottom padding is non-zero for Convolution Layer. This can cause invalid output from the NEGEMMConvolutionLayer as assembly kernel integration does not efficiently handle such cases. As a workaround we always allocate a memory-managed auxiliary tensor which we use as an output for GEMM when padding exists and then we copy to the padded output. If no padding exists we import the output tensor memory to the temporary buffer and perform the calculation as we did before. Resolves: COMPMID-4114 Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com> Change-Id: If82d0e115b8369b91d775895d5315b044306cc74 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5083 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Reviewed-by: Giorgio Arena <giorgio.arena@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>

commit: 2ee9801bd874c87bc2e3645f220de1de85e9b75d [log] [tgz]
author: Georgios Pinitas <georgios.pinitas@arm.com> Mon Feb 15 20:42:39 2021 +0000
committer: Giorgio Arena <giorgio.arena@arm.com> Tue Feb 16 11:55:12 2021 +0000
tree: 562ece9612f9978dd13ba2c696deadbdbbcc14ac
parent: bae22373a71ccf9b4c008e62f081344a929ffb24 [diff]
diff --git a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h
index 466e601..381fa4d 100644
--- a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h
+++ b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -117,7 +117,7 @@
     void run() override;
 
 private:
-    std::unique_ptr<IFallback> _arm_gemm;        /** Interface for the arm_gemm fallback */
+    std::unique_ptr<IFallback> _arm_gemm;        /**< Interface for the arm_gemm fallback */
     MemoryGroup                _memory_group;    /**< Function memory group */
     IWeightsManager           *_weights_manager; /**< Pointer to the weights manager */
 };

diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
index 8fc788c..74ef3ee 100644
--- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -109,8 +109,8 @@
 
 NEGEMMConvolutionLayer::NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager, IWeightsManager *weights_manager)
     : _memory_group(memory_manager), _weights_manager(weights_manager), _reshape_weights(), _reshape_weights_managed(), _im2col_kernel(), _mm_gemm(memory_manager), _mm_gemmlowp(memory_manager),
-      _col2im_kernel(), _reshape_layer(), _original_weights(nullptr), _im2col_output(), _weights_reshaped(), _gemm_output(), _tmp_output(), _data_layout(DataLayout::NCHW), _skip_im2col(false),
-      _skip_col2im(false), _is_quantized(false), _is_prepared(false)
+      _col2im_kernel(), _reshape_layer(), _original_weights(nullptr), _original_output(nullptr), _im2col_output(), _weights_reshaped(), _gemm_output(), _gemm_output_3d(), _tmp_output(),
+      _data_layout(DataLayout::NCHW), _skip_im2col(false), _skip_col2im(false), _is_quantized(false), _is_prepared(false)
 {
 }
 
@@ -281,6 +281,7 @@
 
     _is_prepared      = weights_info.retain_internal_weights();
     _original_weights = weights;
+    _original_output  = output;
     _is_quantized     = is_data_type_quantized_asymmetric(input->info()->data_type());
     _data_layout      = data_layout;
     _skip_im2col      = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.stride().first == 1 && conv_info.stride().second == 1);
@@ -368,6 +369,15 @@
         // Update GEMM output
         gemm_output_to_use = &_gemm_output;
     }
+    else
+    {
+        _gemm_output.allocator()->init(*output->info());
+        _memory_group.manage(&_gemm_output);
+        _gemm_output_3d.allocator()->init(*output->info());
+
+        // Update GEMM output
+        gemm_output_to_use = &_gemm_output_3d;
+    }
 
     // Configure GEMM
     // In case we need to skip col2im, GEMM3D (gemm_3d_depth != 0) must be called in order to avoid reshaping the output matrix
@@ -393,16 +403,18 @@
             _reshape_layer.configure(gemm_output_to_use, output);
         }
     }
+    else
+    {
+        // Configure reshape layer
+        _reshape_layer.configure(gemm_output_to_use, output);
+    }
 
     if(_is_quantized && !_skip_col2im)
     {
         _tmp_output.allocator()->allocate();
     }
 
-    if(!_skip_col2im || _is_quantized)
-    {
-        _gemm_output.allocator()->allocate();
-    }
+    _gemm_output.allocator()->allocate();
 
     ARM_COMPUTE_ERROR_ON_MSG((output->info()->dimension(idx_width) != conv_w) || (output->info()->dimension(idx_height) != conv_h),
                              "Output shape does not match the expected one");
@@ -554,6 +566,8 @@
 
     MemoryGroupResourceScope scope_mg(_memory_group);
 
+    bool out_has_padding = _skip_col2im && (_original_output->info()->padding().bottom != 0 || _original_output->info()->padding().top != 0);
+
     if(!_skip_im2col)
     {
         // Run input reshaping
@@ -561,6 +575,10 @@
         NEScheduler::get().schedule(_im2col_kernel.get(), y_dim);
     }
 
+    // Handle the case where output has top/bottom padding
+    const ITensor *out_to_use = out_has_padding ? &_gemm_output : _original_output;
+    _gemm_output_3d.allocator()->import_memory(out_to_use->buffer());
+
     // Runs NEGEMM or NEGEMMLowpMatrixMultiplyCore functions
     if(_is_quantized)
     {
@@ -585,6 +603,10 @@
             _reshape_layer.run();
         }
     }
+    else if(out_has_padding)
+    {
+        _reshape_layer.run();
+    }
 }
 
 void NEGEMMConvolutionLayer::prepare()
commit	2ee9801bd874c87bc2e3645f220de1de85e9b75d	[log] [tgz]
author	Georgios Pinitas <georgios.pinitas@arm.com>	Mon Feb 15 20:42:39 2021 +0000
committer	Giorgio Arena <giorgio.arena@arm.com>	Tue Feb 16 11:55:12 2021 +0000
tree	562ece9612f9978dd13ba2c696deadbdbbcc14ac
parent	bae22373a71ccf9b4c008e62f081344a929ffb24 [diff]