Remove OpenCL padding: CLReductionOperationKernel

- Change the parallel implementation across the X: every thread now computes one row
- Add missing test for MEAN_SUM
- Make reduction on any axis != 0 work with num_channels > 1

Resolve COMPMID-3917

Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Change-Id: Ib0f99540104e3c253bcd1ea637833db533f5e76e
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5522
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>

commit: 3ecf9fefa6f6299a0736599f150d4791cc8345d9 [log] [tgz]
author: Giorgio Arena <giorgio.arena@arm.com> Wed Apr 28 16:11:51 2021 +0100
committer: Giorgio Arena <giorgio.arena@arm.com> Thu Apr 29 17:06:28 2021 +0000
tree: 749d73c8496d152600f528e442b4e4f11a81621e
parent: 8ed3ac11352d1ebb2c69787432fa5893997f1c50 [diff] [blame]
diff --git a/arm_compute/runtime/CL/functions/CLReductionOperation.h b/arm_compute/runtime/CL/functions/CLReductionOperation.h
index 3fbcee6..58164fd 100644
--- a/arm_compute/runtime/CL/functions/CLReductionOperation.h
+++ b/arm_compute/runtime/CL/functions/CLReductionOperation.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -36,7 +36,6 @@
 {
 // Forward declarations
 class CLCompileContext;
-class CLFillBorderKernel;
 class CLReductionOperationKernel;
 class ICLTensor;
 
@@ -99,15 +98,12 @@
 private:
     ICLTensor *configure_intermediate_result_vector(ICLTensor *input, ICLTensor *output);
 
-    MemoryGroup                                              _memory_group;
-    std::vector<CLTensor>                                    _results_vector;
-    std::vector<std::unique_ptr<CLReductionOperationKernel>> _reduction_kernels_vector;
-    std::vector<std::unique_ptr<CLFillBorderKernel>>         _border_handlers_vector;
-    CLReshapeLayer                                           _reshape;
-    unsigned int                                             _num_of_stages;
-    unsigned int                                             _reduction_axis;
-    bool                                                     _is_serial;
-    bool                                                     _is_reshape_required;
+    MemoryGroup                                 _memory_group;
+    CLTensor                                    _unreshaped_output;
+    std::unique_ptr<CLReductionOperationKernel> _reduction_kernel;
+    CLReshapeLayer                              _reshape;
+    unsigned int                                _reduction_axis;
+    bool                                        _is_reshape_required;
 };
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_CLREDUCTIONOPERATION_H */
\ No newline at end of file
commit	3ecf9fefa6f6299a0736599f150d4791cc8345d9	[log] [tgz]
author	Giorgio Arena <giorgio.arena@arm.com>	Wed Apr 28 16:11:51 2021 +0100
committer	Giorgio Arena <giorgio.arena@arm.com>	Thu Apr 29 17:06:28 2021 +0000
tree	749d73c8496d152600f528e442b4e4f11a81621e
parent	8ed3ac11352d1ebb2c69787432fa5893997f1c50 [diff] [blame]