COMPMID-1480 Add support for NHWC QASYMM8/FP32(non-optimized) to NEON DepthwiseConvolution

Change-Id: I751f5d3fb74085d2e67f610ecf52da4736d0cfb5
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/143870
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>

commit: 26b22160c00d9955255015d82203c7e16f28f0c3 [log] [tgz]
author: Giorgio Arena <giorgio.arena@arm.com> Mon Aug 13 15:49:49 2018 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> Fri Nov 02 16:54:54 2018 +0000
tree: 8646becfe3133ac376d7fc534abe95cbaeb76719
parent: bc92a34f9a96da8fa52fd3563e10f0d1bdd7f3fe [diff] [blame]
diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
index 1317fb7..ac06553 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h

@@ -90,15 +90,16 @@
     NEPermute                                 _permute_weights;
     NEPermute                                 _permute_output;
     Tensor                                    _accumulator;
-    Tensor                                    _input_nhwc;
-    Tensor                                    _weights_hwio;
-    Tensor                                    _output_nhwc;
+    Tensor                                    _permuted_input;
+    Tensor                                    _permuted_weights;
+    Tensor                                    _permuted_output;
     bool                                      _has_bias;
     bool                                      _is_quantized;
     bool                                      _is_optimized;
     bool                                      _are_weights_reshaped;
     bool                                      _is_nchw;
     bool                                      _is_first_run;
+    bool                                      _permute;
 };
 
 /** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernels:
@@ -146,7 +147,7 @@
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
 
     // Inherited methods overriden:
     void run() override;
@@ -160,12 +161,19 @@
     NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
     NEFillBorderKernel                        _v2mm_input_fill_border;
     NEFillBorderKernel                        _v2mm_weights_fill_border;
+    NEPermute                                 _permute_input;
+    NEPermute                                 _permute_weights;
+    NEPermute                                 _permute_output;
     Tensor                                    _input_reshaped;
     Tensor                                    _weights_reshaped;
     Tensor                                    _v2mm_output;
     Tensor                                    _output_reshaped;
+    Tensor                                    _permuted_input;
+    Tensor                                    _permuted_weights;
+    Tensor                                    _permuted_output;
     bool                                      _is_prepared;
     bool                                      _is_quantized;
+    bool                                      _is_nhwc;
     const ITensor                            *_original_weights;
 };
 }
commit	26b22160c00d9955255015d82203c7e16f28f0c3	[log] [tgz]
author	Giorgio Arena <giorgio.arena@arm.com>	Mon Aug 13 15:49:49 2018 +0100
committer	Anthony Barbier <anthony.barbier@arm.com>	Fri Nov 02 16:54:54 2018 +0000
tree	8646becfe3133ac376d7fc534abe95cbaeb76719
parent	bc92a34f9a96da8fa52fd3563e10f0d1bdd7f3fe [diff] [blame]