COMPMID-1480 Add support for NHWC QASYMM8/FP32(non-optimized) to NEON DepthwiseConvolution
Change-Id: I751f5d3fb74085d2e67f610ecf52da4736d0cfb5
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/143870
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
index 1317fb7..ac06553 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
@@ -90,15 +90,16 @@
NEPermute _permute_weights;
NEPermute _permute_output;
Tensor _accumulator;
- Tensor _input_nhwc;
- Tensor _weights_hwio;
- Tensor _output_nhwc;
+ Tensor _permuted_input;
+ Tensor _permuted_weights;
+ Tensor _permuted_output;
bool _has_bias;
bool _is_quantized;
bool _is_optimized;
bool _are_weights_reshaped;
bool _is_nchw;
bool _is_first_run;
+ bool _permute;
};
/** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernels:
@@ -146,7 +147,7 @@
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
// Inherited methods overriden:
void run() override;
@@ -160,12 +161,19 @@
NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
NEFillBorderKernel _v2mm_input_fill_border;
NEFillBorderKernel _v2mm_weights_fill_border;
+ NEPermute _permute_input;
+ NEPermute _permute_weights;
+ NEPermute _permute_output;
Tensor _input_reshaped;
Tensor _weights_reshaped;
Tensor _v2mm_output;
Tensor _output_reshaped;
+ Tensor _permuted_input;
+ Tensor _permuted_weights;
+ Tensor _permuted_output;
bool _is_prepared;
bool _is_quantized;
+ bool _is_nhwc;
const ITensor *_original_weights;
};
}