COMPMID-2049: Add QASYMM8 support to the deconvolution layer on NEON

Change-Id: I02890c7542f6036edad9cbba9fdcf2312c70070a
Signed-off-by: Usama Arif <usama.arif@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1000
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
diff --git a/src/core/CPP/kernels/CPPUpsampleKernel.cpp b/src/core/CPP/kernels/CPPUpsampleKernel.cpp
index d77d9c1..f04728d 100644
--- a/src/core/CPP/kernels/CPPUpsampleKernel.cpp
+++ b/src/core/CPP/kernels/CPPUpsampleKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -73,14 +73,15 @@
     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window);
 
     // Initialize _scaled_output buffer
-    const int width_scaled  = _output->info()->dimension(0);
-    const int height_scaled = _output->info()->dimension(1);
-    const int stride_x      = _info.stride().first;
-    const int stride_y      = _info.stride().second;
-    const int start_x       = _info.pad().first;
-    const int start_y       = _inner_border.second + _info.pad().second;
-    const int end_y         = height_scaled - _info.pad().second;
-    const int end_x         = width_scaled - _inner_border.first - _info.pad().first;
+    const int    width_scaled  = _output->info()->dimension(0);
+    const int    height_scaled = _output->info()->dimension(1);
+    const int    stride_x      = _info.stride().first;
+    const int    stride_y      = _info.stride().second;
+    const int    start_x       = _info.pad().first;
+    const int    start_y       = _inner_border.second + _info.pad().second;
+    const int    end_y         = height_scaled - _info.pad().second;
+    const int    end_x         = width_scaled - _inner_border.first - _info.pad().first;
+    const size_t element_size  = _input->info()->element_size();
 
     std::fill_n(_output->buffer(), _output->info()->total_size(), 0);
 
@@ -95,7 +96,7 @@
 
     execute_window_loop(window, [&](const Coordinates & id)
     {
-        *(reinterpret_cast<float *>(out.ptr())) = *(reinterpret_cast<const float *>(in.ptr()));
+        memcpy(out.ptr(), in.ptr(), element_size);
     },
     in, out);
 }
diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
index fdc959c..aff335e 100644
--- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
@@ -51,8 +51,8 @@
                                       unsigned int inner_border_right, unsigned int inner_border_top)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::QASYMM8);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F32, DataType::QASYMM8);
     ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) != weights->dimension(1));
     ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) < 1);
     ARM_COMPUTE_RETURN_ERROR_ON(!info.padding_is_symmetric());
@@ -68,7 +68,11 @@
 
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
 
-    if(bias != nullptr)
+    if(is_data_type_quantized_asymmetric(input->data_type()))
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32);
+    }
+    else
     {
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias);
     }
@@ -111,10 +115,11 @@
     _inner_border     = std::make_pair(inner_border_right, inner_border_top);
     _is_prepared      = false;
 
-    const unsigned int stride_x = info.stride().first;
-    const unsigned int stride_y = info.stride().second;
+    const DataLayout   data_layout = input->info()->data_layout();
+    const unsigned int stride_x    = info.stride().first;
+    const unsigned int stride_y    = info.stride().second;
 
-    _weights_flipped.allocator()->init(TensorInfo(weights->info()->tensor_shape(), 1, weights->info()->data_type()));
+    _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
     _flip_weights.configure(weights, &_weights_flipped);
 
     auto out_dims = deconvolution_output_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights->info()->dimension(0), weights->info()->dimension(1),