Remove padding from NERemapKernel

Use the out_of_tensor function to check whether parallel instructions can be used safely.
Revert to serial computation otherwise.

Resolves: COMPMID-4449

Change-Id: I23a986612e3c5d0367e23e56f1aeedbb1330cffc
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5651
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
diff --git a/arm_compute/runtime/NEON/functions/NERemap.h b/arm_compute/runtime/NEON/functions/NERemap.h
index 1693078..271ac97 100644
--- a/arm_compute/runtime/NEON/functions/NERemap.h
+++ b/arm_compute/runtime/NEON/functions/NERemap.h
@@ -25,7 +25,7 @@
 #define ARM_COMPUTE_NEREMAP_H
 
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 #include "arm_compute/runtime/Tensor.h"
 
 #include <cstdint>
@@ -36,10 +36,9 @@
 
 /** Basic function to execute remap. This function calls the following kernels:
  *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
  * -# @ref NERemapKernel
  */
-class NERemap : public INESimpleFunction
+class NERemap : public INESimpleFunctionNoBorder
 {
 public:
     /** Initialise the function's sources, destination, interpolation policy and border mode.
@@ -58,7 +57,7 @@
      * @param[out]     output                Output tensor. Data type supported: U8.
      * @param[in]      policy                Interpolation policy to use. Only NEAREST and BILINEAR are supported.
      * @param[in]      border_mode           Border mode to use on the input tensor.
-     * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. Defaults to 0.
      *
      */
     void configure(ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output,