diff --git a/arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h b/arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h
new file mode 100644
index 0000000..6765b5f
--- /dev/null
+++ b/arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_INESIMPLEFUNCTIONNOBORDER_H__
+#define __ARM_COMPUTE_INESIMPLEFUNCTIONNOBORDER_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+/** Basic interface for NEON functions which run a single kernel (held as an @ref INEKernel) and have no border */
+class INESimpleFunctionNoBorder : public IFunction
+{
+public:
+    /** Default constructor (takes no arguments) */
+    INESimpleFunctionNoBorder();
+
+    // Inherited methods overridden (run() is declared final, so derived classes cannot override it again):
+    void run() override final;
+
+protected:
+    std::unique_ptr<INEKernel> _kernel; /**< Kernel to run; owned by this object and accessible to derived classes */
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_INESIMPLEFUNCTIONNOBORDER_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEAccumulate.h b/arm_compute/runtime/NEON/functions/NEAccumulate.h
index de532c3..36b8bec 100644
--- a/arm_compute/runtime/NEON/functions/NEAccumulate.h
+++ b/arm_compute/runtime/NEON/functions/NEAccumulate.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,7 +24,7 @@
 #ifndef __ARM_COMPUTE_NEACCUMULATE_H__
 #define __ARM_COMPUTE_NEACCUMULATE_H__
 
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 #include <cstdint>
 
@@ -33,7 +33,7 @@
 class ITensor;
 
 /** Basic function to run @ref NEAccumulateKernel */
-class NEAccumulate : public INESimpleFunction
+class NEAccumulate : public INESimpleFunctionNoBorder
 {
 public:
     /** Set the input and accumulation tensors
@@ -45,7 +45,7 @@
 };
 
 /** Basic function to run @ref NEAccumulateWeightedKernel */
-class NEAccumulateWeighted : public INESimpleFunction
+class NEAccumulateWeighted : public INESimpleFunctionNoBorder
 {
 public:
     /** Set the input and accumulation tensors, and the scale value
@@ -59,7 +59,7 @@
 };
 
 /** Basic function to run @ref NEAccumulateSquaredKernel */
-class NEAccumulateSquared : public INESimpleFunction
+class NEAccumulateSquared : public INESimpleFunctionNoBorder
 {
 public:
     /** Set the input and accumulation tensors and the shift value.
@@ -70,5 +70,5 @@
      */
     void configure(const ITensor *input, uint32_t shift, ITensor *output);
 };
-}
+} // namespace arm_compute
 #endif /*__ARM_COMPUTE_NEACCUMULATE_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
index a65146d..588de04 100644
--- a/arm_compute/runtime/NEON/functions/NEActivationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
@@ -24,7 +24,7 @@
 #ifndef __ARM_COMPUTE_NEACTIVATIONLAYER_H__
 #define __ARM_COMPUTE_NEACTIVATIONLAYER_H__
 
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 #include "arm_compute/core/Types.h"
 
@@ -36,7 +36,7 @@
  *
  * @note The function simulates an activation layer with the specified activation function.
  */
-class NEActivationLayer : public INESimpleFunction
+class NEActivationLayer : public INESimpleFunctionNoBorder
 {
 public:
     /** Set the input and output tensor.
@@ -60,5 +60,5 @@
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info);
 };
-}
+} // namespace arm_compute
 #endif /* __ARM_COMPUTE_NEACTIVATIONLAYER_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h b/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h
index 0250293..bdcbaba 100644
--- a/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h
+++ b/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,14 +24,14 @@
 #ifndef __ARM_COMPUTE_NEBITWISEAND_H__
 #define __ARM_COMPUTE_NEBITWISEAND_H__
 
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 namespace arm_compute
 {
 class ITensor;
 
 /** Basic function to run @ref NEBitwiseAndKernel */
-class NEBitwiseAnd : public INESimpleFunction
+class NEBitwiseAnd : public INESimpleFunctionNoBorder
 {
 public:
     /** Initialise the kernel's inputs and output
@@ -42,5 +42,5 @@
      */
     void configure(const ITensor *input1, const ITensor *input2, ITensor *output);
 };
-}
+} // namespace arm_compute
 #endif /* __ARM_COMPUTE_NEBITWISEAND_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseNot.h b/arm_compute/runtime/NEON/functions/NEBitwiseNot.h
index 62c08ff..c2321a8 100644
--- a/arm_compute/runtime/NEON/functions/NEBitwiseNot.h
+++ b/arm_compute/runtime/NEON/functions/NEBitwiseNot.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,14 +24,14 @@
 #ifndef __ARM_COMPUTE_NEBITWISENOT_H__
 #define __ARM_COMPUTE_NEBITWISENOT_H__
 
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 namespace arm_compute
 {
 class ITensor;
 
 /** Basic function to run @ref NEBitwiseNotKernel */
-class NEBitwiseNot : public INESimpleFunction
+class NEBitwiseNot : public INESimpleFunctionNoBorder
 {
 public:
     /** Initialise the kernel's input and output
@@ -41,5 +41,5 @@
      */
     void configure(const ITensor *input, ITensor *output);
 };
-}
+} // namespace arm_compute
 #endif /* __ARM_COMPUTE_NEBITWISENOT_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseOr.h b/arm_compute/runtime/NEON/functions/NEBitwiseOr.h
index 1c9a2f9..689329f 100644
--- a/arm_compute/runtime/NEON/functions/NEBitwiseOr.h
+++ b/arm_compute/runtime/NEON/functions/NEBitwiseOr.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,14 +24,14 @@
 #ifndef __ARM_COMPUTE_NEBITWISEOR_H__
 #define __ARM_COMPUTE_NEBITWISEOR_H__
 
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 namespace arm_compute
 {
 class ITensor;
 
 /** Basic function to run @ref NEBitwiseOrKernel */
-class NEBitwiseOr : public INESimpleFunction
+class NEBitwiseOr : public INESimpleFunctionNoBorder
 {
 public:
     /** Initialise the kernel's inputs and output
@@ -42,5 +42,5 @@
      */
     void configure(const ITensor *input1, const ITensor *input2, ITensor *output);
 };
-}
+} // namespace arm_compute
 #endif /* __ARM_COMPUTE_NEBITWISEOR_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseXor.h b/arm_compute/runtime/NEON/functions/NEBitwiseXor.h
index 4690f0a..cc9f1ed 100644
--- a/arm_compute/runtime/NEON/functions/NEBitwiseXor.h
+++ b/arm_compute/runtime/NEON/functions/NEBitwiseXor.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,14 +24,14 @@
 #ifndef __ARM_COMPUTE_NEBITWISEXOR_H__
 #define __ARM_COMPUTE_NEBITWISEXOR_H__
 
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 namespace arm_compute
 {
 class ITensor;
 
 /** Basic function to run @ref NEBitwiseXorKernel */
-class NEBitwiseXor : public INESimpleFunction
+class NEBitwiseXor : public INESimpleFunctionNoBorder
 {
 public:
     /** Initialise the kernel's inputs and output
@@ -42,5 +42,5 @@
      */
     void configure(const ITensor *input1, const ITensor *input2, ITensor *output);
 };
-}
+} // namespace arm_compute
 #endif /* __ARM_COMPUTE_NEBITWISEXOR_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEChannelCombine.h b/arm_compute/runtime/NEON/functions/NEChannelCombine.h
index 7133553..b5942b2 100644
--- a/arm_compute/runtime/NEON/functions/NEChannelCombine.h
+++ b/arm_compute/runtime/NEON/functions/NEChannelCombine.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,7 +24,7 @@
 #ifndef __ARM_COMPUTE_NECHANNELCOMBINE_H__
 #define __ARM_COMPUTE_NECHANNELCOMBINE_H__
 
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 namespace arm_compute
 {
@@ -33,7 +33,7 @@
 using IImage = ITensor;
 
 /**Basic function to run @ref NEChannelCombineKernel to perform channel combination. */
-class NEChannelCombine : public INESimpleFunction
+class NEChannelCombine : public INESimpleFunctionNoBorder
 {
 public:
     /** Initialize function's inputs and outputs.
@@ -54,5 +54,5 @@
      */
     void configure(const IImage *plane0, const IImage *plane1, const IImage *plane2, IMultiImage *output);
 };
-}
+} // namespace arm_compute
 #endif /*__ARM_COMPUTE_NECHANNELCOMBINE_H__*/
diff --git a/arm_compute/runtime/NEON/functions/NEChannelExtract.h b/arm_compute/runtime/NEON/functions/NEChannelExtract.h
index 1620d3a..ad2bd53 100644
--- a/arm_compute/runtime/NEON/functions/NEChannelExtract.h
+++ b/arm_compute/runtime/NEON/functions/NEChannelExtract.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,7 +25,7 @@
 #define __ARM_COMPUTE_NECHANNELEXTRACT_H__
 
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 namespace arm_compute
 {
@@ -34,7 +34,7 @@
 using IImage = ITensor;
 
 /**Basic function to run @ref NEChannelExtractKernel to perform channel extraction. */
-class NEChannelExtract : public INESimpleFunction
+class NEChannelExtract : public INESimpleFunctionNoBorder
 {
 public:
     /** Initialize the function's source, destination
@@ -52,5 +52,5 @@
      */
     void configure(const IMultiImage *input, Channel channel, IImage *output);
 };
-}
+} // namespace arm_compute
 #endif /*__ARM_COMPUTE_NECHANNELEXTRACT_H__*/
diff --git a/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h b/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h
index 20fe483..0a140d6 100644
--- a/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h
@@ -24,7 +24,7 @@
 #ifndef __ARM_COMPUTE_NECHANNELSHUFFLELAYER_H__
 #define __ARM_COMPUTE_NECHANNELSHUFFLELAYER_H__
 
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 namespace arm_compute
 {
@@ -37,7 +37,7 @@
  * first divide the channels into G groups, C = (G * C'), and perform a transpose of the channel, which gives C = (C' * G).
  * for more details see: https://arxiv.org/pdf/1707.01083.pdf
  */
-class NEChannelShuffleLayer : public INESimpleFunction
+class NEChannelShuffleLayer : public INESimpleFunctionNoBorder
 {
 public:
     /** Initialize the function
diff --git a/arm_compute/runtime/NEON/functions/NECol2Im.h b/arm_compute/runtime/NEON/functions/NECol2Im.h
index 42876a8..64ce994 100644
--- a/arm_compute/runtime/NEON/functions/NECol2Im.h
+++ b/arm_compute/runtime/NEON/functions/NECol2Im.h
@@ -24,7 +24,7 @@
 #ifndef __ARM_COMPUTE_NECOL2IM_H__
 #define __ARM_COMPUTE_NECOL2IM_H__
 
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 #include "arm_compute/core/Size2D.h"
 #include "arm_compute/core/Types.h"
@@ -34,7 +34,7 @@
 class ITensor;
 
 /** Basic function to run @ref NECol2Im */
-class NECol2Im : public INESimpleFunction
+class NECol2Im : public INESimpleFunctionNoBorder
 {
 public:
     /** Configure the col2im NEON kernel
@@ -56,5 +56,5 @@
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims);
 };
-}
+} // namespace arm_compute
 #endif /* __ARM_COMPUTE_NECOL2IM_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEColorConvert.h b/arm_compute/runtime/NEON/functions/NEColorConvert.h
index 73eb3f9..a3dd064 100644
--- a/arm_compute/runtime/NEON/functions/NEColorConvert.h
+++ b/arm_compute/runtime/NEON/functions/NEColorConvert.h
@@ -24,7 +24,7 @@
 #ifndef __ARM_COMPUTE_NECOLORCONVERT_H__
 #define __ARM_COMPUTE_NECOLORCONVERT_H__
 
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 namespace arm_compute
 {
@@ -33,7 +33,7 @@
 using IImage = ITensor;
 
 /**Basic function to run @ref NEColorConvertKernel to perform color conversion */
-class NEColorConvert : public INESimpleFunction
+class NEColorConvert : public INESimpleFunctionNoBorder
 {
 public:
     /** Initialize the function's source, destination
@@ -63,5 +63,5 @@
      */
     void configure(const IMultiImage *input, IMultiImage *output);
 };
-}
+} // namespace arm_compute
 #endif /*__ARM_COMPUTE_NECOLORCONVERT_H__*/
diff --git a/arm_compute/runtime/NEON/functions/NECopy.h b/arm_compute/runtime/NEON/functions/NECopy.h
index df06b1d..c476da5 100644
--- a/arm_compute/runtime/NEON/functions/NECopy.h
+++ b/arm_compute/runtime/NEON/functions/NECopy.h
@@ -25,14 +25,14 @@
 #define __ARM_COMPUTE_NECOPY_H__
 
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 namespace arm_compute
 {
 class ITensor;
 
 /** Basic function to run @ref NECopyKernel */
-class NECopy : public INESimpleFunction
+class NECopy : public INESimpleFunctionNoBorder
 {
 public:
     /** Initialise the function's source and destination.
diff --git a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
index 1fdad30..ebb9530 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
@@ -25,7 +25,7 @@
 #define __ARM_COMPUTE_NEDEPTHCONVERT_H__
 
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 #include <cstdint>
 
@@ -34,7 +34,7 @@
 class ITensor;
 
 /**Basic function to run @ref NEDepthConvertLayerKernel */
-class NEDepthConvertLayer : public INESimpleFunction
+class NEDepthConvertLayer : public INESimpleFunctionNoBorder
 {
 public:
     /* Contructor */
@@ -69,5 +69,5 @@
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift = 0);
 };
-}
+} // namespace arm_compute
 #endif /*__ARM_COMPUTE_NEDEPTHCONVERT_H__*/
diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseSeparableConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseSeparableConvolutionLayer.h
index 99e93cc..1281238 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthwiseSeparableConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthwiseSeparableConvolutionLayer.h
@@ -26,7 +26,6 @@
 
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
 #include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"
 #include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
 #include "arm_compute/runtime/Tensor.h"
@@ -76,5 +75,5 @@
     NEDepthwiseConvolutionLayer _depthwise_conv;
     NEDirectConvolutionLayer    _pointwise_conv;
 };
-}
+} // namespace arm_compute
 #endif /*__ARM_COMPUTE_NEON_DEPTHWISE_SEPARABLE_CONVOLUTION_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h
index 26d7c7f..3365b35 100644
--- a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h
@@ -25,14 +25,14 @@
 #define __ARM_COMPUTE_NEFLATTENLAYER_H__
 
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 namespace arm_compute
 {
 class ITensor;
 
 /** Basic function to execute flatten layer kernel. */
-class NEFlattenLayer : public INESimpleFunction
+class NEFlattenLayer : public INESimpleFunctionNoBorder
 {
 public:
     /** Initialise the kernel's input and output.
diff --git a/arm_compute/runtime/NEON/functions/NEFloor.h b/arm_compute/runtime/NEON/functions/NEFloor.h
index 92aa994..630a7fc 100644
--- a/arm_compute/runtime/NEON/functions/NEFloor.h
+++ b/arm_compute/runtime/NEON/functions/NEFloor.h
@@ -24,7 +24,7 @@
 #ifndef __ARM_COMPUTE_NEFLOOR_H__
 #define __ARM_COMPUTE_NEFLOOR_H__
 
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 #include "arm_compute/core/Types.h"
 
@@ -33,7 +33,7 @@
 class ITensor;
 
 /** Basic function to run @ref NEFloorKernel */
-class NEFloor : public INESimpleFunction
+class NEFloor : public INESimpleFunctionNoBorder
 {
 public:
     /** Set the source, destination of the kernel
@@ -51,5 +51,5 @@
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *output);
 };
-}
+} // namespace arm_compute
 #endif /* __ARM_COMPUTE_NEFLOOR_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
index 9c9074c..56ce274 100644
--- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
@@ -44,7 +44,7 @@
  *
  * @note  The fully connected layer accepts "weights" tensors only with 2 dimensions.
  */
-class NEFullyConnectedLayerReshapeWeights : public INESimpleFunction
+class NEFullyConnectedLayerReshapeWeights : public INESimpleFunctionNoBorder
 {
 public:
     /** Set the input and output tensors.
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h b/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h
index 4a6bec0..4d7f67b 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h
@@ -24,7 +24,7 @@
 #ifndef __ARM_COMPUTE_NEGEMMINTERLEAVE4X4_H__
 #define __ARM_COMPUTE_NEGEMMINTERLEAVE4X4_H__
 
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 namespace arm_compute
 {
@@ -35,7 +35,7 @@
  *  -# @ref NEGEMMInterleave4x4Kernel
  *
  */
-class NEGEMMInterleave4x4 : public INESimpleFunction
+class NEGEMMInterleave4x4 : public INESimpleFunctionNoBorder
 {
 public:
     /** Initialise the kernel's inputs, output
@@ -45,5 +45,5 @@
      */
     void configure(const ITensor *input, ITensor *output);
 };
-}
+} // namespace arm_compute
 #endif /*__ARM_COMPUTE_NEGEMMINTERLEAVE4X4_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h
index 53b91b3..77bfb98 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h
@@ -24,7 +24,7 @@
 #ifndef __ARM_COMPUTE_NEGEMMLOWPOUTPUTSTAGE_H__
 #define __ARM_COMPUTE_NEGEMMLOWPOUTPUTSTAGE_H__
 
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 /** This file contains all available output stages for GEMMLowp on NEON.
  *
@@ -56,7 +56,7 @@
  * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions
  *       after the result is shifted right by result_shift
 */
-class NEGEMMLowpQuantizeDownInt32ToUint8Scale : public INESimpleFunction
+class NEGEMMLowpQuantizeDownInt32ToUint8Scale : public INESimpleFunctionNoBorder
 {
 public:
     /** Initialise the kernel's inputs, output
@@ -116,7 +116,7 @@
  * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions
  *       after the result is shifted right by result_shift
 */
-class NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint : public INESimpleFunction
+class NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint : public INESimpleFunctionNoBorder
 {
 public:
     /** Initialise the kernel's inputs, output
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h
index 3f8e731..b44c5a3 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h
@@ -24,16 +24,18 @@
 #ifndef __ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H__
 #define __ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H__
 
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 namespace arm_compute
 {
+class ITensor;
+
 /** Basic function to execute NEGEMMTranspose1xWKernel. This function calls the following NEON kernels:
  *
  *  -# @ref NEGEMMTranspose1xWKernel
  *
  */
-class NEGEMMTranspose1xW : public INESimpleFunction
+class NEGEMMTranspose1xW : public INESimpleFunctionNoBorder
 {
 public:
     /** Initialise the kernel's inputs, output
@@ -51,5 +53,5 @@
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *output);
 };
-}
+} // namespace arm_compute
 #endif /*__ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEHOGDetector.h b/arm_compute/runtime/NEON/functions/NEHOGDetector.h
index 98b8a89..f41e49b 100644
--- a/arm_compute/runtime/NEON/functions/NEHOGDetector.h
+++ b/arm_compute/runtime/NEON/functions/NEHOGDetector.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,7 +26,7 @@
 
 #include "arm_compute/core/IHOG.h"
 #include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 namespace arm_compute
 {
@@ -35,7 +35,7 @@
  * -# @ref NEHOGDetectorKernel
  *
  */
-class NEHOGDetector : public INESimpleFunction
+class NEHOGDetector : public INESimpleFunctionNoBorder
 {
 public:
     /** Initialise the kernel's input, output, HOG data object, detection window stride, threshold and index class
@@ -52,6 +52,6 @@
      */
     void configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, size_t idx_class = 0);
 };
-}
+} // namespace arm_compute
 
 #endif /* __ARM_COMPUTE_NEHOGDETECTOR_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEIm2Col.h b/arm_compute/runtime/NEON/functions/NEIm2Col.h
index de4780f..e281dce 100644
--- a/arm_compute/runtime/NEON/functions/NEIm2Col.h
+++ b/arm_compute/runtime/NEON/functions/NEIm2Col.h
@@ -24,7 +24,7 @@
 #ifndef __ARM_COMPUTE_NEIM2COL_H__
 #define __ARM_COMPUTE_NEIM2COL_H__
 
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 #include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
 #include "arm_compute/core/Size2D.h"
@@ -78,5 +78,5 @@
     NEIm2ColKernel _kernel;
     unsigned int   _y_dim;
 };
-}
+} // namespace arm_compute
 #endif /* __ARM_COMPUTE_NEIM2COL_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayer.h b/arm_compute/runtime/NEON/functions/NELSTMLayer.h
index 9c4ab2b..b98e74d 100644
--- a/arm_compute/runtime/NEON/functions/NELSTMLayer.h
+++ b/arm_compute/runtime/NEON/functions/NELSTMLayer.h
@@ -31,7 +31,6 @@
 #include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h"
 
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
 #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
 #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
 #include "arm_compute/runtime/NEON/functions/NEGEMM.h"
diff --git a/arm_compute/runtime/NEON/functions/NEMagnitude.h b/arm_compute/runtime/NEON/functions/NEMagnitude.h
index 6aabe9d..9fe043b 100644
--- a/arm_compute/runtime/NEON/functions/NEMagnitude.h
+++ b/arm_compute/runtime/NEON/functions/NEMagnitude.h
@@ -24,14 +24,14 @@
 #ifndef __ARM_COMPUTE_NEMAGNITUDE_H__
 #define __ARM_COMPUTE_NEMAGNITUDE_H__
 
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 namespace arm_compute
 {
 class ITensor;
 
-/** Basic function to run NEMagnitudePhaseKernel */
-class NEMagnitude : public INESimpleFunction
+/** Basic function to run @ref NEMagnitudePhaseKernel */
+class NEMagnitude : public INESimpleFunctionNoBorder
 {
 public:
     /** Initialise the kernel's inputs.
@@ -43,5 +43,5 @@
      */
     void configure(const ITensor *input1, const ITensor *input2, ITensor *output, MagnitudeType mag_type = MagnitudeType::L2NORM);
 };
-}
+} // namespace arm_compute
 #endif /*__ARM_COMPUTE_NEMAGNITUDE_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEPermute.h b/arm_compute/runtime/NEON/functions/NEPermute.h
index 580d24e..43789e6 100644
--- a/arm_compute/runtime/NEON/functions/NEPermute.h
+++ b/arm_compute/runtime/NEON/functions/NEPermute.h
@@ -24,7 +24,7 @@
 #ifndef __ARM_COMPUTE_NEPERMUTE_H__
 #define __ARM_COMPUTE_NEPERMUTE_H__
 
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 #include "arm_compute/core/Types.h"
 
@@ -33,7 +33,7 @@
 class ITensor;
 
 /** Basic function to run @ref NEPermuteKernel */
-class NEPermute : public INESimpleFunction
+class NEPermute : public INESimpleFunctionNoBorder
 {
 public:
     /** Configure the permute NEON kernel
@@ -57,5 +57,5 @@
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm);
 };
-}
+} // namespace arm_compute
 #endif /* __ARM_COMPUTE_NEPERMUTE_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEPhase.h b/arm_compute/runtime/NEON/functions/NEPhase.h
index cd62cf9..d096cf8 100644
--- a/arm_compute/runtime/NEON/functions/NEPhase.h
+++ b/arm_compute/runtime/NEON/functions/NEPhase.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,14 +24,14 @@
 #ifndef __ARM_COMPUTE_NEPHASE_H__
 #define __ARM_COMPUTE_NEPHASE_H__
 
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 namespace arm_compute
 {
 class ITensor;
 
-/** Basic function to run NEMagnitudePhaseKernel */
-class NEPhase : public INESimpleFunction
+/** Basic function to run @ref NEMagnitudePhaseKernel */
+class NEPhase : public INESimpleFunctionNoBorder
 {
 public:
     /** Initialise the kernel's inputs, output.
@@ -43,5 +43,5 @@
      */
     void configure(const ITensor *input1, const ITensor *input2, ITensor *output, PhaseType phase_type = PhaseType::SIGNED);
 };
-}
+} // namespace arm_compute
 #endif /*__ARM_COMPUTE_NEPHASE_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h b/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h
index 34ba39d..a7a2034 100644
--- a/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h
@@ -26,14 +26,14 @@
 
 #include "arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h"
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 namespace arm_compute
 {
 class ITensor;
 
 /** Basic function to run @ref NEPriorBoxLayerKernel. */
-class NEPriorBoxLayer : public INESimpleFunction
+class NEPriorBoxLayer : public INESimpleFunctionNoBorder
 {
 public:
     /** Set the input and output tensors.
diff --git a/arm_compute/runtime/NEON/functions/NERNNLayer.h b/arm_compute/runtime/NEON/functions/NERNNLayer.h
index bdba42d..ec39439 100644
--- a/arm_compute/runtime/NEON/functions/NERNNLayer.h
+++ b/arm_compute/runtime/NEON/functions/NERNNLayer.h
@@ -27,7 +27,6 @@
 #include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h"
 #include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
 #include "arm_compute/core/NEON/kernels/NECopyKernel.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
 
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
diff --git a/arm_compute/runtime/NEON/functions/NEReorgLayer.h b/arm_compute/runtime/NEON/functions/NEReorgLayer.h
index a737528..716f164 100644
--- a/arm_compute/runtime/NEON/functions/NEReorgLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEReorgLayer.h
@@ -25,7 +25,7 @@
 #define __ARM_COMPUTE_NEREORGLAYER_H__
 
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 namespace arm_compute
 {
@@ -33,7 +33,7 @@
 class ITensor;
 
 /** Basic function to run @ref NEReorgLayerKernel */
-class NEReorgLayer : public INESimpleFunction
+class NEReorgLayer : public INESimpleFunctionNoBorder
 {
 public:
     /** Initialise the kernel's inputs and outputs
diff --git a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
index 01fe3bd..8896b4f 100644
--- a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
@@ -25,14 +25,14 @@
 #define __ARM_COMPUTE_NERESHAPELAYER_H__
 
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 namespace arm_compute
 {
 class ITensor;
 
 /** Basic function to run @ref NEReshapeLayerKernel */
-class NEReshapeLayer : public INESimpleFunction
+class NEReshapeLayer : public INESimpleFunctionNoBorder
 {
 public:
     /** Initialise the kernel's inputs and outputs
@@ -51,5 +51,5 @@
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *output);
 };
-}
+} // namespace arm_compute
 #endif /*__ARM_COMPUTE_NERESHAPELAYER_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NETableLookup.h b/arm_compute/runtime/NEON/functions/NETableLookup.h
index b59ffb8..2a49aee 100644
--- a/arm_compute/runtime/NEON/functions/NETableLookup.h
+++ b/arm_compute/runtime/NEON/functions/NETableLookup.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,7 +24,7 @@
 #ifndef __ARM_COMPUTE_NETABLELOOKUP_H__
 #define __ARM_COMPUTE_NETABLELOOKUP_H__
 
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 namespace arm_compute
 {
@@ -32,7 +32,7 @@
 class ILut;
 
 /** Basic function to run @ref NETableLookupKernel */
-class NETableLookup : public INESimpleFunction
+class NETableLookup : public INESimpleFunctionNoBorder
 {
 public:
     /** Initialise the kernel's inputs and output
@@ -43,5 +43,5 @@
      */
     void configure(const ITensor *input, const ILut *lut, ITensor *output);
 };
-}
+} // namespace arm_compute
 #endif /*__ARM_COMPUTE_NETABLELOOKUP_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEThreshold.h b/arm_compute/runtime/NEON/functions/NEThreshold.h
index d407ee5..69a437b 100644
--- a/arm_compute/runtime/NEON/functions/NEThreshold.h
+++ b/arm_compute/runtime/NEON/functions/NEThreshold.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,7 +25,7 @@
 #define __ARM_COMPUTE_NETHRESHOLD_H__
 
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 #include <cstdint>
 
@@ -34,7 +34,7 @@
 class ITensor;
 
 /** Basic function to run @ref NEThresholdKernel */
-class NEThreshold : public INESimpleFunction
+class NEThreshold : public INESimpleFunctionNoBorder
 {
 public:
     /** Initialise the function's source, destination, thresholds and threshold type
@@ -50,5 +50,5 @@
     void configure(const ITensor *input, ITensor *output, uint8_t threshold, uint8_t false_value = 0, uint8_t true_value = 0,
                    ThresholdType type = ThresholdType::BINARY, uint8_t upper = 0);
 };
-}
+} // namespace arm_compute
 #endif /*__ARM_COMPUTE_NETHRESHOLD_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NETranspose.h b/arm_compute/runtime/NEON/functions/NETranspose.h
index 0234288..08ee3a6 100644
--- a/arm_compute/runtime/NEON/functions/NETranspose.h
+++ b/arm_compute/runtime/NEON/functions/NETranspose.h
@@ -25,7 +25,7 @@
 #define __ARM_COMPUTE_NETRANSPOSE_H__
 
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 namespace arm_compute
 {
@@ -36,7 +36,7 @@
  *  -# @ref NETransposeKernel
  *
  */
-class NETranspose : public INESimpleFunction
+class NETranspose : public INESimpleFunctionNoBorder
 {
 public:
     /** Initialise the kernel's inputs and output
@@ -54,6 +54,6 @@
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *output);
 };
-}
+} // namespace arm_compute
 
 #endif /* __ARM_COMPUTE_NETRANSPOSE_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEYOLOLayer.h b/arm_compute/runtime/NEON/functions/NEYOLOLayer.h
index e09dd42..0adc0f1 100644
--- a/arm_compute/runtime/NEON/functions/NEYOLOLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEYOLOLayer.h
@@ -24,7 +24,7 @@
 #ifndef __ARM_COMPUTE_NEYOLOLAYER_H__
 #define __ARM_COMPUTE_NEYOLOLAYER_H__
 
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 #include "arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h"
 #include "arm_compute/core/Types.h"
@@ -34,7 +34,7 @@
 class ITensor;
 
 /** Basic function to run @ref NEYOLOLayerKernel */
-class NEYOLOLayer : public INESimpleFunction
+class NEYOLOLayer : public INESimpleFunctionNoBorder
 {
 public:
     /** Set the input and output tensor.
