Release 18.05
diff --git a/Android.bp b/Android.bp
index 0daab06..03b2ded 100644
--- a/Android.bp
+++ b/Android.bp
@@ -19,6 +19,7 @@
         "clframework/src/core/AccessWindowAutoPadding.cpp",
         "clframework/src/core/AccessWindowStatic.cpp",
         "clframework/src/core/AccessWindowTranspose.cpp",
+        "clframework/src/core/GPUTarget.cpp",
         "clframework/src/core/CL/CLHelpers.cpp",
         "clframework/src/core/CL/CLKernelLibrary.cpp",
         "clframework/src/core/CL/ICLDistribution1D.cpp",
@@ -48,10 +49,12 @@
         "clframework/src/core/CL/kernels/CLCol2ImKernel.cpp",
         "clframework/src/core/CL/kernels/CLColorConvertKernel.cpp",
         "clframework/src/core/CL/kernels/CLConvolutionKernel.cpp",
+        "clframework/src/core/CL/kernels/CLCopyKernel.cpp",
         "clframework/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp",
         "clframework/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp",
         "clframework/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp",
-        "clframework/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp",
+        "clframework/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp",
+        "clframework/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp",
         "clframework/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp",
         "clframework/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp",
         "clframework/src/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.cpp",
@@ -115,12 +118,17 @@
         "clframework/src/core/CL/kernels/CLWarpAffineKernel.cpp",
         "clframework/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp",
         "clframework/src/core/CL/kernels/CLWeightsReshapeKernel.cpp",
+        "clframework/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp",
+        "clframework/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp",
+        "clframework/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp",
         "clframework/src/core/CL/OpenCL.cpp",
+        "clframework/src/core/CPP/CPPTypes.cpp",
         "clframework/src/core/CPP/ICPPSimpleKernel.cpp",
         "clframework/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp",
         "clframework/src/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.cpp",
         "clframework/src/core/CPP/kernels/CPPPermuteKernel.cpp",
         "clframework/src/core/CPP/kernels/CPPSortEuclideanDistanceKernel.cpp",
+        "clframework/src/core/CPP/kernels/CPPUpsampleKernel.cpp",
         "clframework/src/core/Error.cpp",
         "clframework/src/core/Helpers.cpp",
         "clframework/src/core/HOGInfo.cpp",
@@ -130,6 +138,12 @@
         "clframework/src/core/IKernel.cpp",
         "clframework/src/core/ITensor.cpp",
         "clframework/src/core/MultiImageInfo.cpp",
+        "clframework/src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp",
+        "clframework/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp",
+        "clframework/src/core/NEON/kernels/arm_gemm/gemm_int16.cpp",
+        "clframework/src/core/NEON/kernels/arm_gemm/gemm_int8.cpp",
+        "clframework/src/core/NEON/kernels/arm_gemm/gemm_uint16.cpp",
+        "clframework/src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp",
         "clframework/src/core/NEON/kernels/convolution/common/utils.cpp",
         "clframework/src/core/NEON/kernels/convolution/depthwise/depthwise_2x2_3x3_1x1_fp32_fp32.cpp",
         "clframework/src/core/NEON/kernels/convolution/depthwise/depthwise_2x2_3x3_2x2_fp32_fp32.cpp",
@@ -187,7 +201,6 @@
         "clframework/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp",
         "clframework/src/core/NEON/kernels/NEGaussianPyramidKernel.cpp",
         "clframework/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp",
-        "clframework/src/core/NEON/kernels/NEGEMMInterleaveBlockedKernel.cpp",
         "clframework/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp",
         "clframework/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp",
         "clframework/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp",
@@ -234,7 +247,7 @@
         "clframework/src/core/NEON/kernels/NETransposeKernel.cpp",
         "clframework/src/core/NEON/kernels/NEWarpKernel.cpp",
         "clframework/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp",
-        "clframework/src/core/NEON/kernels/NEWinogradLayerKernel.cpp",
+        "clframework/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp",
         "clframework/src/core/PyramidInfo.cpp",
         "clframework/src/core/Rounding.cpp",
         "clframework/src/core/SubTensorInfo.cpp",
@@ -255,6 +268,8 @@
         "clframework/src/runtime/CL/CLHOG.cpp",
         "clframework/src/runtime/CL/CLLutAllocator.cpp",
         "clframework/src/runtime/CL/CLLut.cpp",
+        "clframework/src/runtime/CL/CLMemory.cpp",
+        "clframework/src/runtime/CL/CLMemoryRegion.cpp",
         "clframework/src/runtime/CL/CLMultiHOG.cpp",
         "clframework/src/runtime/CL/CLMultiImage.cpp",
         "clframework/src/runtime/CL/CLPyramid.cpp",
@@ -280,6 +295,7 @@
         "clframework/src/runtime/CL/functions/CLColorConvert.cpp",
         "clframework/src/runtime/CL/functions/CLConvolution.cpp",
         "clframework/src/runtime/CL/functions/CLConvolutionLayer.cpp",
+        "clframework/src/runtime/CL/functions/CLCopy.cpp",
         "clframework/src/runtime/CL/functions/CLDeconvolutionLayer.cpp",
         "clframework/src/runtime/CL/functions/CLDeconvolutionLayerUpsample.cpp",
         "clframework/src/runtime/CL/functions/CLDepthConcatenateLayer.cpp",
@@ -333,6 +349,7 @@
         "clframework/src/runtime/CL/functions/CLReductionOperation.cpp",
         "clframework/src/runtime/CL/functions/CLRemap.cpp",
         "clframework/src/runtime/CL/functions/CLReshapeLayer.cpp",
+        "clframework/src/runtime/CL/functions/CLRNNLayer.cpp",
         "clframework/src/runtime/CL/functions/CLROIPoolingLayer.cpp",
         "clframework/src/runtime/CL/functions/CLScale.cpp",
         "clframework/src/runtime/CL/functions/CLScharr3x3.cpp",
@@ -345,11 +362,16 @@
         "clframework/src/runtime/CL/functions/CLTranspose.cpp",
         "clframework/src/runtime/CL/functions/CLWarpAffine.cpp",
         "clframework/src/runtime/CL/functions/CLWarpPerspective.cpp",
+        "clframework/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp",
+        "clframework/src/runtime/CL/functions/CLWinogradInputTransform.cpp",
         "clframework/src/runtime/CL/ICLSimpleFunction.cpp",
+        "clframework/src/runtime/CL/tuners/BifrostTuner.cpp",
         "clframework/src/runtime/CPP/CPPScheduler.cpp",
         "clframework/src/runtime/CPP/functions/CPPPermute.cpp",
+        "clframework/src/runtime/CPP/functions/CPPUpsample.cpp",
         "clframework/src/runtime/CPP/ICPPSimpleFunction.cpp",
         "clframework/src/runtime/CPP/SingleThreadScheduler.cpp",
+        "clframework/src/runtime/CPUUtils.cpp",
         "clframework/src/runtime/Distribution1D.cpp",
         "clframework/src/runtime/HOG.cpp",
         "clframework/src/runtime/ILutAllocator.cpp",
@@ -446,7 +468,7 @@
         "clframework/src/runtime/NEON/functions/NETranspose.cpp",
         "clframework/src/runtime/NEON/functions/NEWarpAffine.cpp",
         "clframework/src/runtime/NEON/functions/NEWarpPerspective.cpp",
-        "clframework/src/runtime/NEON/functions/NEWinogradLayer.cpp",
+        "clframework/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp",
         "clframework/src/runtime/NEON/INESimpleFunction.cpp",
         "clframework/src/runtime/OffsetLifetimeManager.cpp",
         "clframework/src/runtime/OffsetMemoryPool.cpp",
@@ -463,16 +485,32 @@
     ],
     arch: {
         arm: {
-            srcs: ["clframework/src/core/NEON/kernels/arm32/NEGEMMAArch32Kernel.cpp"],
+            srcs: [
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/a53.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/a55r1.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/generic.cpp"
+            ],
         },
         arm64: {
-            srcs: ["clframework/src/core/NEON/kernels/arm64/NEGEMMAArch64Kernel.cpp",
-                   "clframework/src/core/NEON/kernels/arm64/NEGEMMAArch64NativeKernel.cpp",
-                   "clframework/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64A53Kernel.cpp",
-                   "clframework/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64Kernel.cpp",
-                   "clframework/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.cpp",
-                   "clframework/src/core/NEON/kernels/arm64/NEGEMVAArch64Kernel.cpp",
-                   "clframework/src/core/NEON/kernels/arm64/NEHGEMMAArch64FP16Kernel.cpp"],
+            srcs: [
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s16_12x8/generic.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_12x8/a55r1.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_12x8/generic.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4/generic.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_12x8/generic.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_12x8/a55r1.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_12x8/generic.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4/generic.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_24x8/a55r1.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_24x8/generic.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_12x8/a53.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_12x8/a55.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_12x8/a55r1.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_12x8/generic.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_native_16x4/generic.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemv_pretransposed/generic.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemv_trans/generic.cpp"
+            ],
         },
     },
     cppflags: [
@@ -481,6 +519,8 @@
         "-DEMBEDDED_KERNELS",
         "-DARM_COMPUTE_ASSERTS_ENABLED",
         "-Wno-unused-parameter",
+        "-DNO_DOT_IN_TOOLCHAIN",
+        "-no-integrated-as"
     ],
     rtti: true,
 }
@@ -492,6 +532,7 @@
 ////////////////////////////////////////////
 cc_defaults {
     name: "libboost-defaults",
+    proprietary: true,
     export_include_dirs: ["boost_1_64_0"],
     cflags: [
         "-O3",