COMPMID-518 - Bare metal support

Change-Id: Ida6d3dc46476fd9a67b5860e5e5bf8b848a8ac23
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/85981
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Moritz Pflanzer <moritz.pflanzer@arm.com>
diff --git a/SConstruct b/SConstruct
index d98a99b..34b717b 100644
--- a/SConstruct
+++ b/SConstruct
@@ -103,17 +103,22 @@
 if env['arch'] == 'armv7a':
     env.Append(CXXFLAGS = ['-march=armv7-a', '-mthumb', '-mfpu=neon'])
 
-    if env['os'] in ['linux', 'bare_metal']:
+    if env['os'] == 'linux':
         prefix = "arm-linux-gnueabihf-"
         env.Append(CXXFLAGS = ['-mfloat-abi=hard'])
+    elif env['os'] == 'bare_metal':
+        prefix = "arm-none-eabi-"
+        env.Append(CXXFLAGS = ['-mfloat-abi=hard'])
     elif env['os'] == 'android':
         prefix = "arm-linux-androideabi-"
         env.Append(CXXFLAGS = ['-mfloat-abi=softfp'])
 elif env['arch'] == 'arm64-v8a':
     env.Append(CXXFLAGS = ['-march=armv8-a'])
 
-    if env['os'] in ['linux', 'bare_metal']:
+    if env['os'] == 'linux':
         prefix = "aarch64-linux-gnu-"
+    elif env['os'] == 'bare_metal':
+        prefix = "aarch64-none-elf-"
     elif env['os'] == 'android':
         prefix = "aarch64-linux-android-"
 elif env['arch'] == 'arm64-v8.2-a':
@@ -174,8 +179,10 @@
     env.Append(LINKFLAGS = ['-pie', '-static-libstdc++'])
 elif env['os'] == 'bare_metal':
     env.Append(LINKFLAGS = ['-static'])
+    env.Append(LINKFLAGS = ['-specs=rdimon.specs'])
     env.Append(CXXFLAGS = ['-fPIC'])
     env.Append(CPPDEFINES = ['NO_MULTI_THREADING'])
+    env.Append(CPPDEFINES = ['BARE_METAL'])
 
 if env['opencl']:
     if env['os'] == 'bare_metal':
diff --git a/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h b/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h
index 0866d4e..28edb48 100644
--- a/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h
+++ b/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h
@@ -26,9 +26,9 @@
 
 #include "arm_compute/core/IArray.h"
 #include "arm_compute/core/NEON/INEKernel.h"
+#include "support/Mutex.h"
 
 #include <cstdint>
-#include <mutex>
 
 namespace arm_compute
 {
@@ -65,10 +65,10 @@
     void run(const Window &window) override;
 
 private:
-    int32_t          *_num_corner_candidates;   /**< Number of corner candidates */
-    std::mutex        _corner_candidates_mutex; /**< Mutex to preventing race conditions */
-    const IImage     *_input;                   /**< Source image - Harris score */
-    InternalKeypoint *_output;                  /**< Array of NEInternalKeypoint */
+    int32_t           *_num_corner_candidates;   /**< Number of corner candidates */
+    arm_compute::Mutex _corner_candidates_mutex; /**< Mutex to prevent race conditions */
+    const IImage      *_input;                   /**< Source image - Harris score */
+    InternalKeypoint  *_output;                  /**< Array of NEInternalKeypoint */
 };
 } //namespace arm_compute
 #endif /* __ARM_COMPUTE_CPPCORNERCANDIDATESKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h b/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h
index e56d1e5..b6b9d34 100644
--- a/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h
+++ b/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h
@@ -27,8 +27,7 @@
 #include "arm_compute/core/IArray.h"
 #include "arm_compute/core/IHOG.h"
 #include "arm_compute/core/NEON/INEKernel.h"
-
-#include <mutex>
+#include "support/Mutex.h"
 
 namespace arm_compute
 {
@@ -80,7 +79,7 @@
     size_t                 _detection_window_width;
     size_t                 _detection_window_height;
     size_t                 _max_num_detection_windows;
-    std::mutex             _mutex;
+    arm_compute::Mutex     _mutex;
 };
 }
 
diff --git a/arm_compute/core/NEON/kernels/NEHistogramKernel.h b/arm_compute/core/NEON/kernels/NEHistogramKernel.h
index c4dbbea..2c0f34b 100644
--- a/arm_compute/core/NEON/kernels/NEHistogramKernel.h
+++ b/arm_compute/core/NEON/kernels/NEHistogramKernel.h
@@ -25,10 +25,10 @@
 #define __ARM_COMPUTE_NEHISTOGRAMKERNEL_H__
 
 #include "arm_compute/core/NEON/INEKernel.h"
+#include "support/Mutex.h"
 
 #include <cstddef>
 #include <cstdint>
-#include <mutex>
 
 namespace arm_compute
 {
@@ -122,7 +122,7 @@
     IDistribution1D              *_output;
     uint32_t                     *_local_hist;
     uint32_t                     *_window_lut;
-    std::mutex                    _hist_mtx;
+    arm_compute::Mutex            _hist_mtx;
     static constexpr unsigned int _max_range_size{ 256 }; ///< 256 possible pixel values as we handle only U8 images
 };
 }
diff --git a/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h b/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h
index 9c72b20..ce2c5eb 100644
--- a/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h
+++ b/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h
@@ -25,9 +25,9 @@
 #define __ARM_COMPUTE_NEMEANSTDDEVKERNEL_H__
 
 #include "arm_compute/core/NEON/INEKernel.h"
+#include "support/Mutex.h"
 
 #include <cstdint>
-#include <mutex>
 
 namespace arm_compute
 {
@@ -67,13 +67,13 @@
     BorderSize border_size() const override;
 
 private:
-    const IImage *_input;
-    float        *_mean;
-    float        *_stddev;
-    uint64_t     *_global_sum;
-    uint64_t     *_global_sum_squared;
-    std::mutex    _mtx;
-    BorderSize    _border_size;
+    const IImage      *_input;
+    float             *_mean;
+    float             *_stddev;
+    uint64_t          *_global_sum;
+    uint64_t          *_global_sum_squared;
+    arm_compute::Mutex _mtx;
+    BorderSize         _border_size;
 };
 }
 #endif /* __ARM_COMPUTE_NEMEANSTDDEVKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h b/arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h
index 7b2748d..ddaa017 100644
--- a/arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h
+++ b/arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h
@@ -26,9 +26,9 @@
 
 #include "arm_compute/core/IArray.h"
 #include "arm_compute/core/NEON/INEKernel.h"
+#include "support/Mutex.h"
 
 #include <cstdint>
-#include <mutex>
 
 namespace arm_compute
 {
@@ -92,10 +92,10 @@
     template <typename T>
     void update_min_max(T min, T max);
 
-    const IImage *_input; /**< Input image. */
-    void         *_min;   /**< Minimum value. */
-    void         *_max;   /**< Maximum value. */
-    std::mutex    _mtx;   /**< Mutex used for result reduction. */
+    const IImage      *_input; /**< Input image. */
+    void              *_min;   /**< Minimum value. */
+    void              *_max;   /**< Maximum value. */
+    arm_compute::Mutex _mtx;   /**< Mutex used for result reduction. */
 };
 
 /** Interface for the kernel to find min max locations of an image. */
diff --git a/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp b/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp
index 84e12d4..c7ac753 100644
--- a/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp
+++ b/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp
@@ -37,12 +37,12 @@
 
 namespace
 {
-inline void check_corner(float x, float y, float strength, InternalKeypoint *output, int32_t *num_corner_candidates, std::mutex *corner_candidates_mutex)
+inline void check_corner(float x, float y, float strength, InternalKeypoint *output, int32_t *num_corner_candidates, arm_compute::Mutex *corner_candidates_mutex)
 {
     if(strength != 0.0f)
     {
         /* Set index and update num_corner_candidate */
-        std::unique_lock<std::mutex> lock(*corner_candidates_mutex);
+        std::unique_lock<arm_compute::Mutex> lock(*corner_candidates_mutex);
 
         const int32_t idx = *num_corner_candidates;
 
@@ -55,7 +55,7 @@
     }
 }
 
-inline void corner_candidates(const float *__restrict input, InternalKeypoint *__restrict output, int32_t x, int32_t y, int32_t *num_corner_candidates, std::mutex *corner_candidates_mutex)
+inline void corner_candidates(const float *__restrict input, InternalKeypoint *__restrict output, int32_t x, int32_t y, int32_t *num_corner_candidates, arm_compute::Mutex *corner_candidates_mutex)
 {
     check_corner(x, y, *input, output, num_corner_candidates, corner_candidates_mutex);
 }
diff --git a/src/core/NEON/kernels/NEHOGDetectorKernel.cpp b/src/core/NEON/kernels/NEHOGDetectorKernel.cpp
index 4af22bc..9dd50ca 100644
--- a/src/core/NEON/kernels/NEHOGDetectorKernel.cpp
+++ b/src/core/NEON/kernels/NEHOGDetectorKernel.cpp
@@ -176,7 +176,7 @@
                 win.idx_class = _idx_class;
                 win.score     = score;
 
-                std::unique_lock<std::mutex> lock(_mutex);
+                std::unique_lock<arm_compute::Mutex> lock(_mutex);
                 _detection_windows->push_back(win);
                 lock.unlock();
             }
diff --git a/src/core/NEON/kernels/NEHistogramKernel.cpp b/src/core/NEON/kernels/NEHistogramKernel.cpp
index 9e967ec..2018651 100644
--- a/src/core/NEON/kernels/NEHistogramKernel.cpp
+++ b/src/core/NEON/kernels/NEHistogramKernel.cpp
@@ -44,7 +44,7 @@
 
 inline void NEHistogramKernel::merge_histogram(uint32_t *global_hist, const uint32_t *local_hist, size_t bins)
 {
-    std::lock_guard<std::mutex> lock(_hist_mtx);
+    std::lock_guard<arm_compute::Mutex> lock(_hist_mtx);
 
     const unsigned int v_end = (bins / 4) * 4;
 
diff --git a/src/core/NEON/kernels/NEMeanStdDevKernel.cpp b/src/core/NEON/kernels/NEMeanStdDevKernel.cpp
index 579c46f..cb32a3b 100644
--- a/src/core/NEON/kernels/NEMeanStdDevKernel.cpp
+++ b/src/core/NEON/kernels/NEMeanStdDevKernel.cpp
@@ -141,7 +141,7 @@
     const float num_pixels = _input->info()->dimension(0) * _input->info()->dimension(1);
 
     // Merge sum and calculate mean and stddev
-    std::unique_lock<std::mutex> lock(_mtx);
+    std::unique_lock<arm_compute::Mutex> lock(_mtx);
 
     *_global_sum += vget_lane_u64(local_sum, 0);
 
diff --git a/src/core/NEON/kernels/NEMinMaxLocationKernel.cpp b/src/core/NEON/kernels/NEMinMaxLocationKernel.cpp
index a6da7f4..24c7af7 100644
--- a/src/core/NEON/kernels/NEMinMaxLocationKernel.cpp
+++ b/src/core/NEON/kernels/NEMinMaxLocationKernel.cpp
@@ -115,7 +115,7 @@
 template <typename T>
 void NEMinMaxKernel::update_min_max(const T min, const T max)
 {
-    std::lock_guard<std::mutex> lock(_mtx);
+    std::lock_guard<arm_compute::Mutex> lock(_mtx);
 
     using type = typename std::conditional<std::is_same<T, float>::value, float, int32_t>::type;
 
diff --git a/support/Mutex.h b/support/Mutex.h
new file mode 100644
index 0000000..6d0a387
--- /dev/null
+++ b/support/Mutex.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_MUTEX_H__
+#define __ARM_COMPUTE_MUTEX_H__
+
+#include <mutex>
+
+namespace arm_compute
+{
+#ifndef NO_MULTI_THREADING
+using Mutex = std::mutex;
+#else  /* NO_MULTI_THREADING */
+/** No-op stand-in for std::mutex, selected when NO_MULTI_THREADING is defined */
+class Mutex
+{
+public:
+    /** Default constructor and destructor: the class holds no state */
+    Mutex()  = default;
+    ~Mutex() = default;
+    /** Does nothing: there is no lock state to acquire */
+    void lock() {}
+    /** Does nothing: there is no lock state to release */
+    void unlock() {}
+    /** Always reports success. @return true */
+    bool try_lock()
+    {
+        return true;
+    }
+};
+#endif /* NO_MULTI_THREADING */
+}
+#endif /* __ARM_COMPUTE_MUTEX_H__ */
diff --git a/support/ToolchainSupport.h b/support/ToolchainSupport.h
index c73f248..87e9bd2 100644
--- a/support/ToolchainSupport.h
+++ b/support/ToolchainSupport.h
@@ -40,7 +40,7 @@
 {
 namespace cpp11
 {
-#ifdef __ANDROID__
+#if defined(__ANDROID__) || defined(BARE_METAL)
 /** Convert integer and float values to string.
  *
  * @note This function implements the same behaviour as std::to_string. The
@@ -154,7 +154,7 @@
 {
     return ::copysign(x, y);
 }
-#else  /* __ANDROID__ */
+#else  /* (__ANDROID__ || BARE_METAL) */
 /** Convert integer and float values to string.
  *
  * @note This function acts as a convenience wrapper around std::to_string. The
@@ -260,7 +260,7 @@
 {
     return std::copysign(x, y);
 }
-#endif /* __ANDROID__ */
+#endif /* (__ANDROID__ || BARE_METAL) */
 
 inline std::string to_string(bool value)
 {