MLECO-2682: CMake and source refactoring.

MLECO-2930: logging macros were extracted from hal.h and used separately around the code.

MLECO-2931: arm_math lib introduced, cmsis-dsp removed from top level linkage.

MLECO-2915: platform related post-build steps.

Change-Id: Id718884e22f262a5c070ded3f3f5d4b048820147
Signed-off-by: alexander <alexander.efremov@arm.com>
diff --git a/source/math/CMakeLists.txt b/source/math/CMakeLists.txt
new file mode 100644
index 0000000..eab6622
--- /dev/null
+++ b/source/math/CMakeLists.txt
@@ -0,0 +1,44 @@
+#----------------------------------------------------------------------------
+#  Copyright (c) 2022 Arm Limited. All rights reserved.
+#  SPDX-License-Identifier: Apache-2.0
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#----------------------------------------------------------------------------
+
+#######################################################
+# Math functions for ML pipelines.                    #
+#######################################################
+
+project(arm_math
+        DESCRIPTION "Collection of the optimised math functions for ML pipelines"
+        LANGUAGES C CXX)
+
+add_library(arm_math STATIC)
+
+target_sources(arm_math
+        PRIVATE
+        PlatformMath.cc)
+
+target_include_directories(arm_math PUBLIC include)
+
+target_link_libraries(arm_math PRIVATE log)
+
+if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL cortex-m55)
+    include(${CMAKE_SCRIPTS_DIR}/cmsis-dsp.cmake)
+    target_link_libraries(arm_math PUBLIC cmsis-dsp)
+endif ()
+
+message(STATUS "*******************************************************")
+message(STATUS "Library                                : " arm_math)
+message(STATUS "CMAKE_SYSTEM_PROCESSOR                 : " ${CMAKE_SYSTEM_PROCESSOR})
+message(STATUS "*******************************************************")
diff --git a/source/math/PlatformMath.cc b/source/math/PlatformMath.cc
new file mode 100644
index 0000000..cc603f3
--- /dev/null
+++ b/source/math/PlatformMath.cc
@@ -0,0 +1,308 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "PlatformMath.hpp"
+#include "log_macros.h"
+#include <algorithm>
+
+namespace arm {
+namespace app {
+namespace math {
+
+    float MathUtils::CosineF32(float radians)
+    {
+#if ARM_MATH_DSP
+        return arm_cos_f32(radians);
+#else /* ARM_MATH_DSP */
+        return cosf(radians);
+#endif /* ARM_MATH_DSP */
+    }
+
+    float MathUtils::SineF32(float radians)
+    {
+#if ARM_MATH_DSP
+        return arm_sin_f32(radians);
+#else /* ARM_MATH_DSP */
+        return sinf(radians);
+#endif /* ARM_MATH_DSP */
+    }
+
+    float MathUtils::SqrtF32(float input)
+    {
+#if ARM_MATH_DSP
+        float output = 0.f;
+        arm_sqrt_f32(input, &output);
+        return output;
+#else /* ARM_MATH_DSP */
+        return sqrtf(input);
+#endif /* ARM_MATH_DSP */
+    }
+
+    float MathUtils::MeanF32(float* ptrSrc, const uint32_t srcLen)
+    {
+        if (!srcLen) {
+            return 0.f;
+        }
+
+#if ARM_MATH_DSP
+        float result = 0.f;
+        arm_mean_f32(ptrSrc, srcLen, &result);
+        return result;
+#else /* ARM_MATH_DSP */
+        float acc = std::accumulate(ptrSrc, ptrSrc + srcLen, 0.0);
+        return acc/srcLen;
+#endif /* ARM_MATH_DSP */
+    }
+
+    float MathUtils::StdDevF32(float* ptrSrc, const uint32_t srcLen,
+                               const float mean)
+    {
+        if (!srcLen) {
+            return 0.f;
+        }
+#if ARM_MATH_DSP
+        /**
+         * Note Standard deviation calculation can be off
+         * by > 0.01 but less than < 0.1, according to
+         * preliminary findings.
+         **/
+        UNUSED(mean);
+        float stdDev = 0;
+        arm_std_f32(ptrSrc, srcLen, &stdDev);
+        return stdDev;
+#else /* ARM_MATH_DSP */
+        auto VarianceFunction = [=](float acc, const float value) {
+            return acc + (((value - mean) * (value - mean))/ srcLen);
+        };
+
+        float acc = std::accumulate(ptrSrc, ptrSrc + srcLen, 0.0,
+                                    VarianceFunction);
+
+        return sqrtf(acc);
+#endif /* ARM_MATH_DSP */
+    }
+
+    void MathUtils::FftInitF32(const uint16_t fftLen,
+                               FftInstance& fftInstance,
+                               const FftType type)
+    {
+        fftInstance.m_fftLen = fftLen;
+        fftInstance.m_initialised = false;
+        fftInstance.m_optimisedOptionAvailable = false;
+        fftInstance.m_type = type;
+
+#if ARM_MATH_DSP
+        arm_status status = ARM_MATH_ARGUMENT_ERROR;
+        switch (fftInstance.m_type) {
+        case FftType::real:
+            status = arm_rfft_fast_init_f32(&fftInstance.m_instanceReal, fftLen);
+            break;
+
+        case FftType::complex:
+            status = arm_cfft_init_f32(&fftInstance.m_instanceComplex, fftLen);
+            break;
+
+        default:
+            printf_err("Invalid FFT type\n");
+            return;
+        }
+
+        if (ARM_MATH_SUCCESS != status) {
+            printf_err("Failed to initialise FFT for len %d\n", fftLen);
+        } else {
+            fftInstance.m_optimisedOptionAvailable = true;
+        }
+#endif /* ARM_MATH_DSP */
+
+        debug("Optimised FFT will be used: %s.\n", fftInstance.m_optimisedOptionAvailable? "yes": "no");
+
+        fftInstance.m_initialised = true;
+    }
+
+    static void FftRealF32(std::vector<float>& input,
+                           std::vector<float>& fftOutput)
+    {
+        const size_t inputLength = input.size();
+        const size_t halfLength = input.size() / 2;
+
+        fftOutput[0] = 0;
+        fftOutput[1] = 0;
+        for (size_t t = 0; t < inputLength; t++) {
+            fftOutput[0] += input[t];
+            fftOutput[1] += input[t] *
+                MathUtils::CosineF32(2 * M_PI * halfLength * t / inputLength);
+        }
+
+        for (size_t k = 1, j = 2; k < halfLength; ++k, j += 2) {
+            float sumReal = 0;
+            float sumImag = 0;
+
+            const auto theta = static_cast<float>(2 * M_PI * k / inputLength);
+
+            for (size_t t = 0; t < inputLength; t++) {
+                const auto angle = static_cast<float>(t * theta);
+                sumReal += input[t] * MathUtils::CosineF32(angle);
+                sumImag += -input[t]* MathUtils::SineF32(angle);
+            }
+
+            /* Arrange output to [real0, realN/2, real1, im1, real2, im2, ...] */
+            fftOutput[j] = sumReal;
+            fftOutput[j + 1] = sumImag;
+        }
+    }
+
+    static void FftComplexF32(std::vector<float>& input,
+                           std::vector<float>& fftOutput)
+    {
+        const size_t fftLen = input.size() / 2;
+        for (size_t k = 0; k < fftLen; k++) {
+            float sumReal = 0;
+            float sumImag = 0;
+            const auto theta = static_cast<float>(2 * M_PI * k / fftLen);
+            for (size_t t = 0; t < fftLen; t++) {
+                const auto angle = theta * t;
+                const auto cosine = MathUtils::CosineF32(angle);
+                const auto sine = MathUtils::SineF32(angle);
+                sumReal += input[t*2] * cosine + input[t*2 + 1] * sine;
+                sumImag += -input[t*2] * sine + input[t*2 + 1] * cosine;
+            }
+            fftOutput[k*2] = sumReal;
+            fftOutput[k*2 + 1] = sumImag;
+        }
+    }
+
+    void MathUtils::FftF32(std::vector<float>& input,
+                           std::vector<float>& fftOutput,
+                           arm::app::math::FftInstance& fftInstance)
+    {
+        if (!fftInstance.m_initialised) {
+            printf_err("FFT uninitialised\n");
+            return;
+        } else if (input.size() < fftInstance.m_fftLen) {
+            printf_err("FFT len: %" PRIu16 "; input len: %zu\n",
+                fftInstance.m_fftLen, input.size());
+            return;
+        } else if (fftOutput.size() < input.size()) {
+            printf_err("Output vector len insufficient to hold FFTs\n");
+            return;
+        }
+
+        switch (fftInstance.m_type) {
+        case FftType::real:
+
+#if ARM_MATH_DSP
+            if (fftInstance.m_optimisedOptionAvailable) {
+                arm_rfft_fast_f32(&fftInstance.m_instanceReal, input.data(), fftOutput.data(), 0);
+                return;
+            }
+#endif /* ARM_MATH_DSP */
+            FftRealF32(input, fftOutput);
+            return;
+
+        case FftType::complex:
+            if (input.size() < fftInstance.m_fftLen * 2) {
+                printf_err("Complex FFT instance should have input size >= (FFT len x 2)");
+                return;
+            }
+#if ARM_MATH_DSP
+            if (fftInstance.m_optimisedOptionAvailable) {
+                fftOutput = input; /* Complex function works in-place */
+                arm_cfft_f32(&fftInstance.m_instanceComplex, fftOutput.data(), 0, 1);
+                return;
+            }
+#endif /* ARM_MATH_DSP */
+            FftComplexF32(input, fftOutput);
+            return;
+
+        default:
+            printf_err("Invalid FFT type\n");
+            return;
+        }
+    }
+
+    void MathUtils::VecLogarithmF32(std::vector <float>& input,
+                                    std::vector <float>& output)
+    {
+#if ARM_MATH_DSP
+        arm_vlog_f32(input.data(), output.data(),
+                     output.size());
+#else /* ARM_MATH_DSP */
+        for (auto in = input.begin(), out = output.begin();
+             in != input.end() && out != output.end(); ++in, ++out) {
+            *out = logf(*in);
+        }
+#endif /* ARM_MATH_DSP */
+    }
+
+    float MathUtils::DotProductF32(float* srcPtrA, float* srcPtrB,
+                                   const uint32_t srcLen)
+    {
+        float output = 0.f;
+
+#if ARM_MATH_DSP
+        arm_dot_prod_f32(srcPtrA, srcPtrB, srcLen, &output);
+#else /* ARM_MATH_DSP */
+        for (uint32_t i = 0; i < srcLen; ++i) {
+            output += *srcPtrA++ * *srcPtrB++;
+        }
+#endif /* ARM_MATH_DSP */
+
+        return output;
+    }
+
+    bool MathUtils::ComplexMagnitudeSquaredF32(float* ptrSrc,
+                                               const uint32_t srcLen,
+                                               float* ptrDst,
+                                               const uint32_t dstLen)
+    {
+        if (dstLen < srcLen/2) {
+            printf_err("dstLen must be greater than srcLen/2");
+            return false;
+        }
+
+#if ARM_MATH_DSP
+        arm_cmplx_mag_squared_f32(ptrSrc, ptrDst, srcLen/2);
+#else /* ARM_MATH_DSP */
+        for (uint32_t j = 0; j < srcLen/2; ++j) {
+            const float real = *ptrSrc++;
+            const float im = *ptrSrc++;
+            *ptrDst++ = real*real + im*im;
+        }
+#endif /* ARM_MATH_DSP */
+        return true;
+    }
+
+    void MathUtils::SoftmaxF32(std::vector<float>& vec)
+    {
+        /* Fix for numerical stability and apply exp. */
+        auto start = vec.begin();
+        auto end = vec.end();
+
+        float maxValue = *std::max_element(start, end);
+        for (auto it = start; it != end; ++it) {
+            *it = std::exp((*it) - maxValue);
+        }
+
+        float sumExp = std::accumulate(start, end, 0.0f);
+
+        for (auto it = start; it != end; ++it) {
+            *it = (*it)/sumExp;
+        }
+    }
+
+} /* namespace math */
+} /* namespace app */
+} /* namespace arm */
diff --git a/source/math/include/PlatformMath.hpp b/source/math/include/PlatformMath.hpp
new file mode 100644
index 0000000..5ac10de
--- /dev/null
+++ b/source/math/include/PlatformMath.hpp
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef PLATFORM_MATH_HPP
+#define PLATFORM_MATH_HPP
+
+/* See if ARM DSP functions can be used. */
+#if defined(ARM_MATH_DSP)
+
+    #include "arm_math.h"
+    #define M_PI    (PI)
+#else
+    #include <cmath>
+#endif
+
+#include <vector>
+#include <cstdint>
+#include <numeric>
+
+namespace arm {
+namespace app {
+namespace math {
+
+    enum class FftType {
+        real = 0,
+        complex = 1
+    };
+
+    struct FftInstance {
+#if ARM_MATH_DSP
+        arm_rfft_fast_instance_f32  m_instanceReal;
+        arm_cfft_instance_f32       m_instanceComplex;
+#endif
+        uint16_t                    m_fftLen{0};
+        FftType                     m_type{FftType::real};
+        bool                        m_optimisedOptionAvailable{false};
+        bool                        m_initialised{false};
+    };
+
+    /* Class to provide Math functions like FFT, mean, stddev etc.
+     * This will allow other classes, functions to be independent of
+     * #if definition checks and provide a cleaner API. Also, it will
+     * consolidate all arm math functions used in one place and make
+     * them easier to test. */
+    class MathUtils {
+
+    public:
+        /**
+         * @brief       Get the cosine value of the argument in floating point.
+         * @param[in]   radians   Angle in radians.
+         * @return      Cosine value (floating point).
+         */
+        static float CosineF32(float radians);
+
+        /**
+         * @brief       Get the sine value of the argument in floating point.
+         * @param[in]   radians   Angle in radians.
+         * @return      Sine value (floating point).
+         */
+        static float SineF32(float radians);
+
+        /**
+         * @brief       Get the square root of the argument in floating point.
+         * @param[in]   input   Value to compute square root of.
+         * @return      Square root (floating point) value.
+         */
+        static float SqrtF32(float input);
+
+        /**
+         * @brief       Gets the mean of a floating point array of elements.
+         * @param[in]   ptrSrc   Pointer to the first element.
+         * @param[in]   srcLen   Number of elements in the array/vector.
+         * @return      Average value.
+         */
+        static float MeanF32(float* ptrSrc, uint32_t srcLen);
+
+        /**
+         * @brief       Gets the standard deviation of a floating point array
+         *              of elements.
+         * @param[in]   ptrSrc   Pointer to the first element.
+         * @param[in]   srcLen   Number of elements in the array/vector.
+         * @param[in]   mean     Pre-computed mean value.
+         * @return      Standard deviation value.
+         */
+        static float StdDevF32(float* ptrSrc, uint32_t srcLen,
+                               float mean);
+
+        /**
+         * @brief       Initialises the internal FFT structures (if available
+         *              for the platform). This function should be called
+         *              prior to Fft32 function call if built with ARM DSP functions.
+         * @param[in]   fftLen        Requested length of the FFT.
+         * @param[in]   fftInstance   FFT instance struct to use.
+         * @param[in]   type          FFT type (real or complex)
+         */
+        static void FftInitF32(uint16_t fftLen,
+                               FftInstance& fftInstance,
+                               FftType type = FftType::real);
+
+        /**
+         * @brief       Computes the FFT for the input vector.
+         * @param[in]   input       Floating point vector of input elements
+         * @param[out]  fftOutput   Output buffer to be populated by computed FFTs.
+         * @param[in]   fftInstance FFT instance struct to use.
+         */
+        static void FftF32(std::vector<float>& input,
+                           std::vector<float>& fftOutput,
+                           FftInstance& fftInstance);
+
+        /**
+         * @brief       Computes the natural logarithms of input floating point
+         *              vector
+         * @param[in]   input    Floating point input vector
+         * @param[out]  output   Pre-allocated buffer to be populated with
+         *                       natural log values of each input element.
+         */
+        static void VecLogarithmF32(std::vector <float>& input,
+                                    std::vector <float>& output);
+
+        /**
+         * @brief       Computes the dot product of two 1D floating point
+         *              vectors.
+         *              result = sum(srcA[0]*srcB[0] + srcA[1]*srcB[1] + ..)
+         * @param[in]   srcPtrA   Pointer to the first element of first
+         *                        array.
+         * @param[in]   srcPtrB   Pointer to the first element of second
+         *                        array.
+         * @param[in]   srcLen    Number of elements in the array/vector.
+         * @return      Dot product.
+         */
+        static float DotProductF32(float* srcPtrA, float* srcPtrB,
+                                   uint32_t srcLen);
+
+        /**
+         * @brief       Computes the squared magnitude of floating point
+         *              complex number array.
+         * @param[in]   ptrSrc   Pointer to the first element of input
+         *                       array.
+         * @param[in]   srcLen   Number of elements in the array/vector.
+         * @param[out]  ptrDst   Output buffer to be populated.
+         * @param[in]   dstLen   Output buffer len (for sanity check only).
+         * @return      true if successful, false otherwise.
+         */
+        static bool ComplexMagnitudeSquaredF32(float* ptrSrc,
+                                               uint32_t srcLen,
+                                               float* ptrDst,
+                                               uint32_t dstLen);
+
+        /**
+        * @brief       Scales output scores for an arbitrary number of classes so
+        *              that they sum to 1, allowing output to be expressed as a probability.
+        * @param[in]   vector Vector of floats modified in-place
+        */
+        static void SoftmaxF32(std::vector<float>& vec);
+    };
+
+} /* namespace math */
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* PLATFORM_MATH_HPP */
\ No newline at end of file