telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 1 | // |
| 2 | // Copyright © 2017 Arm Ltd. All rights reserved. |
David Beck | ecb56cd | 2018-09-05 12:52:57 +0100 | [diff] [blame] | 3 | // SPDX-License-Identifier: MIT |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 4 | // |
| 5 | |
| 6 | #include "RefL2NormalizationFloat32Workload.hpp" |
| 7 | |
| 8 | #include "RefWorkloadUtils.hpp" |
| 9 | #include "TensorBufferArrayView.hpp" |
| 10 | |
| 11 | #include "Profiling.hpp" |
| 12 | |
| 13 | #include <cmath> |
| 14 | |
| 15 | namespace armnn |
| 16 | { |
| 17 | |
| 18 | void RefL2NormalizationFloat32Workload::Execute() const |
| 19 | { |
| 20 | ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefL2NormalizationFloat32Workload_Execute"); |
| 21 | |
| 22 | const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); |
| 23 | const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); |
| 24 | |
| 25 | TensorBufferArrayView<const float> input(inputInfo.GetShape(), GetInputTensorDataFloat(0, m_Data)); |
| 26 | TensorBufferArrayView<float> output(outputInfo.GetShape(), GetOutputTensorDataFloat(0, m_Data)); |
| 27 | |
| 28 | const unsigned int batchSize = inputInfo.GetShape()[0]; |
| 29 | const unsigned int depth = inputInfo.GetShape()[1]; |
| 30 | const unsigned int rows = inputInfo.GetShape()[2]; |
| 31 | const unsigned int cols = inputInfo.GetShape()[3]; |
| 32 | |
| 33 | for (unsigned int n = 0; n < batchSize; ++n) |
| 34 | { |
| 35 | for (unsigned int d = 0; d < depth; ++d) |
| 36 | { |
| 37 | for (unsigned int h = 0; h < rows; ++h) |
| 38 | { |
| 39 | for (unsigned int w = 0; w < cols; ++w) |
| 40 | { |
| 41 | float reduction = 0.0; |
| 42 | for (unsigned int c = 0; c < depth; ++c) |
| 43 | { |
| 44 | const float value = input.Get(n, c, h, w); |
| 45 | reduction += value * value; |
| 46 | } |
| 47 | |
| 48 | // Using std::max(reduction, epsilon) below would prevent against division by 0. |
| 49 | // However, at the time of writing: |
| 50 | // - This is not supported by the ACL functions used to implement L2Normalization in the CL |
| 51 | // backend. |
| 52 | // - The reference semantics for this operator do not include this parameter. |
| 53 | const float scale = 1.0f / sqrtf(reduction); |
| 54 | output.Get(n, d, h, w) = input.Get(n, d, h, w) * scale; |
| 55 | } |
| 56 | } |
| 57 | } |
| 58 | } |
| 59 | } |
| 60 | |
| 61 | } //namespace armnn |