blob: 958f4a9cfb98195fbd84a93bd5ba209ed2b7876f [file] [log] [blame]
/*
 * Copyright (c) 2016, 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "arm_compute/core/NEON/kernels/NETableLookupKernel.h"
25
26#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/ILut.h"
29#include "arm_compute/core/ITensor.h"
30#include "arm_compute/core/TensorInfo.h"
31#include "arm_compute/core/Utils.h"
32#include "arm_compute/core/Validate.h"
33
34#include <cstddef>
35#include <cstdint>
36
37using namespace arm_compute;
38
namespace arm_compute
{
// Forward declaration; Coordinates is only used by reference in this file's window-loop lambdas.
class Coordinates;

/** Number of elements handled by each window-loop iteration of the lookup routines.
 *
 * The same value is handed to INESimpleKernel::configure() as the per-iteration element count.
 */
constexpr unsigned int num_num_elems_processed_per_iteration{ 16 };
} // namespace arm_compute
45
46NETableLookupKernel::NETableLookupKernel()
47 : _func(nullptr), _lut(nullptr)
48{
49}
50
51template <class T>
52void NETableLookupKernel::tableLookup(const Window &window)
53{
54 uint32_t offset = _lut->index_offset();
55 size_t count = _lut->num_elements();
56 const auto lut = reinterpret_cast<const T *>(_lut->buffer());
57 unsigned int step = num_num_elems_processed_per_iteration;
58
59 ARM_COMPUTE_ERROR_ON(lut == nullptr);
60
61 Iterator input = Iterator(_input, window);
62 Iterator output = Iterator(_output, window);
63
64 execute_window_loop(window, [&](const Coordinates & id)
65 {
66 auto input_ptr = reinterpret_cast<const T *>(input.ptr());
67 auto output_ptr = reinterpret_cast<T *>(output.ptr());
68
69 for(unsigned int i = 0; i < step; ++i, ++input_ptr, ++output_ptr)
70 {
71 const int32_t index = offset + *input_ptr;
72
73 if(0 <= index && index < static_cast<int32_t>(count))
74 {
75 *output_ptr = lut[index];
76 }
77 }
78 },
79 input, output);
80}
81
82namespace arm_compute
83{
84template <>
85void NETableLookupKernel::tableLookup<uint8_t>(const Window &window)
86{
87 const uint8_t *const lut = _lut->buffer();
88 unsigned int step = num_num_elems_processed_per_iteration;
89
90 ARM_COMPUTE_ERROR_ON(lut == nullptr);
91
92 Iterator input = Iterator(_input, window);
93 Iterator output = Iterator(_output, window);
94
95 execute_window_loop(window, [&](const Coordinates & id)
96 {
97 const uint8_t *input_ptr = input.ptr();
98 uint8_t *output_ptr = output.ptr();
99
100 for(unsigned int i = 0; i < step; ++i)
101 {
102 *output_ptr++ = lut[*input_ptr++];
103 }
104 },
105 input, output);
106}
107} // namespace arm_compute
108
109void NETableLookupKernel::configure(const ITensor *input, const ILut *lut, ITensor *output)
110{
111 ARM_COMPUTE_ERROR_ON(input == nullptr);
112 ARM_COMPUTE_ERROR_ON(lut == nullptr);
113 ARM_COMPUTE_ERROR_ON(output == nullptr);
114 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16);
115 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16);
116 ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
117
118 _lut = lut;
119
120 if(input->info()->data_type() == DataType::U8 && output->info()->data_type() == DataType::U8)
121 {
122 _func = &NETableLookupKernel::tableLookup<uint8_t>;
123 }
124 else if(input->info()->data_type() == DataType::S16 && output->info()->data_type() == DataType::S16)
125 {
126 _func = &NETableLookupKernel::tableLookup<int16_t>;
127 }
128 else
129 {
130 ARM_COMPUTE_ERROR("Unsupported combination of input and output DataType.");
131 }
132
133 INESimpleKernel::configure(input, output, num_num_elems_processed_per_iteration);
134}
135
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100136void NETableLookupKernel::run(const Window &window, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100137{
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100138 ARM_COMPUTE_UNUSED(info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100139 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
140 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window);
141 ARM_COMPUTE_ERROR_ON(_func == nullptr);
142 (this->*_func)(window);
143}