blob: 1e5c77b376e694493e0b165dc21ad2d07af9cc10 [file] [log] [blame]
Vidhya Sudhan Loganathan5e96be72018-12-18 14:17:00 +00001/*
Michele Di Giorgiod9eaf612020-07-08 11:12:57 +01002 * Copyright (c) 2018 Arm Limited.
Vidhya Sudhan Loganathan5e96be72018-12-18 14:17:00 +00003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "helpers.h"
25
26#if defined(VECTOR_SIZE) && defined(START) && defined(STEP) && defined(DATA_TYPE)
27/** Generates a sequence of numbers starting from START and extends by increments of 'STEP' up to but not including 'END'.
28 *
29 * @note starting value of the sequence must be given as a preprocessor argument using -DSTART=value. e.g. -DSTART=0
30 * @note difference between consequtive elements of the sequence must be given as a preprocessor argument using -DSTEP=value. e.g. -DSTEP=1
31 * @note Datatype must be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
32 * @note vector size supported by the device must be given as a preprocessor argument using -DVECTOR_SIZE=value. e.g. -DDATA_TYPE=4
33 *
34 * @param[out] out_ptr Pointer to the destination tensor. Supported data types: U8/S8/U16/S16/U32/S32/F16/F32.
35 * @param[in] out_stride_x Stride of the destination tensor in X dimension (in bytes)
36 * @param[in] out_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
37 * @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination tensor
38 */
39__kernel void range(
40 VECTOR_DECLARATION(out))
41{
42 uint id = get_global_id(0) * VECTOR_SIZE;
43 __global void *dst_ptr = out_ptr + out_offset_first_element_in_bytes + id * sizeof(DATA_TYPE);
44#if VECTOR_SIZE == 1
45 DATA_TYPE seq;
46 seq = (DATA_TYPE)START + (DATA_TYPE)id * (DATA_TYPE)STEP;
47
48 *((__global DATA_TYPE *)dst_ptr) = seq;
49#else // VECTOR_SIZE == 1
50 VEC_DATA_TYPE(DATA_TYPE, VECTOR_SIZE)
51 seq;
52
53 seq.s0 = ((DATA_TYPE)START + (DATA_TYPE)id * (DATA_TYPE)STEP);
54#if VECTOR_SIZE > 1
55 seq.s1 = seq.s0 + (DATA_TYPE)STEP;
56#if VECTOR_SIZE > 2
57 seq.s2 = seq.s1 + (DATA_TYPE)STEP;
58#if VECTOR_SIZE > 3
59 seq.s3 = seq.s2 + (DATA_TYPE)STEP;
60#if VECTOR_SIZE > 4
61 seq.s4 = seq.s3 + (DATA_TYPE)STEP;
62#if VECTOR_SIZE > 5
63 seq.s5 = seq.s4 + (DATA_TYPE)STEP;
64#if VECTOR_SIZE > 6
65 seq.s6 = seq.s5 + (DATA_TYPE)STEP;
66#if VECTOR_SIZE > 7
67 seq.s7 = seq.s6 + (DATA_TYPE)STEP;
68#endif // VECTOR_SIZE > 7
69#endif // VECTOR_SIZE > 6
70#endif // VECTOR_SIZE > 5
71#endif // VECTOR_SIZE > 4
72#endif // VECTOR_SIZE > 3
73#endif // VECTOR_SIZE > 2
74#endif // VECTOR_SIZE > 1
75 VSTORE(VECTOR_SIZE)
76 (seq, 0, ((__global DATA_TYPE *)dst_ptr));
77#endif //VECTOR_SIZE == 1
78}
79
80#if defined(OFFSET_OUT) && defined(SCALE_OUT)
81
82#define CONVERT_RTE(x, type) (convert_##type##_rte((x)))
83#define CONVERT_DOWN(x, type) CONVERT_RTE(x, type)
84
85/** Generates a sequence of numbers starting from START and extends by increments of 'STEP' up to but not including 'END'.
86 *
87 * @note starting value of the sequence must be given as a preprocessor argument using -DSTART=value. e.g. -DSTART=0
88 * @note difference between consequtive elements of the sequence must be given as a preprocessor argument using -DSTEP=value. e.g. -DSTEP=1
89 * @note Datatype must be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
90 * @note vector size supported by the device must be given as a preprocessor argument using -DVECTOR_SIZE=vector_size. e.g. -DDATA_TYPE=4
91 * @note The quantization offset of the output must be passed at compile time using -DOFFSET_OUT, i.e. -DOFFSET_OUT=10
92 * @note The quantization scale of the output must be passed at compile time using -DSCALE_OUT, i.e. -DSCALE_OUT=10
93 *
94 * @param[out] out_ptr Pointer to the destination tensor. Supported data types: QASYMM8.
95 * @param[in] out_stride_x Stride of the destination tensor in X dimension (in bytes)
96 * @param[in] out_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
97 * @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination tensor
98 */
99__kernel void range_quantized(
100 VECTOR_DECLARATION(out))
101{
102 size_t id = get_global_id(0) * VECTOR_SIZE;
103 __global void *dst_ptr = out_ptr + out_offset_first_element_in_bytes + id * sizeof(DATA_TYPE);
104#if VECTOR_SIZE == 1
105 float seq;
106 seq = (float)START + (float)id * (float)STEP;
107 seq = (DATA_TYPE)(int)(seq / ((float)SCALE_OUT) + (float)OFFSET_OUT);
108 seq = max(0.0f, min(seq, 255.0f));
109 *((__global uchar *)dst_ptr) = CONVERT_SAT(CONVERT_DOWN(seq, int), uchar);
110#else // VECTOR_SIZE == 1
111 VEC_DATA_TYPE(float, VECTOR_SIZE)
112 seq;
113 seq.s0 = (float)START + id * (float)STEP;
114#if VECTOR_SIZE > 1
115 seq.s1 = seq.s0 + (float)STEP;
116#if VECTOR_SIZE > 2
117 seq.s2 = seq.s1 + (float)STEP;
118#if VECTOR_SIZE > 3
119 seq.s3 = seq.s2 + (float)STEP;
120#if VECTOR_SIZE > 4
121 seq.s4 = seq.s3 + (float)STEP;
122#if VECTOR_SIZE > 5
123 seq.s5 = seq.s4 + (float)STEP;
124#if VECTOR_SIZE > 6
125 seq.s6 = seq.s5 + (float)STEP;
126#if VECTOR_SIZE > 7
127 seq.s7 = seq.s6 + (float)STEP;
128#endif // VECTOR_SIZE > 7
129#endif // VECTOR_SIZE > 6
130#endif // VECTOR_SIZE > 5
131#endif // VECTOR_SIZE > 4
132#endif // VECTOR_SIZE > 3
133#endif // VECTOR_SIZE > 2
134#endif // VECTOR_SIZE > 1
135 seq = seq / ((VEC_DATA_TYPE(float, VECTOR_SIZE))((float)SCALE_OUT)) + ((VEC_DATA_TYPE(float, VECTOR_SIZE))((float)OFFSET_OUT));
136 seq = max((VEC_DATA_TYPE(float, VECTOR_SIZE))(0.0f), min(seq, (VEC_DATA_TYPE(float, VECTOR_SIZE))(255.0f)));
137 VEC_DATA_TYPE(uchar, VECTOR_SIZE)
138 res = CONVERT_SAT(CONVERT_DOWN(seq, VEC_DATA_TYPE(int, VECTOR_SIZE)), VEC_DATA_TYPE(uchar, VECTOR_SIZE));
139 VSTORE(VECTOR_SIZE)
140 (res, 0, ((__global DATA_TYPE *)dst_ptr));
141#endif // VECTOR_SIZE == 1
142}
143#endif // defined(OFFSET_OUT) && defined(SCALE_OUT)
144
145#endif // defined(VECTOR_SIZE) && defined(START) && defined(STEP) && defined(DATA_TYPE)