blob: 480d9cd20c127e673f87b4a5776e5ba3dd699204 [file] [log] [blame]
Manuel Bottini79f88e62019-09-18 15:02:53 +01001/*
Michele Di Giorgiod9eaf612020-07-08 11:12:57 +01002 * Copyright (c) 2019-2020 Arm Limited.
Manuel Bottini79f88e62019-09-18 15:02:53 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "helpers.h"
25
Georgios Pinitas55a687d2020-01-30 12:00:23 +000026#if defined(VEC_SIZE) && defined(DATA_TYPE) && defined(INTERNAL_DATA_TYPE) && defined(GAMMA) && defined(BETA) && defined(EPSILON) && defined(DIM_X) && defined(DIM_Y) && defined(DIM_Z)
Manuel Bottini79f88e62019-09-18 15:02:53 +010027/** This function normalizes the input 2D tensor across the first dimension with respect to mean and standard deviation of the same dimension.
28 *
29 * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
30 * @attention Data type should be passed using the -DDATA_TYPE=data_type compile flag, e.g. -DDATA_TYPE=float
31 * @attention The scale scalar value applied to the normalized tensor should be passed using the -DGAMMA=value compile flag, e.g. -DGAMMA=1.3
32 * @attention The offset scalar value applied to the normalized tensor should be passed using the -DBETA=value compile flag, e.g. -DBETA=2.4
33 * @attention Normalization epsilon parameter should be given as a preprocessor argument with -DEPSILON=value. e.g. -DEPSILON=0.001f
34 * @attention Dimensions X, Y, and Z should be given as a preprocessor argument with -DDIM_X=value, -DDIM_Y=value, -DDIM_Z=value. e.g. -DDIM_X=6, -DDIM_Y=2, -DDIM_Z=7
35 *
36 * @param[in] input_ptr Pointer to the first source tensor. Supported data types: F16/F32
37 * @param[in] input_stride_x Stride of the first source tensor in X dimension (in bytes)
38 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
39 * @param[in] input_stride_y Stride of the first source tensor in Y dimension (in bytes)
40 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
41 * @param[in] input_stride_z Stride of the first source tensor in Z dimension (in bytes)
42 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
43 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the first source tensor
44 * @param[out] output_ptr (Optional) Pointer to the destination tensor. Supported data types: same as @p input_ptr
45 * @param[in] output_stride_x (Optional) Stride of the destination tensor in X dimension (in bytes)
46 * @param[in] output_step_x (Optional) output_stride_x * number of elements along X processed per workitem(in bytes)
47 * @param[in] output_stride_y (Optional) Stride of the destination tensor in Y dimension (in bytes)
48 * @param[in] output_step_y (Optional) output_stride_y * number of elements along Y processed per workitem(in bytes)
49 * @param[in] output_stride_z (Optional) Stride of the destination tensor in Z dimension (in bytes)
50 * @param[in] output_step_z (Optional) output_stride_z * number of elements along Z processed per workitem(in bytes)
51 * @param[in] output_offset_first_element_in_bytes (Optional) The offset of the first element in the destination tensor
52 */
53__kernel void instance_normalization(
54 TENSOR4D_DECLARATION(input)
55#ifndef IN_PLACE
56 ,
57 TENSOR4D_DECLARATION(output)
58#endif /* IN_PLACE */
59)
60{
Michele Di Giorgioa0a3d202019-11-20 15:06:52 +000061 Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
62#ifndef IN_PLACE
63 Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output, 0);
64#endif /* IN_PLACE */
65
Georgios Pinitas55a687d2020-01-30 12:00:23 +000066 INTERNAL_DATA_TYPE sum = 0.f;
67 INTERNAL_DATA_TYPE sum_sq = 0.f;
Manuel Bottini79f88e62019-09-18 15:02:53 +010068
69#if defined(NHWC)
70
Michele Di Giorgioa0a3d202019-11-20 15:06:52 +000071 const int ch = get_global_id(0); // Current channel
72 const int batch = get_global_id(2); // Current batch
Manuel Bottini79f88e62019-09-18 15:02:53 +010073 const int elements_plane = DIM_Y * DIM_Z;
Manuel Bottini79f88e62019-09-18 15:02:53 +010074
75 for(int i_w = 0; i_w < DIM_Y; ++i_w)
76 {
77 for(int i_h = 0; i_h < DIM_Z; ++i_h)
78 {
Georgios Pinitas55a687d2020-01-30 12:00:23 +000079 INTERNAL_DATA_TYPE data = (INTERNAL_DATA_TYPE) * ((__global DATA_TYPE *)tensor4D_offset(&in, ch, i_w, i_h, batch));
Manuel Bottini79f88e62019-09-18 15:02:53 +010080 sum += data;
81 sum_sq += data * data;
82 }
83 }
84
85#else // !defined(NHWC)
Michele Di Giorgioa0a3d202019-11-20 15:06:52 +000086 const int ch = get_global_id(2) % DIM_Z; // Current channel
87 const int batch = get_global_id(2) / DIM_Z; // Current batch
Manuel Bottini79f88e62019-09-18 15:02:53 +010088 const int elements_plane = DIM_X * DIM_Y;
Manuel Bottini79f88e62019-09-18 15:02:53 +010089
Georgios Pinitas55a687d2020-01-30 12:00:23 +000090 VEC_DATA_TYPE(INTERNAL_DATA_TYPE, VEC_SIZE)
Manuel Bottini79f88e62019-09-18 15:02:53 +010091 part_sum = 0.f;
Georgios Pinitas55a687d2020-01-30 12:00:23 +000092 VEC_DATA_TYPE(INTERNAL_DATA_TYPE, VEC_SIZE)
Manuel Bottini79f88e62019-09-18 15:02:53 +010093 part_sum_sq = 0.f;
94 // Calculate partial sum
Michele Di Giorgioa0a3d202019-11-20 15:06:52 +000095 for(int y = 0; y < DIM_Y; ++y)
Manuel Bottini79f88e62019-09-18 15:02:53 +010096 {
Michele Di Giorgioa0a3d202019-11-20 15:06:52 +000097 int x = 0;
98 for(; x <= (DIM_X - VEC_SIZE); x += VEC_SIZE)
99 {
100 // Load data
Georgios Pinitas55a687d2020-01-30 12:00:23 +0000101 VEC_DATA_TYPE(INTERNAL_DATA_TYPE, VEC_SIZE)
102 data = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)tensor4D_offset(&in, x, y, ch, batch)), VEC_DATA_TYPE(INTERNAL_DATA_TYPE, VEC_SIZE));
Michele Di Giorgioa0a3d202019-11-20 15:06:52 +0000103 part_sum += data;
104 part_sum_sq += data * data;
105 }
106 // Left-overs loop
107 for(; x < DIM_X; ++x)
108 {
Georgios Pinitas55a687d2020-01-30 12:00:23 +0000109 INTERNAL_DATA_TYPE data = (INTERNAL_DATA_TYPE)(*((__global DATA_TYPE *)tensor4D_offset(&in, x, y, ch, batch)));
Michele Di Giorgioa0a3d202019-11-20 15:06:52 +0000110 part_sum.s0 += data;
111 part_sum_sq.s0 += data * data;
112 }
Manuel Bottini79f88e62019-09-18 15:02:53 +0100113 }
114 // Perform reduction
115#if VEC_SIZE > 8
116 part_sum.s01234567 += part_sum.s89abcdef;
117 part_sum_sq.s01234567 += part_sum_sq.s89abcdef;
118#endif // VEC_SIZE > 8
119#if VEC_SIZE > 4
120 part_sum.s0123 += part_sum.s4567;
121 part_sum_sq.s0123 += part_sum_sq.s4567;
122#endif // VEC_SIZE > 4
123#if VEC_SIZE > 2
124 part_sum.s01 += part_sum.s23;
125 part_sum_sq.s01 += part_sum_sq.s23;
126#endif // VEC_SIZE > 2
127 part_sum.s0 += part_sum.s1;
128 part_sum_sq.s0 += part_sum_sq.s1;
Manuel Bottini79f88e62019-09-18 15:02:53 +0100129
Georgios Pinitas55a687d2020-01-30 12:00:23 +0000130 sum = (INTERNAL_DATA_TYPE)part_sum.s0;
131 sum_sq = (INTERNAL_DATA_TYPE)part_sum_sq.s0;
Manuel Bottini79f88e62019-09-18 15:02:53 +0100132
133#endif // defined(NHWC)
134
Georgios Pinitas55a687d2020-01-30 12:00:23 +0000135 const INTERNAL_DATA_TYPE mean = (sum / elements_plane);
136 const INTERNAL_DATA_TYPE var = (sum_sq / elements_plane) - (mean * mean);
137 const INTERNAL_DATA_TYPE multip = GAMMA / sqrt(var + EPSILON);
Manuel Bottini79f88e62019-09-18 15:02:53 +0100138
139#if defined(NHWC)
140
141 for(int i_w = 0; i_w < DIM_Y; ++i_w)
142 {
143 for(int i_h = 0; i_h < DIM_Z; ++i_h)
144 {
Michele Di Giorgioa0a3d202019-11-20 15:06:52 +0000145 __global DATA_TYPE *input_address = (__global DATA_TYPE *)tensor4D_offset(&in, ch, i_w, i_h, batch);
Manuel Bottini79f88e62019-09-18 15:02:53 +0100146#ifdef IN_PLACE
147 __global DATA_TYPE *output_address = input_address;
148#else /* !IN_PLACE */
Michele Di Giorgioa0a3d202019-11-20 15:06:52 +0000149 __global DATA_TYPE *output_address = (__global DATA_TYPE *)tensor4D_offset(&out, ch, i_w, i_h, batch);
Manuel Bottini79f88e62019-09-18 15:02:53 +0100150#endif /* IN_PLACE */
Georgios Pinitas55a687d2020-01-30 12:00:23 +0000151 *(output_address) = (*(input_address) - mean) * multip + (INTERNAL_DATA_TYPE)BETA;
Manuel Bottini79f88e62019-09-18 15:02:53 +0100152 }
153 }
154
155#else // !defined(NHWC)
Michele Di Giorgioa0a3d202019-11-20 15:06:52 +0000156 for(int y = 0; y < DIM_Y; ++y)
Manuel Bottini79f88e62019-09-18 15:02:53 +0100157 {
Michele Di Giorgioa0a3d202019-11-20 15:06:52 +0000158 int x = 0;
159 for(; x <= (DIM_X - VEC_SIZE); x += VEC_SIZE)
160 {
161 __global DATA_TYPE *input_address = (__global DATA_TYPE *)tensor4D_offset(&in, x, y, ch, batch);
Manuel Bottini79f88e62019-09-18 15:02:53 +0100162#ifdef IN_PLACE
Michele Di Giorgioa0a3d202019-11-20 15:06:52 +0000163 __global DATA_TYPE *output_address = input_address;
Manuel Bottini79f88e62019-09-18 15:02:53 +0100164#else /* !IN_PLACE */
Michele Di Giorgioa0a3d202019-11-20 15:06:52 +0000165 __global DATA_TYPE *output_address = (__global DATA_TYPE *)tensor4D_offset(&out, x, y, ch, batch);
Manuel Bottini79f88e62019-09-18 15:02:53 +0100166#endif /* IN_PLACE */
167
Georgios Pinitas55a687d2020-01-30 12:00:23 +0000168 VEC_DATA_TYPE(INTERNAL_DATA_TYPE, VEC_SIZE)
169 data = CONVERT(VLOAD(VEC_SIZE)(0, input_address), VEC_DATA_TYPE(INTERNAL_DATA_TYPE, VEC_SIZE));
Manuel Bottini79f88e62019-09-18 15:02:53 +0100170
Georgios Pinitas55a687d2020-01-30 12:00:23 +0000171 VEC_DATA_TYPE(INTERNAL_DATA_TYPE, VEC_SIZE)
172 res = (data - mean) * multip + (INTERNAL_DATA_TYPE)BETA;
Michele Di Giorgioa0a3d202019-11-20 15:06:52 +0000173 VSTORE(VEC_SIZE)
Georgios Pinitas55a687d2020-01-30 12:00:23 +0000174 (CONVERT(res, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), 0, output_address);
Michele Di Giorgioa0a3d202019-11-20 15:06:52 +0000175 }
176 // Left-overs loop
177 for(; x < DIM_X; ++x)
178 {
179 __global DATA_TYPE *input_address = (__global DATA_TYPE *)tensor4D_offset(&in, x, y, ch, batch);
Manuel Bottini79f88e62019-09-18 15:02:53 +0100180#ifdef IN_PLACE
Michele Di Giorgioa0a3d202019-11-20 15:06:52 +0000181 __global DATA_TYPE *output_address = input_address;
Manuel Bottini79f88e62019-09-18 15:02:53 +0100182#else /* !IN_PLACE */
Michele Di Giorgioa0a3d202019-11-20 15:06:52 +0000183 __global DATA_TYPE *output_address = (__global DATA_TYPE *)tensor4D_offset(&out, x, y, ch, batch);
Manuel Bottini79f88e62019-09-18 15:02:53 +0100184#endif /* IN_PLACE */
Georgios Pinitas55a687d2020-01-30 12:00:23 +0000185 *(output_address) = (*(input_address) - mean) * multip + (INTERNAL_DATA_TYPE)BETA;
Michele Di Giorgioa0a3d202019-11-20 15:06:52 +0000186 }
Manuel Bottini79f88e62019-09-18 15:02:53 +0100187 }
188#endif // defined(NHWC)
189}
Georgios Pinitas55a687d2020-01-30 12:00:23 +0000190#endif /* defined(VEC_SIZE) && defined(DATA_TYPE) && defined(INTERNAL_DATA_TYPE) && defined(GAMMA) && defined(BETA) && defined(EPSILON) && defined(DIM_X) && defined(DIM_Y) && defined(DIM_Z) */