Blame - src/core/CL/cl_kernels/nchw/normalization_layer.cl - ml/ComputeLibrary

* @note The leftover size in the X dimension should be given as a preprocessor argument using -DVEC_SIZE_LEFTOVER, i.e. x_dimension % VEC_SIZE. e.g. -DVEC_SIZE_LEFTOVER=1

104

*

105

* @param[in] input_ptr Pointer to the first source tensor. Supported data types: F16/F32

106

* @param[in] input_stride_x Stride of the first source tensor in X dimension (in bytes)

107

* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)

108

* @param[in] input_stride_y Stride of the first source tensor in Y dimension (in bytes)

109

* @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)

110

* @param[in] input_stride_z Stride of the first source tensor in Z dimension (in bytes)

111

* @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)

112

* @param[in] input_offset_first_element_in_bytes The offset of the first element in the first source tensor

113

* @param[out] output_ptr Pointer to the destination tensor. Supported data types: same as @p input_ptr

114

* @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)

115

* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)

116

* @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes)

117

* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)

118

* @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes)

119

* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)

120

* @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor

121

*/

122

__kernel void normalization_layer_in_map_nchw(TENSOR3D_DECLARATION(input),
                                              TENSOR3D_DECLARATION(output))
{
    Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(input);
    Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(output);

    /* Horizontal extent of the normalization window, relative to the current position. */
    const int x_begin = -(int)RADIUS;
    const int x_end   = (int)RADIUS;

#if defined(IN_MAP_2D)
    /* 2D window: clamp the relative row range so that (row + dy) stays within
     * [0, get_global_size(1) - 1]. */
    const int row     = get_global_id(1);
    const int y_begin = max(-(int)RADIUS, -row);
    const int y_end   = min((int)RADIUS, (int)get_global_size(1) - 1 - row);
#else  /* defined(IN_MAP_2D) */
    /* 1D window: only the current row (relative offset 0) is visited. */
    const int y_begin = 0;
    const int y_end   = 0;
#endif /* defined(IN_MAP_2D) */

    /* Accumulate the squared values of every vector inside the window. */
    VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
    sum_sq = 0;

    for(int dy = y_begin; dy <= y_end; ++dy)
    {
        for(int dx = x_begin; dx <= x_end; ++dx)
        {
            /* NOTE(review): no clamping is applied to dx here; presumably the input
             * tensor provides a border/padding of at least RADIUS elements - confirm
             * against the host-side kernel configuration. */
            const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
            sample = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)tensor3D_offset(&src, dx, dy, 0));

            sum_sq = ADD_OP(sum_sq, MUL_OP(sample, sample));
        }
    }

    /* Compile-time parameters broadcast to vectors. */
    const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
    coeff_v = SQCVT_SAT(COEFF);
    const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
    beta_v = SQCVT_SAT(BETA);
    const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
    kappa_v = SQCVT_SAT(KAPPA);

    /* denominator = (kappa + coeff * sum_sq) ^ beta */
    const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
    scaled_sum = ADD_OP(MUL_OP(sum_sq, coeff_v), kappa_v);
    const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
    denominator = POW_OP(scaled_sum, beta_v);

    /* Normalize the vector at the current position and write it to the destination. */
    const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
    center = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)src.ptr);
    const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
    result = DIV_OP(center, denominator);

    VSTORE(VEC_SIZE)
    (result, 0, (__global DATA_TYPE *)dst.ptr);
}

174

#endif // defined(WIDTH_SIZE)