Blame - src/core/CL/cl_kernels/batch_to_space.cl - ml/ComputeLibrary

2018-08-31 10:07:09 +0100

[diff] [blame]

1

/*

Michele Di Giorgio

d9eaf61

2020-07-08 11:12:57 +0100

[diff] [blame]

2

Michalis Spyrou

2018-08-31 10:07:09 +0100

[diff] [blame]

3

*

4

* SPDX-License-Identifier: MIT

5

*

6

* Permission is hereby granted, free of charge, to any person obtaining a copy

7

* of this software and associated documentation files (the "Software"), to

Sheri Zhang

2020-04-17 14:59:13 +0100

[diff] [blame]

8

* deal in the Software without restriction, including without limitation the

Michalis Spyrou

2018-08-31 10:07:09 +0100

[diff] [blame]

9

* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or

10

* sell copies of the Software, and to permit persons to whom the Software is

11

* furnished to do so, subject to the following conditions:

12

*

13

* The above copyright notice and this permission notice shall be included in all

14

* copies or substantial portions of the Software.

15

*

16

* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

17

* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

18

* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

19

* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

20

* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

21

* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE

* SOFTWARE.

*/

#include "helpers.h"

#if defined(DATA_TYPE) && defined(BATCH_SIZE)

Michalis Spyrou

2018-09-11 11:16:47 +0100

[diff] [blame]

27

/** Batch to space transformation. (NCHW)
 *
 * Each work-item copies the single input element it is positioned on to the output tensor.
 * The block shape is read at run time from @p block_shape: element 0 is the block size along x,
 * element 1 is the block size along y.
 *
 * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=float
 * @note The input tensor batch size must be passed at compile time using -DBATCH_SIZE. e.g. -DBATCH_SIZE=2
 * @note The index arithmetic presumes BATCH_SIZE is a non-zero multiple of block_x * block_y; this is not checked inside the kernel.
 *
 * @param[in]  input_ptr                                 Pointer to the source tensor. Supported data types: All
 * @param[in]  input_stride_x                            Stride of the source tensor in X dimension (in bytes)
 * @param[in]  input_step_x                              input_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  input_stride_y                            Stride of the source tensor in Y dimension (in bytes)
 * @param[in]  input_step_y                              input_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  input_stride_z                            Stride of the source tensor in Z dimension (in bytes)
 * @param[in]  input_step_z                              input_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in]  input_offset_first_element_in_bytes       The offset of the first element in the source tensor
 * @param[in]  batch_id                                  The input tensor batch id
 * @param[in]  block_shape_ptr                           Pointer to the block shape tensor. Supported data types: S32
 * @param[in]  block_shape_stride_x                      Stride of the block shape tensor in X dimension (in bytes)
 * @param[in]  block_shape_step_x                        block_shape_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  block_shape_offset_first_element_in_bytes The offset of the first element in the block shape tensor
 * @param[out] output_ptr                                Pointer to the destination tensor. Supported data types: same as @p input_ptr
 * @param[in]  output_stride_x                           Stride of the destination tensor in X dimension (in bytes)
 * @param[in]  output_step_x                             output_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  output_stride_y                           Stride of the destination tensor in Y dimension (in bytes)
 * @param[in]  output_step_y                             output_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  output_stride_z                           Stride of the destination tensor in Z dimension (in bytes)
 * @param[in]  output_step_z                             output_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in]  output_stride_w                           Stride of the destination tensor in W dimension (in bytes)
 * @param[in]  output_step_w                             output_stride_w * number of elements along W processed per workitem(in bytes)
 * @param[in]  output_offset_first_element_in_bytes      The offset of the first element in the destination tensor
 */
__kernel void batch_to_space_nchw(
    TENSOR3D_DECLARATION(input),
    const int batch_id,
    VECTOR_DECLARATION(block_shape),
    TENSOR4D_DECLARATION(output))
{
    Tensor3D in    = CONVERT_TO_TENSOR3D_STRUCT(input);
    Tensor4D out   = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output, 0);
    Vector   block = CONVERT_TO_VECTOR_STRUCT_NO_STEP(block_shape);

    // Run-time block shape: element 0 -> x, element 1 -> y
    const int block_x = *((__global int *)vector_offset(&block, 0));
    const int block_y = *((__global int *)vector_offset(&block, 1));

    // r splits batch_id into an output batch (batch_id % r) and a block-cell index (batch_id / r)
    const int r = (BATCH_SIZE / (block_x * block_y));

    // NCHW: global id 0/1/2 map to x/y/z
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int z = get_global_id(2);
    const int w = batch_id % r;

    // Position of this input batch inside the block; x is the fastest-varying component
    const int block_idx = batch_id / r;
    const int out_x     = x * block_x + block_idx % block_x;
    const int out_y     = y * block_y + block_idx / block_x;

    *((__global DATA_TYPE *)tensor4D_offset(&out, out_x, out_y, z, w)) = *((__global DATA_TYPE *)in.ptr);
}

Michalis Spyrou

2018-09-11 11:16:47 +0100

[diff] [blame]

81

/** Batch to space transformation. (NHWC)
 *
 * Each work-item copies the single input element it is positioned on to the output tensor.
 * The block shape is read at run time from @p block_shape: element 0 is the block size along x,
 * element 1 is the block size along y.
 *
 * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=float
 * @note The input tensor batch size must be passed at compile time using -DBATCH_SIZE. e.g. -DBATCH_SIZE=2
 * @note The index arithmetic presumes BATCH_SIZE is a non-zero multiple of block_x * block_y; this is not checked inside the kernel.
 *
 * @param[in]  input_ptr                                 Pointer to the source tensor. Supported data types: All
 * @param[in]  input_stride_x                            Stride of the source tensor in X dimension (in bytes)
 * @param[in]  input_step_x                              input_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  input_stride_y                            Stride of the source tensor in Y dimension (in bytes)
 * @param[in]  input_step_y                              input_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  input_stride_z                            Stride of the source tensor in Z dimension (in bytes)
 * @param[in]  input_step_z                              input_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in]  input_offset_first_element_in_bytes       The offset of the first element in the source tensor
 * @param[in]  batch_id                                  The input tensor batch id
 * @param[in]  block_shape_ptr                           Pointer to the block shape tensor. Supported data types: S32
 * @param[in]  block_shape_stride_x                      Stride of the block shape tensor in X dimension (in bytes)
 * @param[in]  block_shape_step_x                        block_shape_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  block_shape_offset_first_element_in_bytes The offset of the first element in the block shape tensor
 * @param[out] output_ptr                                Pointer to the destination tensor. Supported data types: same as @p input_ptr
 * @param[in]  output_stride_x                           Stride of the destination tensor in X dimension (in bytes)
 * @param[in]  output_step_x                             output_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  output_stride_y                           Stride of the destination tensor in Y dimension (in bytes)
 * @param[in]  output_step_y                             output_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  output_stride_z                           Stride of the destination tensor in Z dimension (in bytes)
 * @param[in]  output_step_z                             output_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in]  output_stride_w                           Stride of the destination tensor in W dimension (in bytes)
 * @param[in]  output_step_w                             output_stride_w * number of elements along W processed per workitem(in bytes)
 * @param[in]  output_offset_first_element_in_bytes      The offset of the first element in the destination tensor
 */
__kernel void batch_to_space_nhwc(
    TENSOR3D_DECLARATION(input),
    const int batch_id,
    VECTOR_DECLARATION(block_shape),
    TENSOR4D_DECLARATION(output))
{
    Tensor3D in    = CONVERT_TO_TENSOR3D_STRUCT(input);
    Tensor4D out   = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output, 0);
    Vector   block = CONVERT_TO_VECTOR_STRUCT_NO_STEP(block_shape);

    // Run-time block shape: element 0 -> x, element 1 -> y
    const int block_x = *((__global int *)vector_offset(&block, 0));
    const int block_y = *((__global int *)vector_offset(&block, 1));

    // r splits batch_id into an output batch (batch_id % r) and a block-cell index (batch_id / r)
    const int r = (BATCH_SIZE / (block_x * block_y));

    // NHWC: global id 0 is z, global ids 1/2 are x/y
    const int x = get_global_id(1);
    const int y = get_global_id(2);
    const int z = get_global_id(0);
    const int w = batch_id % r;

    // Position of this input batch inside the block; x is the fastest-varying component
    const int block_idx = batch_id / r;
    const int out_x     = x * block_x + block_idx % block_x;
    const int out_y     = y * block_y + block_idx / block_x;

    // z is the innermost output coordinate in this layout
    *((__global DATA_TYPE *)tensor4D_offset(&out, z, out_x, out_y, w)) = *((__global DATA_TYPE *)in.ptr);
}

Michalis Spyrou

2018-08-31 10:07:09 +0100

[diff] [blame]

135

#endif // defined(DATA_TYPE) && defined(BATCH_SIZE)

136

137

#if defined(DATA_TYPE) && defined(BATCH_SIZE) && defined(BLOCK_SHAPE_X) && defined(BLOCK_SHAPE_Y)

Michalis Spyrou

2018-09-11 11:16:47 +0100

[diff] [blame]

138

/** Batch to space transformation with a compile-time block shape. (NCHW)
 *
 * Each work-item copies the single input element it is positioned on to the output tensor.
 *
 * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=float
 * @note The input tensor batch size must be passed at compile time using -DBATCH_SIZE. e.g. -DBATCH_SIZE=2
 * @note The block shape x must be passed at compile time using -DBLOCK_SHAPE_X. e.g. -DBLOCK_SHAPE_X=2
 * @note The block shape y must be passed at compile time using -DBLOCK_SHAPE_Y. e.g. -DBLOCK_SHAPE_Y=2
 * @note The index arithmetic presumes BATCH_SIZE is a non-zero multiple of BLOCK_SHAPE_X * BLOCK_SHAPE_Y; this is not checked inside the kernel.
 *
 * @param[in]  input_ptr                            Pointer to the source tensor. Supported data types: All
 * @param[in]  input_stride_x                       Stride of the source tensor in X dimension (in bytes)
 * @param[in]  input_step_x                         input_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  input_stride_y                       Stride of the source tensor in Y dimension (in bytes)
 * @param[in]  input_step_y                         input_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  input_stride_z                       Stride of the source tensor in Z dimension (in bytes)
 * @param[in]  input_step_z                         input_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in]  input_offset_first_element_in_bytes  The offset of the first element in the source tensor
 * @param[in]  batch_id                             The input tensor batch id
 * @param[out] output_ptr                           Pointer to the destination tensor. Supported data types: same as @p input_ptr
 * @param[in]  output_stride_x                      Stride of the destination tensor in X dimension (in bytes)
 * @param[in]  output_step_x                        output_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  output_stride_y                      Stride of the destination tensor in Y dimension (in bytes)
 * @param[in]  output_step_y                        output_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  output_stride_z                      Stride of the destination tensor in Z dimension (in bytes)
 * @param[in]  output_step_z                        output_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in]  output_stride_w                      Stride of the destination tensor in W dimension (in bytes)
 * @param[in]  output_step_w                        output_stride_w * number of elements along W processed per workitem(in bytes)
 * @param[in]  output_offset_first_element_in_bytes The offset of the first element in the destination tensor
 */
__kernel void batch_to_space_static_nchw(
    TENSOR3D_DECLARATION(input),
    const int batch_id,
    TENSOR4D_DECLARATION(output))
{
    Tensor3D in  = CONVERT_TO_TENSOR3D_STRUCT(input);
    Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output, 0);

    // Block shape is fixed at compile time
    const int block_x = BLOCK_SHAPE_X;
    const int block_y = BLOCK_SHAPE_Y;

    // r splits batch_id into an output batch (batch_id % r) and a block-cell index (batch_id / r)
    const int r = (BATCH_SIZE / (block_x * block_y));

    // NCHW: global id 0/1/2 map to x/y/z
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int z = get_global_id(2);
    const int w = batch_id % r;

    // Position of this input batch inside the block; x is the fastest-varying component
    const int block_idx = batch_id / r;
    const int out_x     = x * block_x + block_idx % block_x;
    const int out_y     = y * block_y + block_idx / block_x;

    *((__global DATA_TYPE *)tensor4D_offset(&out, out_x, out_y, z, w)) = *((__global DATA_TYPE *)in.ptr);
}

Michalis Spyrou

2018-09-11 11:16:47 +0100

[diff] [blame]

185

/** Batch to space transformation with a compile-time block shape. (NHWC)
 *
 * Each work-item copies the single input element it is positioned on to the output tensor.
 *
 * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=float
 * @note The input tensor batch size must be passed at compile time using -DBATCH_SIZE. e.g. -DBATCH_SIZE=2
 * @note The block shape x must be passed at compile time using -DBLOCK_SHAPE_X. e.g. -DBLOCK_SHAPE_X=2
 * @note The block shape y must be passed at compile time using -DBLOCK_SHAPE_Y. e.g. -DBLOCK_SHAPE_Y=2
 * @note The index arithmetic presumes BATCH_SIZE is a non-zero multiple of BLOCK_SHAPE_X * BLOCK_SHAPE_Y; this is not checked inside the kernel.
 *
 * @param[in]  input_ptr                            Pointer to the source tensor. Supported data types: All
 * @param[in]  input_stride_x                       Stride of the source tensor in X dimension (in bytes)
 * @param[in]  input_step_x                         input_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  input_stride_y                       Stride of the source tensor in Y dimension (in bytes)
 * @param[in]  input_step_y                         input_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  input_stride_z                       Stride of the source tensor in Z dimension (in bytes)
 * @param[in]  input_step_z                         input_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in]  input_offset_first_element_in_bytes  The offset of the first element in the source tensor
 * @param[in]  batch_id                             The input tensor batch id
 * @param[out] output_ptr                           Pointer to the destination tensor. Supported data types: same as @p input_ptr
 * @param[in]  output_stride_x                      Stride of the destination tensor in X dimension (in bytes)
 * @param[in]  output_step_x                        output_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  output_stride_y                      Stride of the destination tensor in Y dimension (in bytes)
 * @param[in]  output_step_y                        output_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  output_stride_z                      Stride of the destination tensor in Z dimension (in bytes)
 * @param[in]  output_step_z                        output_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in]  output_stride_w                      Stride of the destination tensor in W dimension (in bytes)
 * @param[in]  output_step_w                        output_stride_w * number of elements along W processed per workitem(in bytes)
 * @param[in]  output_offset_first_element_in_bytes The offset of the first element in the destination tensor
 */
__kernel void batch_to_space_static_nhwc(
    TENSOR3D_DECLARATION(input),
    const int batch_id,
    TENSOR4D_DECLARATION(output))
{
    Tensor3D in  = CONVERT_TO_TENSOR3D_STRUCT(input);
    Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output, 0);

    // Block shape is fixed at compile time
    const int block_x = BLOCK_SHAPE_X;
    const int block_y = BLOCK_SHAPE_Y;

    // r splits batch_id into an output batch (batch_id % r) and a block-cell index (batch_id / r)
    const int r = (BATCH_SIZE / (block_x * block_y));

    // NHWC: global id 0 is z, global ids 1/2 are x/y
    const int x = get_global_id(1);
    const int y = get_global_id(2);
    const int z = get_global_id(0);
    const int w = batch_id % r;

    // Position of this input batch inside the block; x is the fastest-varying component
    const int block_idx = batch_id / r;
    const int out_x     = x * block_x + block_idx % block_x;
    const int out_y     = y * block_y + block_idx / block_x;

    // z is the innermost output coordinate in this layout
    *((__global DATA_TYPE *)tensor4D_offset(&out, z, out_x, out_y, w)) = *((__global DATA_TYPE *)in.ptr);
}

Michalis Spyrou