# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# Description:
# Contains classes that hold commands for the high-level command stream (one command per DMA or NPU stripe).

Diego Russoea6111a2020-04-14 18:41:58 +010021from enum import IntEnum
22
Tim Hall79d07d22020-04-27 18:20:16 +010023import numpy as np
Diego Russoea6111a2020-04-14 18:41:58 +010024
Tim Hall79d07d22020-04-27 18:20:16 +010025from .operation import NpuBlockType
26from .numeric_util import round_up_divide
27from .range_set import MemoryAccessSet, AccessDirection
28
29
class Box:
    """N-dimensional region described by a start and an end coordinate.

    Both coordinates are stored as lists of equal length. Size computations
    use ``end - start`` per axis, i.e. the end coordinate is exclusive.
    """

    def __init__(self, start_coord, end_coord):
        """Store copies of the two coordinates and sanity-check them.

        Args:
            start_coord: iterable with the first coordinate of every axis.
            end_coord: iterable with the end coordinate of every axis.

        Raises:
            AssertionError: if the coordinates differ in length or any start
                value is larger than its matching end value.
        """
        self.start_coord = list(start_coord)
        self.end_coord = list(end_coord)
        # Compare the stored copies (not the raw argument) so that iterables
        # without a length, such as generators, are accepted as well.
        assert len(self.start_coord) == len(self.end_coord)
        for start, end in zip(self.start_coord, self.end_coord):
            assert start <= end

    def transform_with_strides_and_skirt(
        self, strides, skirt, ifm_shape, npu_block_type, concat_axis=0, concat_offset=0, split_offset=None, k_height=1
    ):
        """Derive the input (IFM) box that corresponds to this box, plus the vertical padding.

        The box is first shifted by the concat/split offsets, optionally widened
        to the full last axis, clamped to ``ifm_shape`` and finally scaled by the
        strides and grown by the skirt.

        Args:
            strides: sequence of strides or None; index 1 is applied to axis -3
                and index 2 to axis -2 (presumably NHWC ordering - TODO confirm).
            skirt: sequence of four values or None; indices 0 and 1 are
                subtracted from the start of axis -3 and -2, indices 2 and 3 are
                added to the end of axis -3 and -2 (presumably top, left,
                bottom, right - TODO confirm).
            ifm_shape: shape of the input the resulting box is clamped to; may
                be a list or a tuple.
            npu_block_type: block type of the operation; ConvolutionMxN and
                VectorProduct make the box span the whole last axis.
            concat_axis: axis from which ``concat_offset`` is subtracted.
            concat_offset: amount subtracted along ``concat_axis``.
            split_offset: optional per-axis offsets added to the box; when
                given, the full-last-axis widening is skipped.
            k_height: kernel height used when computing the bottom padding.

        Returns:
            Tuple ``(box, pad_top, pad_bottom)`` with the transformed Box and
            the number of padded rows above and below it.
        """
        new_start_coord = list(self.start_coord)
        new_end_coord = list(self.end_coord)

        new_start_coord[concat_axis] -= concat_offset
        new_end_coord[concat_axis] -= concat_offset

        if split_offset is not None:
            for idx, offset in enumerate(split_offset):
                new_start_coord[idx] += offset
                new_end_coord[idx] += offset

        if split_offset is None and npu_block_type in (NpuBlockType.ConvolutionMxN, NpuBlockType.VectorProduct):
            # these types of operations do a "dot product" over the entire IFM
            new_start_coord[-1] = 0
            new_end_coord[-1] = ifm_shape[-1]

        # Clamp width/height to the IFM, but only for axes both sides actually have.
        common_rank = min(len(new_end_coord), len(ifm_shape))
        if common_rank >= 2:
            new_end_coord[-2] = min(new_end_coord[-2], ifm_shape[-2])
        if common_rank >= 3:
            new_end_coord[-3] = min(new_end_coord[-3], ifm_shape[-3])

        pad_top = 0
        pad_bottom = 0
        if strides is not None and skirt is not None:
            if len(new_start_coord) >= 2:
                stride = strides[2]
                new_start_coord[-2] = max(new_start_coord[-2] * stride - skirt[1], 0)
                new_end_coord[-2] = min(new_end_coord[-2] * stride + skirt[3], ifm_shape[-2])

            if len(new_start_coord) >= 3:
                stride = strides[1]

                total_stride = stride * (new_end_coord[-3] - new_start_coord[-3] - 1)
                new_start_coord[-3] = new_start_coord[-3] * stride - skirt[0]

                # Rows requested above the IFM become top padding.
                pad_top = max(0, -new_start_coord[-3])
                new_start_coord[-3] = max(new_start_coord[-3], 0)

                # Left-pad the shape with 1s up to rank 3. Converting to a list
                # first keeps this working when the shape is passed as a tuple.
                while len(ifm_shape) < 3:
                    ifm_shape = [1] + list(ifm_shape)
                if (new_end_coord[-3] * stride + skirt[2]) > ifm_shape[-3]:
                    # pad_bottom is calculated based the diff between the end position of the weight kernel,
                    # after last stride and the ifm height.
                    k_start = new_start_coord[-3] - pad_top
                    pad_bottom = max(0, k_start + total_stride + k_height - ifm_shape[-3])

                new_end_coord[-3] = min(new_end_coord[-3] * stride + skirt[2], ifm_shape[-3])

        return Box(new_start_coord, new_end_coord), pad_top, pad_bottom

    @staticmethod
    def make_weight_box(weight_shape, npu_block_type, oc_range_start=None, oc_range_end=None, weights_transposed=False):
        """Build the Box covering the weights of an output-channel range.

        Without a complete range the box spans the whole ``weight_shape``.

        Args:
            weight_shape: shape of the weight tensor.
            npu_block_type: block type; ConvolutionDepthWise maps the output
                range back to an input range.
            oc_range_start: first output channel of the range, or None.
            oc_range_end: end output channel of the range, or None.
            weights_transposed: for depthwise weights, selects whether the
                range is applied to the last axis (True) or to axis -2 (False).

        Returns:
            Box over the selected part of the weights.

        Raises:
            AssertionError: if the resulting box leaves ``weight_shape``.
        """
        start = [0] * len(weight_shape)
        end = list(weight_shape)
        if oc_range_start is not None and oc_range_end is not None:
            if npu_block_type == NpuBlockType.ConvolutionDepthWise:
                # input range is output range divided by channel multiplier
                if weights_transposed:
                    start[-1] = oc_range_start // weight_shape[-2]
                    end[-1] = oc_range_end // weight_shape[-2]
                else:
                    start[-2] = oc_range_start // weight_shape[-1]
                    end[-2] = oc_range_end // weight_shape[-1]
            else:
                start[-1] = oc_range_start
                end[-1] = oc_range_end
        for axis_start, axis_end, axis_size in zip(start, end, weight_shape):
            assert 0 <= axis_start < axis_size
            assert 0 < axis_end <= axis_size

        return Box(start, end)

    def get_size_shape(self):
        """Return the extent (end - start) of every axis as a list of ints."""
        return [int(end - start) for start, end in zip(self.start_coord, self.end_coord)]

    def get_size(self):
        """Return the number of elements in the box (product of all extents)."""
        return int(np.prod(self.get_size_shape()))

    def __str__(self):
        return "<Box %s - %s>" % (self.start_coord, self.end_coord)

    __repr__ = __str__
122
123
class CommandType(IntEnum):
    """Integer identifiers for the kinds of high-level commands."""

    # One command per NPU stripe
    NpuStripe = 0
    # One command per DMA transfer
    DMA = 1
    # Not a command kind: its value equals the number of kinds listed above
    Size = 2
128
129
class Command:
    """Base class of the high-level commands; every query returns a neutral default."""

    def get_ofm_y_range_for_pass(self, ps_requested):
        """Return the OFM row range produced for ``ps_requested``; None by default."""
        return None

    def is_npu_pass_command(self):
        """Tell whether the command belongs to an NPU pass; False by default."""
        return False

    def get_memory_accesses(self):
        """Return the memory accesses made by the command; None by default."""
        return None

    def get_operation_count(self):
        """Return a numpy array of (DPU blocks, dma_ops); both zero by default."""
        # The slots should line up with the CommandType enum
        no_operations = (0, 0)
        return np.array(no_operations)
143
144
class NpuStripe(Command):
    """Command for one NPU stripe: the boxes of the tensors a pass touches for that stripe."""

    def __init__(
        self,
        ps,
        block_config,
        is_first,
        is_last,
        is_first_h_stripe,
        is_last_h_stripe,
        ifm_tensor,
        ifm_box,
        ofm_tensor,
        ofm_box,
        weight_tensor=None,
        weight_box=None,
        scale_tensor=None,
        concat_axis=0,
        concat_offset=0,
        ifm2_tensor=None,
        ifm2_box=None,
        pad_top=0,
        pad_bottom=0,
    ):
        """Store all arguments and check that the OFM box fits inside the OFM tensor shape."""
        self.cmdtype = CommandType.NpuStripe
        self.ps = ps
        self.block_config = block_config

        # Position of the stripe within its pass
        self.is_first = is_first
        self.is_last = is_last
        self.is_first_h_stripe = is_first_h_stripe
        self.is_last_h_stripe = is_last_h_stripe

        # Feature maps and the boxes accessed in them
        self.ifm_tensor = ifm_tensor
        self.ifm_box = ifm_box
        self.ifm2_tensor = ifm2_tensor
        self.ifm2_box = ifm2_box
        self.ofm_tensor = ofm_tensor
        self.ofm_box = ofm_box

        # Weights and scales
        self.weight_tensor = weight_tensor
        self.scale_tensor = scale_tensor
        self.weight_box = weight_box

        self.concat_axis = concat_axis
        self.concat_offset = concat_offset
        self.pad_top = pad_top
        self.pad_bottom = pad_bottom

        for axis, axis_end in enumerate(self.ofm_box.end_coord):
            assert axis_end <= self.ofm_tensor.shape[axis]

    def get_memory_accesses(self):
        """Return a MemoryAccessSet with the reads (IFM, IFM2, weights) and the write (OFM) of the stripe."""
        accesses = MemoryAccessSet()

        def record(tensor, box, direction):
            # Register the address ranges that `box` covers in `tensor`
            address_ranges = tensor.get_address_ranges_for_coordinates(box.start_coord, box.end_coord)
            accesses.add(address_ranges, direction)

        # Input tensors with an empty shape are not recorded
        ifm = self.ifm_tensor
        if ifm is not None and ifm.shape != []:
            record(ifm, self.ifm_box, AccessDirection.Read)

        ifm2 = self.ifm2_tensor
        if ifm2 is not None and ifm2.shape != []:
            record(ifm2, self.ifm2_box, AccessDirection.Read)

        if self.ofm_tensor is not None:
            record(self.ofm_tensor, self.ofm_box, AccessDirection.Write)

        if self.weight_tensor is not None:
            record(self.weight_tensor, self.weight_box, AccessDirection.Read)

        return accesses

    def is_npu_pass_command(self):
        """NPU stripes are always NPU pass commands."""
        return True

    def __str__(self):
        fields = (
            self.ps.name,
            self.ifm_box,
            self.ifm2_box,
            self.ofm_box,
            self.weight_box,
            self.block_config,
        )
        return "<NPUStripe: ps=%s, ifm_box=%s, ifm2_box=%s, ofm_box=%s, weight_box=%s, block_config=%s>" % fields

    __repr__ = __str__

    def get_ofm_y_range_for_pass(self, ps_requested):
        """Return (start, end) of the OFM box along axis -3, or None.

        None is returned when ``ps_requested`` is not this stripe's pass or the
        OFM box has fewer than three axes.
        """
        if ps_requested != self.ps:
            return None
        box = self.ofm_box
        if len(box.start_coord) < 3:
            return None
        return (box.start_coord[-3], box.end_coord[-3])

    def get_block_dimensions(self):
        """Return how many blocks the OFM box needs along (height, width, depth).

        Height and width count as 1 unless the OFM box has at least four axes.
        """
        box_start = self.ofm_box.start_coord
        box_end = self.ofm_box.end_coord

        depth = box_end[-1] - box_start[-1]
        if len(box_end) >= 4:
            width = box_end[-2] - box_start[-2]
            height = box_end[-3] - box_start[-3]
        else:
            width = 1
            height = 1

        assert height >= 0
        assert width >= 0
        assert depth >= 0

        # block_config indices 0, 1 and 3 hold the block height, width and depth used here
        cfg = self.block_config
        blocks_high = round_up_divide(height, cfg[0])
        blocks_wide = round_up_divide(width, cfg[1])
        blocks_deep = round_up_divide(depth, cfg[3])
        return (blocks_high, blocks_wide, blocks_deep)

    def get_operation_count(self):
        """Return a numpy array of (DPU blocks, dma_ops); a stripe performs no DMA ops."""
        return np.array((self.get_n_blocks(), 0))

    def get_n_blocks(self):
        """Return the total number of blocks in the stripe."""
        blocks_high, blocks_wide, blocks_deep = self.get_block_dimensions()
        n_blocks = blocks_high * blocks_wide * blocks_deep
        assert n_blocks >= 0
        return n_blocks

    def get_single_block_command(self, block_idx):
        """Return a new NpuStripe restricted to the block with linear index ``block_idx``.

        The index is decomposed into (height, width, depth) block coordinates
        with depth varying fastest. The IFM and weight boxes are re-derived
        from the resulting OFM block.
        """
        cfg = self.block_config
        block_shape = (cfg[0], cfg[1], cfg[3])
        _, blocks_wide, blocks_deep = self.get_block_dimensions()

        # Split the linear index into per-axis block coordinates
        block_coord = []
        remainder = block_idx
        for step in (blocks_wide * blocks_deep, blocks_deep, 1):
            position, remainder = divmod(remainder, step)
            block_coord.append(position)

        assert remainder == 0

        # Work on copies so that the box of this stripe stays untouched
        block_start = list(self.ofm_box.start_coord)
        block_end = list(self.ofm_box.end_coord)

        # cut out a nice block shape
        for axis in (-1, -2, -3):
            if len(block_start) >= -axis:
                block_start[axis] += block_shape[axis] * block_coord[axis]
                block_end[axis] = min(block_end[axis], block_start[axis] + block_shape[axis])

        ps = self.ps
        op_strides = None
        op_skirt = None
        if ps.primary_op is not None:
            op_attrs = ps.primary_op.attrs
            op_strides = op_attrs.get("strides", None)
            op_skirt = op_attrs.get("skirt", None)
        npu_block_type = ps.npu_block_type

        ofm_box = Box(block_start, block_end)
        # The padding returned by the transform is not used for the single-block command
        ifm_box, _, _ = ofm_box.transform_with_strides_and_skirt(
            op_strides, op_skirt, self.ifm_tensor.shape, npu_block_type, self.concat_axis, self.concat_offset
        )

        weight_box = None
        weights = self.weight_tensor
        if weights is not None:
            oc_start = block_start[-1]
            oc_end = block_end[-1]
            # When the concat axis is the last axis of the weights, undo the concat offset
            if self.concat_axis - len(weights.shape) == -1:
                oc_start -= self.concat_offset
                oc_end -= self.concat_offset

            weight_box = Box.make_weight_box(
                weights.shape, npu_block_type, oc_start, oc_end, weights.weight_transpose_depthwise,
            )

        return NpuStripe(
            self.ps,
            self.block_config,
            self.is_first,
            self.is_last,
            self.is_first_h_stripe,
            self.is_last_h_stripe,
            self.ifm_tensor,
            ifm_box,
            self.ofm_tensor,
            ofm_box,
            weight_tensor=self.weight_tensor,
            weight_box=weight_box,
            scale_tensor=self.scale_tensor,
            concat_axis=self.concat_axis,
            concat_offset=self.concat_offset,
        )
338
339
class DMA(Command):
    """Command for one DMA transfer of a box from an input tensor to an output tensor."""

    def __init__(self, in_tensor, out_tensor, box):
        """Remember the source tensor, the destination tensor and the box to transfer."""
        self.cmdtype = CommandType.DMA
        self.in_tensor = in_tensor
        self.out_tensor = out_tensor
        self.box = box

    def __str__(self):
        fields = (self.in_tensor.name, self.out_tensor.name, self.box)
        return "<DMA: in=%s, out=%s, box=%s>" % fields

    __repr__ = __str__

    def get_memory_accesses(self):
        """Return a MemoryAccessSet holding the read of the input and the write of the output.

        The same box coordinates are used for both tensors.
        """
        accesses = MemoryAccessSet()
        first, last = self.box.start_coord, self.box.end_coord

        source_ranges = self.in_tensor.get_address_ranges_for_coordinates(first, last)
        accesses.add(source_ranges, AccessDirection.Read)

        target_ranges = self.out_tensor.get_address_ranges_for_coordinates(first, last)
        accesses.add(target_ranges, AccessDirection.Write)

        return accesses

    def get_operation_count(self):
        """Return a numpy array of (DPU blocks, dma_ops): no DPU blocks and one DMA op."""
        return np.array((0, 1))