// Copyright (c) 2020-2021, ARM Limited.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "tensor_ops.h"
#include "quant_util.h"
#include "template_types.h"

using namespace TosaReference;
using namespace Eigen;
using namespace tosa;

template <int Rank, DType Dtype>
OpArgMax<Rank, Dtype>::OpArgMax(TosaAttributeBase* attribute_, TosaQuantInfoBase* qinfo_, uint64_t id_)
    : GraphNode(Op_ARGMAX, id_)
{
    setRequiredOperands(1, 1);
    setRequiredRank(0, 6);

    INIT_ATTRIBUTE(Axis);
}

template <int Rank, DType Dtype>
OpArgMax<Rank, Dtype>::~OpArgMax()
{
    if (attribute)
        delete attribute;
}

template <int Rank, DType Dtype>
int OpArgMax<Rank, Dtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    input  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    output = dynamic_cast<TosaReference::TensorTemplate<TOut>*>(outputs[0]);

    return 0;
}

template <int Rank, DType Dtype>
int OpArgMax<Rank, Dtype>::eval()
{
    Eigen::Tensor<DenseIndex, Rank - 1> index = this->input->getTensor().argmax(attribute->axis());

    this->output->getTensor() = index.unaryExpr([](DenseIndex in) -> OutEigenType { return (OutEigenType)in; });

    return GraphNode::eval();
}

template <DType Dtype>
OpAvgPool2d<Dtype>::OpAvgPool2d(TosaAttributeBase* attribute_, TosaQuantInfoBase* qinfo_, uint64_t id_)
    : GraphNode(Op_AVG_POOL2D, id_)
{
    setRequiredOperands(1, 1);
    setRequiredRank(4);

    INIT_ATTRIBUTE(Pool2d);
    INIT_QINFO(Unary);
}

template <DType Dtype>
OpAvgPool2d<Dtype>::~OpAvgPool2d()
{
    if (attribute)
        delete attribute;
}

template <DType Dtype>
int OpAvgPool2d<Dtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    if (inputs[0]->matchType(*outputs[0]))
    {
        printNodeValidationError("OpAvgPool2d: input and output tensor type mismatch");
        return 1;
    }

    in  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    out = dynamic_cast<TosaReference::TensorTemplate<TOut>*>(outputs[0]);

    if (attribute->padding().size() != 4)
    {
        printNodeValidationError("OpAvgPool2d: illegal size for attribute padding");
        return 1;
    }

    if (attribute->kernel().size() != 2)
    {
        printNodeValidationError("OpAvgPool2d: illegal size for attribute kernel");
        return 1;
    }

    if (attribute->stride().size() != 2)
    {
        printNodeValidationError("OpAvgPool2d: illegal size for attribute stride");
        return 1;
    }

    return 0;
}

template <DType Dtype>
ETensor1<int32_t> OpAvgPool2d<Dtype>::calculate_div_map_1d(int in_size, int out_size, int kernel_size, int stride)
{
    ETensor1<int32_t> result(out_size);

    int32_t total_pad = (out_size - 1) * stride + kernel_size - in_size;
    total_pad         = total_pad < 0 ? 0 : total_pad;

    int32_t pad_left  = total_pad >> 1;
    int32_t pad_right = total_pad - pad_left;

    result.setConstant(kernel_size);
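
    // Worked example (illustrative): in_size=4, out_size=2, kernel_size=3,
    // stride=2 gives total_pad=1, pad_left=0, pad_right=1; the loops below
    // then reduce the divisor map from [3, 3] to [3, 2], since the last
    // window covers one pad position.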

    // indices to the left of 'left_index' and to the right of 'right_index'
    // mark output positions whose input window covers one or more pad bits
    int32_t left_index  = pad_left / stride;
    int32_t right_index = pad_right / stride;

    // ultra-small activations are not handled yet
    ASSERT_MSG_NODE((out_size - 1 - right_index) >= left_index, "AvgPool2d: Small activations not supported yet");

    // subtract the number of pad bits covered by each of these windows
    while (left_index >= 0)
    {
        result(left_index) -= (pad_left - left_index * stride);
        left_index--;
    }

    while (right_index >= 0)
    {
        result(out_size - 1 - right_index) -= (pad_right - right_index * stride);
        right_index--;
    }

    return result;
}

// Assuming the input and output tensors have the same scale, as in the TFLite
// reference, so the input and output do not need to be rescaled.
template <DType Dtype>
int OpAvgPool2d<Dtype>::eval()
{
    int in_batch    = this->in->getShape()[0];
    int in_height   = this->in->getShape()[1];
    int in_width    = this->in->getShape()[2];
    int in_channels = this->in->getShape()[3];

    int out_batch    = this->out->getShape()[0];
    int out_height   = this->out->getShape()[1];
    int out_width    = this->out->getShape()[2];
    int out_channels = this->out->getShape()[3];

    ASSERT_MSG_NODE(in_batch == out_batch, "OpAvgPool2d: tensor batch mismatch %d != %d", in_batch, out_batch);

    int padding_top    = this->attribute->padding()[0];
    int padding_bottom = this->attribute->padding()[1];
    int padding_left   = this->attribute->padding()[2];
    int padding_right  = this->attribute->padding()[3];
    int kernel_h       = this->attribute->kernel()[0];
    int kernel_w       = this->attribute->kernel()[1];
    int stride_h       = this->attribute->stride()[0];
    int stride_w       = this->attribute->stride()[1];

    DEBUG_INFO(OP,
               "perform AvgPool2d, input.shape=[%d,%d,%d,%d], output.shape=[%d,%d,%d,%d], kernel=[%d,%d], "
               "stride=[%d,%d], padding=[%d,%d,%d,%d]",
               in_batch, in_height, in_width, in_channels, out_batch, out_height, out_width, out_channels, kernel_h,
               kernel_w, stride_h, stride_w, padding_top, padding_bottom, padding_left, padding_right);

    Eigen::array<Eigen::Index, 2> im2col_input_dims;
    im2col_input_dims[0] = kernel_h * kernel_w;
    im2col_input_dims[1] = out_batch * out_height * out_width * out_channels;

    Eigen::array<Eigen::Index, 4> col2im_output_dims;
    col2im_output_dims[0] = out_batch;
    col2im_output_dims[1] = out_height;
    col2im_output_dims[2] = out_width;
    col2im_output_dims[3] = out_channels;

    Eigen::array<std::pair<int32_t, int32_t>, 4> padding;
    padding[0] = std::make_pair(0, 0);
    padding[1] = std::make_pair(padding_top, padding_bottom);
    padding[2] = std::make_pair(padding_left, padding_right);
    padding[3] = std::make_pair(0, 0);

    ETensor4<InEigenType> input_val = this->in->getTensor();
    if (this->qinfo)
    {
        input_val = input_val - (InEigenType)this->qinfo->input_zp();
    }

    ETensor4<InEigenType> input_padded = input_val.pad(padding);

    // assuming input and output have the same scale, no input/output
    // rescaling is required here
    // TODO: check whether TOSA actually makes this assumption

    // extract_image_patches() output [N, KH, KW, H * W, C]
    // transpose to [KH, KW, N, H * W, C]
    // reshape to [KH * KW, N * H * W * C]
    ETensor2<InEigenType> input_extract_patches =
        input_padded.extract_image_patches(kernel_h, kernel_w, stride_h, stride_w, 1, 1, Eigen::PADDING_VALID)
            .shuffle(Eigen::array<Eigen::Index, 5>{ 1, 2, 0, 3, 4 })
            .reshape(im2col_input_dims);
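
    // Illustrative shapes: with N=1, H=W=2, C=1 and a 2x2 kernel at stride 1,
    // each column of input_extract_patches holds the four values of one pooling
    // window, so the loop below sums down axis 0, one window per column.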

    // 1D result with [N * H * W * C]
    ETensor1<AccEigenType> out_1d(this->out->getElementCount());
    out_1d.setZero();

    // sum pool
    for (size_t i = 0; i < this->out->getElementCount(); i++)
    {
        for (int32_t j = 0; j < kernel_h * kernel_w; j++)
        {
            out_1d(i) += (AccEigenType)input_extract_patches(j, i);
        }
    }

    // reshape result to [N, H, W, C] and divide with div_map
    ETensor4<AccEigenType> sum = out_1d.reshape(col2im_output_dims);

    // calculate 1d height/width div_map (number of elements this pooling window covers)
    // and outer product to get 2d div_map, then reshape/broadcast to [N, H, W, C]
    ETensor1<int32_t> div_map_h = calculate_div_map_1d(in_height, out_height, kernel_h, stride_h);
    ETensor1<int32_t> div_map_w = calculate_div_map_1d(in_width, out_width, kernel_w, stride_w);
    Eigen::array<Eigen::IndexPair<Eigen::Index>, 1> contract_dims = { Eigen::IndexPair<Eigen::Index>(1, 0) };
    Eigen::array<Eigen::Index, 4> bcast{ out_batch, 1, 1, out_channels };

    ETensor4<int32_t> div_map =
        div_map_h.reshape(Eigen::array<Eigen::Index, 2>{ out_height, 1 })
            .contract(div_map_w.reshape(Eigen::array<Eigen::Index, 2>{ 1, out_width }), contract_dims)
            .reshape(Eigen::array<Eigen::Index, 4>{ 1, out_height, out_width, 1 })
            .broadcast(bcast);

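    // For quantized types the division below happens in fixed point:
    // reciprocal_scale() derives a (multiplier, shift) pair approximating 1/div,
    // and apply_scale_32() then computes roughly (value * multiplier) >> shift
    // with rounding, avoiding a per-element integer division.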
    if (Dtype != DType_FLOAT)
    {
        this->out->getTensor() = sum.binaryExpr(div_map, [](AccEigenType value, int32_t div) -> OutEigenType {
            int32_t multiplier, shift;
            TosaReference::QuantUtil::reciprocal_scale(div, multiplier, shift);

            return (OutEigenType)TosaReference::QuantUtil::apply_scale_32(value, multiplier, shift, false);
        });
        this->out->getTensor() = this->out->getTensor() + (OutEigenType)(this->qinfo->output_zp());
        this->out->getTensor() = this->out->getTensor().cwiseMax((OutEigenType)QMin);
        this->out->getTensor() = this->out->getTensor().cwiseMin((OutEigenType)QMax);
    }
    else
    {
        this->out->getTensor() = (sum / div_map.template cast<AccEigenType>()).template cast<OutEigenType>();
    }

    return GraphNode::eval();
}

template <DType InDtype, DType WeightDtype>
OpConv2d<InDtype, WeightDtype>::OpConv2d(TosaAttributeBase* attribute_, TosaQuantInfoBase* qinfo_, uint64_t id_)
    : GraphNode(Op_CONV2D, id_)
{
    setRequiredOperands(3, 1);
    setRequiredRank(4);

    INIT_ATTRIBUTE(Conv2d);
    INIT_QINFO(Conv);
}

template <DType InDtype, DType WeightDtype>
OpConv2d<InDtype, WeightDtype>::~OpConv2d()
{
    if (attribute)
        delete attribute;
    if (qinfo)
        delete qinfo;
}

template <DType InDtype, DType WeightDtype>
int OpConv2d<InDtype, WeightDtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    // 'bias' is checked separately since it doesn't make sense to require a rank ranging from 1 to 4
    if (inputs[2]->getRank() != 1)
    {
        printNodeValidationError("OpConv2d: bias tensor must be rank 1");
        return 1;
    }

    input  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
    bias   = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
    output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);

    if (attribute->padding().size() != 4)
    {
        printNodeValidationError("OpConv2d: illegal size for attribute padding");
        return 1;
    }

    if (attribute->stride().size() != 2)
    {
        printNodeValidationError("OpConv2d: illegal size for attribute stride");
        return 1;
    }

    if (attribute->dilation().size() != 2)
    {
        printNodeValidationError("OpConv2d: illegal size for attribute dilation");
        return 1;
    }

    return 0;
}

template <DType InDtype, DType WeightDtype>
int OpConv2d<InDtype, WeightDtype>::eval()
{
    int in_batch    = this->input->getShape()[0];
    int in_height   = this->input->getShape()[1];
    int in_width    = this->input->getShape()[2];
    int in_channels = this->input->getShape()[3];

    int f_out_channels = this->weight->getShape()[0];
    int f_height       = this->weight->getShape()[1];
    int f_width        = this->weight->getShape()[2];
    int f_in_channels  = this->weight->getShape()[3];

    int b_out_channels = this->bias->getShape()[0];

    int out_batch    = this->output->getShape()[0];
    int out_height   = this->output->getShape()[1];
    int out_width    = this->output->getShape()[2];
    int out_channels = this->output->getShape()[3];

    ASSERT_MSG_NODE(in_batch == out_batch, "OpConv2d: tensor batch mismatch %d != %d", in_batch, out_batch);
    ASSERT_MSG_NODE(f_in_channels == in_channels, "OpConv2d: tensor input channel mismatch %d != %d", f_in_channels,
                    in_channels);
    ASSERT_MSG_NODE(f_out_channels == out_channels, "OpConv2d: tensor output channel mismatch %d != %d",
                    f_out_channels, out_channels);
    ASSERT_MSG_NODE(b_out_channels == out_channels, "OpConv2d: tensor output channel mismatch %d != %d",
                    b_out_channels, out_channels);

    int padding_top    = this->attribute->padding()[0];
    int padding_bottom = this->attribute->padding()[1];
    int padding_left   = this->attribute->padding()[2];
    int padding_right  = this->attribute->padding()[3];
    int stride_h       = this->attribute->stride()[0];
    int stride_w       = this->attribute->stride()[1];
    int dilation_h     = this->attribute->dilation()[0];
    int dilation_w     = this->attribute->dilation()[1];

    DEBUG_INFO(OP,
               "perform OpConv2d, input.shape=[%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d], output.shape=[%d,%d,%d,%d], "
               "stride=[%d,%d], dilation=[%d,%d], padding=[%d,%d,%d,%d]",
               in_batch, in_height, in_width, in_channels, f_height, f_width, f_in_channels, f_out_channels, out_batch,
               out_height, out_width, out_channels, stride_h, stride_w, dilation_h, dilation_w, padding_top,
               padding_bottom, padding_left, padding_right);

    // GEMM-conv2d, left matrix is input, right matrix is weight
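    // Shape sketch (illustrative): im2col turns the [N, IH, IW, IC] input into an
    // [N * OH * OW, KH * KW * IC] matrix and the [OC, KH, KW, IC] weight into a
    // [KH * KW * IC, OC] matrix, so a single contraction produces all output
    // pixels at once as an [N * OH * OW, OC] matrix.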
    Eigen::array<Eigen::Index, 2> im2col_input_dims;
    im2col_input_dims[0] = out_batch * out_height * out_width;
    im2col_input_dims[1] = f_height * f_width * f_in_channels;

    Eigen::array<Eigen::Index, 2> im2col_weight_dims;
    im2col_weight_dims[0] = f_height * f_width * f_in_channels;
    im2col_weight_dims[1] = f_out_channels;

    Eigen::array<Eigen::Index, 2> bias_reshaped_dims;
    bias_reshaped_dims[0] = 1;
    bias_reshaped_dims[1] = b_out_channels;

    Eigen::array<Eigen::Index, 4> weight_zp_bcast_dims;
    weight_zp_bcast_dims[0] = f_height;
    weight_zp_bcast_dims[1] = f_width;
    weight_zp_bcast_dims[2] = f_in_channels;

    Eigen::array<Eigen::Index, 2> bias_bcast_dims;
    bias_bcast_dims[0] = out_batch * out_height * out_width;
    bias_bcast_dims[1] = 1;

    Eigen::array<Eigen::Index, 4> col2im_output_dims;
    col2im_output_dims[0] = out_batch;
    col2im_output_dims[1] = out_height;
    col2im_output_dims[2] = out_width;
    col2im_output_dims[3] = out_channels;

    Eigen::array<Eigen::IndexPair<Eigen::Index>, 1> contract_dims = { Eigen::IndexPair<Eigen::Index>(1, 0) };

    Eigen::array<std::pair<int32_t, int32_t>, 4> padding;
    padding[0] = std::make_pair(0, 0);
    padding[1] = std::make_pair(padding_top, padding_bottom);
    padding[2] = std::make_pair(padding_left, padding_right);
    padding[3] = std::make_pair(0, 0);

    TIn input_val      = this->input->getTensor();
    TWeight weight_val = this->weight->getTensor();
    if (this->qinfo)
    {
        input_val  = input_val - (InEigenType)this->qinfo->input_zp();
        weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
    }

    ETensor4<InEigenType> input_padded = input_val.pad(padding);

    // extract_image_patches() output [N, KH, KW, H * W, C]
    // need to transpose to [N, H * W, KH, KW, C]
    ETensor5<InEigenType> input_extract_patches =
        input_padded
            .extract_image_patches(f_height, f_width, stride_h, stride_w, dilation_h, dilation_w, Eigen::PADDING_VALID)
            .shuffle(Eigen::array<Eigen::Index, 5>{ 0, 3, 1, 2, 4 });

    // reshape input to [N * H * W, KH * KW * C]
    ETensor2<InEigenType> im2col_input = input_extract_patches.reshape(im2col_input_dims);

    // transpose and reshape weight from [OC, H, W, IC] to [H * W * IC, OC]
    ETensor2<WeightEigenType> im2col_weight =
        weight_val.shuffle(Eigen::array<Eigen::Index, 4>({ 1, 2, 3, 0 })).reshape(im2col_weight_dims);

    // no need to apply the bias multiplier ( * bias_scale and >> bias_shift) since tflite already scales it;
    // reshape from [C] to [1, C] and broadcast to [N * H * W, C]
    ETensor2<AccEigenType> bias_2d = this->bias->getTensor().reshape(bias_reshaped_dims).broadcast(bias_bcast_dims);

    // output matrix is [N * H * W, C]
    ETensor2<AccEigenType> contracted_result =
        im2col_input.template cast<AccEigenType>().contract(im2col_weight.template cast<AccEigenType>(),
                                                            contract_dims);

    // add bias
    ETensor2<AccEigenType> biased_output = contracted_result + bias_2d.template cast<AccEigenType>();

    // reshape back to [N, H, W, C]
    this->output->getTensor() = biased_output.reshape(col2im_output_dims);

    if (AccDtype == DType_INT48)
    {
        this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
    }

    return GraphNode::eval();
}

template <DType InDtype, DType WeightDtype>
OpDepthwiseConv2d<InDtype, WeightDtype>::OpDepthwiseConv2d(TosaAttributeBase* attribute_,
                                                           TosaQuantInfoBase* qinfo_,
                                                           uint64_t id_)
    : GraphNode(Op_DEPTHWISE_CONV2D, id_)
{
    setRequiredOperands(3, 1);
    setRequiredRank(4);

    INIT_ATTRIBUTE(Conv2d);
    INIT_QINFO(Conv);
}

template <DType InDtype, DType WeightDtype>
OpDepthwiseConv2d<InDtype, WeightDtype>::~OpDepthwiseConv2d()
{
    if (attribute)
        delete attribute;
    if (qinfo)
        delete qinfo;
}

template <DType InDtype, DType WeightDtype>
int OpDepthwiseConv2d<InDtype, WeightDtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    // 'bias' is checked separately since it doesn't make sense to require a rank ranging from 1 to 4
    if (inputs[2]->getRank() != 1)
    {
        printNodeValidationError("OpDepthwiseConv2d: bias tensor must be rank 1");
        return 1;
    }

    input  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
    bias   = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
    output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);

    if (attribute->padding().size() != 4)
    {
        printNodeValidationError("OpDepthwiseConv2d: illegal size for attribute padding");
        return 1;
    }

    if (attribute->stride().size() != 2)
    {
        printNodeValidationError("OpDepthwiseConv2d: illegal size for attribute stride");
        return 1;
    }

    if (attribute->dilation().size() != 2)
    {
        printNodeValidationError("OpDepthwiseConv2d: illegal size for attribute dilation");
        return 1;
    }

    return 0;
}

template <DType InDtype, DType WeightDtype>
int OpDepthwiseConv2d<InDtype, WeightDtype>::eval()
{
    int in_batch    = this->input->getShape()[0];
    int in_height   = this->input->getShape()[1];
    int in_width    = this->input->getShape()[2];
    int in_channels = this->input->getShape()[3];

    int f_height      = this->weight->getShape()[0];
    int f_width       = this->weight->getShape()[1];
    int f_in_channels = this->weight->getShape()[2];
    int f_multiplier  = this->weight->getShape()[3];

    int b_out_channels = this->bias->getShape()[0];

    int out_batch    = this->output->getShape()[0];
    int out_height   = this->output->getShape()[1];
    int out_width    = this->output->getShape()[2];
    int out_channels = this->output->getShape()[3];

    ASSERT_MSG_NODE(in_batch == out_batch, "OpDepthwiseConv2d: tensor batch mismatch %d != %d", in_batch, out_batch);
    ASSERT_MSG_NODE(f_in_channels == in_channels, "OpDepthwiseConv2d: tensor input channel mismatch %d != %d",
                    f_in_channels, in_channels);
    ASSERT_MSG_NODE(in_channels * f_multiplier == out_channels,
                    "OpDepthwiseConv2d: tensor output channel mismatch %d != %d", in_channels * f_multiplier,
                    out_channels);
    ASSERT_MSG_NODE(b_out_channels == out_channels, "OpDepthwiseConv2d: tensor b_out_channels mismatch %d != %d",
                    b_out_channels, out_channels);

    int padding_top    = this->attribute->padding()[0];
    int padding_bottom = this->attribute->padding()[1];
    int padding_left   = this->attribute->padding()[2];
    int padding_right  = this->attribute->padding()[3];
    int stride_h       = this->attribute->stride()[0];
    int stride_w       = this->attribute->stride()[1];
    int dilation_h     = this->attribute->dilation()[0];
    int dilation_w     = this->attribute->dilation()[1];

    DEBUG_INFO(OP,
               "perform OpDepthwiseConv2d, input.shape=[%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d], "
               "output.shape=[%d,%d,%d,%d], stride=[%d,%d], dilation=[%d,%d], padding=[%d,%d,%d,%d]",
               in_batch, in_height, in_width, in_channels, f_height, f_width, f_in_channels, f_multiplier, out_batch,
               out_height, out_width, out_channels, stride_h, stride_w, dilation_h, dilation_w, padding_top,
               padding_bottom, padding_left, padding_right);

    Eigen::array<std::pair<int32_t, int32_t>, 4> padding;
    padding[0] = std::make_pair(0, 0);
    padding[1] = std::make_pair(padding_top, padding_bottom);
    padding[2] = std::make_pair(padding_left, padding_right);
    padding[3] = std::make_pair(0, 0);

    TIn input_val      = this->input->getTensor();
    TWeight weight_val = this->weight->getTensor();
    if (this->qinfo)
    {
        input_val  = input_val - (InEigenType)this->qinfo->input_zp();
        weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
    }

    ETensor4<InEigenType> input_padded = input_val.pad(padding);

    // GEMM doesn't fit DepthwiseConv2d well, so instead:
    // 1. use extract_image_patches() to handle stride/dilation/padding
    // 2. perform direct convolution

    // 1. extract_image_patches() output [N, KH, KW, OH * OW, IC]
    ETensor5<InEigenType> input_extract_patches = input_padded.extract_image_patches(
        f_height, f_width, stride_h, stride_w, dilation_h, dilation_w, Eigen::PADDING_VALID);
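
    // Note (clarifying): the patch tensor is indexed below as
    // (N, KH, KW, window, IC), where 'window' enumerates the OH * OW output
    // positions; output channel ic * f_multiplier + cm interleaves the channel
    // multiplier within each input channel, matching the [KH, KW, IC, M]
    // depthwise weight layout.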

    Eigen::array<Eigen::Index, 4> reshape_dim;
    reshape_dim.fill(1);
    reshape_dim[3] = b_out_channels;

    Eigen::array<Eigen::Index, 4> bcast;
    bcast[0] = out_batch;
    bcast[1] = out_height;
    bcast[2] = out_width;
    bcast[3] = 1;

    // initialize with bias
    this->output->getTensor() = this->bias->getTensor().reshape(reshape_dim).broadcast(bcast);

    // 2. direct depthwise convolution
    for (int ob = 0; ob < out_batch; ob++)
    {
        for (int oh = 0; oh < out_height; oh++)
        {
            for (int ow = 0; ow < out_width; ow++)
            {
                for (int ic = 0; ic < in_channels; ic++)
                {
                    for (int cm = 0; cm < f_multiplier; cm++)
                    {
                        for (int fh = 0; fh < f_height; fh++)
                        {
                            for (int fw = 0; fw < f_width; fw++)
                            {
                                this->output->getTensor()(ob, oh, ow, ic * f_multiplier + cm) +=
                                    ((AccEigenType)input_extract_patches(ob, fh, fw, ow * out_height + oh, ic) *
                                     (AccEigenType)weight_val(fh, fw, ic, cm));
                            }
                        }
                    }
                }
            }
        }
    }

    if (AccDtype == DType_INT48)
    {
        this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
    }

    return GraphNode::eval();
}

template <DType InDtype, DType WeightDtype>
OpFullyConnected<InDtype, WeightDtype>::OpFullyConnected(TosaAttributeBase* attribute_,
                                                         TosaQuantInfoBase* qinfo_,
                                                         uint64_t id_)
    : GraphNode(Op_FULLY_CONNECTED, id_)
{
    setRequiredOperands(3, 1);
    setRequiredRank(2);

    INIT_QINFO(Conv);
}

template <DType InDtype, DType WeightDtype>
OpFullyConnected<InDtype, WeightDtype>::~OpFullyConnected()
{
    if (qinfo)
        delete qinfo;
}

template <DType InDtype, DType WeightDtype>
int OpFullyConnected<InDtype, WeightDtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    input  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
    bias   = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);

    if (input->getShape()[1] != weight->getShape()[1])
    {
        printNodeValidationError("OpFullyConnected operator input.shape[1] should match weight.shape[1]");
        return 1;
    }

    if (weight->getShape()[0] != bias->getShape()[0])
    {
        printNodeValidationError("OpFullyConnected operator bias.shape[0] should match weight.shape[0]");
        return 1;
    }

    output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);

    return 0;
}

template <DType InDtype, DType WeightDtype>
int OpFullyConnected<InDtype, WeightDtype>::eval()
{
    typedef Eigen::Tensor<int, 1>::DimensionPair DimPair;
    Eigen::array<DimPair, 1> dims{ { DimPair(1, 0) } };

    Eigen::array<Eigen::Index, 2> weight_shuffle{ 1, 0 };

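    // The weight tensor is stored as [OC, IC]; shuffling it to [IC, OC] lets the
    // single contraction below compute [N, IC] x [IC, OC] -> [N, OC] directly.
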
    Eigen::array<Eigen::Index, 2> bias_reshape;
    bias_reshape[0] = 1;
    bias_reshape[1] = this->bias->getShape()[0];

    Eigen::array<Eigen::Index, 2> bias_bcast;
    bias_bcast[0] = this->input->getShape()[0];
    bias_bcast[1] = 1;

    TIn input_val      = this->input->getTensor();
    TWeight weight_val = this->weight->getTensor().shuffle(weight_shuffle);
    if (this->qinfo)
    {
        input_val  = input_val - (InEigenType)this->qinfo->input_zp();
        weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
    }

    this->output->getTensor() =
        input_val.template cast<AccEigenType>().contract(weight_val.template cast<AccEigenType>(), dims) +
        this->bias->getTensor().reshape(bias_reshape).broadcast(bias_bcast);

    if (AccDtype == DType_INT48)
    {
        this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
    }
    return GraphNode::eval();
}

template <DType Dtype>
OpMatMul<Dtype>::OpMatMul(TosaAttributeBase* attribute_, TosaQuantInfoBase* qinfo_, uint64_t id_)
    : GraphNode(Op_MATMUL, id_)
{
    setRequiredOperands(2, 1);
    setRequiredRank(3);

    INIT_QINFO(MatMul);
}

template <DType Dtype>
OpMatMul<Dtype>::~OpMatMul()
{
    if (qinfo)
        delete qinfo;
}

template <DType Dtype>
int OpMatMul<Dtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    a      = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    b      = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[1]);
    output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);

    ASSERT_MEM(a && b && output);

    // a: [N, H, C]
    // b: [N, C, W]
    // c: [N, H, W]

    // Check N
    if (a->getShape()[0] != b->getShape()[0] || a->getShape()[0] != output->getShape()[0])
    {
        printNodeValidationError("OpMatMul operator a.shape[0], b.shape[0] and output.shape[0] should match");
        return 1;
    }
    N = a->getShape()[0];

    // Check C
    if (a->getShape()[2] != b->getShape()[1])
    {
        printNodeValidationError("OpMatMul operator a.shape[2] should match b.shape[1]");
        return 1;
    }
    C = a->getShape()[2];

    // Check H
    if (a->getShape()[1] != output->getShape()[1])
    {
        printNodeValidationError("OpMatMul operator a.shape[1] should match output.shape[1]");
        return 1;
    }
    H = a->getShape()[1];

    // Check W
    if (b->getShape()[2] != output->getShape()[2])
    {
        printNodeValidationError("OpMatMul operator b.shape[2] should match output.shape[2]");
        return 1;
    }
    W = b->getShape()[2];

    return 0;
}

template <DType Dtype>
int OpMatMul<Dtype>::eval()
{
    typedef Eigen::Tensor<int, 1>::DimensionPair DimPair;
    Eigen::array<DimPair, 1> dims{ { DimPair(1, 0) } };

    TIn a_val = this->a->getTensor();
    TIn b_val = this->b->getTensor();
    if (this->qinfo)
    {
        a_val = a_val - (InEigenType)this->qinfo->a_zp();
        b_val = b_val - (InEigenType)this->qinfo->b_zp();
    }

    Eigen::array<Eigen::Index, 2> a_rank2_shape({ H, C });
    Eigen::array<Eigen::Index, 2> b_rank2_shape({ C, W });
    Eigen::array<Eigen::Index, 3> output_rank3_shape({ 1, H, W });

    Eigen::array<Eigen::Index, 3> a_size_array({ 1, H, C });
    Eigen::array<Eigen::Index, 3> b_size_array({ 1, C, W });

    Eigen::array<Eigen::Index, 3> a_begin_array({ 0, 0, 0 });
    Eigen::array<Eigen::Index, 3> b_begin_array({ 0, 0, 0 });

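    // Eigen's contract() works on rank-2 tensors, so each batch is sliced out,
    // reshaped to [H, C] and [C, W], multiplied, and the resulting [1, H, W]
    // slices are concatenated back along the batch dimension.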
    // Iterate N dimension.
    for (int i = 0; i < N; i++)
    {
        a_begin_array[0] = i;
        b_begin_array[0] = i;

        TInRank2 a_rank2_val = a_val.slice(a_begin_array, a_size_array).reshape(a_rank2_shape);
        TInRank2 b_rank2_val = b_val.slice(b_begin_array, b_size_array).reshape(b_rank2_shape);
        TAccRank2 output_rank2_val =
            a_rank2_val.template cast<AccEigenType>().contract(b_rank2_val.template cast<AccEigenType>(), dims);
        TAcc output_rank3_val = output_rank2_val.reshape(output_rank3_shape);
        if (i == 0)
        {
            this->output->getTensor() = output_rank3_val;
        }
        else
        {
            TAcc temp                 = this->output->getTensor().concatenate(output_rank3_val, 0);
            this->output->getTensor() = temp;
        }
    }

    if (AccDtype == DType_INT48)
    {
        this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
    }

    return GraphNode::eval();
}

template <DType Dtype>
OpMaxPool2d<Dtype>::OpMaxPool2d(TosaAttributeBase* attribute_, TosaQuantInfoBase* qinfo_, uint64_t id_)
    : GraphNode(Op_MAX_POOL2D, id_)
{
    setRequiredOperands(1, 1);
    setRequiredRank(4);

    INIT_ATTRIBUTE(Pool2d);
}

template <DType Dtype>
OpMaxPool2d<Dtype>::~OpMaxPool2d()
{
    if (attribute)
        delete attribute;
}

template <DType Dtype>
int OpMaxPool2d<Dtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    if (inputs[0]->matchType(*outputs[0]))
    {
        printNodeValidationError("OpMaxPool2d: input and output tensor type mismatch");
        return 1;
    }

    in  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    out = dynamic_cast<TosaReference::TensorTemplate<TOut>*>(outputs[0]);

    if (attribute->padding().size() != 4)
    {
        printNodeValidationError("OpMaxPool2d: illegal size for attribute padding");
        return 1;
    }

    if (attribute->kernel().size() != 2)
    {
        printNodeValidationError("OpMaxPool2d: illegal size for attribute kernel");
        return 1;
    }

    if (attribute->stride().size() != 2)
    {
        printNodeValidationError("OpMaxPool2d: illegal size for attribute stride");
        return 1;
    }

    return 0;
}

template <DType Dtype>
int OpMaxPool2d<Dtype>::eval()
{
    int in_batch    = this->in->getShape()[0];
    int in_height   = this->in->getShape()[1];
    int in_width    = this->in->getShape()[2];
    int in_channels = this->in->getShape()[3];

    int out_batch    = this->out->getShape()[0];
    int out_height   = this->out->getShape()[1];
    int out_width    = this->out->getShape()[2];
    int out_channels = this->out->getShape()[3];

    ASSERT_MSG_NODE(in_batch == out_batch, "OpMaxPool2d: tensor batch mismatch %d != %d", in_batch, out_batch);

    int padding_top    = this->attribute->padding()[0];
    int padding_bottom = this->attribute->padding()[1];
    int padding_left   = this->attribute->padding()[2];
    int padding_right  = this->attribute->padding()[3];
    int kernel_h       = this->attribute->kernel()[0];
    int kernel_w       = this->attribute->kernel()[1];
    int stride_h       = this->attribute->stride()[0];
    int stride_w       = this->attribute->stride()[1];

    DEBUG_INFO(OP,
               "perform MaxPool2d, input.shape=[%d,%d,%d,%d], output.shape=[%d,%d,%d,%d], kernel=[%d,%d], "
               "stride=[%d,%d], padding=[%d,%d,%d,%d]",
               in_batch, in_height, in_width, in_channels, out_batch, out_height, out_width, out_channels, kernel_h,
               kernel_w, stride_h, stride_w, padding_top, padding_bottom, padding_left, padding_right);

    Eigen::array<Eigen::Index, 2> im2col_input_dims;
    im2col_input_dims[0] = kernel_h * kernel_w;
    im2col_input_dims[1] = out_batch * out_height * out_width * out_channels;

    Eigen::array<Eigen::Index, 4> col2im_output_dims;
    col2im_output_dims[0] = out_batch;
    col2im_output_dims[1] = out_height;
    col2im_output_dims[2] = out_width;
    col2im_output_dims[3] = out_channels;

    Eigen::array<std::pair<int32_t, int32_t>, 4> padding;
    padding[0] = std::make_pair(0, 0);
    padding[1] = std::make_pair(padding_top, padding_bottom);
    padding[2] = std::make_pair(padding_left, padding_right);
    padding[3] = std::make_pair(0, 0);

    ETensor4<InEigenType> input_padded = this->in->getTensor().pad(padding, std::numeric_limits<InEigenType>::lowest());

    // extract_image_patches() output [N, KH, KW, H * W, C]
    // transpose to [KH, KW, N, H * W, C]
    // reshape to [KH * KW, N * H * W * C]
    //
    // Set the padding value to be the most negative value that can be
    // represented by the datatype to ensure that any padding values will be equal
    // to or smaller than the actual maximum in the KH x KW patch.
    ETensor2<InEigenType> input_extract_patches =
        input_padded
            .extract_image_patches(kernel_h, kernel_w, stride_h, stride_w, 1, 1, Eigen::PADDING_VALID,
                                   std::numeric_limits<InEigenType>::lowest())
            .shuffle(Eigen::array<Eigen::Index, 5>{ 1, 2, 0, 3, 4 })
            .reshape(im2col_input_dims);

    // Get the maximum of the KH x KW patches along axis 0
    Eigen::Tensor<DenseIndex, 1> tensor_argmax = input_extract_patches.argmax(0);
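
    // argmax(0) returns, for each column i, the row index of that column's
    // largest element; indexing the patch matrix at (tensor_argmax(i), i) below
    // therefore reads out the maximum of each pooling window.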

    // 1D result with [N * H * W * C]
    ETensor1<OutEigenType> out_1d(this->out->getElementCount());

    // indexing input_patches with the argmax array gives the result
    for (size_t i = 0; i < this->out->getElementCount(); i++)
    {
        out_1d(i) = (OutEigenType)input_extract_patches(tensor_argmax(i), i);
    }

    // reshape result to [N, H, W, C]
    this->out->getTensor() = out_1d.reshape(col2im_output_dims);

    return GraphNode::eval();
}

template <DType InDtype, DType OutDtype>
OpTransposeConv2d<InDtype, OutDtype>::OpTransposeConv2d(TosaAttributeBase* attribute_,
                                                        TosaQuantInfoBase* qinfo_,
                                                        uint64_t id_)
    : GraphNode(Op_TRANSPOSE_CONV2D, id_)
{
    setRequiredOperands(3, 1);
    setRequiredRank(4);

    INIT_ATTRIBUTE(TransposeConv2d);
    INIT_QINFO(Conv);
}

template <DType InDtype, DType OutDtype>
OpTransposeConv2d<InDtype, OutDtype>::~OpTransposeConv2d()
{
    if (attribute)
        delete attribute;
    if (qinfo)
        delete qinfo;
}

template <DType InDtype, DType OutDtype>
int OpTransposeConv2d<InDtype, OutDtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    input  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
    bias   = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
    output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);

    if (attribute->outpad().size() != 2)
    {
        printNodeValidationError("OpTransposeConv2d: illegal size for attribute outpad");
        return 1;
    }

    if (attribute->stride().size() != 2)
    {
        printNodeValidationError("OpTransposeConv2d: illegal size for attribute stride");
        return 1;
    }

    if (attribute->dilation().size() != 2)
    {
        printNodeValidationError("OpTransposeConv2d: illegal size for attribute dilation");
        return 1;
    }

    if (attribute->output_shape().size() != 4)
    {
        printNodeValidationError("OpTransposeConv2d: illegal size for attribute output_shape");
        return 1;
    }

    for (int d = 0; d < 4; d++)
    {
        if (attribute->output_shape()[d] != this->output->getShape()[d])
        {
            printNodeValidationError("OpTransposeConv2d: illegal size for attribute output_shape");
            return 1;
        }
    }

    return 0;
}

template <DType InDtype, DType OutDtype>
int OpTransposeConv2d<InDtype, OutDtype>::eval()
{
    int in_batch    = this->input->getShape()[0];
    int in_height   = this->input->getShape()[1];
    int in_width    = this->input->getShape()[2];
    int in_channels = this->input->getShape()[3];

    int f_out_channels = this->weight->getShape()[0];
    int f_height       = this->weight->getShape()[1];
    int f_width        = this->weight->getShape()[2];
    int f_in_channels  = this->weight->getShape()[3];

    int b_out_channels = this->bias->getShape()[0];

    int out_batch    = this->output->getShape()[0];
    int out_height   = this->output->getShape()[1];
    int out_width    = this->output->getShape()[2];
    int out_channels = this->output->getShape()[3];

    int padding_top  = this->attribute->outpad()[0];
    int padding_left = this->attribute->outpad()[1];
    int stride_h     = this->attribute->stride()[0];
    int stride_w     = this->attribute->stride()[1];
    int dilation_h   = this->attribute->dilation()[0];
    int dilation_w   = this->attribute->dilation()[1];

    ASSERT_MSG_NODE(in_batch == out_batch, "OpTransposeConv2d: tensor batch mismatch %d != %d", in_batch, out_batch);
    ASSERT_MSG_NODE(f_in_channels == in_channels, "OpTransposeConv2d: tensor input channel mismatch %d != %d",
                    f_in_channels, in_channels);
    ASSERT_MSG_NODE(f_out_channels == out_channels, "OpTransposeConv2d: tensor output channel mismatch %d != %d",
                    f_out_channels, out_channels);
    ASSERT_MSG_NODE(b_out_channels == out_channels, "OpTransposeConv2d: tensor b_out_channels mismatch %d != %d",
                    b_out_channels, out_channels);

    DEBUG_INFO(OP,
               "perform OpTransposeConv2d, input.shape=[%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d], "
               "output.shape=[%d,%d,%d,%d], stride=[%d,%d], dilation=[%d,%d], padding=[%d,%d]",
               in_batch, in_height, in_width, in_channels, f_height, f_width, f_out_channels, f_in_channels, out_batch,
               out_height, out_width, out_channels, stride_h, stride_w, dilation_h, dilation_w, padding_top,
               padding_left);

    TIn input_val      = this->input->getTensor();
    TWeight weight_val = this->weight->getTensor();
    if (this->qinfo)
    {
        input_val  = input_val - (InEigenType)this->qinfo->input_zp();
        weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
    }

    Eigen::array<Eigen::Index, 4> reshape_dim;
    reshape_dim.fill(1);
    reshape_dim[3] = b_out_channels;

    Eigen::array<Eigen::Index, 4> bcast;
    bcast[0] = out_batch;
    bcast[1] = out_height;
    bcast[2] = out_width;
    bcast[3] = 1;

    // initialize with bias
    this->output->getTensor() = this->bias->getTensor().reshape(reshape_dim).broadcast(bcast);

    int out_x_origin, out_y_origin;
    int out_x, out_y;

    // reference implementation from: tensorflow/tensorflow/lite/kernels/internal/reference/reference_ops.h
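    // Each input pixel is scattered into the output: input[ob, ih, iw, ic] *
    // weight[oc, fh, fw, ic] is accumulated at output row/column
    // (ih * stride_h - padding_top + fh * dilation_h,
    //  iw * stride_w - padding_left + fw * dilation_w), skipping positions that
    // fall outside the output.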
    for (int ob = 0; ob < out_batch; ob++)
    {
        for (int ih = 0; ih < in_height; ih++)
        {
            for (int iw = 0; iw < in_width; iw++)
            {
                out_x_origin = iw * stride_w - padding_left;
                out_y_origin = ih * stride_h - padding_top;
                for (int ic = 0; ic < in_channels; ic++)
                {
                    for (int fh = 0; fh < f_height; fh++)
                    {
                        for (int fw = 0; fw < f_width; fw++)
                        {
                            out_x = out_x_origin + fw * dilation_w;
                            out_y = out_y_origin + fh * dilation_h;
                            for (int oc = 0; oc < out_channels; oc++)
                            {
                                if ((out_x >= 0 && out_x < out_width) && (out_y >= 0 && out_y < out_height))
                                {
                                    this->output->getTensor()(ob, out_y, out_x, oc) +=
                                        ((AccEigenType)input_val(ob, ih, iw, ic) *
                                         (AccEigenType)weight_val(oc, fh, fw, ic));
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    if (AccDtype == DType_INT48)
    {
        this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
    }

    return GraphNode::eval();
}

// template explicit instantiation
DEF_INSTANTIATE_RANK1_6_ONE_RANK_ONE_TYPE(OpArgMax, FLOAT);
DEF_INSTANTIATE_RANK1_6_ONE_RANK_ONE_TYPE(OpArgMax, INT8);
DEF_INSTANTIATE_RANK1_6_ONE_RANK_ONE_TYPE(OpArgMax, INT16);

DEF_INSTANTIATE_ONE_TYPE(OpAvgPool2d, FLOAT)
DEF_INSTANTIATE_ONE_TYPE(OpAvgPool2d, INT8)
DEF_INSTANTIATE_ONE_TYPE(OpAvgPool2d, INT16)

DEF_INSTANTIATE_TWO_TYPE(OpConv2d, FLOAT, FLOAT);
DEF_INSTANTIATE_TWO_TYPE(OpConv2d, INT8, INT4);
DEF_INSTANTIATE_TWO_TYPE(OpConv2d, INT8, INT8);
DEF_INSTANTIATE_TWO_TYPE(OpConv2d, INT16, INT8);

DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, FLOAT, FLOAT);
DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, INT8, INT4);
DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, INT8, INT8);
DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, INT16, INT8);

DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, FLOAT, FLOAT);
DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, INT8, INT4);
DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, INT8, INT8);
DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, INT16, INT8);

DEF_INSTANTIATE_ONE_TYPE(OpMatMul, INT8);
DEF_INSTANTIATE_ONE_TYPE(OpMatMul, INT16);
DEF_INSTANTIATE_ONE_TYPE(OpMatMul, FLOAT);

DEF_INSTANTIATE_ONE_TYPE(OpMaxPool2d, FLOAT);
DEF_INSTANTIATE_ONE_TYPE(OpMaxPool2d, INT8);
DEF_INSTANTIATE_ONE_TYPE(OpMaxPool2d, INT16);

DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, FLOAT, FLOAT);
DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, INT8, INT4);
DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, INT8, INT8);
DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, INT16, INT8);