// Copyright (c) 2020-2021, ARM Limited.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "tensor_ops.h"
#include "quant_util.h"
#include "template_types.h"

using namespace TosaReference;
using namespace Eigen;
using namespace tosa;

int check_pool2d_attribute(tosa::TosaPoolAttribute* attribute,
                           std::vector<int32_t> input_shape,
                           std::vector<int32_t> output_shape,
                           std::string& msg)
{
    if (attribute->padding().size() != 4)
    {
        msg = "illegal size for attribute padding";
        return 1;
    }

    if (attribute->kernel().size() != 2)
    {
        msg = "illegal size for attribute kernel";
        return 1;
    }

    if (attribute->stride().size() != 2)
    {
        msg = "illegal size for attribute stride";
        return 1;
    }

    for (int32_t i : attribute->padding())
    {
        if (i < 0)
        {
            msg = "At least one pad is smaller than zero";
            return 1;
        }
    }

    for (int32_t i : attribute->kernel())
    {
        if (i < 1)
        {
            msg = "At least one kernel dimension is smaller than one";
            return 1;
        }
    }

    for (int32_t i : attribute->stride())
    {
        if (i < 1)
        {
            msg = "At least one stride dimension is smaller than one";
            return 1;
        }
    }

    int32_t IH = input_shape[1];
    int32_t IW = input_shape[2];
    int32_t OH = output_shape[1];
    int32_t OW = output_shape[2];

    int32_t pad_top    = attribute->padding()[0];
    int32_t pad_bottom = attribute->padding()[1];
    int32_t pad_left   = attribute->padding()[2];
    int32_t pad_right  = attribute->padding()[3];

    int32_t stride_y = attribute->stride()[0];
    int32_t stride_x = attribute->stride()[1];
    int32_t kernel_y = attribute->kernel()[0];
    int32_t kernel_x = attribute->kernel()[1];

    if (pad_top >= kernel_y || pad_bottom >= kernel_y || pad_left >= kernel_x || pad_right >= kernel_x)
    {
        msg = "At least one pad is >= kernel dimension";
        return 1;
    }

    if ((OH != (IH + pad_top + pad_bottom + stride_y - kernel_y) / stride_y) ||
        (OW != (IW + pad_left + pad_right + stride_x - kernel_x) / stride_x))
    {
        msg = "Mismatch between output shape provided and expected output shape";
        return 1;
    }

    return 0;
}
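
// Worked example for the shape check above (illustrative numbers, not part of
// the reference model): IH = 8, kernel_y = 3, stride_y = 2, pad_top =
// pad_bottom = 1 gives OH = (8 + 1 + 1 + 2 - 3) / 2 = 9 / 2 = 4 with integer
// division, so a pool over a height-8 input with these parameters must
// declare an output height of 4 to pass check_pool2d_attribute().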

int check_conv_attribute_qinfo(tosa::TosaConvAttribute* attribute,
                               tosa::TosaConvQuantInfo* qinfo,
                               uint32_t conv_dimension,
                               std::vector<int32_t> input_shape,
                               std::vector<int32_t> output_shape,
                               DType InDtype,
                               DType WeightDtype,
                               std::string& msg)
{
    if (attribute->padding().size() != (2 * conv_dimension))
    {
        msg = "Illegal size for attribute padding";
        return 1;
    }

    if (attribute->stride().size() != conv_dimension)
    {
        msg = "Illegal size for attribute stride";
        return 1;
    }

    if (attribute->dilation().size() != conv_dimension)
    {
        msg = "Illegal size for attribute dilation";
        return 1;
    }

    for (int32_t i : attribute->padding())
    {
        if (i < 0)
        {
            msg = "At least one pad is smaller than zero";
            return 1;
        }
    }

    for (int32_t i : attribute->stride())
    {
        if (i < 1)
        {
            msg = "At least one stride dimension is smaller than one";
            return 1;
        }
    }

    for (int32_t i : attribute->dilation())
    {
        if (i < 1)
        {
            msg = "At least one dilation dimension is smaller than one";
            return 1;
        }
    }

    if (qinfo)
    {
        if (InDtype != DType_INT8 && qinfo->input_zp() != 0)
        {
            msg = "zeropoint only for int8_t";
            return 1;
        }
        if (WeightDtype != DType_INT8 && qinfo->weight_zp() != 0)
        {
            msg = "zeropoint only for int8_t";
            return 1;
        }
    }

    return 0;
}
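
// Layout note (derived from the callers below): padding carries
// 2 * conv_dimension entries, so conv2d (conv_dimension = 2) passes
// [top, bottom, left, right] and conv3d (conv_dimension = 3) passes
// [d0, d1, top, bottom, left, right]; stride and dilation carry one
// entry per spatial dimension.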

template <int Rank, DType Dtype>
OpArgMax<Rank, Dtype>::OpArgMax(SubgraphTraverser* sgt_,
                                TosaAttributeBase* attribute_,
                                TosaQuantInfoBase* qinfo_,
                                uint64_t id_)
    : GraphNode(sgt_, Op_ARGMAX, id_)
{
    setRequiredOperands(1, 1);
    setRequiredRank(1, 4);

    INIT_ATTRIBUTE(Axis);
}

template <int Rank, DType Dtype>
OpArgMax<Rank, Dtype>::~OpArgMax()
{
    if (attribute)
        delete attribute;
}

template <int Rank, DType Dtype>
int OpArgMax<Rank, Dtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]))
    {
        return 1;
    }

    int32_t output_rank = inputs[0]->getRank() - 1;
    if (output_rank != outputs[0]->getRank())
    {
        printNodeValidationError("OpArgMax: Output rank needs to be rank(input) - 1");
        return 1;
    }

    if (outputs[0]->getDtype() != DType_INT32)
    {
        printNodeValidationError("OpArgMax: Output data type not supported for this configuration of operator");
        return 1;
    }

    input  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    output = dynamic_cast<TosaReference::TensorTemplate<TOut>*>(outputs[0]);

    if (attribute->axis() < 0 || attribute->axis() >= input->getRank())
    {
        printNodeValidationError("OpArgMax: Axis needs to be within [0, rank(input))");
        return 1;
    }

    bool shape_check = true;
    for (int32_t i = 0; i < input->getRank(); i++)
    {
        if (i < attribute->axis())
        {
            if (input->getShape()[i] != output->getShape()[i])
            {
                shape_check = false;
                break;
            }
        }
        else if (i > attribute->axis())
        {
            if (input->getShape()[i] != output->getShape()[i - 1])
            {
                shape_check = false;
                break;
            }
        }
        // No need to check i == axis
    }
    if (!shape_check)
    {
        printNodeValidationError("OpArgMax: Mismatch between output shape provided and expected output shape");
        return 1;
    }

    return 0;
}
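
// Shape example (illustrative): an input of shape [2, 3, 4] reduced with
// axis = 1 must declare an output of shape [2, 4] -- the axis dimension is
// dropped and the remaining dimensions keep their order, which is exactly
// what the shape_check loop above verifies.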

template <int Rank, DType Dtype>
int OpArgMax<Rank, Dtype>::eval()
{
    Eigen::Tensor<DenseIndex, Rank - 1> index = this->input->getTensor().argmax(attribute->axis());

    this->output->getTensor() = index.unaryExpr([](DenseIndex in) -> OutEigenType { return (OutEigenType)in; });

    return GraphNode::eval();
}

template <DType Dtype>
OpAvgPool2d<Dtype>::OpAvgPool2d(SubgraphTraverser* sgt_,
                                TosaAttributeBase* attribute_,
                                TosaQuantInfoBase* qinfo_,
                                uint64_t id_)
    : GraphNode(sgt_, Op_AVG_POOL2D, id_)
{
    setRequiredOperands(1, 1);
    setRequiredRank(4);

    INIT_ATTRIBUTE(Pool);
    INIT_QINFO(Unary);
}

template <DType Dtype>
OpAvgPool2d<Dtype>::~OpAvgPool2d()
{
    if (attribute)
        delete attribute;
}

template <DType Dtype>
int OpAvgPool2d<Dtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    if (inputs[0]->matchType(*outputs[0]))
    {
        printNodeValidationError("OpAvgPool2d: input and output tensor type mismatch");
        return 1;
    }

    in  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    out = dynamic_cast<TosaReference::TensorTemplate<TOut>*>(outputs[0]);

    if (Dtype != DType_INT8 && this->qinfo)
    {
        ERROR_IF(this->qinfo->input_zp() != 0, "OpAvgPool2d: zeropoint only for int8_t");
        ERROR_IF(this->qinfo->output_zp() != 0, "OpAvgPool2d: zeropoint only for int8_t");
    }

    std::string msg;
    if (check_pool2d_attribute(attribute, in->getShape(), out->getShape(), msg))
    {
        msg = "OpAvgPool2d: " + msg;
        printNodeValidationError(msg.c_str());
        return 1;
    }

    return 0;
}

template <DType Dtype>
ETensor1<int32_t> OpAvgPool2d<Dtype>::calculate_div_map_1d(int in_size, int out_size, int kernel_size, int stride)
{
    ETensor1<int32_t> result(out_size);

    int32_t total_pad = (out_size - 1) * stride + kernel_size - in_size;
    total_pad         = total_pad < 0 ? 0 : total_pad;

    int32_t pad_left  = total_pad >> 1;
    int32_t pad_right = total_pad - pad_left;

    result.setConstant(kernel_size);

    // Output indices at or below 'left_index' (and, mirrored from the right
    // edge, at or beyond 'right_index') have input windows that overlap the padding.
    int32_t left_index  = pad_left / stride;
    int32_t right_index = pad_right / stride;

    // subtract the number of pad positions each such window covers
    while (left_index >= 0)
    {
        result(left_index) -= (pad_left - left_index * stride);
        left_index--;
    }

    while (right_index >= 0)
    {
        result(out_size - 1 - right_index) -= (pad_right - right_index * stride);
        right_index--;
    }

    return result;
}
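
// Worked example (illustrative): in_size = 4, out_size = 4, kernel_size = 3,
// stride = 1 gives total_pad = 2 and pad_left = pad_right = 1, producing a
// div_map of [2, 3, 3, 2] -- the two border windows each overlap one pad
// position, so their sums are divided by 2 rather than by the full kernel
// size of 3.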

// Assuming input and output tensors have the same scales, as in the tflite
// reference, there is no need to scale the input and output here.
template <DType Dtype>
int OpAvgPool2d<Dtype>::eval()
{
    int in_batch    = this->in->getShape()[0];
    int in_height   = this->in->getShape()[1];
    int in_width    = this->in->getShape()[2];
    int in_channels = this->in->getShape()[3];

    int out_batch    = this->out->getShape()[0];
    int out_height   = this->out->getShape()[1];
    int out_width    = this->out->getShape()[2];
    int out_channels = this->out->getShape()[3];

    ERROR_IF(in_batch != out_batch, "OpAvgPool2d: tensor batch mismatch %d != %d", in_batch, out_batch);
    ERROR_IF(in_channels != out_channels, "OpAvgPool2d: tensor channel mismatch %d != %d", in_channels, out_channels);

    int padding_top    = this->attribute->padding()[0];
    int padding_bottom = this->attribute->padding()[1];
    int padding_left   = this->attribute->padding()[2];
    int padding_right  = this->attribute->padding()[3];
    int kernel_h       = this->attribute->kernel()[0];
    int kernel_w       = this->attribute->kernel()[1];
    int stride_h       = this->attribute->stride()[0];
    int stride_w       = this->attribute->stride()[1];

    DEBUG_INFO(OP,
               "perform AvgPool2d, input.shape=[%d,%d,%d,%d], output.shape=[%d,%d,%d,%d], kernel=[%d,%d], "
               "stride=[%d,%d], padding=[%d,%d,%d,%d]",
               in_batch, in_height, in_width, in_channels, out_batch, out_height, out_width, out_channels, kernel_h,
               kernel_w, stride_h, stride_w, padding_top, padding_bottom, padding_left, padding_right);

    Eigen::array<Eigen::Index, 2> im2col_input_dims;
    im2col_input_dims[0] = kernel_h * kernel_w;
    im2col_input_dims[1] = out_batch * out_height * out_width * out_channels;

    Eigen::array<Eigen::Index, 4> col2im_output_dims;
    col2im_output_dims[0] = out_batch;
    col2im_output_dims[1] = out_height;
    col2im_output_dims[2] = out_width;
    col2im_output_dims[3] = out_channels;

    Eigen::array<std::pair<int32_t, int32_t>, 4> padding;
    padding[0] = std::make_pair(0, 0);
    padding[1] = std::make_pair(padding_top, padding_bottom);
    padding[2] = std::make_pair(padding_left, padding_right);
    padding[3] = std::make_pair(0, 0);

    ETensor4<InEigenType> input_val = this->in->getTensor();
    if (this->qinfo)
    {
        input_val = input_val - (InEigenType)this->qinfo->input_zp();
    }

    ETensor4<InEigenType> input_padded = input_val.pad(padding);

    // assuming input and output have the same scales,
    // so input and output scaling is not required
    // TODO: check whether TOSA actually makes this assumption

    // extract_image_patches() output [N, KH, KW, H * W, C]
    // transpose to [KH, KW, N, H * W, C]
    // reshape to [KH * KW, N * H * W * C]
    ETensor2<InEigenType> input_extract_patches =
        input_padded.extract_image_patches(kernel_h, kernel_w, stride_h, stride_w, 1, 1, Eigen::PADDING_VALID)
            .shuffle(Eigen::array<Eigen::Index, 5>{ 1, 2, 0, 3, 4 })
            .reshape(im2col_input_dims);

    // 1D result with [N * H * W * C]
    ETensor1<AccEigenType> out_1d(this->out->getElementCount());
    out_1d.setZero();

    // sum pool
    for (size_t i = 0; i < this->out->getElementCount(); i++)
    {
        for (int32_t j = 0; j < kernel_h * kernel_w; j++)
        {
            out_1d(i) += (AccEigenType)input_extract_patches(j, i);
        }
    }

    // reshape result to [N, H, W, C] and divide with div_map
    ETensor4<AccEigenType> sum = out_1d.reshape(col2im_output_dims);

    // calculate 1d height/width div_map (number of elements this pooling window covers)
    // and outer product to get 2d div_map, then reshape/broadcast to [N, H, W, C]
    ETensor1<int32_t> div_map_h = calculate_div_map_1d(in_height, out_height, kernel_h, stride_h);
    ETensor1<int32_t> div_map_w = calculate_div_map_1d(in_width, out_width, kernel_w, stride_w);
    Eigen::array<Eigen::IndexPair<Eigen::Index>, 1> contract_dims = { Eigen::IndexPair<Eigen::Index>(1, 0) };
    Eigen::array<Eigen::Index, 4> bcast{ out_batch, 1, 1, out_channels };

    ETensor4<int32_t> div_map =
        div_map_h.reshape(Eigen::array<Eigen::Index, 2>{ out_height, 1 })
            .contract(div_map_w.reshape(Eigen::array<Eigen::Index, 2>{ 1, out_width }), contract_dims)
            .reshape(Eigen::array<Eigen::Index, 4>{ 1, out_height, out_width, 1 })
            .broadcast(bcast);

    if (Dtype != DType_FLOAT)
    {
        try
        {
            this->out->getTensor() = sum.binaryExpr(div_map, [](AccEigenType value, int32_t div) -> OutEigenType {
                int32_t multiplier, shift;
                TosaReference::QuantUtil::reciprocal_scale(div, multiplier, shift);

                return (OutEigenType)TosaReference::QuantUtil::apply_scale_32(value, multiplier, shift, false);
            });
        }
        catch (std::string desc)
        {
            REQUIRE(false, "OpAvgPool2d apply_scale_32() fails: %s.", desc.c_str());
        }
        this->out->getTensor() = this->out->getTensor() + (OutEigenType)(this->qinfo->output_zp());
        this->out->getTensor() = this->out->getTensor().cwiseMax((OutEigenType)QMin);
        this->out->getTensor() = this->out->getTensor().cwiseMin((OutEigenType)QMax);
    }
    else
    {
        this->out->getTensor() = (sum / div_map.template cast<AccEigenType>()).template cast<OutEigenType>();
    }

    return GraphNode::eval();
}
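
// A sketch of the integer-averaging mechanism above (the exact rounding
// behaviour lives in quant_util.h): dividing by a per-element window count is
// done without integer division -- reciprocal_scale(div, multiplier, shift)
// derives a fixed-point reciprocal of 'div' so that apply_scale_32(value,
// multiplier, shift) approximates value / div; the result is then offset by
// output_zp and clamped to [QMin, QMax].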

template <DType InDtype, DType WeightDtype>
OpConv2d<InDtype, WeightDtype>::OpConv2d(SubgraphTraverser* sgt_,
                                         TosaAttributeBase* attribute_,
                                         TosaQuantInfoBase* qinfo_,
                                         uint64_t id_)
    : GraphNode(sgt_, Op_CONV2D, id_)
{
    setRequiredOperands(3, 1);
    setRequiredRank(4);

    INIT_ATTRIBUTE(Conv);
    INIT_QINFO(Conv);
}

template <DType InDtype, DType WeightDtype>
OpConv2d<InDtype, WeightDtype>::~OpConv2d()
{
    if (attribute)
        delete attribute;
    if (qinfo)
        delete qinfo;
}

template <DType InDtype, DType WeightDtype>
int OpConv2d<InDtype, WeightDtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    // 'bias' is checked separately since it doesn't make sense to require a rank ranging from 1 to 4
    if (inputs[2]->getRank() != 1)
    {
        printNodeValidationError("OpConv2d: bias tensor must be rank 1");
    }

    ERROR_IF(outputs[0]->getDtype() != AccDtype,
             "OpConv2d: Output data type not supported for this configuration of operator");

    input  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
    bias   = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
    output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);

    std::string msg;
    if (check_conv_attribute_qinfo(attribute, qinfo, 2 /* conv_dimension */, input->getShape(), output->getShape(),
                                   InDtype, WeightDtype, msg))
    {
        msg = "OpConv2d: " + msg;
        printNodeValidationError(msg.c_str());
        return 1;
    }

    return 0;
}

template <DType InDtype, DType WeightDtype>
int OpConv2d<InDtype, WeightDtype>::eval()
{
    int in_batch    = this->input->getShape()[0];
    int in_height   = this->input->getShape()[1];
    int in_width    = this->input->getShape()[2];
    int in_channels = this->input->getShape()[3];

    int f_out_channels = this->weight->getShape()[0];
    int f_height       = this->weight->getShape()[1];
    int f_width        = this->weight->getShape()[2];
    int f_in_channels  = this->weight->getShape()[3];

    int b_out_channels = this->bias->getShape()[0];

    int out_batch    = this->output->getShape()[0];
    int out_height   = this->output->getShape()[1];
    int out_width    = this->output->getShape()[2];
    int out_channels = this->output->getShape()[3];

    ERROR_IF(in_batch != out_batch, "OpConv2d: tensor batch mismatch %d != %d", in_batch, out_batch);
    ERROR_IF(f_in_channels != in_channels, "OpConv2d: tensor input channel mismatch %d != %d", f_in_channels,
             in_channels);
    ERROR_IF(f_out_channels != out_channels, "OpConv2d: tensor output channel mismatch %d != %d", f_out_channels,
             out_channels);
    ERROR_IF(b_out_channels != out_channels, "OpConv2d: bias channel mismatch %d != %d", b_out_channels, out_channels);

    int padding_top    = this->attribute->padding()[0];
    int padding_bottom = this->attribute->padding()[1];
    int padding_left   = this->attribute->padding()[2];
    int padding_right  = this->attribute->padding()[3];
    int stride_h       = this->attribute->stride()[0];
    int stride_w       = this->attribute->stride()[1];
    int dilation_h     = this->attribute->dilation()[0];
    int dilation_w     = this->attribute->dilation()[1];

    DEBUG_INFO(OP,
               "perform OpConv2d, input.shape=[%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d], output.shape=[%d,%d,%d,%d], "
               "stride=[%d,%d], dilation=[%d,%d], padding=[%d,%d,%d,%d]",
               in_batch, in_height, in_width, in_channels, f_height, f_width, f_in_channels, f_out_channels, out_batch,
               out_height, out_width, out_channels, stride_h, stride_w, dilation_h, dilation_w, padding_top,
               padding_bottom, padding_left, padding_right);

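    // Shape sketch for the GEMM formulation below (illustrative numbers): with
    // input [1, 5, 5, 2], weight [8, 3, 3, 2], stride 1, dilation 1 and no
    // padding, each 3x3x2 input patch becomes one row of a [1*3*3, 18] im2col
    // matrix, the weights become an [18, 8] matrix, and a single contraction
    // yields [9, 8], which is reshaped back to the [1, 3, 3, 8] output.
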
    // GEMM-conv2d, left matrix is input, right matrix is weight
    Eigen::array<Eigen::Index, 2> im2col_input_dims;
    im2col_input_dims[0] = out_batch * out_height * out_width;
    im2col_input_dims[1] = f_height * f_width * f_in_channels;

    Eigen::array<Eigen::Index, 2> im2col_weight_dims;
    im2col_weight_dims[0] = f_height * f_width * f_in_channels;
    im2col_weight_dims[1] = f_out_channels;

    Eigen::array<Eigen::Index, 2> bias_reshaped_dims;
    bias_reshaped_dims[0] = 1;
    bias_reshaped_dims[1] = b_out_channels;

    Eigen::array<Eigen::Index, 4> weight_zp_bcast_dims;
    weight_zp_bcast_dims[0] = f_height;
    weight_zp_bcast_dims[1] = f_width;
    weight_zp_bcast_dims[2] = f_in_channels;

    Eigen::array<Eigen::Index, 2> bias_bcast_dims;
    bias_bcast_dims[0] = out_batch * out_height * out_width;
    bias_bcast_dims[1] = 1;

    Eigen::array<Eigen::Index, 4> col2im_output_dims;
    col2im_output_dims[0] = out_batch;
    col2im_output_dims[1] = out_height;
    col2im_output_dims[2] = out_width;
    col2im_output_dims[3] = out_channels;

    Eigen::array<Eigen::IndexPair<Eigen::Index>, 1> contract_dims = { Eigen::IndexPair<Eigen::Index>(1, 0) };

    Eigen::array<std::pair<int32_t, int32_t>, 4> padding;
    padding[0] = std::make_pair(0, 0);
    padding[1] = std::make_pair(padding_top, padding_bottom);
    padding[2] = std::make_pair(padding_left, padding_right);
    padding[3] = std::make_pair(0, 0);

    TIn input_val      = this->input->getTensor();
    TWeight weight_val = this->weight->getTensor();
    if (this->qinfo)
    {
        input_val  = input_val - (InEigenType)this->qinfo->input_zp();
        weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
    }

    ETensor4<InEigenType> input_padded = input_val.pad(padding);

    // extract_image_patches() output [N, KH, KW, H * W, C]
    // need to transpose to [N, H * W, KH, KW, C]
    ETensor5<InEigenType> input_extract_patches =
        input_padded
            .extract_image_patches(f_height, f_width, stride_h, stride_w, dilation_h, dilation_w, Eigen::PADDING_VALID)
            .shuffle(Eigen::array<Eigen::Index, 5>{ 0, 3, 1, 2, 4 });

    // reshape input to [N * H * W, KH * KW * C]
    ETensor2<InEigenType> im2col_input = input_extract_patches.reshape(im2col_input_dims);

    // transpose and reshape weight from [OC, H, W, IC] to [H * W * IC, OC]
    ETensor2<WeightEigenType> im2col_weight =
        weight_val.shuffle(Eigen::array<Eigen::Index, 4>({ 1, 2, 3, 0 })).reshape(im2col_weight_dims);

    // there is no need to apply bias_multiplier ( * bias_scale and >> bias_shift) since tflite already scales it;
    // bias is reshaped from [C] to [1, C] and broadcast to [N * H * W, C]
    ETensor2<AccEigenType> bias_2d = this->bias->getTensor().reshape(bias_reshaped_dims).broadcast(bias_bcast_dims);

    // output matrix is [N * H * W, C]
    ETensor2<AccEigenType> contracted_result =
        im2col_input.template cast<AccEigenType>().contract(im2col_weight.template cast<AccEigenType>(), contract_dims);

    // adding bias
    ETensor2<AccEigenType> biased_output = contracted_result + bias_2d.template cast<AccEigenType>();

    // reshape back to [N, H, W, C]
    this->output->getTensor() = biased_output.reshape(col2im_output_dims);

    if (AccDtype == DType_INT48)
    {
        this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
    }

    return GraphNode::eval();
}

template <DType InDtype, DType WeightDtype>
OpConv3d<InDtype, WeightDtype>::OpConv3d(SubgraphTraverser* sgt_,
                                         TosaAttributeBase* attribute_,
                                         TosaQuantInfoBase* qinfo_,
                                         uint64_t id_)
    : GraphNode(sgt_, Op_CONV3D, id_)
{
    setRequiredOperands(3, 1);
    setRequiredRank(5);

    INIT_ATTRIBUTE(Conv);
    INIT_QINFO(Conv);
}

template <DType InDtype, DType WeightDtype>
OpConv3d<InDtype, WeightDtype>::~OpConv3d()
{
    if (attribute)
        delete attribute;
    if (qinfo)
        delete qinfo;
}

template <DType InDtype, DType WeightDtype>
int OpConv3d<InDtype, WeightDtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    // 'bias' is checked separately since it doesn't make sense to require a rank ranging from 1 to 4
    if (inputs[2]->getRank() != 1)
    {
        printNodeValidationError("OpConv3d: bias tensor must be rank 1");
    }

    ERROR_IF(outputs[0]->getDtype() != AccDtype,
             "OpConv3d: Output data type not supported for this configuration of operator");

    input  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
    bias   = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
    output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);

    std::string msg;
    if (check_conv_attribute_qinfo(attribute, qinfo, 3 /* conv_dimension */, input->getShape(), output->getShape(),
                                   InDtype, WeightDtype, msg))
    {
        msg = "OpConv3d: " + msg;
        printNodeValidationError(msg.c_str());
        return 1;
    }

    return 0;
}

template <DType InDtype, DType WeightDtype>
int OpConv3d<InDtype, WeightDtype>::eval()
{
    int in_batch    = this->input->getShape()[0];
    int in_depth    = this->input->getShape()[1];
    int in_height   = this->input->getShape()[2];
    int in_width    = this->input->getShape()[3];
    int in_channels = this->input->getShape()[4];

    int f_out_channels = this->weight->getShape()[0];
    int f_depth        = this->weight->getShape()[1];
    int f_height       = this->weight->getShape()[2];
    int f_width        = this->weight->getShape()[3];
    int f_in_channels  = this->weight->getShape()[4];

    int b_out_channels = this->bias->getShape()[0];

    int out_batch    = this->output->getShape()[0];
    int out_depth    = this->output->getShape()[1];
    int out_height   = this->output->getShape()[2];
    int out_width    = this->output->getShape()[3];
    int out_channels = this->output->getShape()[4];

    ERROR_IF(in_batch != out_batch, "OpConv3d: tensor batch mismatch %d != %d", in_batch, out_batch);
    ERROR_IF(f_in_channels != in_channels, "OpConv3d: tensor input channel mismatch %d != %d", f_in_channels,
             in_channels);
    ERROR_IF(f_out_channels != out_channels, "OpConv3d: tensor output channel mismatch %d != %d", f_out_channels,
             out_channels);
    ERROR_IF(b_out_channels != out_channels, "OpConv3d: bias channel mismatch %d != %d", b_out_channels, out_channels);

    int padding_d0     = this->attribute->padding()[0];
    int padding_d1     = this->attribute->padding()[1];
    int padding_top    = this->attribute->padding()[2];
    int padding_bottom = this->attribute->padding()[3];
    int padding_left   = this->attribute->padding()[4];
    int padding_right  = this->attribute->padding()[5];
    int stride_d       = this->attribute->stride()[0];
    int stride_h       = this->attribute->stride()[1];
    int stride_w       = this->attribute->stride()[2];
    int dilation_d     = this->attribute->dilation()[0];
    int dilation_h     = this->attribute->dilation()[1];
    int dilation_w     = this->attribute->dilation()[2];

    DEBUG_INFO(
        OP,
        "perform OpConv3d, input.shape=[%d,%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d,%d], output.shape=[%d,%d,%d,%d,%d], "
        "stride=[%d,%d,%d], dilation=[%d,%d,%d], padding=[%d,%d,%d,%d,%d,%d]",
        in_batch, in_depth, in_height, in_width, in_channels, f_out_channels, f_depth, f_height, f_width, f_in_channels,
        out_batch, out_depth, out_height, out_width, out_channels, stride_d, stride_h, stride_w, dilation_d, dilation_h,
        dilation_w, padding_d0, padding_d1, padding_top, padding_bottom, padding_left, padding_right);

    Eigen::array<std::pair<int32_t, int32_t>, 5> padding;
    padding[0] = std::make_pair(0, 0);
    padding[1] = std::make_pair(padding_d0, padding_d1);
    padding[2] = std::make_pair(padding_top, padding_bottom);
    padding[3] = std::make_pair(padding_left, padding_right);
    padding[4] = std::make_pair(0, 0);

    TIn input_val      = this->input->getTensor();
    TWeight weight_val = this->weight->getTensor();
    if (this->qinfo)
    {
        input_val  = input_val - (InEigenType)this->qinfo->input_zp();
        weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
    }

    ETensor5<InEigenType> input_padded = input_val.pad(padding);

    // 1. initialize with bias
    Eigen::array<Eigen::Index, 5> reshape_dim;
    reshape_dim.fill(1);
    reshape_dim[4] = b_out_channels;

    Eigen::array<Eigen::Index, 5> bcast;
    bcast[0] = out_batch;
    bcast[1] = out_depth;
    bcast[2] = out_height;
    bcast[3] = out_width;
    bcast[4] = 1;
    this->output->getTensor() = this->bias->getTensor().reshape(reshape_dim).broadcast(bcast);

    // 2. direct convolution
    AccEigenType acc = 0;
    int d_idx, h_idx, w_idx;

    for (int ob = 0; ob < out_batch; ob++)
    {
        for (int od = 0; od < out_depth; od++)
        {
            for (int oh = 0; oh < out_height; oh++)
            {
                for (int ow = 0; ow < out_width; ow++)
                {
                    for (int oc = 0; oc < out_channels; oc++)
                    {
                        acc = 0;
                        for (int fd = 0; fd < f_depth; fd++)
                        {
                            d_idx = od * stride_d + fd * dilation_d;
                            for (int fh = 0; fh < f_height; fh++)
                            {
                                h_idx = oh * stride_h + fh * dilation_h;
                                for (int fw = 0; fw < f_width; fw++)
                                {
                                    w_idx = ow * stride_w + fw * dilation_w;
                                    for (int ic = 0; ic < in_channels; ic++)
                                    {
                                        acc += ((AccEigenType)input_padded(ob, d_idx, h_idx, w_idx, ic) *
                                                (AccEigenType)weight_val(oc, fd, fh, fw, ic));
                                    }
                                }
                            }
                        }
                        this->output->getTensor()(ob, od, oh, ow, oc) = acc;
                    }
                }
            }
        }
    }

    if (AccDtype == DType_INT48)
    {
        this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
    }

    return GraphNode::eval();
}
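
// Note: unlike OpConv2d above, conv3d is evaluated with direct nested loops
// rather than im2col + GEMM. The index arithmetic matches the 2d case; for
// instance (illustrative numbers) stride_d = 2, dilation_d = 1, od = 3,
// fd = 1 reads input depth d_idx = 3 * 2 + 1 * 1 = 7 from the padded input.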

template <DType InDtype, DType WeightDtype>
OpDepthwiseConv2d<InDtype, WeightDtype>::OpDepthwiseConv2d(SubgraphTraverser* sgt_,
                                                           TosaAttributeBase* attribute_,
                                                           TosaQuantInfoBase* qinfo_,
                                                           uint64_t id_)
    : GraphNode(sgt_, Op_DEPTHWISE_CONV2D, id_)
{
    setRequiredOperands(3, 1);
    setRequiredRank(4);

    INIT_ATTRIBUTE(Conv);
    INIT_QINFO(Conv);
}

template <DType InDtype, DType WeightDtype>
OpDepthwiseConv2d<InDtype, WeightDtype>::~OpDepthwiseConv2d()
{
    if (attribute)
        delete attribute;
    if (qinfo)
        delete qinfo;
}

template <DType InDtype, DType WeightDtype>
int OpDepthwiseConv2d<InDtype, WeightDtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    // 'bias' is checked separately since it doesn't make sense to require a rank ranging from 1 to 4
    if (inputs[2]->getRank() != 1)
    {
        printNodeValidationError("OpDepthwiseConv2d: bias tensor must be rank 1");
    }

    ERROR_IF(outputs[0]->getDtype() != AccDtype,
             "OpDepthwiseConv2d: Output data type not supported for this configuration of operator");

    input  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
    bias   = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
    output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);

    std::string msg;
    if (check_conv_attribute_qinfo(attribute, qinfo, 2 /* conv_dimension */, input->getShape(), output->getShape(),
                                   InDtype, WeightDtype, msg))
    {
        msg = "OpDepthwiseConv2d: " + msg;
        printNodeValidationError(msg.c_str());
        return 1;
    }

    return 0;
}

template <DType InDtype, DType WeightDtype>
int OpDepthwiseConv2d<InDtype, WeightDtype>::eval()
{
    int in_batch    = this->input->getShape()[0];
    int in_height   = this->input->getShape()[1];
    int in_width    = this->input->getShape()[2];
    int in_channels = this->input->getShape()[3];

    int f_height      = this->weight->getShape()[0];
    int f_width       = this->weight->getShape()[1];
    int f_in_channels = this->weight->getShape()[2];
    int f_multiplier  = this->weight->getShape()[3];

    int b_out_channels = this->bias->getShape()[0];

    int out_batch    = this->output->getShape()[0];
    int out_height   = this->output->getShape()[1];
    int out_width    = this->output->getShape()[2];
    int out_channels = this->output->getShape()[3];

    ERROR_IF(in_batch != out_batch, "OpDepthwiseConv2d: tensor batch mismatch %d != %d", in_batch, out_batch);
    ERROR_IF(f_in_channels != in_channels, "OpDepthwiseConv2d: tensor input channel mismatch %d != %d", f_in_channels,
             in_channels);
    ERROR_IF(in_channels * f_multiplier != out_channels, "OpDepthwiseConv2d: tensor output channel mismatch %d != %d",
             in_channels * f_multiplier, out_channels);
    ERROR_IF(b_out_channels != out_channels, "OpDepthwiseConv2d: bias channels mismatch %d != %d", b_out_channels,
             out_channels);

    int padding_top    = this->attribute->padding()[0];
    int padding_bottom = this->attribute->padding()[1];
    int padding_left   = this->attribute->padding()[2];
    int padding_right  = this->attribute->padding()[3];
    int stride_h       = this->attribute->stride()[0];
    int stride_w       = this->attribute->stride()[1];
    int dilation_h     = this->attribute->dilation()[0];
    int dilation_w     = this->attribute->dilation()[1];

    DEBUG_INFO(OP,
               "perform OpDepthwiseConv2d, input.shape=[%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d], "
               "output.shape=[%d,%d,%d,%d], stride=[%d,%d], dilation=[%d,%d], padding=[%d,%d,%d,%d]",
               in_batch, in_height, in_width, in_channels, f_height, f_width, f_in_channels, f_multiplier, out_batch,
               out_height, out_width, out_channels, stride_h, stride_w, dilation_h, dilation_w, padding_top,
               padding_bottom, padding_left, padding_right);

    Eigen::array<std::pair<int32_t, int32_t>, 4> padding;
    padding[0] = std::make_pair(0, 0);
    padding[1] = std::make_pair(padding_top, padding_bottom);
    padding[2] = std::make_pair(padding_left, padding_right);
    padding[3] = std::make_pair(0, 0);

    TIn input_val      = this->input->getTensor();
    TWeight weight_val = this->weight->getTensor();
    if (this->qinfo)
    {
        input_val  = input_val - (InEigenType)this->qinfo->input_zp();
        weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
    }

    ETensor4<InEigenType> input_padded = input_val.pad(padding);

    // GEMM doesn't fit well with DepthwiseConv2d
    // 1. use extract_image_patches() to handle stride/dilation/padding
    // 2. perform direct convolution

    // 1. extract_image_patches() output [N, KH, KW, OH * OW, IC]
    ETensor5<InEigenType> input_extract_patches = input_padded.extract_image_patches(
        f_height, f_width, stride_h, stride_w, dilation_h, dilation_w, Eigen::PADDING_VALID);

    Eigen::array<Eigen::Index, 4> reshape_dim;
    reshape_dim.fill(1);
    reshape_dim[3] = b_out_channels;

    Eigen::array<Eigen::Index, 4> bcast;
    bcast[0] = out_batch;
    bcast[1] = out_height;
    bcast[2] = out_width;
    bcast[3] = 1;

    // initialize with bias
    this->output->getTensor() = this->bias->getTensor().reshape(reshape_dim).broadcast(bcast);

    // 2. direct depthwise convolution
    for (int ob = 0; ob < out_batch; ob++)
    {
        for (int oh = 0; oh < out_height; oh++)
        {
            for (int ow = 0; ow < out_width; ow++)
            {
                for (int ic = 0; ic < in_channels; ic++)
                {
                    for (int cm = 0; cm < f_multiplier; cm++)
                    {
                        for (int fh = 0; fh < f_height; fh++)
                        {
                            for (int fw = 0; fw < f_width; fw++)
                            {
                                this->output->getTensor()(ob, oh, ow, ic * f_multiplier + cm) +=
                                    ((AccEigenType)input_extract_patches(ob, fh, fw, ow * out_height + oh, ic) *
                                     (AccEigenType)weight_val(fh, fw, ic, cm));
                            }
                        }
                    }
                }
            }
        }
    }

    if (AccDtype == DType_INT48)
    {
        this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
    }

    return GraphNode::eval();
}
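
// Channel-multiplier example (illustrative): with in_channels = 4 and
// f_multiplier = 2, the loop above writes out_channels = 8, and input channel
// ic = 1 feeds only output channels ic * f_multiplier + cm = 2 and 3 -- each
// input channel is convolved independently against its own set of filters.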

template <DType InDtype, DType WeightDtype>
OpFullyConnected<InDtype, WeightDtype>::OpFullyConnected(SubgraphTraverser* sgt_,
                                                         TosaAttributeBase* attribute_,
                                                         TosaQuantInfoBase* qinfo_,
                                                         uint64_t id_)
    : GraphNode(sgt_, Op_FULLY_CONNECTED, id_)
{
    setRequiredOperands(3, 1);
    setRequiredRank(2);

    INIT_QINFO(Conv);
}

template <DType InDtype, DType WeightDtype>
OpFullyConnected<InDtype, WeightDtype>::~OpFullyConnected()
{
    if (qinfo)
        delete qinfo;
}

template <DType InDtype, DType WeightDtype>
int OpFullyConnected<InDtype, WeightDtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    input  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
    bias   = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);

    if (input->getShape()[1] != weight->getShape()[1])
    {
        printNodeValidationError("OpFullyConnected operator input.shape[1] should match weight.shape[1]");
        return 1;
    }

    if (weight->getShape()[0] != bias->getShape()[0])
    {
        printNodeValidationError("OpFullyConnected operator bias.shape[0] should match weight.shape[0]");
        return 1;
    }

    ERROR_IF(outputs[0]->getDtype() != AccDtype,
             "OpFullyConnected: Output data type not supported for this configuration of operator");

    output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);

    if (this->qinfo)
    {
        if (InDtype != DType_INT8)
        {
            ERROR_IF(this->qinfo->input_zp() != 0, "OpFullyConnected: zeropoint only for int8_t");
        }
        if (WeightDtype != DType_INT8)
        {
            ERROR_IF(this->qinfo->weight_zp() != 0, "OpFullyConnected: zeropoint only for int8_t");
        }
    }

    return 0;
}

template <DType InDtype, DType WeightDtype>
int OpFullyConnected<InDtype, WeightDtype>::eval()
{
    typedef Eigen::Tensor<int, 1>::DimensionPair DimPair;
    Eigen::array<DimPair, 1> dims{ { DimPair(1, 0) } };

    Eigen::array<Eigen::Index, 2> weight_shuffle{ 1, 0 };

    Eigen::array<Eigen::Index, 2> bias_reshape;
    bias_reshape[0] = 1;
    bias_reshape[1] = this->bias->getShape()[0];

    Eigen::array<Eigen::Index, 2> bias_bcast;
    bias_bcast[0] = this->input->getShape()[0];
    bias_bcast[1] = 1;

    TIn input_val      = this->input->getTensor();
    TWeight weight_val = this->weight->getTensor().shuffle(weight_shuffle);
    if (this->qinfo)
    {
        input_val  = input_val - (InEigenType)this->qinfo->input_zp();
        weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
    }

    this->output->getTensor() =
        input_val.template cast<AccEigenType>().contract(weight_val.template cast<AccEigenType>(), dims) +
        this->bias->getTensor().reshape(bias_reshape).broadcast(bias_bcast);

    if (AccDtype == DType_INT48)
    {
        this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
    }
    return GraphNode::eval();
}
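
// Shape sketch (illustrative): the input [N, IC] is contracted with the
// shuffled weight [IC, OC] and the bias is broadcast from [OC] to [N, OC];
// e.g. input [32, 128] x weight (stored [64, 128], shuffled to [128, 64])
// plus bias [64] yields an output of [32, 64].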

template <DType Dtype>
OpMatMul<Dtype>::OpMatMul(SubgraphTraverser* sgt_,
                          TosaAttributeBase* attribute_,
                          TosaQuantInfoBase* qinfo_,
                          uint64_t id_)
    : GraphNode(sgt_, Op_MATMUL, id_)
{
    setRequiredOperands(2, 1);
    setRequiredRank(3);

    INIT_QINFO(MatMul);
}

template <DType Dtype>
OpMatMul<Dtype>::~OpMatMul()
{
    if (qinfo)
        delete qinfo;
}

template <DType Dtype>
int OpMatMul<Dtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    ERROR_IF(outputs[0]->getDtype() != AccDtype,
             "OpMatMul: Output data type not supported for this configuration of operator");

    a      = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    b      = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[1]);
    output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);

    ASSERT_MEM(a && b && output);

    // a: [N, H, C]
    // b: [N, C, W]
    // c: [N, H, W]

    // Check N
    if (a->getShape()[0] != b->getShape()[0] || a->getShape()[0] != output->getShape()[0])
    {
        printNodeValidationError("OpMatMul operator a.shape[0], b.shape[0] and output.shape[0] should match");
        return 1;
    }
    N = a->getShape()[0];

    // Check C
    if (a->getShape()[2] != b->getShape()[1])
    {
        printNodeValidationError("OpMatMul operator a.shape[2] should match b.shape[1]");
        return 1;
    }
    C = a->getShape()[2];

    // Check H
    if (a->getShape()[1] != output->getShape()[1])
    {
        printNodeValidationError("OpMatMul operator a.shape[1] should match output.shape[1]");
        return 1;
    }
    H = a->getShape()[1];

    // Check W
    if (b->getShape()[2] != output->getShape()[2])
    {
        printNodeValidationError("OpMatMul operator b.shape[2] should match output.shape[2]");
        return 1;
    }
    W = b->getShape()[2];

    if (Dtype != DType_INT8 && this->qinfo)
    {
        ERROR_IF(this->qinfo->a_zp() != 0, "OpMatMul: zeropoint only for int8_t");
        ERROR_IF(this->qinfo->b_zp() != 0, "OpMatMul: zeropoint only for int8_t");
    }

    return 0;
}

template <DType Dtype>
int OpMatMul<Dtype>::eval()
{
    typedef Eigen::Tensor<int, 1>::DimensionPair DimPair;
    Eigen::array<DimPair, 1> dims{ { DimPair(1, 0) } };

    TIn a_val = this->a->getTensor();
    TIn b_val = this->b->getTensor();
    if (this->qinfo)
    {
        a_val = a_val - (InEigenType)this->qinfo->a_zp();
        b_val = b_val - (InEigenType)this->qinfo->b_zp();
    }

    Eigen::array<Eigen::Index, 2> a_rank2_shape({ H, C });
    Eigen::array<Eigen::Index, 2> b_rank2_shape({ C, W });
    Eigen::array<Eigen::Index, 3> output_rank3_shape({ 1, H, W });

    Eigen::array<Eigen::Index, 3> a_size_array({ 1, H, C });
    Eigen::array<Eigen::Index, 3> b_size_array({ 1, C, W });

    Eigen::array<Eigen::Index, 3> a_begin_array({ 0, 0, 0 });
    Eigen::array<Eigen::Index, 3> b_begin_array({ 0, 0, 0 });

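    // Batching sketch (illustrative): for a = [N, H, C] and b = [N, C, W],
    // each iteration below slices one [1, H, C] and [1, C, W] pair, contracts
    // them as rank-2 matrices, and concatenates the [1, H, W] results along
    // the batch dimension to rebuild the [N, H, W] output.
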
    // Iterate N dimension.
    for (int i = 0; i < N; i++)
    {
        a_begin_array[0] = i;
        b_begin_array[0] = i;

        TInRank2 a_rank2_val = a_val.slice(a_begin_array, a_size_array).reshape(a_rank2_shape);
        TInRank2 b_rank2_val = b_val.slice(b_begin_array, b_size_array).reshape(b_rank2_shape);
        TAccRank2 output_rank2_val =
            a_rank2_val.template cast<AccEigenType>().contract(b_rank2_val.template cast<AccEigenType>(), dims);
        TAcc output_rank3_val = output_rank2_val.reshape(output_rank3_shape);
        if (i == 0)
        {
            this->output->getTensor() = output_rank3_val;
        }
        else
        {
            TAcc temp                 = this->output->getTensor().concatenate(output_rank3_val, 0);
            this->output->getTensor() = temp;
        }
    }

    if (AccDtype == DType_INT48)
    {
        this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
    }

    return GraphNode::eval();
}

template <DType Dtype>
OpMaxPool2d<Dtype>::OpMaxPool2d(SubgraphTraverser* sgt_,
                                TosaAttributeBase* attribute_,
                                TosaQuantInfoBase* qinfo_,
                                uint64_t id_)
    : GraphNode(sgt_, Op_MAX_POOL2D, id_)
{
    setRequiredOperands(1, 1);
    setRequiredRank(4);

    INIT_ATTRIBUTE(Pool);
}

template <DType Dtype>
OpMaxPool2d<Dtype>::~OpMaxPool2d()
{
    if (attribute)
        delete attribute;
}

template <DType Dtype>
int OpMaxPool2d<Dtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    if (inputs[0]->matchType(*outputs[0]))
    {
        printNodeValidationError("OpMaxPool2d: input and output tensor type mismatch");
        return 1;
    }

    in  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    out = dynamic_cast<TosaReference::TensorTemplate<TOut>*>(outputs[0]);

    std::string msg;
    if (check_pool2d_attribute(attribute, in->getShape(), out->getShape(), msg))
    {
        msg = "OpMaxPool2d: " + msg;
        printNodeValidationError(msg.c_str());
        return 1;
    }

    return 0;
}

template <DType Dtype>
int OpMaxPool2d<Dtype>::eval()
{
    int in_batch    = this->in->getShape()[0];
    int in_height   = this->in->getShape()[1];
    int in_width    = this->in->getShape()[2];
    int in_channels = this->in->getShape()[3];

    int out_batch    = this->out->getShape()[0];
    int out_height   = this->out->getShape()[1];
    int out_width    = this->out->getShape()[2];
    int out_channels = this->out->getShape()[3];

    ERROR_IF(in_batch != out_batch, "OpMaxPool2d: tensor batch mismatch %d != %d", in_batch, out_batch);
    ERROR_IF(in_channels != out_channels, "OpMaxPool2d: tensor channel mismatch %d != %d", in_channels, out_channels);

    int padding_top    = this->attribute->padding()[0];
    int padding_bottom = this->attribute->padding()[1];
    int padding_left   = this->attribute->padding()[2];
    int padding_right  = this->attribute->padding()[3];
    int kernel_h       = this->attribute->kernel()[0];
    int kernel_w       = this->attribute->kernel()[1];
    int stride_h       = this->attribute->stride()[0];
    int stride_w       = this->attribute->stride()[1];

    DEBUG_INFO(OP,
               "perform MaxPool2d, input.shape=[%d,%d,%d,%d], output.shape=[%d,%d,%d,%d], kernel=[%d,%d], "
               "stride=[%d,%d], padding=[%d,%d,%d,%d]",
               in_batch, in_height, in_width, in_channels, out_batch, out_height, out_width, out_channels, kernel_h,
               kernel_w, stride_h, stride_w, padding_top, padding_bottom, padding_left, padding_right);

    Eigen::array<Eigen::Index, 2> im2col_input_dims;
    im2col_input_dims[0] = kernel_h * kernel_w;
    im2col_input_dims[1] = out_batch * out_height * out_width * out_channels;

    Eigen::array<Eigen::Index, 4> col2im_output_dims;
    col2im_output_dims[0] = out_batch;
    col2im_output_dims[1] = out_height;
    col2im_output_dims[2] = out_width;
    col2im_output_dims[3] = out_channels;

    Eigen::array<std::pair<int32_t, int32_t>, 4> padding;
    padding[0] = std::make_pair(0, 0);
    padding[1] = std::make_pair(padding_top, padding_bottom);
    padding[2] = std::make_pair(padding_left, padding_right);
    padding[3] = std::make_pair(0, 0);

    ETensor4<InEigenType> input_padded = this->in->getTensor().pad(padding, std::numeric_limits<InEigenType>::lowest());

    // extract_image_patches() output [N, KH, KW, H * W, C]
    // transpose to [KH, KW, N, H * W, C]
    // reshape to [KH * KW, N * H * W * C]
    //
    // Set the padding value to be the most negative value that can be
    // represented by the datatype to ensure that any padding values will be equal
    // to or smaller than the actual maximum in the KH x KW patch.
    ETensor2<InEigenType> input_extract_patches =
        input_padded
            .extract_image_patches(kernel_h, kernel_w, stride_h, stride_w, 1, 1, Eigen::PADDING_VALID,
                                   std::numeric_limits<InEigenType>::lowest())
            .shuffle(Eigen::array<Eigen::Index, 5>{ 1, 2, 0, 3, 4 })
            .reshape(im2col_input_dims);

    // Get the maximum of the KH x KW patches along axis 0
    Eigen::Tensor<DenseIndex, 1> tensor_argmax = input_extract_patches.argmax(0);

    // 1D result with [N * H * W * C]
    ETensor1<OutEigenType> out_1d(this->out->getElementCount());

    // indexing input_extract_patches with the argmax array yields the result
    for (size_t i = 0; i < this->out->getElementCount(); i++)
    {
        out_1d(i) = (OutEigenType)input_extract_patches(tensor_argmax(i), i);
    }

    // reshape result to [N, H, W, C]
    this->out->getTensor() = out_1d.reshape(col2im_output_dims);

    return GraphNode::eval();
}

template <DType InDtype, DType WeightDtype>
OpTransposeConv2d<InDtype, WeightDtype>::OpTransposeConv2d(SubgraphTraverser* sgt_,
                                                           TosaAttributeBase* attribute_,
                                                           TosaQuantInfoBase* qinfo_,
                                                           uint64_t id_)
    : GraphNode(sgt_, Op_TRANSPOSE_CONV2D, id_)
{
    setRequiredOperands(3, 1);
    setRequiredRank(4);

    INIT_ATTRIBUTE(TransposeConv);
    INIT_QINFO(Conv);
}

template <DType InDtype, DType WeightDtype>
OpTransposeConv2d<InDtype, WeightDtype>::~OpTransposeConv2d()
{
    if (attribute)
        delete attribute;
    if (qinfo)
        delete qinfo;
}

template <DType InDtype, DType WeightDtype>
int OpTransposeConv2d<InDtype, WeightDtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    ERROR_IF(outputs[0]->getDtype() != AccDtype,
             "OpTransposeConv2d: Output data type not supported for this configuration of operator");

    input  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
    bias   = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
    output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);

    if (attribute->outpad().size() != 2)
    {
        printNodeValidationError("OpTransposeConv2d: illegal size for attribute outpad");
        return 1;
    }

    if (attribute->stride().size() != 2)
    {
        printNodeValidationError("OpTransposeConv2d: illegal size for attribute stride");
        return 1;
    }

    if (attribute->dilation().size() != 2)
    {
        printNodeValidationError("OpTransposeConv2d: illegal size for attribute dilation");
        return 1;
    }

    if (attribute->output_shape().size() != 4)
    {
        printNodeValidationError("OpTransposeConv2d: illegal size for attribute output_shape");
        return 1;
    }

    for (int32_t i : attribute->outpad())
    {
        if (i < 0)
        {
            printNodeValidationError("OpTransposeConv2d: At least one pad is smaller than zero");
            return 1;
        }
    }

    for (int32_t i : attribute->stride())
    {
        if (i < 1)
        {
            printNodeValidationError("OpTransposeConv2d: At least one stride is smaller than one");
            return 1;
        }
    }

    for (int32_t i : attribute->dilation())
    {
        if (i < 1)
        {
            printNodeValidationError("OpTransposeConv2d: At least one dilation is smaller than one");
            return 1;
        }
    }

    for (int d = 0; d < 4; d++)
    {
        if (attribute->output_shape()[d] != this->output->getShape()[d])
        {
            printNodeValidationError("OpTransposeConv2d: illegal size for attribute output_shape");
            return 1;
        }
    }

    if (this->qinfo)
    {
        if (InDtype != DType_INT8)
        {
            ERROR_IF(this->qinfo->input_zp() != 0, "OpTransposeConv2d: zeropoint only for int8_t");
        }
        if (WeightDtype != DType_INT8)
        {
            ERROR_IF(this->qinfo->weight_zp() != 0, "OpTransposeConv2d: zeropoint only for int8_t");
        }
    }

    return 0;
}

template <DType InDtype, DType WeightDtype>
int OpTransposeConv2d<InDtype, WeightDtype>::eval()
{
    int in_batch    = this->input->getShape()[0];
    int in_height   = this->input->getShape()[1];
    int in_width    = this->input->getShape()[2];
    int in_channels = this->input->getShape()[3];

    int f_out_channels = this->weight->getShape()[0];
    int f_height       = this->weight->getShape()[1];
    int f_width        = this->weight->getShape()[2];
    int f_in_channels  = this->weight->getShape()[3];

    int b_out_channels = this->bias->getShape()[0];

    int out_batch    = this->output->getShape()[0];
    int out_height   = this->output->getShape()[1];
    int out_width    = this->output->getShape()[2];
    int out_channels = this->output->getShape()[3];

    int padding_top  = this->attribute->outpad()[0];
    int padding_left = this->attribute->outpad()[1];
    int stride_h     = this->attribute->stride()[0];
    int stride_w     = this->attribute->stride()[1];
    int dilation_h   = this->attribute->dilation()[0];
    int dilation_w   = this->attribute->dilation()[1];

    ERROR_IF(in_batch != out_batch, "OpTransposeConv2d: tensor batch mismatch %d != %d", in_batch, out_batch);
    ERROR_IF(f_in_channels != in_channels, "OpTransposeConv2d: tensor input channel mismatch %d != %d", f_in_channels,
             in_channels);
    ERROR_IF(f_out_channels != out_channels, "OpTransposeConv2d: tensor output channel mismatch %d != %d",
             f_out_channels, out_channels);
    ERROR_IF(b_out_channels != out_channels, "OpTransposeConv2d: bias channels mismatch %d != %d", b_out_channels,
             out_channels);

    DEBUG_INFO(OP,
               "perform OpTransposeConv2d, input.shape=[%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d], "
               "output.shape=[%d,%d,%d,%d], stride=[%d,%d], dilation=[%d,%d], padding=[%d,%d]",
               in_batch, in_height, in_width, in_channels, f_height, f_width, f_out_channels, f_in_channels, out_batch,
               out_height, out_width, out_channels, stride_h, stride_w, dilation_h, dilation_w, padding_top,
               padding_left);

    TIn input_val      = this->input->getTensor();
    TWeight weight_val = this->weight->getTensor();
    if (this->qinfo)
    {
        input_val  = input_val - (InEigenType)this->qinfo->input_zp();
        weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
    }

    Eigen::array<Eigen::Index, 4> reshape_dim;
    reshape_dim.fill(1);
    reshape_dim[3] = b_out_channels;

    Eigen::array<Eigen::Index, 4> bcast;
    bcast[0] = out_batch;
    bcast[1] = out_height;
    bcast[2] = out_width;
    bcast[3] = 1;

    // initialize with bias
    this->output->getTensor() = this->bias->getTensor().reshape(reshape_dim).broadcast(bcast);

    int out_x_origin, out_y_origin;
    int out_x, out_y;

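    // Scatter sketch (illustrative restatement of the loop below): transpose
    // convolution is computed by scattering rather than gathering -- each
    // input pixel (ih, iw) adds input * weight into the output positions
    // out_y = ih * stride_h - padding_top + fh * dilation_h and
    // out_x = iw * stride_w - padding_left + fw * dilation_w, with
    // out-of-range positions skipped.
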
    // reference implementation from: tensorflow/tensorflow/lite/kernels/internal/reference/reference_ops.h
    for (int ob = 0; ob < out_batch; ob++)
    {
        for (int ih = 0; ih < in_height; ih++)
        {
            for (int iw = 0; iw < in_width; iw++)
            {
                out_x_origin = iw * stride_w - padding_left;
                out_y_origin = ih * stride_h - padding_top;
                for (int ic = 0; ic < in_channels; ic++)
                {
                    for (int fh = 0; fh < f_height; fh++)
                    {
                        for (int fw = 0; fw < f_width; fw++)
                        {
                            out_x = out_x_origin + fw * dilation_w;
                            out_y = out_y_origin + fh * dilation_h;
                            for (int oc = 0; oc < out_channels; oc++)
                            {
                                if ((out_x >= 0 && out_x < out_width) && (out_y >= 0 && out_y < out_height))
                                {
                                    this->output->getTensor()(ob, out_y, out_x, oc) +=
                                        ((AccEigenType)input_val(ob, ih, iw, ic) *
                                         (AccEigenType)weight_val(oc, fh, fw, ic));
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    if (AccDtype == DType_INT48)
    {
        this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
    }

    return GraphNode::eval();
}

// template explicit instantiation
DEF_INSTANTIATE_RANK1_6_ONE_RANK_ONE_TYPE(OpArgMax, FLOAT);
DEF_INSTANTIATE_RANK1_6_ONE_RANK_ONE_TYPE(OpArgMax, INT8);
DEF_INSTANTIATE_RANK1_6_ONE_RANK_ONE_TYPE(OpArgMax, INT16);

DEF_INSTANTIATE_ONE_TYPE(OpAvgPool2d, FLOAT)
DEF_INSTANTIATE_ONE_TYPE(OpAvgPool2d, INT8)
DEF_INSTANTIATE_ONE_TYPE(OpAvgPool2d, INT16)

DEF_INSTANTIATE_TWO_TYPE(OpConv2d, FLOAT, FLOAT);
DEF_INSTANTIATE_TWO_TYPE(OpConv2d, INT8, INT4);
DEF_INSTANTIATE_TWO_TYPE(OpConv2d, INT8, INT8);
DEF_INSTANTIATE_TWO_TYPE(OpConv2d, INT16, INT8);

DEF_INSTANTIATE_TWO_TYPE(OpConv3d, FLOAT, FLOAT);
DEF_INSTANTIATE_TWO_TYPE(OpConv3d, INT8, INT4);
DEF_INSTANTIATE_TWO_TYPE(OpConv3d, INT8, INT8);
DEF_INSTANTIATE_TWO_TYPE(OpConv3d, INT16, INT8);

DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, FLOAT, FLOAT);
DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, INT8, INT4);
DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, INT8, INT8);
DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, INT16, INT8);

DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, FLOAT, FLOAT);
DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, INT8, INT4);
DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, INT8, INT8);
DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, INT16, INT8);

DEF_INSTANTIATE_ONE_TYPE(OpMatMul, INT8);
DEF_INSTANTIATE_ONE_TYPE(OpMatMul, INT16);
DEF_INSTANTIATE_ONE_TYPE(OpMatMul, FLOAT);

DEF_INSTANTIATE_ONE_TYPE(OpMaxPool2d, FLOAT);
DEF_INSTANTIATE_ONE_TYPE(OpMaxPool2d, INT8);
DEF_INSTANTIATE_ONE_TYPE(OpMaxPool2d, INT16);

DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, FLOAT, FLOAT);
DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, INT8, INT4);
DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, INT8, INT8);
DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, INT16, INT8);