// Copyright (c) 2020-2021, ARM Limited.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "tensor_ops.h"
#include "quant_util.h"
#include "template_types.h"

using namespace TosaReference;
using namespace Eigen;
using namespace tosa;

template <int Rank, DType Dtype>
OpArgMax<Rank, Dtype>::OpArgMax(TosaAttributeBase* attribute_, TosaQuantInfoBase* qinfo_, uint64_t id_)
    : GraphNode(Op_ARGMAX, id_)
{
    setRequiredOperands(1, 1);
    setRequiredRank(0, 6);

    INIT_ATTRIBUTE(Axis);
}

template <int Rank, DType Dtype>
OpArgMax<Rank, Dtype>::~OpArgMax()
{
    if (attribute)
        delete attribute;
}

template <int Rank, DType Dtype>
int OpArgMax<Rank, Dtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    input  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    output = dynamic_cast<TosaReference::TensorTemplate<TOut>*>(outputs[0]);

    return 0;
}

template <int Rank, DType Dtype>
int OpArgMax<Rank, Dtype>::eval()
{
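    // Eigen's argmax reduces the given axis to the index of the maximum,
    // e.g. a [2, 3] input with axis = 1 yields a rank-1 tensor of two
    // DenseIndex values in [0, 3); the cast below converts them to OutEigenType.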
    Eigen::Tensor<DenseIndex, Rank - 1> index = this->input->getTensor().argmax(attribute->axis());

    this->output->getTensor() = index.unaryExpr([](DenseIndex in) -> OutEigenType { return (OutEigenType)in; });

    return GraphNode::eval();
}

template <DType Dtype>
OpAvgPool2d<Dtype>::OpAvgPool2d(TosaAttributeBase* attribute_, TosaQuantInfoBase* qinfo_, uint64_t id_)
    : GraphNode(Op_AVG_POOL2D, id_)
{
    setRequiredOperands(1, 1);
    setRequiredRank(4);

    INIT_ATTRIBUTE(Pool2d);
    INIT_QINFO(Unary);
}

template <DType Dtype>
OpAvgPool2d<Dtype>::~OpAvgPool2d()
{
    if (attribute)
        delete attribute;
}

template <DType Dtype>
int OpAvgPool2d<Dtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    if (inputs[0]->matchType(*outputs[0]))
    {
        printNodeValidationError("OpAvgPool2d: input and output tensor type mismatch");
        return 1;
    }

    in  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    out = dynamic_cast<TosaReference::TensorTemplate<TOut>*>(outputs[0]);

    if (attribute->padding().size() != 4)
    {
        printNodeValidationError("OpAvgPool2d: illegal size for attribute padding");
        return 1;
    }

    if (attribute->kernel().size() != 2)
    {
        printNodeValidationError("OpAvgPool2d: illegal size for attribute kernel");
        return 1;
    }

    if (attribute->stride().size() != 2)
    {
        printNodeValidationError("OpAvgPool2d: illegal size for attribute stride");
        return 1;
    }

    return 0;
}

template <DType Dtype>
ETensor1<int32_t> OpAvgPool2d<Dtype>::calculate_div_map_1d(int in_size, int out_size, int kernel_size, int stride)
{
    ETensor1<int32_t> result(out_size);

    int32_t total_pad = (out_size - 1) * stride + kernel_size - in_size;
    total_pad         = total_pad < 0 ? 0 : total_pad;

    int32_t pad_left  = total_pad >> 1;
    int32_t pad_right = total_pad - pad_left;

    result.setConstant(kernel_size);

    // output indices at or below 'left_index', and at or above
    // 'out_size - 1 - right_index', have input windows that overlap the padding
    int32_t left_index  = pad_left / stride;
    int32_t right_index = pad_right / stride;
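    // Worked example (illustrative numbers, not from the spec): in_size = 5,
    // out_size = 3, kernel_size = 3, stride = 2 gives total_pad = 2,
    // pad_left = pad_right = 1, left_index = right_index = 0. The first window
    // covers one left pad element and the last covers one right pad element,
    // so the resulting div_map is {2, 3, 2}.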

    // ultra-small activations are not handled yet
    ASSERT_MSG_NODE((out_size - 1 - right_index) >= left_index, "AvgPool2d: Small activations not supported yet");

    // subtract the number of padded elements covered by this output index
    while (left_index >= 0)
    {
        result(left_index) -= (pad_left - left_index * stride);
        left_index--;
    }

    while (right_index >= 0)
    {
        result(out_size - 1 - right_index) -= (pad_right - right_index * stride);
        right_index--;
    }

    return result;
}

// Assuming the input and output tensors have the same scale, as in the TFLite
// reference implementation, there is no need to rescale the input and output.
template <DType Dtype>
int OpAvgPool2d<Dtype>::eval()
{
    int in_batch    = this->in->getShape()[0];
    int in_height   = this->in->getShape()[1];
    int in_width    = this->in->getShape()[2];
    int in_channels = this->in->getShape()[3];

    int out_batch    = this->out->getShape()[0];
    int out_height   = this->out->getShape()[1];
    int out_width    = this->out->getShape()[2];
    int out_channels = this->out->getShape()[3];

    ASSERT_MSG_NODE(in_batch == out_batch, "OpAvgPool2d: tensor batch mismatch %d != %d", in_batch, out_batch);

    int padding_top    = this->attribute->padding()[0];
    int padding_bottom = this->attribute->padding()[1];
    int padding_left   = this->attribute->padding()[2];
    int padding_right  = this->attribute->padding()[3];
    int kernel_h       = this->attribute->kernel()[0];
    int kernel_w       = this->attribute->kernel()[1];
    int stride_h       = this->attribute->stride()[0];
    int stride_w       = this->attribute->stride()[1];

    DEBUG_INFO(OP,
               "perform AvgPool2d, input.shape=[%d,%d,%d,%d], output.shape=[%d,%d,%d,%d], kernel=[%d,%d], "
               "stride=[%d,%d], padding=[%d,%d,%d,%d]",
               in_batch, in_height, in_width, in_channels, out_batch, out_height, out_width, out_channels, kernel_h,
               kernel_w, stride_h, stride_w, padding_top, padding_bottom, padding_left, padding_right);

    Eigen::array<Eigen::Index, 2> im2col_input_dims;
    im2col_input_dims[0] = kernel_h * kernel_w;
    im2col_input_dims[1] = out_batch * out_height * out_width * out_channels;

    Eigen::array<Eigen::Index, 4> col2im_output_dims;
    col2im_output_dims[0] = out_batch;
    col2im_output_dims[1] = out_height;
    col2im_output_dims[2] = out_width;
    col2im_output_dims[3] = out_channels;

    Eigen::array<std::pair<int32_t, int32_t>, 4> padding;
    padding[0] = std::make_pair(0, 0);
    padding[1] = std::make_pair(padding_top, padding_bottom);
    padding[2] = std::make_pair(padding_left, padding_right);
    padding[3] = std::make_pair(0, 0);

    ETensor4<InEigenType> input_val = this->in->getTensor();
    if (this->qinfo)
    {
        input_val = input_val - (InEigenType)this->qinfo->input_zp();
    }

    ETensor4<InEigenType> input_padded = input_val.pad(padding);

    // input and output are assumed to share the same scale, so no rescale is applied here
    // TODO: check whether TOSA actually makes this assumption

    // extract_image_patches() output [N, KH, KW, H * W, C]
    // transpose to [KH, KW, N, H * W, C]
    // reshape to [KH * KW, N * H * W * C]
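    // Shape walkthrough (illustrative numbers): N=1, H=W=2, C=3, KH=KW=2 gives
    // patches of shape [1, 2, 2, 4, 3], shuffled to [2, 2, 1, 4, 3], then
    // reshaped to [4, 12]: one column per output element, one row per kernel tap.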
    ETensor2<InEigenType> input_extract_patches =
        input_padded.extract_image_patches(kernel_h, kernel_w, stride_h, stride_w, 1, 1, Eigen::PADDING_VALID)
            .shuffle(Eigen::array<Eigen::Index, 5>{ 1, 2, 0, 3, 4 })
            .reshape(im2col_input_dims);

    // 1D result with [N * H * W * C]
    ETensor1<AccEigenType> out_1d(this->out->getElementCount());
    out_1d.setZero();

    // sum pool
    for (size_t i = 0; i < this->out->getElementCount(); i++)
    {
        for (int32_t j = 0; j < kernel_h * kernel_w; j++)
        {
            out_1d(i) += (AccEigenType)input_extract_patches(j, i);
        }
    }

    // reshape result to [N, H, W, C] and divide with div_map
    ETensor4<AccEigenType> sum = out_1d.reshape(col2im_output_dims);

    // calculate 1d height/width div_map (number of elements this pooling window covers)
    // and outer product to get 2d div_map, then reshape/broadcast to [N, H, W, C]
    ETensor1<int32_t> div_map_h = calculate_div_map_1d(in_height, out_height, kernel_h, stride_h);
    ETensor1<int32_t> div_map_w = calculate_div_map_1d(in_width, out_width, kernel_w, stride_w);
    Eigen::array<Eigen::IndexPair<Eigen::Index>, 1> contract_dims = { Eigen::IndexPair<Eigen::Index>(1, 0) };
    Eigen::array<Eigen::Index, 4> bcast{ out_batch, 1, 1, out_channels };

    ETensor4<int32_t> div_map =
        div_map_h.reshape(Eigen::array<Eigen::Index, 2>{ out_height, 1 })
            .contract(div_map_w.reshape(Eigen::array<Eigen::Index, 2>{ 1, out_width }), contract_dims)
            .reshape(Eigen::array<Eigen::Index, 4>{ 1, out_height, out_width, 1 })
            .broadcast(bcast);

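    // Quantized path below: divide by the element count using a rounded
    // fixed-point multiply. As a sketch, for div = 9, reciprocal_scale() is
    // expected to produce a multiplier/shift pair with multiplier / 2^shift
    // close to 1/9, so apply_scale_32(v, multiplier, shift) approximates
    // round(v / 9) without a per-element integer division.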
    if (Dtype != DType_FLOAT)
    {
        this->out->getTensor() = sum.binaryExpr(div_map, [](AccEigenType value, int32_t div) -> OutEigenType {
            int32_t multiplier, shift;
            TosaReference::QuantUtil::reciprocal_scale(div, multiplier, shift);

            return (OutEigenType)TosaReference::QuantUtil::apply_scale_32(value, multiplier, shift, false);
        });
        this->out->getTensor() = this->out->getTensor() + (OutEigenType)(this->qinfo->output_zp());
        this->out->getTensor() = this->out->getTensor().cwiseMax((OutEigenType)QMin);
        this->out->getTensor() = this->out->getTensor().cwiseMin((OutEigenType)QMax);
    }
    else
    {
        this->out->getTensor() = (sum / div_map.template cast<AccEigenType>()).template cast<OutEigenType>();
    }

    return GraphNode::eval();
}

template <DType InDtype, DType WeightDtype>
OpConv2d<InDtype, WeightDtype>::OpConv2d(TosaAttributeBase* attribute_, TosaQuantInfoBase* qinfo_, uint64_t id_)
    : GraphNode(Op_CONV2D, id_)
{
    setRequiredOperands(3, 1);
    setRequiredRank(4);

    INIT_ATTRIBUTE(Conv2d);
    INIT_QINFO(Conv);
}

template <DType InDtype, DType WeightDtype>
OpConv2d<InDtype, WeightDtype>::~OpConv2d()
{
    if (attribute)
        delete attribute;
    if (qinfo)
        delete qinfo;
}

template <DType InDtype, DType WeightDtype>
int OpConv2d<InDtype, WeightDtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    // 'bias' is checked separately since it doesn't make sense to require a rank ranging from 1 to 4
    if (inputs[2]->getRank() != 1)
    {
        printNodeValidationError("OpConv2d: bias tensor must be rank 1");
        return 1;
    }

    input  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
    bias   = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
    output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);

    if (attribute->padding().size() != 4)
    {
        printNodeValidationError("OpConv2d: illegal size for attribute padding");
        return 1;
    }

    if (attribute->stride().size() != 2)
    {
        printNodeValidationError("OpConv2d: illegal size for attribute stride");
        return 1;
    }

    if (attribute->dilation().size() != 2)
    {
        printNodeValidationError("OpConv2d: illegal size for attribute dilation");
        return 1;
    }

    return 0;
}

template <DType InDtype, DType WeightDtype>
int OpConv2d<InDtype, WeightDtype>::eval()
{
    int in_batch    = this->input->getShape()[0];
    int in_height   = this->input->getShape()[1];
    int in_width    = this->input->getShape()[2];
    int in_channels = this->input->getShape()[3];

    int f_out_channels = this->weight->getShape()[0];
    int f_height       = this->weight->getShape()[1];
    int f_width        = this->weight->getShape()[2];
    int f_in_channels  = this->weight->getShape()[3];

    int b_out_channels = this->bias->getShape()[0];

    int out_batch    = this->output->getShape()[0];
    int out_height   = this->output->getShape()[1];
    int out_width    = this->output->getShape()[2];
    int out_channels = this->output->getShape()[3];

    ASSERT_MSG_NODE(in_batch == out_batch, "OpConv2d: tensor batch mismatch %d != %d", in_batch, out_batch);
    ASSERT_MSG_NODE(f_in_channels == in_channels, "OpConv2d: tensor input channel mismatch %d != %d", f_in_channels,
                    in_channels);
    ASSERT_MSG_NODE(f_out_channels == out_channels, "OpConv2d: tensor output channel mismatch %d != %d", f_out_channels,
                    out_channels);
    ASSERT_MSG_NODE(b_out_channels == out_channels, "OpConv2d: tensor output channel mismatch %d != %d", b_out_channels,
                    out_channels);

    int padding_top    = this->attribute->padding()[0];
    int padding_bottom = this->attribute->padding()[1];
    int padding_left   = this->attribute->padding()[2];
    int padding_right  = this->attribute->padding()[3];
    int stride_h       = this->attribute->stride()[0];
    int stride_w       = this->attribute->stride()[1];
    int dilation_h     = this->attribute->dilation()[0];
    int dilation_w     = this->attribute->dilation()[1];

    DEBUG_INFO(OP,
               "perform OpConv2d, input.shape=[%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d], output.shape=[%d,%d,%d,%d], "
               "stride=[%d,%d], dilation=[%d,%d], padding=[%d,%d,%d,%d]",
               in_batch, in_height, in_width, in_channels, f_out_channels, f_height, f_width, f_in_channels, out_batch,
               out_height, out_width, out_channels, stride_h, stride_w, dilation_h, dilation_w, padding_top,
               padding_bottom, padding_left, padding_right);

    // GEMM-conv2d, left matrix is input, right matrix is weight
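    // Dimension sketch: the im2col input [N * OH * OW, KH * KW * IC] is
    // contracted with the im2col weight [KH * KW * IC, OC] to give
    // [N * OH * OW, OC], which is reshaped back to [N, OH, OW, OC].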
    Eigen::array<Eigen::Index, 2> im2col_input_dims;
    im2col_input_dims[0] = out_batch * out_height * out_width;
    im2col_input_dims[1] = f_height * f_width * f_in_channels;

    Eigen::array<Eigen::Index, 2> im2col_weight_dims;
    im2col_weight_dims[0] = f_height * f_width * f_in_channels;
    im2col_weight_dims[1] = f_out_channels;

    Eigen::array<Eigen::Index, 2> bias_reshaped_dims;
    bias_reshaped_dims[0] = 1;
    bias_reshaped_dims[1] = b_out_channels;

    Eigen::array<Eigen::Index, 4> weight_zp_bcast_dims;
    weight_zp_bcast_dims[0] = f_height;
    weight_zp_bcast_dims[1] = f_width;
    weight_zp_bcast_dims[2] = f_in_channels;

    Eigen::array<Eigen::Index, 2> bias_bcast_dims;
    bias_bcast_dims[0] = out_batch * out_height * out_width;
    bias_bcast_dims[1] = 1;

    Eigen::array<Eigen::Index, 4> col2im_output_dims;
    col2im_output_dims[0] = out_batch;
    col2im_output_dims[1] = out_height;
    col2im_output_dims[2] = out_width;
    col2im_output_dims[3] = out_channels;

    Eigen::array<Eigen::IndexPair<Eigen::Index>, 1> contract_dims = { Eigen::IndexPair<Eigen::Index>(1, 0) };

    Eigen::array<std::pair<int32_t, int32_t>, 4> padding;
    padding[0] = std::make_pair(0, 0);
    padding[1] = std::make_pair(padding_top, padding_bottom);
    padding[2] = std::make_pair(padding_left, padding_right);
    padding[3] = std::make_pair(0, 0);

    TIn input_val      = this->input->getTensor();
    TWeight weight_val = this->weight->getTensor();
    if (this->qinfo)
    {
        input_val  = input_val - (InEigenType)this->qinfo->input_zp();
        weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
    }

    ETensor4<InEigenType> input_padded = input_val.pad(padding);

    // extract_image_patches() output [N, KH, KW, H * W, C]
    // need to transpose to [N, H * W, KH, KW, C]
    ETensor5<InEigenType> input_extract_patches =
        input_padded
            .extract_image_patches(f_height, f_width, stride_h, stride_w, dilation_h, dilation_w, Eigen::PADDING_VALID)
            .shuffle(Eigen::array<Eigen::Index, 5>{ 0, 3, 1, 2, 4 });

    // reshape input to [N * H * W, KH * KW * C]
    ETensor2<InEigenType> im2col_input = input_extract_patches.reshape(im2col_input_dims);

    // transpose and reshape weight from [OC, H, W, IC] to [H * W * IC, OC]
    ETensor2<WeightEigenType> im2col_weight =
        weight_val.shuffle(Eigen::array<Eigen::Index, 4>({ 1, 2, 3, 0 })).reshape(im2col_weight_dims);

    // no bias_multiplier (* bias_scale and >> bias_shift) is applied here since TFLite already scales the bias;
    // the bias is reshaped from [C] to [1, C] and broadcast to [N * H * W, C]
    ETensor2<AccEigenType> bias_2d = this->bias->getTensor().reshape(bias_reshaped_dims).broadcast(bias_bcast_dims);

    // output matrix is [N * H * W, C]
    ETensor2<AccEigenType> contracted_result =
        im2col_input.template cast<AccEigenType>().contract(im2col_weight.template cast<AccEigenType>(), contract_dims);

    // adding bias
    ETensor2<AccEigenType> biased_output = contracted_result + bias_2d.template cast<AccEigenType>();

    // reshape back to [N, H, W, C]
    this->output->getTensor() = biased_output.reshape(col2im_output_dims);

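    // INT48 accumulators are stored in a wider integer type, so saturate the
    // result to the 48-bit range here; assuming AccQMin/AccQMax are the INT48
    // type limits, that range is [-2^47, 2^47 - 1].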
    if (AccDtype == DType_INT48)
    {
        this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
    }

    return GraphNode::eval();
}

template <DType InDtype, DType WeightDtype>
OpDepthwiseConv2d<InDtype, WeightDtype>::OpDepthwiseConv2d(TosaAttributeBase* attribute_,
                                                           TosaQuantInfoBase* qinfo_,
                                                           uint64_t id_)
    : GraphNode(Op_DEPTHWISE_CONV2D, id_)
{
    setRequiredOperands(3, 1);
    setRequiredRank(4);

    INIT_ATTRIBUTE(Conv2d);
    INIT_QINFO(Conv);
}

template <DType InDtype, DType WeightDtype>
OpDepthwiseConv2d<InDtype, WeightDtype>::~OpDepthwiseConv2d()
{
    if (attribute)
        delete attribute;
    if (qinfo)
        delete qinfo;
}

template <DType InDtype, DType WeightDtype>
int OpDepthwiseConv2d<InDtype, WeightDtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    // 'bias' is checked separately since it doesn't make sense to require a rank ranging from 1 to 4
    if (inputs[2]->getRank() != 1)
    {
        printNodeValidationError("OpDepthwiseConv2d: bias tensor must be rank 1");
        return 1;
    }

    input  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
    bias   = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
    output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);

    if (attribute->padding().size() != 4)
    {
        printNodeValidationError("OpDepthwiseConv2d: illegal size for attribute padding");
        return 1;
    }

    if (attribute->stride().size() != 2)
    {
        printNodeValidationError("OpDepthwiseConv2d: illegal size for attribute stride");
        return 1;
    }

    if (attribute->dilation().size() != 2)
    {
        printNodeValidationError("OpDepthwiseConv2d: illegal size for attribute dilation");
        return 1;
    }

    return 0;
}

template <DType InDtype, DType WeightDtype>
int OpDepthwiseConv2d<InDtype, WeightDtype>::eval()
{
    int in_batch    = this->input->getShape()[0];
    int in_height   = this->input->getShape()[1];
    int in_width    = this->input->getShape()[2];
    int in_channels = this->input->getShape()[3];

    int f_height      = this->weight->getShape()[0];
    int f_width       = this->weight->getShape()[1];
    int f_in_channels = this->weight->getShape()[2];
    int f_multiplier  = this->weight->getShape()[3];

    int b_out_channels = this->bias->getShape()[0];

    int out_batch    = this->output->getShape()[0];
    int out_height   = this->output->getShape()[1];
    int out_width    = this->output->getShape()[2];
    int out_channels = this->output->getShape()[3];

    ASSERT_MSG_NODE(in_batch == out_batch, "OpDepthwiseConv2d: tensor batch mismatch %d != %d", in_batch, out_batch);
    ASSERT_MSG_NODE(f_in_channels == in_channels, "OpDepthwiseConv2d: tensor input channel mismatch %d != %d",
                    f_in_channels, in_channels);
    ASSERT_MSG_NODE(in_channels * f_multiplier == out_channels,
                    "OpDepthwiseConv2d: tensor output channel mismatch %d != %d", in_channels * f_multiplier,
                    out_channels);
    ASSERT_MSG_NODE(b_out_channels == out_channels, "OpDepthwiseConv2d: tensor b_out_channels mismatch %d != %d",
                    b_out_channels, out_channels);

    int padding_top    = this->attribute->padding()[0];
    int padding_bottom = this->attribute->padding()[1];
    int padding_left   = this->attribute->padding()[2];
    int padding_right  = this->attribute->padding()[3];
    int stride_h       = this->attribute->stride()[0];
    int stride_w       = this->attribute->stride()[1];
    int dilation_h     = this->attribute->dilation()[0];
    int dilation_w     = this->attribute->dilation()[1];

    DEBUG_INFO(OP,
               "perform OpDepthwiseConv2d, input.shape=[%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d], "
               "output.shape=[%d,%d,%d,%d], stride=[%d,%d], dilation=[%d,%d], padding=[%d,%d,%d,%d]",
               in_batch, in_height, in_width, in_channels, f_height, f_width, f_in_channels, f_multiplier, out_batch,
               out_height, out_width, out_channels, stride_h, stride_w, dilation_h, dilation_w, padding_top,
               padding_bottom, padding_left, padding_right);

    Eigen::array<std::pair<int32_t, int32_t>, 4> padding;
    padding[0] = std::make_pair(0, 0);
    padding[1] = std::make_pair(padding_top, padding_bottom);
    padding[2] = std::make_pair(padding_left, padding_right);
    padding[3] = std::make_pair(0, 0);

    TIn input_val      = this->input->getTensor();
    TWeight weight_val = this->weight->getTensor();
    if (this->qinfo)
    {
        input_val  = input_val - (InEigenType)this->qinfo->input_zp();
        weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
    }

    ETensor4<InEigenType> input_padded = input_val.pad(padding);

    // GEMM doesn't fit well with DepthwiseConv2d
    // 1. use extract_image_patches() to handle stride/dilation/padding
    // 2. perform direct convolution

    // 1. extract_image_patches() output [N, KH, KW, OH * OW, IC]
    ETensor5<InEigenType> input_extract_patches = input_padded.extract_image_patches(
        f_height, f_width, stride_h, stride_w, dilation_h, dilation_w, Eigen::PADDING_VALID);

    Eigen::array<Eigen::Index, 4> reshape_dim;
    reshape_dim.fill(1);
    reshape_dim[3] = b_out_channels;

    Eigen::array<Eigen::Index, 4> bcast;
    bcast[0] = out_batch;
    bcast[1] = out_height;
    bcast[2] = out_width;
    bcast[3] = 1;

    // initialize with bias
    this->output->getTensor() = this->bias->getTensor().reshape(reshape_dim).broadcast(bcast);

    // 2. direct depthwise convolution
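    // The output channel layout interleaves the channel multiplier:
    // oc = ic * f_multiplier + cm. Illustrative example: in_channels = 2,
    // f_multiplier = 3 produces output channels [c0m0, c0m1, c0m2, c1m0, c1m1, c1m2].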
    for (int ob = 0; ob < out_batch; ob++)
    {
        for (int oh = 0; oh < out_height; oh++)
        {
            for (int ow = 0; ow < out_width; ow++)
            {
                for (int ic = 0; ic < in_channels; ic++)
                {
                    for (int cm = 0; cm < f_multiplier; cm++)
                    {
                        for (int fh = 0; fh < f_height; fh++)
                        {
                            for (int fw = 0; fw < f_width; fw++)
                            {
                                this->output->getTensor()(ob, oh, ow, ic * f_multiplier + cm) +=
                                    ((AccEigenType)input_extract_patches(ob, fh, fw, ow * out_height + oh, ic) *
                                     (AccEigenType)weight_val(fh, fw, ic, cm));
                            }
                        }
                    }
                }
            }
        }
    }

    if (AccDtype == DType_INT48)
    {
        this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
    }

    return GraphNode::eval();
}

template <DType InDtype, DType WeightDtype>
OpFullyConnected<InDtype, WeightDtype>::OpFullyConnected(TosaAttributeBase* attribute_,
                                                         TosaQuantInfoBase* qinfo_,
                                                         uint64_t id_)
    : GraphNode(Op_FULLY_CONNECTED, id_)
{
    setRequiredOperands(3, 1);
    setRequiredRank(2);

    INIT_QINFO(Conv);
}

template <DType InDtype, DType WeightDtype>
OpFullyConnected<InDtype, WeightDtype>::~OpFullyConnected()
{
    if (qinfo)
        delete qinfo;
}

template <DType InDtype, DType WeightDtype>
int OpFullyConnected<InDtype, WeightDtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    input  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
    bias   = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);

    if (input->getShape()[1] != weight->getShape()[1])
    {
        printNodeValidationError("OpFullyConnected operator input.shape[1] should match weight.shape[1]");
        return 1;
    }

    if (weight->getShape()[0] != bias->getShape()[0])
    {
        printNodeValidationError("OpFullyConnected operator bias.shape[0] should match weight.shape[0]");
        return 1;
    }

    output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);

    return 0;
}

template <DType InDtype, DType WeightDtype>
int OpFullyConnected<InDtype, WeightDtype>::eval()
{
    typedef Eigen::Tensor<int, 1>::DimensionPair DimPair;
    Eigen::array<DimPair, 1> dims{ { DimPair(1, 0) } };
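    // Contraction sketch: the [OC, IC] weight is shuffled to [IC, OC] below,
    // then the [N, IC] input contracts over DimPair(1, 0) (input dim 1 with
    // weight dim 0), yielding an [N, OC] result before the bias broadcast.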

    Eigen::array<Eigen::Index, 2> weight_shuffle{ 1, 0 };

    Eigen::array<Eigen::Index, 2> bias_reshape;
    bias_reshape[0] = 1;
    bias_reshape[1] = this->bias->getShape()[0];

    Eigen::array<Eigen::Index, 2> bias_bcast;
    bias_bcast[0] = this->input->getShape()[0];
    bias_bcast[1] = 1;

    TIn input_val      = this->input->getTensor();
    TWeight weight_val = this->weight->getTensor().shuffle(weight_shuffle);
    if (this->qinfo)
    {
        input_val  = input_val - (InEigenType)this->qinfo->input_zp();
        weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
    }

    this->output->getTensor() =
        input_val.template cast<AccEigenType>().contract(weight_val.template cast<AccEigenType>(), dims) +
        this->bias->getTensor().reshape(bias_reshape).broadcast(bias_bcast);

    if (AccDtype == DType_INT48)
    {
        this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
    }
    return GraphNode::eval();
}

template <DType Dtype>
OpMatMul<Dtype>::OpMatMul(TosaAttributeBase* attribute_, TosaQuantInfoBase* qinfo_, uint64_t id_)
    : GraphNode(Op_MATMUL, id_)
{
    setRequiredOperands(2, 1);
    setRequiredRank(2);

    INIT_QINFO(MatMul);
}

template <DType Dtype>
OpMatMul<Dtype>::~OpMatMul()
{
    if (qinfo)
        delete qinfo;
}

template <DType Dtype>
int OpMatMul<Dtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    a = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    b = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[1]);

    if (a->getShape()[1] != b->getShape()[0])
    {
        printNodeValidationError("OpMatMul operator a.shape[1] should match b.shape[0]");
        return 1;
    }

    c = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);

    return 0;
}

template <DType Dtype>
int OpMatMul<Dtype>::eval()
{
    typedef Eigen::Tensor<int, 1>::DimensionPair DimPair;
    Eigen::array<DimPair, 1> dims{ { DimPair(1, 0) } };

    TIn a_val = this->a->getTensor();
    TIn b_val = this->b->getTensor();
    if (this->qinfo)
    {
        a_val = a_val - (InEigenType)this->qinfo->a_zp();
        b_val = b_val - (InEigenType)this->qinfo->b_zp();
    }

    this->c->getTensor() = a_val.template cast<AccEigenType>().contract(b_val.template cast<AccEigenType>(), dims);

    if (AccDtype == DType_INT48)
    {
        this->c->getTensor() = this->c->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->c->getTensor() = this->c->getTensor().cwiseMin((AccEigenType)AccQMax);
    }

    return GraphNode::eval();
}

template <DType Dtype>
OpMaxPool2d<Dtype>::OpMaxPool2d(TosaAttributeBase* attribute_, TosaQuantInfoBase* qinfo_, uint64_t id_)
    : GraphNode(Op_MAX_POOL2D, id_)
{
    setRequiredOperands(1, 1);
    setRequiredRank(4);

    INIT_ATTRIBUTE(Pool2d);
}

template <DType Dtype>
OpMaxPool2d<Dtype>::~OpMaxPool2d()
{
    if (attribute)
        delete attribute;
}

template <DType Dtype>
int OpMaxPool2d<Dtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    if (inputs[0]->matchType(*outputs[0]))
    {
        printNodeValidationError("OpMaxPool2d: input and output tensor type mismatch");
        return 1;
    }

    in  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    out = dynamic_cast<TosaReference::TensorTemplate<TOut>*>(outputs[0]);

    if (attribute->padding().size() != 4)
    {
        printNodeValidationError("OpMaxPool2d: illegal size for attribute padding");
        return 1;
    }

    if (attribute->kernel().size() != 2)
    {
        printNodeValidationError("OpMaxPool2d: illegal size for attribute kernel");
        return 1;
    }

    if (attribute->stride().size() != 2)
    {
        printNodeValidationError("OpMaxPool2d: illegal size for attribute stride");
        return 1;
    }

    return 0;
}

template <DType Dtype>
int OpMaxPool2d<Dtype>::eval()
{
    int in_batch    = this->in->getShape()[0];
    int in_height   = this->in->getShape()[1];
    int in_width    = this->in->getShape()[2];
    int in_channels = this->in->getShape()[3];

    int out_batch    = this->out->getShape()[0];
    int out_height   = this->out->getShape()[1];
    int out_width    = this->out->getShape()[2];
    int out_channels = this->out->getShape()[3];

    ASSERT_MSG_NODE(in_batch == out_batch, "OpMaxPool2d: tensor batch mismatch %d != %d", in_batch, out_batch);

    int padding_top    = this->attribute->padding()[0];
    int padding_bottom = this->attribute->padding()[1];
    int padding_left   = this->attribute->padding()[2];
    int padding_right  = this->attribute->padding()[3];
    int kernel_h       = this->attribute->kernel()[0];
    int kernel_w       = this->attribute->kernel()[1];
    int stride_h       = this->attribute->stride()[0];
    int stride_w       = this->attribute->stride()[1];

    DEBUG_INFO(OP,
               "perform MaxPool2d, input.shape=[%d,%d,%d,%d], output.shape=[%d,%d,%d,%d], kernel=[%d,%d], "
               "stride=[%d,%d], padding=[%d,%d,%d,%d]",
               in_batch, in_height, in_width, in_channels, out_batch, out_height, out_width, out_channels, kernel_h,
               kernel_w, stride_h, stride_w, padding_top, padding_bottom, padding_left, padding_right);

    Eigen::array<Eigen::Index, 2> im2col_input_dims;
    im2col_input_dims[0] = kernel_h * kernel_w;
    im2col_input_dims[1] = out_batch * out_height * out_width * out_channels;

    Eigen::array<Eigen::Index, 4> col2im_output_dims;
    col2im_output_dims[0] = out_batch;
    col2im_output_dims[1] = out_height;
    col2im_output_dims[2] = out_width;
    col2im_output_dims[3] = out_channels;

    Eigen::array<std::pair<int32_t, int32_t>, 4> padding;
    padding[0] = std::make_pair(0, 0);
    padding[1] = std::make_pair(padding_top, padding_bottom);
    padding[2] = std::make_pair(padding_left, padding_right);
    padding[3] = std::make_pair(0, 0);

    ETensor4<InEigenType> input_padded = this->in->getTensor().pad(padding, std::numeric_limits<InEigenType>::lowest());

    // extract_image_patches() output [N, KH, KW, H * W, C]
    // transpose to [KH, KW, N, H * W, C]
    // reshape to [KH * KW, N * H * W * C]
    //
    // Set the padding value to be the most negative value that can be
    // represented by the datatype to ensure that any padding values will be equal
    // to or smaller than the actual maximum in the KH x KW patch.
    ETensor2<InEigenType> input_extract_patches =
        input_padded
            .extract_image_patches(kernel_h, kernel_w, stride_h, stride_w, 1, 1, Eigen::PADDING_VALID,
                                   std::numeric_limits<InEigenType>::lowest())
            .shuffle(Eigen::array<Eigen::Index, 5>{ 1, 2, 0, 3, 4 })
            .reshape(im2col_input_dims);

    // Get the maximum of the KH x KW patches along axis 0
    Eigen::Tensor<DenseIndex, 1> tensor_argmax = input_extract_patches.argmax(0);
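    // argmax(0) reduces the KH * KW axis: for each of the N * H * W * C columns
    // it returns the row index of the largest value in that pooling window.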

    // 1D result with [N * H * W * C]
    ETensor1<OutEigenType> out_1d(this->out->getElementCount());

    // indexing the patches with the argmax array yields the per-window maxima
    for (size_t i = 0; i < this->out->getElementCount(); i++)
    {
        out_1d(i) = (OutEigenType)input_extract_patches(tensor_argmax(i), i);
    }

    // reshape result to [N, H, W, C]
    this->out->getTensor() = out_1d.reshape(col2im_output_dims);

    return GraphNode::eval();
}

template <DType InDtype, DType OutDtype>
OpTransposeConv2d<InDtype, OutDtype>::OpTransposeConv2d(TosaAttributeBase* attribute_,
                                                        TosaQuantInfoBase* qinfo_,
                                                        uint64_t id_)
    : GraphNode(Op_TRANSPOSE_CONV2D, id_)
{
    setRequiredOperands(3, 1);
    setRequiredRank(4);

    INIT_ATTRIBUTE(TransposeConv2d);
    INIT_QINFO(Conv);
}

template <DType InDtype, DType OutDtype>
OpTransposeConv2d<InDtype, OutDtype>::~OpTransposeConv2d()
{
    if (attribute)
        delete attribute;
    if (qinfo)
        delete qinfo;
}

template <DType InDtype, DType OutDtype>
int OpTransposeConv2d<InDtype, OutDtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    input  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
    bias   = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
    output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);

    if (attribute->outpad().size() != 2)
    {
        printNodeValidationError("OpTransposeConv2d: illegal size for attribute outpad");
        return 1;
    }

    if (attribute->stride().size() != 2)
    {
        printNodeValidationError("OpTransposeConv2d: illegal size for attribute stride");
        return 1;
    }

    if (attribute->dilation().size() != 2)
    {
        printNodeValidationError("OpTransposeConv2d: illegal size for attribute dilation");
        return 1;
    }

    if (attribute->output_shape().size() != 4)
    {
        printNodeValidationError("OpTransposeConv2d: illegal size for attribute output_shape");
        return 1;
    }

    for (int d = 0; d < 4; d++)
    {
        if (attribute->output_shape()[d] != this->output->getShape()[d])
        {
            printNodeValidationError("OpTransposeConv2d: illegal size for attribute output_shape");
            return 1;
        }
    }

    return 0;
}

template <DType InDtype, DType OutDtype>
int OpTransposeConv2d<InDtype, OutDtype>::eval()
{
    int in_batch    = this->input->getShape()[0];
    int in_height   = this->input->getShape()[1];
    int in_width    = this->input->getShape()[2];
    int in_channels = this->input->getShape()[3];

    int f_out_channels = this->weight->getShape()[0];
    int f_height       = this->weight->getShape()[1];
    int f_width        = this->weight->getShape()[2];
    int f_in_channels  = this->weight->getShape()[3];

    int b_out_channels = this->bias->getShape()[0];

    int out_batch    = this->output->getShape()[0];
    int out_height   = this->output->getShape()[1];
    int out_width    = this->output->getShape()[2];
    int out_channels = this->output->getShape()[3];

    int padding_top  = this->attribute->outpad()[0];
    int padding_left = this->attribute->outpad()[1];
    int stride_h     = this->attribute->stride()[0];
    int stride_w     = this->attribute->stride()[1];
    int dilation_h   = this->attribute->dilation()[0];
    int dilation_w   = this->attribute->dilation()[1];

    ASSERT_MSG_NODE(in_batch == out_batch, "OpTransposeConv2d: tensor batch mismatch %d != %d", in_batch, out_batch);
    ASSERT_MSG_NODE(f_in_channels == in_channels, "OpTransposeConv2d: tensor input channel mismatch %d != %d",
                    f_in_channels, in_channels);
    ASSERT_MSG_NODE(f_out_channels == out_channels, "OpTransposeConv2d: tensor output channel mismatch %d != %d",
                    f_out_channels, out_channels);
    ASSERT_MSG_NODE(b_out_channels == out_channels, "OpTransposeConv2d: tensor b_out_channels mismatch %d != %d",
                    b_out_channels, out_channels);

    DEBUG_INFO(OP,
               "perform OpTransposeConv2d, input.shape=[%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d], "
               "output.shape=[%d,%d,%d,%d], stride=[%d,%d], dilation=[%d,%d], padding=[%d,%d]",
               in_batch, in_height, in_width, in_channels, f_out_channels, f_height, f_width, f_in_channels, out_batch,
               out_height, out_width, out_channels, stride_h, stride_w, dilation_h, dilation_w, padding_top,
               padding_left);

    TIn input_val      = this->input->getTensor();
    TWeight weight_val = this->weight->getTensor();
    if (this->qinfo)
    {
        input_val  = input_val - (InEigenType)this->qinfo->input_zp();
        weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
    }

    Eigen::array<Eigen::Index, 4> reshape_dim;
    reshape_dim.fill(1);
    reshape_dim[3] = b_out_channels;

    Eigen::array<Eigen::Index, 4> bcast;
    bcast[0] = out_batch;
    bcast[1] = out_height;
    bcast[2] = out_width;
    bcast[3] = 1;

    // initialize with bias
    this->output->getTensor() = this->bias->getTensor().reshape(reshape_dim).broadcast(bcast);

    int out_x_origin, out_y_origin;
    int out_x, out_y;

    // reference implementation from: tensorflow/tensorflow/lite/kernels/internal/reference/reference_ops.h
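    // Scatter sketch: each input pixel (ih, iw) contributes to output positions
    // out_y = ih * stride_h - padding_top + fh * dilation_h (and similarly for x).
    // E.g. with stride = 2, padding = 0, dilation = 1, input row 1 feeds output
    // rows 2 .. 2 + f_height - 1; positions falling outside the output are skipped.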
    for (int ob = 0; ob < out_batch; ob++)
    {
        for (int ih = 0; ih < in_height; ih++)
        {
            for (int iw = 0; iw < in_width; iw++)
            {
                out_x_origin = iw * stride_w - padding_left;
                out_y_origin = ih * stride_h - padding_top;
                for (int ic = 0; ic < in_channels; ic++)
                {
                    for (int fh = 0; fh < f_height; fh++)
                    {
                        for (int fw = 0; fw < f_width; fw++)
                        {
                            out_x = out_x_origin + fw * dilation_w;
                            out_y = out_y_origin + fh * dilation_h;
                            for (int oc = 0; oc < out_channels; oc++)
                            {
                                if ((out_x >= 0 && out_x < out_width) && (out_y >= 0 && out_y < out_height))
                                {
                                    this->output->getTensor()(ob, out_y, out_x, oc) +=
                                        ((AccEigenType)input_val(ob, ih, iw, ic) *
                                         (AccEigenType)weight_val(oc, fh, fw, ic));
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    if (AccDtype == DType_INT48)
    {
        this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
    }

    return GraphNode::eval();
}

// template explicit instantiation
DEF_INSTANTIATE_RANK1_6_ONE_RANK_ONE_TYPE(OpArgMax, FLOAT);
DEF_INSTANTIATE_RANK1_6_ONE_RANK_ONE_TYPE(OpArgMax, INT8);
DEF_INSTANTIATE_RANK1_6_ONE_RANK_ONE_TYPE(OpArgMax, INT16);

DEF_INSTANTIATE_ONE_TYPE(OpAvgPool2d, FLOAT)
DEF_INSTANTIATE_ONE_TYPE(OpAvgPool2d, INT8)
DEF_INSTANTIATE_ONE_TYPE(OpAvgPool2d, INT16)

DEF_INSTANTIATE_TWO_TYPE(OpConv2d, FLOAT, FLOAT);
DEF_INSTANTIATE_TWO_TYPE(OpConv2d, INT8, INT4);
DEF_INSTANTIATE_TWO_TYPE(OpConv2d, INT8, INT8);
DEF_INSTANTIATE_TWO_TYPE(OpConv2d, INT16, INT8);

DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, FLOAT, FLOAT);
DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, INT8, INT4);
DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, INT8, INT8);
DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, INT16, INT8);

DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, FLOAT, FLOAT);
DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, INT8, INT4);
DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, INT8, INT8);
DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, INT16, INT8);

DEF_INSTANTIATE_ONE_TYPE(OpMatMul, INT8);
DEF_INSTANTIATE_ONE_TYPE(OpMatMul, INT16);
DEF_INSTANTIATE_ONE_TYPE(OpMatMul, FLOAT);

DEF_INSTANTIATE_ONE_TYPE(OpMaxPool2d, FLOAT);
DEF_INSTANTIATE_ONE_TYPE(OpMaxPool2d, INT8);
DEF_INSTANTIATE_ONE_TYPE(OpMaxPool2d, INT16);

DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, FLOAT, FLOAT);
DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, INT8, INT4);
DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, INT8, INT8);
DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, INT16, INT8);