Blame - reference_model/src/ops/tensor_ops.cc - tosa/reference_model

blob: a0a1f04d947859e1e20bad97234a115de647d4b9 [file] [log] [blame]

Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1
Kevin Cheng	3a47857	2021-01-22 17:21:02 -0800	[diff] [blame]	2	// Copyright (c) 2020-2021, ARM Limited.
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	3	//
				4	// Licensed under the Apache License, Version 2.0 (the "License");
				5	// you may not use this file except in compliance with the License.
				6	// You may obtain a copy of the License at
				7	//
				8	// http://www.apache.org/licenses/LICENSE-2.0
				9	//
				10	// Unless required by applicable law or agreed to in writing, software
				11	// distributed under the License is distributed on an "AS IS" BASIS,
				12	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				13	// See the License for the specific language governing permissions and
				14	// limitations under the License.
				15
				16	#include "tensor_ops.h"
				17	#include "quant_util.h"
				18	#include "template_types.h"
				19
				20	using namespace TosaReference;
				21	using namespace Eigen;
				22	using namespace tosa;
				23
				24	template <int Rank, DType Dtype>
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	25	OpArgMax<Rank, Dtype>::OpArgMax(SubgraphTraverser* sgt_,
				26	TosaAttributeBase* attribute_,
				27	TosaQuantInfoBase* qinfo_,
				28	uint64_t id_)
				29	: GraphNode(sgt_, Op_ARGMAX, id_)
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	30	{
				31	setRequiredOperands(1, 1);
				32	setRequiredRank(0, 6);
				33
				34	INIT_ATTRIBUTE(Axis);
				35	}
				36
				37	template <int Rank, DType Dtype>
				38	OpArgMax<Rank, Dtype>::~OpArgMax()
				39	{
				40	if (attribute)
				41	delete attribute;
				42	}
				43
				44	template <int Rank, DType Dtype>
				45	int OpArgMax<Rank, Dtype>::checkTensorAttributes()
				46	{
				47	if (validateRequiredOperands())
				48	return 1;
				49
				50	if (validateRequiredRank(inputs[0]) \|\| validateRequiredRank(outputs[0]))
				51	{
				52	return 1;
				53	}
				54
				55	input = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
				56	output = dynamic_cast<TosaReference::TensorTemplate<TOut>*>(outputs[0]);
				57
				58	return 0;
				59	}
				60
				61	template <int Rank, DType Dtype>
				62	int OpArgMax<Rank, Dtype>::eval()
				63	{
				64	Eigen::Tensor<DenseIndex, Rank - 1> index = this->input->getTensor().argmax(attribute->axis());
				65
				66	this->output->getTensor() = index.unaryExpr([](DenseIndex in) -> OutEigenType { return (OutEigenType)in; });
				67
				68	return GraphNode::eval();
				69	}
				70
				71	template <DType Dtype>
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	72	OpAvgPool2d<Dtype>::OpAvgPool2d(SubgraphTraverser* sgt_,
				73	TosaAttributeBase* attribute_,
				74	TosaQuantInfoBase* qinfo_,
				75	uint64_t id_)
				76	: GraphNode(sgt_, Op_AVG_POOL2D, id_)
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	77	{
				78	setRequiredOperands(1, 1);
				79	setRequiredRank(4);
				80
Kevin Cheng	93a1628	2021-08-31 16:14:03 -0700	[diff] [blame]	81	INIT_ATTRIBUTE(Pool);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	82	INIT_QINFO(Unary);
				83	}
				84
				85	template <DType Dtype>
				86	OpAvgPool2d<Dtype>::~OpAvgPool2d()
				87	{
				88	if (attribute)
				89	delete attribute;
				90	}
				91
				92	template <DType Dtype>
				93	int OpAvgPool2d<Dtype>::checkTensorAttributes()
				94	{
				95	if (validateRequiredOperands())
				96	return 1;
				97
				98	if (validateRequiredRank(inputs[0]) \|\| validateRequiredRank(outputs[0]))
				99	{
				100	return 1;
				101	}
				102
				103	if (inputs[0]->matchType(*outputs[0]))
				104	{
				105	printNodeValidationError("OpAvgPool2d: input and output tensor type mismatch");
				106	return 1;
				107	}
				108
				109	in = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
				110	out = dynamic_cast<TosaReference::TensorTemplate<TOut>*>(outputs[0]);
				111
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	112	if (attribute->padding().size() != 4)
				113	{
				114	printNodeValidationError("OpAvgPool2d: illegal size for attribute padding");
				115	return 1;
				116	}
				117
				118	if (attribute->kernel().size() != 2)
				119	{
				120	printNodeValidationError("OpAvgPool2d: illegal size for attribute kernel");
				121	return 1;
				122	}
				123
				124	if (attribute->stride().size() != 2)
				125	{
				126	printNodeValidationError("OpAvgPool2d: illegal size for attribute stride");
				127	return 1;
				128	}
				129
				130	return 0;
				131	}
				132
				133	template <DType Dtype>
				134	ETensor1<int32_t> OpAvgPool2d<Dtype>::calculate_div_map_1d(int in_size, int out_size, int kernel_size, int stride)
				135	{
				136	ETensor1<int32_t> result(out_size);
				137
				138	int32_t total_pad = (out_size - 1) * stride + kernel_size - in_size;
				139	total_pad = total_pad < 0 ? 0 : total_pad;
				140
				141	int32_t pad_left = total_pad >> 1;
				142	int32_t pad_right = total_pad - pad_left;
				143
				144	result.setConstant(kernel_size);
				145
				146	// the index left to 'left_index' and index right to 'right_index' indicates
				147	// the input window of this output covers a pad bit
				148	int32_t left_index = pad_left / stride;
				149	int32_t right_index = pad_right / stride;
				150
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	151	// minus the number of pad bit this index cover
				152	while (left_index >= 0)
				153	{
				154	result(left_index) -= (pad_left - left_index * stride);
				155	left_index--;
				156	}
				157
				158	while (right_index >= 0)
				159	{
				160	result(out_size - 1 - right_index) -= (pad_right - right_index * stride);
				161	right_index--;
				162	}
				163
				164	return result;
				165	}
				166
				167	// assuming input and output tensor have same scales like tflite reference
				168	// so no need to scale input and output
				169	template <DType Dtype>
				170	int OpAvgPool2d<Dtype>::eval()
				171	{
				172	int in_batch = this->in->getShape()[0];
				173	int in_height = this->in->getShape()[1];
				174	int in_width = this->in->getShape()[2];
				175	int in_channels = this->in->getShape()[3];
				176
				177	int out_batch = this->out->getShape()[0];
				178	int out_height = this->out->getShape()[1];
				179	int out_width = this->out->getShape()[2];
				180	int out_channels = this->out->getShape()[3];
				181
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	182	ERROR_IF(in_batch != out_batch, "OpAvgPool2d: tensor batch mismatch %d != %d", in_batch, out_batch);
				183	ERROR_IF(in_channels != out_channels, "OpAvgPool2d: tensor channel mismatch %d != %d", in_channels, out_channels);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	184
				185	int padding_top = this->attribute->padding()[0];
				186	int padding_bottom = this->attribute->padding()[1];
				187	int padding_left = this->attribute->padding()[2];
				188	int padding_right = this->attribute->padding()[3];
				189	int kernel_h = this->attribute->kernel()[0];
				190	int kernel_w = this->attribute->kernel()[1];
				191	int stride_h = this->attribute->stride()[0];
				192	int stride_w = this->attribute->stride()[1];
				193
				194	DEBUG_INFO(OP,
				195	"perform AvgPool2d, input.shape=[%d,%d,%d,%d], output.shape=[%d,%d,%d,%d], kernel=[%d,%d], "
				196	"stride=[%d,%d], padding=[%d,%d,%d,%d]",
				197	in_batch, in_height, in_width, in_channels, out_batch, out_height, out_width, out_channels, kernel_h,
				198	kernel_w, stride_h, stride_w, padding_top, padding_bottom, padding_left, padding_right);
				199
				200	Eigen::array<Eigen::Index, 2> im2col_input_dims;
				201	im2col_input_dims[0] = kernel_h * kernel_w;
				202	im2col_input_dims[1] = out_batch * out_height * out_width * out_channels;
				203
				204	Eigen::array<Eigen::Index, 4> col2im_output_dims;
				205	col2im_output_dims[0] = out_batch;
				206	col2im_output_dims[1] = out_height;
				207	col2im_output_dims[2] = out_width;
				208	col2im_output_dims[3] = out_channels;
				209
				210	Eigen::array<std::pair<int32_t, int32_t>, 4> padding;
				211	padding[0] = std::make_pair(0, 0);
				212	padding[1] = std::make_pair(padding_top, padding_bottom);
				213	padding[2] = std::make_pair(padding_left, padding_right);
				214	padding[3] = std::make_pair(0, 0);
				215
				216	ETensor4<InEigenType> input_val = this->in->getTensor();
				217	if (this->qinfo)
				218	{
				219	input_val = input_val - (InEigenType)this->qinfo->input_zp();
				220	}
				221
				222	ETensor4<InEigenType> input_padded = input_val.pad(padding);
				223
				224	// assuming input and output have same scales
				225	// so input and output scaling is not required
				226	// TODO: check if this assumption TOSA made
				227
				228	// extract_image_patches() output [N, KH, KW, H * W, C]
				229	// transpose to [KH, KW, N, H * W, C]
				230	// reshape to [KH * KW, N * H * W * C]
				231	ETensor2<InEigenType> input_extract_patches =
				232	input_padded.extract_image_patches(kernel_h, kernel_w, stride_h, stride_w, 1, 1, Eigen::PADDING_VALID)
				233	.shuffle(Eigen::array<Eigen::Index, 5>{ 1, 2, 0, 3, 4 })
				234	.reshape(im2col_input_dims);
				235
				236	// 1D result with [N * H * W * C]
				237	ETensor1<AccEigenType> out_1d(this->out->getElementCount());
				238	out_1d.setZero();
				239
				240	// sum pool
				241	for (size_t i = 0; i < this->out->getElementCount(); i++)
				242	{
				243	for (int32_t j = 0; j < kernel_h * kernel_w; j++)
				244	{
				245	out_1d(i) += (AccEigenType)input_extract_patches(j, i);
				246	}
				247	}
				248
				249	// reshape result to [N, H, W, C] and divide with div_map
				250	ETensor4<AccEigenType> sum = out_1d.reshape(col2im_output_dims);
				251
				252	// calculate 1d height/width div_map (number of elements this pooling window covers)
				253	// and outer product to get 2d div_map, then reshape/broadcast to [N, H, W, C]
				254	ETensor1<int32_t> div_map_h = calculate_div_map_1d(in_height, out_height, kernel_h, stride_h);
				255	ETensor1<int32_t> div_map_w = calculate_div_map_1d(in_width, out_width, kernel_w, stride_w);
				256	Eigen::array<Eigen::IndexPair<Eigen::Index>, 1> contract_dims = { Eigen::IndexPair<Eigen::Index>(1, 0) };
				257	Eigen::array<Eigen::Index, 4> bcast{ out_batch, 1, 1, out_channels };
				258
				259	ETensor4<int32_t> div_map =
				260	div_map_h.reshape(Eigen::array<Eigen::Index, 2>{ out_height, 1 })
				261	.contract(div_map_w.reshape(Eigen::array<Eigen::Index, 2>{ 1, out_width }), contract_dims)
				262	.reshape(Eigen::array<Eigen::Index, 4>{ 1, out_height, out_width, 1 })
				263	.broadcast(bcast);
				264
				265	if (Dtype != DType_FLOAT)
				266	{
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	267	try
				268	{
				269	this->out->getTensor() = sum.binaryExpr(div_map, [](AccEigenType value, int32_t div) -> OutEigenType {
				270	int32_t multiplier, shift;
				271	TosaReference::QuantUtil::reciprocal_scale(div, multiplier, shift);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	272
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	273	return (OutEigenType)TosaReference::QuantUtil::apply_scale_32(value, multiplier, shift, false);
				274	});
				275	}
				276	catch (std::string desc)
				277	{
				278	REQUIRE(false, "OpAvgPool2d apply_scale_32() fails: %s.", desc.c_str());
				279	}
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	280	this->out->getTensor() = this->out->getTensor() + (OutEigenType)(this->qinfo->output_zp());
				281	this->out->getTensor() = this->out->getTensor().cwiseMax((OutEigenType)QMin);
				282	this->out->getTensor() = this->out->getTensor().cwiseMin((OutEigenType)QMax);
				283	}
				284	else
				285	{
				286	this->out->getTensor() = (sum / div_map.template cast<AccEigenType>()).template cast<OutEigenType>();
				287	}
				288
				289	return GraphNode::eval();
				290	}
				291
				292	template <DType InDtype, DType WeightDtype>
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	293	OpConv2d<InDtype, WeightDtype>::OpConv2d(SubgraphTraverser* sgt_,
				294	TosaAttributeBase* attribute_,
				295	TosaQuantInfoBase* qinfo_,
				296	uint64_t id_)
				297	: GraphNode(sgt_, Op_CONV2D, id_)
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	298	{
				299	setRequiredOperands(3, 1);
				300	setRequiredRank(4);
				301
Kevin Cheng	93a1628	2021-08-31 16:14:03 -0700	[diff] [blame]	302	INIT_ATTRIBUTE(Conv);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	303	INIT_QINFO(Conv);
				304	}
				305
				306	template <DType InDtype, DType WeightDtype>
				307	OpConv2d<InDtype, WeightDtype>::~OpConv2d()
				308	{
				309	if (attribute)
				310	delete attribute;
				311	if (qinfo)
				312	delete qinfo;
				313	}
				314
				315	template <DType InDtype, DType WeightDtype>
				316	int OpConv2d<InDtype, WeightDtype>::checkTensorAttributes()
				317	{
				318	if (validateRequiredOperands())
				319	return 1;
				320
				321	if (validateRequiredRank(inputs[0]) \|\| validateRequiredRank(inputs[1]) \|\| validateRequiredRank(outputs[0]))
				322	{
				323	return 1;
				324	}
				325
				326	// 'bias' checked separatedly since it doens't make sense to make required rank ranging from 1 to 4
				327	if (inputs[2]->getRank() != 1)
				328	{
				329	printNodeValidationError("OpConv2d: bias tensor must be rank 1");
				330	}
				331
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	332	input = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
				333	weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
				334	bias = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
				335	output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);
				336
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	337	if (attribute->padding().size() != 4)
				338	{
				339	printNodeValidationError("OpConv2d: illegal size for attribute padding");
				340	return 1;
				341	}
				342
				343	if (attribute->stride().size() != 2)
				344	{
				345	printNodeValidationError("OpConv2d: illegal size for attribute stride");
				346	return 1;
				347	}
				348
				349	if (attribute->dilation().size() != 2)
				350	{
				351	printNodeValidationError("OpConv2d: illegal size for attribute dilation");
				352	return 1;
				353	}
				354
				355	return 0;
				356	}
				357
				358	template <DType InDtype, DType WeightDtype>
				359	int OpConv2d<InDtype, WeightDtype>::eval()
				360	{
				361	int in_batch = this->input->getShape()[0];
				362	int in_height = this->input->getShape()[1];
				363	int in_width = this->input->getShape()[2];
				364	int in_channels = this->input->getShape()[3];
				365
				366	int f_out_channels = this->weight->getShape()[0];
				367	int f_height = this->weight->getShape()[1];
				368	int f_width = this->weight->getShape()[2];
				369	int f_in_channels = this->weight->getShape()[3];
				370
				371	int b_out_channels = this->bias->getShape()[0];
				372
				373	int out_batch = this->output->getShape()[0];
				374	int out_height = this->output->getShape()[1];
				375	int out_width = this->output->getShape()[2];
				376	int out_channels = this->output->getShape()[3];
				377
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	378	ERROR_IF(in_batch != out_batch, "OpConv2d: tensor batch mismatch %d != %d", in_batch, out_batch);
				379	ERROR_IF(f_in_channels != in_channels, "OpConv2d: tensor input channel mismatch %d != %d", f_in_channels,
				380	in_channels);
				381	ERROR_IF(f_out_channels != out_channels, "OpConv2d: tensor output channel mismatch %d != %d", f_out_channels,
				382	out_channels);
				383	ERROR_IF(b_out_channels != out_channels, "OpConv2d: bias channel mismatch %d != %d", b_out_channels, out_channels);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	384
				385	int padding_top = this->attribute->padding()[0];
				386	int padding_bottom = this->attribute->padding()[1];
				387	int padding_left = this->attribute->padding()[2];
				388	int padding_right = this->attribute->padding()[3];
				389	int stride_h = this->attribute->stride()[0];
				390	int stride_w = this->attribute->stride()[1];
				391	int dilation_h = this->attribute->dilation()[0];
				392	int dilation_w = this->attribute->dilation()[1];
				393
				394	DEBUG_INFO(OP,
				395	"perform OpConv2d, input.shape=[%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d], output.shape=[%d,%d,%d,%d], "
				396	"stride=[%d,%d], dilation=[%d,%d], padding=[%d,%d,%d,%d]",
				397	in_batch, in_height, in_width, in_channels, f_height, f_width, f_in_channels, f_out_channels, out_batch,
				398	out_height, out_width, out_channels, stride_h, stride_w, dilation_h, dilation_w, padding_top,
				399	padding_bottom, padding_left, padding_right);
				400
				401	// GEMM-conv2d, left matrix is input, right matrix is weight
				402	Eigen::array<Eigen::Index, 2> im2col_input_dims;
				403	im2col_input_dims[0] = out_batch * out_height * out_width;
				404	im2col_input_dims[1] = f_height * f_width * f_in_channels;
				405
				406	Eigen::array<Eigen::Index, 2> im2col_weight_dims;
				407	im2col_weight_dims[0] = f_height * f_width * f_in_channels;
				408	im2col_weight_dims[1] = f_out_channels;
				409
				410	Eigen::array<Eigen::Index, 2> bias_reshaped_dims;
				411	bias_reshaped_dims[0] = 1;
				412	bias_reshaped_dims[1] = b_out_channels;
				413
				414	Eigen::array<Eigen::Index, 4> weight_zp_bcast_dims;
				415	weight_zp_bcast_dims[0] = f_height;
				416	weight_zp_bcast_dims[1] = f_width;
				417	weight_zp_bcast_dims[2] = f_in_channels;
				418
				419	Eigen::array<Eigen::Index, 2> bias_bcast_dims;
				420	bias_bcast_dims[0] = out_batch * out_height * out_width;
				421	bias_bcast_dims[1] = 1;
				422
				423	Eigen::array<Eigen::Index, 4> col2im_output_dims;
				424	col2im_output_dims[0] = out_batch;
				425	col2im_output_dims[1] = out_height;
				426	col2im_output_dims[2] = out_width;
				427	col2im_output_dims[3] = out_channels;
				428
				429	Eigen::array<Eigen::IndexPair<Eigen::Index>, 1> contract_dims = { Eigen::IndexPair<Eigen::Index>(1, 0) };
				430
				431	Eigen::array<std::pair<int32_t, int32_t>, 4> padding;
				432	padding[0] = std::make_pair(0, 0);
				433	padding[1] = std::make_pair(padding_top, padding_bottom);
				434	padding[2] = std::make_pair(padding_left, padding_right);
				435	padding[3] = std::make_pair(0, 0);
				436
				437	TIn input_val = this->input->getTensor();
				438	TWeight weight_val = this->weight->getTensor();
				439	if (this->qinfo)
				440	{
				441	input_val = input_val - (InEigenType)this->qinfo->input_zp();
				442	weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
				443	}
				444
				445	ETensor4<InEigenType> input_padded = input_val.pad(padding);
				446
				447	// extract_image_patches() output [N, KH, KW, H * W, C]
				448	// need to transpose to [N, H * W, KH, KW, C]
				449	ETensor5<InEigenType> input_extract_patches =
				450	input_padded
				451	.extract_image_patches(f_height, f_width, stride_h, stride_w, dilation_h, dilation_w, Eigen::PADDING_VALID)
				452	.shuffle(Eigen::array<Eigen::Index, 5>{ 0, 3, 1, 2, 4 });
				453
				454	// reshape input to [N * H * W, KH * KW * C]
				455	ETensor2<InEigenType> im2col_input = input_extract_patches.reshape(im2col_input_dims);
				456
				457	// transpose and reshape weight from [OC, H, W, IC] to [H * W * IC, OC]
				458	ETensor2<WeightEigenType> im2col_weight =
				459	weight_val.shuffle(Eigen::array<Eigen::Index, 4>({ 1, 2, 3, 0 })).reshape(im2col_weight_dims);
				460
				461	// don't need to apply bias_multiplier ( * bias_scale and >> bias_shift) since tflite already scale it
				462	// and reshaped from [C] to [1, C], and broadcast to [N * H * W, C]
				463	ETensor2<AccEigenType> bias_2d = this->bias->getTensor().reshape(bias_reshaped_dims).broadcast(bias_bcast_dims);
				464
				465	// output matrix is [N * H * W, C]
				466	ETensor2<AccEigenType> contracted_result =
				467	im2col_input.template cast<AccEigenType>().contract(im2col_weight.template cast<AccEigenType>(), contract_dims);
				468
				469	// adding bias
				470	ETensor2<AccEigenType> biased_output = contracted_result + bias_2d.template cast<AccEigenType>();
				471
				472	// reshape back to [N, H, W, C]
				473	this->output->getTensor() = biased_output.reshape(col2im_output_dims);
				474
				475	if (AccDtype == DType_INT48)
				476	{
				477	this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
				478	this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
				479	}
				480
				481	return GraphNode::eval();
				482	}
				483
				484	template <DType InDtype, DType WeightDtype>
Kevin Cheng	1533b85	2021-09-01 12:51:58 -0700	[diff] [blame^]	485	OpConv3d<InDtype, WeightDtype>::OpConv3d(SubgraphTraverser* sgt_,
				486	TosaAttributeBase* attribute_,
				487	TosaQuantInfoBase* qinfo_,
				488	uint64_t id_)
				489	: GraphNode(sgt_, Op_CONV3D, id_)
				490	{
				491	setRequiredOperands(3, 1);
				492	setRequiredRank(5);
				493
				494	INIT_ATTRIBUTE(Conv);
				495	INIT_QINFO(Conv);
				496	}
				497
				498	template <DType InDtype, DType WeightDtype>
				499	OpConv3d<InDtype, WeightDtype>::~OpConv3d()
				500	{
				501	if (attribute)
				502	delete attribute;
				503	if (qinfo)
				504	delete qinfo;
				505	}
				506
				507	template <DType InDtype, DType WeightDtype>
				508	int OpConv3d<InDtype, WeightDtype>::checkTensorAttributes()
				509	{
				510	if (validateRequiredOperands())
				511	return 1;
				512
				513	if (validateRequiredRank(inputs[0]) \|\| validateRequiredRank(inputs[1]) \|\| validateRequiredRank(outputs[0]))
				514	{
				515	return 1;
				516	}
				517
				518	// 'bias' checked separatedly since it doens't make sense to make required rank ranging from 1 to 4
				519	if (inputs[2]->getRank() != 1)
				520	{
				521	printNodeValidationError("OpConv3d: bias tensor must be rank 1");
				522	}
				523
				524	input = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
				525	weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
				526	bias = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
				527	output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);
				528
				529	if (attribute->padding().size() != 6)
				530	{
				531	printNodeValidationError("OpConv3d: illegal size for attribute padding");
				532	return 1;
				533	}
				534
				535	if (attribute->stride().size() != 3)
				536	{
				537	printNodeValidationError("OpConv3d: illegal size for attribute stride");
				538	return 1;
				539	}
				540
				541	if (attribute->dilation().size() != 3)
				542	{
				543	printNodeValidationError("OpConv3d: illegal size for attribute dilation");
				544	return 1;
				545	}
				546
				547	return 0;
				548	}
				549
				550	template <DType InDtype, DType WeightDtype>
				551	int OpConv3d<InDtype, WeightDtype>::eval()
				552	{
				553	int in_batch = this->input->getShape()[0];
				554	int in_depth = this->input->getShape()[1];
				555	int in_height = this->input->getShape()[2];
				556	int in_width = this->input->getShape()[3];
				557	int in_channels = this->input->getShape()[4];
				558
				559	int f_out_channels = this->weight->getShape()[0];
				560	int f_depth = this->weight->getShape()[1];
				561	int f_height = this->weight->getShape()[2];
				562	int f_width = this->weight->getShape()[3];
				563	int f_in_channels = this->weight->getShape()[4];
				564
				565	int b_out_channels = this->bias->getShape()[0];
				566
				567	int out_batch = this->output->getShape()[0];
				568	int out_depth = this->output->getShape()[1];
				569	int out_height = this->output->getShape()[2];
				570	int out_width = this->output->getShape()[3];
				571	int out_channels = this->output->getShape()[4];
				572
				573	ERROR_IF(in_batch != out_batch, "OpConv3d: tensor batch mismatch %d != %d", in_batch, out_batch);
				574	ERROR_IF(f_in_channels != in_channels, "OpConv3d: tensor input channel mismatch %d != %d", f_in_channels,
				575	in_channels);
				576	ERROR_IF(f_out_channels != out_channels, "OpConv3d: tensor output channel mismatch %d != %d", f_out_channels,
				577	out_channels);
				578	ERROR_IF(b_out_channels != out_channels, "OpConv3d: bias channel mismatch %d != %d", b_out_channels, out_channels);
				579
				580	int padding_d0 = this->attribute->padding()[0];
				581	int padding_d1 = this->attribute->padding()[1];
				582	int padding_top = this->attribute->padding()[2];
				583	int padding_bottom = this->attribute->padding()[3];
				584	int padding_left = this->attribute->padding()[4];
				585	int padding_right = this->attribute->padding()[5];
				586	int stride_d = this->attribute->stride()[0];
				587	int stride_h = this->attribute->stride()[1];
				588	int stride_w = this->attribute->stride()[2];
				589	int dilation_d = this->attribute->dilation()[0];
				590	int dilation_h = this->attribute->dilation()[1];
				591	int dilation_w = this->attribute->dilation()[2];
				592
				593	DEBUG_INFO(
				594	OP,
				595	"perform OpConv3d, input.shape=[%d,%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d,%d], output.shape=[%d,%d,%d,%d,%d], "
				596	"stride=[%d,%d,%d], dilation=[%d,%d,%d], padding=[%d,%d,%d,%d,%d,%d]",
				597	in_batch, in_depth, in_height, in_width, in_channels, f_out_channels, f_depth, f_height, f_width, f_in_channels,
				598	out_batch, out_depth, out_height, out_width, out_channels, stride_d, stride_h, stride_w, dilation_d, dilation_h,
				599	dilation_w, padding_d0, padding_d1, padding_top, padding_bottom, padding_left, padding_right);
				600
				601	Eigen::array<std::pair<int32_t, int32_t>, 5> padding;
				602	padding[0] = std::make_pair(0, 0);
				603	padding[1] = std::make_pair(padding_d0, padding_d1);
				604	padding[2] = std::make_pair(padding_top, padding_bottom);
				605	padding[3] = std::make_pair(padding_left, padding_right);
				606	padding[4] = std::make_pair(0, 0);
				607
				608	TIn input_val = this->input->getTensor();
				609	TWeight weight_val = this->weight->getTensor();
				610	if (this->qinfo)
				611	{
				612	input_val = input_val - (InEigenType)this->qinfo->input_zp();
				613	weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
				614	}
				615
				616	ETensor5<InEigenType> input_padded = input_val.pad(padding);
				617
				618	// 1. initialize with bias
				619	Eigen::array<Eigen::Index, 5> reshape_dim;
				620	reshape_dim.fill(1);
				621	reshape_dim[4] = b_out_channels;
				622
				623	Eigen::array<Eigen::Index, 5> bcast;
				624	bcast[0] = out_batch;
				625	bcast[1] = out_depth;
				626	bcast[2] = out_height;
				627	bcast[3] = out_width;
				628	bcast[4] = 1;
				629	this->output->getTensor() = this->bias->getTensor().reshape(reshape_dim).broadcast(bcast);
				630
				631	// 2. direct convolution
				632	AccEigenType acc = 0;
				633	int d_idx, h_idx, w_idx;
				634
				635	for (int ob = 0; ob < out_batch; ob++)
				636	{
				637	for (int od = 0; od < out_depth; od++)
				638	{
				639	for (int oh = 0; oh < out_height; oh++)
				640	{
				641	for (int ow = 0; ow < out_width; ow++)
				642	{
				643	for (int oc = 0; oc < out_channels; oc++)
				644	{
				645	acc = 0;
				646	for (int fd = 0; fd < f_depth; fd++)
				647	{
				648	d_idx = od * stride_d + fd * dilation_d;
				649	for (int fh = 0; fh < f_height; fh++)
				650	{
				651	h_idx = oh * stride_h + fh * dilation_h;
				652	for (int fw = 0; fw < f_width; fw++)
				653	{
				654	w_idx = ow * stride_w + fw * dilation_w;
				655	for (int ic = 0; ic < in_channels; ic++)
				656	{
				657	acc += ((AccEigenType)input_padded(ob, d_idx, h_idx, w_idx, ic) *
				658	(AccEigenType)weight_val(oc, fd, fh, fw, ic));
				659	}
				660	}
				661	}
				662	}
				663	this->output->getTensor()(ob, od, oh, ow, oc) = acc;
				664	}
				665	}
				666	}
				667	}
				668	}
				669
				670	if (AccDtype == DType_INT48)
				671	{
				672	this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
				673	this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
				674	}
				675
				676	return GraphNode::eval();
				677	}
				678
				679	template <DType InDtype, DType WeightDtype>
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	680	OpDepthwiseConv2d<InDtype, WeightDtype>::OpDepthwiseConv2d(SubgraphTraverser* sgt_,
				681	TosaAttributeBase* attribute_,
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	682	TosaQuantInfoBase* qinfo_,
				683	uint64_t id_)
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	684	: GraphNode(sgt_, Op_DEPTHWISE_CONV2D, id_)
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	685	{
				686	setRequiredOperands(3, 1);
				687	setRequiredRank(4);
				688
Kevin Cheng	93a1628	2021-08-31 16:14:03 -0700	[diff] [blame]	689	INIT_ATTRIBUTE(Conv);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	690	INIT_QINFO(Conv);
				691	}
				692
				693	template <DType InDtype, DType WeightDtype>
				694	OpDepthwiseConv2d<InDtype, WeightDtype>::~OpDepthwiseConv2d()
				695	{
				696	if (attribute)
				697	delete attribute;
				698	if (qinfo)
				699	delete qinfo;
				700	}
				701
				702	template <DType InDtype, DType WeightDtype>
				703	int OpDepthwiseConv2d<InDtype, WeightDtype>::checkTensorAttributes()
				704	{
				705	if (validateRequiredOperands())
				706	return 1;
				707
				708	if (validateRequiredRank(inputs[0]) \|\| validateRequiredRank(inputs[1]) \|\| validateRequiredRank(outputs[0]))
				709	{
				710	return 1;
				711	}
				712
				713	// 'bias' checked separatedly since it doens't make sense to make required rank ranging from 1 to 4
				714	if (inputs[2]->getRank() != 1)
				715	{
				716	printNodeValidationError("OpDepthwiseConv2d: bias tensor must be rank 1");
				717	}
				718
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	719	input = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
				720	weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
				721	bias = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
				722	output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);
				723
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	724	if (attribute->padding().size() != 4)
				725	{
				726	printNodeValidationError("OpDepthwiseConv2d: illegal size for attribute padding");
				727	return 1;
				728	}
				729
				730	if (attribute->stride().size() != 2)
				731	{
				732	printNodeValidationError("OpDepthwiseConv2d: illegal size for attribute stride");
				733	return 1;
				734	}
				735
				736	if (attribute->dilation().size() != 2)
				737	{
				738	printNodeValidationError("OpDepthwiseConv2d: illegal size for attribute dilation");
				739	return 1;
				740	}
				741
				742	return 0;
				743	}
				744
				745	template <DType InDtype, DType WeightDtype>
				746	int OpDepthwiseConv2d<InDtype, WeightDtype>::eval()
				747	{
				748	int in_batch = this->input->getShape()[0];
				749	int in_height = this->input->getShape()[1];
				750	int in_width = this->input->getShape()[2];
				751	int in_channels = this->input->getShape()[3];
				752
				753	int f_height = this->weight->getShape()[0];
				754	int f_width = this->weight->getShape()[1];
				755	int f_in_channels = this->weight->getShape()[2];
				756	int f_multiplier = this->weight->getShape()[3];
				757
				758	int b_out_channels = this->bias->getShape()[0];
				759
				760	int out_batch = this->output->getShape()[0];
				761	int out_height = this->output->getShape()[1];
				762	int out_width = this->output->getShape()[2];
				763	int out_channels = this->output->getShape()[3];
				764
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	765	ERROR_IF(in_batch != out_batch, "OpDepthwiseConv2d: tensor batch mismatch %d != %d", in_batch, out_batch);
				766	ERROR_IF(f_in_channels != in_channels, "OpDepthwiseConv2d: tensor input channel mismatch %d != %d", f_in_channels,
				767	in_channels);
				768	ERROR_IF(in_channels * f_multiplier != out_channels, "OpDepthwiseConv2d: tensor output channel mismatch %d != %d",
				769	in_channels * f_multiplier, out_channels);
				770	ERROR_IF(b_out_channels != out_channels, "OpDepthwiseConv2d: bias channels mismatch %d != %d", b_out_channels,
				771	out_channels);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	772
				773	int padding_top = this->attribute->padding()[0];
				774	int padding_bottom = this->attribute->padding()[1];
				775	int padding_left = this->attribute->padding()[2];
				776	int padding_right = this->attribute->padding()[3];
				777	int stride_h = this->attribute->stride()[0];
				778	int stride_w = this->attribute->stride()[1];
				779	int dilation_h = this->attribute->dilation()[0];
				780	int dilation_w = this->attribute->dilation()[1];
				781
				782	DEBUG_INFO(OP,
				783	"perform OpDepthwiseConv2d, input.shape=[%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d], "
				784	"output.shape=[%d,%d,%d,%d], stride=[%d,%d], dilation=[%d,%d], padding=[%d,%d,%d,%d]",
				785	in_batch, in_height, in_width, in_channels, f_height, f_width, f_in_channels, f_multiplier, out_batch,
				786	out_height, out_width, out_channels, stride_h, stride_w, dilation_h, dilation_w, padding_top,
				787	padding_bottom, padding_left, padding_right);
				788
				789	Eigen::array<std::pair<int32_t, int32_t>, 4> padding;
				790	padding[0] = std::make_pair(0, 0);
				791	padding[1] = std::make_pair(padding_top, padding_bottom);
				792	padding[2] = std::make_pair(padding_left, padding_right);
				793	padding[3] = std::make_pair(0, 0);
				794
				795	TIn input_val = this->input->getTensor();
				796	TWeight weight_val = this->weight->getTensor();
				797	if (this->qinfo)
				798	{
				799	input_val = input_val - (InEigenType)this->qinfo->input_zp();
				800	weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
				801	}
				802
				803	ETensor4<InEigenType> input_padded = input_val.pad(padding);
				804
				805	// GEMM doesn't fit well with DepthwiseConv2d
				806	// 1. use extract_image_patches() to handle stride/dilation/padding
				807	// 2. perform direct convolution
				808
				809	// 1. extract_image_patches() output [N, KH, KW, OH * OW, IC]
				810	ETensor5<InEigenType> input_extract_patches = input_padded.extract_image_patches(
				811	f_height, f_width, stride_h, stride_w, dilation_h, dilation_w, Eigen::PADDING_VALID);
				812
				813	Eigen::array<Eigen::Index, 4> reshape_dim;
				814	reshape_dim.fill(1);
				815	reshape_dim[3] = b_out_channels;
				816
				817	Eigen::array<Eigen::Index, 4> bcast;
				818	bcast[0] = out_batch;
				819	bcast[1] = out_height;
				820	bcast[2] = out_width;
				821	bcast[3] = 1;
				822
				823	// initialize with bias
				824	this->output->getTensor() = this->bias->getTensor().reshape(reshape_dim).broadcast(bcast);
				825
				826	// 2. direct depthwise convolution
				827	for (int ob = 0; ob < out_batch; ob++)
				828	{
				829	for (int oh = 0; oh < out_height; oh++)
				830	{
				831	for (int ow = 0; ow < out_width; ow++)
				832	{
				833	for (int ic = 0; ic < in_channels; ic++)
				834	{
				835	for (int cm = 0; cm < f_multiplier; cm++)
				836	{
				837	for (int fh = 0; fh < f_height; fh++)
				838	{
				839	for (int fw = 0; fw < f_width; fw++)
				840	{
				841	this->output->getTensor()(ob, oh, ow, ic * f_multiplier + cm) +=
				842	((AccEigenType)input_extract_patches(ob, fh, fw, ow * out_height + oh, ic) *
				843	(AccEigenType)weight_val(fh, fw, ic, cm));
				844	}
				845	}
				846	}
				847	}
				848	}
				849	}
				850	}
				851
				852	if (AccDtype == DType_INT48)
				853	{
				854	this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
				855	this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
				856	}
				857
				858	return GraphNode::eval();
				859	}
				860
				861	template <DType InDtype, DType WeightDtype>
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	862	OpFullyConnected<InDtype, WeightDtype>::OpFullyConnected(SubgraphTraverser* sgt_,
				863	TosaAttributeBase* attribute_,
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	864	TosaQuantInfoBase* qinfo_,
				865	uint64_t id_)
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	866	: GraphNode(sgt_, Op_FULLY_CONNECTED, id_)
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	867	{
				868	setRequiredOperands(3, 1);
				869	setRequiredRank(2);
				870
				871	INIT_QINFO(Conv);
				872	}
				873
				874	template <DType InDtype, DType WeightDtype>
				875	OpFullyConnected<InDtype, WeightDtype>::~OpFullyConnected()
				876	{
				877	if (qinfo)
				878	delete qinfo;
				879	}
				880
				881	template <DType InDtype, DType WeightDtype>
				882	int OpFullyConnected<InDtype, WeightDtype>::checkTensorAttributes()
				883	{
				884	if (validateRequiredOperands())
				885	return 1;
				886
				887	if (validateRequiredRank(inputs[0]) \|\| validateRequiredRank(inputs[1]) \|\| validateRequiredRank(outputs[0]))
				888	{
				889	return 1;
				890	}
				891
				892	input = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
				893	weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
				894	bias = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
				895
				896	if (input->getShape()[1] != weight->getShape()[1])
				897	{
				898	printNodeValidationError("OpFullyConnected operator input.shape[1] should match weight.shape[1]");
				899	return 1;
				900	}
				901
				902	if (weight->getShape()[0] != bias->getShape()[0])
				903	{
				904	printNodeValidationError("OpFullyConnected operator bias.shape[0] should match weight.shape[0]");
				905	return 1;
				906	}
				907
				908	output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);
				909
				910	return 0;
				911	}
				912
				913	template <DType InDtype, DType WeightDtype>
				914	int OpFullyConnected<InDtype, WeightDtype>::eval()
				915	{
				916	typedef Eigen::Tensor<int, 1>::DimensionPair DimPair;
				917	Eigen::array<DimPair, 1> dims{ { DimPair(1, 0) } };
				918
				919	Eigen::array<Eigen::Index, 2> weight_shuffle{ 1, 0 };
				920
				921	Eigen::array<Eigen::Index, 2> bias_reshape;
				922	bias_reshape[0] = 1;
				923	bias_reshape[1] = this->bias->getShape()[0];
				924
				925	Eigen::array<Eigen::Index, 2> bias_bcast;
				926	bias_bcast[0] = this->input->getShape()[0];
				927	bias_bcast[1] = 1;
				928
				929	TIn input_val = this->input->getTensor();
				930	TWeight weight_val = this->weight->getTensor().shuffle(weight_shuffle);
				931	if (this->qinfo)
				932	{
				933	input_val = input_val - (InEigenType)this->qinfo->input_zp();
				934	weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
				935	}
				936
				937	this->output->getTensor() =
				938	input_val.template cast<AccEigenType>().contract(weight_val.template cast<AccEigenType>(), dims) +
				939	this->bias->getTensor().reshape(bias_reshape).broadcast(bias_bcast);
				940
				941	if (AccDtype == DType_INT48)
				942	{
				943	this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
				944	this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
				945	}
				946	return GraphNode::eval();
				947	}
				948
				949	template <DType Dtype>
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	950	OpMatMul<Dtype>::OpMatMul(SubgraphTraverser* sgt_,
				951	TosaAttributeBase* attribute_,
				952	TosaQuantInfoBase* qinfo_,
				953	uint64_t id_)
				954	: GraphNode(sgt_, Op_MATMUL, id_)
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	955	{
				956	setRequiredOperands(2, 1);
Kevin Cheng	2d60f00	2021-06-09 14:18:32 -0700	[diff] [blame]	957	setRequiredRank(3);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	958
				959	INIT_QINFO(MatMul);
				960	}
				961
				962	template <DType Dtype>
				963	OpMatMul<Dtype>::~OpMatMul()
				964	{
				965	if (qinfo)
				966	delete qinfo;
				967	}
				968
				969	template <DType Dtype>
				970	int OpMatMul<Dtype>::checkTensorAttributes()
				971	{
				972	if (validateRequiredOperands())
				973	return 1;
				974
				975	if (validateRequiredRank(inputs[0]) \|\| validateRequiredRank(inputs[1]) \|\| validateRequiredRank(outputs[0]))
				976	{
				977	return 1;
				978	}
				979
Kevin Cheng	2d60f00	2021-06-09 14:18:32 -0700	[diff] [blame]	980	a = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
				981	b = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[1]);
				982	output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	983
Kevin Cheng	2d60f00	2021-06-09 14:18:32 -0700	[diff] [blame]	984	ASSERT_MEM(a && b && output);
				985
				986	// a: [N, H, C]
				987	// b: [N, C, W]
				988	// c: [N, H, W]
				989
				990	// Check N
				991	if (a->getShape()[0] != b->getShape()[0] \|\| a->getShape()[0] != output->getShape()[0])
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	992	{
Kevin Cheng	2d60f00	2021-06-09 14:18:32 -0700	[diff] [blame]	993	printNodeValidationError("OpMatMul operator a.shape[0], b.shape[0] and output.shape[0] should match");
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	994	return 1;
				995	}
Kevin Cheng	2d60f00	2021-06-09 14:18:32 -0700	[diff] [blame]	996	N = a->getShape()[0];
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	997
Kevin Cheng	2d60f00	2021-06-09 14:18:32 -0700	[diff] [blame]	998	// Check C
				999	if (a->getShape()[2] != b->getShape()[1])
				1000	{
				1001	printNodeValidationError("OpMatMul operator a.shape[2] should match b.shape[1]");
				1002	return 1;
				1003	}
				1004	C = a->getShape()[2];
				1005
				1006	// Check H
				1007	if (a->getShape()[1] != output->getShape()[1])
				1008	{
				1009	printNodeValidationError("OpMatMul operator a.shape[1] should match output.shape[1]");
				1010	return 1;
				1011	}
				1012	H = a->getShape()[1];
				1013
				1014	// Check W
				1015	if (b->getShape()[2] != output->getShape()[2])
				1016	{
				1017	printNodeValidationError("OpMatMul operator output.shape[2] should match output.shape[2]");
				1018	return 1;
				1019	}
				1020	W = b->getShape()[2];
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1021
				1022	return 0;
				1023	}
				1024
				1025	template <DType Dtype>
				1026	int OpMatMul<Dtype>::eval()
				1027	{
				1028	typedef Eigen::Tensor<int, 1>::DimensionPair DimPair;
				1029	Eigen::array<DimPair, 1> dims{ { DimPair(1, 0) } };
				1030
				1031	TIn a_val = this->a->getTensor();
				1032	TIn b_val = this->b->getTensor();
				1033	if (this->qinfo)
				1034	{
				1035	a_val = a_val - (InEigenType)this->qinfo->a_zp();
				1036	b_val = b_val - (InEigenType)this->qinfo->b_zp();
				1037	}
				1038
Kevin Cheng	2d60f00	2021-06-09 14:18:32 -0700	[diff] [blame]	1039	Eigen::array<Eigen::Index, 2> a_rank2_shape({ H, C });
				1040	Eigen::array<Eigen::Index, 2> b_rank2_shape({ C, W });
				1041	Eigen::array<Eigen::Index, 3> output_rank3_shape({ 1, H, W });
				1042
				1043	Eigen::array<Eigen::Index, 3> a_size_array({ 1, H, C });
				1044	Eigen::array<Eigen::Index, 3> b_size_array({ 1, C, W });
				1045
				1046	Eigen::array<Eigen::Index, 3> a_begin_array({ 0, 0, 0 });
				1047	Eigen::array<Eigen::Index, 3> b_begin_array({ 0, 0, 0 });
				1048
				1049	// Iterate N dimension.
				1050	for (int i = 0; i < N; i++)
				1051	{
				1052	a_begin_array[0] = i;
				1053	b_begin_array[0] = i;
				1054
				1055	TInRank2 a_rank2_val = a_val.slice(a_begin_array, a_size_array).reshape(a_rank2_shape);
				1056	TInRank2 b_rank2_val = b_val.slice(b_begin_array, b_size_array).reshape(b_rank2_shape);
				1057	TAccRank2 output_rank2_val =
				1058	a_rank2_val.template cast<AccEigenType>().contract(b_rank2_val.template cast<AccEigenType>(), dims);
				1059	TAcc output_rank3_val = output_rank2_val.reshape(output_rank3_shape);
				1060	if (i == 0)
				1061	{
				1062	this->output->getTensor() = output_rank3_val;
				1063	}
				1064	else
				1065	{
				1066	TAcc temp = this->output->getTensor().concatenate(output_rank3_val, 0);
				1067	this->output->getTensor() = temp;
				1068	}
				1069	}
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1070
				1071	if (AccDtype == DType_INT48)
				1072	{
Kevin Cheng	2d60f00	2021-06-09 14:18:32 -0700	[diff] [blame]	1073	this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
				1074	this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1075	}
				1076
				1077	return GraphNode::eval();
				1078	}
				1079
				1080	template <DType Dtype>
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	1081	OpMaxPool2d<Dtype>::OpMaxPool2d(SubgraphTraverser* sgt_,
				1082	TosaAttributeBase* attribute_,
				1083	TosaQuantInfoBase* qinfo_,
				1084	uint64_t id_)
				1085	: GraphNode(sgt_, Op_MAX_POOL2D, id_)
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1086	{
				1087	setRequiredOperands(1, 1);
				1088	setRequiredRank(4);
				1089
Kevin Cheng	93a1628	2021-08-31 16:14:03 -0700	[diff] [blame]	1090	INIT_ATTRIBUTE(Pool);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1091	}
				1092
				1093	template <DType Dtype>
				1094	OpMaxPool2d<Dtype>::~OpMaxPool2d()
				1095	{
				1096	if (attribute)
				1097	delete attribute;
				1098	}
				1099
				1100	template <DType Dtype>
				1101	int OpMaxPool2d<Dtype>::checkTensorAttributes()
				1102	{
				1103	if (validateRequiredOperands())
				1104	return 1;
				1105
				1106	if (validateRequiredRank(inputs[0]) \|\| validateRequiredRank(outputs[0]))
				1107	{
				1108	return 1;
				1109	}
				1110
				1111	if (inputs[0]->matchType(*outputs[0]))
				1112	{
				1113	printNodeValidationError("OpMaxPool2d: input and output tensor type mismatch");
				1114	return 1;
				1115	}
				1116
				1117	in = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
				1118	out = dynamic_cast<TosaReference::TensorTemplate<TOut>*>(outputs[0]);
				1119
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1120	if (attribute->padding().size() != 4)
				1121	{
				1122	printNodeValidationError("OpMaxPool2d: illegal size for attribute padding");
				1123	return 1;
				1124	}
				1125
				1126	if (attribute->kernel().size() != 2)
				1127	{
				1128	printNodeValidationError("OpMaxPool2d: illegal size for attribute kernel");
				1129	return 1;
				1130	}
				1131
				1132	if (attribute->stride().size() != 2)
				1133	{
				1134	printNodeValidationError("OpMaxPool2d: illegal size for attribute stride");
				1135	return 1;
				1136	}
				1137
				1138	return 0;
				1139	}
				1140
				1141	template <DType Dtype>
				1142	int OpMaxPool2d<Dtype>::eval()
				1143	{
				1144	int in_batch = this->in->getShape()[0];
				1145	int in_height = this->in->getShape()[1];
				1146	int in_width = this->in->getShape()[2];
				1147	int in_channels = this->in->getShape()[3];
				1148
				1149	int out_batch = this->out->getShape()[0];
				1150	int out_height = this->out->getShape()[1];
				1151	int out_width = this->out->getShape()[2];
				1152	int out_channels = this->out->getShape()[3];
				1153
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	1154	ERROR_IF(in_batch != out_batch, "OpMaxPool2d: tensor batch mismatch %d != %d", in_batch, out_batch);
				1155	ERROR_IF(in_channels != out_channels, "OpMaxPool2d: tensor channel mismatch %d != %d", in_channels, out_channels);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1156
				1157	int padding_top = this->attribute->padding()[0];
				1158	int padding_bottom = this->attribute->padding()[1];
				1159	int padding_left = this->attribute->padding()[2];
				1160	int padding_right = this->attribute->padding()[3];
				1161	int kernel_h = this->attribute->kernel()[0];
				1162	int kernel_w = this->attribute->kernel()[1];
				1163	int stride_h = this->attribute->stride()[0];
				1164	int stride_w = this->attribute->stride()[1];
				1165
				1166	DEBUG_INFO(OP,
				1167	"perform MaxPool2d, input.shape=[%d,%d,%d,%d], output.shape=[%d,%d,%d,%d], kernel=[%d,%d], "
				1168	"stride=[%d,%d], padding=[%d,%d,%d,%d]",
				1169	in_batch, in_height, in_width, in_channels, out_batch, out_height, out_width, out_channels, kernel_h,
				1170	kernel_w, stride_h, stride_w, padding_top, padding_bottom, padding_left, padding_right);
				1171
				1172	Eigen::array<Eigen::Index, 2> im2col_input_dims;
				1173	im2col_input_dims[0] = kernel_h * kernel_w;
				1174	im2col_input_dims[1] = out_batch * out_height * out_width * out_channels;
				1175
				1176	Eigen::array<Eigen::Index, 4> col2im_output_dims;
				1177	col2im_output_dims[0] = out_batch;
				1178	col2im_output_dims[1] = out_height;
				1179	col2im_output_dims[2] = out_width;
				1180	col2im_output_dims[3] = out_channels;
				1181
				1182	Eigen::array<std::pair<int32_t, int32_t>, 4> padding;
				1183	padding[0] = std::make_pair(0, 0);
				1184	padding[1] = std::make_pair(padding_top, padding_bottom);
				1185	padding[2] = std::make_pair(padding_left, padding_right);
				1186	padding[3] = std::make_pair(0, 0);
				1187
				1188	ETensor4<InEigenType> input_padded = this->in->getTensor().pad(padding, std::numeric_limits<InEigenType>::lowest());
				1189
				1190	// extract_image_patches() output [N, KH, KW, H * W, C]
				1191	// transpose to [KH, KW, N, H * W, C]
				1192	// reshape to [KH * KW, N * H * W * C]
				1193	//
				1194	// Set the padding value to be the most negative value that can be
				1195	// represented by the datatype to ensure that any padding values will be equal
				1196	// to or smaller than the actual maximum in the KH x KW patch.
				1197	ETensor2<InEigenType> input_extract_patches =
				1198	input_padded
				1199	.extract_image_patches(kernel_h, kernel_w, stride_h, stride_w, 1, 1, Eigen::PADDING_VALID,
				1200	std::numeric_limits<InEigenType>::lowest())
				1201	.shuffle(Eigen::array<Eigen::Index, 5>{ 1, 2, 0, 3, 4 })
				1202	.reshape(im2col_input_dims);
				1203
				1204	// Get the maximum of the KHxHW patches along axis 0
				1205	Eigen::Tensor<DenseIndex, 1> tensor_argmax = input_extract_patches.argmax(0);
				1206
				1207	// 1D result with [N * H * W * C]
				1208	ETensor1<OutEigenType> out_1d(this->out->getElementCount());
				1209
				1210	// index input_patches with argmax array should give the result
				1211	for (size_t i = 0; i < this->out->getElementCount(); i++)
				1212	{
				1213	out_1d(i) = (OutEigenType)input_extract_patches(tensor_argmax(i), i);
				1214	}
				1215
				1216	// reshape result to [N, H, W, C]
				1217	this->out->getTensor() = out_1d.reshape(col2im_output_dims);
				1218
				1219	return GraphNode::eval();
				1220	}
				1221
				1222	template <DType InDtype, DType OutDtype>
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	1223	OpTransposeConv2d<InDtype, OutDtype>::OpTransposeConv2d(SubgraphTraverser* sgt_,
				1224	TosaAttributeBase* attribute_,
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1225	TosaQuantInfoBase* qinfo_,
				1226	uint64_t id_)
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	1227	: GraphNode(sgt_, Op_TRANSPOSE_CONV2D, id_)
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1228	{
				1229	setRequiredOperands(3, 1);
				1230	setRequiredRank(4);
				1231
Kevin Cheng	93a1628	2021-08-31 16:14:03 -0700	[diff] [blame]	1232	INIT_ATTRIBUTE(TransposeConv);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1233	INIT_QINFO(Conv);
				1234	}
				1235
				1236	template <DType InDtype, DType OutDtype>
				1237	OpTransposeConv2d<InDtype, OutDtype>::~OpTransposeConv2d()
				1238	{
				1239	if (attribute)
				1240	delete attribute;
				1241	if (qinfo)
				1242	delete qinfo;
				1243	}
				1244
				1245	template <DType InDtype, DType OutDtype>
				1246	int OpTransposeConv2d<InDtype, OutDtype>::checkTensorAttributes()
				1247	{
				1248	if (validateRequiredOperands())
				1249	return 1;
				1250
				1251	if (validateRequiredRank(inputs[0]) \|\| validateRequiredRank(inputs[1]) \|\| validateRequiredRank(outputs[0]))
				1252	{
				1253	return 1;
				1254	}
				1255
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1256	input = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
				1257	weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
				1258	bias = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
				1259	output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);
				1260
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1261	if (attribute->outpad().size() != 2)
				1262	{
				1263	printNodeValidationError("OpTransposeConv2d: illegal size for attribute outpad");
				1264	return 1;
				1265	}
				1266
				1267	if (attribute->stride().size() != 2)
				1268	{
				1269	printNodeValidationError("OpTransposeConv2d: illegal size for attribute stride");
				1270	return 1;
				1271	}
				1272
				1273	if (attribute->dilation().size() != 2)
				1274	{
				1275	printNodeValidationError("OpTransposeConv2d: illegal size for attribute dilation");
				1276	return 1;
				1277	}
				1278
				1279	if (attribute->output_shape().size() != 4)
				1280	{
				1281	printNodeValidationError("OpTransposeConv2d: illegal size for attribute output_shape");
				1282	return 1;
				1283	}
				1284
				1285	for (int d = 0; d < 4; d++)
				1286	{
				1287	if (attribute->output_shape()[d] != this->output->getShape()[d])
				1288	{
				1289	printNodeValidationError("OpTransposeConv2d: illegal size for attribute output_shape");
				1290	return 1;
				1291	}
				1292	}
				1293
				1294	return 0;
				1295	}
				1296
				1297	template <DType InDtype, DType OutDtype>
				1298	int OpTransposeConv2d<InDtype, OutDtype>::eval()
				1299	{
				1300	int in_batch = this->input->getShape()[0];
				1301	int in_height = this->input->getShape()[1];
				1302	int in_width = this->input->getShape()[2];
				1303	int in_channels = this->input->getShape()[3];
				1304
				1305	int f_out_channels = this->weight->getShape()[0];
				1306	int f_height = this->weight->getShape()[1];
				1307	int f_width = this->weight->getShape()[2];
				1308	int f_in_channels = this->weight->getShape()[3];
				1309
				1310	int b_out_channels = this->bias->getShape()[0];
				1311
				1312	int out_batch = this->output->getShape()[0];
				1313	int out_height = this->output->getShape()[1];
				1314	int out_width = this->output->getShape()[2];
				1315	int out_channels = this->output->getShape()[3];
				1316
				1317	int padding_top = this->attribute->outpad()[0];
				1318	int padding_left = this->attribute->outpad()[1];
				1319	int stride_h = this->attribute->stride()[0];
				1320	int stride_w = this->attribute->stride()[1];
				1321	int dilation_h = this->attribute->dilation()[0];
				1322	int dilation_w = this->attribute->dilation()[1];
				1323
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	1324	ERROR_IF(in_batch != out_batch, "OpTransposeConv2d: tensor batch mismatch %d != %d", in_batch, out_batch);
				1325	ERROR_IF(f_in_channels != in_channels, "OpTransposeConv2d: tensor input channel mismatch %d != %d", f_in_channels,
				1326	in_channels);
				1327	ERROR_IF(f_out_channels != out_channels, "OpTransposeConv2d: tensor output channel mismatch %d != %d",
				1328	f_out_channels, out_channels);
				1329	ERROR_IF(b_out_channels != out_channels, "OpDepthwiseConv2d: bias channels mismatch %d != %d", b_out_channels,
				1330	out_channels);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1331
				1332	DEBUG_INFO(OP,
				1333	"perform OpTransposeConv2d, input.shape=[%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d], "
				1334	"output.shape=[%d,%d,%d,%d], stride=[%d,%d], dilation=[%d,%d], padding=[%d,%d]",
				1335	in_batch, in_height, in_width, in_channels, f_height, f_width, f_out_channels, f_in_channels, out_batch,
				1336	out_height, out_width, out_channels, stride_h, stride_w, dilation_h, dilation_w, padding_top,
				1337	padding_left);
				1338
				1339	TIn input_val = this->input->getTensor();
				1340	TWeight weight_val = this->weight->getTensor();
				1341	if (this->qinfo)
				1342	{
				1343	input_val = input_val - (InEigenType)this->qinfo->input_zp();
				1344	weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
				1345	}
				1346
				1347	Eigen::array<Eigen::Index, 4> reshape_dim;
				1348	reshape_dim.fill(1);
				1349	reshape_dim[3] = b_out_channels;
				1350
				1351	Eigen::array<Eigen::Index, 4> bcast;
				1352	bcast[0] = out_batch;
				1353	bcast[1] = out_height;
				1354	bcast[2] = out_width;
				1355	bcast[3] = 1;
				1356
				1357	// initialize with bias
				1358	this->output->getTensor() = this->bias->getTensor().reshape(reshape_dim).broadcast(bcast);
				1359
				1360	int out_x_origin, out_y_origin;
				1361	int out_x, out_y;
				1362
				1363	// reference implementation from: tensorflow/tensorflow/lite/kernels/internal/reference/reference_ops.h
				1364	for (int ob = 0; ob < out_batch; ob++)
				1365	{
				1366	for (int ih = 0; ih < in_height; ih++)
				1367	{
				1368	for (int iw = 0; iw < in_width; iw++)
				1369	{
				1370	out_x_origin = iw * stride_w - padding_left;
				1371	out_y_origin = ih * stride_h - padding_top;
				1372	for (int ic = 0; ic < in_channels; ic++)
				1373	{
				1374	for (int fh = 0; fh < f_height; fh++)
				1375	{
				1376	for (int fw = 0; fw < f_width; fw++)
				1377	{
				1378	out_x = out_x_origin + fw * dilation_w;
				1379	out_y = out_y_origin + fh * dilation_h;
				1380	for (int oc = 0; oc < out_channels; oc++)
				1381	{
				1382	if ((out_x >= 0 && out_x < out_width) && (out_y >= 0 && out_y < out_height))
				1383	{
				1384	this->output->getTensor()(ob, out_y, out_x, oc) +=
				1385	((AccEigenType)input_val(ob, ih, iw, ic) *
				1386	(AccEigenType)weight_val(oc, fh, fw, ic));
				1387	}
				1388	}
				1389	}
				1390	}
				1391	}
				1392	}
				1393	}
				1394	}
				1395
				1396	if (AccDtype == DType_INT48)
				1397	{
				1398	this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
				1399	this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
				1400	}
				1401
				1402	return GraphNode::eval();
				1403	}
				1404
				1405	// template explicit instantiation
				1406	DEF_INSTANTIATE_RANK1_6_ONE_RANK_ONE_TYPE(OpArgMax, FLOAT);
Kevin Cheng	3a47857	2021-01-22 17:21:02 -0800	[diff] [blame]	1407	DEF_INSTANTIATE_RANK1_6_ONE_RANK_ONE_TYPE(OpArgMax, INT8);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1408	DEF_INSTANTIATE_RANK1_6_ONE_RANK_ONE_TYPE(OpArgMax, INT16);
				1409
				1410	DEF_INSTANTIATE_ONE_TYPE(OpAvgPool2d, FLOAT)
Kevin Cheng	3a47857	2021-01-22 17:21:02 -0800	[diff] [blame]	1411	DEF_INSTANTIATE_ONE_TYPE(OpAvgPool2d, INT8)
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1412	DEF_INSTANTIATE_ONE_TYPE(OpAvgPool2d, INT16)
				1413
				1414	DEF_INSTANTIATE_TWO_TYPE(OpConv2d, FLOAT, FLOAT);
Kevin Cheng	3a47857	2021-01-22 17:21:02 -0800	[diff] [blame]	1415	DEF_INSTANTIATE_TWO_TYPE(OpConv2d, INT8, INT4);
				1416	DEF_INSTANTIATE_TWO_TYPE(OpConv2d, INT8, INT8);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1417	DEF_INSTANTIATE_TWO_TYPE(OpConv2d, INT16, INT8);
				1418
Kevin Cheng	1533b85	2021-09-01 12:51:58 -0700	[diff] [blame^]	1419	DEF_INSTANTIATE_TWO_TYPE(OpConv3d, FLOAT, FLOAT);
				1420	DEF_INSTANTIATE_TWO_TYPE(OpConv3d, INT8, INT4);
				1421	DEF_INSTANTIATE_TWO_TYPE(OpConv3d, INT8, INT8);
				1422	DEF_INSTANTIATE_TWO_TYPE(OpConv3d, INT16, INT8);
				1423
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1424	DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, FLOAT, FLOAT);
Kevin Cheng	3a47857	2021-01-22 17:21:02 -0800	[diff] [blame]	1425	DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, INT8, INT4);
				1426	DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, INT8, INT8);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1427	DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, INT16, INT8);
				1428
				1429	DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, FLOAT, FLOAT);
Kevin Cheng	3a47857	2021-01-22 17:21:02 -0800	[diff] [blame]	1430	DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, INT8, INT4);
				1431	DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, INT8, INT8);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1432	DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, INT16, INT8);
				1433
Kevin Cheng	3a47857	2021-01-22 17:21:02 -0800	[diff] [blame]	1434	DEF_INSTANTIATE_ONE_TYPE(OpMatMul, INT8);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1435	DEF_INSTANTIATE_ONE_TYPE(OpMatMul, INT16);
				1436	DEF_INSTANTIATE_ONE_TYPE(OpMatMul, FLOAT);
				1437
				1438	DEF_INSTANTIATE_ONE_TYPE(OpMaxPool2d, FLOAT);
Kevin Cheng	3a47857	2021-01-22 17:21:02 -0800	[diff] [blame]	1439	DEF_INSTANTIATE_ONE_TYPE(OpMaxPool2d, INT8);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1440	DEF_INSTANTIATE_ONE_TYPE(OpMaxPool2d, INT16);
				1441
				1442	DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, FLOAT, FLOAT);
Kevin Cheng	3a47857	2021-01-22 17:21:02 -0800	[diff] [blame]	1443	DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, INT8, INT4);
				1444	DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, INT8, INT8);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1445	DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, INT16, INT8);