// Copyright (c) 2020-2021, ARM Limited.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "tensor_ops.h"
#include "quant_util.h"
#include "template_types.h"

using namespace TosaReference;
using namespace Eigen;
using namespace tosa;

int check_pool2d_attribute_common(tosa::TosaPoolAttribute* attribute,
                                  std::vector<int32_t> input_shape,
                                  std::vector<int32_t> output_shape,
                                  std::string& msg)
{
    if (attribute->padding().size() != 4)
    {
        msg = "illegal size for attribute padding";
        return 1;
    }

    if (attribute->kernel().size() != 2)
    {
        msg = "illegal size for attribute kernel";
        return 1;
    }

    if (attribute->stride().size() != 2)
    {
        msg = "illegal size for attribute stride";
        return 1;
    }

    for (int32_t i : attribute->padding())
    {
        if (i < 0)
        {
            msg = "At least one pad is smaller than zero";
            return 1;
        }
    }

    for (int32_t i : attribute->kernel())
    {
        if (i < 1)
        {
            msg = "At least one kernel dimension is smaller than one";
            return 1;
        }
    }

    for (int32_t i : attribute->stride())
    {
        if (i < 1)
        {
            msg = "At least one stride dimension is smaller than one";
            return 1;
        }
    }

    int32_t IH = input_shape[1];
    int32_t IW = input_shape[2];
    int32_t OH = output_shape[1];
    int32_t OW = output_shape[2];

    int32_t pad_top    = attribute->padding()[0];
    int32_t pad_bottom = attribute->padding()[1];
    int32_t pad_left   = attribute->padding()[2];
    int32_t pad_right  = attribute->padding()[3];

    int32_t stride_y = attribute->stride()[0];
    int32_t stride_x = attribute->stride()[1];
    int32_t kernel_y = attribute->kernel()[0];
    int32_t kernel_x = attribute->kernel()[1];

    if (pad_top >= kernel_y || pad_bottom >= kernel_y || pad_left >= kernel_x || pad_right >= kernel_x)
    {
        msg = "At least one pad is >= kernel dimension";
        return 1;
    }

    int32_t allowed_min_input_height = (OH * stride_y) - pad_top - pad_bottom - stride_y + kernel_y;
    int32_t allowed_min_input_width  = (OW * stride_x) - pad_left - pad_right - stride_x + kernel_x;

    int32_t d_height = IH - allowed_min_input_height;
    int32_t d_width  = IW - allowed_min_input_width;

    if (d_height < 0 || d_height >= stride_y || d_width < 0 || d_width >= stride_x)
    {
        msg = "Mismatch between output shape provided and expected output shape";
        return 1;
    }

    return 0;
}
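
// Worked example (illustrative values only): with IH = 7, zero padding,
// kernel_y = 3 and stride_y = 2, the expected output height is
// floor((7 - 3) / 2) + 1 = 3, so allowed_min_input_height = 3 * 2 - 2 + 3 = 7
// and d_height = 0; claiming OH = 4 instead would raise the minimum to 9 and
// make d_height negative, failing the shape check above.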

template <int Rank, DType Dtype>
OpArgMax<Rank, Dtype>::OpArgMax(SubgraphTraverser* sgt_,
                                TosaAttributeBase* attribute_,
                                TosaQuantInfoBase* qinfo_,
                                uint64_t id_)
    : GraphNode(sgt_, Op_ARGMAX, id_)
{
    setRequiredOperands(1, 1);
    setRequiredRank(0, 6);

    INIT_ATTRIBUTE(Axis);
}

template <int Rank, DType Dtype>
OpArgMax<Rank, Dtype>::~OpArgMax()
{
    if (attribute)
        delete attribute;
}

template <int Rank, DType Dtype>
int OpArgMax<Rank, Dtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    input  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    output = dynamic_cast<TosaReference::TensorTemplate<TOut>*>(outputs[0]);

    return 0;
}

template <int Rank, DType Dtype>
int OpArgMax<Rank, Dtype>::eval()
{
    Eigen::Tensor<DenseIndex, Rank - 1> index = this->input->getTensor().argmax(attribute->axis());
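    // Eigen's argmax(axis) removes the reduced dimension: e.g. a [2, 3] input
    // with axis = 1 yields a rank-1 [2] tensor holding, per row, the column
    // index of that row's maximum.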

    this->output->getTensor() = index.unaryExpr([](DenseIndex in) -> OutEigenType { return (OutEigenType)in; });

    return GraphNode::eval();
}

template <DType Dtype>
OpAvgPool2d<Dtype>::OpAvgPool2d(SubgraphTraverser* sgt_,
                                TosaAttributeBase* attribute_,
                                TosaQuantInfoBase* qinfo_,
                                uint64_t id_)
    : GraphNode(sgt_, Op_AVG_POOL2D, id_)
{
    setRequiredOperands(1, 1);
    setRequiredRank(4);

    INIT_ATTRIBUTE(Pool);
    INIT_QINFO(Unary);
}

template <DType Dtype>
OpAvgPool2d<Dtype>::~OpAvgPool2d()
{
    if (attribute)
        delete attribute;
}

template <DType Dtype>
int OpAvgPool2d<Dtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    if (inputs[0]->matchType(*outputs[0]))
    {
        printNodeValidationError("OpAvgPool2d: input and output tensor type mismatch");
        return 1;
    }

    in  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    out = dynamic_cast<TosaReference::TensorTemplate<TOut>*>(outputs[0]);

    if (Dtype != DType_INT8 && this->qinfo)
    {
        ERROR_IF(this->qinfo->input_zp() != 0, "OpAvgPool2d: zeropoint only for int8_t");
        ERROR_IF(this->qinfo->output_zp() != 0, "OpAvgPool2d: zeropoint only for int8_t");
    }

    std::string msg;
    if (check_pool2d_attribute_common(attribute, in->getShape(), out->getShape(), msg))
    {
        msg = "OpAvgPool2d: " + msg;
        printNodeValidationError(msg.c_str());
        return 1;
    }

    return 0;
}

template <DType Dtype>
ETensor1<int32_t> OpAvgPool2d<Dtype>::calculate_div_map_1d(int in_size, int out_size, int kernel_size, int stride)
{
    ETensor1<int32_t> result(out_size);

    int32_t total_pad = (out_size - 1) * stride + kernel_size - in_size;
    total_pad         = total_pad < 0 ? 0 : total_pad;

    int32_t pad_left  = total_pad >> 1;
    int32_t pad_right = total_pad - pad_left;

    result.setConstant(kernel_size);

    // output indices at or below 'left_index' (and, mirrored from the right
    // end, at or below 'right_index') have input windows that overlap the padding
    int32_t left_index  = pad_left / stride;
    int32_t right_index = pad_right / stride;

    // subtract the number of pad positions each such window covers
    while (left_index >= 0)
    {
        result(left_index) -= (pad_left - left_index * stride);
        left_index--;
    }

    while (right_index >= 0)
    {
        result(out_size - 1 - right_index) -= (pad_right - right_index * stride);
        right_index--;
    }

    return result;
}
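
// Worked example (illustrative values only): in_size = 5, out_size = 3,
// kernel_size = 3, stride = 2 gives total_pad = 2 * 2 + 3 - 5 = 2, so
// pad_left = pad_right = 1; starting from [3, 3, 3], the first and last
// windows each cover one pad position, yielding a div_map of [2, 3, 2].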

// input and output tensors are assumed to have the same scale (as in the
// TFLite reference), so neither input nor output needs rescaling here
template <DType Dtype>
int OpAvgPool2d<Dtype>::eval()
{
    int in_batch    = this->in->getShape()[0];
    int in_height   = this->in->getShape()[1];
    int in_width    = this->in->getShape()[2];
    int in_channels = this->in->getShape()[3];

    int out_batch    = this->out->getShape()[0];
    int out_height   = this->out->getShape()[1];
    int out_width    = this->out->getShape()[2];
    int out_channels = this->out->getShape()[3];

    ERROR_IF(in_batch != out_batch, "OpAvgPool2d: tensor batch mismatch %d != %d", in_batch, out_batch);
    ERROR_IF(in_channels != out_channels, "OpAvgPool2d: tensor channel mismatch %d != %d", in_channels, out_channels);

    int padding_top    = this->attribute->padding()[0];
    int padding_bottom = this->attribute->padding()[1];
    int padding_left   = this->attribute->padding()[2];
    int padding_right  = this->attribute->padding()[3];
    int kernel_h       = this->attribute->kernel()[0];
    int kernel_w       = this->attribute->kernel()[1];
    int stride_h       = this->attribute->stride()[0];
    int stride_w       = this->attribute->stride()[1];

    DEBUG_INFO(OP,
               "perform AvgPool2d, input.shape=[%d,%d,%d,%d], output.shape=[%d,%d,%d,%d], kernel=[%d,%d], "
               "stride=[%d,%d], padding=[%d,%d,%d,%d]",
               in_batch, in_height, in_width, in_channels, out_batch, out_height, out_width, out_channels, kernel_h,
               kernel_w, stride_h, stride_w, padding_top, padding_bottom, padding_left, padding_right);

    Eigen::array<Eigen::Index, 2> im2col_input_dims;
    im2col_input_dims[0] = kernel_h * kernel_w;
    im2col_input_dims[1] = out_batch * out_height * out_width * out_channels;

    Eigen::array<Eigen::Index, 4> col2im_output_dims;
    col2im_output_dims[0] = out_batch;
    col2im_output_dims[1] = out_height;
    col2im_output_dims[2] = out_width;
    col2im_output_dims[3] = out_channels;

    Eigen::array<std::pair<int32_t, int32_t>, 4> padding;
    padding[0] = std::make_pair(0, 0);
    padding[1] = std::make_pair(padding_top, padding_bottom);
    padding[2] = std::make_pair(padding_left, padding_right);
    padding[3] = std::make_pair(0, 0);

    ETensor4<InEigenType> input_val = this->in->getTensor();
    if (this->qinfo)
    {
        input_val = input_val - (InEigenType)this->qinfo->input_zp();
    }

    ETensor4<InEigenType> input_padded = input_val.pad(padding);

    // input and output are assumed to share the same scale, so no
    // input/output rescaling is required
    // TODO: check whether TOSA actually makes this assumption

    // extract_image_patches() output [N, KH, KW, H * W, C]
    // transpose to [KH, KW, N, H * W, C]
    // reshape to [KH * KW, N * H * W * C]
    ETensor2<InEigenType> input_extract_patches =
        input_padded.extract_image_patches(kernel_h, kernel_w, stride_h, stride_w, 1, 1, Eigen::PADDING_VALID)
            .shuffle(Eigen::array<Eigen::Index, 5>{ 1, 2, 0, 3, 4 })
            .reshape(im2col_input_dims);
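    // e.g. a 1x8x8x1 input with a 2x2 kernel, stride 2 and no padding produces
    // a [4, 16] patch tensor here: one column of 4 window elements per output element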

    // 1D result with [N * H * W * C]
    ETensor1<AccEigenType> out_1d(this->out->getElementCount());
    out_1d.setZero();

    // sum pool
    for (size_t i = 0; i < this->out->getElementCount(); i++)
    {
        for (int32_t j = 0; j < kernel_h * kernel_w; j++)
        {
            out_1d(i) += (AccEigenType)input_extract_patches(j, i);
        }
    }

    // reshape result to [N, H, W, C] and divide by div_map
    ETensor4<AccEigenType> sum = out_1d.reshape(col2im_output_dims);

    // calculate 1d height/width div_maps (number of elements each pooling window covers)
    // and take their outer product to get the 2d div_map, then reshape/broadcast to [N, H, W, C]
    ETensor1<int32_t> div_map_h = calculate_div_map_1d(in_height, out_height, kernel_h, stride_h);
    ETensor1<int32_t> div_map_w = calculate_div_map_1d(in_width, out_width, kernel_w, stride_w);
    Eigen::array<Eigen::IndexPair<Eigen::Index>, 1> contract_dims = { Eigen::IndexPair<Eigen::Index>(1, 0) };
    Eigen::array<Eigen::Index, 4> bcast{ out_batch, 1, 1, out_channels };

    ETensor4<int32_t> div_map =
        div_map_h.reshape(Eigen::array<Eigen::Index, 2>{ out_height, 1 })
            .contract(div_map_w.reshape(Eigen::array<Eigen::Index, 2>{ 1, out_width }), contract_dims)
            .reshape(Eigen::array<Eigen::Index, 4>{ 1, out_height, out_width, 1 })
            .broadcast(bcast);
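    // reciprocal_scale() is assumed (per the TOSA specification) to return a
    // fixed-point multiplier/shift pair approximating 1/div, so the
    // apply_scale_32() call below computes a rounded sum/div without a
    // per-element integer division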

    if (Dtype != DType_FLOAT)
    {
        try
        {
            this->out->getTensor() = sum.binaryExpr(div_map, [](AccEigenType value, int32_t div) -> OutEigenType {
                int32_t multiplier, shift;
                TosaReference::QuantUtil::reciprocal_scale(div, multiplier, shift);

                return (OutEigenType)TosaReference::QuantUtil::apply_scale_32(value, multiplier, shift, false);
            });
        }
        catch (std::string desc)
        {
            REQUIRE(false, "OpAvgPool2d apply_scale_32() fails: %s.", desc.c_str());
        }
        this->out->getTensor() = this->out->getTensor() + (OutEigenType)(this->qinfo->output_zp());
        this->out->getTensor() = this->out->getTensor().cwiseMax((OutEigenType)QMin);
        this->out->getTensor() = this->out->getTensor().cwiseMin((OutEigenType)QMax);
    }
    else
    {
        this->out->getTensor() = (sum / div_map.template cast<AccEigenType>()).template cast<OutEigenType>();
    }

    return GraphNode::eval();
}

template <DType InDtype, DType WeightDtype>
OpConv2d<InDtype, WeightDtype>::OpConv2d(SubgraphTraverser* sgt_,
                                         TosaAttributeBase* attribute_,
                                         TosaQuantInfoBase* qinfo_,
                                         uint64_t id_)
    : GraphNode(sgt_, Op_CONV2D, id_)
{
    setRequiredOperands(3, 1);
    setRequiredRank(4);

    INIT_ATTRIBUTE(Conv);
    INIT_QINFO(Conv);
}

template <DType InDtype, DType WeightDtype>
OpConv2d<InDtype, WeightDtype>::~OpConv2d()
{
    if (attribute)
        delete attribute;
    if (qinfo)
        delete qinfo;
}

template <DType InDtype, DType WeightDtype>
int OpConv2d<InDtype, WeightDtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    // 'bias' is checked separately since it doesn't make sense to require a rank ranging from 1 to 4
    if (inputs[2]->getRank() != 1)
    {
        printNodeValidationError("OpConv2d: bias tensor must be rank 1");
    }

    input  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
    bias   = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
    output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);

    if (attribute->padding().size() != 4)
    {
        printNodeValidationError("OpConv2d: illegal size for attribute padding");
        return 1;
    }

    if (attribute->stride().size() != 2)
    {
        printNodeValidationError("OpConv2d: illegal size for attribute stride");
        return 1;
    }

    if (attribute->dilation().size() != 2)
    {
        printNodeValidationError("OpConv2d: illegal size for attribute dilation");
        return 1;
    }

    return 0;
}

template <DType InDtype, DType WeightDtype>
int OpConv2d<InDtype, WeightDtype>::eval()
{
    int in_batch    = this->input->getShape()[0];
    int in_height   = this->input->getShape()[1];
    int in_width    = this->input->getShape()[2];
    int in_channels = this->input->getShape()[3];

    int f_out_channels = this->weight->getShape()[0];
    int f_height       = this->weight->getShape()[1];
    int f_width        = this->weight->getShape()[2];
    int f_in_channels  = this->weight->getShape()[3];

    int b_out_channels = this->bias->getShape()[0];

    int out_batch    = this->output->getShape()[0];
    int out_height   = this->output->getShape()[1];
    int out_width    = this->output->getShape()[2];
    int out_channels = this->output->getShape()[3];

    ERROR_IF(in_batch != out_batch, "OpConv2d: tensor batch mismatch %d != %d", in_batch, out_batch);
    ERROR_IF(f_in_channels != in_channels, "OpConv2d: tensor input channel mismatch %d != %d", f_in_channels,
             in_channels);
    ERROR_IF(f_out_channels != out_channels, "OpConv2d: tensor output channel mismatch %d != %d", f_out_channels,
             out_channels);
    ERROR_IF(b_out_channels != out_channels, "OpConv2d: bias channel mismatch %d != %d", b_out_channels, out_channels);

    int padding_top    = this->attribute->padding()[0];
    int padding_bottom = this->attribute->padding()[1];
    int padding_left   = this->attribute->padding()[2];
    int padding_right  = this->attribute->padding()[3];
    int stride_h       = this->attribute->stride()[0];
    int stride_w       = this->attribute->stride()[1];
    int dilation_h     = this->attribute->dilation()[0];
    int dilation_w     = this->attribute->dilation()[1];

    DEBUG_INFO(OP,
               "perform OpConv2d, input.shape=[%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d], output.shape=[%d,%d,%d,%d], "
               "stride=[%d,%d], dilation=[%d,%d], padding=[%d,%d,%d,%d]",
               in_batch, in_height, in_width, in_channels, f_height, f_width, f_in_channels, f_out_channels, out_batch,
               out_height, out_width, out_channels, stride_h, stride_w, dilation_h, dilation_w, padding_top,
               padding_bottom, padding_left, padding_right);

    // GEMM-conv2d, left matrix is input, right matrix is weight
    Eigen::array<Eigen::Index, 2> im2col_input_dims;
    im2col_input_dims[0] = out_batch * out_height * out_width;
    im2col_input_dims[1] = f_height * f_width * f_in_channels;

    Eigen::array<Eigen::Index, 2> im2col_weight_dims;
    im2col_weight_dims[0] = f_height * f_width * f_in_channels;
    im2col_weight_dims[1] = f_out_channels;

    Eigen::array<Eigen::Index, 2> bias_reshaped_dims;
    bias_reshaped_dims[0] = 1;
    bias_reshaped_dims[1] = b_out_channels;

    Eigen::array<Eigen::Index, 4> weight_zp_bcast_dims;
    weight_zp_bcast_dims[0] = f_height;
    weight_zp_bcast_dims[1] = f_width;
    weight_zp_bcast_dims[2] = f_in_channels;

    Eigen::array<Eigen::Index, 2> bias_bcast_dims;
    bias_bcast_dims[0] = out_batch * out_height * out_width;
    bias_bcast_dims[1] = 1;

    Eigen::array<Eigen::Index, 4> col2im_output_dims;
    col2im_output_dims[0] = out_batch;
    col2im_output_dims[1] = out_height;
    col2im_output_dims[2] = out_width;
    col2im_output_dims[3] = out_channels;

    Eigen::array<Eigen::IndexPair<Eigen::Index>, 1> contract_dims = { Eigen::IndexPair<Eigen::Index>(1, 0) };

    Eigen::array<std::pair<int32_t, int32_t>, 4> padding;
    padding[0] = std::make_pair(0, 0);
    padding[1] = std::make_pair(padding_top, padding_bottom);
    padding[2] = std::make_pair(padding_left, padding_right);
    padding[3] = std::make_pair(0, 0);

    TIn input_val      = this->input->getTensor();
    TWeight weight_val = this->weight->getTensor();
    if (this->qinfo)
    {
        input_val  = input_val - (InEigenType)this->qinfo->input_zp();
        weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
    }

    ETensor4<InEigenType> input_padded = input_val.pad(padding);

    // extract_image_patches() output [N, KH, KW, H * W, C]
    // need to transpose to [N, H * W, KH, KW, C]
    ETensor5<InEigenType> input_extract_patches =
        input_padded
            .extract_image_patches(f_height, f_width, stride_h, stride_w, dilation_h, dilation_w, Eigen::PADDING_VALID)
            .shuffle(Eigen::array<Eigen::Index, 5>{ 0, 3, 1, 2, 4 });

    // reshape input to [N * H * W, KH * KW * C]
    ETensor2<InEigenType> im2col_input = input_extract_patches.reshape(im2col_input_dims);

    // transpose and reshape weight from [OC, H, W, IC] to [H * W * IC, OC]
    ETensor2<WeightEigenType> im2col_weight =
        weight_val.shuffle(Eigen::array<Eigen::Index, 4>({ 1, 2, 3, 0 })).reshape(im2col_weight_dims);
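    // e.g. a 1x8x8x16 input with a 3x3x16 kernel, 32 output channels, stride 1
    // and padding [1,1,1,1] contracts a [64, 144] im2col_input against a
    // [144, 32] im2col_weight, giving the [64, 32] matrix reshaped below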

    // no need to apply a bias multiplier ( * bias_scale and >> bias_shift) since tflite already scales it;
    // bias is reshaped from [C] to [1, C] and broadcast to [N * H * W, C]
    ETensor2<AccEigenType> bias_2d = this->bias->getTensor().reshape(bias_reshaped_dims).broadcast(bias_bcast_dims);

    // output matrix is [N * H * W, C]
    ETensor2<AccEigenType> contracted_result =
        im2col_input.template cast<AccEigenType>().contract(im2col_weight.template cast<AccEigenType>(), contract_dims);

    // add bias
    ETensor2<AccEigenType> biased_output = contracted_result + bias_2d.template cast<AccEigenType>();

    // reshape back to [N, H, W, C]
    this->output->getTensor() = biased_output.reshape(col2im_output_dims);

    if (AccDtype == DType_INT48)
    {
        this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
    }

    return GraphNode::eval();
}

template <DType InDtype, DType WeightDtype>
OpConv3d<InDtype, WeightDtype>::OpConv3d(SubgraphTraverser* sgt_,
                                         TosaAttributeBase* attribute_,
                                         TosaQuantInfoBase* qinfo_,
                                         uint64_t id_)
    : GraphNode(sgt_, Op_CONV3D, id_)
{
    setRequiredOperands(3, 1);
    setRequiredRank(5);

    INIT_ATTRIBUTE(Conv);
    INIT_QINFO(Conv);
}

template <DType InDtype, DType WeightDtype>
OpConv3d<InDtype, WeightDtype>::~OpConv3d()
{
    if (attribute)
        delete attribute;
    if (qinfo)
        delete qinfo;
}

template <DType InDtype, DType WeightDtype>
int OpConv3d<InDtype, WeightDtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    // 'bias' is checked separately since it doesn't make sense to require a rank ranging from 1 to 5
    if (inputs[2]->getRank() != 1)
    {
        printNodeValidationError("OpConv3d: bias tensor must be rank 1");
    }

    input  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
    bias   = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
    output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);

    if (attribute->padding().size() != 6)
    {
        printNodeValidationError("OpConv3d: illegal size for attribute padding");
        return 1;
    }

    if (attribute->stride().size() != 3)
    {
        printNodeValidationError("OpConv3d: illegal size for attribute stride");
        return 1;
    }

    if (attribute->dilation().size() != 3)
    {
        printNodeValidationError("OpConv3d: illegal size for attribute dilation");
        return 1;
    }

    return 0;
}

template <DType InDtype, DType WeightDtype>
int OpConv3d<InDtype, WeightDtype>::eval()
{
    int in_batch    = this->input->getShape()[0];
    int in_depth    = this->input->getShape()[1];
    int in_height   = this->input->getShape()[2];
    int in_width    = this->input->getShape()[3];
    int in_channels = this->input->getShape()[4];

    int f_out_channels = this->weight->getShape()[0];
    int f_depth        = this->weight->getShape()[1];
    int f_height       = this->weight->getShape()[2];
    int f_width        = this->weight->getShape()[3];
    int f_in_channels  = this->weight->getShape()[4];

    int b_out_channels = this->bias->getShape()[0];

    int out_batch    = this->output->getShape()[0];
    int out_depth    = this->output->getShape()[1];
    int out_height   = this->output->getShape()[2];
    int out_width    = this->output->getShape()[3];
    int out_channels = this->output->getShape()[4];

    ERROR_IF(in_batch != out_batch, "OpConv3d: tensor batch mismatch %d != %d", in_batch, out_batch);
    ERROR_IF(f_in_channels != in_channels, "OpConv3d: tensor input channel mismatch %d != %d", f_in_channels,
             in_channels);
    ERROR_IF(f_out_channels != out_channels, "OpConv3d: tensor output channel mismatch %d != %d", f_out_channels,
             out_channels);
    ERROR_IF(b_out_channels != out_channels, "OpConv3d: bias channel mismatch %d != %d", b_out_channels, out_channels);

    int padding_d0     = this->attribute->padding()[0];
    int padding_d1     = this->attribute->padding()[1];
    int padding_top    = this->attribute->padding()[2];
    int padding_bottom = this->attribute->padding()[3];
    int padding_left   = this->attribute->padding()[4];
    int padding_right  = this->attribute->padding()[5];
    int stride_d       = this->attribute->stride()[0];
    int stride_h       = this->attribute->stride()[1];
    int stride_w       = this->attribute->stride()[2];
    int dilation_d     = this->attribute->dilation()[0];
    int dilation_h     = this->attribute->dilation()[1];
    int dilation_w     = this->attribute->dilation()[2];

    DEBUG_INFO(
        OP,
        "perform OpConv3d, input.shape=[%d,%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d,%d], output.shape=[%d,%d,%d,%d,%d], "
        "stride=[%d,%d,%d], dilation=[%d,%d,%d], padding=[%d,%d,%d,%d,%d,%d]",
        in_batch, in_depth, in_height, in_width, in_channels, f_out_channels, f_depth, f_height, f_width, f_in_channels,
        out_batch, out_depth, out_height, out_width, out_channels, stride_d, stride_h, stride_w, dilation_d, dilation_h,
        dilation_w, padding_d0, padding_d1, padding_top, padding_bottom, padding_left, padding_right);

    Eigen::array<std::pair<int32_t, int32_t>, 5> padding;
    padding[0] = std::make_pair(0, 0);
    padding[1] = std::make_pair(padding_d0, padding_d1);
    padding[2] = std::make_pair(padding_top, padding_bottom);
    padding[3] = std::make_pair(padding_left, padding_right);
    padding[4] = std::make_pair(0, 0);

    TIn input_val      = this->input->getTensor();
    TWeight weight_val = this->weight->getTensor();
    if (this->qinfo)
    {
        input_val  = input_val - (InEigenType)this->qinfo->input_zp();
        weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
    }

    ETensor5<InEigenType> input_padded = input_val.pad(padding);

    // 1. initialize with bias
    Eigen::array<Eigen::Index, 5> reshape_dim;
    reshape_dim.fill(1);
    reshape_dim[4] = b_out_channels;

    Eigen::array<Eigen::Index, 5> bcast;
    bcast[0] = out_batch;
    bcast[1] = out_depth;
    bcast[2] = out_height;
    bcast[3] = out_width;
    bcast[4] = 1;
    this->output->getTensor() = this->bias->getTensor().reshape(reshape_dim).broadcast(bcast);

    // 2. direct convolution
    AccEigenType acc = 0;
    int d_idx, h_idx, w_idx;
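
    // each output element is computed as
    //   out(n, od, oh, ow, oc) = bias(oc) + sum over (fd, fh, fw, ic) of
    //     input_padded(n, od * stride_d + fd * dilation_d,
    //                     oh * stride_h + fh * dilation_h,
    //                     ow * stride_w + fw * dilation_w, ic) * weight(oc, fd, fh, fw, ic)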

    for (int ob = 0; ob < out_batch; ob++)
    {
        for (int od = 0; od < out_depth; od++)
        {
            for (int oh = 0; oh < out_height; oh++)
            {
                for (int ow = 0; ow < out_width; ow++)
                {
                    for (int oc = 0; oc < out_channels; oc++)
                    {
                        acc = 0;
                        for (int fd = 0; fd < f_depth; fd++)
                        {
                            d_idx = od * stride_d + fd * dilation_d;
                            for (int fh = 0; fh < f_height; fh++)
                            {
                                h_idx = oh * stride_h + fh * dilation_h;
                                for (int fw = 0; fw < f_width; fw++)
                                {
                                    w_idx = ow * stride_w + fw * dilation_w;
                                    for (int ic = 0; ic < in_channels; ic++)
                                    {
                                        acc += ((AccEigenType)input_padded(ob, d_idx, h_idx, w_idx, ic) *
                                                (AccEigenType)weight_val(oc, fd, fh, fw, ic));
                                    }
                                }
                            }
                        }
                        this->output->getTensor()(ob, od, oh, ow, oc) += acc;
                    }
                }
            }
        }
    }

    if (AccDtype == DType_INT48)
    {
        this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
    }

    return GraphNode::eval();
}

template <DType InDtype, DType WeightDtype>
OpDepthwiseConv2d<InDtype, WeightDtype>::OpDepthwiseConv2d(SubgraphTraverser* sgt_,
                                                           TosaAttributeBase* attribute_,
                                                           TosaQuantInfoBase* qinfo_,
                                                           uint64_t id_)
    : GraphNode(sgt_, Op_DEPTHWISE_CONV2D, id_)
{
    setRequiredOperands(3, 1);
    setRequiredRank(4);

    INIT_ATTRIBUTE(Conv);
    INIT_QINFO(Conv);
}

template <DType InDtype, DType WeightDtype>
OpDepthwiseConv2d<InDtype, WeightDtype>::~OpDepthwiseConv2d()
{
    if (attribute)
        delete attribute;
    if (qinfo)
        delete qinfo;
}

template <DType InDtype, DType WeightDtype>
int OpDepthwiseConv2d<InDtype, WeightDtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    // 'bias' is checked separately since it doesn't make sense to require a rank ranging from 1 to 4
    if (inputs[2]->getRank() != 1)
    {
        printNodeValidationError("OpDepthwiseConv2d: bias tensor must be rank 1");
    }

    input  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
    bias   = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
    output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);

    if (attribute->padding().size() != 4)
    {
        printNodeValidationError("OpDepthwiseConv2d: illegal size for attribute padding");
        return 1;
    }

    if (attribute->stride().size() != 2)
    {
        printNodeValidationError("OpDepthwiseConv2d: illegal size for attribute stride");
        return 1;
    }

    if (attribute->dilation().size() != 2)
    {
        printNodeValidationError("OpDepthwiseConv2d: illegal size for attribute dilation");
        return 1;
    }

    return 0;
}

template <DType InDtype, DType WeightDtype>
int OpDepthwiseConv2d<InDtype, WeightDtype>::eval()
{
    int in_batch    = this->input->getShape()[0];
    int in_height   = this->input->getShape()[1];
    int in_width    = this->input->getShape()[2];
    int in_channels = this->input->getShape()[3];

    int f_height      = this->weight->getShape()[0];
    int f_width       = this->weight->getShape()[1];
    int f_in_channels = this->weight->getShape()[2];
    int f_multiplier  = this->weight->getShape()[3];

    int b_out_channels = this->bias->getShape()[0];

    int out_batch    = this->output->getShape()[0];
    int out_height   = this->output->getShape()[1];
    int out_width    = this->output->getShape()[2];
    int out_channels = this->output->getShape()[3];

    ERROR_IF(in_batch != out_batch, "OpDepthwiseConv2d: tensor batch mismatch %d != %d", in_batch, out_batch);
    ERROR_IF(f_in_channels != in_channels, "OpDepthwiseConv2d: tensor input channel mismatch %d != %d", f_in_channels,
             in_channels);
    ERROR_IF(in_channels * f_multiplier != out_channels, "OpDepthwiseConv2d: tensor output channel mismatch %d != %d",
             in_channels * f_multiplier, out_channels);
    ERROR_IF(b_out_channels != out_channels, "OpDepthwiseConv2d: bias channels mismatch %d != %d", b_out_channels,
             out_channels);

    int padding_top    = this->attribute->padding()[0];
    int padding_bottom = this->attribute->padding()[1];
    int padding_left   = this->attribute->padding()[2];
    int padding_right  = this->attribute->padding()[3];
    int stride_h       = this->attribute->stride()[0];
    int stride_w       = this->attribute->stride()[1];
    int dilation_h     = this->attribute->dilation()[0];
    int dilation_w     = this->attribute->dilation()[1];

    DEBUG_INFO(OP,
               "perform OpDepthwiseConv2d, input.shape=[%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d], "
               "output.shape=[%d,%d,%d,%d], stride=[%d,%d], dilation=[%d,%d], padding=[%d,%d,%d,%d]",
               in_batch, in_height, in_width, in_channels, f_height, f_width, f_in_channels, f_multiplier, out_batch,
               out_height, out_width, out_channels, stride_h, stride_w, dilation_h, dilation_w, padding_top,
               padding_bottom, padding_left, padding_right);

    Eigen::array<std::pair<int32_t, int32_t>, 4> padding;
    padding[0] = std::make_pair(0, 0);
    padding[1] = std::make_pair(padding_top, padding_bottom);
    padding[2] = std::make_pair(padding_left, padding_right);
    padding[3] = std::make_pair(0, 0);

    TIn input_val      = this->input->getTensor();
    TWeight weight_val = this->weight->getTensor();
    if (this->qinfo)
    {
        input_val  = input_val - (InEigenType)this->qinfo->input_zp();
        weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
    }

    ETensor4<InEigenType> input_padded = input_val.pad(padding);

    // GEMM doesn't fit well with DepthwiseConv2d
    // 1. use extract_image_patches() to handle stride/dilation/padding
    // 2. perform direct convolution

    // 1. extract_image_patches() output [N, KH, KW, OH * OW, IC]
    ETensor5<InEigenType> input_extract_patches = input_padded.extract_image_patches(
        f_height, f_width, stride_h, stride_w, dilation_h, dilation_w, Eigen::PADDING_VALID);
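    // the OH * OW patch axis is indexed below as ow * out_height + oh, i.e.
    // the height coordinate varies fastest along that axis (matching Eigen's
    // default column-major ordering)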

    Eigen::array<Eigen::Index, 4> reshape_dim;
    reshape_dim.fill(1);
    reshape_dim[3] = b_out_channels;

    Eigen::array<Eigen::Index, 4> bcast;
    bcast[0] = out_batch;
    bcast[1] = out_height;
    bcast[2] = out_width;
    bcast[3] = 1;

    // initialize with bias
    this->output->getTensor() = this->bias->getTensor().reshape(reshape_dim).broadcast(bcast);

    // 2. direct depthwise convolution
    for (int ob = 0; ob < out_batch; ob++)
    {
        for (int oh = 0; oh < out_height; oh++)
        {
            for (int ow = 0; ow < out_width; ow++)
            {
                for (int ic = 0; ic < in_channels; ic++)
                {
                    for (int cm = 0; cm < f_multiplier; cm++)
                    {
                        for (int fh = 0; fh < f_height; fh++)
                        {
                            for (int fw = 0; fw < f_width; fw++)
                            {
                                this->output->getTensor()(ob, oh, ow, ic * f_multiplier + cm) +=
                                    ((AccEigenType)input_extract_patches(ob, fh, fw, ow * out_height + oh, ic) *
                                     (AccEigenType)weight_val(fh, fw, ic, cm));
                            }
                        }
                    }
                }
            }
        }
    }

    if (AccDtype == DType_INT48)
    {
        this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
    }

    return GraphNode::eval();
}

template <DType InDtype, DType WeightDtype>
OpFullyConnected<InDtype, WeightDtype>::OpFullyConnected(SubgraphTraverser* sgt_,
                                                         TosaAttributeBase* attribute_,
                                                         TosaQuantInfoBase* qinfo_,
                                                         uint64_t id_)
    : GraphNode(sgt_, Op_FULLY_CONNECTED, id_)
{
    setRequiredOperands(3, 1);
    setRequiredRank(2);

    INIT_QINFO(Conv);
}

template <DType InDtype, DType WeightDtype>
OpFullyConnected<InDtype, WeightDtype>::~OpFullyConnected()
{
    if (qinfo)
        delete qinfo;
}

template <DType InDtype, DType WeightDtype>
int OpFullyConnected<InDtype, WeightDtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    input  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
    bias   = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);

    if (input->getShape()[1] != weight->getShape()[1])
    {
        printNodeValidationError("OpFullyConnected operator input.shape[1] should match weight.shape[1]");
        return 1;
    }

    if (weight->getShape()[0] != bias->getShape()[0])
    {
        printNodeValidationError("OpFullyConnected operator bias.shape[0] should match weight.shape[0]");
        return 1;
    }

    output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);

    return 0;
}

template <DType InDtype, DType WeightDtype>
int OpFullyConnected<InDtype, WeightDtype>::eval()
{
    typedef Eigen::Tensor<int, 1>::DimensionPair DimPair;
    Eigen::array<DimPair, 1> dims{ { DimPair(1, 0) } };

    Eigen::array<Eigen::Index, 2> weight_shuffle{ 1, 0 };

    Eigen::array<Eigen::Index, 2> bias_reshape;
    bias_reshape[0] = 1;
    bias_reshape[1] = this->bias->getShape()[0];

    Eigen::array<Eigen::Index, 2> bias_bcast;
    bias_bcast[0] = this->input->getShape()[0];
    bias_bcast[1] = 1;

    TIn input_val      = this->input->getTensor();
    TWeight weight_val = this->weight->getTensor().shuffle(weight_shuffle);
    if (this->qinfo)
    {
        input_val  = input_val - (InEigenType)this->qinfo->input_zp();
        weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
    }

    this->output->getTensor() =
        input_val.template cast<AccEigenType>().contract(weight_val.template cast<AccEigenType>(), dims) +
        this->bias->getTensor().reshape(bias_reshape).broadcast(bias_bcast);
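    // i.e. output(n, oc) = sum over c of
    //   (input(n, c) - input_zp) * (weight(oc, c) - weight_zp) + bias(oc),
    // with the [OC, IC] weight shuffled to [IC, OC] for the contraction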

    if (AccDtype == DType_INT48)
    {
        this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
    }
    return GraphNode::eval();
}

template <DType Dtype>
OpMatMul<Dtype>::OpMatMul(SubgraphTraverser* sgt_,
                          TosaAttributeBase* attribute_,
                          TosaQuantInfoBase* qinfo_,
                          uint64_t id_)
    : GraphNode(sgt_, Op_MATMUL, id_)
{
    setRequiredOperands(2, 1);
    setRequiredRank(3);

    INIT_QINFO(MatMul);
}

template <DType Dtype>
OpMatMul<Dtype>::~OpMatMul()
{
    if (qinfo)
        delete qinfo;
}

template <DType Dtype>
int OpMatMul<Dtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    a      = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    b      = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[1]);
    output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);

    ASSERT_MEM(a && b && output);

    // a: [N, H, C]
    // b: [N, C, W]
    // c: [N, H, W]

    // Check N
    if (a->getShape()[0] != b->getShape()[0] || a->getShape()[0] != output->getShape()[0])
    {
        printNodeValidationError("OpMatMul operator a.shape[0], b.shape[0] and output.shape[0] should match");
        return 1;
    }
    N = a->getShape()[0];

    // Check C
    if (a->getShape()[2] != b->getShape()[1])
    {
        printNodeValidationError("OpMatMul operator a.shape[2] should match b.shape[1]");
        return 1;
    }
    C = a->getShape()[2];

    // Check H
    if (a->getShape()[1] != output->getShape()[1])
    {
        printNodeValidationError("OpMatMul operator a.shape[1] should match output.shape[1]");
        return 1;
    }
    H = a->getShape()[1];

    // Check W
    if (b->getShape()[2] != output->getShape()[2])
    {
        printNodeValidationError("OpMatMul operator b.shape[2] should match output.shape[2]");
        return 1;
    }
    W = b->getShape()[2];

    return 0;
}

template <DType Dtype>
int OpMatMul<Dtype>::eval()
{
    typedef Eigen::Tensor<int, 1>::DimensionPair DimPair;
    Eigen::array<DimPair, 1> dims{ { DimPair(1, 0) } };

    TIn a_val = this->a->getTensor();
    TIn b_val = this->b->getTensor();
    if (this->qinfo)
    {
        a_val = a_val - (InEigenType)this->qinfo->a_zp();
        b_val = b_val - (InEigenType)this->qinfo->b_zp();
    }

    Eigen::array<Eigen::Index, 2> a_rank2_shape({ H, C });
    Eigen::array<Eigen::Index, 2> b_rank2_shape({ C, W });
    Eigen::array<Eigen::Index, 3> output_rank3_shape({ 1, H, W });

    Eigen::array<Eigen::Index, 3> a_size_array({ 1, H, C });
    Eigen::array<Eigen::Index, 3> b_size_array({ 1, C, W });

    Eigen::array<Eigen::Index, 3> a_begin_array({ 0, 0, 0 });
    Eigen::array<Eigen::Index, 3> b_begin_array({ 0, 0, 0 });

    // Iterate over the N dimension.
    for (int i = 0; i < N; i++)
    {
        a_begin_array[0] = i;
        b_begin_array[0] = i;

        TInRank2 a_rank2_val = a_val.slice(a_begin_array, a_size_array).reshape(a_rank2_shape);
        TInRank2 b_rank2_val = b_val.slice(b_begin_array, b_size_array).reshape(b_rank2_shape);
        TAccRank2 output_rank2_val =
            a_rank2_val.template cast<AccEigenType>().contract(b_rank2_val.template cast<AccEigenType>(), dims);
        TAcc output_rank3_val = output_rank2_val.reshape(output_rank3_shape);
        if (i == 0)
        {
            this->output->getTensor() = output_rank3_val;
        }
        else
        {
            TAcc temp = this->output->getTensor().concatenate(output_rank3_val, 0);
            this->output->getTensor() = temp;
        }
    }
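
    // each batch i is computed as an [H, C] x [C, W] contraction, reshaped to
    // [1, H, W], and the per-batch results are concatenated along axis 0 to
    // rebuild the [N, H, W] output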

    if (AccDtype == DType_INT48)
    {
        this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
    }

    return GraphNode::eval();
}

template <DType Dtype>
OpMaxPool2d<Dtype>::OpMaxPool2d(SubgraphTraverser* sgt_,
                                TosaAttributeBase* attribute_,
                                TosaQuantInfoBase* qinfo_,
                                uint64_t id_)
    : GraphNode(sgt_, Op_MAX_POOL2D, id_)
{
    setRequiredOperands(1, 1);
    setRequiredRank(4);

    INIT_ATTRIBUTE(Pool);
}

template <DType Dtype>
OpMaxPool2d<Dtype>::~OpMaxPool2d()
{
    if (attribute)
        delete attribute;
}

template <DType Dtype>
int OpMaxPool2d<Dtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    if (inputs[0]->matchType(*outputs[0]))
    {
        printNodeValidationError("OpMaxPool2d: input and output tensor type mismatch");
        return 1;
    }

    in  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    out = dynamic_cast<TosaReference::TensorTemplate<TOut>*>(outputs[0]);

    std::string msg;
    if (check_pool2d_attribute_common(attribute, in->getShape(), out->getShape(), msg))
    {
        msg = "OpMaxPool2d: " + msg;
        printNodeValidationError(msg.c_str());
        return 1;
    }

    return 0;
}

template <DType Dtype>
int OpMaxPool2d<Dtype>::eval()
{
    int in_batch    = this->in->getShape()[0];
    int in_height   = this->in->getShape()[1];
    int in_width    = this->in->getShape()[2];
    int in_channels = this->in->getShape()[3];

    int out_batch    = this->out->getShape()[0];
    int out_height   = this->out->getShape()[1];
    int out_width    = this->out->getShape()[2];
    int out_channels = this->out->getShape()[3];

    ERROR_IF(in_batch != out_batch, "OpMaxPool2d: tensor batch mismatch %d != %d", in_batch, out_batch);
    ERROR_IF(in_channels != out_channels, "OpMaxPool2d: tensor channel mismatch %d != %d", in_channels, out_channels);

    int padding_top    = this->attribute->padding()[0];
    int padding_bottom = this->attribute->padding()[1];
    int padding_left   = this->attribute->padding()[2];
    int padding_right  = this->attribute->padding()[3];
    int kernel_h       = this->attribute->kernel()[0];
    int kernel_w       = this->attribute->kernel()[1];
    int stride_h       = this->attribute->stride()[0];
    int stride_w       = this->attribute->stride()[1];

    DEBUG_INFO(OP,
               "perform MaxPool2d, input.shape=[%d,%d,%d,%d], output.shape=[%d,%d,%d,%d], kernel=[%d,%d], "
               "stride=[%d,%d], padding=[%d,%d,%d,%d]",
               in_batch, in_height, in_width, in_channels, out_batch, out_height, out_width, out_channels, kernel_h,
               kernel_w, stride_h, stride_w, padding_top, padding_bottom, padding_left, padding_right);

    Eigen::array<Eigen::Index, 2> im2col_input_dims;
    im2col_input_dims[0] = kernel_h * kernel_w;
    im2col_input_dims[1] = out_batch * out_height * out_width * out_channels;

    Eigen::array<Eigen::Index, 4> col2im_output_dims;
    col2im_output_dims[0] = out_batch;
    col2im_output_dims[1] = out_height;
    col2im_output_dims[2] = out_width;
    col2im_output_dims[3] = out_channels;

    Eigen::array<std::pair<int32_t, int32_t>, 4> padding;
    padding[0] = std::make_pair(0, 0);
    padding[1] = std::make_pair(padding_top, padding_bottom);
    padding[2] = std::make_pair(padding_left, padding_right);
    padding[3] = std::make_pair(0, 0);

    ETensor4<InEigenType> input_padded = this->in->getTensor().pad(padding, std::numeric_limits<InEigenType>::lowest());

    // extract_image_patches() output [N, KH, KW, H * W, C]
    // transpose to [KH, KW, N, H * W, C]
    // reshape to [KH * KW, N * H * W * C]
    //
    // Set the padding value to be the most negative value that can be
    // represented by the datatype to ensure that any padding values will be equal
    // to or smaller than the actual maximum in the KH x KW patch.
    ETensor2<InEigenType> input_extract_patches =
        input_padded
            .extract_image_patches(kernel_h, kernel_w, stride_h, stride_w, 1, 1, Eigen::PADDING_VALID,
                                   std::numeric_limits<InEigenType>::lowest())
            .shuffle(Eigen::array<Eigen::Index, 5>{ 1, 2, 0, 3, 4 })
            .reshape(im2col_input_dims);
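    // e.g. for an INT8 input the pad value is std::numeric_limits<int8_t>::lowest(),
    // i.e. -128, so an overhanging window can only select a pad element when every
    // real element in the window also equals -128, leaving the result unchanged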

    // Get the maximum of the KH x KW patches along axis 0
    Eigen::Tensor<DenseIndex, 1> tensor_argmax = input_extract_patches.argmax(0);

    // 1D result with [N * H * W * C]
    ETensor1<OutEigenType> out_1d(this->out->getElementCount());

    // indexing input_extract_patches with the argmax array gives the result
    for (size_t i = 0; i < this->out->getElementCount(); i++)
    {
        out_1d(i) = (OutEigenType)input_extract_patches(tensor_argmax(i), i);
    }

    // reshape result to [N, H, W, C]
    this->out->getTensor() = out_1d.reshape(col2im_output_dims);

    return GraphNode::eval();
}

template <DType InDtype, DType OutDtype>
OpTransposeConv2d<InDtype, OutDtype>::OpTransposeConv2d(SubgraphTraverser* sgt_,
                                                        TosaAttributeBase* attribute_,
                                                        TosaQuantInfoBase* qinfo_,
                                                        uint64_t id_)
    : GraphNode(sgt_, Op_TRANSPOSE_CONV2D, id_)
{
    setRequiredOperands(3, 1);
    setRequiredRank(4);

    INIT_ATTRIBUTE(TransposeConv);
    INIT_QINFO(Conv);
}

template <DType InDtype, DType OutDtype>
OpTransposeConv2d<InDtype, OutDtype>::~OpTransposeConv2d()
{
    if (attribute)
        delete attribute;
    if (qinfo)
        delete qinfo;
}

template <DType InDtype, DType OutDtype>
int OpTransposeConv2d<InDtype, OutDtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    input  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
    bias   = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
    output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);

    if (attribute->outpad().size() != 2)
    {
        printNodeValidationError("OpTransposeConv2d: illegal size for attribute outpad");
        return 1;
    }

    if (attribute->stride().size() != 2)
    {
        printNodeValidationError("OpTransposeConv2d: illegal size for attribute stride");
        return 1;
    }

    if (attribute->dilation().size() != 2)
    {
        printNodeValidationError("OpTransposeConv2d: illegal size for attribute dilation");
        return 1;
    }

    if (attribute->output_shape().size() != 4)
    {
        printNodeValidationError("OpTransposeConv2d: illegal size for attribute output_shape");
        return 1;
    }

    for (int d = 0; d < 4; d++)
    {
        if (attribute->output_shape()[d] != this->output->getShape()[d])
        {
            printNodeValidationError("OpTransposeConv2d: illegal size for attribute output_shape");
            return 1;
        }
    }

    return 0;
}

template <DType InDtype, DType OutDtype>
int OpTransposeConv2d<InDtype, OutDtype>::eval()
{
    int in_batch    = this->input->getShape()[0];
    int in_height   = this->input->getShape()[1];
    int in_width    = this->input->getShape()[2];
    int in_channels = this->input->getShape()[3];

    int f_out_channels = this->weight->getShape()[0];
    int f_height       = this->weight->getShape()[1];
    int f_width        = this->weight->getShape()[2];
    int f_in_channels  = this->weight->getShape()[3];

    int b_out_channels = this->bias->getShape()[0];

    int out_batch    = this->output->getShape()[0];
    int out_height   = this->output->getShape()[1];
    int out_width    = this->output->getShape()[2];
    int out_channels = this->output->getShape()[3];

    int padding_top  = this->attribute->outpad()[0];
    int padding_left = this->attribute->outpad()[1];
    int stride_h     = this->attribute->stride()[0];
    int stride_w     = this->attribute->stride()[1];
    int dilation_h   = this->attribute->dilation()[0];
    int dilation_w   = this->attribute->dilation()[1];

    ERROR_IF(in_batch != out_batch, "OpTransposeConv2d: tensor batch mismatch %d != %d", in_batch, out_batch);
    ERROR_IF(f_in_channels != in_channels, "OpTransposeConv2d: tensor input channel mismatch %d != %d", f_in_channels,
             in_channels);
    ERROR_IF(f_out_channels != out_channels, "OpTransposeConv2d: tensor output channel mismatch %d != %d",
             f_out_channels, out_channels);
    ERROR_IF(b_out_channels != out_channels, "OpTransposeConv2d: bias channels mismatch %d != %d", b_out_channels,
             out_channels);

    DEBUG_INFO(OP,
               "perform OpTransposeConv2d, input.shape=[%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d], "
               "output.shape=[%d,%d,%d,%d], stride=[%d,%d], dilation=[%d,%d], padding=[%d,%d]",
               in_batch, in_height, in_width, in_channels, f_height, f_width, f_out_channels, f_in_channels, out_batch,
               out_height, out_width, out_channels, stride_h, stride_w, dilation_h, dilation_w, padding_top,
               padding_left);

    TIn input_val      = this->input->getTensor();
    TWeight weight_val = this->weight->getTensor();
    if (this->qinfo)
    {
        input_val  = input_val - (InEigenType)this->qinfo->input_zp();
        weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
    }

    Eigen::array<Eigen::Index, 4> reshape_dim;
    reshape_dim.fill(1);
    reshape_dim[3] = b_out_channels;

    Eigen::array<Eigen::Index, 4> bcast;
    bcast[0] = out_batch;
    bcast[1] = out_height;
    bcast[2] = out_width;
    bcast[3] = 1;

    // initialize with bias
    this->output->getTensor() = this->bias->getTensor().reshape(reshape_dim).broadcast(bcast);

    int out_x_origin, out_y_origin;
    int out_x, out_y;
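
    // this is a scatter rather than a gather: each input pixel (ih, iw)
    // contributes to output positions
    //   out_y = ih * stride_h - padding_top + fh * dilation_h
    //   out_x = iw * stride_w - padding_left + fw * dilation_w
    // with out-of-range positions dropped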
    // reference implementation from: tensorflow/tensorflow/lite/kernels/internal/reference/reference_ops.h
    for (int ob = 0; ob < out_batch; ob++)
    {
        for (int ih = 0; ih < in_height; ih++)
        {
            for (int iw = 0; iw < in_width; iw++)
            {
                out_x_origin = iw * stride_w - padding_left;
                out_y_origin = ih * stride_h - padding_top;
                for (int ic = 0; ic < in_channels; ic++)
                {
                    for (int fh = 0; fh < f_height; fh++)
                    {
                        for (int fw = 0; fw < f_width; fw++)
                        {
                            out_x = out_x_origin + fw * dilation_w;
                            out_y = out_y_origin + fh * dilation_h;
                            for (int oc = 0; oc < out_channels; oc++)
                            {
                                if ((out_x >= 0 && out_x < out_width) && (out_y >= 0 && out_y < out_height))
                                {
                                    this->output->getTensor()(ob, out_y, out_x, oc) +=
                                        ((AccEigenType)input_val(ob, ih, iw, ic) *
                                         (AccEigenType)weight_val(oc, fh, fw, ic));
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    if (AccDtype == DType_INT48)
    {
        this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
    }

    return GraphNode::eval();
}

// template explicit instantiation
DEF_INSTANTIATE_RANK1_6_ONE_RANK_ONE_TYPE(OpArgMax, FLOAT);
DEF_INSTANTIATE_RANK1_6_ONE_RANK_ONE_TYPE(OpArgMax, INT8);
DEF_INSTANTIATE_RANK1_6_ONE_RANK_ONE_TYPE(OpArgMax, INT16);

DEF_INSTANTIATE_ONE_TYPE(OpAvgPool2d, FLOAT)
DEF_INSTANTIATE_ONE_TYPE(OpAvgPool2d, INT8)
DEF_INSTANTIATE_ONE_TYPE(OpAvgPool2d, INT16)

DEF_INSTANTIATE_TWO_TYPE(OpConv2d, FLOAT, FLOAT);
DEF_INSTANTIATE_TWO_TYPE(OpConv2d, INT8, INT4);
DEF_INSTANTIATE_TWO_TYPE(OpConv2d, INT8, INT8);
DEF_INSTANTIATE_TWO_TYPE(OpConv2d, INT16, INT8);

DEF_INSTANTIATE_TWO_TYPE(OpConv3d, FLOAT, FLOAT);
DEF_INSTANTIATE_TWO_TYPE(OpConv3d, INT8, INT4);
DEF_INSTANTIATE_TWO_TYPE(OpConv3d, INT8, INT8);
DEF_INSTANTIATE_TWO_TYPE(OpConv3d, INT16, INT8);

DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, FLOAT, FLOAT);
DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, INT8, INT4);
DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, INT8, INT8);
DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, INT16, INT8);

DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, FLOAT, FLOAT);
DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, INT8, INT4);
DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, INT8, INT8);
DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, INT16, INT8);

DEF_INSTANTIATE_ONE_TYPE(OpMatMul, INT8);
DEF_INSTANTIATE_ONE_TYPE(OpMatMul, INT16);
DEF_INSTANTIATE_ONE_TYPE(OpMatMul, FLOAT);

DEF_INSTANTIATE_ONE_TYPE(OpMaxPool2d, FLOAT);
DEF_INSTANTIATE_ONE_TYPE(OpMaxPool2d, INT8);
DEF_INSTANTIATE_ONE_TYPE(OpMaxPool2d, INT16);

DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, FLOAT, FLOAT);
DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, INT8, INT4);
DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, INT8, INT8);
DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, INT16, INT8);