Blame - reference_model/src/ops/tensor_ops.cc - tosa/reference_model

blob: 5494d77e701692cd43678739cee77d1c0e191085 [file] [log] [blame]

Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1
Kevin Cheng	3a47857	2021-01-22 17:21:02 -0800	[diff] [blame]	2	// Copyright (c) 2020-2021, ARM Limited.
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	3	//
				4	// Licensed under the Apache License, Version 2.0 (the "License");
				5	// you may not use this file except in compliance with the License.
				6	// You may obtain a copy of the License at
				7	//
				8	// http://www.apache.org/licenses/LICENSE-2.0
				9	//
				10	// Unless required by applicable law or agreed to in writing, software
				11	// distributed under the License is distributed on an "AS IS" BASIS,
				12	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				13	// See the License for the specific language governing permissions and
				14	// limitations under the License.
				15
				16	#include "tensor_ops.h"
				17	#include "quant_util.h"
				18	#include "template_types.h"
				19
				20	using namespace TosaReference;
				21	using namespace Eigen;
				22	using namespace tosa;
				23
Kevin Cheng	7eb93d7	2021-10-09 01:26:08 +0000	[diff] [blame]	24	int check_pool2d_attribute_common(tosa::TosaPoolAttribute* attribute,
				25	std::vector<int32_t> input_shape,
				26	std::vector<int32_t> output_shape,
				27	std::string& msg)
				28	{
				29	if (attribute->padding().size() != 4)
				30	{
				31	msg = "illegal size for attribute padding";
				32	return 1;
				33	}
				34
				35	if (attribute->kernel().size() != 2)
				36	{
				37	msg = "illegal size for attribute kernel";
				38	return 1;
				39	}
				40
				41	if (attribute->stride().size() != 2)
				42	{
				43	msg = "illegal size for attribute stride";
				44	return 1;
				45	}
				46
				47	for (int32_t i : attribute->padding())
				48	{
				49	if (i < 0)
				50	{
				51	msg = "At least one pad is smaller than zero";
				52	return 1;
				53	}
				54	}
				55
				56	for (int32_t i : attribute->kernel())
				57	{
				58	if (i < 1)
				59	{
				60	msg = "At least one kernel dimension is smaller than zero";
				61	return 1;
				62	}
				63	}
				64
				65	for (int32_t i : attribute->stride())
				66	{
				67	if (i < 1)
				68	{
				69	msg = "At least one stride dimension is smaller than zero";
				70	return 1;
				71	}
				72	}
				73
				74	int32_t IH = input_shape[1];
				75	int32_t IW = input_shape[2];
				76	int32_t OH = output_shape[1];
				77	int32_t OW = output_shape[2];
				78
				79	int32_t pad_top = attribute->padding()[0];
				80	int32_t pad_bottom = attribute->padding()[1];
				81	int32_t pad_left = attribute->padding()[2];
				82	int32_t pad_right = attribute->padding()[3];
				83
				84	int32_t stride_y = attribute->stride()[0];
				85	int32_t stride_x = attribute->stride()[1];
				86	int32_t kernel_y = attribute->kernel()[0];
				87	int32_t kernel_x = attribute->kernel()[1];
				88
				89	if (pad_top >= kernel_y \|\| pad_bottom >= kernel_y \|\| pad_left >= kernel_x \|\| pad_right >= kernel_x)
				90	{
				91	msg = "At least one pad is >= kernel dimension";
				92	return 1;
				93	}
				94
Kevin Cheng	8079480	2021-11-01 11:14:13 -0700	[diff] [blame^]	95	if ((OH != (IH + pad_top + pad_bottom + stride_y - kernel_y) / stride_y) \|\|
				96	(OW != (IW + pad_left + pad_right + stride_x - kernel_x) / stride_x))
Kevin Cheng	7eb93d7	2021-10-09 01:26:08 +0000	[diff] [blame]	97	{
				98	msg = "Mismatch between output shape provided and expected output shape";
				99	return 1;
				100	}
				101
				102	return 0;
				103	}
				104
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	105	template <int Rank, DType Dtype>
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	106	OpArgMax<Rank, Dtype>::OpArgMax(SubgraphTraverser* sgt_,
				107	TosaAttributeBase* attribute_,
				108	TosaQuantInfoBase* qinfo_,
				109	uint64_t id_)
				110	: GraphNode(sgt_, Op_ARGMAX, id_)
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	111	{
				112	setRequiredOperands(1, 1);
Kevin Cheng	cc61be3	2021-10-14 17:09:57 -0700	[diff] [blame]	113	setRequiredRank(1, 4);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	114
				115	INIT_ATTRIBUTE(Axis);
				116	}
				117
				118	template <int Rank, DType Dtype>
				119	OpArgMax<Rank, Dtype>::~OpArgMax()
				120	{
				121	if (attribute)
				122	delete attribute;
				123	}
				124
				125	template <int Rank, DType Dtype>
				126	int OpArgMax<Rank, Dtype>::checkTensorAttributes()
				127	{
				128	if (validateRequiredOperands())
				129	return 1;
				130
Kevin Cheng	cc61be3	2021-10-14 17:09:57 -0700	[diff] [blame]	131	if (validateRequiredRank(inputs[0]))
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	132	{
				133	return 1;
				134	}
				135
Kevin Cheng	cc61be3	2021-10-14 17:09:57 -0700	[diff] [blame]	136	int32_t output_rank = inputs[0]->getRank() - 1;
				137	if (output_rank != outputs[0]->getRank())
				138	{
				139	printNodeValidationError("OpArgMax: Output rank needs to be rank(input) - 1");
				140	return 1;
				141	}
				142
				143	if (outputs[0]->getDtype() != DType_INT32)
				144	{
				145	printNodeValidationError("OpArgMax: Output data type not supported for this configuration of operator");
				146	return 1;
				147	}
				148
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	149	input = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
				150	output = dynamic_cast<TosaReference::TensorTemplate<TOut>*>(outputs[0]);
				151
Kevin Cheng	cc61be3	2021-10-14 17:09:57 -0700	[diff] [blame]	152	if (attribute->axis() < 0 \|\| attribute->axis() >= input->getRank())
				153	{
				154	printNodeValidationError("OpArgMax: Axis needs to be within [0, rank(input)]");
				155	return 1;
				156	}
				157
				158	bool shape_check = true;
				159	for (int32_t i = 0; i < input->getRank(); i++)
				160	{
				161	if (i < attribute->axis())
				162	{
				163	if (input->getShape()[i] != output->getShape()[i])
				164	{
				165	shape_check = false;
				166	break;
				167	}
				168	}
				169	else if (i > attribute->axis())
				170	{
				171	if (input->getShape()[i] != output->getShape()[i - 1])
				172	{
				173	shape_check = false;
				174	break;
				175	}
				176	}
				177	// No need to check i == axis
				178	}
				179	if (!shape_check)
				180	{
				181	printNodeValidationError("OpArgMax: Mismatch between output shape provided and expected output shape");
				182	return 1;
				183	}
				184
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	185	return 0;
				186	}
				187
				188	template <int Rank, DType Dtype>
				189	int OpArgMax<Rank, Dtype>::eval()
				190	{
				191	Eigen::Tensor<DenseIndex, Rank - 1> index = this->input->getTensor().argmax(attribute->axis());
				192
				193	this->output->getTensor() = index.unaryExpr([](DenseIndex in) -> OutEigenType { return (OutEigenType)in; });
				194
				195	return GraphNode::eval();
				196	}
				197
				198	template <DType Dtype>
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	199	OpAvgPool2d<Dtype>::OpAvgPool2d(SubgraphTraverser* sgt_,
				200	TosaAttributeBase* attribute_,
				201	TosaQuantInfoBase* qinfo_,
				202	uint64_t id_)
				203	: GraphNode(sgt_, Op_AVG_POOL2D, id_)
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	204	{
				205	setRequiredOperands(1, 1);
				206	setRequiredRank(4);
				207
Kevin Cheng	93a1628	2021-08-31 16:14:03 -0700	[diff] [blame]	208	INIT_ATTRIBUTE(Pool);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	209	INIT_QINFO(Unary);
				210	}
				211
				212	template <DType Dtype>
				213	OpAvgPool2d<Dtype>::~OpAvgPool2d()
				214	{
				215	if (attribute)
				216	delete attribute;
				217	}
				218
				219	template <DType Dtype>
				220	int OpAvgPool2d<Dtype>::checkTensorAttributes()
				221	{
				222	if (validateRequiredOperands())
				223	return 1;
				224
				225	if (validateRequiredRank(inputs[0]) \|\| validateRequiredRank(outputs[0]))
				226	{
				227	return 1;
				228	}
				229
				230	if (inputs[0]->matchType(*outputs[0]))
				231	{
				232	printNodeValidationError("OpAvgPool2d: input and output tensor type mismatch");
				233	return 1;
				234	}
				235
				236	in = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
				237	out = dynamic_cast<TosaReference::TensorTemplate<TOut>*>(outputs[0]);
				238
Kevin Cheng	7eb93d7	2021-10-09 01:26:08 +0000	[diff] [blame]	239	if (Dtype != DType_INT8 && this->qinfo)
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	240	{
Kevin Cheng	7eb93d7	2021-10-09 01:26:08 +0000	[diff] [blame]	241	ERROR_IF(this->qinfo->input_zp() != 0, "OpAvgPool2d: zeropoint only for int8_t");
				242	ERROR_IF(this->qinfo->output_zp() != 0, "OpAvgPool2d: zeropoint only for int8_t");
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	243	}
				244
Kevin Cheng	7eb93d7	2021-10-09 01:26:08 +0000	[diff] [blame]	245	std::string msg;
				246	if (check_pool2d_attribute_common(attribute, in->getShape(), out->getShape(), msg))
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	247	{
Kevin Cheng	7eb93d7	2021-10-09 01:26:08 +0000	[diff] [blame]	248	msg = "OpAvgPool2d: " + msg;
				249	printNodeValidationError(msg.c_str());
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	250	return 1;
				251	}
				252
				253	return 0;
				254	}
				255
				256	template <DType Dtype>
				257	ETensor1<int32_t> OpAvgPool2d<Dtype>::calculate_div_map_1d(int in_size, int out_size, int kernel_size, int stride)
				258	{
				259	ETensor1<int32_t> result(out_size);
				260
				261	int32_t total_pad = (out_size - 1) * stride + kernel_size - in_size;
				262	total_pad = total_pad < 0 ? 0 : total_pad;
				263
				264	int32_t pad_left = total_pad >> 1;
				265	int32_t pad_right = total_pad - pad_left;
				266
				267	result.setConstant(kernel_size);
				268
				269	// the index left to 'left_index' and index right to 'right_index' indicates
				270	// the input window of this output covers a pad bit
				271	int32_t left_index = pad_left / stride;
				272	int32_t right_index = pad_right / stride;
				273
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	274	// minus the number of pad bit this index cover
				275	while (left_index >= 0)
				276	{
				277	result(left_index) -= (pad_left - left_index * stride);
				278	left_index--;
				279	}
				280
				281	while (right_index >= 0)
				282	{
				283	result(out_size - 1 - right_index) -= (pad_right - right_index * stride);
				284	right_index--;
				285	}
				286
				287	return result;
				288	}
				289
				290	// assuming input and output tensor have same scales like tflite reference
				291	// so no need to scale input and output
				292	template <DType Dtype>
				293	int OpAvgPool2d<Dtype>::eval()
				294	{
				295	int in_batch = this->in->getShape()[0];
				296	int in_height = this->in->getShape()[1];
				297	int in_width = this->in->getShape()[2];
				298	int in_channels = this->in->getShape()[3];
				299
				300	int out_batch = this->out->getShape()[0];
				301	int out_height = this->out->getShape()[1];
				302	int out_width = this->out->getShape()[2];
				303	int out_channels = this->out->getShape()[3];
				304
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	305	ERROR_IF(in_batch != out_batch, "OpAvgPool2d: tensor batch mismatch %d != %d", in_batch, out_batch);
				306	ERROR_IF(in_channels != out_channels, "OpAvgPool2d: tensor channel mismatch %d != %d", in_channels, out_channels);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	307
				308	int padding_top = this->attribute->padding()[0];
				309	int padding_bottom = this->attribute->padding()[1];
				310	int padding_left = this->attribute->padding()[2];
				311	int padding_right = this->attribute->padding()[3];
				312	int kernel_h = this->attribute->kernel()[0];
				313	int kernel_w = this->attribute->kernel()[1];
				314	int stride_h = this->attribute->stride()[0];
				315	int stride_w = this->attribute->stride()[1];
				316
				317	DEBUG_INFO(OP,
				318	"perform AvgPool2d, input.shape=[%d,%d,%d,%d], output.shape=[%d,%d,%d,%d], kernel=[%d,%d], "
				319	"stride=[%d,%d], padding=[%d,%d,%d,%d]",
				320	in_batch, in_height, in_width, in_channels, out_batch, out_height, out_width, out_channels, kernel_h,
				321	kernel_w, stride_h, stride_w, padding_top, padding_bottom, padding_left, padding_right);
				322
				323	Eigen::array<Eigen::Index, 2> im2col_input_dims;
				324	im2col_input_dims[0] = kernel_h * kernel_w;
				325	im2col_input_dims[1] = out_batch * out_height * out_width * out_channels;
				326
				327	Eigen::array<Eigen::Index, 4> col2im_output_dims;
				328	col2im_output_dims[0] = out_batch;
				329	col2im_output_dims[1] = out_height;
				330	col2im_output_dims[2] = out_width;
				331	col2im_output_dims[3] = out_channels;
				332
				333	Eigen::array<std::pair<int32_t, int32_t>, 4> padding;
				334	padding[0] = std::make_pair(0, 0);
				335	padding[1] = std::make_pair(padding_top, padding_bottom);
				336	padding[2] = std::make_pair(padding_left, padding_right);
				337	padding[3] = std::make_pair(0, 0);
				338
				339	ETensor4<InEigenType> input_val = this->in->getTensor();
				340	if (this->qinfo)
				341	{
				342	input_val = input_val - (InEigenType)this->qinfo->input_zp();
				343	}
				344
				345	ETensor4<InEigenType> input_padded = input_val.pad(padding);
				346
				347	// assuming input and output have same scales
				348	// so input and output scaling is not required
				349	// TODO: check if this assumption TOSA made
				350
				351	// extract_image_patches() output [N, KH, KW, H * W, C]
				352	// transpose to [KH, KW, N, H * W, C]
				353	// reshape to [KH * KW, N * H * W * C]
				354	ETensor2<InEigenType> input_extract_patches =
				355	input_padded.extract_image_patches(kernel_h, kernel_w, stride_h, stride_w, 1, 1, Eigen::PADDING_VALID)
				356	.shuffle(Eigen::array<Eigen::Index, 5>{ 1, 2, 0, 3, 4 })
				357	.reshape(im2col_input_dims);
				358
				359	// 1D result with [N * H * W * C]
				360	ETensor1<AccEigenType> out_1d(this->out->getElementCount());
				361	out_1d.setZero();
				362
				363	// sum pool
				364	for (size_t i = 0; i < this->out->getElementCount(); i++)
				365	{
				366	for (int32_t j = 0; j < kernel_h * kernel_w; j++)
				367	{
				368	out_1d(i) += (AccEigenType)input_extract_patches(j, i);
				369	}
				370	}
				371
				372	// reshape result to [N, H, W, C] and divide with div_map
				373	ETensor4<AccEigenType> sum = out_1d.reshape(col2im_output_dims);
				374
				375	// calculate 1d height/width div_map (number of elements this pooling window covers)
				376	// and outer product to get 2d div_map, then reshape/broadcast to [N, H, W, C]
				377	ETensor1<int32_t> div_map_h = calculate_div_map_1d(in_height, out_height, kernel_h, stride_h);
				378	ETensor1<int32_t> div_map_w = calculate_div_map_1d(in_width, out_width, kernel_w, stride_w);
				379	Eigen::array<Eigen::IndexPair<Eigen::Index>, 1> contract_dims = { Eigen::IndexPair<Eigen::Index>(1, 0) };
				380	Eigen::array<Eigen::Index, 4> bcast{ out_batch, 1, 1, out_channels };
				381
				382	ETensor4<int32_t> div_map =
				383	div_map_h.reshape(Eigen::array<Eigen::Index, 2>{ out_height, 1 })
				384	.contract(div_map_w.reshape(Eigen::array<Eigen::Index, 2>{ 1, out_width }), contract_dims)
				385	.reshape(Eigen::array<Eigen::Index, 4>{ 1, out_height, out_width, 1 })
				386	.broadcast(bcast);
				387
				388	if (Dtype != DType_FLOAT)
				389	{
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	390	try
				391	{
				392	this->out->getTensor() = sum.binaryExpr(div_map, [](AccEigenType value, int32_t div) -> OutEigenType {
				393	int32_t multiplier, shift;
				394	TosaReference::QuantUtil::reciprocal_scale(div, multiplier, shift);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	395
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	396	return (OutEigenType)TosaReference::QuantUtil::apply_scale_32(value, multiplier, shift, false);
				397	});
				398	}
				399	catch (std::string desc)
				400	{
				401	REQUIRE(false, "OpAvgPool2d apply_scale_32() fails: %s.", desc.c_str());
				402	}
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	403	this->out->getTensor() = this->out->getTensor() + (OutEigenType)(this->qinfo->output_zp());
				404	this->out->getTensor() = this->out->getTensor().cwiseMax((OutEigenType)QMin);
				405	this->out->getTensor() = this->out->getTensor().cwiseMin((OutEigenType)QMax);
				406	}
				407	else
				408	{
				409	this->out->getTensor() = (sum / div_map.template cast<AccEigenType>()).template cast<OutEigenType>();
				410	}
				411
				412	return GraphNode::eval();
				413	}
				414
				415	template <DType InDtype, DType WeightDtype>
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	416	OpConv2d<InDtype, WeightDtype>::OpConv2d(SubgraphTraverser* sgt_,
				417	TosaAttributeBase* attribute_,
				418	TosaQuantInfoBase* qinfo_,
				419	uint64_t id_)
				420	: GraphNode(sgt_, Op_CONV2D, id_)
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	421	{
				422	setRequiredOperands(3, 1);
				423	setRequiredRank(4);
				424
Kevin Cheng	93a1628	2021-08-31 16:14:03 -0700	[diff] [blame]	425	INIT_ATTRIBUTE(Conv);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	426	INIT_QINFO(Conv);
				427	}
				428
				429	template <DType InDtype, DType WeightDtype>
				430	OpConv2d<InDtype, WeightDtype>::~OpConv2d()
				431	{
				432	if (attribute)
				433	delete attribute;
				434	if (qinfo)
				435	delete qinfo;
				436	}
				437
				438	template <DType InDtype, DType WeightDtype>
				439	int OpConv2d<InDtype, WeightDtype>::checkTensorAttributes()
				440	{
				441	if (validateRequiredOperands())
				442	return 1;
				443
				444	if (validateRequiredRank(inputs[0]) \|\| validateRequiredRank(inputs[1]) \|\| validateRequiredRank(outputs[0]))
				445	{
				446	return 1;
				447	}
				448
				449	// 'bias' checked separatedly since it doens't make sense to make required rank ranging from 1 to 4
				450	if (inputs[2]->getRank() != 1)
				451	{
				452	printNodeValidationError("OpConv2d: bias tensor must be rank 1");
				453	}
				454
Kevin Cheng	cc61be3	2021-10-14 17:09:57 -0700	[diff] [blame]	455	ERROR_IF(outputs[0]->getDtype() != AccDtype,
Kevin Cheng	8079480	2021-11-01 11:14:13 -0700	[diff] [blame^]	456	"OpConv2d: Output data type not supported for this configuration of operator");
Kevin Cheng	cc61be3	2021-10-14 17:09:57 -0700	[diff] [blame]	457
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	458	input = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
				459	weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
				460	bias = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
				461	output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);
				462
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	463	if (attribute->padding().size() != 4)
				464	{
				465	printNodeValidationError("OpConv2d: illegal size for attribute padding");
				466	return 1;
				467	}
				468
				469	if (attribute->stride().size() != 2)
				470	{
				471	printNodeValidationError("OpConv2d: illegal size for attribute stride");
				472	return 1;
				473	}
				474
				475	if (attribute->dilation().size() != 2)
				476	{
				477	printNodeValidationError("OpConv2d: illegal size for attribute dilation");
				478	return 1;
				479	}
				480
Kevin Cheng	cc61be3	2021-10-14 17:09:57 -0700	[diff] [blame]	481	if (this->qinfo)
				482	{
				483	if (InDtype != DType_INT8)
				484	{
				485	ERROR_IF(this->qinfo->input_zp() != 0, "OpConv2d: zeropoint only for int8_t");
				486	}
				487	if (WeightDtype != DType_INT8)
				488	{
				489	ERROR_IF(this->qinfo->weight_zp() != 0, "OpConv2d: zeropoint only for int8_t");
				490	}
				491	}
				492
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	493	return 0;
				494	}
				495
				496	template <DType InDtype, DType WeightDtype>
				497	int OpConv2d<InDtype, WeightDtype>::eval()
				498	{
				499	int in_batch = this->input->getShape()[0];
				500	int in_height = this->input->getShape()[1];
				501	int in_width = this->input->getShape()[2];
				502	int in_channels = this->input->getShape()[3];
				503
				504	int f_out_channels = this->weight->getShape()[0];
				505	int f_height = this->weight->getShape()[1];
				506	int f_width = this->weight->getShape()[2];
				507	int f_in_channels = this->weight->getShape()[3];
				508
				509	int b_out_channels = this->bias->getShape()[0];
				510
				511	int out_batch = this->output->getShape()[0];
				512	int out_height = this->output->getShape()[1];
				513	int out_width = this->output->getShape()[2];
				514	int out_channels = this->output->getShape()[3];
				515
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	516	ERROR_IF(in_batch != out_batch, "OpConv2d: tensor batch mismatch %d != %d", in_batch, out_batch);
				517	ERROR_IF(f_in_channels != in_channels, "OpConv2d: tensor input channel mismatch %d != %d", f_in_channels,
				518	in_channels);
				519	ERROR_IF(f_out_channels != out_channels, "OpConv2d: tensor output channel mismatch %d != %d", f_out_channels,
				520	out_channels);
				521	ERROR_IF(b_out_channels != out_channels, "OpConv2d: bias channel mismatch %d != %d", b_out_channels, out_channels);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	522
				523	int padding_top = this->attribute->padding()[0];
				524	int padding_bottom = this->attribute->padding()[1];
				525	int padding_left = this->attribute->padding()[2];
				526	int padding_right = this->attribute->padding()[3];
				527	int stride_h = this->attribute->stride()[0];
				528	int stride_w = this->attribute->stride()[1];
				529	int dilation_h = this->attribute->dilation()[0];
				530	int dilation_w = this->attribute->dilation()[1];
				531
				532	DEBUG_INFO(OP,
				533	"perform OpConv2d, input.shape=[%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d], output.shape=[%d,%d,%d,%d], "
				534	"stride=[%d,%d], dilation=[%d,%d], padding=[%d,%d,%d,%d]",
				535	in_batch, in_height, in_width, in_channels, f_height, f_width, f_in_channels, f_out_channels, out_batch,
				536	out_height, out_width, out_channels, stride_h, stride_w, dilation_h, dilation_w, padding_top,
				537	padding_bottom, padding_left, padding_right);
				538
				539	// GEMM-conv2d, left matrix is input, right matrix is weight
				540	Eigen::array<Eigen::Index, 2> im2col_input_dims;
				541	im2col_input_dims[0] = out_batch * out_height * out_width;
				542	im2col_input_dims[1] = f_height * f_width * f_in_channels;
				543
				544	Eigen::array<Eigen::Index, 2> im2col_weight_dims;
				545	im2col_weight_dims[0] = f_height * f_width * f_in_channels;
				546	im2col_weight_dims[1] = f_out_channels;
				547
				548	Eigen::array<Eigen::Index, 2> bias_reshaped_dims;
				549	bias_reshaped_dims[0] = 1;
				550	bias_reshaped_dims[1] = b_out_channels;
				551
				552	Eigen::array<Eigen::Index, 4> weight_zp_bcast_dims;
				553	weight_zp_bcast_dims[0] = f_height;
				554	weight_zp_bcast_dims[1] = f_width;
				555	weight_zp_bcast_dims[2] = f_in_channels;
				556
				557	Eigen::array<Eigen::Index, 2> bias_bcast_dims;
				558	bias_bcast_dims[0] = out_batch * out_height * out_width;
				559	bias_bcast_dims[1] = 1;
				560
				561	Eigen::array<Eigen::Index, 4> col2im_output_dims;
				562	col2im_output_dims[0] = out_batch;
				563	col2im_output_dims[1] = out_height;
				564	col2im_output_dims[2] = out_width;
				565	col2im_output_dims[3] = out_channels;
				566
				567	Eigen::array<Eigen::IndexPair<Eigen::Index>, 1> contract_dims = { Eigen::IndexPair<Eigen::Index>(1, 0) };
				568
				569	Eigen::array<std::pair<int32_t, int32_t>, 4> padding;
				570	padding[0] = std::make_pair(0, 0);
				571	padding[1] = std::make_pair(padding_top, padding_bottom);
				572	padding[2] = std::make_pair(padding_left, padding_right);
				573	padding[3] = std::make_pair(0, 0);
				574
				575	TIn input_val = this->input->getTensor();
				576	TWeight weight_val = this->weight->getTensor();
				577	if (this->qinfo)
				578	{
				579	input_val = input_val - (InEigenType)this->qinfo->input_zp();
				580	weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
				581	}
				582
				583	ETensor4<InEigenType> input_padded = input_val.pad(padding);
				584
				585	// extract_image_patches() output [N, KH, KW, H * W, C]
				586	// need to transpose to [N, H * W, KH, KW, C]
				587	ETensor5<InEigenType> input_extract_patches =
				588	input_padded
				589	.extract_image_patches(f_height, f_width, stride_h, stride_w, dilation_h, dilation_w, Eigen::PADDING_VALID)
				590	.shuffle(Eigen::array<Eigen::Index, 5>{ 0, 3, 1, 2, 4 });
				591
				592	// reshape input to [N * H * W, KH * KW * C]
				593	ETensor2<InEigenType> im2col_input = input_extract_patches.reshape(im2col_input_dims);
				594
				595	// transpose and reshape weight from [OC, H, W, IC] to [H * W * IC, OC]
				596	ETensor2<WeightEigenType> im2col_weight =
				597	weight_val.shuffle(Eigen::array<Eigen::Index, 4>({ 1, 2, 3, 0 })).reshape(im2col_weight_dims);
				598
				599	// don't need to apply bias_multiplier ( * bias_scale and >> bias_shift) since tflite already scale it
				600	// and reshaped from [C] to [1, C], and broadcast to [N * H * W, C]
				601	ETensor2<AccEigenType> bias_2d = this->bias->getTensor().reshape(bias_reshaped_dims).broadcast(bias_bcast_dims);
				602
				603	// output matrix is [N * H * W, C]
				604	ETensor2<AccEigenType> contracted_result =
				605	im2col_input.template cast<AccEigenType>().contract(im2col_weight.template cast<AccEigenType>(), contract_dims);
				606
				607	// adding bias
				608	ETensor2<AccEigenType> biased_output = contracted_result + bias_2d.template cast<AccEigenType>();
				609
				610	// reshape back to [N, H, W, C]
				611	this->output->getTensor() = biased_output.reshape(col2im_output_dims);
				612
				613	if (AccDtype == DType_INT48)
				614	{
				615	this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
				616	this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
				617	}
				618
				619	return GraphNode::eval();
				620	}
				621
				622	template <DType InDtype, DType WeightDtype>
Kevin Cheng	1533b85	2021-09-01 12:51:58 -0700	[diff] [blame]	623	OpConv3d<InDtype, WeightDtype>::OpConv3d(SubgraphTraverser* sgt_,
				624	TosaAttributeBase* attribute_,
				625	TosaQuantInfoBase* qinfo_,
				626	uint64_t id_)
				627	: GraphNode(sgt_, Op_CONV3D, id_)
				628	{
				629	setRequiredOperands(3, 1);
				630	setRequiredRank(5);
				631
				632	INIT_ATTRIBUTE(Conv);
				633	INIT_QINFO(Conv);
				634	}
				635
				636	template <DType InDtype, DType WeightDtype>
				637	OpConv3d<InDtype, WeightDtype>::~OpConv3d()
				638	{
				639	if (attribute)
				640	delete attribute;
				641	if (qinfo)
				642	delete qinfo;
				643	}
				644
				645	template <DType InDtype, DType WeightDtype>
				646	int OpConv3d<InDtype, WeightDtype>::checkTensorAttributes()
				647	{
				648	if (validateRequiredOperands())
				649	return 1;
				650
				651	if (validateRequiredRank(inputs[0]) \|\| validateRequiredRank(inputs[1]) \|\| validateRequiredRank(outputs[0]))
				652	{
				653	return 1;
				654	}
				655
				656	// 'bias' checked separatedly since it doens't make sense to make required rank ranging from 1 to 4
				657	if (inputs[2]->getRank() != 1)
				658	{
				659	printNodeValidationError("OpConv3d: bias tensor must be rank 1");
				660	}
				661
Kevin Cheng	cc61be3	2021-10-14 17:09:57 -0700	[diff] [blame]	662	ERROR_IF(outputs[0]->getDtype() != AccDtype,
Kevin Cheng	8079480	2021-11-01 11:14:13 -0700	[diff] [blame^]	663	"OpConv3d: Output data type not supported for this configuration of operator");
Kevin Cheng	cc61be3	2021-10-14 17:09:57 -0700	[diff] [blame]	664
Kevin Cheng	1533b85	2021-09-01 12:51:58 -0700	[diff] [blame]	665	input = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
				666	weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
				667	bias = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
				668	output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);
				669
				670	if (attribute->padding().size() != 6)
				671	{
				672	printNodeValidationError("OpConv3d: illegal size for attribute padding");
				673	return 1;
				674	}
				675
				676	if (attribute->stride().size() != 3)
				677	{
				678	printNodeValidationError("OpConv3d: illegal size for attribute stride");
				679	return 1;
				680	}
				681
				682	if (attribute->dilation().size() != 3)
				683	{
				684	printNodeValidationError("OpConv3d: illegal size for attribute dilation");
				685	return 1;
				686	}
				687
Kevin Cheng	cc61be3	2021-10-14 17:09:57 -0700	[diff] [blame]	688	if (this->qinfo)
				689	{
				690	if (InDtype != DType_INT8)
				691	{
				692	ERROR_IF(this->qinfo->input_zp() != 0, "OpConv3d: zeropoint only for int8_t");
				693	}
				694	if (WeightDtype != DType_INT8)
				695	{
				696	ERROR_IF(this->qinfo->weight_zp() != 0, "OpConv3d: zeropoint only for int8_t");
				697	}
				698	}
				699
Kevin Cheng	1533b85	2021-09-01 12:51:58 -0700	[diff] [blame]	700	return 0;
				701	}
				702
				703	template <DType InDtype, DType WeightDtype>
				704	int OpConv3d<InDtype, WeightDtype>::eval()
				705	{
				706	int in_batch = this->input->getShape()[0];
				707	int in_depth = this->input->getShape()[1];
				708	int in_height = this->input->getShape()[2];
				709	int in_width = this->input->getShape()[3];
				710	int in_channels = this->input->getShape()[4];
				711
				712	int f_out_channels = this->weight->getShape()[0];
				713	int f_depth = this->weight->getShape()[1];
				714	int f_height = this->weight->getShape()[2];
				715	int f_width = this->weight->getShape()[3];
				716	int f_in_channels = this->weight->getShape()[4];
				717
				718	int b_out_channels = this->bias->getShape()[0];
				719
				720	int out_batch = this->output->getShape()[0];
				721	int out_depth = this->output->getShape()[1];
				722	int out_height = this->output->getShape()[2];
				723	int out_width = this->output->getShape()[3];
				724	int out_channels = this->output->getShape()[4];
				725
				726	ERROR_IF(in_batch != out_batch, "OpConv3d: tensor batch mismatch %d != %d", in_batch, out_batch);
				727	ERROR_IF(f_in_channels != in_channels, "OpConv3d: tensor input channel mismatch %d != %d", f_in_channels,
				728	in_channels);
				729	ERROR_IF(f_out_channels != out_channels, "OpConv3d: tensor output channel mismatch %d != %d", f_out_channels,
				730	out_channels);
				731	ERROR_IF(b_out_channels != out_channels, "OpConv3d: bias channel mismatch %d != %d", b_out_channels, out_channels);
				732
				733	int padding_d0 = this->attribute->padding()[0];
				734	int padding_d1 = this->attribute->padding()[1];
				735	int padding_top = this->attribute->padding()[2];
				736	int padding_bottom = this->attribute->padding()[3];
				737	int padding_left = this->attribute->padding()[4];
				738	int padding_right = this->attribute->padding()[5];
				739	int stride_d = this->attribute->stride()[0];
				740	int stride_h = this->attribute->stride()[1];
				741	int stride_w = this->attribute->stride()[2];
				742	int dilation_d = this->attribute->dilation()[0];
				743	int dilation_h = this->attribute->dilation()[1];
				744	int dilation_w = this->attribute->dilation()[2];
				745
				746	DEBUG_INFO(
				747	OP,
				748	"perform OpConv3d, input.shape=[%d,%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d,%d], output.shape=[%d,%d,%d,%d,%d], "
				749	"stride=[%d,%d,%d], dilation=[%d,%d,%d], padding=[%d,%d,%d,%d,%d,%d]",
				750	in_batch, in_depth, in_height, in_width, in_channels, f_out_channels, f_depth, f_height, f_width, f_in_channels,
				751	out_batch, out_depth, out_height, out_width, out_channels, stride_d, stride_h, stride_w, dilation_d, dilation_h,
				752	dilation_w, padding_d0, padding_d1, padding_top, padding_bottom, padding_left, padding_right);
				753
				754	Eigen::array<std::pair<int32_t, int32_t>, 5> padding;
				755	padding[0] = std::make_pair(0, 0);
				756	padding[1] = std::make_pair(padding_d0, padding_d1);
				757	padding[2] = std::make_pair(padding_top, padding_bottom);
				758	padding[3] = std::make_pair(padding_left, padding_right);
				759	padding[4] = std::make_pair(0, 0);
				760
				761	TIn input_val = this->input->getTensor();
				762	TWeight weight_val = this->weight->getTensor();
				763	if (this->qinfo)
				764	{
				765	input_val = input_val - (InEigenType)this->qinfo->input_zp();
				766	weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
				767	}
				768
				769	ETensor5<InEigenType> input_padded = input_val.pad(padding);
				770
				771	// 1. initialize with bias
				772	Eigen::array<Eigen::Index, 5> reshape_dim;
				773	reshape_dim.fill(1);
				774	reshape_dim[4] = b_out_channels;
				775
				776	Eigen::array<Eigen::Index, 5> bcast;
				777	bcast[0] = out_batch;
				778	bcast[1] = out_depth;
				779	bcast[2] = out_height;
				780	bcast[3] = out_width;
				781	bcast[4] = 1;
				782	this->output->getTensor() = this->bias->getTensor().reshape(reshape_dim).broadcast(bcast);
				783
				784	// 2. direct convolution
				785	AccEigenType acc = 0;
				786	int d_idx, h_idx, w_idx;
				787
				788	for (int ob = 0; ob < out_batch; ob++)
				789	{
				790	for (int od = 0; od < out_depth; od++)
				791	{
				792	for (int oh = 0; oh < out_height; oh++)
				793	{
				794	for (int ow = 0; ow < out_width; ow++)
				795	{
				796	for (int oc = 0; oc < out_channels; oc++)
				797	{
				798	acc = 0;
				799	for (int fd = 0; fd < f_depth; fd++)
				800	{
				801	d_idx = od * stride_d + fd * dilation_d;
				802	for (int fh = 0; fh < f_height; fh++)
				803	{
				804	h_idx = oh * stride_h + fh * dilation_h;
				805	for (int fw = 0; fw < f_width; fw++)
				806	{
				807	w_idx = ow * stride_w + fw * dilation_w;
				808	for (int ic = 0; ic < in_channels; ic++)
				809	{
				810	acc += ((AccEigenType)input_padded(ob, d_idx, h_idx, w_idx, ic) *
				811	(AccEigenType)weight_val(oc, fd, fh, fw, ic));
				812	}
				813	}
				814	}
				815	}
				816	this->output->getTensor()(ob, od, oh, ow, oc) = acc;
				817	}
				818	}
				819	}
				820	}
				821	}
				822
				823	if (AccDtype == DType_INT48)
				824	{
				825	this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
				826	this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
				827	}
				828
				829	return GraphNode::eval();
				830	}
				831
				832	template <DType InDtype, DType WeightDtype>
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	833	OpDepthwiseConv2d<InDtype, WeightDtype>::OpDepthwiseConv2d(SubgraphTraverser* sgt_,
				834	TosaAttributeBase* attribute_,
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	835	TosaQuantInfoBase* qinfo_,
				836	uint64_t id_)
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	837	: GraphNode(sgt_, Op_DEPTHWISE_CONV2D, id_)
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	838	{
				839	setRequiredOperands(3, 1);
				840	setRequiredRank(4);
				841
Kevin Cheng	93a1628	2021-08-31 16:14:03 -0700	[diff] [blame]	842	INIT_ATTRIBUTE(Conv);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	843	INIT_QINFO(Conv);
				844	}
				845
				846	template <DType InDtype, DType WeightDtype>
				847	OpDepthwiseConv2d<InDtype, WeightDtype>::~OpDepthwiseConv2d()
				848	{
				849	if (attribute)
				850	delete attribute;
				851	if (qinfo)
				852	delete qinfo;
				853	}
				854
				855	template <DType InDtype, DType WeightDtype>
				856	int OpDepthwiseConv2d<InDtype, WeightDtype>::checkTensorAttributes()
				857	{
				858	if (validateRequiredOperands())
				859	return 1;
				860
				861	if (validateRequiredRank(inputs[0]) \|\| validateRequiredRank(inputs[1]) \|\| validateRequiredRank(outputs[0]))
				862	{
				863	return 1;
				864	}
				865
				866	// 'bias' checked separatedly since it doens't make sense to make required rank ranging from 1 to 4
				867	if (inputs[2]->getRank() != 1)
				868	{
				869	printNodeValidationError("OpDepthwiseConv2d: bias tensor must be rank 1");
				870	}
				871
Kevin Cheng	cc61be3	2021-10-14 17:09:57 -0700	[diff] [blame]	872	ERROR_IF(outputs[0]->getDtype() != AccDtype,
Kevin Cheng	8079480	2021-11-01 11:14:13 -0700	[diff] [blame^]	873	"OpDepthwiseConv2d: Output data type not supported for this configuration of operator");
Kevin Cheng	cc61be3	2021-10-14 17:09:57 -0700	[diff] [blame]	874
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	875	input = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
				876	weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
				877	bias = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
				878	output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);
				879
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	880	if (attribute->padding().size() != 4)
				881	{
				882	printNodeValidationError("OpDepthwiseConv2d: illegal size for attribute padding");
				883	return 1;
				884	}
				885
				886	if (attribute->stride().size() != 2)
				887	{
				888	printNodeValidationError("OpDepthwiseConv2d: illegal size for attribute stride");
				889	return 1;
				890	}
				891
				892	if (attribute->dilation().size() != 2)
				893	{
				894	printNodeValidationError("OpDepthwiseConv2d: illegal size for attribute dilation");
				895	return 1;
				896	}
				897
Kevin Cheng	cc61be3	2021-10-14 17:09:57 -0700	[diff] [blame]	898	if (this->qinfo)
				899	{
				900	if (InDtype != DType_INT8)
				901	{
				902	ERROR_IF(this->qinfo->input_zp() != 0, "OpDepthwiseConv2d: zeropoint only for int8_t");
				903	}
				904	if (WeightDtype != DType_INT8)
				905	{
				906	ERROR_IF(this->qinfo->weight_zp() != 0, "OpDepthwiseConv2d: zeropoint only for int8_t");
				907	}
				908	}
				909
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	910	return 0;
				911	}
				912
				913	template <DType InDtype, DType WeightDtype>
				914	int OpDepthwiseConv2d<InDtype, WeightDtype>::eval()
				915	{
				916	int in_batch = this->input->getShape()[0];
				917	int in_height = this->input->getShape()[1];
				918	int in_width = this->input->getShape()[2];
				919	int in_channels = this->input->getShape()[3];
				920
				921	int f_height = this->weight->getShape()[0];
				922	int f_width = this->weight->getShape()[1];
				923	int f_in_channels = this->weight->getShape()[2];
				924	int f_multiplier = this->weight->getShape()[3];
				925
				926	int b_out_channels = this->bias->getShape()[0];
				927
				928	int out_batch = this->output->getShape()[0];
				929	int out_height = this->output->getShape()[1];
				930	int out_width = this->output->getShape()[2];
				931	int out_channels = this->output->getShape()[3];
				932
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	933	ERROR_IF(in_batch != out_batch, "OpDepthwiseConv2d: tensor batch mismatch %d != %d", in_batch, out_batch);
				934	ERROR_IF(f_in_channels != in_channels, "OpDepthwiseConv2d: tensor input channel mismatch %d != %d", f_in_channels,
				935	in_channels);
				936	ERROR_IF(in_channels * f_multiplier != out_channels, "OpDepthwiseConv2d: tensor output channel mismatch %d != %d",
				937	in_channels * f_multiplier, out_channels);
				938	ERROR_IF(b_out_channels != out_channels, "OpDepthwiseConv2d: bias channels mismatch %d != %d", b_out_channels,
				939	out_channels);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	940
				941	int padding_top = this->attribute->padding()[0];
				942	int padding_bottom = this->attribute->padding()[1];
				943	int padding_left = this->attribute->padding()[2];
				944	int padding_right = this->attribute->padding()[3];
				945	int stride_h = this->attribute->stride()[0];
				946	int stride_w = this->attribute->stride()[1];
				947	int dilation_h = this->attribute->dilation()[0];
				948	int dilation_w = this->attribute->dilation()[1];
				949
				950	DEBUG_INFO(OP,
				951	"perform OpDepthwiseConv2d, input.shape=[%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d], "
				952	"output.shape=[%d,%d,%d,%d], stride=[%d,%d], dilation=[%d,%d], padding=[%d,%d,%d,%d]",
				953	in_batch, in_height, in_width, in_channels, f_height, f_width, f_in_channels, f_multiplier, out_batch,
				954	out_height, out_width, out_channels, stride_h, stride_w, dilation_h, dilation_w, padding_top,
				955	padding_bottom, padding_left, padding_right);
				956
				957	Eigen::array<std::pair<int32_t, int32_t>, 4> padding;
				958	padding[0] = std::make_pair(0, 0);
				959	padding[1] = std::make_pair(padding_top, padding_bottom);
				960	padding[2] = std::make_pair(padding_left, padding_right);
				961	padding[3] = std::make_pair(0, 0);
				962
				963	TIn input_val = this->input->getTensor();
				964	TWeight weight_val = this->weight->getTensor();
				965	if (this->qinfo)
				966	{
				967	input_val = input_val - (InEigenType)this->qinfo->input_zp();
				968	weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
				969	}
				970
				971	ETensor4<InEigenType> input_padded = input_val.pad(padding);
				972
				973	// GEMM doesn't fit well with DepthwiseConv2d
				974	// 1. use extract_image_patches() to handle stride/dilation/padding
				975	// 2. perform direct convolution
				976
				977	// 1. extract_image_patches() output [N, KH, KW, OH * OW, IC]
				978	ETensor5<InEigenType> input_extract_patches = input_padded.extract_image_patches(
				979	f_height, f_width, stride_h, stride_w, dilation_h, dilation_w, Eigen::PADDING_VALID);
				980
				981	Eigen::array<Eigen::Index, 4> reshape_dim;
				982	reshape_dim.fill(1);
				983	reshape_dim[3] = b_out_channels;
				984
				985	Eigen::array<Eigen::Index, 4> bcast;
				986	bcast[0] = out_batch;
				987	bcast[1] = out_height;
				988	bcast[2] = out_width;
				989	bcast[3] = 1;
				990
				991	// initialize with bias
				992	this->output->getTensor() = this->bias->getTensor().reshape(reshape_dim).broadcast(bcast);
				993
				994	// 2. direct depthwise convolution
				995	for (int ob = 0; ob < out_batch; ob++)
				996	{
				997	for (int oh = 0; oh < out_height; oh++)
				998	{
				999	for (int ow = 0; ow < out_width; ow++)
				1000	{
				1001	for (int ic = 0; ic < in_channels; ic++)
				1002	{
				1003	for (int cm = 0; cm < f_multiplier; cm++)
				1004	{
				1005	for (int fh = 0; fh < f_height; fh++)
				1006	{
				1007	for (int fw = 0; fw < f_width; fw++)
				1008	{
				1009	this->output->getTensor()(ob, oh, ow, ic * f_multiplier + cm) +=
				1010	((AccEigenType)input_extract_patches(ob, fh, fw, ow * out_height + oh, ic) *
				1011	(AccEigenType)weight_val(fh, fw, ic, cm));
				1012	}
				1013	}
				1014	}
				1015	}
				1016	}
				1017	}
				1018	}
				1019
				1020	if (AccDtype == DType_INT48)
				1021	{
				1022	this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
				1023	this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
				1024	}
				1025
				1026	return GraphNode::eval();
				1027	}
				1028
				1029	template <DType InDtype, DType WeightDtype>
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	1030	OpFullyConnected<InDtype, WeightDtype>::OpFullyConnected(SubgraphTraverser* sgt_,
				1031	TosaAttributeBase* attribute_,
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1032	TosaQuantInfoBase* qinfo_,
				1033	uint64_t id_)
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	1034	: GraphNode(sgt_, Op_FULLY_CONNECTED, id_)
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1035	{
				1036	setRequiredOperands(3, 1);
				1037	setRequiredRank(2);
				1038
				1039	INIT_QINFO(Conv);
				1040	}
				1041
				1042	template <DType InDtype, DType WeightDtype>
				1043	OpFullyConnected<InDtype, WeightDtype>::~OpFullyConnected()
				1044	{
				1045	if (qinfo)
				1046	delete qinfo;
				1047	}
				1048
				1049	template <DType InDtype, DType WeightDtype>
				1050	int OpFullyConnected<InDtype, WeightDtype>::checkTensorAttributes()
				1051	{
				1052	if (validateRequiredOperands())
				1053	return 1;
				1054
				1055	if (validateRequiredRank(inputs[0]) \|\| validateRequiredRank(inputs[1]) \|\| validateRequiredRank(outputs[0]))
				1056	{
				1057	return 1;
				1058	}
				1059
				1060	input = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
				1061	weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
				1062	bias = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
				1063
				1064	if (input->getShape()[1] != weight->getShape()[1])
				1065	{
				1066	printNodeValidationError("OpFullyConnected operator input.shape[1] should match weight.shape[1]");
				1067	return 1;
				1068	}
				1069
				1070	if (weight->getShape()[0] != bias->getShape()[0])
				1071	{
				1072	printNodeValidationError("OpFullyConnected operator bias.shape[0] should match weight.shape[0]");
				1073	return 1;
				1074	}
				1075
Kevin Cheng	cc61be3	2021-10-14 17:09:57 -0700	[diff] [blame]	1076	ERROR_IF(outputs[0]->getDtype() != AccDtype,
				1077	"OpFullyConnected: Output data type not supported for this configuration of operator");
				1078
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1079	output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);
				1080
Kevin Cheng	cc61be3	2021-10-14 17:09:57 -0700	[diff] [blame]	1081	if (this->qinfo)
				1082	{
				1083	if (InDtype != DType_INT8)
				1084	{
				1085	ERROR_IF(this->qinfo->input_zp() != 0, "OpFullyConnected: zeropoint only for int8_t");
				1086	}
				1087	if (WeightDtype != DType_INT8)
				1088	{
				1089	ERROR_IF(this->qinfo->weight_zp() != 0, "OpFullyConnected: zeropoint only for int8_t");
				1090	}
				1091	}
				1092
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1093	return 0;
				1094	}
				1095
				1096	template <DType InDtype, DType WeightDtype>
				1097	int OpFullyConnected<InDtype, WeightDtype>::eval()
				1098	{
				1099	typedef Eigen::Tensor<int, 1>::DimensionPair DimPair;
				1100	Eigen::array<DimPair, 1> dims{ { DimPair(1, 0) } };
				1101
				1102	Eigen::array<Eigen::Index, 2> weight_shuffle{ 1, 0 };
				1103
				1104	Eigen::array<Eigen::Index, 2> bias_reshape;
				1105	bias_reshape[0] = 1;
				1106	bias_reshape[1] = this->bias->getShape()[0];
				1107
				1108	Eigen::array<Eigen::Index, 2> bias_bcast;
				1109	bias_bcast[0] = this->input->getShape()[0];
				1110	bias_bcast[1] = 1;
				1111
				1112	TIn input_val = this->input->getTensor();
				1113	TWeight weight_val = this->weight->getTensor().shuffle(weight_shuffle);
				1114	if (this->qinfo)
				1115	{
				1116	input_val = input_val - (InEigenType)this->qinfo->input_zp();
				1117	weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
				1118	}
				1119
				1120	this->output->getTensor() =
				1121	input_val.template cast<AccEigenType>().contract(weight_val.template cast<AccEigenType>(), dims) +
				1122	this->bias->getTensor().reshape(bias_reshape).broadcast(bias_bcast);
				1123
				1124	if (AccDtype == DType_INT48)
				1125	{
				1126	this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
				1127	this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
				1128	}
				1129	return GraphNode::eval();
				1130	}
				1131
				1132	template <DType Dtype>
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	1133	OpMatMul<Dtype>::OpMatMul(SubgraphTraverser* sgt_,
				1134	TosaAttributeBase* attribute_,
				1135	TosaQuantInfoBase* qinfo_,
				1136	uint64_t id_)
				1137	: GraphNode(sgt_, Op_MATMUL, id_)
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1138	{
				1139	setRequiredOperands(2, 1);
Kevin Cheng	2d60f00	2021-06-09 14:18:32 -0700	[diff] [blame]	1140	setRequiredRank(3);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1141
				1142	INIT_QINFO(MatMul);
				1143	}
				1144
				1145	template <DType Dtype>
				1146	OpMatMul<Dtype>::~OpMatMul()
				1147	{
				1148	if (qinfo)
				1149	delete qinfo;
				1150	}
				1151
				1152	template <DType Dtype>
				1153	int OpMatMul<Dtype>::checkTensorAttributes()
				1154	{
				1155	if (validateRequiredOperands())
				1156	return 1;
				1157
				1158	if (validateRequiredRank(inputs[0]) \|\| validateRequiredRank(inputs[1]) \|\| validateRequiredRank(outputs[0]))
				1159	{
				1160	return 1;
				1161	}
				1162
Kevin Cheng	cc61be3	2021-10-14 17:09:57 -0700	[diff] [blame]	1163	ERROR_IF(outputs[0]->getDtype() != AccDtype,
Kevin Cheng	8079480	2021-11-01 11:14:13 -0700	[diff] [blame^]	1164	"OpMatMul: Output data type not supported for this configuration of operator");
Kevin Cheng	cc61be3	2021-10-14 17:09:57 -0700	[diff] [blame]	1165
Kevin Cheng	2d60f00	2021-06-09 14:18:32 -0700	[diff] [blame]	1166	a = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
				1167	b = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[1]);
				1168	output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1169
Kevin Cheng	2d60f00	2021-06-09 14:18:32 -0700	[diff] [blame]	1170	ASSERT_MEM(a && b && output);
				1171
				1172	// a: [N, H, C]
				1173	// b: [N, C, W]
				1174	// c: [N, H, W]
				1175
				1176	// Check N
				1177	if (a->getShape()[0] != b->getShape()[0] \|\| a->getShape()[0] != output->getShape()[0])
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1178	{
Kevin Cheng	2d60f00	2021-06-09 14:18:32 -0700	[diff] [blame]	1179	printNodeValidationError("OpMatMul operator a.shape[0], b.shape[0] and output.shape[0] should match");
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1180	return 1;
				1181	}
Kevin Cheng	2d60f00	2021-06-09 14:18:32 -0700	[diff] [blame]	1182	N = a->getShape()[0];
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1183
Kevin Cheng	2d60f00	2021-06-09 14:18:32 -0700	[diff] [blame]	1184	// Check C
				1185	if (a->getShape()[2] != b->getShape()[1])
				1186	{
				1187	printNodeValidationError("OpMatMul operator a.shape[2] should match b.shape[1]");
				1188	return 1;
				1189	}
				1190	C = a->getShape()[2];
				1191
				1192	// Check H
				1193	if (a->getShape()[1] != output->getShape()[1])
				1194	{
				1195	printNodeValidationError("OpMatMul operator a.shape[1] should match output.shape[1]");
				1196	return 1;
				1197	}
				1198	H = a->getShape()[1];
				1199
				1200	// Check W
				1201	if (b->getShape()[2] != output->getShape()[2])
				1202	{
				1203	printNodeValidationError("OpMatMul operator output.shape[2] should match output.shape[2]");
				1204	return 1;
				1205	}
				1206	W = b->getShape()[2];
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1207
Kevin Cheng	8079480	2021-11-01 11:14:13 -0700	[diff] [blame^]	1208	if (Dtype != DType_INT8 && this->qinfo)
Kevin Cheng	cc61be3	2021-10-14 17:09:57 -0700	[diff] [blame]	1209	{
				1210	ERROR_IF(this->qinfo->a_zp() != 0, "OpMatMul: zeropoint only for int8_t");
				1211	ERROR_IF(this->qinfo->b_zp() != 0, "OpMatMul: zeropoint only for int8_t");
				1212	}
				1213
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1214	return 0;
				1215	}
				1216
				1217	template <DType Dtype>
				1218	int OpMatMul<Dtype>::eval()
				1219	{
				1220	typedef Eigen::Tensor<int, 1>::DimensionPair DimPair;
				1221	Eigen::array<DimPair, 1> dims{ { DimPair(1, 0) } };
				1222
				1223	TIn a_val = this->a->getTensor();
				1224	TIn b_val = this->b->getTensor();
				1225	if (this->qinfo)
				1226	{
				1227	a_val = a_val - (InEigenType)this->qinfo->a_zp();
				1228	b_val = b_val - (InEigenType)this->qinfo->b_zp();
				1229	}
				1230
Kevin Cheng	2d60f00	2021-06-09 14:18:32 -0700	[diff] [blame]	1231	Eigen::array<Eigen::Index, 2> a_rank2_shape({ H, C });
				1232	Eigen::array<Eigen::Index, 2> b_rank2_shape({ C, W });
				1233	Eigen::array<Eigen::Index, 3> output_rank3_shape({ 1, H, W });
				1234
				1235	Eigen::array<Eigen::Index, 3> a_size_array({ 1, H, C });
				1236	Eigen::array<Eigen::Index, 3> b_size_array({ 1, C, W });
				1237
				1238	Eigen::array<Eigen::Index, 3> a_begin_array({ 0, 0, 0 });
				1239	Eigen::array<Eigen::Index, 3> b_begin_array({ 0, 0, 0 });
				1240
				1241	// Iterate N dimension.
				1242	for (int i = 0; i < N; i++)
				1243	{
				1244	a_begin_array[0] = i;
				1245	b_begin_array[0] = i;
				1246
				1247	TInRank2 a_rank2_val = a_val.slice(a_begin_array, a_size_array).reshape(a_rank2_shape);
				1248	TInRank2 b_rank2_val = b_val.slice(b_begin_array, b_size_array).reshape(b_rank2_shape);
				1249	TAccRank2 output_rank2_val =
				1250	a_rank2_val.template cast<AccEigenType>().contract(b_rank2_val.template cast<AccEigenType>(), dims);
				1251	TAcc output_rank3_val = output_rank2_val.reshape(output_rank3_shape);
				1252	if (i == 0)
				1253	{
				1254	this->output->getTensor() = output_rank3_val;
				1255	}
				1256	else
				1257	{
				1258	TAcc temp = this->output->getTensor().concatenate(output_rank3_val, 0);
				1259	this->output->getTensor() = temp;
				1260	}
				1261	}
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1262
				1263	if (AccDtype == DType_INT48)
				1264	{
Kevin Cheng	2d60f00	2021-06-09 14:18:32 -0700	[diff] [blame]	1265	this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
				1266	this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1267	}
				1268
				1269	return GraphNode::eval();
				1270	}
				1271
				1272	template <DType Dtype>
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	1273	OpMaxPool2d<Dtype>::OpMaxPool2d(SubgraphTraverser* sgt_,
				1274	TosaAttributeBase* attribute_,
				1275	TosaQuantInfoBase* qinfo_,
				1276	uint64_t id_)
				1277	: GraphNode(sgt_, Op_MAX_POOL2D, id_)
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1278	{
				1279	setRequiredOperands(1, 1);
				1280	setRequiredRank(4);
				1281
Kevin Cheng	93a1628	2021-08-31 16:14:03 -0700	[diff] [blame]	1282	INIT_ATTRIBUTE(Pool);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1283	}
				1284
				1285	template <DType Dtype>
				1286	OpMaxPool2d<Dtype>::~OpMaxPool2d()
				1287	{
				1288	if (attribute)
				1289	delete attribute;
				1290	}
				1291
				1292	template <DType Dtype>
				1293	int OpMaxPool2d<Dtype>::checkTensorAttributes()
				1294	{
				1295	if (validateRequiredOperands())
				1296	return 1;
				1297
				1298	if (validateRequiredRank(inputs[0]) \|\| validateRequiredRank(outputs[0]))
				1299	{
				1300	return 1;
				1301	}
				1302
				1303	if (inputs[0]->matchType(*outputs[0]))
				1304	{
				1305	printNodeValidationError("OpMaxPool2d: input and output tensor type mismatch");
				1306	return 1;
				1307	}
				1308
				1309	in = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
				1310	out = dynamic_cast<TosaReference::TensorTemplate<TOut>*>(outputs[0]);
				1311
Kevin Cheng	7eb93d7	2021-10-09 01:26:08 +0000	[diff] [blame]	1312	std::string msg;
				1313	if (check_pool2d_attribute_common(attribute, in->getShape(), out->getShape(), msg))
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1314	{
Kevin Cheng	7eb93d7	2021-10-09 01:26:08 +0000	[diff] [blame]	1315	msg = "OpMaxPool2d: " + msg;
				1316	printNodeValidationError(msg.c_str());
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1317	return 1;
				1318	}
				1319
				1320	return 0;
				1321	}
				1322
				1323	template <DType Dtype>
				1324	int OpMaxPool2d<Dtype>::eval()
				1325	{
				1326	int in_batch = this->in->getShape()[0];
				1327	int in_height = this->in->getShape()[1];
				1328	int in_width = this->in->getShape()[2];
				1329	int in_channels = this->in->getShape()[3];
				1330
				1331	int out_batch = this->out->getShape()[0];
				1332	int out_height = this->out->getShape()[1];
				1333	int out_width = this->out->getShape()[2];
				1334	int out_channels = this->out->getShape()[3];
				1335
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	1336	ERROR_IF(in_batch != out_batch, "OpMaxPool2d: tensor batch mismatch %d != %d", in_batch, out_batch);
				1337	ERROR_IF(in_channels != out_channels, "OpMaxPool2d: tensor channel mismatch %d != %d", in_channels, out_channels);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1338
				1339	int padding_top = this->attribute->padding()[0];
				1340	int padding_bottom = this->attribute->padding()[1];
				1341	int padding_left = this->attribute->padding()[2];
				1342	int padding_right = this->attribute->padding()[3];
				1343	int kernel_h = this->attribute->kernel()[0];
				1344	int kernel_w = this->attribute->kernel()[1];
				1345	int stride_h = this->attribute->stride()[0];
				1346	int stride_w = this->attribute->stride()[1];
				1347
				1348	DEBUG_INFO(OP,
				1349	"perform MaxPool2d, input.shape=[%d,%d,%d,%d], output.shape=[%d,%d,%d,%d], kernel=[%d,%d], "
				1350	"stride=[%d,%d], padding=[%d,%d,%d,%d]",
				1351	in_batch, in_height, in_width, in_channels, out_batch, out_height, out_width, out_channels, kernel_h,
				1352	kernel_w, stride_h, stride_w, padding_top, padding_bottom, padding_left, padding_right);
				1353
				1354	Eigen::array<Eigen::Index, 2> im2col_input_dims;
				1355	im2col_input_dims[0] = kernel_h * kernel_w;
				1356	im2col_input_dims[1] = out_batch * out_height * out_width * out_channels;
				1357
				1358	Eigen::array<Eigen::Index, 4> col2im_output_dims;
				1359	col2im_output_dims[0] = out_batch;
				1360	col2im_output_dims[1] = out_height;
				1361	col2im_output_dims[2] = out_width;
				1362	col2im_output_dims[3] = out_channels;
				1363
				1364	Eigen::array<std::pair<int32_t, int32_t>, 4> padding;
				1365	padding[0] = std::make_pair(0, 0);
				1366	padding[1] = std::make_pair(padding_top, padding_bottom);
				1367	padding[2] = std::make_pair(padding_left, padding_right);
				1368	padding[3] = std::make_pair(0, 0);
				1369
				1370	ETensor4<InEigenType> input_padded = this->in->getTensor().pad(padding, std::numeric_limits<InEigenType>::lowest());
				1371
				1372	// extract_image_patches() output [N, KH, KW, H * W, C]
				1373	// transpose to [KH, KW, N, H * W, C]
				1374	// reshape to [KH * KW, N * H * W * C]
				1375	//
				1376	// Set the padding value to be the most negative value that can be
				1377	// represented by the datatype to ensure that any padding values will be equal
				1378	// to or smaller than the actual maximum in the KH x KW patch.
				1379	ETensor2<InEigenType> input_extract_patches =
				1380	input_padded
				1381	.extract_image_patches(kernel_h, kernel_w, stride_h, stride_w, 1, 1, Eigen::PADDING_VALID,
				1382	std::numeric_limits<InEigenType>::lowest())
				1383	.shuffle(Eigen::array<Eigen::Index, 5>{ 1, 2, 0, 3, 4 })
				1384	.reshape(im2col_input_dims);
				1385
				1386	// Get the maximum of the KHxHW patches along axis 0
				1387	Eigen::Tensor<DenseIndex, 1> tensor_argmax = input_extract_patches.argmax(0);
				1388
				1389	// 1D result with [N * H * W * C]
				1390	ETensor1<OutEigenType> out_1d(this->out->getElementCount());
				1391
				1392	// index input_patches with argmax array should give the result
				1393	for (size_t i = 0; i < this->out->getElementCount(); i++)
				1394	{
				1395	out_1d(i) = (OutEigenType)input_extract_patches(tensor_argmax(i), i);
				1396	}
				1397
				1398	// reshape result to [N, H, W, C]
				1399	this->out->getTensor() = out_1d.reshape(col2im_output_dims);
				1400
				1401	return GraphNode::eval();
				1402	}
				1403
Kevin Cheng	cc61be3	2021-10-14 17:09:57 -0700	[diff] [blame]	1404	template <DType InDtype, DType WeightDtype>
				1405	OpTransposeConv2d<InDtype, WeightDtype>::OpTransposeConv2d(SubgraphTraverser* sgt_,
				1406	TosaAttributeBase* attribute_,
				1407	TosaQuantInfoBase* qinfo_,
				1408	uint64_t id_)
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	1409	: GraphNode(sgt_, Op_TRANSPOSE_CONV2D, id_)
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1410	{
				1411	setRequiredOperands(3, 1);
				1412	setRequiredRank(4);
				1413
Kevin Cheng	93a1628	2021-08-31 16:14:03 -0700	[diff] [blame]	1414	INIT_ATTRIBUTE(TransposeConv);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1415	INIT_QINFO(Conv);
				1416	}
				1417
Kevin Cheng	cc61be3	2021-10-14 17:09:57 -0700	[diff] [blame]	1418	template <DType InDtype, DType WeightDtype>
				1419	OpTransposeConv2d<InDtype, WeightDtype>::~OpTransposeConv2d()
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1420	{
				1421	if (attribute)
				1422	delete attribute;
				1423	if (qinfo)
				1424	delete qinfo;
				1425	}
				1426
Kevin Cheng	cc61be3	2021-10-14 17:09:57 -0700	[diff] [blame]	1427	template <DType InDtype, DType WeightDtype>
				1428	int OpTransposeConv2d<InDtype, WeightDtype>::checkTensorAttributes()
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1429	{
				1430	if (validateRequiredOperands())
				1431	return 1;
				1432
				1433	if (validateRequiredRank(inputs[0]) \|\| validateRequiredRank(inputs[1]) \|\| validateRequiredRank(outputs[0]))
				1434	{
				1435	return 1;
				1436	}
				1437
Kevin Cheng	cc61be3	2021-10-14 17:09:57 -0700	[diff] [blame]	1438	ERROR_IF(outputs[0]->getDtype() != AccDtype,
Kevin Cheng	8079480	2021-11-01 11:14:13 -0700	[diff] [blame^]	1439	"OpTransposeConv2d: Output data type not supported for this configuration of operator");
Kevin Cheng	cc61be3	2021-10-14 17:09:57 -0700	[diff] [blame]	1440
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1441	input = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
				1442	weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
				1443	bias = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
				1444	output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);
				1445
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1446	if (attribute->outpad().size() != 2)
				1447	{
				1448	printNodeValidationError("OpTransposeConv2d: illegal size for attribute outpad");
				1449	return 1;
				1450	}
				1451
				1452	if (attribute->stride().size() != 2)
				1453	{
				1454	printNodeValidationError("OpTransposeConv2d: illegal size for attribute stride");
				1455	return 1;
				1456	}
				1457
				1458	if (attribute->dilation().size() != 2)
				1459	{
				1460	printNodeValidationError("OpTransposeConv2d: illegal size for attribute dilation");
				1461	return 1;
				1462	}
				1463
				1464	if (attribute->output_shape().size() != 4)
				1465	{
				1466	printNodeValidationError("OpTransposeConv2d: illegal size for attribute output_shape");
				1467	return 1;
				1468	}
				1469
				1470	for (int d = 0; d < 4; d++)
				1471	{
				1472	if (attribute->output_shape()[d] != this->output->getShape()[d])
				1473	{
				1474	printNodeValidationError("OpTransposeConv2d: illegal size for attribute output_shape");
				1475	return 1;
				1476	}
				1477	}
				1478
Kevin Cheng	cc61be3	2021-10-14 17:09:57 -0700	[diff] [blame]	1479	if (this->qinfo)
				1480	{
				1481	if (InDtype != DType_INT8)
				1482	{
				1483	ERROR_IF(this->qinfo->input_zp() != 0, "OpTransposeConv2d: zeropoint only for int8_t");
				1484	}
				1485	if (WeightDtype != DType_INT8)
				1486	{
				1487	ERROR_IF(this->qinfo->weight_zp() != 0, "OpTransposeConv2d: zeropoint only for int8_t");
				1488	}
				1489	}
				1490
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1491	return 0;
				1492	}
				1493
Kevin Cheng	cc61be3	2021-10-14 17:09:57 -0700	[diff] [blame]	1494	template <DType InDtype, DType WeightDtype>
				1495	int OpTransposeConv2d<InDtype, WeightDtype>::eval()
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1496	{
				1497	int in_batch = this->input->getShape()[0];
				1498	int in_height = this->input->getShape()[1];
				1499	int in_width = this->input->getShape()[2];
				1500	int in_channels = this->input->getShape()[3];
				1501
				1502	int f_out_channels = this->weight->getShape()[0];
				1503	int f_height = this->weight->getShape()[1];
				1504	int f_width = this->weight->getShape()[2];
				1505	int f_in_channels = this->weight->getShape()[3];
				1506
				1507	int b_out_channels = this->bias->getShape()[0];
				1508
				1509	int out_batch = this->output->getShape()[0];
				1510	int out_height = this->output->getShape()[1];
				1511	int out_width = this->output->getShape()[2];
				1512	int out_channels = this->output->getShape()[3];
				1513
				1514	int padding_top = this->attribute->outpad()[0];
				1515	int padding_left = this->attribute->outpad()[1];
				1516	int stride_h = this->attribute->stride()[0];
				1517	int stride_w = this->attribute->stride()[1];
				1518	int dilation_h = this->attribute->dilation()[0];
				1519	int dilation_w = this->attribute->dilation()[1];
				1520
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame]	1521	ERROR_IF(in_batch != out_batch, "OpTransposeConv2d: tensor batch mismatch %d != %d", in_batch, out_batch);
				1522	ERROR_IF(f_in_channels != in_channels, "OpTransposeConv2d: tensor input channel mismatch %d != %d", f_in_channels,
				1523	in_channels);
				1524	ERROR_IF(f_out_channels != out_channels, "OpTransposeConv2d: tensor output channel mismatch %d != %d",
				1525	f_out_channels, out_channels);
				1526	ERROR_IF(b_out_channels != out_channels, "OpDepthwiseConv2d: bias channels mismatch %d != %d", b_out_channels,
				1527	out_channels);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1528
				1529	DEBUG_INFO(OP,
				1530	"perform OpTransposeConv2d, input.shape=[%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d], "
				1531	"output.shape=[%d,%d,%d,%d], stride=[%d,%d], dilation=[%d,%d], padding=[%d,%d]",
				1532	in_batch, in_height, in_width, in_channels, f_height, f_width, f_out_channels, f_in_channels, out_batch,
				1533	out_height, out_width, out_channels, stride_h, stride_w, dilation_h, dilation_w, padding_top,
				1534	padding_left);
				1535
				1536	TIn input_val = this->input->getTensor();
				1537	TWeight weight_val = this->weight->getTensor();
				1538	if (this->qinfo)
				1539	{
				1540	input_val = input_val - (InEigenType)this->qinfo->input_zp();
				1541	weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
				1542	}
				1543
				1544	Eigen::array<Eigen::Index, 4> reshape_dim;
				1545	reshape_dim.fill(1);
				1546	reshape_dim[3] = b_out_channels;
				1547
				1548	Eigen::array<Eigen::Index, 4> bcast;
				1549	bcast[0] = out_batch;
				1550	bcast[1] = out_height;
				1551	bcast[2] = out_width;
				1552	bcast[3] = 1;
				1553
				1554	// initialize with bias
				1555	this->output->getTensor() = this->bias->getTensor().reshape(reshape_dim).broadcast(bcast);
				1556
				1557	int out_x_origin, out_y_origin;
				1558	int out_x, out_y;
				1559
				1560	// reference implementation from: tensorflow/tensorflow/lite/kernels/internal/reference/reference_ops.h
				1561	for (int ob = 0; ob < out_batch; ob++)
				1562	{
				1563	for (int ih = 0; ih < in_height; ih++)
				1564	{
				1565	for (int iw = 0; iw < in_width; iw++)
				1566	{
				1567	out_x_origin = iw * stride_w - padding_left;
				1568	out_y_origin = ih * stride_h - padding_top;
				1569	for (int ic = 0; ic < in_channels; ic++)
				1570	{
				1571	for (int fh = 0; fh < f_height; fh++)
				1572	{
				1573	for (int fw = 0; fw < f_width; fw++)
				1574	{
				1575	out_x = out_x_origin + fw * dilation_w;
				1576	out_y = out_y_origin + fh * dilation_h;
				1577	for (int oc = 0; oc < out_channels; oc++)
				1578	{
				1579	if ((out_x >= 0 && out_x < out_width) && (out_y >= 0 && out_y < out_height))
				1580	{
				1581	this->output->getTensor()(ob, out_y, out_x, oc) +=
				1582	((AccEigenType)input_val(ob, ih, iw, ic) *
				1583	(AccEigenType)weight_val(oc, fh, fw, ic));
				1584	}
				1585	}
				1586	}
				1587	}
				1588	}
				1589	}
				1590	}
				1591	}
				1592
				1593	if (AccDtype == DType_INT48)
				1594	{
				1595	this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
				1596	this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
				1597	}
				1598
				1599	return GraphNode::eval();
				1600	}
				1601
				1602	// template explicit instantiation
				1603	DEF_INSTANTIATE_RANK1_6_ONE_RANK_ONE_TYPE(OpArgMax, FLOAT);
Kevin Cheng	3a47857	2021-01-22 17:21:02 -0800	[diff] [blame]	1604	DEF_INSTANTIATE_RANK1_6_ONE_RANK_ONE_TYPE(OpArgMax, INT8);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1605	DEF_INSTANTIATE_RANK1_6_ONE_RANK_ONE_TYPE(OpArgMax, INT16);
				1606
				1607	DEF_INSTANTIATE_ONE_TYPE(OpAvgPool2d, FLOAT)
Kevin Cheng	3a47857	2021-01-22 17:21:02 -0800	[diff] [blame]	1608	DEF_INSTANTIATE_ONE_TYPE(OpAvgPool2d, INT8)
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1609	DEF_INSTANTIATE_ONE_TYPE(OpAvgPool2d, INT16)
				1610
				1611	DEF_INSTANTIATE_TWO_TYPE(OpConv2d, FLOAT, FLOAT);
Kevin Cheng	3a47857	2021-01-22 17:21:02 -0800	[diff] [blame]	1612	DEF_INSTANTIATE_TWO_TYPE(OpConv2d, INT8, INT4);
				1613	DEF_INSTANTIATE_TWO_TYPE(OpConv2d, INT8, INT8);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1614	DEF_INSTANTIATE_TWO_TYPE(OpConv2d, INT16, INT8);
				1615
Kevin Cheng	1533b85	2021-09-01 12:51:58 -0700	[diff] [blame]	1616	DEF_INSTANTIATE_TWO_TYPE(OpConv3d, FLOAT, FLOAT);
				1617	DEF_INSTANTIATE_TWO_TYPE(OpConv3d, INT8, INT4);
				1618	DEF_INSTANTIATE_TWO_TYPE(OpConv3d, INT8, INT8);
				1619	DEF_INSTANTIATE_TWO_TYPE(OpConv3d, INT16, INT8);
				1620
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1621	DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, FLOAT, FLOAT);
Kevin Cheng	3a47857	2021-01-22 17:21:02 -0800	[diff] [blame]	1622	DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, INT8, INT4);
				1623	DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, INT8, INT8);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1624	DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, INT16, INT8);
				1625
				1626	DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, FLOAT, FLOAT);
Kevin Cheng	3a47857	2021-01-22 17:21:02 -0800	[diff] [blame]	1627	DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, INT8, INT4);
				1628	DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, INT8, INT8);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1629	DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, INT16, INT8);
				1630
Kevin Cheng	3a47857	2021-01-22 17:21:02 -0800	[diff] [blame]	1631	DEF_INSTANTIATE_ONE_TYPE(OpMatMul, INT8);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1632	DEF_INSTANTIATE_ONE_TYPE(OpMatMul, INT16);
				1633	DEF_INSTANTIATE_ONE_TYPE(OpMatMul, FLOAT);
				1634
				1635	DEF_INSTANTIATE_ONE_TYPE(OpMaxPool2d, FLOAT);
Kevin Cheng	3a47857	2021-01-22 17:21:02 -0800	[diff] [blame]	1636	DEF_INSTANTIATE_ONE_TYPE(OpMaxPool2d, INT8);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1637	DEF_INSTANTIATE_ONE_TYPE(OpMaxPool2d, INT16);
				1638
				1639	DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, FLOAT, FLOAT);
Kevin Cheng	3a47857	2021-01-22 17:21:02 -0800	[diff] [blame]	1640	DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, INT8, INT4);
				1641	DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, INT8, INT8);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1642	DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, INT16, INT8);