// Copyright (c) 2020-2021, ARM Limited.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "tensor_ops.h"
#include "quant_util.h"
#include "template_types.h"

using namespace TosaReference;
using namespace Eigen;
using namespace tosa;

int check_pool2d_attribute(tosa::TosaPoolAttribute* attribute,
                           std::vector<int32_t> input_shape,
                           std::vector<int32_t> output_shape,
                           std::string& msg)
{
    if (attribute->padding().size() != 4)
    {
        msg = "illegal size for attribute padding";
        return 1;
    }

    if (attribute->kernel().size() != 2)
    {
        msg = "illegal size for attribute kernel";
        return 1;
    }

    if (attribute->stride().size() != 2)
    {
        msg = "illegal size for attribute stride";
        return 1;
    }

    for (int32_t i : attribute->padding())
    {
        if (i < 0)
        {
            msg = "At least one pad is smaller than zero";
            return 1;
        }
    }

    for (int32_t i : attribute->kernel())
    {
        if (i < 1)
        {
            msg = "At least one kernel dimension is smaller than one";
            return 1;
        }
    }

    for (int32_t i : attribute->stride())
    {
        if (i < 1)
        {
            msg = "At least one stride dimension is smaller than one";
            return 1;
        }
    }

    int32_t IH = input_shape[1];
    int32_t IW = input_shape[2];
    int32_t OH = output_shape[1];
    int32_t OW = output_shape[2];

    int32_t pad_top = attribute->padding()[0];
    int32_t pad_bottom = attribute->padding()[1];
    int32_t pad_left = attribute->padding()[2];
    int32_t pad_right = attribute->padding()[3];

    int32_t stride_y = attribute->stride()[0];
    int32_t stride_x = attribute->stride()[1];
    int32_t kernel_y = attribute->kernel()[0];
    int32_t kernel_x = attribute->kernel()[1];

    if (pad_top >= kernel_y || pad_bottom >= kernel_y || pad_left >= kernel_x || pad_right >= kernel_x)
    {
        msg = "At least one pad is >= kernel dimension";
        return 1;
    }

    if ((OH != (IH + pad_top + pad_bottom + stride_y - kernel_y) / stride_y) ||
        (OW != (IW + pad_left + pad_right + stride_x - kernel_x) / stride_x))
    {
        msg = "Mismatch between output shape provided and expected output shape";
        return 1;
    }

    return 0;
}
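
// Worked example of the shape check above (illustration only, not spec text):
// with IH = 32, pad_top = pad_bottom = 1, kernel_y = 3 and stride_y = 2, the
// expected output height is OH = (32 + 1 + 1 + 2 - 3) / 2 = 33 / 2 = 16
// (integer division).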

int check_conv_attribute_qinfo(tosa::TosaConvAttribute* attribute,
                               tosa::TosaConvQuantInfo* qinfo,
                               uint32_t conv_dimension,
                               std::vector<int32_t> input_shape,
                               std::vector<int32_t> output_shape,
                               DType InDtype,
                               DType WeightDtype,
                               std::string& msg)
{
    if (attribute->padding().size() != (2 * conv_dimension))
    {
        msg = "Illegal size for attribute padding";
        return 1;
    }

    if (attribute->stride().size() != conv_dimension)
    {
        msg = "Illegal size for attribute stride";
        return 1;
    }

    if (attribute->dilation().size() != conv_dimension)
    {
        msg = "Illegal size for attribute dilation";
        return 1;
    }

    for (int32_t i : attribute->padding())
    {
        if (i < 0)
        {
            msg = "At least one pad is smaller than zero";
            return 1;
        }
    }

    for (int32_t i : attribute->stride())
    {
        if (i < 1)
        {
            msg = "At least one stride dimension is smaller than one";
            return 1;
        }
    }

    for (int32_t i : attribute->dilation())
    {
        if (i < 1)
        {
            msg = "At least one dilation dimension is smaller than one";
            return 1;
        }
    }

    if (qinfo)
    {
        if (InDtype != DType_INT8 && qinfo->input_zp() != 0)
        {
            msg = "zeropoint only for int8_t";
            return 1;
        }
        if (WeightDtype != DType_INT8 && qinfo->weight_zp() != 0)
        {
            msg = "zeropoint only for int8_t";
            return 1;
        }
    }

    return 0;
}
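
// Note on the padding layout assumed by the callers below: with
// conv_dimension == 2 the padding attribute holds [top, bottom, left, right];
// with conv_dimension == 3 it holds [d0, d1, top, bottom, left, right],
// matching the order the eval() functions read it in.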
175
Eric Kunzee5e26762020-10-13 16:11:07 -0700176template <int Rank, DType Dtype>
Kevin Chengacb550f2021-06-29 15:32:19 -0700177OpArgMax<Rank, Dtype>::OpArgMax(SubgraphTraverser* sgt_,
178 TosaAttributeBase* attribute_,
179 TosaQuantInfoBase* qinfo_,
180 uint64_t id_)
181 : GraphNode(sgt_, Op_ARGMAX, id_)
Eric Kunzee5e26762020-10-13 16:11:07 -0700182{
183 setRequiredOperands(1, 1);
Kevin Chengcc61be32021-10-14 17:09:57 -0700184 setRequiredRank(1, 4);
Eric Kunzee5e26762020-10-13 16:11:07 -0700185
186 INIT_ATTRIBUTE(Axis);
187}
188
189template <int Rank, DType Dtype>
190OpArgMax<Rank, Dtype>::~OpArgMax()
191{
192 if (attribute)
193 delete attribute;
194}
195
196template <int Rank, DType Dtype>
197int OpArgMax<Rank, Dtype>::checkTensorAttributes()
198{
199 if (validateRequiredOperands())
200 return 1;
201
Kevin Chengcc61be32021-10-14 17:09:57 -0700202 if (validateRequiredRank(inputs[0]))
Eric Kunzee5e26762020-10-13 16:11:07 -0700203 {
204 return 1;
205 }
206
Kevin Chengcc61be32021-10-14 17:09:57 -0700207 int32_t output_rank = inputs[0]->getRank() - 1;
208 if (output_rank != outputs[0]->getRank())
209 {
210 printNodeValidationError("OpArgMax: Output rank needs to be rank(input) - 1");
211 return 1;
212 }
213
214 if (outputs[0]->getDtype() != DType_INT32)
215 {
216 printNodeValidationError("OpArgMax: Output data type not supported for this configuration of operator");
217 return 1;
218 }
219
Eric Kunzee5e26762020-10-13 16:11:07 -0700220 input = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
221 output = dynamic_cast<TosaReference::TensorTemplate<TOut>*>(outputs[0]);
222
Kevin Chengcc61be32021-10-14 17:09:57 -0700223 if (attribute->axis() < 0 || attribute->axis() >= input->getRank())
224 {
225 printNodeValidationError("OpArgMax: Axis needs to be within [0, rank(input)]");
226 return 1;
227 }
228
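    // Every non-axis dimension of the input must match the corresponding
    // output dimension. For example (illustration), input shape [2, 3, 4]
    // with axis = 1 requires output shape [2, 4].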
    bool shape_check = true;
    for (int32_t i = 0; i < input->getRank(); i++)
    {
        if (i < attribute->axis())
        {
            if (input->getShape()[i] != output->getShape()[i])
            {
                shape_check = false;
                break;
            }
        }
        else if (i > attribute->axis())
        {
            if (input->getShape()[i] != output->getShape()[i - 1])
            {
                shape_check = false;
                break;
            }
        }
        // No need to check i == axis
    }
    if (!shape_check)
    {
        printNodeValidationError("OpArgMax: Mismatch between output shape provided and expected output shape");
        return 1;
    }

    return 0;
}

template <int Rank, DType Dtype>
int OpArgMax<Rank, Dtype>::eval()
{
    Eigen::Tensor<DenseIndex, Rank - 1> index = this->input->getTensor().argmax(attribute->axis());

    this->output->getTensor() = index.unaryExpr([](DenseIndex in) -> OutEigenType { return (OutEigenType)in; });

    return GraphNode::eval();
}

template <DType Dtype>
OpAvgPool2d<Dtype>::OpAvgPool2d(SubgraphTraverser* sgt_,
                                TosaAttributeBase* attribute_,
                                TosaQuantInfoBase* qinfo_,
                                uint64_t id_)
    : GraphNode(sgt_, Op_AVG_POOL2D, id_)
{
    setRequiredOperands(1, 1);
    setRequiredRank(4);

    INIT_ATTRIBUTE(Pool);
    INIT_QINFO(Unary);
}

template <DType Dtype>
OpAvgPool2d<Dtype>::~OpAvgPool2d()
{
    if (attribute)
        delete attribute;
}

template <DType Dtype>
int OpAvgPool2d<Dtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    if (inputs[0]->matchType(*outputs[0]))
    {
        printNodeValidationError("OpAvgPool2d: input and output tensor type mismatch");
        return 1;
    }

    in  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    out = dynamic_cast<TosaReference::TensorTemplate<TOut>*>(outputs[0]);

    if (Dtype != DType_INT8 && this->qinfo)
    {
        ERROR_IF(this->qinfo->input_zp() != 0, "OpAvgPool2d: zeropoint only for int8_t");
        ERROR_IF(this->qinfo->output_zp() != 0, "OpAvgPool2d: zeropoint only for int8_t");
    }

    std::string msg;
    if (check_pool2d_attribute(attribute, in->getShape(), out->getShape(), msg))
    {
        msg = "OpAvgPool2d: " + msg;
        printNodeValidationError(msg.c_str());
        return 1;
    }

    return 0;
}

// This calculates the number of padding elements used for each location along an axis.
// Average pooling only divides by the number of elements used, not including padding.
// This function uses left/right, but is also used for vertical padding with top/bottom.
template <DType Dtype>
ETensor1<int32_t> OpAvgPool2d<Dtype>::calculate_div_map_1d(int in_size, int out_size, int kernel_size, int stride, int32_t pad_left, int32_t pad_right)
{
    ETensor1<int32_t> result(out_size);

    result.setConstant(kernel_size);

    // adjust divisors on the left side for padding
    // We start at the leftmost output element, and remove pad_left - (index * stride) elements
    // until we have no more padding being used
    for (int index = 0; (index < pad_left / stride) && (index < out_size); index++)
    {
        int32_t adjust = pad_left - (index * stride);
        result(index) -= adjust;
    }

    // The process repeats on the right side. Padding starts taking effect as we
    // near the rightmost input element. The first output element which touches
    // padding is defined in the initialization of index below. Then we keep moving
    // to the right, increasing padding until we get to the last output element.
    int index = std::max(0, ((pad_left + in_size - kernel_size) / stride) + 1);
    for (; index < out_size; index++)
    {
        int32_t adjust = ((index * stride) + kernel_size) - (pad_left + in_size);
        result(index) -= adjust;
    }
    return result;
}
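
// Worked example (illustration only): in_size = 4, out_size = 4,
// kernel_size = 3, stride = 1, pad_left = pad_right = 1. Divisors start as
// [3, 3, 3, 3]; the left loop subtracts pad_left - 0 = 1 at index 0, and the
// right loop starts at index max(0, (1 + 4 - 3) / 1 + 1) = 3 and subtracts
// (3 + 3) - (1 + 4) = 1 there, giving [2, 3, 3, 2]: each edge window covers
// exactly one padding element.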

// assuming input and output tensors have the same scale, like the tflite
// reference, so there is no need to scale the input and output
template <DType Dtype>
int OpAvgPool2d<Dtype>::eval()
{
    int in_batch = this->in->getShape()[0];
    int in_height = this->in->getShape()[1];
    int in_width = this->in->getShape()[2];
    int in_channels = this->in->getShape()[3];

    int out_batch = this->out->getShape()[0];
    int out_height = this->out->getShape()[1];
    int out_width = this->out->getShape()[2];
    int out_channels = this->out->getShape()[3];

    ERROR_IF(in_batch != out_batch, "OpAvgPool2d: tensor batch mismatch %d != %d", in_batch, out_batch);
    ERROR_IF(in_channels != out_channels, "OpAvgPool2d: tensor channel mismatch %d != %d", in_channels, out_channels);

    int padding_top = this->attribute->padding()[0];
    int padding_bottom = this->attribute->padding()[1];
    int padding_left = this->attribute->padding()[2];
    int padding_right = this->attribute->padding()[3];
    int kernel_h = this->attribute->kernel()[0];
    int kernel_w = this->attribute->kernel()[1];
    int stride_h = this->attribute->stride()[0];
    int stride_w = this->attribute->stride()[1];

    DEBUG_INFO(OP,
               "perform AvgPool2d, input.shape=[%d,%d,%d,%d], output.shape=[%d,%d,%d,%d], kernel=[%d,%d], "
               "stride=[%d,%d], padding=[%d,%d,%d,%d]",
               in_batch, in_height, in_width, in_channels, out_batch, out_height, out_width, out_channels, kernel_h,
               kernel_w, stride_h, stride_w, padding_top, padding_bottom, padding_left, padding_right);

    Eigen::array<Eigen::Index, 2> im2col_input_dims;
    im2col_input_dims[0] = kernel_h * kernel_w;
    im2col_input_dims[1] = out_batch * out_height * out_width * out_channels;

    Eigen::array<Eigen::Index, 4> col2im_output_dims;
    col2im_output_dims[0] = out_batch;
    col2im_output_dims[1] = out_height;
    col2im_output_dims[2] = out_width;
    col2im_output_dims[3] = out_channels;

    Eigen::array<std::pair<int32_t, int32_t>, 4> padding;
    padding[0] = std::make_pair(0, 0);
    padding[1] = std::make_pair(padding_top, padding_bottom);
    padding[2] = std::make_pair(padding_left, padding_right);
    padding[3] = std::make_pair(0, 0);

    ETensor4<InEigenType> input_val = this->in->getTensor();
    if (this->qinfo)
    {
        input_val = input_val - (InEigenType)this->qinfo->input_zp();
    }

    ETensor4<InEigenType> input_padded = input_val.pad(padding);

    // assuming input and output have the same scale
    // so input and output scaling is not required
    // TODO: check whether the TOSA specification actually makes this assumption

    // extract_image_patches() output [N, KH, KW, H * W, C]
    // transpose to [KH, KW, N, H * W, C]
    // reshape to [KH * KW, N * H * W * C]
    ETensor2<InEigenType> input_extract_patches =
        input_padded.extract_image_patches(kernel_h, kernel_w, stride_h, stride_w, 1, 1, Eigen::PADDING_VALID)
            .shuffle(Eigen::array<Eigen::Index, 5>{ 1, 2, 0, 3, 4 })
            .reshape(im2col_input_dims);

    // 1D result with [N * H * W * C]
    ETensor1<AccEigenType> out_1d(this->out->getElementCount());
    out_1d.setZero();

    // sum pool
    for (size_t i = 0; i < this->out->getElementCount(); i++)
    {
        for (int32_t j = 0; j < kernel_h * kernel_w; j++)
        {
            out_1d(i) += (AccEigenType)input_extract_patches(j, i);
        }
    }

    // reshape result to [N, H, W, C] and divide with div_map
    ETensor4<AccEigenType> sum = out_1d.reshape(col2im_output_dims);

    // calculate 1d height/width div_map (number of elements this pooling window covers)
    // and outer product to get 2d div_map, then reshape/broadcast to [N, H, W, C]
    ETensor1<int32_t> div_map_h = calculate_div_map_1d(in_height, out_height, kernel_h, stride_h, padding_top, padding_bottom);
    ETensor1<int32_t> div_map_w = calculate_div_map_1d(in_width, out_width, kernel_w, stride_w, padding_left, padding_right);
    Eigen::array<Eigen::IndexPair<Eigen::Index>, 1> contract_dims = { Eigen::IndexPair<Eigen::Index>(1, 0) };
    Eigen::array<Eigen::Index, 4> bcast{ out_batch, 1, 1, out_channels };

    ETensor4<int32_t> div_map =
        div_map_h.reshape(Eigen::array<Eigen::Index, 2>{ out_height, 1 })
            .contract(div_map_w.reshape(Eigen::array<Eigen::Index, 2>{ 1, out_width }), contract_dims)
            .reshape(Eigen::array<Eigen::Index, 4>{ 1, out_height, out_width, 1 })
            .broadcast(bcast);

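    // For non-float types the divide below is done in fixed point:
    // reciprocal_scale() derives a (multiplier, shift) pair for each divisor
    // such that apply_scale_32(value, multiplier, shift) approximates
    // value / div, after which the output zeropoint is added and the result
    // is clamped to [QMin, QMax].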
    if (Dtype != DType_FLOAT)
    {
        try
        {
            this->out->getTensor() = sum.binaryExpr(div_map, [](AccEigenType value, int32_t div) -> OutEigenType {
                int32_t multiplier, shift;
                TosaReference::QuantUtil::reciprocal_scale(div, multiplier, shift);

                return (OutEigenType)TosaReference::QuantUtil::apply_scale_32(value, multiplier, shift, false);
            });
        }
        catch (std::string desc)
        {
            REQUIRE(false, "OpAvgPool2d apply_scale_32() fails: %s.", desc.c_str());
        }
        this->out->getTensor() = this->out->getTensor() + (OutEigenType)(this->qinfo->output_zp());
        this->out->getTensor() = this->out->getTensor().cwiseMax((OutEigenType)QMin);
        this->out->getTensor() = this->out->getTensor().cwiseMin((OutEigenType)QMax);
    }
    else
    {
        this->out->getTensor() = (sum / div_map.template cast<AccEigenType>()).template cast<OutEigenType>();
    }

    return GraphNode::eval();
}

template <DType InDtype, DType WeightDtype>
OpConv2d<InDtype, WeightDtype>::OpConv2d(SubgraphTraverser* sgt_,
                                         TosaAttributeBase* attribute_,
                                         TosaQuantInfoBase* qinfo_,
                                         uint64_t id_)
    : GraphNode(sgt_, Op_CONV2D, id_)
{
    setRequiredOperands(3, 1);
    setRequiredRank(4);

    INIT_ATTRIBUTE(Conv);
    INIT_QINFO(Conv);
}

template <DType InDtype, DType WeightDtype>
OpConv2d<InDtype, WeightDtype>::~OpConv2d()
{
    if (attribute)
        delete attribute;
    if (qinfo)
        delete qinfo;
}

template <DType InDtype, DType WeightDtype>
int OpConv2d<InDtype, WeightDtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    // 'bias' is checked separately since it doesn't make sense to make the required rank range from 1 to 4
    if (inputs[2]->getRank() != 1)
    {
        printNodeValidationError("OpConv2d: bias tensor must be rank 1");
    }

    ERROR_IF(outputs[0]->getDtype() != AccDtype,
             "OpConv2d: Output data type not supported for this configuration of operator");

    input  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
    bias   = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
    output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);

    std::string msg;
    if (check_conv_attribute_qinfo(attribute, qinfo, 2 /* conv_dimension */, input->getShape(), output->getShape(),
                                   InDtype, WeightDtype, msg))
    {
        msg = "OpConv2d: " + msg;
        printNodeValidationError(msg.c_str());
        return 1;
    }

    return 0;
}

template <DType InDtype, DType WeightDtype>
int OpConv2d<InDtype, WeightDtype>::eval()
{
    int in_batch = this->input->getShape()[0];
    int in_height = this->input->getShape()[1];
    int in_width = this->input->getShape()[2];
    int in_channels = this->input->getShape()[3];

    int f_out_channels = this->weight->getShape()[0];
    int f_height = this->weight->getShape()[1];
    int f_width = this->weight->getShape()[2];
    int f_in_channels = this->weight->getShape()[3];

    int b_out_channels = this->bias->getShape()[0];

    int out_batch = this->output->getShape()[0];
    int out_height = this->output->getShape()[1];
    int out_width = this->output->getShape()[2];
    int out_channels = this->output->getShape()[3];

    ERROR_IF(in_batch != out_batch, "OpConv2d: tensor batch mismatch %d != %d", in_batch, out_batch);
    ERROR_IF(f_in_channels != in_channels, "OpConv2d: tensor input channel mismatch %d != %d", f_in_channels,
             in_channels);
    ERROR_IF(f_out_channels != out_channels, "OpConv2d: tensor output channel mismatch %d != %d", f_out_channels,
             out_channels);
    ERROR_IF(b_out_channels != out_channels, "OpConv2d: bias channel mismatch %d != %d", b_out_channels, out_channels);

    int padding_top = this->attribute->padding()[0];
    int padding_bottom = this->attribute->padding()[1];
    int padding_left = this->attribute->padding()[2];
    int padding_right = this->attribute->padding()[3];
    int stride_h = this->attribute->stride()[0];
    int stride_w = this->attribute->stride()[1];
    int dilation_h = this->attribute->dilation()[0];
    int dilation_w = this->attribute->dilation()[1];

    DEBUG_INFO(OP,
               "perform OpConv2d, input.shape=[%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d], output.shape=[%d,%d,%d,%d], "
               "stride=[%d,%d], dilation=[%d,%d], padding=[%d,%d,%d,%d]",
               in_batch, in_height, in_width, in_channels, f_height, f_width, f_in_channels, f_out_channels, out_batch,
               out_height, out_width, out_channels, stride_h, stride_w, dilation_h, dilation_w, padding_top,
               padding_bottom, padding_left, padding_right);

    // GEMM-conv2d, left matrix is input, right matrix is weight
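    // Shape example (illustration only): input [1,5,5,2] and weight [8,3,3,2]
    // with stride 1, dilation 1 and no padding give output [1,3,3,8]; im2col
    // turns the input into [1*3*3, 3*3*2] = [9, 18] and the weight into
    // [18, 8], and the GEMM result [9, 8] reshapes back to [1,3,3,8].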
    Eigen::array<Eigen::Index, 2> im2col_input_dims;
    im2col_input_dims[0] = out_batch * out_height * out_width;
    im2col_input_dims[1] = f_height * f_width * f_in_channels;

    Eigen::array<Eigen::Index, 2> im2col_weight_dims;
    im2col_weight_dims[0] = f_height * f_width * f_in_channels;
    im2col_weight_dims[1] = f_out_channels;

    Eigen::array<Eigen::Index, 2> bias_reshaped_dims;
    bias_reshaped_dims[0] = 1;
    bias_reshaped_dims[1] = b_out_channels;

    Eigen::array<Eigen::Index, 4> weight_zp_bcast_dims;
    weight_zp_bcast_dims[0] = f_height;
    weight_zp_bcast_dims[1] = f_width;
    weight_zp_bcast_dims[2] = f_in_channels;

    Eigen::array<Eigen::Index, 2> bias_bcast_dims;
    bias_bcast_dims[0] = out_batch * out_height * out_width;
    bias_bcast_dims[1] = 1;

    Eigen::array<Eigen::Index, 4> col2im_output_dims;
    col2im_output_dims[0] = out_batch;
    col2im_output_dims[1] = out_height;
    col2im_output_dims[2] = out_width;
    col2im_output_dims[3] = out_channels;

    Eigen::array<Eigen::IndexPair<Eigen::Index>, 1> contract_dims = { Eigen::IndexPair<Eigen::Index>(1, 0) };

    Eigen::array<std::pair<int32_t, int32_t>, 4> padding;
    padding[0] = std::make_pair(0, 0);
    padding[1] = std::make_pair(padding_top, padding_bottom);
    padding[2] = std::make_pair(padding_left, padding_right);
    padding[3] = std::make_pair(0, 0);

    TIn input_val = this->input->getTensor();
    TWeight weight_val = this->weight->getTensor();
    if (this->qinfo)
    {
        input_val = input_val - (InEigenType)this->qinfo->input_zp();
        weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
    }

    ETensor4<InEigenType> input_padded = input_val.pad(padding);

    // extract_image_patches() output [N, KH, KW, H * W, C]
    // need to transpose to [N, H * W, KH, KW, C]
    ETensor5<InEigenType> input_extract_patches =
        input_padded
            .extract_image_patches(f_height, f_width, stride_h, stride_w, dilation_h, dilation_w, Eigen::PADDING_VALID)
            .shuffle(Eigen::array<Eigen::Index, 5>{ 0, 3, 1, 2, 4 });

    // reshape input to [N * H * W, KH * KW * C]
    ETensor2<InEigenType> im2col_input = input_extract_patches.reshape(im2col_input_dims);

    // transpose and reshape weight from [OC, H, W, IC] to [H * W * IC, OC]
    ETensor2<WeightEigenType> im2col_weight =
        weight_val.shuffle(Eigen::array<Eigen::Index, 4>({ 1, 2, 3, 0 })).reshape(im2col_weight_dims);

    // no need to apply bias_multiplier (* bias_scale and >> bias_shift) since tflite already scales it;
    // bias is reshaped from [C] to [1, C], and broadcast to [N * H * W, C]
    ETensor2<AccEigenType> bias_2d = this->bias->getTensor().reshape(bias_reshaped_dims).broadcast(bias_bcast_dims);

    // output matrix is [N * H * W, C]
    ETensor2<AccEigenType> contracted_result =
        im2col_input.template cast<AccEigenType>().contract(im2col_weight.template cast<AccEigenType>(), contract_dims);

    // adding bias
    ETensor2<AccEigenType> biased_output = contracted_result + bias_2d.template cast<AccEigenType>();

    // reshape back to [N, H, W, C]
    this->output->getTensor() = biased_output.reshape(col2im_output_dims);

    if (AccDtype == DType_INT48)
    {
        this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
    }

    return GraphNode::eval();
}

template <DType InDtype, DType WeightDtype>
OpConv3d<InDtype, WeightDtype>::OpConv3d(SubgraphTraverser* sgt_,
                                         TosaAttributeBase* attribute_,
                                         TosaQuantInfoBase* qinfo_,
                                         uint64_t id_)
    : GraphNode(sgt_, Op_CONV3D, id_)
{
    setRequiredOperands(3, 1);
    setRequiredRank(5);

    INIT_ATTRIBUTE(Conv);
    INIT_QINFO(Conv);
}

template <DType InDtype, DType WeightDtype>
OpConv3d<InDtype, WeightDtype>::~OpConv3d()
{
    if (attribute)
        delete attribute;
    if (qinfo)
        delete qinfo;
}

template <DType InDtype, DType WeightDtype>
int OpConv3d<InDtype, WeightDtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    // 'bias' is checked separately since it doesn't make sense to make the required rank range from 1 to 5
    if (inputs[2]->getRank() != 1)
    {
        printNodeValidationError("OpConv3d: bias tensor must be rank 1");
    }

    ERROR_IF(outputs[0]->getDtype() != AccDtype,
             "OpConv3d: Output data type not supported for this configuration of operator");

    input  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
    bias   = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
    output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);

    std::string msg;
    if (check_conv_attribute_qinfo(attribute, qinfo, 3 /* conv_dimension */, input->getShape(), output->getShape(),
                                   InDtype, WeightDtype, msg))
    {
        msg = "OpConv3d: " + msg;
        printNodeValidationError(msg.c_str());
        return 1;
    }

    return 0;
}

template <DType InDtype, DType WeightDtype>
int OpConv3d<InDtype, WeightDtype>::eval()
{
    int in_batch = this->input->getShape()[0];
    int in_depth = this->input->getShape()[1];
    int in_height = this->input->getShape()[2];
    int in_width = this->input->getShape()[3];
    int in_channels = this->input->getShape()[4];

    int f_out_channels = this->weight->getShape()[0];
    int f_depth = this->weight->getShape()[1];
    int f_height = this->weight->getShape()[2];
    int f_width = this->weight->getShape()[3];
    int f_in_channels = this->weight->getShape()[4];

    int b_out_channels = this->bias->getShape()[0];

    int out_batch = this->output->getShape()[0];
    int out_depth = this->output->getShape()[1];
    int out_height = this->output->getShape()[2];
    int out_width = this->output->getShape()[3];
    int out_channels = this->output->getShape()[4];

    ERROR_IF(in_batch != out_batch, "OpConv3d: tensor batch mismatch %d != %d", in_batch, out_batch);
    ERROR_IF(f_in_channels != in_channels, "OpConv3d: tensor input channel mismatch %d != %d", f_in_channels,
             in_channels);
    ERROR_IF(f_out_channels != out_channels, "OpConv3d: tensor output channel mismatch %d != %d", f_out_channels,
             out_channels);
    ERROR_IF(b_out_channels != out_channels, "OpConv3d: bias channel mismatch %d != %d", b_out_channels, out_channels);

    int padding_d0 = this->attribute->padding()[0];
    int padding_d1 = this->attribute->padding()[1];
    int padding_top = this->attribute->padding()[2];
    int padding_bottom = this->attribute->padding()[3];
    int padding_left = this->attribute->padding()[4];
    int padding_right = this->attribute->padding()[5];
    int stride_d = this->attribute->stride()[0];
    int stride_h = this->attribute->stride()[1];
    int stride_w = this->attribute->stride()[2];
    int dilation_d = this->attribute->dilation()[0];
    int dilation_h = this->attribute->dilation()[1];
    int dilation_w = this->attribute->dilation()[2];

    DEBUG_INFO(
        OP,
        "perform OpConv3d, input.shape=[%d,%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d,%d], output.shape=[%d,%d,%d,%d,%d], "
        "stride=[%d,%d,%d], dilation=[%d,%d,%d], padding=[%d,%d,%d,%d,%d,%d]",
        in_batch, in_depth, in_height, in_width, in_channels, f_out_channels, f_depth, f_height, f_width, f_in_channels,
        out_batch, out_depth, out_height, out_width, out_channels, stride_d, stride_h, stride_w, dilation_d, dilation_h,
        dilation_w, padding_d0, padding_d1, padding_top, padding_bottom, padding_left, padding_right);

    Eigen::array<std::pair<int32_t, int32_t>, 5> padding;
    padding[0] = std::make_pair(0, 0);
    padding[1] = std::make_pair(padding_d0, padding_d1);
    padding[2] = std::make_pair(padding_top, padding_bottom);
    padding[3] = std::make_pair(padding_left, padding_right);
    padding[4] = std::make_pair(0, 0);

    TIn input_val = this->input->getTensor();
    TWeight weight_val = this->weight->getTensor();
    if (this->qinfo)
    {
        input_val = input_val - (InEigenType)this->qinfo->input_zp();
        weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
    }

    ETensor5<InEigenType> input_padded = input_val.pad(padding);

    // 1. initialize with bias
    Eigen::array<Eigen::Index, 5> reshape_dim;
    reshape_dim.fill(1);
    reshape_dim[4] = b_out_channels;

    Eigen::array<Eigen::Index, 5> bcast;
    bcast[0] = out_batch;
    bcast[1] = out_depth;
    bcast[2] = out_height;
    bcast[3] = out_width;
    bcast[4] = 1;
    this->output->getTensor() = this->bias->getTensor().reshape(reshape_dim).broadcast(bcast);

    // 2. direct convolution
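    // Each output element (ob, od, oh, ow, oc) reduces over the whole kernel
    // volume; e.g. (illustration) with stride_d = 2 and dilation_d = 1, output
    // depth od reads padded input depths 2*od .. 2*od + f_depth - 1.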
    AccEigenType acc = 0;
    int d_idx, h_idx, w_idx;

    for (int ob = 0; ob < out_batch; ob++)
    {
        for (int od = 0; od < out_depth; od++)
        {
            for (int oh = 0; oh < out_height; oh++)
            {
                for (int ow = 0; ow < out_width; ow++)
                {
                    for (int oc = 0; oc < out_channels; oc++)
                    {
                        acc = 0;
                        for (int fd = 0; fd < f_depth; fd++)
                        {
                            d_idx = od * stride_d + fd * dilation_d;
                            for (int fh = 0; fh < f_height; fh++)
                            {
                                h_idx = oh * stride_h + fh * dilation_h;
                                for (int fw = 0; fw < f_width; fw++)
                                {
                                    w_idx = ow * stride_w + fw * dilation_w;
                                    for (int ic = 0; ic < in_channels; ic++)
                                    {
                                        acc += ((AccEigenType)input_padded(ob, d_idx, h_idx, w_idx, ic) *
                                                (AccEigenType)weight_val(oc, fd, fh, fw, ic));
                                    }
                                }
                            }
                        }
                        // accumulate onto the bias-initialized output (see step 1 above);
                        // a plain assignment here would discard the bias
                        this->output->getTensor()(ob, od, oh, ow, oc) += acc;
                    }
                }
            }
        }
    }

    if (AccDtype == DType_INT48)
    {
        this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
    }

    return GraphNode::eval();
}

template <DType InDtype, DType WeightDtype>
OpDepthwiseConv2d<InDtype, WeightDtype>::OpDepthwiseConv2d(SubgraphTraverser* sgt_,
                                                           TosaAttributeBase* attribute_,
                                                           TosaQuantInfoBase* qinfo_,
                                                           uint64_t id_)
    : GraphNode(sgt_, Op_DEPTHWISE_CONV2D, id_)
{
    setRequiredOperands(3, 1);
    setRequiredRank(4);

    INIT_ATTRIBUTE(Conv);
    INIT_QINFO(Conv);
}

template <DType InDtype, DType WeightDtype>
OpDepthwiseConv2d<InDtype, WeightDtype>::~OpDepthwiseConv2d()
{
    if (attribute)
        delete attribute;
    if (qinfo)
        delete qinfo;
}

template <DType InDtype, DType WeightDtype>
int OpDepthwiseConv2d<InDtype, WeightDtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    // 'bias' is checked separately since it doesn't make sense to make the required rank range from 1 to 4
    if (inputs[2]->getRank() != 1)
    {
        printNodeValidationError("OpDepthwiseConv2d: bias tensor must be rank 1");
    }

    ERROR_IF(outputs[0]->getDtype() != AccDtype,
             "OpDepthwiseConv2d: Output data type not supported for this configuration of operator");

    input  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
    bias   = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
    output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);

    std::string msg;
    if (check_conv_attribute_qinfo(attribute, qinfo, 2 /* conv_dimension */, input->getShape(), output->getShape(),
                                   InDtype, WeightDtype, msg))
    {
        msg = "OpDepthwiseConv2d: " + msg;
        printNodeValidationError(msg.c_str());
        return 1;
    }

    return 0;
}

template <DType InDtype, DType WeightDtype>
int OpDepthwiseConv2d<InDtype, WeightDtype>::eval()
{
    int in_batch = this->input->getShape()[0];
    int in_height = this->input->getShape()[1];
    int in_width = this->input->getShape()[2];
    int in_channels = this->input->getShape()[3];

    int f_height = this->weight->getShape()[0];
    int f_width = this->weight->getShape()[1];
    int f_in_channels = this->weight->getShape()[2];
    int f_multiplier = this->weight->getShape()[3];

    int b_out_channels = this->bias->getShape()[0];

    int out_batch = this->output->getShape()[0];
    int out_height = this->output->getShape()[1];
    int out_width = this->output->getShape()[2];
    int out_channels = this->output->getShape()[3];

    ERROR_IF(in_batch != out_batch, "OpDepthwiseConv2d: tensor batch mismatch %d != %d", in_batch, out_batch);
    ERROR_IF(f_in_channels != in_channels, "OpDepthwiseConv2d: tensor input channel mismatch %d != %d", f_in_channels,
             in_channels);
    ERROR_IF(in_channels * f_multiplier != out_channels, "OpDepthwiseConv2d: tensor output channel mismatch %d != %d",
             in_channels * f_multiplier, out_channels);
    ERROR_IF(b_out_channels != out_channels, "OpDepthwiseConv2d: bias channels mismatch %d != %d", b_out_channels,
             out_channels);

    int padding_top = this->attribute->padding()[0];
    int padding_bottom = this->attribute->padding()[1];
    int padding_left = this->attribute->padding()[2];
    int padding_right = this->attribute->padding()[3];
    int stride_h = this->attribute->stride()[0];
    int stride_w = this->attribute->stride()[1];
    int dilation_h = this->attribute->dilation()[0];
    int dilation_w = this->attribute->dilation()[1];

    DEBUG_INFO(OP,
               "perform OpDepthwiseConv2d, input.shape=[%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d], "
               "output.shape=[%d,%d,%d,%d], stride=[%d,%d], dilation=[%d,%d], padding=[%d,%d,%d,%d]",
               in_batch, in_height, in_width, in_channels, f_height, f_width, f_in_channels, f_multiplier, out_batch,
               out_height, out_width, out_channels, stride_h, stride_w, dilation_h, dilation_w, padding_top,
               padding_bottom, padding_left, padding_right);

    Eigen::array<std::pair<int32_t, int32_t>, 4> padding;
    padding[0] = std::make_pair(0, 0);
    padding[1] = std::make_pair(padding_top, padding_bottom);
    padding[2] = std::make_pair(padding_left, padding_right);
    padding[3] = std::make_pair(0, 0);

    TIn input_val = this->input->getTensor();
    TWeight weight_val = this->weight->getTensor();
    if (this->qinfo)
    {
        input_val = input_val - (InEigenType)this->qinfo->input_zp();
        weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
    }

    ETensor4<InEigenType> input_padded = input_val.pad(padding);

    // GEMM doesn't fit well with DepthwiseConv2d
    // 1. use extract_image_patches() to handle stride/dilation/padding
    // 2. perform direct convolution

    // 1. extract_image_patches() output [N, KH, KW, OH * OW, IC]
    ETensor5<InEigenType> input_extract_patches = input_padded.extract_image_patches(
        f_height, f_width, stride_h, stride_w, dilation_h, dilation_w, Eigen::PADDING_VALID);

    Eigen::array<Eigen::Index, 4> reshape_dim;
    reshape_dim.fill(1);
    reshape_dim[3] = b_out_channels;

    Eigen::array<Eigen::Index, 4> bcast;
    bcast[0] = out_batch;
    bcast[1] = out_height;
    bcast[2] = out_width;
    bcast[3] = 1;

    // initialize with bias
    this->output->getTensor() = this->bias->getTensor().reshape(reshape_dim).broadcast(bcast);

    // 2. direct depthwise convolution
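    // Each input channel ic fans out to f_multiplier output channels: output
    // channel ic * f_multiplier + cm accumulates the patch for channel ic
    // against weight_val(:, :, ic, cm).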
    for (int ob = 0; ob < out_batch; ob++)
    {
        for (int oh = 0; oh < out_height; oh++)
        {
            for (int ow = 0; ow < out_width; ow++)
            {
                for (int ic = 0; ic < in_channels; ic++)
                {
                    for (int cm = 0; cm < f_multiplier; cm++)
                    {
                        for (int fh = 0; fh < f_height; fh++)
                        {
                            for (int fw = 0; fw < f_width; fw++)
                            {
                                this->output->getTensor()(ob, oh, ow, ic * f_multiplier + cm) +=
                                    ((AccEigenType)input_extract_patches(ob, fh, fw, ow * out_height + oh, ic) *
                                     (AccEigenType)weight_val(fh, fw, ic, cm));
                            }
                        }
                    }
                }
            }
        }
    }

    if (AccDtype == DType_INT48)
    {
        this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
    }

    return GraphNode::eval();
}

template <DType InDtype, DType WeightDtype>
OpFullyConnected<InDtype, WeightDtype>::OpFullyConnected(SubgraphTraverser* sgt_,
                                                         TosaAttributeBase* attribute_,
                                                         TosaQuantInfoBase* qinfo_,
                                                         uint64_t id_)
    : GraphNode(sgt_, Op_FULLY_CONNECTED, id_)
{
    setRequiredOperands(3, 1);
    setRequiredRank(2);

    INIT_QINFO(Conv);
}

template <DType InDtype, DType WeightDtype>
OpFullyConnected<InDtype, WeightDtype>::~OpFullyConnected()
{
    if (qinfo)
        delete qinfo;
}

template <DType InDtype, DType WeightDtype>
int OpFullyConnected<InDtype, WeightDtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    input  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
    bias   = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);

    if (input->getShape()[1] != weight->getShape()[1])
    {
        printNodeValidationError("OpFullyConnected operator input.shape[1] should match weight.shape[1]");
        return 1;
    }

    if (weight->getShape()[0] != bias->getShape()[0])
    {
        printNodeValidationError("OpFullyConnected operator bias.shape[0] should match weight.shape[0]");
        return 1;
    }

    ERROR_IF(outputs[0]->getDtype() != AccDtype,
             "OpFullyConnected: Output data type not supported for this configuration of operator");

    output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);

    if (this->qinfo)
    {
        if (InDtype != DType_INT8)
        {
            ERROR_IF(this->qinfo->input_zp() != 0, "OpFullyConnected: zeropoint only for int8_t");
        }
        if (WeightDtype != DType_INT8)
        {
            ERROR_IF(this->qinfo->weight_zp() != 0, "OpFullyConnected: zeropoint only for int8_t");
        }
    }

    return 0;
}

template <DType InDtype, DType WeightDtype>
int OpFullyConnected<InDtype, WeightDtype>::eval()
{
    typedef Eigen::Tensor<int, 1>::DimensionPair DimPair;
    Eigen::array<DimPair, 1> dims{ { DimPair(1, 0) } };

    Eigen::array<Eigen::Index, 2> weight_shuffle{ 1, 0 };

    Eigen::array<Eigen::Index, 2> bias_reshape;
    bias_reshape[0] = 1;
    bias_reshape[1] = this->bias->getShape()[0];

    Eigen::array<Eigen::Index, 2> bias_bcast;
    bias_bcast[0] = this->input->getShape()[0];
    bias_bcast[1] = 1;

    TIn input_val = this->input->getTensor();
    TWeight weight_val = this->weight->getTensor().shuffle(weight_shuffle);
    if (this->qinfo)
    {
        input_val = input_val - (InEigenType)this->qinfo->input_zp();
        weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
    }

    this->output->getTensor() =
        input_val.template cast<AccEigenType>().contract(weight_val.template cast<AccEigenType>(), dims) +
        this->bias->getTensor().reshape(bias_reshape).broadcast(bias_bcast);

    if (AccDtype == DType_INT48)
    {
        this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
    }
    return GraphNode::eval();
}

template <DType Dtype>
OpMatMul<Dtype>::OpMatMul(SubgraphTraverser* sgt_,
                          TosaAttributeBase* attribute_,
                          TosaQuantInfoBase* qinfo_,
                          uint64_t id_)
    : GraphNode(sgt_, Op_MATMUL, id_)
{
    setRequiredOperands(2, 1);
    setRequiredRank(3);

    INIT_QINFO(MatMul);
}

template <DType Dtype>
OpMatMul<Dtype>::~OpMatMul()
{
    if (qinfo)
        delete qinfo;
}

template <DType Dtype>
int OpMatMul<Dtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    ERROR_IF(outputs[0]->getDtype() != AccDtype,
             "OpMatMul: Output data type not supported for this configuration of operator");

    a      = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    b      = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[1]);
    output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);

    ASSERT_MEM(a && b && output);

    // a: [N, H, C]
    // b: [N, C, W]
    // c: [N, H, W]

    // Check N
    if (a->getShape()[0] != b->getShape()[0] || a->getShape()[0] != output->getShape()[0])
    {
        printNodeValidationError("OpMatMul operator a.shape[0], b.shape[0] and output.shape[0] should match");
        return 1;
    }
    N = a->getShape()[0];

    // Check C
    if (a->getShape()[2] != b->getShape()[1])
    {
        printNodeValidationError("OpMatMul operator a.shape[2] should match b.shape[1]");
        return 1;
    }
    C = a->getShape()[2];

    // Check H
    if (a->getShape()[1] != output->getShape()[1])
    {
        printNodeValidationError("OpMatMul operator a.shape[1] should match output.shape[1]");
        return 1;
    }
    H = a->getShape()[1];

    // Check W
    if (b->getShape()[2] != output->getShape()[2])
    {
        printNodeValidationError("OpMatMul operator b.shape[2] should match output.shape[2]");
        return 1;
    }
    W = b->getShape()[2];

    if (Dtype != DType_INT8 && this->qinfo)
    {
        ERROR_IF(this->qinfo->a_zp() != 0, "OpMatMul: zeropoint only for int8_t");
        ERROR_IF(this->qinfo->b_zp() != 0, "OpMatMul: zeropoint only for int8_t");
    }

    return 0;
}

template <DType Dtype>
int OpMatMul<Dtype>::eval()
{
    typedef Eigen::Tensor<int, 1>::DimensionPair DimPair;
    Eigen::array<DimPair, 1> dims{ { DimPair(1, 0) } };

    TIn a_val = this->a->getTensor();
    TIn b_val = this->b->getTensor();
    if (this->qinfo)
    {
        a_val = a_val - (InEigenType)this->qinfo->a_zp();
        b_val = b_val - (InEigenType)this->qinfo->b_zp();
    }

    Eigen::array<Eigen::Index, 2> a_rank2_shape({ H, C });
    Eigen::array<Eigen::Index, 2> b_rank2_shape({ C, W });
    Eigen::array<Eigen::Index, 3> output_rank3_shape({ 1, H, W });

    Eigen::array<Eigen::Index, 3> a_size_array({ 1, H, C });
    Eigen::array<Eigen::Index, 3> b_size_array({ 1, C, W });

    Eigen::array<Eigen::Index, 3> a_begin_array({ 0, 0, 0 });
    Eigen::array<Eigen::Index, 3> b_begin_array({ 0, 0, 0 });

    // Iterate N dimension.
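    // Each batch i is sliced out as [1, H, C] and [1, C, W], reshaped to
    // rank-2 matrices, multiplied with a single contraction, and the [1, H, W]
    // results are concatenated back along the batch dimension.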
    for (int i = 0; i < N; i++)
    {
        a_begin_array[0] = i;
        b_begin_array[0] = i;

        TInRank2 a_rank2_val = a_val.slice(a_begin_array, a_size_array).reshape(a_rank2_shape);
        TInRank2 b_rank2_val = b_val.slice(b_begin_array, b_size_array).reshape(b_rank2_shape);
        TAccRank2 output_rank2_val =
            a_rank2_val.template cast<AccEigenType>().contract(b_rank2_val.template cast<AccEigenType>(), dims);
        TAcc output_rank3_val = output_rank2_val.reshape(output_rank3_shape);
        if (i == 0)
        {
            this->output->getTensor() = output_rank3_val;
        }
        else
        {
            TAcc temp = this->output->getTensor().concatenate(output_rank3_val, 0);
            this->output->getTensor() = temp;
        }
    }

    if (AccDtype == DType_INT48)
    {
        this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
    }

    return GraphNode::eval();
}

template <DType Dtype>
OpMaxPool2d<Dtype>::OpMaxPool2d(SubgraphTraverser* sgt_,
                                TosaAttributeBase* attribute_,
                                TosaQuantInfoBase* qinfo_,
                                uint64_t id_)
    : GraphNode(sgt_, Op_MAX_POOL2D, id_)
{
    setRequiredOperands(1, 1);
    setRequiredRank(4);

    INIT_ATTRIBUTE(Pool);
}

template <DType Dtype>
OpMaxPool2d<Dtype>::~OpMaxPool2d()
{
    if (attribute)
        delete attribute;
}

template <DType Dtype>
int OpMaxPool2d<Dtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    if (inputs[0]->matchType(*outputs[0]))
    {
        printNodeValidationError("OpMaxPool2d: input and output tensor type mismatch");
        return 1;
    }

    in  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    out = dynamic_cast<TosaReference::TensorTemplate<TOut>*>(outputs[0]);

    std::string msg;
    if (check_pool2d_attribute(attribute, in->getShape(), out->getShape(), msg))
    {
        msg = "OpMaxPool2d: " + msg;
        printNodeValidationError(msg.c_str());
        return 1;
    }

    return 0;
}

template <DType Dtype>
int OpMaxPool2d<Dtype>::eval()
{
    int in_batch = this->in->getShape()[0];
    int in_height = this->in->getShape()[1];
    int in_width = this->in->getShape()[2];
    int in_channels = this->in->getShape()[3];

    int out_batch = this->out->getShape()[0];
    int out_height = this->out->getShape()[1];
    int out_width = this->out->getShape()[2];
    int out_channels = this->out->getShape()[3];

    ERROR_IF(in_batch != out_batch, "OpMaxPool2d: tensor batch mismatch %d != %d", in_batch, out_batch);
    ERROR_IF(in_channels != out_channels, "OpMaxPool2d: tensor channel mismatch %d != %d", in_channels, out_channels);

    int padding_top = this->attribute->padding()[0];
    int padding_bottom = this->attribute->padding()[1];
    int padding_left = this->attribute->padding()[2];
    int padding_right = this->attribute->padding()[3];
    int kernel_h = this->attribute->kernel()[0];
    int kernel_w = this->attribute->kernel()[1];
    int stride_h = this->attribute->stride()[0];
    int stride_w = this->attribute->stride()[1];

    DEBUG_INFO(OP,
               "perform MaxPool2d, input.shape=[%d,%d,%d,%d], output.shape=[%d,%d,%d,%d], kernel=[%d,%d], "
               "stride=[%d,%d], padding=[%d,%d,%d,%d]",
               in_batch, in_height, in_width, in_channels, out_batch, out_height, out_width, out_channels, kernel_h,
               kernel_w, stride_h, stride_w, padding_top, padding_bottom, padding_left, padding_right);

    Eigen::array<Eigen::Index, 2> im2col_input_dims;
    im2col_input_dims[0] = kernel_h * kernel_w;
    im2col_input_dims[1] = out_batch * out_height * out_width * out_channels;

    Eigen::array<Eigen::Index, 4> col2im_output_dims;
    col2im_output_dims[0] = out_batch;
    col2im_output_dims[1] = out_height;
    col2im_output_dims[2] = out_width;
    col2im_output_dims[3] = out_channels;

    Eigen::array<std::pair<int32_t, int32_t>, 4> padding;
    padding[0] = std::make_pair(0, 0);
    padding[1] = std::make_pair(padding_top, padding_bottom);
    padding[2] = std::make_pair(padding_left, padding_right);
    padding[3] = std::make_pair(0, 0);

    ETensor4<InEigenType> input_padded = this->in->getTensor().pad(padding, std::numeric_limits<InEigenType>::lowest());

    // extract_image_patches() output [N, KH, KW, H * W, C]
    // transpose to [KH, KW, N, H * W, C]
    // reshape to [KH * KW, N * H * W * C]
    //
    // Set the padding value to be the most negative value that can be
    // represented by the datatype to ensure that any padding values will be equal
    // to or smaller than the actual maximum in the KH x KW patch.
    ETensor2<InEigenType> input_extract_patches =
        input_padded
            .extract_image_patches(kernel_h, kernel_w, stride_h, stride_w, 1, 1, Eigen::PADDING_VALID,
                                   std::numeric_limits<InEigenType>::lowest())
            .shuffle(Eigen::array<Eigen::Index, 5>{ 1, 2, 0, 3, 4 })
            .reshape(im2col_input_dims);

    // Get the maximum of the KH x KW patches along axis 0
    Eigen::Tensor<DenseIndex, 1> tensor_argmax = input_extract_patches.argmax(0);
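    // tensor_argmax(i) is the offset, within patch column i, of that patch's
    // largest element; the gather loop below reads the maxima back out.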

    // 1D result with [N * H * W * C]
    ETensor1<OutEigenType> out_1d(this->out->getElementCount());

    // index input_patches with argmax array should give the result
    for (size_t i = 0; i < this->out->getElementCount(); i++)
    {
        out_1d(i) = (OutEigenType)input_extract_patches(tensor_argmax(i), i);
    }

    // reshape result to [N, H, W, C]
    this->out->getTensor() = out_1d.reshape(col2im_output_dims);

    return GraphNode::eval();
}

Kevin Chengcc61be32021-10-14 17:09:57 -07001408template <DType InDtype, DType WeightDtype>
1409OpTransposeConv2d<InDtype, WeightDtype>::OpTransposeConv2d(SubgraphTraverser* sgt_,
1410 TosaAttributeBase* attribute_,
1411 TosaQuantInfoBase* qinfo_,
1412 uint64_t id_)
Kevin Chengacb550f2021-06-29 15:32:19 -07001413 : GraphNode(sgt_, Op_TRANSPOSE_CONV2D, id_)
Eric Kunzee5e26762020-10-13 16:11:07 -07001414{
1415 setRequiredOperands(3, 1);
1416 setRequiredRank(4);
1417
Kevin Cheng93a16282021-08-31 16:14:03 -07001418 INIT_ATTRIBUTE(TransposeConv);
Eric Kunzee5e26762020-10-13 16:11:07 -07001419 INIT_QINFO(Conv);
1420}
1421
Kevin Chengcc61be32021-10-14 17:09:57 -07001422template <DType InDtype, DType WeightDtype>
1423OpTransposeConv2d<InDtype, WeightDtype>::~OpTransposeConv2d()
Eric Kunzee5e26762020-10-13 16:11:07 -07001424{
1425 if (attribute)
1426 delete attribute;
1427 if (qinfo)
1428 delete qinfo;
1429}
1430
Kevin Chengcc61be32021-10-14 17:09:57 -07001431template <DType InDtype, DType WeightDtype>
1432int OpTransposeConv2d<InDtype, WeightDtype>::checkTensorAttributes()
Eric Kunzee5e26762020-10-13 16:11:07 -07001433{
1434 if (validateRequiredOperands())
1435 return 1;
1436
1437 if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
1438 {
1439 return 1;
1440 }
1441
Kevin Chengcc61be32021-10-14 17:09:57 -07001442 ERROR_IF(outputs[0]->getDtype() != AccDtype,
Kevin Cheng80794802021-11-01 11:14:13 -07001443 "OpTransposeConv2d: Output data type not supported for this configuration of operator");
Kevin Chengcc61be32021-10-14 17:09:57 -07001444
Eric Kunzee5e26762020-10-13 16:11:07 -07001445 input = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
1446 weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
1447 bias = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
1448 output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);
1449
Eric Kunzee5e26762020-10-13 16:11:07 -07001450 if (attribute->outpad().size() != 2)
1451 {
1452 printNodeValidationError("OpTransposeConv2d: illegal size for attribute outpad");
1453 return 1;
1454 }
1455
1456 if (attribute->stride().size() != 2)
1457 {
1458 printNodeValidationError("OpTransposeConv2d: illegal size for attribute stride");
1459 return 1;
1460 }
1461
1462 if (attribute->dilation().size() != 2)
1463 {
1464 printNodeValidationError("OpTransposeConv2d: illegal size for attribute dilation");
1465 return 1;
1466 }
1467
1468 if (attribute->output_shape().size() != 4)
1469 {
1470 printNodeValidationError("OpTransposeConv2d: illegal size for attribute output_shape");
1471 return 1;
1472 }
1473
    for (int32_t i : attribute->outpad())
    {
        if (i < 0)
        {
            printNodeValidationError("OpTransposeConv2d: At least one pad is smaller than zero");
            return 1;
        }
    }

    for (int32_t i : attribute->stride())
    {
        if (i < 1)
        {
            printNodeValidationError("OpTransposeConv2d: At least one stride is smaller than one");
            return 1;
        }
    }

    for (int32_t i : attribute->dilation())
    {
        if (i < 1)
        {
            printNodeValidationError("OpTransposeConv2d: At least one dilation is smaller than one");
            return 1;
        }
    }

    for (int d = 0; d < 4; d++)
    {
        if (attribute->output_shape()[d] != this->output->getShape()[d])
        {
            printNodeValidationError("OpTransposeConv2d: attribute output_shape doesn't match output tensor shape");
            return 1;
        }
    }

    if (this->qinfo)
    {
        if (InDtype != DType_INT8)
        {
            ERROR_IF(this->qinfo->input_zp() != 0, "OpTransposeConv2d: input zeropoint only allowed for int8_t");
        }
        if (WeightDtype != DType_INT8)
        {
            ERROR_IF(this->qinfo->weight_zp() != 0, "OpTransposeConv2d: weight zeropoint only allowed for int8_t");
        }
    }

    return 0;
}

template <DType InDtype, DType WeightDtype>
int OpTransposeConv2d<InDtype, WeightDtype>::eval()
{
    int in_batch    = this->input->getShape()[0];
    int in_height   = this->input->getShape()[1];
    int in_width    = this->input->getShape()[2];
    int in_channels = this->input->getShape()[3];

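    // TRANSPOSE_CONV2D weights are laid out as [OC, KH, KW, IC]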
    int f_out_channels = this->weight->getShape()[0];
    int f_height       = this->weight->getShape()[1];
    int f_width        = this->weight->getShape()[2];
    int f_in_channels  = this->weight->getShape()[3];

    int b_out_channels = this->bias->getShape()[0];

    int out_batch    = this->output->getShape()[0];
    int out_height   = this->output->getShape()[1];
    int out_width    = this->output->getShape()[2];
    int out_channels = this->output->getShape()[3];

    int padding_top  = this->attribute->outpad()[0];
    int padding_left = this->attribute->outpad()[1];
    int stride_h     = this->attribute->stride()[0];
    int stride_w     = this->attribute->stride()[1];
    int dilation_h   = this->attribute->dilation()[0];
    int dilation_w   = this->attribute->dilation()[1];

    ERROR_IF(in_batch != out_batch, "OpTransposeConv2d: tensor batch mismatch %d != %d", in_batch, out_batch);
    ERROR_IF(f_in_channels != in_channels, "OpTransposeConv2d: tensor input channel mismatch %d != %d", f_in_channels,
             in_channels);
    ERROR_IF(f_out_channels != out_channels, "OpTransposeConv2d: tensor output channel mismatch %d != %d",
             f_out_channels, out_channels);
    ERROR_IF(b_out_channels != out_channels, "OpTransposeConv2d: bias channels mismatch %d != %d", b_out_channels,
             out_channels);

    DEBUG_INFO(OP,
               "perform OpTransposeConv2d, input.shape=[%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d], "
               "output.shape=[%d,%d,%d,%d], stride=[%d,%d], dilation=[%d,%d], padding=[%d,%d]",
               in_batch, in_height, in_width, in_channels, f_out_channels, f_height, f_width, f_in_channels, out_batch,
               out_height, out_width, out_channels, stride_h, stride_w, dilation_h, dilation_w, padding_top,
               padding_left);

    TIn input_val      = this->input->getTensor();
    TWeight weight_val = this->weight->getTensor();
    if (this->qinfo)
    {
        input_val  = input_val - (InEigenType)this->qinfo->input_zp();
        weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
    }
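    // Subtracting the zero points up front lets the scatter loop below
    // accumulate raw products; e.g. (assumed values) an int8 input of -125
    // with input_zp = -128 contributes (-125) - (-128) = 3 to each product.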

    Eigen::array<Eigen::Index, 4> reshape_dim;
    reshape_dim.fill(1);
    reshape_dim[3] = b_out_channels;

    Eigen::array<Eigen::Index, 4> bcast;
    bcast[0] = out_batch;
    bcast[1] = out_height;
    bcast[2] = out_width;
    bcast[3] = 1;

    // initialize with bias
    this->output->getTensor() = this->bias->getTensor().reshape(reshape_dim).broadcast(bcast);
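    // e.g. (assumed sizes) a bias of shape [8] with an output of shape
    // [1, 16, 16, 8] is viewed as [1, 1, 1, 8] and broadcast so that every
    // output element starts at its channel's bias before the scatter-add.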

    int out_x_origin, out_y_origin;
    int out_x, out_y;

    // reference implementation from: tensorflow/tensorflow/lite/kernels/internal/reference/reference_ops.h
    for (int ob = 0; ob < out_batch; ob++)
    {
        for (int ih = 0; ih < in_height; ih++)
        {
            for (int iw = 0; iw < in_width; iw++)
            {
                out_x_origin = iw * stride_w - padding_left;
                out_y_origin = ih * stride_h - padding_top;
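                // e.g. (assumed values) with stride=[2,2] and outpad=[0,0],
                // input position (ih=1, iw=1) maps to origin (out_y=2, out_x=2);
                // each kernel tap (fh, fw) then adds into output position
                // (out_y_origin + fh * dilation_h, out_x_origin + fw * dilation_w).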
                for (int ic = 0; ic < in_channels; ic++)
                {
                    for (int fh = 0; fh < f_height; fh++)
                    {
                        for (int fw = 0; fw < f_width; fw++)
                        {
                            out_x = out_x_origin + fw * dilation_w;
                            out_y = out_y_origin + fh * dilation_h;
                            for (int oc = 0; oc < out_channels; oc++)
                            {
                                if ((out_x >= 0 && out_x < out_width) && (out_y >= 0 && out_y < out_height))
                                {
                                    this->output->getTensor()(ob, out_y, out_x, oc) +=
                                        ((AccEigenType)input_val(ob, ih, iw, ic) *
                                         (AccEigenType)weight_val(oc, fh, fw, ic));
                                }
                            }
                        }
                    }
                }
            }
        }
    }

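    // INT48 results are accumulated in a wider integer type, so saturate to
    // the 48-bit range [AccQMin, AccQMax] (nominally [-(2^47), 2^47 - 1])
    // before handing the tensor back.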
    if (AccDtype == DType_INT48)
    {
        this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
    }

    return GraphNode::eval();
}

// template explicit instantiation
DEF_INSTANTIATE_RANK1_6_ONE_RANK_ONE_TYPE(OpArgMax, FLOAT);
DEF_INSTANTIATE_RANK1_6_ONE_RANK_ONE_TYPE(OpArgMax, INT8);
DEF_INSTANTIATE_RANK1_6_ONE_RANK_ONE_TYPE(OpArgMax, INT16);

DEF_INSTANTIATE_ONE_TYPE(OpAvgPool2d, FLOAT);
DEF_INSTANTIATE_ONE_TYPE(OpAvgPool2d, INT8);
DEF_INSTANTIATE_ONE_TYPE(OpAvgPool2d, INT16);

DEF_INSTANTIATE_TWO_TYPE(OpConv2d, FLOAT, FLOAT);
DEF_INSTANTIATE_TWO_TYPE(OpConv2d, INT8, INT4);
DEF_INSTANTIATE_TWO_TYPE(OpConv2d, INT8, INT8);
DEF_INSTANTIATE_TWO_TYPE(OpConv2d, INT16, INT8);

DEF_INSTANTIATE_TWO_TYPE(OpConv3d, FLOAT, FLOAT);
DEF_INSTANTIATE_TWO_TYPE(OpConv3d, INT8, INT4);
DEF_INSTANTIATE_TWO_TYPE(OpConv3d, INT8, INT8);
DEF_INSTANTIATE_TWO_TYPE(OpConv3d, INT16, INT8);

DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, FLOAT, FLOAT);
DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, INT8, INT4);
DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, INT8, INT8);
DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, INT16, INT8);

DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, FLOAT, FLOAT);
DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, INT8, INT4);
DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, INT8, INT8);
DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, INT16, INT8);

DEF_INSTANTIATE_ONE_TYPE(OpMatMul, INT8);
DEF_INSTANTIATE_ONE_TYPE(OpMatMul, INT16);
DEF_INSTANTIATE_ONE_TYPE(OpMatMul, FLOAT);

DEF_INSTANTIATE_ONE_TYPE(OpMaxPool2d, FLOAT);
DEF_INSTANTIATE_ONE_TYPE(OpMaxPool2d, INT8);
DEF_INSTANTIATE_ONE_TYPE(OpMaxPool2d, INT16);

DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, FLOAT, FLOAT);
DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, INT8, INT4);
DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, INT8, INT8);
DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, INT16, INT8);