// Copyright (c) 2020-2021, ARM Limited.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "tensor_ops.h"
#include "quant_util.h"
#include "template_types.h"

using namespace TosaReference;
using namespace Eigen;
using namespace tosa;

int check_pool2d_attribute(tosa::TosaPoolAttribute* attribute,
                           std::vector<int32_t> input_shape,
                           std::vector<int32_t> output_shape,
                           std::string& msg)
{
    if (attribute->pad().size() != 4)
    {
        msg = "illegal size for attribute padding";
        return 1;
    }

    if (attribute->kernel().size() != 2)
    {
        msg = "illegal size for attribute kernel";
        return 1;
    }

    if (attribute->stride().size() != 2)
    {
        msg = "illegal size for attribute stride";
        return 1;
    }

    for (int32_t i : attribute->pad())
    {
        if (i < 0)
        {
            msg = "At least one pad is smaller than zero";
            return 1;
        }
    }

    for (int32_t i : attribute->kernel())
    {
        if (i < 1)
        {
            msg = "At least one kernel dimension is smaller than one";
            return 1;
        }
    }

    for (int32_t i : attribute->stride())
    {
        if (i < 1)
        {
            msg = "At least one stride dimension is smaller than one";
            return 1;
        }
    }

    int32_t IH = input_shape[1];
    int32_t IW = input_shape[2];
    int32_t OH = output_shape[1];
    int32_t OW = output_shape[2];

    int32_t pad_top    = attribute->pad()[0];
    int32_t pad_bottom = attribute->pad()[1];
    int32_t pad_left   = attribute->pad()[2];
    int32_t pad_right  = attribute->pad()[3];

    int32_t stride_y = attribute->stride()[0];
    int32_t stride_x = attribute->stride()[1];
    int32_t kernel_y = attribute->kernel()[0];
    int32_t kernel_x = attribute->kernel()[1];

    if (pad_top >= kernel_y || pad_bottom >= kernel_y || pad_left >= kernel_x || pad_right >= kernel_x)
    {
        msg = "At least one pad is >= kernel dimension";
        return 1;
    }

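    // The output dimension is valid only when the padded input is tiled
    // exactly by the kernel: O = (I + pad_before + pad_after - kernel) / stride + 1
    // with no remainder. full_H/full_W below are the numerators of that check.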
    int32_t full_H = IH + pad_top + pad_bottom - kernel_y;
    int32_t full_W = IW + pad_left + pad_right - kernel_x;

    if ((full_H % stride_y != 0) ||
        (full_W % stride_x != 0))
    {
        msg = "Parameters must yield exact integer output dimensions";
        return 1;
    }

    if ((OH != (full_H / stride_y) + 1) ||
        (OW != (full_W / stride_x) + 1))
    {
        msg = "Mismatch between output shape provided and expected output shape (" +
              std::to_string((full_H / stride_y) + 1) + "," +
              std::to_string((full_W / stride_x) + 1) + ")";
        return 1;
    }

    return 0;
}

int check_conv_attribute_qinfo(tosa::TosaConvAttribute* attribute,
                               tosa::TosaConvQuantInfo* qinfo,
                               uint32_t conv_dimension,
                               std::vector<int32_t> input_shape,
                               std::vector<int32_t> output_shape,
                               std::vector<int32_t> weights,
                               uint32_t offset_kernel,
                               DType InDtype,
                               DType WeightDtype,
                               std::string& msg)
{
    if (attribute->pad().size() != (2 * conv_dimension))
    {
        msg = "Illegal size for attribute pad";
        return 1;
    }

    if (attribute->stride().size() != conv_dimension)
    {
        msg = "Illegal size for attribute stride";
        return 1;
    }

    if (attribute->dilation().size() != conv_dimension)
    {
        msg = "Illegal size for attribute dilation";
        return 1;
    }

    for (int32_t i : attribute->pad())
    {
        if (i < 0)
        {
            msg = "At least one pad is smaller than zero";
            return 1;
        }
    }

    for (int32_t i : attribute->stride())
    {
        if (i < 1)
        {
            msg = "At least one stride dimension is smaller than one";
            return 1;
        }
    }

    for (int32_t i : attribute->dilation())
    {
        if (i < 1)
        {
            msg = "At least one dilation dimension is smaller than one";
            return 1;
        }
    }

    ASSERT_MSG(conv_dimension == 2 || conv_dimension == 3, "Unsupported convolution dimension");

    int32_t offset_d = conv_dimension == 3 ? 1 : 0;
    int32_t ID = conv_dimension == 3 ? input_shape[1] : 1;
    int32_t IH = input_shape[1 + offset_d];
    int32_t IW = input_shape[2 + offset_d];
    int32_t OD = conv_dimension == 3 ? output_shape[1] : 1;
    int32_t OH = output_shape[1 + offset_d];
    int32_t OW = output_shape[2 + offset_d];

    int32_t stride_d   = conv_dimension == 3 ? attribute->stride()[0] : 1;
    int32_t stride_y   = attribute->stride()[0 + offset_d];
    int32_t stride_x   = attribute->stride()[1 + offset_d];
    int32_t kernel_d   = conv_dimension == 3 ? weights[offset_kernel] : 1;
    int32_t kernel_h   = weights[offset_kernel + offset_d];
    int32_t kernel_w   = weights[offset_kernel + 1 + offset_d];
    int32_t dilation_d = conv_dimension == 3 ? attribute->dilation()[0] : 1;
    int32_t dilation_y = attribute->dilation()[0 + offset_d];
    int32_t dilation_x = attribute->dilation()[1 + offset_d];

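    // pad() stores two entries per spatial dimension, so the pad index offset
    // is double the single-entry offset used for stride/dilation above.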
    offset_d *= 2;
    int32_t pad_d0     = conv_dimension == 3 ? attribute->pad()[0] : 0;
    int32_t pad_d1     = conv_dimension == 3 ? attribute->pad()[1] : 0;
    int32_t pad_top    = attribute->pad()[0 + offset_d];
    int32_t pad_bottom = attribute->pad()[1 + offset_d];
    int32_t pad_left   = attribute->pad()[2 + offset_d];
    int32_t pad_right  = attribute->pad()[3 + offset_d];

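    // A dilated kernel spans (kernel - 1) * dilation + 1 input elements, so
    // full_X = IX + pads - ((kernel - 1) * dilation + 1), written below with
    // the trailing "+ 1" folded into "IX - 1". The same exact-division rule
    // as for pooling then applies.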
    int32_t full_D = ID - 1 + pad_d0 + pad_d1 - (kernel_d - 1) * dilation_d;
    int32_t full_H = IH - 1 + pad_top + pad_bottom - (kernel_h - 1) * dilation_y;
    int32_t full_W = IW - 1 + pad_left + pad_right - (kernel_w - 1) * dilation_x;

    if ((full_H % stride_y != 0) ||
        (full_W % stride_x != 0) ||
        (full_D % stride_d != 0))
    {
        msg = "Parameters must yield exact integer output dimensions";
        return 1;
    }

    if ((OH != (full_H / stride_y) + 1) ||
        (OW != (full_W / stride_x) + 1) ||
        (OD != (full_D / stride_d) + 1))
    {
        std::string msg_d = "";
        if (conv_dimension == 3)
        {
            msg_d += std::to_string((full_D / stride_d) + 1) + ",";
        }
        msg = "Mismatch between output shape provided and expected output shape (" +
              msg_d +
              std::to_string((full_H / stride_y) + 1) + "," +
              std::to_string((full_W / stride_x) + 1) + ")";
        return 1;
    }

    if (qinfo)
    {
        if (InDtype != DType_INT8 && qinfo->input_zp() != 0)
        {
            msg = "zeropoint only for int8_t";
            return 1;
        }
        if (WeightDtype != DType_INT8 && qinfo->weight_zp() != 0)
        {
            msg = "zeropoint only for int8_t";
            return 1;
        }
    }

    return 0;
}

template <int Rank, DType Dtype>
OpArgMax<Rank, Dtype>::OpArgMax(SubgraphTraverser* sgt_,
                                TosaAttributeBase* attribute_,
                                TosaQuantInfoBase* qinfo_,
                                uint64_t id_)
    : GraphNode(sgt_, Op_ARGMAX, id_)
{
    setRequiredOperands(1, 1);
    setRequiredRank(1, 4);

    INIT_ATTRIBUTE(Axis);
}

template <int Rank, DType Dtype>
OpArgMax<Rank, Dtype>::~OpArgMax()
{
    if (attribute)
        delete attribute;
}

template <int Rank, DType Dtype>
int OpArgMax<Rank, Dtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]))
    {
        return 1;
    }

    int32_t output_rank = inputs[0]->getRank() - 1;
    if (output_rank != outputs[0]->getRank())
    {
        printNodeValidationError("OpArgMax: Output rank needs to be rank(input) - 1");
        return 1;
    }

    if (outputs[0]->getDtype() != DType_INT32)
    {
        printNodeValidationError("OpArgMax: Output data type not supported for this configuration of operator");
        return 1;
    }

    input  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    output = dynamic_cast<TosaReference::TensorTemplate<TOut>*>(outputs[0]);

    if (attribute->axis() < 0 || attribute->axis() >= input->getRank())
    {
        printNodeValidationError("OpArgMax: Axis needs to be within [0, rank(input) - 1]");
        return 1;
    }

    bool shape_check = true;
    for (int32_t i = 0; i < input->getRank(); i++)
    {
        if (i < attribute->axis())
        {
            if (input->getShape()[i] != output->getShape()[i])
            {
                shape_check = false;
                break;
            }
        }
        else if (i > attribute->axis())
        {
            if (input->getShape()[i] != output->getShape()[i - 1])
            {
                shape_check = false;
                break;
            }
        }
        // No need to check i == axis
    }
    if (!shape_check)
    {
        printNodeValidationError("OpArgMax: Mismatch between output shape provided and expected output shape");
        return 1;
    }

    return 0;
}

template <int Rank, DType Dtype>
int OpArgMax<Rank, Dtype>::eval()
{
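    // Eigen's argmax reduces away the requested dimension, producing a rank
    // (Rank - 1) tensor of DenseIndex positions that is then cast to the
    // output element type.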
    Eigen::Tensor<DenseIndex, Rank - 1> index = this->input->getTensor().argmax(attribute->axis());

    this->output->getTensor() = index.unaryExpr([](DenseIndex in) -> OutEigenType { return (OutEigenType)in; });

    return GraphNode::eval();
}

template <DType Dtype>
OpAvgPool2d<Dtype>::OpAvgPool2d(SubgraphTraverser* sgt_,
                                TosaAttributeBase* attribute_,
                                TosaQuantInfoBase* qinfo_,
                                uint64_t id_)
    : GraphNode(sgt_, Op_AVG_POOL2D, id_)
{
    setRequiredOperands(1, 1);
    setRequiredRank(4);

    INIT_ATTRIBUTE(Pool);
    INIT_QINFO(Unary);
}

template <DType Dtype>
OpAvgPool2d<Dtype>::~OpAvgPool2d()
{
    if (attribute)
        delete attribute;
}

template <DType Dtype>
int OpAvgPool2d<Dtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    if (inputs[0]->matchType(*outputs[0]))
    {
        printNodeValidationError("OpAvgPool2d: input and output tensor type mismatch");
        return 1;
    }

    in  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    out = dynamic_cast<TosaReference::TensorTemplate<TOut>*>(outputs[0]);

    if (Dtype != DType_INT8 && this->qinfo)
    {
        ERROR_IF(this->qinfo->input_zp() != 0, "OpAvgPool2d: zeropoint only for int8_t");
        ERROR_IF(this->qinfo->output_zp() != 0, "OpAvgPool2d: zeropoint only for int8_t");
    }

    std::string msg;
    if (check_pool2d_attribute(attribute, in->getShape(), out->getShape(), msg))
    {
        msg = "OpAvgPool2d: " + msg;
        printNodeValidationError(msg.c_str());
        return 1;
    }

    return 0;
}

// This calculates the number of padding elements used for each location along an axis.
// Average pooling only divides by the number of elements used, not including padding.
// This function uses left/right, but is also used for vertical padding with top/bottom.
template <DType Dtype>
ETensor1<int32_t> OpAvgPool2d<Dtype>::calculate_div_map_1d(
    int in_size, int out_size, int kernel_size, int stride, int32_t pad_left, int32_t pad_right)
{
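    // Worked example: in_size=4, kernel_size=3, stride=1, pad_left=pad_right=1
    // gives out_size=4 and divisors [2, 3, 3, 2]; the first and last windows
    // each overlap one padding element, which must not count in the average.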
    ETensor1<int32_t> result(out_size);

    result.setConstant(kernel_size);

    // adjust divisors on the left side for padding
    // We start at the leftmost output element, and remove pad_left - (index * stride) elements
    // until we have no more padding being used
    for (int index = 0; (index <= pad_left / stride) && (index < out_size); index++)
    {
        int32_t adjust = pad_left - (index * stride);
        result(index) -= adjust;
    }

    // The process repeats on the right side. Padding starts taking effect as we
    // near the rightmost input element. The first output element which touches
    // padding is defined in the initialization of index below. Then we keep moving
    // to the right, increasing padding until we get to the last output element.
    int index = std::max(0, ((pad_left + in_size - kernel_size) / stride) + 1);
    for (; index < out_size; index++)
    {
        int32_t adjust = ((index * stride) + kernel_size) - (pad_left + in_size);
        result(index) -= adjust;
    }
    return result;
}

// Assuming the input and output tensors have the same scale, as in the TFLite
// reference, there is no need to rescale the input and output.
template <DType Dtype>
int OpAvgPool2d<Dtype>::eval()
{
    int in_batch    = this->in->getShape()[0];
    int in_height   = this->in->getShape()[1];
    int in_width    = this->in->getShape()[2];
    int in_channels = this->in->getShape()[3];

    int out_batch    = this->out->getShape()[0];
    int out_height   = this->out->getShape()[1];
    int out_width    = this->out->getShape()[2];
    int out_channels = this->out->getShape()[3];

    ERROR_IF(in_batch != out_batch, "OpAvgPool2d: tensor batch mismatch %d != %d", in_batch, out_batch);
    ERROR_IF(in_channels != out_channels, "OpAvgPool2d: tensor channel mismatch %d != %d", in_channels, out_channels);

    int pad_top    = this->attribute->pad()[0];
    int pad_bottom = this->attribute->pad()[1];
    int pad_left   = this->attribute->pad()[2];
    int pad_right  = this->attribute->pad()[3];
    int kernel_h   = this->attribute->kernel()[0];
    int kernel_w   = this->attribute->kernel()[1];
    int stride_h   = this->attribute->stride()[0];
    int stride_w   = this->attribute->stride()[1];

    DEBUG_INFO(OP,
               "perform AvgPool2d, input.shape=[%d,%d,%d,%d], output.shape=[%d,%d,%d,%d], kernel=[%d,%d], "
               "stride=[%d,%d], pad=[%d,%d,%d,%d]",
               in_batch, in_height, in_width, in_channels, out_batch, out_height, out_width, out_channels, kernel_h,
               kernel_w, stride_h, stride_w, pad_top, pad_bottom, pad_left, pad_right);

    Eigen::array<Eigen::Index, 2> im2col_input_dims;
    im2col_input_dims[0] = kernel_h * kernel_w;
    im2col_input_dims[1] = out_batch * out_height * out_width * out_channels;

    Eigen::array<Eigen::Index, 4> col2im_output_dims;
    col2im_output_dims[0] = out_batch;
    col2im_output_dims[1] = out_height;
    col2im_output_dims[2] = out_width;
    col2im_output_dims[3] = out_channels;

    Eigen::array<std::pair<int32_t, int32_t>, 4> pad;
    pad[0] = std::make_pair(0, 0);
    pad[1] = std::make_pair(pad_top, pad_bottom);
    pad[2] = std::make_pair(pad_left, pad_right);
    pad[3] = std::make_pair(0, 0);

    ETensor4<InEigenType> input_val = this->in->getTensor();
    if (this->qinfo)
    {
        input_val = input_val - (InEigenType)this->qinfo->input_zp();
    }

    ETensor4<InEigenType> input_padded = input_val.pad(pad);

    // assuming input and output have the same scale,
    // so input and output scaling is not required
    // TODO: check whether TOSA makes this assumption

    // extract_image_patches() output [N, KH, KW, H * W, C]
    // transpose to [KH, KW, N, H * W, C]
    // reshape to [KH * KW, N * H * W * C]
    ETensor2<InEigenType> input_extract_patches =
        input_padded.extract_image_patches(kernel_h, kernel_w, stride_h, stride_w, 1, 1, Eigen::PADDING_VALID)
            .shuffle(Eigen::array<Eigen::Index, 5>{ 1, 2, 0, 3, 4 })
            .reshape(im2col_input_dims);

    // 1D result with [N * H * W * C]
    ETensor1<AccEigenType> out_1d(this->out->getElementCount());
    out_1d.setZero();

    // sum pool
    for (size_t i = 0; i < this->out->getElementCount(); i++)
    {
        for (int32_t j = 0; j < kernel_h * kernel_w; j++)
        {
            out_1d(i) += (AccEigenType)input_extract_patches(j, i);
        }
    }

    // reshape result to [N, H, W, C] and divide with div_map
    ETensor4<AccEigenType> sum = out_1d.reshape(col2im_output_dims);

    // calculate 1d height/width div_map (number of elements this pooling window covers)
    // and outer product to get 2d div_map, then reshape/broadcast to [N, H, W, C]
    ETensor1<int32_t> div_map_h = calculate_div_map_1d(in_height, out_height, kernel_h, stride_h, pad_top, pad_bottom);
    ETensor1<int32_t> div_map_w = calculate_div_map_1d(in_width, out_width, kernel_w, stride_w, pad_left, pad_right);
    Eigen::array<Eigen::IndexPair<Eigen::Index>, 1> contract_dims = { Eigen::IndexPair<Eigen::Index>(1, 0) };
    Eigen::array<Eigen::Index, 4> bcast{ out_batch, 1, 1, out_channels };

    ETensor4<int32_t> div_map =
        div_map_h.reshape(Eigen::array<Eigen::Index, 2>{ out_height, 1 })
            .contract(div_map_w.reshape(Eigen::array<Eigen::Index, 2>{ 1, out_width }), contract_dims)
            .reshape(Eigen::array<Eigen::Index, 4>{ 1, out_height, out_width, 1 })
            .broadcast(bcast);

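    // For integer types the division is done in fixed point: reciprocal_scale()
    // converts each divisor to a (multiplier, shift) pair and apply_scale_32()
    // performs the rounded multiply-shift, avoiding a runtime divide.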
    if (Dtype != DType_FLOAT)
    {
        try
        {
            this->out->getTensor() = sum.binaryExpr(div_map, [](AccEigenType value, int32_t div) -> OutEigenType {
                int32_t multiplier, shift;
                TosaReference::QuantUtil::reciprocal_scale(div, multiplier, shift);

                return (OutEigenType)TosaReference::QuantUtil::apply_scale_32(value, multiplier, shift, false);
            });
        }
        catch (std::string desc)
        {
            REQUIRE(false, "OpAvgPool2d apply_scale_32() fails: %s.", desc.c_str());
        }
        this->out->getTensor() = this->out->getTensor() + (OutEigenType)(this->qinfo->output_zp());
        this->out->getTensor() = this->out->getTensor().cwiseMax((OutEigenType)QMin);
        this->out->getTensor() = this->out->getTensor().cwiseMin((OutEigenType)QMax);
    }
    else
    {
        this->out->getTensor() = (sum / div_map.template cast<AccEigenType>()).template cast<OutEigenType>();
    }

    return GraphNode::eval();
}

template <DType InDtype, DType WeightDtype>
OpConv2d<InDtype, WeightDtype>::OpConv2d(SubgraphTraverser* sgt_,
                                         TosaAttributeBase* attribute_,
                                         TosaQuantInfoBase* qinfo_,
                                         uint64_t id_)
    : GraphNode(sgt_, Op_CONV2D, id_)
{
    setRequiredOperands(3, 1);
    setRequiredRank(4);

    INIT_ATTRIBUTE(Conv);
    INIT_QINFO(Conv);
}

template <DType InDtype, DType WeightDtype>
OpConv2d<InDtype, WeightDtype>::~OpConv2d()
{
    if (attribute)
        delete attribute;
    if (qinfo)
        delete qinfo;
}

template <DType InDtype, DType WeightDtype>
int OpConv2d<InDtype, WeightDtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    // 'bias' is checked separately since it doesn't make sense to require a rank ranging from 1 to 4
    if (inputs[2]->getRank() != 1)
    {
        printNodeValidationError("OpConv2d: bias tensor must be rank 1");
    }

    ERROR_IF(outputs[0]->getDtype() != AccDtype,
             "OpConv2d: Output data type not supported for this configuration of operator");

    input  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
    bias   = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
    output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);

    std::string msg;
    if (check_conv_attribute_qinfo(attribute, qinfo, 2 /* conv_dimension */, input->getShape(), output->getShape(),
                                   weight->getShape(), 1 /* offset_kernel */, InDtype, WeightDtype, msg))
    {
        msg = "OpConv2d: " + msg;
        printNodeValidationError(msg.c_str());
        return 1;
    }

    return 0;
}

template <DType InDtype, DType WeightDtype>
int OpConv2d<InDtype, WeightDtype>::eval()
{
    int in_batch    = this->input->getShape()[0];
    int in_height   = this->input->getShape()[1];
    int in_width    = this->input->getShape()[2];
    int in_channels = this->input->getShape()[3];

    int f_out_channels = this->weight->getShape()[0];
    int f_height       = this->weight->getShape()[1];
    int f_width        = this->weight->getShape()[2];
    int f_in_channels  = this->weight->getShape()[3];

    int b_out_channels = this->bias->getShape()[0];

    int out_batch    = this->output->getShape()[0];
    int out_height   = this->output->getShape()[1];
    int out_width    = this->output->getShape()[2];
    int out_channels = this->output->getShape()[3];

    ERROR_IF(in_batch != out_batch, "OpConv2d: tensor batch mismatch %d != %d", in_batch, out_batch);
    ERROR_IF(f_in_channels != in_channels, "OpConv2d: tensor input channel mismatch %d != %d", f_in_channels,
             in_channels);
    ERROR_IF(f_out_channels != out_channels, "OpConv2d: tensor output channel mismatch %d != %d", f_out_channels,
             out_channels);
    ERROR_IF(b_out_channels != out_channels, "OpConv2d: bias channel mismatch %d != %d", b_out_channels, out_channels);

    int pad_top    = this->attribute->pad()[0];
    int pad_bottom = this->attribute->pad()[1];
    int pad_left   = this->attribute->pad()[2];
    int pad_right  = this->attribute->pad()[3];

    int stride_h   = this->attribute->stride()[0];
    int stride_w   = this->attribute->stride()[1];
    int dilation_h = this->attribute->dilation()[0];
    int dilation_w = this->attribute->dilation()[1];

    DEBUG_INFO(OP,
               "perform OpConv2d, input.shape=[%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d], output.shape=[%d,%d,%d,%d], "
               "stride=[%d,%d], dilation=[%d,%d], pad=[%d,%d,%d,%d]",
               in_batch, in_height, in_width, in_channels, f_height, f_width, f_in_channels, f_out_channels, out_batch,
               out_height, out_width, out_channels, stride_h, stride_w, dilation_h, dilation_w, pad_top,
               pad_bottom, pad_left, pad_right);

    // GEMM-conv2d, left matrix is input, right matrix is weight
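    // im2col lowers the convolution to a single matrix multiply:
    // [N * OH * OW, KH * KW * IC] x [KH * KW * IC, OC] -> [N * OH * OW, OC]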
    Eigen::array<Eigen::Index, 2> im2col_input_dims;
    im2col_input_dims[0] = out_batch * out_height * out_width;
    im2col_input_dims[1] = f_height * f_width * f_in_channels;

    Eigen::array<Eigen::Index, 2> im2col_weight_dims;
    im2col_weight_dims[0] = f_height * f_width * f_in_channels;
    im2col_weight_dims[1] = f_out_channels;

    Eigen::array<Eigen::Index, 2> bias_reshaped_dims;
    bias_reshaped_dims[0] = 1;
    bias_reshaped_dims[1] = b_out_channels;

    Eigen::array<Eigen::Index, 4> weight_zp_bcast_dims;
    weight_zp_bcast_dims[0] = f_height;
    weight_zp_bcast_dims[1] = f_width;
    weight_zp_bcast_dims[2] = f_in_channels;

    Eigen::array<Eigen::Index, 2> bias_bcast_dims;
    bias_bcast_dims[0] = out_batch * out_height * out_width;
    bias_bcast_dims[1] = 1;

    Eigen::array<Eigen::Index, 4> col2im_output_dims;
    col2im_output_dims[0] = out_batch;
    col2im_output_dims[1] = out_height;
    col2im_output_dims[2] = out_width;
    col2im_output_dims[3] = out_channels;

    Eigen::array<Eigen::IndexPair<Eigen::Index>, 1> contract_dims = { Eigen::IndexPair<Eigen::Index>(1, 0) };

    Eigen::array<std::pair<int32_t, int32_t>, 4> pad;
    pad[0] = std::make_pair(0, 0);
    pad[1] = std::make_pair(pad_top, pad_bottom);
    pad[2] = std::make_pair(pad_left, pad_right);
    pad[3] = std::make_pair(0, 0);

    TIn input_val      = this->input->getTensor();
    TWeight weight_val = this->weight->getTensor();
    if (this->qinfo)
    {
        input_val  = input_val - (InEigenType)this->qinfo->input_zp();
        weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
    }

    ETensor4<InEigenType> input_padded = input_val.pad(pad);

    // extract_image_patches() output [N, KH, KW, H * W, C]
    // need to transpose to [N, H * W, KH, KW, C]
    ETensor5<InEigenType> input_extract_patches =
        input_padded
            .extract_image_patches(f_height, f_width, stride_h, stride_w, dilation_h, dilation_w, Eigen::PADDING_VALID)
            .shuffle(Eigen::array<Eigen::Index, 5>{ 0, 3, 1, 2, 4 });

    // reshape input to [N * H * W, KH * KW * C]
    ETensor2<InEigenType> im2col_input = input_extract_patches.reshape(im2col_input_dims);

    // transpose and reshape weight from [OC, H, W, IC] to [H * W * IC, OC]
    ETensor2<WeightEigenType> im2col_weight =
        weight_val.shuffle(Eigen::array<Eigen::Index, 4>({ 1, 2, 3, 0 })).reshape(im2col_weight_dims);

    // no need to apply bias_multiplier ( * bias_scale and >> bias_shift) since tflite already scales it;
    // bias is reshaped from [C] to [1, C] and broadcast to [N * H * W, C]
    ETensor2<AccEigenType> bias_2d = this->bias->getTensor().reshape(bias_reshaped_dims).broadcast(bias_bcast_dims);

    // output matrix is [N * H * W, C]
    ETensor2<AccEigenType> contracted_result =
        im2col_input.template cast<AccEigenType>().contract(im2col_weight.template cast<AccEigenType>(), contract_dims);

    // adding bias
    ETensor2<AccEigenType> biased_output = contracted_result + bias_2d.template cast<AccEigenType>();

    // reshape back to [N, H, W, C]
    this->output->getTensor() = biased_output.reshape(col2im_output_dims);

    if (AccDtype == DType_INT48)
    {
        this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
    }

    return GraphNode::eval();
}

template <DType InDtype, DType WeightDtype>
OpConv3d<InDtype, WeightDtype>::OpConv3d(SubgraphTraverser* sgt_,
                                         TosaAttributeBase* attribute_,
                                         TosaQuantInfoBase* qinfo_,
                                         uint64_t id_)
    : GraphNode(sgt_, Op_CONV3D, id_)
{
    setRequiredOperands(3, 1);
    setRequiredRank(5);

    INIT_ATTRIBUTE(Conv);
    INIT_QINFO(Conv);
}

template <DType InDtype, DType WeightDtype>
OpConv3d<InDtype, WeightDtype>::~OpConv3d()
{
    if (attribute)
        delete attribute;
    if (qinfo)
        delete qinfo;
}

template <DType InDtype, DType WeightDtype>
int OpConv3d<InDtype, WeightDtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    // 'bias' is checked separately since it doesn't make sense to require a rank ranging from 1 to 4
    if (inputs[2]->getRank() != 1)
    {
        printNodeValidationError("OpConv3d: bias tensor must be rank 1");
    }

    ERROR_IF(outputs[0]->getDtype() != AccDtype,
             "OpConv3d: Output data type not supported for this configuration of operator");

    input  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
    bias   = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
    output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);

    std::string msg;
    if (check_conv_attribute_qinfo(attribute, qinfo, 3 /* conv_dimension */, input->getShape(), output->getShape(),
                                   weight->getShape(), 1 /* offset_kernel */, InDtype, WeightDtype, msg))
    {
        msg = "OpConv3d: " + msg;
        printNodeValidationError(msg.c_str());
        return 1;
    }

    return 0;
}

template <DType InDtype, DType WeightDtype>
int OpConv3d<InDtype, WeightDtype>::eval()
{
    int in_batch    = this->input->getShape()[0];
    int in_depth    = this->input->getShape()[1];
    int in_height   = this->input->getShape()[2];
    int in_width    = this->input->getShape()[3];
    int in_channels = this->input->getShape()[4];

    int f_out_channels = this->weight->getShape()[0];
    int f_depth        = this->weight->getShape()[1];
    int f_height       = this->weight->getShape()[2];
    int f_width        = this->weight->getShape()[3];
    int f_in_channels  = this->weight->getShape()[4];

    int b_out_channels = this->bias->getShape()[0];

    int out_batch    = this->output->getShape()[0];
    int out_depth    = this->output->getShape()[1];
    int out_height   = this->output->getShape()[2];
    int out_width    = this->output->getShape()[3];
    int out_channels = this->output->getShape()[4];

    ERROR_IF(in_batch != out_batch, "OpConv3d: tensor batch mismatch %d != %d", in_batch, out_batch);
    ERROR_IF(f_in_channels != in_channels, "OpConv3d: tensor input channel mismatch %d != %d", f_in_channels,
             in_channels);
    ERROR_IF(f_out_channels != out_channels, "OpConv3d: tensor output channel mismatch %d != %d", f_out_channels,
             out_channels);
    ERROR_IF(b_out_channels != out_channels, "OpConv3d: bias channel mismatch %d != %d", b_out_channels, out_channels);

    int pad_d0     = this->attribute->pad()[0];
    int pad_d1     = this->attribute->pad()[1];
    int pad_top    = this->attribute->pad()[2];
    int pad_bottom = this->attribute->pad()[3];
    int pad_left   = this->attribute->pad()[4];
    int pad_right  = this->attribute->pad()[5];

    int stride_d = this->attribute->stride()[0];
    int stride_h = this->attribute->stride()[1];
    int stride_w = this->attribute->stride()[2];

    int dilation_d = this->attribute->dilation()[0];
    int dilation_h = this->attribute->dilation()[1];
    int dilation_w = this->attribute->dilation()[2];

    DEBUG_INFO(
        OP,
        "perform OpConv3d, input.shape=[%d,%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d,%d], output.shape=[%d,%d,%d,%d,%d], "
        "stride=[%d,%d,%d], dilation=[%d,%d,%d], pad=[%d,%d,%d,%d,%d,%d]",
        in_batch, in_depth, in_height, in_width, in_channels, f_out_channels, f_depth, f_height, f_width, f_in_channels,
        out_batch, out_depth, out_height, out_width, out_channels, stride_d, stride_h, stride_w, dilation_d, dilation_h,
        dilation_w, pad_d0, pad_d1, pad_top, pad_bottom, pad_left, pad_right);

    Eigen::array<std::pair<int32_t, int32_t>, 5> pad;
    pad[0] = std::make_pair(0, 0);
    pad[1] = std::make_pair(pad_d0, pad_d1);
    pad[2] = std::make_pair(pad_top, pad_bottom);
    pad[3] = std::make_pair(pad_left, pad_right);
    pad[4] = std::make_pair(0, 0);

    TIn input_val      = this->input->getTensor();
    TWeight weight_val = this->weight->getTensor();
    if (this->qinfo)
    {
        input_val  = input_val - (InEigenType)this->qinfo->input_zp();
        weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
    }

    ETensor5<InEigenType> input_padded = input_val.pad(pad);

    // 1. initialize with bias
    Eigen::array<Eigen::Index, 5> reshape_dim;
    reshape_dim.fill(1);
    reshape_dim[4] = b_out_channels;

    Eigen::array<Eigen::Index, 5> bcast;
    bcast[0] = out_batch;
    bcast[1] = out_depth;
    bcast[2] = out_height;
    bcast[3] = out_width;
    bcast[4] = 1;
    this->output->getTensor() = this->bias->getTensor().reshape(reshape_dim).broadcast(bcast);

    // 2. direct convolution
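    // No im2col lowering here: every output element accumulates
    // input * weight over the full kernel volume, with stride and dilation
    // applied in the input index computation below.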
    AccEigenType acc = 0;
    int d_idx, h_idx, w_idx;

    for (int ob = 0; ob < out_batch; ob++)
    {
        for (int od = 0; od < out_depth; od++)
        {
            for (int oh = 0; oh < out_height; oh++)
            {
                for (int ow = 0; ow < out_width; ow++)
                {
                    for (int oc = 0; oc < out_channels; oc++)
                    {
                        // Initialize accumulator with bias value
                        acc = this->output->getTensor()(ob, od, oh, ow, oc);
                        for (int fd = 0; fd < f_depth; fd++)
                        {
                            d_idx = od * stride_d + fd * dilation_d;
                            for (int fh = 0; fh < f_height; fh++)
                            {
                                h_idx = oh * stride_h + fh * dilation_h;
                                for (int fw = 0; fw < f_width; fw++)
                                {
                                    w_idx = ow * stride_w + fw * dilation_w;
                                    for (int ic = 0; ic < in_channels; ic++)
                                    {
                                        acc += ((AccEigenType)input_padded(ob, d_idx, h_idx, w_idx, ic) *
                                                (AccEigenType)weight_val(oc, fd, fh, fw, ic));
                                    }
                                }
                            }
                        }
                        this->output->getTensor()(ob, od, oh, ow, oc) = acc;
                    }
                }
            }
        }
    }

    if (AccDtype == DType_INT48)
    {
        this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
    }

    return GraphNode::eval();
}

template <DType InDtype, DType WeightDtype>
OpDepthwiseConv2d<InDtype, WeightDtype>::OpDepthwiseConv2d(SubgraphTraverser* sgt_,
                                                           TosaAttributeBase* attribute_,
                                                           TosaQuantInfoBase* qinfo_,
                                                           uint64_t id_)
    : GraphNode(sgt_, Op_DEPTHWISE_CONV2D, id_)
{
    setRequiredOperands(3, 1);
    setRequiredRank(4);

    INIT_ATTRIBUTE(Conv);
    INIT_QINFO(Conv);
}

template <DType InDtype, DType WeightDtype>
OpDepthwiseConv2d<InDtype, WeightDtype>::~OpDepthwiseConv2d()
{
    if (attribute)
        delete attribute;
    if (qinfo)
        delete qinfo;
}

template <DType InDtype, DType WeightDtype>
int OpDepthwiseConv2d<InDtype, WeightDtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    // 'bias' is checked separately since it doesn't make sense to require a rank ranging from 1 to 4
    if (inputs[2]->getRank() != 1)
    {
        printNodeValidationError("OpDepthwiseConv2d: bias tensor must be rank 1");
    }

    ERROR_IF(outputs[0]->getDtype() != AccDtype,
             "OpDepthwiseConv2d: Output data type not supported for this configuration of operator");

    input  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
    bias   = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
    output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);

    std::string msg;
    if (check_conv_attribute_qinfo(attribute, qinfo, 2 /* conv_dimension */, input->getShape(), output->getShape(),
                                   weight->getShape(), 0 /* offset_kernel */, InDtype, WeightDtype, msg))
    {
        msg = "OpDepthwiseConv2d: " + msg;
        printNodeValidationError(msg.c_str());
        return 1;
    }

    return 0;
}

template <DType InDtype, DType WeightDtype>
int OpDepthwiseConv2d<InDtype, WeightDtype>::eval()
{
    int in_batch    = this->input->getShape()[0];
    int in_height   = this->input->getShape()[1];
    int in_width    = this->input->getShape()[2];
    int in_channels = this->input->getShape()[3];

    int f_height      = this->weight->getShape()[0];
    int f_width       = this->weight->getShape()[1];
    int f_in_channels = this->weight->getShape()[2];
    int f_multiplier  = this->weight->getShape()[3];

    int b_out_channels = this->bias->getShape()[0];

    int out_batch    = this->output->getShape()[0];
    int out_height   = this->output->getShape()[1];
    int out_width    = this->output->getShape()[2];
    int out_channels = this->output->getShape()[3];

    ERROR_IF(in_batch != out_batch, "OpDepthwiseConv2d: tensor batch mismatch %d != %d", in_batch, out_batch);
    ERROR_IF(f_in_channels != in_channels, "OpDepthwiseConv2d: tensor input channel mismatch %d != %d", f_in_channels,
             in_channels);
    ERROR_IF(in_channels * f_multiplier != out_channels, "OpDepthwiseConv2d: tensor output channel mismatch %d != %d",
             in_channels * f_multiplier, out_channels);
    ERROR_IF(b_out_channels != out_channels, "OpDepthwiseConv2d: bias channels mismatch %d != %d", b_out_channels,
             out_channels);

    int pad_top    = this->attribute->pad()[0];
    int pad_bottom = this->attribute->pad()[1];
    int pad_left   = this->attribute->pad()[2];
    int pad_right  = this->attribute->pad()[3];

    int stride_h   = this->attribute->stride()[0];
    int stride_w   = this->attribute->stride()[1];
    int dilation_h = this->attribute->dilation()[0];
    int dilation_w = this->attribute->dilation()[1];

    DEBUG_INFO(OP,
               "perform OpDepthwiseConv2d, input.shape=[%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d], "
               "output.shape=[%d,%d,%d,%d], stride=[%d,%d], dilation=[%d,%d], pad=[%d,%d,%d,%d]",
               in_batch, in_height, in_width, in_channels, f_height, f_width, f_in_channels, f_multiplier, out_batch,
               out_height, out_width, out_channels, stride_h, stride_w, dilation_h, dilation_w, pad_top,
               pad_bottom, pad_left, pad_right);

    Eigen::array<std::pair<int32_t, int32_t>, 4> pad;
    pad[0] = std::make_pair(0, 0);
    pad[1] = std::make_pair(pad_top, pad_bottom);
    pad[2] = std::make_pair(pad_left, pad_right);
    pad[3] = std::make_pair(0, 0);

    TIn input_val      = this->input->getTensor();
    TWeight weight_val = this->weight->getTensor();
    if (this->qinfo)
    {
        input_val  = input_val - (InEigenType)this->qinfo->input_zp();
        weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
    }

    ETensor4<InEigenType> input_padded = input_val.pad(pad);

    // GEMM doesn't fit well with DepthwiseConv2d
    // 1. use extract_image_patches() to handle stride/dilation/pad
    // 2. perform direct convolution

    // 1. extract_image_patches() output [N, KH, KW, OH * OW, IC]
    ETensor5<InEigenType> input_extract_patches = input_padded.extract_image_patches(
        f_height, f_width, stride_h, stride_w, dilation_h, dilation_w, Eigen::PADDING_VALID);

    Eigen::array<Eigen::Index, 4> reshape_dim;
    reshape_dim.fill(1);
    reshape_dim[3] = b_out_channels;

    Eigen::array<Eigen::Index, 4> bcast;
    bcast[0] = out_batch;
    bcast[1] = out_height;
    bcast[2] = out_width;
    bcast[3] = 1;

    // initialize with bias
    this->output->getTensor() = this->bias->getTensor().reshape(reshape_dim).broadcast(bcast);

    // 2. direct depthwise convolution
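    // Each input channel ic produces f_multiplier independent outputs; the
    // output channel index is ic * f_multiplier + cm. Note the patch index
    // is ow * out_height + oh, matching the patch ordering that
    // extract_image_patches produces for these tensors.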
    for (int ob = 0; ob < out_batch; ob++)
    {
        for (int oh = 0; oh < out_height; oh++)
        {
            for (int ow = 0; ow < out_width; ow++)
            {
                for (int ic = 0; ic < in_channels; ic++)
                {
                    for (int cm = 0; cm < f_multiplier; cm++)
                    {
                        for (int fh = 0; fh < f_height; fh++)
                        {
                            for (int fw = 0; fw < f_width; fw++)
                            {
                                this->output->getTensor()(ob, oh, ow, ic * f_multiplier + cm) +=
                                    ((AccEigenType)input_extract_patches(ob, fh, fw, ow * out_height + oh, ic) *
                                     (AccEigenType)weight_val(fh, fw, ic, cm));
                            }
                        }
                    }
                }
            }
        }
    }

    if (AccDtype == DType_INT48)
    {
        this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
    }

    return GraphNode::eval();
}

template <DType InDtype, DType WeightDtype>
OpFullyConnected<InDtype, WeightDtype>::OpFullyConnected(SubgraphTraverser* sgt_,
                                                         TosaAttributeBase* attribute_,
                                                         TosaQuantInfoBase* qinfo_,
                                                         uint64_t id_)
    : GraphNode(sgt_, Op_FULLY_CONNECTED, id_)
{
    setRequiredOperands(3, 1);
    setRequiredRank(2);

    INIT_QINFO(Conv);
}

template <DType InDtype, DType WeightDtype>
OpFullyConnected<InDtype, WeightDtype>::~OpFullyConnected()
{
    if (qinfo)
        delete qinfo;
}

template <DType InDtype, DType WeightDtype>
int OpFullyConnected<InDtype, WeightDtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    input  = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
    bias   = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);

    if (input->getShape()[1] != weight->getShape()[1])
    {
        printNodeValidationError("OpFullyConnected operator input.shape[1] should match weight.shape[1]");
        return 1;
    }

    if (weight->getShape()[0] != bias->getShape()[0])
    {
        printNodeValidationError("OpFullyConnected operator bias.shape[0] should match weight.shape[0]");
        return 1;
    }

    ERROR_IF(outputs[0]->getDtype() != AccDtype,
             "OpFullyConnected: Output data type not supported for this configuration of operator");

    output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);

    if (this->qinfo)
    {
        if (InDtype != DType_INT8)
        {
            ERROR_IF(this->qinfo->input_zp() != 0, "OpFullyConnected: zeropoint only for int8_t");
        }
        if (WeightDtype != DType_INT8)
        {
            ERROR_IF(this->qinfo->weight_zp() != 0, "OpFullyConnected: zeropoint only for int8_t");
        }
    }

    return 0;
}

template <DType InDtype, DType WeightDtype>
int OpFullyConnected<InDtype, WeightDtype>::eval()
{
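    // output[n, oc] = sum_c(input[n, c] * weight[oc, c]) + bias[oc].
    // The weight is shuffled from [OC, C] to [C, OC] so one contraction over
    // dimension pair (1, 0) computes the product.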
    typedef Eigen::Tensor<int, 1>::DimensionPair DimPair;
    Eigen::array<DimPair, 1> dims{ { DimPair(1, 0) } };

    Eigen::array<Eigen::Index, 2> weight_shuffle{ 1, 0 };

    Eigen::array<Eigen::Index, 2> bias_reshape;
    bias_reshape[0] = 1;
    bias_reshape[1] = this->bias->getShape()[0];

    Eigen::array<Eigen::Index, 2> bias_bcast;
    bias_bcast[0] = this->input->getShape()[0];
    bias_bcast[1] = 1;

    TIn input_val      = this->input->getTensor();
    TWeight weight_val = this->weight->getTensor().shuffle(weight_shuffle);
    if (this->qinfo)
    {
        input_val  = input_val - (InEigenType)this->qinfo->input_zp();
        weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
    }

    this->output->getTensor() =
        input_val.template cast<AccEigenType>().contract(weight_val.template cast<AccEigenType>(), dims) +
        this->bias->getTensor().reshape(bias_reshape).broadcast(bias_bcast);

    if (AccDtype == DType_INT48)
    {
        this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
    }
    return GraphNode::eval();
}

template <DType Dtype>
OpMatMul<Dtype>::OpMatMul(SubgraphTraverser* sgt_,
                          TosaAttributeBase* attribute_,
                          TosaQuantInfoBase* qinfo_,
                          uint64_t id_)
    : GraphNode(sgt_, Op_MATMUL, id_)
{
    setRequiredOperands(2, 1);
    setRequiredRank(3);

    INIT_QINFO(MatMul);
}

template <DType Dtype>
OpMatMul<Dtype>::~OpMatMul()
{
    if (qinfo)
        delete qinfo;
}

template <DType Dtype>
int OpMatMul<Dtype>::checkTensorAttributes()
{
    if (validateRequiredOperands())
        return 1;

    if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
    {
        return 1;
    }

    ERROR_IF(outputs[0]->getDtype() != AccDtype,
             "OpMatMul: Output data type not supported for this configuration of operator");

    a      = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
    b      = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[1]);
    output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);

    ASSERT_MEM(a && b && output);

    // a: [N, H, C]
    // b: [N, C, W]
    // c: [N, H, W]

    // Check N
    if (a->getShape()[0] != b->getShape()[0] || a->getShape()[0] != output->getShape()[0])
    {
        printNodeValidationError("OpMatMul operator a.shape[0], b.shape[0] and output.shape[0] should match");
        return 1;
    }
    N = a->getShape()[0];

    // Check C
    if (a->getShape()[2] != b->getShape()[1])
    {
        printNodeValidationError("OpMatMul operator a.shape[2] should match b.shape[1]");
        return 1;
    }
    C = a->getShape()[2];

    // Check H
    if (a->getShape()[1] != output->getShape()[1])
    {
        printNodeValidationError("OpMatMul operator a.shape[1] should match output.shape[1]");
        return 1;
    }
    H = a->getShape()[1];

    // Check W
    if (b->getShape()[2] != output->getShape()[2])
    {
        printNodeValidationError("OpMatMul operator b.shape[2] should match output.shape[2]");
        return 1;
    }
    W = b->getShape()[2];

    if (Dtype != DType_INT8 && this->qinfo)
    {
        ERROR_IF(this->qinfo->a_zp() != 0, "OpMatMul: zeropoint only for int8_t");
        ERROR_IF(this->qinfo->b_zp() != 0, "OpMatMul: zeropoint only for int8_t");
    }

    return 0;
}

template <DType Dtype>
int OpMatMul<Dtype>::eval()
{
    typedef Eigen::Tensor<int, 1>::DimensionPair DimPair;
    Eigen::array<DimPair, 1> dims{ { DimPair(1, 0) } };

    TIn a_val = this->a->getTensor();
    TIn b_val = this->b->getTensor();
    if (this->qinfo)
    {
        a_val = a_val - (InEigenType)this->qinfo->a_zp();
        b_val = b_val - (InEigenType)this->qinfo->b_zp();
    }

    Eigen::array<Eigen::Index, 2> a_rank2_shape({ H, C });
    Eigen::array<Eigen::Index, 2> b_rank2_shape({ C, W });
    Eigen::array<Eigen::Index, 3> output_rank3_shape({ 1, H, W });

    Eigen::array<Eigen::Index, 3> a_size_array({ 1, H, C });
    Eigen::array<Eigen::Index, 3> b_size_array({ 1, C, W });

    Eigen::array<Eigen::Index, 3> a_begin_array({ 0, 0, 0 });
    Eigen::array<Eigen::Index, 3> b_begin_array({ 0, 0, 0 });

    // Iterate N dimension.
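    // Eigen contractions are rank-2, so each batch is sliced out, reshaped to
    // a matrix, multiplied, and the [1, H, W] results concatenated back along
    // the batch dimension.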
    for (int i = 0; i < N; i++)
    {
        a_begin_array[0] = i;
        b_begin_array[0] = i;

        TInRank2 a_rank2_val = a_val.slice(a_begin_array, a_size_array).reshape(a_rank2_shape);
        TInRank2 b_rank2_val = b_val.slice(b_begin_array, b_size_array).reshape(b_rank2_shape);
        TAccRank2 output_rank2_val =
            a_rank2_val.template cast<AccEigenType>().contract(b_rank2_val.template cast<AccEigenType>(), dims);
        TAcc output_rank3_val = output_rank2_val.reshape(output_rank3_shape);
        if (i == 0)
        {
            this->output->getTensor() = output_rank3_val;
        }
        else
        {
            TAcc temp                 = this->output->getTensor().concatenate(output_rank3_val, 0);
            this->output->getTensor() = temp;
        }
    }

    if (AccDtype == DType_INT48)
    {
        this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
        this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
    }

    return GraphNode::eval();
}

1351template <DType Dtype>
Kevin Chengacb550f2021-06-29 15:32:19 -07001352OpMaxPool2d<Dtype>::OpMaxPool2d(SubgraphTraverser* sgt_,
1353 TosaAttributeBase* attribute_,
1354 TosaQuantInfoBase* qinfo_,
1355 uint64_t id_)
1356 : GraphNode(sgt_, Op_MAX_POOL2D, id_)
Eric Kunzee5e26762020-10-13 16:11:07 -07001357{
1358 setRequiredOperands(1, 1);
1359 setRequiredRank(4);
1360
Kevin Cheng93a16282021-08-31 16:14:03 -07001361 INIT_ATTRIBUTE(Pool);
Eric Kunzee5e26762020-10-13 16:11:07 -07001362}
1363
1364template <DType Dtype>
1365OpMaxPool2d<Dtype>::~OpMaxPool2d()
1366{
1367 if (attribute)
1368 delete attribute;
1369}
1370
1371template <DType Dtype>
1372int OpMaxPool2d<Dtype>::checkTensorAttributes()
1373{
1374 if (validateRequiredOperands())
1375 return 1;
1376
1377 if (validateRequiredRank(inputs[0]) || validateRequiredRank(outputs[0]))
1378 {
1379 return 1;
1380 }
1381
1382 if (inputs[0]->matchType(*outputs[0]))
1383 {
1384 printNodeValidationError("OpMaxPool2d: input and output tensor type mismatch");
1385 return 1;
1386 }
1387
1388 in = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
1389 out = dynamic_cast<TosaReference::TensorTemplate<TOut>*>(outputs[0]);
1390
Kevin Cheng7eb93d72021-10-09 01:26:08 +00001391 std::string msg;
Kevin Cheng9fe17242021-11-10 01:04:39 +00001392 if (check_pool2d_attribute(attribute, in->getShape(), out->getShape(), msg))
Eric Kunzee5e26762020-10-13 16:11:07 -07001393 {
Kevin Cheng7eb93d72021-10-09 01:26:08 +00001394 msg = "OpMaxPool2d: " + msg;
1395 printNodeValidationError(msg.c_str());
Eric Kunzee5e26762020-10-13 16:11:07 -07001396 return 1;
1397 }
1398
1399 return 0;
1400}
1401
1402template <DType Dtype>
1403int OpMaxPool2d<Dtype>::eval()
1404{
1405 int in_batch = this->in->getShape()[0];
1406 int in_height = this->in->getShape()[1];
1407 int in_width = this->in->getShape()[2];
1408 int in_channels = this->in->getShape()[3];
1409
1410 int out_batch = this->out->getShape()[0];
1411 int out_height = this->out->getShape()[1];
1412 int out_width = this->out->getShape()[2];
1413 int out_channels = this->out->getShape()[3];
1414
Kevin Chengacb550f2021-06-29 15:32:19 -07001415 ERROR_IF(in_batch != out_batch, "OpMaxPool2d: tensor batch mismatch %d != %d", in_batch, out_batch);
1416 ERROR_IF(in_channels != out_channels, "OpMaxPool2d: tensor channel mismatch %d != %d", in_channels, out_channels);
Eric Kunzee5e26762020-10-13 16:11:07 -07001417
TatWai Chong86c403b2022-06-06 20:46:01 -07001418 int pad_top = this->attribute->pad()[0];
1419 int pad_bottom = this->attribute->pad()[1];
1420 int pad_left = this->attribute->pad()[2];
1421 int pad_right = this->attribute->pad()[3];
1422
Eric Kunzee5e26762020-10-13 16:11:07 -07001423 int kernel_h = this->attribute->kernel()[0];
1424 int kernel_w = this->attribute->kernel()[1];
1425 int stride_h = this->attribute->stride()[0];
1426 int stride_w = this->attribute->stride()[1];
1427
1428 DEBUG_INFO(OP,
1429 "perform MaxPool2d, input.shape=[%d,%d,%d,%d], output.shape=[%d,%d,%d,%d], kernel=[%d,%d], "
TatWai Chong86c403b2022-06-06 20:46:01 -07001430 "stride=[%d,%d], pad=[%d,%d,%d,%d]",
Eric Kunzee5e26762020-10-13 16:11:07 -07001431 in_batch, in_height, in_width, in_channels, out_batch, out_height, out_width, out_channels, kernel_h,
TatWai Chong86c403b2022-06-06 20:46:01 -07001432 kernel_w, stride_h, stride_w, pad_top, pad_bottom, pad_left, pad_right);
Eric Kunzee5e26762020-10-13 16:11:07 -07001433
1434 Eigen::array<Eigen::Index, 2> im2col_input_dims;
1435 im2col_input_dims[0] = kernel_h * kernel_w;
1436 im2col_input_dims[1] = out_batch * out_height * out_width * out_channels;
1437
1438 Eigen::array<Eigen::Index, 4> col2im_output_dims;
1439 col2im_output_dims[0] = out_batch;
1440 col2im_output_dims[1] = out_height;
1441 col2im_output_dims[2] = out_width;
1442 col2im_output_dims[3] = out_channels;
1443
TatWai Chong86c403b2022-06-06 20:46:01 -07001444 Eigen::array<std::pair<int32_t, int32_t>, 4> pad;
1445 pad[0] = std::make_pair(0, 0);
1446 pad[1] = std::make_pair(pad_top, pad_bottom);
1447 pad[2] = std::make_pair(pad_left, pad_right);
1448 pad[3] = std::make_pair(0, 0);
Eric Kunzee5e26762020-10-13 16:11:07 -07001449
TatWai Chong86c403b2022-06-06 20:46:01 -07001450 ETensor4<InEigenType> input_padded = this->in->getTensor().pad(pad, std::numeric_limits<InEigenType>::lowest());
Eric Kunzee5e26762020-10-13 16:11:07 -07001451
1452 // extract_image_patches() output [N, KH, KW, H * W, C]
1453 // transpose to [KH, KW, N, H * W, C]
1454 // reshape to [KH * KW, N * H * W * C]
1455 //
1456 // Set the padding value to be the most negative value that can be
1457 // represented by the datatype to ensure that any padding values will be equal
1458 // to or smaller than the actual maximum in the KH x KW patch.
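    //
    // Worked shape example (hypothetical sizes, not taken from the graph):
    // N=1, OH=OW=4, C=8 with a 2x2 kernel gives patches of [1, 2, 2, 16, 8],
    // shuffled to [2, 2, 1, 16, 8] and reshaped to
    // im2col_input_dims = [2 * 2, 1 * 4 * 4 * 8] = [4, 128]; the argmax over
    // axis 0 below then selects the maximum of each 4-element patch column.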
1459 ETensor2<InEigenType> input_extract_patches =
1460 input_padded
1461 .extract_image_patches(kernel_h, kernel_w, stride_h, stride_w, 1, 1, Eigen::PADDING_VALID,
1462 std::numeric_limits<InEigenType>::lowest())
1463 .shuffle(Eigen::array<Eigen::Index, 5>{ 1, 2, 0, 3, 4 })
1464 .reshape(im2col_input_dims);
1465
1466    // Get the maximum of the KH x KW patches along axis 0
1467 Eigen::Tensor<DenseIndex, 1> tensor_argmax = input_extract_patches.argmax(0);
1468
1469 // 1D result with [N * H * W * C]
1470 ETensor1<OutEigenType> out_1d(this->out->getElementCount());
1471
1472    // indexing input_extract_patches with the argmax indices gives the pooled result
1473 for (size_t i = 0; i < this->out->getElementCount(); i++)
1474 {
1475 out_1d(i) = (OutEigenType)input_extract_patches(tensor_argmax(i), i);
1476 }
1477
1478 // reshape result to [N, H, W, C]
1479 this->out->getTensor() = out_1d.reshape(col2im_output_dims);
1480
1481 return GraphNode::eval();
1482}
1483
Kevin Chengcc61be32021-10-14 17:09:57 -07001484template <DType InDtype, DType WeightDtype>
1485OpTransposeConv2d<InDtype, WeightDtype>::OpTransposeConv2d(SubgraphTraverser* sgt_,
1486 TosaAttributeBase* attribute_,
1487 TosaQuantInfoBase* qinfo_,
1488 uint64_t id_)
Kevin Chengacb550f2021-06-29 15:32:19 -07001489 : GraphNode(sgt_, Op_TRANSPOSE_CONV2D, id_)
Eric Kunzee5e26762020-10-13 16:11:07 -07001490{
1491 setRequiredOperands(3, 1);
1492 setRequiredRank(4);
1493
Kevin Cheng93a16282021-08-31 16:14:03 -07001494 INIT_ATTRIBUTE(TransposeConv);
Eric Kunzee5e26762020-10-13 16:11:07 -07001495 INIT_QINFO(Conv);
1496}
1497
Kevin Chengcc61be32021-10-14 17:09:57 -07001498template <DType InDtype, DType WeightDtype>
1499OpTransposeConv2d<InDtype, WeightDtype>::~OpTransposeConv2d()
Eric Kunzee5e26762020-10-13 16:11:07 -07001500{
1501 if (attribute)
1502 delete attribute;
1503 if (qinfo)
1504 delete qinfo;
1505}
1506
Kevin Chengcc61be32021-10-14 17:09:57 -07001507template <DType InDtype, DType WeightDtype>
1508int OpTransposeConv2d<InDtype, WeightDtype>::checkTensorAttributes()
Eric Kunzee5e26762020-10-13 16:11:07 -07001509{
1510 if (validateRequiredOperands())
1511 return 1;
1512
1513 if (validateRequiredRank(inputs[0]) || validateRequiredRank(inputs[1]) || validateRequiredRank(outputs[0]))
1514 {
1515 return 1;
1516 }
1517
Kevin Chengcc61be32021-10-14 17:09:57 -07001518 ERROR_IF(outputs[0]->getDtype() != AccDtype,
Kevin Cheng80794802021-11-01 11:14:13 -07001519 "OpTransposeConv2d: Output data type not supported for this configuration of operator");
Kevin Chengcc61be32021-10-14 17:09:57 -07001520
Eric Kunzee5e26762020-10-13 16:11:07 -07001521 input = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
1522 weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
1523 bias = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
1524 output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);
1525
Jeremy Johnson4a6fb9b2022-04-26 15:47:21 +01001526 if (attribute->outpad().size() != 4)
Eric Kunzee5e26762020-10-13 16:11:07 -07001527 {
1528 printNodeValidationError("OpTransposeConv2d: illegal size for attribute outpad");
1529 return 1;
1530 }
1531
1532 if (attribute->stride().size() != 2)
1533 {
1534 printNodeValidationError("OpTransposeConv2d: illegal size for attribute stride");
1535 return 1;
1536 }
1537
1538 if (attribute->dilation().size() != 2)
1539 {
1540 printNodeValidationError("OpTransposeConv2d: illegal size for attribute dilation");
1541 return 1;
1542 }
1543
1544 if (attribute->output_shape().size() != 4)
1545 {
1546 printNodeValidationError("OpTransposeConv2d: illegal size for attribute output_shape");
1547 return 1;
1548 }
1549
Kevin Cheng9fe17242021-11-10 01:04:39 +00001550 for (int32_t i : attribute->outpad())
1551 {
1552 if (i < 0)
1553 {
1554 printNodeValidationError("OpTransposeConv2d: At least one pad is smaller than zero");
1555 return 1;
1556 }
1557 }
1558
1559 for (int32_t i : attribute->stride())
1560 {
1561 if (i < 1)
1562 {
1563 printNodeValidationError("OpTransposeConv2d: At least one stride is smaller than one");
1564 return 1;
1565 }
1566 }
1567
Jeremy Johnson4a6fb9b2022-04-26 15:47:21 +01001568 // TODO: Remove dilation once schema updated
Kevin Cheng9fe17242021-11-10 01:04:39 +00001569 for (int32_t i : attribute->dilation())
1570 {
Jeremy Johnson4a6fb9b2022-04-26 15:47:21 +01001571 if (i != 1)
Kevin Cheng9fe17242021-11-10 01:04:39 +00001572 {
Jeremy Johnson4a6fb9b2022-04-26 15:47:21 +01001573 printNodeValidationError("OpTransposeConv2d: Dilation is deprecated and must be set to one");
Kevin Cheng9fe17242021-11-10 01:04:39 +00001574 return 1;
1575 }
1576 }
1577
Eric Kunzee5e26762020-10-13 16:11:07 -07001578 for (int d = 0; d < 4; d++)
1579 {
1580 if (attribute->output_shape()[d] != this->output->getShape()[d])
1581 {
1582            printNodeValidationError("OpTransposeConv2d: attribute output_shape does not match output tensor shape");
1583 return 1;
1584 }
1585 }
1586
Jeremy Johnson4a6fb9b2022-04-26 15:47:21 +01001587 int32_t IH = input->getShape()[1];
1588 int32_t IW = input->getShape()[2];
1589 int32_t OH = output->getShape()[1];
1590 int32_t OW = output->getShape()[2];
1591
1592 int32_t stride_y = attribute->stride()[0];
1593 int32_t stride_x = attribute->stride()[1];
1594 int32_t kernel_h = weight->getShape()[1];
1595 int32_t kernel_w = weight->getShape()[2];
1596
1597 int32_t outpad_top = attribute->outpad()[0];
1598 int32_t outpad_bottom = attribute->outpad()[1];
1599 int32_t outpad_left = attribute->outpad()[2];
1600 int32_t outpad_right = attribute->outpad()[3];
1601
1602 int32_t H = (IH - 1) * stride_y - outpad_top - outpad_bottom + kernel_h;
1603 int32_t W = (IW - 1) * stride_x - outpad_left - outpad_right + kernel_w;
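    // Sanity example (hypothetical values): IH = 4, stride_y = 2,
    // kernel_h = 3 and zero outpad gives H = (4 - 1) * 2 - 0 - 0 + 3 = 9,
    // which inverts a stride-2, kernel-3 convolution mapping height 9 to 4.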
1604
1605 if ((OH != H) || (OW != W))
1606 {
1607 std::string msg = "OpTransposeConv2d: Mismatch between output shape provided and expected output shape (" +
1608 std::to_string(H) + "," +
1609 std::to_string(W) + ")";
1610 printNodeValidationError(msg.c_str());
1611 return 1;
1612 }
1613
Kevin Chengcc61be32021-10-14 17:09:57 -07001614 if (this->qinfo)
1615 {
1616 if (InDtype != DType_INT8)
1617 {
1618            ERROR_IF(this->qinfo->input_zp() != 0, "OpTransposeConv2d: input zeropoint must be zero for non-int8 data");
1619 }
1620 if (WeightDtype != DType_INT8)
1621 {
1622            ERROR_IF(this->qinfo->weight_zp() != 0, "OpTransposeConv2d: weight zeropoint must be zero for non-int8 weights");
1623 }
1624 }
1625
Eric Kunzee5e26762020-10-13 16:11:07 -07001626 return 0;
1627}
1628
Kevin Chengcc61be32021-10-14 17:09:57 -07001629template <DType InDtype, DType WeightDtype>
1630int OpTransposeConv2d<InDtype, WeightDtype>::eval()
Eric Kunzee5e26762020-10-13 16:11:07 -07001631{
1632 int in_batch = this->input->getShape()[0];
1633 int in_height = this->input->getShape()[1];
1634 int in_width = this->input->getShape()[2];
1635 int in_channels = this->input->getShape()[3];
1636
1637 int f_out_channels = this->weight->getShape()[0];
1638 int f_height = this->weight->getShape()[1];
1639 int f_width = this->weight->getShape()[2];
1640 int f_in_channels = this->weight->getShape()[3];
1641
1642 int b_out_channels = this->bias->getShape()[0];
1643
1644 int out_batch = this->output->getShape()[0];
1645 int out_height = this->output->getShape()[1];
1646 int out_width = this->output->getShape()[2];
1647 int out_channels = this->output->getShape()[3];
1648
Jeremy Johnson4a6fb9b2022-04-26 15:47:21 +01001649 int outpad_top = this->attribute->outpad()[0];
1650 int outpad_bottom = this->attribute->outpad()[1];
1651 int outpad_left = this->attribute->outpad()[2];
1652 int outpad_right = this->attribute->outpad()[3];
1653
1654 int stride_h = this->attribute->stride()[0];
1655 int stride_w = this->attribute->stride()[1];
Eric Kunzee5e26762020-10-13 16:11:07 -07001656
Kevin Chengacb550f2021-06-29 15:32:19 -07001657 ERROR_IF(in_batch != out_batch, "OpTransposeConv2d: tensor batch mismatch %d != %d", in_batch, out_batch);
1658 ERROR_IF(f_in_channels != in_channels, "OpTransposeConv2d: tensor input channel mismatch %d != %d", f_in_channels,
1659 in_channels);
1660 ERROR_IF(f_out_channels != out_channels, "OpTransposeConv2d: tensor output channel mismatch %d != %d",
1661 f_out_channels, out_channels);
1662    ERROR_IF(b_out_channels != out_channels, "OpTransposeConv2d: bias channels mismatch %d != %d", b_out_channels,
1663 out_channels);
Eric Kunzee5e26762020-10-13 16:11:07 -07001664
1665 DEBUG_INFO(OP,
1666 "perform OpTransposeConv2d, input.shape=[%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d], "
Jeremy Johnson4a6fb9b2022-04-26 15:47:21 +01001667 "output.shape=[%d,%d,%d,%d], stride=[%d,%d], outpad=[%d,%d,%d,%d]",
1668 in_batch, in_height, in_width, in_channels, f_height, f_width, f_out_channels, f_in_channels,
1669 out_batch, out_height, out_width, out_channels, stride_h, stride_w, outpad_top,
1670 outpad_bottom, outpad_left, outpad_right);
Eric Kunzee5e26762020-10-13 16:11:07 -07001671
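    // For quantized operation, subtract the zero points first so that the
    // multiply-accumulate below runs on zero-centered values; the checks in
    // checkTensorAttributes() guarantee the zero points are 0 for non-int8.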
1672 TIn input_val = this->input->getTensor();
1673 TWeight weight_val = this->weight->getTensor();
1674 if (this->qinfo)
1675 {
1676 input_val = input_val - (InEigenType)this->qinfo->input_zp();
1677 weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
1678 }
1679
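    // The rank-1 bias [C] is reshaped to [1, 1, 1, C] and broadcast to the
    // full output shape [N, OH, OW, C], so each output element starts from
    // its channel's bias before input contributions are scattered in.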
1680 Eigen::array<Eigen::Index, 4> reshape_dim;
1681 reshape_dim.fill(1);
1682 reshape_dim[3] = b_out_channels;
1683
1684 Eigen::array<Eigen::Index, 4> bcast;
1685 bcast[0] = out_batch;
1686 bcast[1] = out_height;
1687 bcast[2] = out_width;
1688 bcast[3] = 1;
1689
1690 // initialize with bias
1691 this->output->getTensor() = this->bias->getTensor().reshape(reshape_dim).broadcast(bcast);
1692
1693 int out_x_origin, out_y_origin;
1694 int out_x, out_y;
1695
1696 // reference implementation from: tensorflow/tensorflow/lite/kernels/internal/reference/reference_ops.h
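    // The transpose convolution is computed as a scatter: every input
    // element (ih, iw, ic) adds input * weight into the kernel-sized window
    // of output positions (ih * stride_h - outpad_top + fh,
    // iw * stride_w - outpad_left + fw), with out-of-bounds targets skipped.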
1697 for (int ob = 0; ob < out_batch; ob++)
1698 {
1699 for (int ih = 0; ih < in_height; ih++)
1700 {
1701 for (int iw = 0; iw < in_width; iw++)
1702 {
Jeremy Johnson4a6fb9b2022-04-26 15:47:21 +01001703 out_x_origin = iw * stride_w - outpad_left;
1704 out_y_origin = ih * stride_h - outpad_top;
Eric Kunzee5e26762020-10-13 16:11:07 -07001705 for (int ic = 0; ic < in_channels; ic++)
1706 {
1707 for (int fh = 0; fh < f_height; fh++)
1708 {
1709 for (int fw = 0; fw < f_width; fw++)
1710 {
Jeremy Johnson4a6fb9b2022-04-26 15:47:21 +01001711 out_x = out_x_origin + fw;
1712 out_y = out_y_origin + fh;
Eric Kunzee5e26762020-10-13 16:11:07 -07001713 for (int oc = 0; oc < out_channels; oc++)
1714 {
1715 if ((out_x >= 0 && out_x < out_width) && (out_y >= 0 && out_y < out_height))
1716 {
1717 this->output->getTensor()(ob, out_y, out_x, oc) +=
1718 ((AccEigenType)input_val(ob, ih, iw, ic) *
1719 (AccEigenType)weight_val(oc, fh, fw, ic));
1720 }
1721 }
1722 }
1723 }
1724 }
1725 }
1726 }
1727 }
1728
1729 if (AccDtype == DType_INT48)
1730 {
1731 this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
1732 this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
1733 }
1734
1735 return GraphNode::eval();
1736}
1737
1738// template explicit instantiation
1739DEF_INSTANTIATE_RANK1_6_ONE_RANK_ONE_TYPE(OpArgMax, FLOAT);
Kevin Cheng3a478572021-01-22 17:21:02 -08001740DEF_INSTANTIATE_RANK1_6_ONE_RANK_ONE_TYPE(OpArgMax, INT8);
Eric Kunzee5e26762020-10-13 16:11:07 -07001741DEF_INSTANTIATE_RANK1_6_ONE_RANK_ONE_TYPE(OpArgMax, INT16);
1742
1743DEF_INSTANTIATE_ONE_TYPE(OpAvgPool2d, FLOAT);
Kevin Cheng3a478572021-01-22 17:21:02 -08001744DEF_INSTANTIATE_ONE_TYPE(OpAvgPool2d, INT8);
Eric Kunzee5e26762020-10-13 16:11:07 -07001745DEF_INSTANTIATE_ONE_TYPE(OpAvgPool2d, INT16);
1746
1747DEF_INSTANTIATE_TWO_TYPE(OpConv2d, FLOAT, FLOAT);
Kevin Cheng3a478572021-01-22 17:21:02 -08001748DEF_INSTANTIATE_TWO_TYPE(OpConv2d, INT8, INT4);
1749DEF_INSTANTIATE_TWO_TYPE(OpConv2d, INT8, INT8);
Eric Kunzee5e26762020-10-13 16:11:07 -07001750DEF_INSTANTIATE_TWO_TYPE(OpConv2d, INT16, INT8);
1751
Kevin Cheng1533b852021-09-01 12:51:58 -07001752DEF_INSTANTIATE_TWO_TYPE(OpConv3d, FLOAT, FLOAT);
1753DEF_INSTANTIATE_TWO_TYPE(OpConv3d, INT8, INT4);
1754DEF_INSTANTIATE_TWO_TYPE(OpConv3d, INT8, INT8);
1755DEF_INSTANTIATE_TWO_TYPE(OpConv3d, INT16, INT8);
1756
Eric Kunzee5e26762020-10-13 16:11:07 -07001757DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, FLOAT, FLOAT);
Kevin Cheng3a478572021-01-22 17:21:02 -08001758DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, INT8, INT4);
1759DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, INT8, INT8);
Eric Kunzee5e26762020-10-13 16:11:07 -07001760DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, INT16, INT8);
1761
1762DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, FLOAT, FLOAT);
Kevin Cheng3a478572021-01-22 17:21:02 -08001763DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, INT8, INT4);
1764DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, INT8, INT8);
Eric Kunzee5e26762020-10-13 16:11:07 -07001765DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, INT16, INT8);
1766
Kevin Cheng3a478572021-01-22 17:21:02 -08001767DEF_INSTANTIATE_ONE_TYPE(OpMatMul, INT8);
Eric Kunzee5e26762020-10-13 16:11:07 -07001768DEF_INSTANTIATE_ONE_TYPE(OpMatMul, INT16);
1769DEF_INSTANTIATE_ONE_TYPE(OpMatMul, FLOAT);
1770
1771DEF_INSTANTIATE_ONE_TYPE(OpMaxPool2d, FLOAT);
Kevin Cheng3a478572021-01-22 17:21:02 -08001772DEF_INSTANTIATE_ONE_TYPE(OpMaxPool2d, INT8);
Eric Kunzee5e26762020-10-13 16:11:07 -07001773DEF_INSTANTIATE_ONE_TYPE(OpMaxPool2d, INT16);
1774
1775DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, FLOAT, FLOAT);
Kevin Cheng3a478572021-01-22 17:21:02 -08001776DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, INT8, INT4);
1777DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, INT8, INT8);
Eric Kunzee5e26762020-10-13 16:11:07 -07001778DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, INT16, INT8);