Blame - reference_model/src/ops/tensor_ops.cc - tosa/reference_model

blob: 045c0a5139f674a0dfddf7b185ad788791b93908 [file] [log] [blame]

Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1
Kevin Cheng	3a47857	2021-01-22 17:21:02 -0800	[diff] [blame]	2	// Copyright (c) 2020-2021, ARM Limited.
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	3	//
				4	// Licensed under the Apache License, Version 2.0 (the "License");
				5	// you may not use this file except in compliance with the License.
				6	// You may obtain a copy of the License at
				7	//
				8	// http://www.apache.org/licenses/LICENSE-2.0
				9	//
				10	// Unless required by applicable law or agreed to in writing, software
				11	// distributed under the License is distributed on an "AS IS" BASIS,
				12	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				13	// See the License for the specific language governing permissions and
				14	// limitations under the License.
				15
				16	#include "tensor_ops.h"
				17	#include "quant_util.h"
				18	#include "template_types.h"
				19
				20	using namespace TosaReference;
				21	using namespace Eigen;
				22	using namespace tosa;
				23
				24	template <int Rank, DType Dtype>
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame^]	25	OpArgMax<Rank, Dtype>::OpArgMax(SubgraphTraverser* sgt_,
				26	TosaAttributeBase* attribute_,
				27	TosaQuantInfoBase* qinfo_,
				28	uint64_t id_)
				29	: GraphNode(sgt_, Op_ARGMAX, id_)
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	30	{
				31	setRequiredOperands(1, 1);
				32	setRequiredRank(0, 6);
				33
				34	INIT_ATTRIBUTE(Axis);
				35	}
				36
				37	template <int Rank, DType Dtype>
				38	OpArgMax<Rank, Dtype>::~OpArgMax()
				39	{
				40	if (attribute)
				41	delete attribute;
				42	}
				43
				44	template <int Rank, DType Dtype>
				45	int OpArgMax<Rank, Dtype>::checkTensorAttributes()
				46	{
				47	if (validateRequiredOperands())
				48	return 1;
				49
				50	if (validateRequiredRank(inputs[0]) \|\| validateRequiredRank(outputs[0]))
				51	{
				52	return 1;
				53	}
				54
				55	input = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
				56	output = dynamic_cast<TosaReference::TensorTemplate<TOut>*>(outputs[0]);
				57
				58	return 0;
				59	}
				60
				61	template <int Rank, DType Dtype>
				62	int OpArgMax<Rank, Dtype>::eval()
				63	{
				64	Eigen::Tensor<DenseIndex, Rank - 1> index = this->input->getTensor().argmax(attribute->axis());
				65
				66	this->output->getTensor() = index.unaryExpr([](DenseIndex in) -> OutEigenType { return (OutEigenType)in; });
				67
				68	return GraphNode::eval();
				69	}
				70
				71	template <DType Dtype>
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame^]	72	OpAvgPool2d<Dtype>::OpAvgPool2d(SubgraphTraverser* sgt_,
				73	TosaAttributeBase* attribute_,
				74	TosaQuantInfoBase* qinfo_,
				75	uint64_t id_)
				76	: GraphNode(sgt_, Op_AVG_POOL2D, id_)
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	77	{
				78	setRequiredOperands(1, 1);
				79	setRequiredRank(4);
				80
				81	INIT_ATTRIBUTE(Pool2d);
				82	INIT_QINFO(Unary);
				83	}
				84
				85	template <DType Dtype>
				86	OpAvgPool2d<Dtype>::~OpAvgPool2d()
				87	{
				88	if (attribute)
				89	delete attribute;
				90	}
				91
				92	template <DType Dtype>
				93	int OpAvgPool2d<Dtype>::checkTensorAttributes()
				94	{
				95	if (validateRequiredOperands())
				96	return 1;
				97
				98	if (validateRequiredRank(inputs[0]) \|\| validateRequiredRank(outputs[0]))
				99	{
				100	return 1;
				101	}
				102
				103	if (inputs[0]->matchType(*outputs[0]))
				104	{
				105	printNodeValidationError("OpAvgPool2d: input and output tensor type mismatch");
				106	return 1;
				107	}
				108
				109	in = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
				110	out = dynamic_cast<TosaReference::TensorTemplate<TOut>*>(outputs[0]);
				111
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	112	if (attribute->padding().size() != 4)
				113	{
				114	printNodeValidationError("OpAvgPool2d: illegal size for attribute padding");
				115	return 1;
				116	}
				117
				118	if (attribute->kernel().size() != 2)
				119	{
				120	printNodeValidationError("OpAvgPool2d: illegal size for attribute kernel");
				121	return 1;
				122	}
				123
				124	if (attribute->stride().size() != 2)
				125	{
				126	printNodeValidationError("OpAvgPool2d: illegal size for attribute stride");
				127	return 1;
				128	}
				129
				130	return 0;
				131	}
				132
				133	template <DType Dtype>
				134	ETensor1<int32_t> OpAvgPool2d<Dtype>::calculate_div_map_1d(int in_size, int out_size, int kernel_size, int stride)
				135	{
				136	ETensor1<int32_t> result(out_size);
				137
				138	int32_t total_pad = (out_size - 1) * stride + kernel_size - in_size;
				139	total_pad = total_pad < 0 ? 0 : total_pad;
				140
				141	int32_t pad_left = total_pad >> 1;
				142	int32_t pad_right = total_pad - pad_left;
				143
				144	result.setConstant(kernel_size);
				145
				146	// the index left to 'left_index' and index right to 'right_index' indicates
				147	// the input window of this output covers a pad bit
				148	int32_t left_index = pad_left / stride;
				149	int32_t right_index = pad_right / stride;
				150
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	151	// minus the number of pad bit this index cover
				152	while (left_index >= 0)
				153	{
				154	result(left_index) -= (pad_left - left_index * stride);
				155	left_index--;
				156	}
				157
				158	while (right_index >= 0)
				159	{
				160	result(out_size - 1 - right_index) -= (pad_right - right_index * stride);
				161	right_index--;
				162	}
				163
				164	return result;
				165	}
				166
				167	// assuming input and output tensor have same scales like tflite reference
				168	// so no need to scale input and output
				169	template <DType Dtype>
				170	int OpAvgPool2d<Dtype>::eval()
				171	{
				172	int in_batch = this->in->getShape()[0];
				173	int in_height = this->in->getShape()[1];
				174	int in_width = this->in->getShape()[2];
				175	int in_channels = this->in->getShape()[3];
				176
				177	int out_batch = this->out->getShape()[0];
				178	int out_height = this->out->getShape()[1];
				179	int out_width = this->out->getShape()[2];
				180	int out_channels = this->out->getShape()[3];
				181
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame^]	182	ERROR_IF(in_batch != out_batch, "OpAvgPool2d: tensor batch mismatch %d != %d", in_batch, out_batch);
				183	ERROR_IF(in_channels != out_channels, "OpAvgPool2d: tensor channel mismatch %d != %d", in_channels, out_channels);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	184
				185	int padding_top = this->attribute->padding()[0];
				186	int padding_bottom = this->attribute->padding()[1];
				187	int padding_left = this->attribute->padding()[2];
				188	int padding_right = this->attribute->padding()[3];
				189	int kernel_h = this->attribute->kernel()[0];
				190	int kernel_w = this->attribute->kernel()[1];
				191	int stride_h = this->attribute->stride()[0];
				192	int stride_w = this->attribute->stride()[1];
				193
				194	DEBUG_INFO(OP,
				195	"perform AvgPool2d, input.shape=[%d,%d,%d,%d], output.shape=[%d,%d,%d,%d], kernel=[%d,%d], "
				196	"stride=[%d,%d], padding=[%d,%d,%d,%d]",
				197	in_batch, in_height, in_width, in_channels, out_batch, out_height, out_width, out_channels, kernel_h,
				198	kernel_w, stride_h, stride_w, padding_top, padding_bottom, padding_left, padding_right);
				199
				200	Eigen::array<Eigen::Index, 2> im2col_input_dims;
				201	im2col_input_dims[0] = kernel_h * kernel_w;
				202	im2col_input_dims[1] = out_batch * out_height * out_width * out_channels;
				203
				204	Eigen::array<Eigen::Index, 4> col2im_output_dims;
				205	col2im_output_dims[0] = out_batch;
				206	col2im_output_dims[1] = out_height;
				207	col2im_output_dims[2] = out_width;
				208	col2im_output_dims[3] = out_channels;
				209
				210	Eigen::array<std::pair<int32_t, int32_t>, 4> padding;
				211	padding[0] = std::make_pair(0, 0);
				212	padding[1] = std::make_pair(padding_top, padding_bottom);
				213	padding[2] = std::make_pair(padding_left, padding_right);
				214	padding[3] = std::make_pair(0, 0);
				215
				216	ETensor4<InEigenType> input_val = this->in->getTensor();
				217	if (this->qinfo)
				218	{
				219	input_val = input_val - (InEigenType)this->qinfo->input_zp();
				220	}
				221
				222	ETensor4<InEigenType> input_padded = input_val.pad(padding);
				223
				224	// assuming input and output have same scales
				225	// so input and output scaling is not required
				226	// TODO: check if this assumption TOSA made
				227
				228	// extract_image_patches() output [N, KH, KW, H * W, C]
				229	// transpose to [KH, KW, N, H * W, C]
				230	// reshape to [KH * KW, N * H * W * C]
				231	ETensor2<InEigenType> input_extract_patches =
				232	input_padded.extract_image_patches(kernel_h, kernel_w, stride_h, stride_w, 1, 1, Eigen::PADDING_VALID)
				233	.shuffle(Eigen::array<Eigen::Index, 5>{ 1, 2, 0, 3, 4 })
				234	.reshape(im2col_input_dims);
				235
				236	// 1D result with [N * H * W * C]
				237	ETensor1<AccEigenType> out_1d(this->out->getElementCount());
				238	out_1d.setZero();
				239
				240	// sum pool
				241	for (size_t i = 0; i < this->out->getElementCount(); i++)
				242	{
				243	for (int32_t j = 0; j < kernel_h * kernel_w; j++)
				244	{
				245	out_1d(i) += (AccEigenType)input_extract_patches(j, i);
				246	}
				247	}
				248
				249	// reshape result to [N, H, W, C] and divide with div_map
				250	ETensor4<AccEigenType> sum = out_1d.reshape(col2im_output_dims);
				251
				252	// calculate 1d height/width div_map (number of elements this pooling window covers)
				253	// and outer product to get 2d div_map, then reshape/broadcast to [N, H, W, C]
				254	ETensor1<int32_t> div_map_h = calculate_div_map_1d(in_height, out_height, kernel_h, stride_h);
				255	ETensor1<int32_t> div_map_w = calculate_div_map_1d(in_width, out_width, kernel_w, stride_w);
				256	Eigen::array<Eigen::IndexPair<Eigen::Index>, 1> contract_dims = { Eigen::IndexPair<Eigen::Index>(1, 0) };
				257	Eigen::array<Eigen::Index, 4> bcast{ out_batch, 1, 1, out_channels };
				258
				259	ETensor4<int32_t> div_map =
				260	div_map_h.reshape(Eigen::array<Eigen::Index, 2>{ out_height, 1 })
				261	.contract(div_map_w.reshape(Eigen::array<Eigen::Index, 2>{ 1, out_width }), contract_dims)
				262	.reshape(Eigen::array<Eigen::Index, 4>{ 1, out_height, out_width, 1 })
				263	.broadcast(bcast);
				264
				265	if (Dtype != DType_FLOAT)
				266	{
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame^]	267	try
				268	{
				269	this->out->getTensor() = sum.binaryExpr(div_map, [](AccEigenType value, int32_t div) -> OutEigenType {
				270	int32_t multiplier, shift;
				271	TosaReference::QuantUtil::reciprocal_scale(div, multiplier, shift);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	272
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame^]	273	return (OutEigenType)TosaReference::QuantUtil::apply_scale_32(value, multiplier, shift, false);
				274	});
				275	}
				276	catch (std::string desc)
				277	{
				278	REQUIRE(false, "OpAvgPool2d apply_scale_32() fails: %s.", desc.c_str());
				279	}
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	280	this->out->getTensor() = this->out->getTensor() + (OutEigenType)(this->qinfo->output_zp());
				281	this->out->getTensor() = this->out->getTensor().cwiseMax((OutEigenType)QMin);
				282	this->out->getTensor() = this->out->getTensor().cwiseMin((OutEigenType)QMax);
				283	}
				284	else
				285	{
				286	this->out->getTensor() = (sum / div_map.template cast<AccEigenType>()).template cast<OutEigenType>();
				287	}
				288
				289	return GraphNode::eval();
				290	}
				291
				292	template <DType InDtype, DType WeightDtype>
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame^]	293	OpConv2d<InDtype, WeightDtype>::OpConv2d(SubgraphTraverser* sgt_,
				294	TosaAttributeBase* attribute_,
				295	TosaQuantInfoBase* qinfo_,
				296	uint64_t id_)
				297	: GraphNode(sgt_, Op_CONV2D, id_)
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	298	{
				299	setRequiredOperands(3, 1);
				300	setRequiredRank(4);
				301
				302	INIT_ATTRIBUTE(Conv2d);
				303	INIT_QINFO(Conv);
				304	}
				305
				306	template <DType InDtype, DType WeightDtype>
				307	OpConv2d<InDtype, WeightDtype>::~OpConv2d()
				308	{
				309	if (attribute)
				310	delete attribute;
				311	if (qinfo)
				312	delete qinfo;
				313	}
				314
				315	template <DType InDtype, DType WeightDtype>
				316	int OpConv2d<InDtype, WeightDtype>::checkTensorAttributes()
				317	{
				318	if (validateRequiredOperands())
				319	return 1;
				320
				321	if (validateRequiredRank(inputs[0]) \|\| validateRequiredRank(inputs[1]) \|\| validateRequiredRank(outputs[0]))
				322	{
				323	return 1;
				324	}
				325
				326	// 'bias' checked separatedly since it doens't make sense to make required rank ranging from 1 to 4
				327	if (inputs[2]->getRank() != 1)
				328	{
				329	printNodeValidationError("OpConv2d: bias tensor must be rank 1");
				330	}
				331
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	332	input = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
				333	weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
				334	bias = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
				335	output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);
				336
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	337	if (attribute->padding().size() != 4)
				338	{
				339	printNodeValidationError("OpConv2d: illegal size for attribute padding");
				340	return 1;
				341	}
				342
				343	if (attribute->stride().size() != 2)
				344	{
				345	printNodeValidationError("OpConv2d: illegal size for attribute stride");
				346	return 1;
				347	}
				348
				349	if (attribute->dilation().size() != 2)
				350	{
				351	printNodeValidationError("OpConv2d: illegal size for attribute dilation");
				352	return 1;
				353	}
				354
				355	return 0;
				356	}
				357
				358	template <DType InDtype, DType WeightDtype>
				359	int OpConv2d<InDtype, WeightDtype>::eval()
				360	{
				361	int in_batch = this->input->getShape()[0];
				362	int in_height = this->input->getShape()[1];
				363	int in_width = this->input->getShape()[2];
				364	int in_channels = this->input->getShape()[3];
				365
				366	int f_out_channels = this->weight->getShape()[0];
				367	int f_height = this->weight->getShape()[1];
				368	int f_width = this->weight->getShape()[2];
				369	int f_in_channels = this->weight->getShape()[3];
				370
				371	int b_out_channels = this->bias->getShape()[0];
				372
				373	int out_batch = this->output->getShape()[0];
				374	int out_height = this->output->getShape()[1];
				375	int out_width = this->output->getShape()[2];
				376	int out_channels = this->output->getShape()[3];
				377
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame^]	378	ERROR_IF(in_batch != out_batch, "OpConv2d: tensor batch mismatch %d != %d", in_batch, out_batch);
				379	ERROR_IF(f_in_channels != in_channels, "OpConv2d: tensor input channel mismatch %d != %d", f_in_channels,
				380	in_channels);
				381	ERROR_IF(f_out_channels != out_channels, "OpConv2d: tensor output channel mismatch %d != %d", f_out_channels,
				382	out_channels);
				383	ERROR_IF(b_out_channels != out_channels, "OpConv2d: bias channel mismatch %d != %d", b_out_channels, out_channels);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	384
				385	int padding_top = this->attribute->padding()[0];
				386	int padding_bottom = this->attribute->padding()[1];
				387	int padding_left = this->attribute->padding()[2];
				388	int padding_right = this->attribute->padding()[3];
				389	int stride_h = this->attribute->stride()[0];
				390	int stride_w = this->attribute->stride()[1];
				391	int dilation_h = this->attribute->dilation()[0];
				392	int dilation_w = this->attribute->dilation()[1];
				393
				394	DEBUG_INFO(OP,
				395	"perform OpConv2d, input.shape=[%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d], output.shape=[%d,%d,%d,%d], "
				396	"stride=[%d,%d], dilation=[%d,%d], padding=[%d,%d,%d,%d]",
				397	in_batch, in_height, in_width, in_channels, f_height, f_width, f_in_channels, f_out_channels, out_batch,
				398	out_height, out_width, out_channels, stride_h, stride_w, dilation_h, dilation_w, padding_top,
				399	padding_bottom, padding_left, padding_right);
				400
				401	// GEMM-conv2d, left matrix is input, right matrix is weight
				402	Eigen::array<Eigen::Index, 2> im2col_input_dims;
				403	im2col_input_dims[0] = out_batch * out_height * out_width;
				404	im2col_input_dims[1] = f_height * f_width * f_in_channels;
				405
				406	Eigen::array<Eigen::Index, 2> im2col_weight_dims;
				407	im2col_weight_dims[0] = f_height * f_width * f_in_channels;
				408	im2col_weight_dims[1] = f_out_channels;
				409
				410	Eigen::array<Eigen::Index, 2> bias_reshaped_dims;
				411	bias_reshaped_dims[0] = 1;
				412	bias_reshaped_dims[1] = b_out_channels;
				413
				414	Eigen::array<Eigen::Index, 4> weight_zp_bcast_dims;
				415	weight_zp_bcast_dims[0] = f_height;
				416	weight_zp_bcast_dims[1] = f_width;
				417	weight_zp_bcast_dims[2] = f_in_channels;
				418
				419	Eigen::array<Eigen::Index, 2> bias_bcast_dims;
				420	bias_bcast_dims[0] = out_batch * out_height * out_width;
				421	bias_bcast_dims[1] = 1;
				422
				423	Eigen::array<Eigen::Index, 4> col2im_output_dims;
				424	col2im_output_dims[0] = out_batch;
				425	col2im_output_dims[1] = out_height;
				426	col2im_output_dims[2] = out_width;
				427	col2im_output_dims[3] = out_channels;
				428
				429	Eigen::array<Eigen::IndexPair<Eigen::Index>, 1> contract_dims = { Eigen::IndexPair<Eigen::Index>(1, 0) };
				430
				431	Eigen::array<std::pair<int32_t, int32_t>, 4> padding;
				432	padding[0] = std::make_pair(0, 0);
				433	padding[1] = std::make_pair(padding_top, padding_bottom);
				434	padding[2] = std::make_pair(padding_left, padding_right);
				435	padding[3] = std::make_pair(0, 0);
				436
				437	TIn input_val = this->input->getTensor();
				438	TWeight weight_val = this->weight->getTensor();
				439	if (this->qinfo)
				440	{
				441	input_val = input_val - (InEigenType)this->qinfo->input_zp();
				442	weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
				443	}
				444
				445	ETensor4<InEigenType> input_padded = input_val.pad(padding);
				446
				447	// extract_image_patches() output [N, KH, KW, H * W, C]
				448	// need to transpose to [N, H * W, KH, KW, C]
				449	ETensor5<InEigenType> input_extract_patches =
				450	input_padded
				451	.extract_image_patches(f_height, f_width, stride_h, stride_w, dilation_h, dilation_w, Eigen::PADDING_VALID)
				452	.shuffle(Eigen::array<Eigen::Index, 5>{ 0, 3, 1, 2, 4 });
				453
				454	// reshape input to [N * H * W, KH * KW * C]
				455	ETensor2<InEigenType> im2col_input = input_extract_patches.reshape(im2col_input_dims);
				456
				457	// transpose and reshape weight from [OC, H, W, IC] to [H * W * IC, OC]
				458	ETensor2<WeightEigenType> im2col_weight =
				459	weight_val.shuffle(Eigen::array<Eigen::Index, 4>({ 1, 2, 3, 0 })).reshape(im2col_weight_dims);
				460
				461	// don't need to apply bias_multiplier ( * bias_scale and >> bias_shift) since tflite already scale it
				462	// and reshaped from [C] to [1, C], and broadcast to [N * H * W, C]
				463	ETensor2<AccEigenType> bias_2d = this->bias->getTensor().reshape(bias_reshaped_dims).broadcast(bias_bcast_dims);
				464
				465	// output matrix is [N * H * W, C]
				466	ETensor2<AccEigenType> contracted_result =
				467	im2col_input.template cast<AccEigenType>().contract(im2col_weight.template cast<AccEigenType>(), contract_dims);
				468
				469	// adding bias
				470	ETensor2<AccEigenType> biased_output = contracted_result + bias_2d.template cast<AccEigenType>();
				471
				472	// reshape back to [N, H, W, C]
				473	this->output->getTensor() = biased_output.reshape(col2im_output_dims);
				474
				475	if (AccDtype == DType_INT48)
				476	{
				477	this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
				478	this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
				479	}
				480
				481	return GraphNode::eval();
				482	}
				483
				484	template <DType InDtype, DType WeightDtype>
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame^]	485	OpDepthwiseConv2d<InDtype, WeightDtype>::OpDepthwiseConv2d(SubgraphTraverser* sgt_,
				486	TosaAttributeBase* attribute_,
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	487	TosaQuantInfoBase* qinfo_,
				488	uint64_t id_)
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame^]	489	: GraphNode(sgt_, Op_DEPTHWISE_CONV2D, id_)
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	490	{
				491	setRequiredOperands(3, 1);
				492	setRequiredRank(4);
				493
				494	INIT_ATTRIBUTE(Conv2d);
				495	INIT_QINFO(Conv);
				496	}
				497
				498	template <DType InDtype, DType WeightDtype>
				499	OpDepthwiseConv2d<InDtype, WeightDtype>::~OpDepthwiseConv2d()
				500	{
				501	if (attribute)
				502	delete attribute;
				503	if (qinfo)
				504	delete qinfo;
				505	}
				506
				507	template <DType InDtype, DType WeightDtype>
				508	int OpDepthwiseConv2d<InDtype, WeightDtype>::checkTensorAttributes()
				509	{
				510	if (validateRequiredOperands())
				511	return 1;
				512
				513	if (validateRequiredRank(inputs[0]) \|\| validateRequiredRank(inputs[1]) \|\| validateRequiredRank(outputs[0]))
				514	{
				515	return 1;
				516	}
				517
				518	// 'bias' checked separatedly since it doens't make sense to make required rank ranging from 1 to 4
				519	if (inputs[2]->getRank() != 1)
				520	{
				521	printNodeValidationError("OpDepthwiseConv2d: bias tensor must be rank 1");
				522	}
				523
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	524	input = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
				525	weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
				526	bias = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
				527	output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);
				528
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	529	if (attribute->padding().size() != 4)
				530	{
				531	printNodeValidationError("OpDepthwiseConv2d: illegal size for attribute padding");
				532	return 1;
				533	}
				534
				535	if (attribute->stride().size() != 2)
				536	{
				537	printNodeValidationError("OpDepthwiseConv2d: illegal size for attribute stride");
				538	return 1;
				539	}
				540
				541	if (attribute->dilation().size() != 2)
				542	{
				543	printNodeValidationError("OpDepthwiseConv2d: illegal size for attribute dilation");
				544	return 1;
				545	}
				546
				547	return 0;
				548	}
				549
				550	template <DType InDtype, DType WeightDtype>
				551	int OpDepthwiseConv2d<InDtype, WeightDtype>::eval()
				552	{
				553	int in_batch = this->input->getShape()[0];
				554	int in_height = this->input->getShape()[1];
				555	int in_width = this->input->getShape()[2];
				556	int in_channels = this->input->getShape()[3];
				557
				558	int f_height = this->weight->getShape()[0];
				559	int f_width = this->weight->getShape()[1];
				560	int f_in_channels = this->weight->getShape()[2];
				561	int f_multiplier = this->weight->getShape()[3];
				562
				563	int b_out_channels = this->bias->getShape()[0];
				564
				565	int out_batch = this->output->getShape()[0];
				566	int out_height = this->output->getShape()[1];
				567	int out_width = this->output->getShape()[2];
				568	int out_channels = this->output->getShape()[3];
				569
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame^]	570	ERROR_IF(in_batch != out_batch, "OpDepthwiseConv2d: tensor batch mismatch %d != %d", in_batch, out_batch);
				571	ERROR_IF(f_in_channels != in_channels, "OpDepthwiseConv2d: tensor input channel mismatch %d != %d", f_in_channels,
				572	in_channels);
				573	ERROR_IF(in_channels * f_multiplier != out_channels, "OpDepthwiseConv2d: tensor output channel mismatch %d != %d",
				574	in_channels * f_multiplier, out_channels);
				575	ERROR_IF(b_out_channels != out_channels, "OpDepthwiseConv2d: bias channels mismatch %d != %d", b_out_channels,
				576	out_channels);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	577
				578	int padding_top = this->attribute->padding()[0];
				579	int padding_bottom = this->attribute->padding()[1];
				580	int padding_left = this->attribute->padding()[2];
				581	int padding_right = this->attribute->padding()[3];
				582	int stride_h = this->attribute->stride()[0];
				583	int stride_w = this->attribute->stride()[1];
				584	int dilation_h = this->attribute->dilation()[0];
				585	int dilation_w = this->attribute->dilation()[1];
				586
				587	DEBUG_INFO(OP,
				588	"perform OpDepthwiseConv2d, input.shape=[%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d], "
				589	"output.shape=[%d,%d,%d,%d], stride=[%d,%d], dilation=[%d,%d], padding=[%d,%d,%d,%d]",
				590	in_batch, in_height, in_width, in_channels, f_height, f_width, f_in_channels, f_multiplier, out_batch,
				591	out_height, out_width, out_channels, stride_h, stride_w, dilation_h, dilation_w, padding_top,
				592	padding_bottom, padding_left, padding_right);
				593
				594	Eigen::array<std::pair<int32_t, int32_t>, 4> padding;
				595	padding[0] = std::make_pair(0, 0);
				596	padding[1] = std::make_pair(padding_top, padding_bottom);
				597	padding[2] = std::make_pair(padding_left, padding_right);
				598	padding[3] = std::make_pair(0, 0);
				599
				600	TIn input_val = this->input->getTensor();
				601	TWeight weight_val = this->weight->getTensor();
				602	if (this->qinfo)
				603	{
				604	input_val = input_val - (InEigenType)this->qinfo->input_zp();
				605	weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
				606	}
				607
				608	ETensor4<InEigenType> input_padded = input_val.pad(padding);
				609
				610	// GEMM doesn't fit well with DepthwiseConv2d
				611	// 1. use extract_image_patches() to handle stride/dilation/padding
				612	// 2. perform direct convolution
				613
				614	// 1. extract_image_patches() output [N, KH, KW, OH * OW, IC]
				615	ETensor5<InEigenType> input_extract_patches = input_padded.extract_image_patches(
				616	f_height, f_width, stride_h, stride_w, dilation_h, dilation_w, Eigen::PADDING_VALID);
				617
				618	Eigen::array<Eigen::Index, 4> reshape_dim;
				619	reshape_dim.fill(1);
				620	reshape_dim[3] = b_out_channels;
				621
				622	Eigen::array<Eigen::Index, 4> bcast;
				623	bcast[0] = out_batch;
				624	bcast[1] = out_height;
				625	bcast[2] = out_width;
				626	bcast[3] = 1;
				627
				628	// initialize with bias
				629	this->output->getTensor() = this->bias->getTensor().reshape(reshape_dim).broadcast(bcast);
				630
				631	// 2. direct depthwise convolution
				632	for (int ob = 0; ob < out_batch; ob++)
				633	{
				634	for (int oh = 0; oh < out_height; oh++)
				635	{
				636	for (int ow = 0; ow < out_width; ow++)
				637	{
				638	for (int ic = 0; ic < in_channels; ic++)
				639	{
				640	for (int cm = 0; cm < f_multiplier; cm++)
				641	{
				642	for (int fh = 0; fh < f_height; fh++)
				643	{
				644	for (int fw = 0; fw < f_width; fw++)
				645	{
				646	this->output->getTensor()(ob, oh, ow, ic * f_multiplier + cm) +=
				647	((AccEigenType)input_extract_patches(ob, fh, fw, ow * out_height + oh, ic) *
				648	(AccEigenType)weight_val(fh, fw, ic, cm));
				649	}
				650	}
				651	}
				652	}
				653	}
				654	}
				655	}
				656
				657	if (AccDtype == DType_INT48)
				658	{
				659	this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
				660	this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
				661	}
				662
				663	return GraphNode::eval();
				664	}
				665
				666	template <DType InDtype, DType WeightDtype>
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame^]	667	OpFullyConnected<InDtype, WeightDtype>::OpFullyConnected(SubgraphTraverser* sgt_,
				668	TosaAttributeBase* attribute_,
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	669	TosaQuantInfoBase* qinfo_,
				670	uint64_t id_)
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame^]	671	: GraphNode(sgt_, Op_FULLY_CONNECTED, id_)
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	672	{
				673	setRequiredOperands(3, 1);
				674	setRequiredRank(2);
				675
				676	INIT_QINFO(Conv);
				677	}
				678
				679	template <DType InDtype, DType WeightDtype>
				680	OpFullyConnected<InDtype, WeightDtype>::~OpFullyConnected()
				681	{
				682	if (qinfo)
				683	delete qinfo;
				684	}
				685
				686	template <DType InDtype, DType WeightDtype>
				687	int OpFullyConnected<InDtype, WeightDtype>::checkTensorAttributes()
				688	{
				689	if (validateRequiredOperands())
				690	return 1;
				691
				692	if (validateRequiredRank(inputs[0]) \|\| validateRequiredRank(inputs[1]) \|\| validateRequiredRank(outputs[0]))
				693	{
				694	return 1;
				695	}
				696
				697	input = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
				698	weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
				699	bias = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
				700
				701	if (input->getShape()[1] != weight->getShape()[1])
				702	{
				703	printNodeValidationError("OpFullyConnected operator input.shape[1] should match weight.shape[1]");
				704	return 1;
				705	}
				706
				707	if (weight->getShape()[0] != bias->getShape()[0])
				708	{
				709	printNodeValidationError("OpFullyConnected operator bias.shape[0] should match weight.shape[0]");
				710	return 1;
				711	}
				712
				713	output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);
				714
				715	return 0;
				716	}
				717
				718	template <DType InDtype, DType WeightDtype>
				719	int OpFullyConnected<InDtype, WeightDtype>::eval()
				720	{
				721	typedef Eigen::Tensor<int, 1>::DimensionPair DimPair;
				722	Eigen::array<DimPair, 1> dims{ { DimPair(1, 0) } };
				723
				724	Eigen::array<Eigen::Index, 2> weight_shuffle{ 1, 0 };
				725
				726	Eigen::array<Eigen::Index, 2> bias_reshape;
				727	bias_reshape[0] = 1;
				728	bias_reshape[1] = this->bias->getShape()[0];
				729
				730	Eigen::array<Eigen::Index, 2> bias_bcast;
				731	bias_bcast[0] = this->input->getShape()[0];
				732	bias_bcast[1] = 1;
				733
				734	TIn input_val = this->input->getTensor();
				735	TWeight weight_val = this->weight->getTensor().shuffle(weight_shuffle);
				736	if (this->qinfo)
				737	{
				738	input_val = input_val - (InEigenType)this->qinfo->input_zp();
				739	weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
				740	}
				741
				742	this->output->getTensor() =
				743	input_val.template cast<AccEigenType>().contract(weight_val.template cast<AccEigenType>(), dims) +
				744	this->bias->getTensor().reshape(bias_reshape).broadcast(bias_bcast);
				745
				746	if (AccDtype == DType_INT48)
				747	{
				748	this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
				749	this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
				750	}
				751	return GraphNode::eval();
				752	}
				753
				754	template <DType Dtype>
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame^]	755	OpMatMul<Dtype>::OpMatMul(SubgraphTraverser* sgt_,
				756	TosaAttributeBase* attribute_,
				757	TosaQuantInfoBase* qinfo_,
				758	uint64_t id_)
				759	: GraphNode(sgt_, Op_MATMUL, id_)
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	760	{
				761	setRequiredOperands(2, 1);
Kevin Cheng	2d60f00	2021-06-09 14:18:32 -0700	[diff] [blame]	762	setRequiredRank(3);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	763
				764	INIT_QINFO(MatMul);
				765	}
				766
				767	template <DType Dtype>
				768	OpMatMul<Dtype>::~OpMatMul()
				769	{
				770	if (qinfo)
				771	delete qinfo;
				772	}
				773
				774	template <DType Dtype>
				775	int OpMatMul<Dtype>::checkTensorAttributes()
				776	{
				777	if (validateRequiredOperands())
				778	return 1;
				779
				780	if (validateRequiredRank(inputs[0]) \|\| validateRequiredRank(inputs[1]) \|\| validateRequiredRank(outputs[0]))
				781	{
				782	return 1;
				783	}
				784
Kevin Cheng	2d60f00	2021-06-09 14:18:32 -0700	[diff] [blame]	785	a = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
				786	b = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[1]);
				787	output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	788
Kevin Cheng	2d60f00	2021-06-09 14:18:32 -0700	[diff] [blame]	789	ASSERT_MEM(a && b && output);
				790
				791	// a: [N, H, C]
				792	// b: [N, C, W]
				793	// c: [N, H, W]
				794
				795	// Check N
				796	if (a->getShape()[0] != b->getShape()[0] \|\| a->getShape()[0] != output->getShape()[0])
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	797	{
Kevin Cheng	2d60f00	2021-06-09 14:18:32 -0700	[diff] [blame]	798	printNodeValidationError("OpMatMul operator a.shape[0], b.shape[0] and output.shape[0] should match");
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	799	return 1;
				800	}
Kevin Cheng	2d60f00	2021-06-09 14:18:32 -0700	[diff] [blame]	801	N = a->getShape()[0];
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	802
Kevin Cheng	2d60f00	2021-06-09 14:18:32 -0700	[diff] [blame]	803	// Check C
				804	if (a->getShape()[2] != b->getShape()[1])
				805	{
				806	printNodeValidationError("OpMatMul operator a.shape[2] should match b.shape[1]");
				807	return 1;
				808	}
				809	C = a->getShape()[2];
				810
				811	// Check H
				812	if (a->getShape()[1] != output->getShape()[1])
				813	{
				814	printNodeValidationError("OpMatMul operator a.shape[1] should match output.shape[1]");
				815	return 1;
				816	}
				817	H = a->getShape()[1];
				818
				819	// Check W
				820	if (b->getShape()[2] != output->getShape()[2])
				821	{
				822	printNodeValidationError("OpMatMul operator output.shape[2] should match output.shape[2]");
				823	return 1;
				824	}
				825	W = b->getShape()[2];
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	826
				827	return 0;
				828	}
				829
				830	template <DType Dtype>
				831	int OpMatMul<Dtype>::eval()
				832	{
				833	typedef Eigen::Tensor<int, 1>::DimensionPair DimPair;
				834	Eigen::array<DimPair, 1> dims{ { DimPair(1, 0) } };
				835
				836	TIn a_val = this->a->getTensor();
				837	TIn b_val = this->b->getTensor();
				838	if (this->qinfo)
				839	{
				840	a_val = a_val - (InEigenType)this->qinfo->a_zp();
				841	b_val = b_val - (InEigenType)this->qinfo->b_zp();
				842	}
				843
Kevin Cheng	2d60f00	2021-06-09 14:18:32 -0700	[diff] [blame]	844	Eigen::array<Eigen::Index, 2> a_rank2_shape({ H, C });
				845	Eigen::array<Eigen::Index, 2> b_rank2_shape({ C, W });
				846	Eigen::array<Eigen::Index, 3> output_rank3_shape({ 1, H, W });
				847
				848	Eigen::array<Eigen::Index, 3> a_size_array({ 1, H, C });
				849	Eigen::array<Eigen::Index, 3> b_size_array({ 1, C, W });
				850
				851	Eigen::array<Eigen::Index, 3> a_begin_array({ 0, 0, 0 });
				852	Eigen::array<Eigen::Index, 3> b_begin_array({ 0, 0, 0 });
				853
				854	// Iterate N dimension.
				855	for (int i = 0; i < N; i++)
				856	{
				857	a_begin_array[0] = i;
				858	b_begin_array[0] = i;
				859
				860	TInRank2 a_rank2_val = a_val.slice(a_begin_array, a_size_array).reshape(a_rank2_shape);
				861	TInRank2 b_rank2_val = b_val.slice(b_begin_array, b_size_array).reshape(b_rank2_shape);
				862	TAccRank2 output_rank2_val =
				863	a_rank2_val.template cast<AccEigenType>().contract(b_rank2_val.template cast<AccEigenType>(), dims);
				864	TAcc output_rank3_val = output_rank2_val.reshape(output_rank3_shape);
				865	if (i == 0)
				866	{
				867	this->output->getTensor() = output_rank3_val;
				868	}
				869	else
				870	{
				871	TAcc temp = this->output->getTensor().concatenate(output_rank3_val, 0);
				872	this->output->getTensor() = temp;
				873	}
				874	}
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	875
				876	if (AccDtype == DType_INT48)
				877	{
Kevin Cheng	2d60f00	2021-06-09 14:18:32 -0700	[diff] [blame]	878	this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
				879	this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	880	}
				881
				882	return GraphNode::eval();
				883	}
				884
				885	template <DType Dtype>
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame^]	886	OpMaxPool2d<Dtype>::OpMaxPool2d(SubgraphTraverser* sgt_,
				887	TosaAttributeBase* attribute_,
				888	TosaQuantInfoBase* qinfo_,
				889	uint64_t id_)
				890	: GraphNode(sgt_, Op_MAX_POOL2D, id_)
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	891	{
				892	setRequiredOperands(1, 1);
				893	setRequiredRank(4);
				894
				895	INIT_ATTRIBUTE(Pool2d);
				896	}
				897
				898	template <DType Dtype>
				899	OpMaxPool2d<Dtype>::~OpMaxPool2d()
				900	{
				901	if (attribute)
				902	delete attribute;
				903	}
				904
				905	template <DType Dtype>
				906	int OpMaxPool2d<Dtype>::checkTensorAttributes()
				907	{
				908	if (validateRequiredOperands())
				909	return 1;
				910
				911	if (validateRequiredRank(inputs[0]) \|\| validateRequiredRank(outputs[0]))
				912	{
				913	return 1;
				914	}
				915
				916	if (inputs[0]->matchType(*outputs[0]))
				917	{
				918	printNodeValidationError("OpMaxPool2d: input and output tensor type mismatch");
				919	return 1;
				920	}
				921
				922	in = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
				923	out = dynamic_cast<TosaReference::TensorTemplate<TOut>*>(outputs[0]);
				924
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	925	if (attribute->padding().size() != 4)
				926	{
				927	printNodeValidationError("OpMaxPool2d: illegal size for attribute padding");
				928	return 1;
				929	}
				930
				931	if (attribute->kernel().size() != 2)
				932	{
				933	printNodeValidationError("OpMaxPool2d: illegal size for attribute kernel");
				934	return 1;
				935	}
				936
				937	if (attribute->stride().size() != 2)
				938	{
				939	printNodeValidationError("OpMaxPool2d: illegal size for attribute stride");
				940	return 1;
				941	}
				942
				943	return 0;
				944	}
				945
				946	template <DType Dtype>
				947	int OpMaxPool2d<Dtype>::eval()
				948	{
				949	int in_batch = this->in->getShape()[0];
				950	int in_height = this->in->getShape()[1];
				951	int in_width = this->in->getShape()[2];
				952	int in_channels = this->in->getShape()[3];
				953
				954	int out_batch = this->out->getShape()[0];
				955	int out_height = this->out->getShape()[1];
				956	int out_width = this->out->getShape()[2];
				957	int out_channels = this->out->getShape()[3];
				958
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame^]	959	ERROR_IF(in_batch != out_batch, "OpMaxPool2d: tensor batch mismatch %d != %d", in_batch, out_batch);
				960	ERROR_IF(in_channels != out_channels, "OpMaxPool2d: tensor channel mismatch %d != %d", in_channels, out_channels);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	961
				962	int padding_top = this->attribute->padding()[0];
				963	int padding_bottom = this->attribute->padding()[1];
				964	int padding_left = this->attribute->padding()[2];
				965	int padding_right = this->attribute->padding()[3];
				966	int kernel_h = this->attribute->kernel()[0];
				967	int kernel_w = this->attribute->kernel()[1];
				968	int stride_h = this->attribute->stride()[0];
				969	int stride_w = this->attribute->stride()[1];
				970
				971	DEBUG_INFO(OP,
				972	"perform MaxPool2d, input.shape=[%d,%d,%d,%d], output.shape=[%d,%d,%d,%d], kernel=[%d,%d], "
				973	"stride=[%d,%d], padding=[%d,%d,%d,%d]",
				974	in_batch, in_height, in_width, in_channels, out_batch, out_height, out_width, out_channels, kernel_h,
				975	kernel_w, stride_h, stride_w, padding_top, padding_bottom, padding_left, padding_right);
				976
				977	Eigen::array<Eigen::Index, 2> im2col_input_dims;
				978	im2col_input_dims[0] = kernel_h * kernel_w;
				979	im2col_input_dims[1] = out_batch * out_height * out_width * out_channels;
				980
				981	Eigen::array<Eigen::Index, 4> col2im_output_dims;
				982	col2im_output_dims[0] = out_batch;
				983	col2im_output_dims[1] = out_height;
				984	col2im_output_dims[2] = out_width;
				985	col2im_output_dims[3] = out_channels;
				986
				987	Eigen::array<std::pair<int32_t, int32_t>, 4> padding;
				988	padding[0] = std::make_pair(0, 0);
				989	padding[1] = std::make_pair(padding_top, padding_bottom);
				990	padding[2] = std::make_pair(padding_left, padding_right);
				991	padding[3] = std::make_pair(0, 0);
				992
				993	ETensor4<InEigenType> input_padded = this->in->getTensor().pad(padding, std::numeric_limits<InEigenType>::lowest());
				994
				995	// extract_image_patches() output [N, KH, KW, H * W, C]
				996	// transpose to [KH, KW, N, H * W, C]
				997	// reshape to [KH * KW, N * H * W * C]
				998	//
				999	// Set the padding value to be the most negative value that can be
				1000	// represented by the datatype to ensure that any padding values will be equal
				1001	// to or smaller than the actual maximum in the KH x KW patch.
				1002	ETensor2<InEigenType> input_extract_patches =
				1003	input_padded
				1004	.extract_image_patches(kernel_h, kernel_w, stride_h, stride_w, 1, 1, Eigen::PADDING_VALID,
				1005	std::numeric_limits<InEigenType>::lowest())
				1006	.shuffle(Eigen::array<Eigen::Index, 5>{ 1, 2, 0, 3, 4 })
				1007	.reshape(im2col_input_dims);
				1008
				1009	// Get the maximum of the KHxHW patches along axis 0
				1010	Eigen::Tensor<DenseIndex, 1> tensor_argmax = input_extract_patches.argmax(0);
				1011
				1012	// 1D result with [N * H * W * C]
				1013	ETensor1<OutEigenType> out_1d(this->out->getElementCount());
				1014
				1015	// index input_patches with argmax array should give the result
				1016	for (size_t i = 0; i < this->out->getElementCount(); i++)
				1017	{
				1018	out_1d(i) = (OutEigenType)input_extract_patches(tensor_argmax(i), i);
				1019	}
				1020
				1021	// reshape result to [N, H, W, C]
				1022	this->out->getTensor() = out_1d.reshape(col2im_output_dims);
				1023
				1024	return GraphNode::eval();
				1025	}
				1026
				1027	template <DType InDtype, DType OutDtype>
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame^]	1028	OpTransposeConv2d<InDtype, OutDtype>::OpTransposeConv2d(SubgraphTraverser* sgt_,
				1029	TosaAttributeBase* attribute_,
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1030	TosaQuantInfoBase* qinfo_,
				1031	uint64_t id_)
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame^]	1032	: GraphNode(sgt_, Op_TRANSPOSE_CONV2D, id_)
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1033	{
				1034	setRequiredOperands(3, 1);
				1035	setRequiredRank(4);
				1036
				1037	INIT_ATTRIBUTE(TransposeConv2d);
				1038	INIT_QINFO(Conv);
				1039	}
				1040
				1041	template <DType InDtype, DType OutDtype>
				1042	OpTransposeConv2d<InDtype, OutDtype>::~OpTransposeConv2d()
				1043	{
				1044	if (attribute)
				1045	delete attribute;
				1046	if (qinfo)
				1047	delete qinfo;
				1048	}
				1049
				1050	template <DType InDtype, DType OutDtype>
				1051	int OpTransposeConv2d<InDtype, OutDtype>::checkTensorAttributes()
				1052	{
				1053	if (validateRequiredOperands())
				1054	return 1;
				1055
				1056	if (validateRequiredRank(inputs[0]) \|\| validateRequiredRank(inputs[1]) \|\| validateRequiredRank(outputs[0]))
				1057	{
				1058	return 1;
				1059	}
				1060
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1061	input = dynamic_cast<TosaReference::TensorTemplate<TIn>*>(inputs[0]);
				1062	weight = dynamic_cast<TosaReference::TensorTemplate<TWeight>*>(inputs[1]);
				1063	bias = dynamic_cast<TosaReference::TensorTemplate<TBias>*>(inputs[2]);
				1064	output = dynamic_cast<TosaReference::TensorTemplate<TAcc>*>(outputs[0]);
				1065
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1066	if (attribute->outpad().size() != 2)
				1067	{
				1068	printNodeValidationError("OpTransposeConv2d: illegal size for attribute outpad");
				1069	return 1;
				1070	}
				1071
				1072	if (attribute->stride().size() != 2)
				1073	{
				1074	printNodeValidationError("OpTransposeConv2d: illegal size for attribute stride");
				1075	return 1;
				1076	}
				1077
				1078	if (attribute->dilation().size() != 2)
				1079	{
				1080	printNodeValidationError("OpTransposeConv2d: illegal size for attribute dilation");
				1081	return 1;
				1082	}
				1083
				1084	if (attribute->output_shape().size() != 4)
				1085	{
				1086	printNodeValidationError("OpTransposeConv2d: illegal size for attribute output_shape");
				1087	return 1;
				1088	}
				1089
				1090	for (int d = 0; d < 4; d++)
				1091	{
				1092	if (attribute->output_shape()[d] != this->output->getShape()[d])
				1093	{
				1094	printNodeValidationError("OpTransposeConv2d: illegal size for attribute output_shape");
				1095	return 1;
				1096	}
				1097	}
				1098
				1099	return 0;
				1100	}
				1101
				1102	template <DType InDtype, DType OutDtype>
				1103	int OpTransposeConv2d<InDtype, OutDtype>::eval()
				1104	{
				1105	int in_batch = this->input->getShape()[0];
				1106	int in_height = this->input->getShape()[1];
				1107	int in_width = this->input->getShape()[2];
				1108	int in_channels = this->input->getShape()[3];
				1109
				1110	int f_out_channels = this->weight->getShape()[0];
				1111	int f_height = this->weight->getShape()[1];
				1112	int f_width = this->weight->getShape()[2];
				1113	int f_in_channels = this->weight->getShape()[3];
				1114
				1115	int b_out_channels = this->bias->getShape()[0];
				1116
				1117	int out_batch = this->output->getShape()[0];
				1118	int out_height = this->output->getShape()[1];
				1119	int out_width = this->output->getShape()[2];
				1120	int out_channels = this->output->getShape()[3];
				1121
				1122	int padding_top = this->attribute->outpad()[0];
				1123	int padding_left = this->attribute->outpad()[1];
				1124	int stride_h = this->attribute->stride()[0];
				1125	int stride_w = this->attribute->stride()[1];
				1126	int dilation_h = this->attribute->dilation()[0];
				1127	int dilation_w = this->attribute->dilation()[1];
				1128
Kevin Cheng	acb550f	2021-06-29 15:32:19 -0700	[diff] [blame^]	1129	ERROR_IF(in_batch != out_batch, "OpTransposeConv2d: tensor batch mismatch %d != %d", in_batch, out_batch);
				1130	ERROR_IF(f_in_channels != in_channels, "OpTransposeConv2d: tensor input channel mismatch %d != %d", f_in_channels,
				1131	in_channels);
				1132	ERROR_IF(f_out_channels != out_channels, "OpTransposeConv2d: tensor output channel mismatch %d != %d",
				1133	f_out_channels, out_channels);
				1134	ERROR_IF(b_out_channels != out_channels, "OpDepthwiseConv2d: bias channels mismatch %d != %d", b_out_channels,
				1135	out_channels);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1136
				1137	DEBUG_INFO(OP,
				1138	"perform OpTransposeConv2d, input.shape=[%d,%d,%d,%d], weight.shape=[%d,%d,%d,%d], "
				1139	"output.shape=[%d,%d,%d,%d], stride=[%d,%d], dilation=[%d,%d], padding=[%d,%d]",
				1140	in_batch, in_height, in_width, in_channels, f_height, f_width, f_out_channels, f_in_channels, out_batch,
				1141	out_height, out_width, out_channels, stride_h, stride_w, dilation_h, dilation_w, padding_top,
				1142	padding_left);
				1143
				1144	TIn input_val = this->input->getTensor();
				1145	TWeight weight_val = this->weight->getTensor();
				1146	if (this->qinfo)
				1147	{
				1148	input_val = input_val - (InEigenType)this->qinfo->input_zp();
				1149	weight_val = weight_val - (WeightEigenType)this->qinfo->weight_zp();
				1150	}
				1151
				1152	Eigen::array<Eigen::Index, 4> reshape_dim;
				1153	reshape_dim.fill(1);
				1154	reshape_dim[3] = b_out_channels;
				1155
				1156	Eigen::array<Eigen::Index, 4> bcast;
				1157	bcast[0] = out_batch;
				1158	bcast[1] = out_height;
				1159	bcast[2] = out_width;
				1160	bcast[3] = 1;
				1161
				1162	// initialize with bias
				1163	this->output->getTensor() = this->bias->getTensor().reshape(reshape_dim).broadcast(bcast);
				1164
				1165	int out_x_origin, out_y_origin;
				1166	int out_x, out_y;
				1167
				1168	// reference implementation from: tensorflow/tensorflow/lite/kernels/internal/reference/reference_ops.h
				1169	for (int ob = 0; ob < out_batch; ob++)
				1170	{
				1171	for (int ih = 0; ih < in_height; ih++)
				1172	{
				1173	for (int iw = 0; iw < in_width; iw++)
				1174	{
				1175	out_x_origin = iw * stride_w - padding_left;
				1176	out_y_origin = ih * stride_h - padding_top;
				1177	for (int ic = 0; ic < in_channels; ic++)
				1178	{
				1179	for (int fh = 0; fh < f_height; fh++)
				1180	{
				1181	for (int fw = 0; fw < f_width; fw++)
				1182	{
				1183	out_x = out_x_origin + fw * dilation_w;
				1184	out_y = out_y_origin + fh * dilation_h;
				1185	for (int oc = 0; oc < out_channels; oc++)
				1186	{
				1187	if ((out_x >= 0 && out_x < out_width) && (out_y >= 0 && out_y < out_height))
				1188	{
				1189	this->output->getTensor()(ob, out_y, out_x, oc) +=
				1190	((AccEigenType)input_val(ob, ih, iw, ic) *
				1191	(AccEigenType)weight_val(oc, fh, fw, ic));
				1192	}
				1193	}
				1194	}
				1195	}
				1196	}
				1197	}
				1198	}
				1199	}
				1200
				1201	if (AccDtype == DType_INT48)
				1202	{
				1203	this->output->getTensor() = this->output->getTensor().cwiseMax((AccEigenType)AccQMin);
				1204	this->output->getTensor() = this->output->getTensor().cwiseMin((AccEigenType)AccQMax);
				1205	}
				1206
				1207	return GraphNode::eval();
				1208	}
				1209
// Explicit instantiations of every operator template implemented in this file,
// one macro invocation per supported data type (or input/weight type pair).
// The DEF_INSTANTIATE_* macros are defined outside this file.
// NOTE(review): presumably each macro expands to a complete
// `template class ...;` declaration, which would explain why the OpAvgPool2d
// lines compile without a trailing semicolon - confirm against the macro
// definitions before normalising the punctuation.
				1210	// template explicit instantiation
// ARGMAX: instantiated per rank (macro name suggests ranks 1-6 - TODO confirm) for each listed type.
				1211	DEF_INSTANTIATE_RANK1_6_ONE_RANK_ONE_TYPE(OpArgMax, FLOAT);
Kevin Cheng	3a47857	2021-01-22 17:21:02 -0800	[diff] [blame]	1212	DEF_INSTANTIATE_RANK1_6_ONE_RANK_ONE_TYPE(OpArgMax, INT8);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1213	DEF_INSTANTIATE_RANK1_6_ONE_RANK_ONE_TYPE(OpArgMax, INT16);
				1214
// AVG_POOL2D: single data type parameter.
				1215	DEF_INSTANTIATE_ONE_TYPE(OpAvgPool2d, FLOAT)
Kevin Cheng	3a47857	2021-01-22 17:21:02 -0800	[diff] [blame]	1216	DEF_INSTANTIATE_ONE_TYPE(OpAvgPool2d, INT8)
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1217	DEF_INSTANTIATE_ONE_TYPE(OpAvgPool2d, INT16)
				1218
// CONV2D: two type parameters per instantiation.
				1219	DEF_INSTANTIATE_TWO_TYPE(OpConv2d, FLOAT, FLOAT);
Kevin Cheng	3a47857	2021-01-22 17:21:02 -0800	[diff] [blame]	1220	DEF_INSTANTIATE_TWO_TYPE(OpConv2d, INT8, INT4);
				1221	DEF_INSTANTIATE_TWO_TYPE(OpConv2d, INT8, INT8);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1222	DEF_INSTANTIATE_TWO_TYPE(OpConv2d, INT16, INT8);
				1223
// DEPTHWISE_CONV2D: same type pairs as CONV2D.
				1224	DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, FLOAT, FLOAT);
Kevin Cheng	3a47857	2021-01-22 17:21:02 -0800	[diff] [blame]	1225	DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, INT8, INT4);
				1226	DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, INT8, INT8);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1227	DEF_INSTANTIATE_TWO_TYPE(OpDepthwiseConv2d, INT16, INT8);
				1228
// FULLY_CONNECTED: template parameters are <InDtype, WeightDtype>.
				1229	DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, FLOAT, FLOAT);
Kevin Cheng	3a47857	2021-01-22 17:21:02 -0800	[diff] [blame]	1230	DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, INT8, INT4);
				1231	DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, INT8, INT8);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1232	DEF_INSTANTIATE_TWO_TYPE(OpFullyConnected, INT16, INT8);
				1233
// MATMUL: single data type parameter.
Kevin Cheng	3a47857	2021-01-22 17:21:02 -0800	[diff] [blame]	1234	DEF_INSTANTIATE_ONE_TYPE(OpMatMul, INT8);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1235	DEF_INSTANTIATE_ONE_TYPE(OpMatMul, INT16);
				1236	DEF_INSTANTIATE_ONE_TYPE(OpMatMul, FLOAT);
				1237
// MAX_POOL2D: single data type parameter.
				1238	DEF_INSTANTIATE_ONE_TYPE(OpMaxPool2d, FLOAT);
Kevin Cheng	3a47857	2021-01-22 17:21:02 -0800	[diff] [blame]	1239	DEF_INSTANTIATE_ONE_TYPE(OpMaxPool2d, INT8);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1240	DEF_INSTANTIATE_ONE_TYPE(OpMaxPool2d, INT16);
				1241
// TRANSPOSE_CONV2D: template parameters are <InDtype, OutDtype>.
				1242	DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, FLOAT, FLOAT);
Kevin Cheng	3a47857	2021-01-22 17:21:02 -0800	[diff] [blame]	1243	DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, INT8, INT4);
				1244	DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, INT8, INT8);
Eric Kunze	e5e2676	2020-10-13 16:11:07 -0700	[diff] [blame]	1245	DEF_INSTANTIATE_TWO_TYPE(OpTransposeConv2d, INT16, INT8);