blob: 3afda4d454b68bcf2daba1ae0e6e27fb49d90467 [file] [log] [blame]
//
// This confidential and proprietary software may be used only as
// authorised by a licensing agreement from ARM Limited
// (C) COPYRIGHT 2020-2024 ARM Limited
// ALL RIGHTS RESERVED
// The entire notice above must be reproduced on all authorised
// copies and copies may only be made to the extent permitted
// by a licensing agreement from ARM Limited.
ERROR_IF(in_t != i8_t && (A_zp != 0 || B_zp != 0)); // Zero point only for int8_t
for_each(0 <= n < N, 0 <= h < H, 0 <= w < W) {
out_t acc = 0;
for_each(0 <= c < C) {
out_t value1 = static_cast<out_t>(tensor_read<in_t>(A, [N,H,C], [n,h,c]));
out_t value2 = static_cast<out_t>(tensor_read<in_t>(B, [N,C,W], [n,c,w]));
value1 = apply_sub_s<out_t>(value1, static_cast<out_t>(A_zp));
value2 = apply_sub_s<out_t>(value2, static_cast<out_t>(B_zp));
acc = apply_add_s<out_t>(acc, apply_mul_s<out_t>(value1 * value2));
}
tensor_write<out_t>(output, [N,H,W], [n,h,w], acc);
}