blob: 8c980ad448b356ea13ebc556234a413b2ba5316e [file] [log] [blame]
Fredrik Svedberga0c36242020-06-03 15:43:31 +02001# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
2#
Fredrik Svedberg1575b942020-08-18 13:19:18 +02003# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
4#
Fredrik Svedberga0c36242020-06-03 15:43:31 +02005# SPDX-License-Identifier: Apache-2.0
6#
Fredrik Svedberg1575b942020-08-18 13:19:18 +02007# Licensed under the Apache License, Version 2.0 (the "License");
8# you may not use this file except in compliance with the License.
Fredrik Svedberga0c36242020-06-03 15:43:31 +02009# You may obtain a copy of the License at
10#
Fredrik Svedberg1575b942020-08-18 13:19:18 +020011# http://www.apache.org/licenses/LICENSE-2.0
Fredrik Svedberga0c36242020-06-03 15:43:31 +020012#
13# Unless required by applicable law or agreed to in writing, software
Fredrik Svedberg1575b942020-08-18 13:19:18 +020014# distributed under the License is distributed on an "AS IS" BASIS,
15# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
Fredrik Svedberga0c36242020-06-03 15:43:31 +020016# See the License for the specific language governing permissions and
17# limitations under the License.
Fredrik Svedberg1575b942020-08-18 13:19:18 +020018#
# Description:
# Contains the SoftMax class, which rewrites a softmax operation as a graph of simpler operations.
Fredrik Svedberg1575b942020-08-18 13:19:18 +020021import math
22
Fredrik Svedberga0c36242020-06-03 15:43:31 +020023import numpy as np
24
Fredrik Svedberg1575b942020-08-18 13:19:18 +020025from . import fp_math
Fredrik Svedberga0c36242020-06-03 15:43:31 +020026from . import scaling
Louis Verhaarde8a5a782020-11-02 18:04:27 +010027from .api import NpuRoundingMode
Fredrik Svedberga0c36242020-06-03 15:43:31 +020028from .data_type import DataType
Tim Halle6ccd872020-11-09 16:46:37 +000029from .debug_database import DebugDatabase
Louis Verhaarde8a5a782020-11-02 18:04:27 +010030from .operation import ActivationFunction
Louis Verhaardaee5d752020-09-30 09:01:52 +020031from .operation import Op
Fredrik Svedberga0c36242020-06-03 15:43:31 +020032from .operation import Operation
Michael McGeagh5778ffd2020-08-06 17:31:02 +010033from .tensor import create_const_tensor
34from .tensor import create_reshape_tensor
Fredrik Svedberga0c36242020-06-03 15:43:31 +020035from .tensor import Tensor
36from .tensor import TensorPurpose
37
38
Fredrik Svedberga0c36242020-06-03 15:43:31 +020039class SoftMax:
40 # Turn off black formatting for the LUT tables to keep them compact
41 # fmt: off
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +020042
Fredrik Svedberga0c36242020-06-03 15:43:31 +020043 EXP_LUT = [
44 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002,
45 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002,
46 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002,
47 0x00000002, 0x00000002, 0x00010002, 0x00000003, 0x00000003, 0x00000003, 0x00000003, 0x00000003,
48 0x00000003, 0x00000003, 0x00000003, 0x00000003, 0x00000003, 0x00000003, 0x00000003, 0x00000003,
49 0x00000003, 0x00000003, 0x00000003, 0x00010003, 0x00000004, 0x00000004, 0x00000004, 0x00000004,
50 0x00000004, 0x00000004, 0x00000004, 0x00000004, 0x00000004, 0x00000004, 0x00000004, 0x00000004,
51 0x00010004, 0x00000005, 0x00000005, 0x00000005, 0x00000005, 0x00000005, 0x00000005, 0x00000005,
52 0x00000005, 0x00000005, 0x00010005, 0x00000006, 0x00000006, 0x00000006, 0x00000006, 0x00000006,
53 0x00000006, 0x00000006, 0x00010006, 0x00000007, 0x00000007, 0x00000007, 0x00000007, 0x00000007,
54 0x00000007, 0x00000007, 0x00010007, 0x00000008, 0x00000008, 0x00000008, 0x00000008, 0x00000008,
55 0x00010008, 0x00000009, 0x00000009, 0x00000009, 0x00000009, 0x00000009, 0x00010009, 0x0000000a,
56 0x0000000a, 0x0000000a, 0x0000000a, 0x0001000a, 0x0000000b, 0x0000000b, 0x0000000b, 0x0000000b,
57 0x0001000b, 0x0000000c, 0x0000000c, 0x0000000c, 0x0001000c, 0x0000000d, 0x0000000d, 0x0000000d,
58 0x0001000d, 0x0000000e, 0x0000000e, 0x0000000e, 0x0001000e, 0x0000000f, 0x0000000f, 0x0001000f,
59 0x00000010, 0x00000010, 0x00010010, 0x00000011, 0x00000011, 0x00010011, 0x00000012, 0x00000012,
60 0x00010012, 0x00000013, 0x00000013, 0x00010013, 0x00000014, 0x00010014, 0x00000015, 0x00000015,
61 0x00010015, 0x00000016, 0x00010016, 0x00000017, 0x00010017, 0x00000018, 0x00010018, 0x00000019,
62 0x00010019, 0x0000001a, 0x0001001a, 0x0000001b, 0x0001001b, 0x0000001c, 0x0001001c, 0x0000001d,
63 0x0001001d, 0x0000001e, 0x0001001e, 0x0001001f, 0x00000020, 0x00010020, 0x00010021, 0x00000022,
64 0x00010022, 0x00010023, 0x00000024, 0x00010024, 0x00000025, 0x00010025, 0x00010026, 0x00010027,
65 0x00000028, 0x00020028, 0x0000002a, 0x0001002a, 0x0001002b, 0x0001002c, 0x0000002d, 0x0001002d,
66 0x0001002e, 0x0001002f, 0x00010030, 0x00010031, 0x00010032, 0x00010033, 0x00010034, 0x00010035,
67 0x00010036, 0x00010037, 0x00010038, 0x00020039, 0x0001003b, 0x0000003c, 0x0002003c, 0x0001003e,
68 0x0002003f, 0x00000041, 0x00020041, 0x00010043, 0x00010044, 0x00020045, 0x00020047, 0x00010049,
69 0x0001004a, 0x0002004b, 0x0001004d, 0x0002004e, 0x00010050, 0x00020051, 0x00020053, 0x00010055,
70 0x00020056, 0x00020058, 0x0002005a, 0x0001005c, 0x0002005d, 0x0002005f, 0x00020061, 0x00020063,
71 0x00020065, 0x00020067, 0x00020069, 0x0002006b, 0x0003006d, 0x00020070, 0x00020072, 0x00020074,
72 0x00030076, 0x00020079, 0x0003007b, 0x0002007e, 0x00030080, 0x00020083, 0x00020085, 0x00040087,
73 0x0002008b, 0x0003008d, 0x00030090, 0x00020093, 0x00030095, 0x00030098, 0x0003009b, 0x0004009e,
74 0x000300a2, 0x000300a5, 0x000300a8, 0x000300ab, 0x000400ae, 0x000300b2, 0x000400b5, 0x000400b9,
75 0x000300bd, 0x000400c0, 0x000400c4, 0x000400c8, 0x000400cc, 0x000400d0, 0x000500d4, 0x000400d9,
76 0x000400dd, 0x000500e1, 0x000400e6, 0x000500ea, 0x000400ef, 0x000500f3, 0x000500f8, 0x000500fd,
77 0x00050102, 0x00050107, 0x0005010c, 0x00060111, 0x00050117, 0x0006011c, 0x00060122, 0x00060128,
78 0x0006012e, 0x00060134, 0x0006013a, 0x00070140, 0x00060147, 0x0007014d, 0x00060154, 0x0007015a,
79 0x00070161, 0x00060168, 0x0008016e, 0x00070176, 0x0008017d, 0x00080185, 0x0007018d, 0x00090194,
80 0x0008019d, 0x000801a5, 0x000801ad, 0x000901b5, 0x000901be, 0x000901c7, 0x000901d0, 0x000901d9,
81 0x000a01e2, 0x000901ec, 0x000a01f5, 0x000b01ff, 0x000a020a, 0x000b0214, 0x000a021f, 0x000b0229,
82 0x000b0234, 0x000b023f, 0x000c024a, 0x000c0256, 0x000c0262, 0x000c026e, 0x000c027a, 0x000d0286,
83 0x000d0293, 0x000d02a0, 0x000e02ad, 0x000e02bb, 0x000e02c9, 0x000e02d7, 0x000f02e5, 0x000f02f4,
84 0x000f0303, 0x000f0312, 0x00100321, 0x00100331, 0x00110341, 0x00100352, 0x00120362, 0x00110374,
85 0x00120385, 0x00120397, 0x001203a9, 0x001303bb, 0x001303ce, 0x001403e1, 0x001403f5, 0x00140409,
86 0x0015041d, 0x00150432, 0x00160447, 0x0016045d, 0x00160473, 0x00170489, 0x001704a0, 0x001904b7,
87 0x001804d0, 0x001904e8, 0x00190501, 0x001a051a, 0x001a0534, 0x001b054e, 0x001b0569, 0x001c0584,
88 0x001c05a0, 0x001d05bc, 0x001e05d9, 0x001e05f7, 0x001e0615, 0x00200633, 0x00200653, 0x00200673,
89 0x00210693, 0x002206b4, 0x002306d6, 0x002306f9, 0x0024071c, 0x00240740, 0x00260764, 0x0026078a,
90 0x002607b0, 0x002807d6, 0x002907fe, 0x00290827, 0x002a0850, 0x002a087a, 0x002c08a4, 0x002c08d0,
91 0x002e08fc, 0x002e092a, 0x002f0958, 0x00310987, 0x003109b8, 0x003209e9, 0x00330a1b, 0x00340a4e,
92 0x00350a82, 0x00350ab7, 0x00380aec, 0x00380b24, 0x003a0b5c, 0x003a0b96, 0x003c0bd0, 0x003d0c0c,
93 0x003e0c49, 0x003f0c87, 0x00400cc6, 0x00420d06, 0x00430d48, 0x00440d8b, 0x00460dcf, 0x00480e15,
94 0x00480e5d, 0x00490ea5, 0x004c0eee, 0x004d0f3a, 0x004e0f87, 0x00500fd5, 0x00511025, 0x00531076,
95 0x005610c9, 0x0056111f, 0x00581175, 0x005a11cd, 0x005c1227, 0x005e1283, 0x005e12e1, 0x0061133f,
96 0x006413a0, 0x00651404, 0x00671469, 0x006914d0, 0x006c1539, 0x006c15a5, 0x00701611, 0x00721681,
97 0x007416f3, 0x00761767, 0x007917dd, 0x007a1856, 0x007d18d0, 0x0080194d, 0x008319cd, 0x00841a50,
98 0x00881ad4, 0x00891b5c, 0x008d1be5, 0x00911c72, 0x00911d03, 0x00961d94, 0x00981e2a, 0x009c1ec2,
99 0x009e1f5e, 0x00a21ffc, 0x00a4209e, 0x00a92142, 0x00ab21eb, 0x00ae2296, 0x00b22344, 0x00b523f6,
100 0x00b924ab, 0x00be2564, 0x00c02622, 0x00c526e2, 0x00c827a7, 0x00cc286f, 0x00d0293b, 0x00d52a0b,
101 0x00d72ae0, 0x00dd2bb7, 0x00e12c94, 0x00e62d75, 0x00eb2e5b, 0x00ef2f46, 0x00f23035, 0x00f83127,
102 0x00fe321f, 0x0101331d, 0x0108341e, 0x010c3526, 0x01123632, 0x01173744, 0x011c385b, 0x01233977,
103 0x01273a9a, 0x012e3bc1, 0x01343cef, 0x013a3e23, 0x01403f5d, 0x0146409d, 0x014c41e3, 0x0154432f,
104 0x01594483, 0x016145dc, 0x0168473d, 0x016f48a5, 0x01764a14, 0x017d4b8a, 0x01854d07, 0x018d4e8c,
105 0x01945019, 0x019d51ad, 0x01a4534a, 0x01ad54ee, 0x01b5569b, 0x01be5850, 0x01c75a0e, 0x01d05bd5,
106 0x01d85da5, 0x01e35f7d, 0x01eb6160, 0x01f6634b, 0x01ff6541, 0x02096740, 0x02146949, 0x021e6b5d,
107 0x02296d7b, 0x02336fa4, 0x023f71d7, 0x024a7416, 0x02567660, 0x026278b6, 0x026d7b18, 0x027a7d85,
108 ]
109
110 ONE_OVER_ONE_PLUS_X_LUT = [
111 0xffc17fff, 0xffc07fc0, 0xffc27f80, 0xffc07f42, 0xffc17f02, 0xffc17ec3, 0xffc27e84, 0xffc27e46,
112 0xffc27e08, 0xffc37dca, 0xffc27d8d, 0xffc37d4f, 0xffc37d12, 0xffc37cd5, 0xffc37c98, 0xffc47c5b,
113 0xffc47c1f, 0xffc47be3, 0xffc57ba7, 0xffc57b6c, 0xffc37b31, 0xffc67af4, 0xffc57aba, 0xffc67a7f,
114 0xffc57a45, 0xffc67a0a, 0xffc779d0, 0xffc67997, 0xffc6795d, 0xffc77923, 0xffc778ea, 0xffc778b1,
115 0xffc87878, 0xffc77840, 0xffc87807, 0xffc877cf, 0xffc97797, 0xffc87760, 0xffc97728, 0xffc976f1,
116 0xffc976ba, 0xffc87683, 0xffca764b, 0xffca7615, 0xffca75df, 0xffca75a9, 0xffca7573, 0xffcb753d,
117 0xffca7508, 0xffcb74d2, 0xffcb749d, 0xffca7468, 0xffcc7432, 0xffcc73fe, 0xffcb73ca, 0xffcc7395,
118 0xffcd7361, 0xffcc732e, 0xffcc72fa, 0xffcd72c6, 0xffcd7293, 0xffcd7260, 0xffcc722d, 0xffce71f9,
119 0xffcd71c7, 0xffce7194, 0xffce7162, 0xffce7130, 0xffcf70fe, 0xffce70cd, 0xffce709b, 0xffcf7069,
120 0xffcf7038, 0xffcf7007, 0xffcf6fd6, 0xffcf6fa5, 0xffd06f74, 0xffd06f44, 0xffd06f14, 0xffd06ee4,
121 0xffd06eb4, 0xffd06e84, 0xffd16e54, 0xffd16e25, 0xffd16df6, 0xffd16dc7, 0xffd06d98, 0xffd26d68,
122 0xffd16d3a, 0xffd26d0b, 0xffd26cdd, 0xffd26caf, 0xffd26c81, 0xffd26c53, 0xffd36c25, 0xffd26bf8,
123 0xffd36bca, 0xffd36b9d, 0xffd36b70, 0xffd26b43, 0xffd46b15, 0xffd36ae9, 0xffd46abc, 0xffd46a90,
124 0xffd46a64, 0xffd46a38, 0xffd46a0c, 0xffd469e0, 0xffd469b4, 0xffd56988, 0xffd5695d, 0xffd56932,
125 0xffd56907, 0xffd568dc, 0xffd568b1, 0xffd56886, 0xffd6685b, 0xffd56831, 0xffd66806, 0xffd667dc,
126 0xffd667b2, 0xffd76788, 0xffd6675f, 0xffd76735, 0xffd6670c, 0xffd766e2, 0xffd666b9, 0xffd7668f,
127 0xffd86666, 0xffd6663e, 0xffd86614, 0xffd765ec, 0xffd865c3, 0xffd8659b, 0xffd86573, 0xffd8654b,
128 0xffd86523, 0xffd864fb, 0xffd964d3, 0xffd864ac, 0xffd96484, 0xffd8645d, 0xffd96435, 0xffd9640e,
129 0xffd963e7, 0xffd963c0, 0xffd96399, 0xffda6372, 0xffd9634c, 0xffda6325, 0xffda62ff, 0xffda62d9,
130 0xffda62b3, 0xffda628d, 0xffda6267, 0xffdb6241, 0xffda621c, 0xffdb61f6, 0xffda61d1, 0xffdc61ab,
131 0xffd96187, 0xffdc6160, 0xffdb613c, 0xffdb6117, 0xffdb60f2, 0xffdc60cd, 0xffdc60a9, 0xffdb6085,
132 0xffdc6060, 0xffdc603c, 0xffdc6018, 0xffdc5ff4, 0xffdc5fd0, 0xffdd5fac, 0xffdc5f89, 0xffdc5f65,
133 0xffdd5f41, 0xffdd5f1e, 0xffdd5efb, 0xffdd5ed8, 0xffdd5eb5, 0xffdd5e92, 0xffdd5e6f, 0xffdd5e4c,
134 0xffdd5e29, 0xffde5e06, 0xffde5de4, 0xffdd5dc2, 0xffde5d9f, 0xffde5d7d, 0xffde5d5b, 0xffde5d39,
135 0xffdf5d17, 0xffde5cf6, 0xffde5cd4, 0xffdf5cb2, 0xffdf5c91, 0xffde5c70, 0xffdf5c4e, 0xffdf5c2d,
136 0xffde5c0c, 0xffe05bea, 0xffdf5bca, 0xffdf5ba9, 0xffdf5b88, 0xffdf5b67, 0xffe05b46, 0xffe05b26,
137 0xffdf5b06, 0xffe05ae5, 0xffe05ac5, 0xffe05aa5, 0xffe05a85, 0xffe05a65, 0xffe05a45, 0xffe15a25,
138 0xffe05a06, 0xffe059e6, 0xffe159c6, 0xffe159a7, 0xffe05988, 0xffe15968, 0xffe15949, 0xffe1592a,
139 0xffe1590b, 0xffe158ec, 0xffe258cd, 0xffe158af, 0xffe15890, 0xffe25871, 0xffe15853, 0xffe25834,
140 0xffe25816, 0xffe257f8, 0xffe157da, 0xffe257bb, 0xffe3579d, 0xffe25780, 0xffe25762, 0xffe25744,
141 0xffe35726, 0xffe25709, 0xffe256eb, 0xffe356cd, 0xffe356b0, 0xffe35693, 0xffe25676, 0xffe35658,
142 0xffe3563b, 0xffe3561e, 0xffe35601, 0xffe355e4, 0xffe455c7, 0xffe355ab, 0xffe4558e, 0xffe35572,
143 0xffe45555, 0xffe35539, 0xffe4551c, 0xffe45500, 0xffe454e4, 0xffe454c8, 0xffe454ac, 0xffe45490,
144 0xffe45474, 0xffe55458, 0xffe4543d, 0xffe45421, 0xffe55405, 0xffe553ea, 0xffe453cf, 0xffe553b3,
145 0xffe45398, 0xffe5537c, 0xffe55361, 0xffe55346, 0xffe5532b, 0xffe55310, 0xffe552f5, 0xffe552da,
146 0xffe652bf, 0xffe552a5, 0xffe5528a, 0xffe6526f, 0xffe55255, 0xffe6523a, 0xffe65220, 0xffe55206,
147 0xffe651eb, 0xffe651d1, 0xffe651b7, 0xffe6519d, 0xffe65183, 0xffe65169, 0xffe7514f, 0xffe65136,
148 0xffe6511c, 0xffe75102, 0xffe650e9, 0xffe750cf, 0xffe650b6, 0xffe7509c, 0xffe75083, 0xffe6506a,
149 0xffe75050, 0xffe75037, 0xffe7501e, 0xffe75005, 0xffe74fec, 0xffe74fd3, 0xffe74fba, 0xffe74fa1,
150 0xffe84f88, 0xffe74f70, 0xffe84f57, 0xffe74f3f, 0xffe84f26, 0xffe74f0e, 0xffe84ef5, 0xffe84edd,
151 0xffe84ec5, 0xffe84ead, 0xffe74e95, 0xffe84e7c, 0xffe84e64, 0xffe94e4c, 0xffe84e35, 0xffe84e1d,
152 0xffe84e05, 0xffe94ded, 0xffe84dd6, 0xffe84dbe, 0xffe94da6, 0xffe94d8f, 0xffe84d78, 0xffe84d60,
153 0xffea4d48, 0xffe84d32, 0xffe94d1a, 0xffe94d03, 0xffe84cec, 0xffe94cd4, 0xffe94cbd, 0xffea4ca6,
154 0xffe94c90, 0xffe84c79, 0xffea4c61, 0xffe94c4b, 0xffe94c34, 0xffea4c1d, 0xffe94c07, 0xffea4bf0,
155 0xffe94bda, 0xffea4bc3, 0xffea4bad, 0xffe94b97, 0xffea4b80, 0xffea4b6a, 0xffea4b54, 0xffea4b3e,
156 0xffea4b28, 0xffea4b12, 0xffea4afc, 0xffea4ae6, 0xffea4ad0, 0xffeb4aba, 0xffea4aa5, 0xffea4a8f,
157 0xffeb4a79, 0xffea4a64, 0xffea4a4e, 0xffeb4a38, 0xffeb4a23, 0xffea4a0e, 0xffeb49f8, 0xffea49e3,
158 0xffeb49cd, 0xffeb49b8, 0xffeb49a3, 0xffeb498e, 0xffea4979, 0xffeb4963, 0xffeb494e, 0xffec4939,
159 0xffeb4925, 0xffea4910, 0xffec48fa, 0xffeb48e6, 0xffeb48d1, 0xffec48bc, 0xffeb48a8, 0xffec4893,
160 0xffeb487f, 0xffec486a, 0xffeb4856, 0xffec4841, 0xffec482d, 0xffeb4819, 0xffec4804, 0xffec47f0,
161 0xffec47dc, 0xffec47c8, 0xffec47b4, 0xffec47a0, 0xffec478c, 0xffec4778, 0xffec4764, 0xffec4750,
162 0xffec473c, 0xffed4728, 0xffec4715, 0xffec4701, 0xffed46ed, 0xffec46da, 0xffed46c6, 0xffec46b3,
163 0xffec469f, 0xffed468b, 0xffed4678, 0xffec4665, 0xffed4651, 0xffed463e, 0xffed462b, 0xffec4618,
164 0xffed4604, 0xffed45f1, 0xffed45de, 0xffed45cb, 0xffed45b8, 0xffed45a5, 0xffed4592, 0xffed457f,
165 0xffee456c, 0xffed455a, 0xffed4547, 0xffed4534, 0xffee4521, 0xffed450f, 0xffed44fc, 0xffee44e9,
166 0xffed44d7, 0xffee44c4, 0xffee44b2, 0xffed44a0, 0xffee448d, 0xffee447b, 0xffed4469, 0xffee4456,
167 0xffee4444, 0xffee4432, 0xffee4420, 0xffee440e, 0xffee43fc, 0xffee43ea, 0xffee43d8, 0xffee43c6,
168 0xffee43b4, 0xffee43a2, 0xffee4390, 0xffef437e, 0xffee436d, 0xffee435b, 0xffef4349, 0xffee4338,
169 0xffee4326, 0xffef4314, 0xffee4303, 0xffef42f1, 0xffee42e0, 0xffef42ce, 0xffee42bd, 0xffef42ab,
170 0xffef429a, 0xffee4289, 0xfff04277, 0xffee4267, 0xffef4255, 0xffef4244, 0xffef4233, 0xffef4222,
171 0xffee4211, 0xffef41ff, 0xfff041ee, 0xffef41de, 0xffef41cd, 0xffee41bc, 0xfff041aa, 0xffef419a,
172 0xffef4189, 0xffef4178, 0xfff04167, 0xffef4157, 0xffef4146, 0xfff04135, 0xffef4125, 0xfff04114,
173 0xffef4104, 0xfff040f3, 0xffef40e3, 0xfff040d2, 0xfff040c2, 0xffef40b2, 0xfff040a1, 0xfff04091,
174 0xfff04081, 0xffef4071, 0xfff04060, 0xfff04050, 0xfff04040, 0xfff04030, 0xfff04020, 0xfff04010
175 ]
176 # fmt: on
177
178 def __init__(self, op):
179 self.op = op
180
def generate_exp_table(self, beta, input_scale):
    """Build the 256-entry exp lookup table used by the 8-bit softmax path.

    Entry ``x`` of the table corresponds to the input difference ``x - 255``,
    so the table covers differences from -255 up to 0.

    Args:
        beta: Softmax beta factor.
        input_scale: Scale of the input tensor's quantization.

    Returns:
        A list of 256 values. Each is the result of
        ``fp_math.exp_on_negative_values`` applied to the rescaled difference,
        or 0 when the difference is smaller than the computed ``diff_min``.
    """
    int_bits = 5
    signed_bits = 31

    # Fold beta and the input scale into one multiplier, capped at 2^31 - 1.
    beta_multiplier = np.double(beta) * np.double(input_scale) * (1 << (31 - int_bits))
    multiplier_cap = np.double((1 << 31) - 1.0)
    real_beta = min(beta_multiplier, multiplier_cap)

    scale, shift = scaling.quantise_scale(real_beta)
    shift = 31 - shift
    left_shift_factor = 1 << shift

    # Differences below this bound are mapped straight to 0 in the table.
    diff_min = -1.0 * math.floor(
        1.0 * ((1 << int_bits) - 1) * (1 << (signed_bits - int_bits)) / left_shift_factor
    )

    def table_entry(input_diff):
        # One LUT value for a single (non-positive) input difference.
        if input_diff < diff_min:
            return 0
        rescaled = fp_math.saturating_rounding_mul(input_diff * left_shift_factor, scale)
        return fp_math.exp_on_negative_values(rescaled)

    return [table_entry(index - 255) for index in range(256)]
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200203
def get_graph(self):
    """Dispatch the wrapped operation to the matching softmax graph builder.

    The first input and first output of ``self.op`` are taken as IFM and OFM.
    When the leading dimension of the IFM's full shape is greater than 1, it is
    folded into the second dimension and both tensors are replaced by reshaped
    versions before dispatching.

    Returns:
        The result of ``get_graph_8bit`` when IFM is uint8/int8 and OFM has the
        same type, the result of ``get_graph_int16`` when both are int16, and
        otherwise ``self.op`` itself after setting ``run_on_npu`` to False.
    """
    op = self.op
    ifm, ofm = op.inputs[0], op.outputs[0]

    # Reshape ifm/ofm (if needed): collapse a leading dimension > 1 into dim 1.
    shape = ifm.get_full_shape()
    leading = shape[0]
    if leading > 1:
        shape[1] *= leading
        shape[0] = 1
        ifm = create_reshape_tensor(ifm, shape)
        # NOTE(review): the trailing False presumably marks this as the
        # output-side reshape - confirm against create_reshape_tensor.
        ofm = create_reshape_tensor(ofm, shape, False)

    if ifm.dtype in (DataType.uint8, DataType.int8) and ofm.dtype == ifm.dtype:
        return self.get_graph_8bit(ifm, ofm)

    if ifm.dtype == DataType.int16 and ofm.dtype == DataType.int16:
        return self.get_graph_int16(ifm, ofm)

    # Unsupported type combination: hand the untouched op back, flagged as not
    # runnable on the NPU.
    op.run_on_npu = False
    return op
223
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200224 def get_graph_8bit(self, ifm, ofm):
225 exp_lut = self.generate_exp_table(self.op.attrs.get("beta", 1.0), ifm.quantization.scale_f32)
Tim Halle6ccd872020-11-09 16:46:37 +0000226 ifm = create_reshape_tensor(ifm, ifm.get_full_shape())
227 DebugDatabase.add_optimised(self.op, ifm.ops[0])
228 ofm = create_reshape_tensor(ofm, ofm.get_full_shape(), False)
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200229 no_scale_quant = ifm.quantization.clone()
230 no_scale_quant.scale_f32 = None
231 no_scale_quant.zero_point = 0
Louis Verhaarde8a5a782020-11-02 18:04:27 +0100232 activation = ActivationFunction(Op.Clip)
233 activation.min = ifm.quantization.quant_min
234 activation.max = ifm.quantization.quant_max
235 activation2 = activation.clone()
236 activation2.min = 2 * ifm.quantization.quant_min
237 activation2.max = 2 * ifm.quantization.quant_max
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200238 one_scale_quant = ifm.quantization.clone()
239 one_scale_quant.scale_f32 = 1.0
240 one_scale_quant.zero_point = 0
241 ifm.quantization.zero_point = 0
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100242 pass_number = 0
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200243
244 # PASS 0 - Depthwise Maxpool
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100245 maxpool_op = self.op.clone(f"_maxpool{pass_number}")
Louis Verhaardaee5d752020-09-30 09:01:52 +0200246 maxpool_op.type = Op.MaxPool
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200247 maxpool_h = ifm.shape[1] * ifm.shape[2]
248 maxpool_w = ifm.shape[3]
249 maxpool_ifm_shape = [1, maxpool_h, maxpool_w, 1]
250 maxpool_op.attrs["padding"] = b"VALID"
251 maxpool_op.attrs["stride_w"] = 1
252 maxpool_op.attrs["stride_h"] = 1
253 maxpool_op.attrs["filter_width"] = maxpool_w
254 maxpool_op.attrs["filter_height"] = 1
255 maxpool_op.attrs["strides"] = [1, maxpool_op.attrs["stride_h"], maxpool_op.attrs["stride_w"], 1]
256 maxpool_op.attrs["ksize"] = [1, maxpool_op.attrs["filter_height"], maxpool_op.attrs["filter_width"], 1]
257 maxpool_op.inputs = [create_reshape_tensor(ifm, maxpool_ifm_shape)]
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100258 ifm_max = Tensor([1, maxpool_h, 1, 1], ifm.dtype, f"{maxpool_op.name}_0")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200259 ifm_max.quantization = no_scale_quant
260 maxpool_op.set_output_tensor(ifm_max)
Tim Halle6ccd872020-11-09 16:46:37 +0000261 DebugDatabase.add_optimised(self.op, maxpool_op)
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100262 pass_number += 1
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200263
264 # PASS 1 - Sub+LUT(exp)
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100265 sub_op = Operation(Op.Sub, f"{self.op.name}_sub{pass_number}")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200266 sub_op.add_input_tensor(ifm)
Fredrik Svedberg835d8e12020-09-04 09:46:17 +0200267 sub_op.add_input_tensor(create_reshape_tensor(ifm_max, [1, ifm.shape[1], ifm.shape[2], 1]))
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200268 sub_op.set_activation_lut(
269 create_const_tensor(
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100270 f"{sub_op.name}_lut", [1, 1, 1, 256], DataType.int32, exp_lut, np.int32, TensorPurpose.LUT
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200271 )
272 )
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100273 ifm_exp = Tensor(ifm.shape, DataType.int32, f"{sub_op.name}_0")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200274 ifm_exp.quantization = one_scale_quant.clone()
275 ifm_exp.quantization.zero_point = 127
Louis Verhaarde8a5a782020-11-02 18:04:27 +0100276 sub_op.activation = ActivationFunction(Op.LUT)
277 # Note: activation.min/max are non-quantized values
278 sub_op.activation.min = -128 - ifm_exp.quantization.zero_point
279 sub_op.activation.max = 127 - ifm_exp.quantization.zero_point
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200280 sub_op.set_output_tensor(ifm_exp)
Tim Halle6ccd872020-11-09 16:46:37 +0000281 DebugDatabase.add_optimised(self.op, sub_op)
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100282 pass_number += 1
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200283
284 # PASS 2 - SHR
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100285 shr2_op = Operation(Op.SHR, f"{self.op.name}_shr{pass_number}")
Louis Verhaarde8a5a782020-11-02 18:04:27 +0100286 shr2_op.attrs["rounding_mode"] = NpuRoundingMode.NATURAL
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200287 shr2_op.add_input_tensor(ifm_exp)
288 shr2_op.add_input_tensor(
289 create_const_tensor(
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100290 f"{shr2_op.name}_const", [1, 1, 1, 1], DataType.int32, [12], np.int32, quantization=no_scale_quant
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200291 ),
292 )
Louis Verhaarde8a5a782020-11-02 18:04:27 +0100293 shr2_op.activation = activation.clone()
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100294 rescaled_exp = Tensor(ifm.shape, ifm_exp.dtype, f"{shr2_op.name}_0")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200295 rescaled_exp.quantization = no_scale_quant
296 shr2_op.set_output_tensor(rescaled_exp)
Tim Halle6ccd872020-11-09 16:46:37 +0000297 DebugDatabase.add_optimised(self.op, shr2_op)
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100298 pass_number += 1
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200299
300 # PASS 3 - Reduce sum
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100301 reduce_sum_op = Operation(Op.ReduceSum, f"{self.op.name}_reduce_sum3")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200302 reduce_sum_op.attrs["padding"] = b"VALID"
303 reduce_sum_op.attrs["stride_w"] = 1
304 reduce_sum_op.attrs["stride_h"] = 1
305 reduce_sum_op.attrs["filter_width"] = 1
306 reduce_sum_op.attrs["filter_height"] = 1
307 reduce_sum_op.attrs["strides"] = [1, reduce_sum_op.attrs["stride_h"], reduce_sum_op.attrs["stride_w"], 1]
308 reduce_sum_op.attrs["ksize"] = [1, reduce_sum_op.attrs["filter_height"], reduce_sum_op.attrs["filter_width"], 1]
309 reduce_sum_op.add_input_tensor(rescaled_exp)
Louis Verhaarde8a5a782020-11-02 18:04:27 +0100310 reduce_sum_op.activation = activation.clone()
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200311
312 reduce_sum_shape = [1, rescaled_exp.shape[1], rescaled_exp.shape[2], 1]
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100313 sum_of_exp = Tensor(reduce_sum_shape, DataType.int32, f"{reduce_sum_op.name}_0")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200314 sum_of_exp.quantization = no_scale_quant
315 reduce_sum_op.set_output_tensor(sum_of_exp)
Tim Halle6ccd872020-11-09 16:46:37 +0000316 DebugDatabase.add_optimised(self.op, reduce_sum_op)
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100317 pass_number += 1
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200318
319 # PASS 4 - CLZ
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100320 clz_op = Operation(Op.CLZ, f"{self.op.name}_clz{pass_number}")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200321 clz_op.add_input_tensor(sum_of_exp)
Louis Verhaarde8a5a782020-11-02 18:04:27 +0100322 clz_op.activation = activation.clone()
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100323 headroom_plus_one = Tensor(sum_of_exp.shape, DataType.int32, f"{clz_op.name}_0")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200324 headroom_plus_one.quantization = no_scale_quant
325 clz_op.set_output_tensor(headroom_plus_one)
Tim Halle6ccd872020-11-09 16:46:37 +0000326 DebugDatabase.add_optimised(self.op, clz_op)
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100327 pass_number += 1
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200328
329 # PASS 5 - Sub
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100330 sub5_op = Operation(Op.Sub, f"{self.op.name}_sub{pass_number}")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200331 sub5_op.add_input_tensor(
332 create_const_tensor(
Fredrik Svedberg1575b942020-08-18 13:19:18 +0200333 "headroom_offset_const",
334 [1, 1, 1, 1],
335 DataType.int32,
336 [12 + 31 - 8],
337 np.int32,
338 quantization=no_scale_quant,
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200339 ),
340 )
341 sub5_op.add_input_tensor(headroom_plus_one)
Louis Verhaarde8a5a782020-11-02 18:04:27 +0100342 sub5_op.activation = activation.clone()
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100343 right_shift = Tensor(sum_of_exp.shape, DataType.int32, f"{sub5_op.name}_0")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200344 right_shift.quantization = no_scale_quant
345 sub5_op.set_output_tensor(right_shift)
Tim Halle6ccd872020-11-09 16:46:37 +0000346 DebugDatabase.add_optimised(self.op, sub5_op)
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100347 pass_number += 1
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200348
349 # PASS 6 - Sub
Fredrik Svedberg1575b942020-08-18 13:19:18 +0200350 one = create_const_tensor("one_const", [1, 1, 1, 1], DataType.int32, [1], np.int32, quantization=no_scale_quant)
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100351 sub6_op = Operation(Op.Sub, f"{self.op.name}_sub{pass_number}")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200352 sub6_op.add_input_tensor(headroom_plus_one)
353 sub6_op.add_input_tensor(one)
Louis Verhaarde8a5a782020-11-02 18:04:27 +0100354 sub6_op.activation = activation.clone()
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100355 headroom = Tensor(sum_of_exp.shape, DataType.int32, f"{sub6_op.name}_0")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200356 headroom.quantization = no_scale_quant
357 sub6_op.set_output_tensor(headroom)
Tim Halle6ccd872020-11-09 16:46:37 +0000358 DebugDatabase.add_optimised(self.op, sub6_op)
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100359 pass_number += 1
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200360
361 # PASS 7 - SHL
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100362 shl7_op = Operation(Op.SHL, f"{self.op.name}_shl{pass_number}")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200363 shl7_op.add_input_tensor(sum_of_exp)
364 shl7_op.add_input_tensor(headroom)
Louis Verhaarde8a5a782020-11-02 18:04:27 +0100365 shl7_op.activation = activation.clone()
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100366 shifted_sum = Tensor(sum_of_exp.shape, DataType.int32, f"{shl7_op.name}_0")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200367 shifted_sum.quantization = no_scale_quant
368 shl7_op.set_output_tensor(shifted_sum)
Tim Halle6ccd872020-11-09 16:46:37 +0000369 DebugDatabase.add_optimised(self.op, shl7_op)
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100370 pass_number += 1
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200371
372 # PASS 8 - Sub
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100373 sub8_op = Operation(Op.Sub, f"{self.op.name}_sub{pass_number}")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200374 sub8_op.add_input_tensor(shifted_sum)
375 sub8_op.add_input_tensor(
376 create_const_tensor(
377 "shifted_one_const", [1, 1, 1, 1], DataType.int32, [1 << 30], np.int32, quantization=no_scale_quant
378 ),
379 )
Louis Verhaarde8a5a782020-11-02 18:04:27 +0100380 sub8_op.activation = activation.clone()
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100381 shifted_sum_minus_one = Tensor(sum_of_exp.shape, DataType.int32, f"{sub8_op.name}_0")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200382 shifted_sum_minus_one.quantization = no_scale_quant
383 sub8_op.set_output_tensor(shifted_sum_minus_one)
Tim Halle6ccd872020-11-09 16:46:37 +0000384 DebugDatabase.add_optimised(self.op, sub8_op)
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100385 pass_number += 1
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200386
387 # PASS 9 - SHL
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100388 shl9_op = Operation(Op.SHL, f"{self.op.name}_shl{pass_number}")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200389 shl9_op.add_input_tensor(shifted_sum_minus_one)
390 shl9_op.add_input_tensor(one)
Louis Verhaarde8a5a782020-11-02 18:04:27 +0100391 shl9_op.activation = activation.clone()
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100392 shifted_sum_minus_one = Tensor(sum_of_exp.shape, DataType.int32, f"{shl9_op.name}_0")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200393 shifted_sum_minus_one.quantization = no_scale_quant
394 shl9_op.set_output_tensor(shifted_sum_minus_one)
Tim Halle6ccd872020-11-09 16:46:37 +0000395 DebugDatabase.add_optimised(self.op, shl9_op)
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100396 pass_number += 1
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200397
398 # PASS 10 - Add
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100399 add10_op = Operation(Op.Add, f"{self.op.name}_add{pass_number}")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200400 add10_op.add_input_tensor(
401 create_const_tensor(
402 "F0_one_const", [1, 1, 1, 1], DataType.int32, [(1 << 31) - 1], np.int32, quantization=no_scale_quant
403 ),
404 )
405 add10_op.add_input_tensor(shifted_sum_minus_one)
Louis Verhaarde8a5a782020-11-02 18:04:27 +0100406 add10_op.activation = activation.clone()
407 add10_op.attrs["rescale"] = (1, 1)
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100408 half_denominator = Tensor(sum_of_exp.shape, DataType.int32, f"{add10_op.name}_0")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200409 half_denominator.quantization = one_scale_quant
410 add10_op.set_output_tensor(half_denominator)
Tim Halle6ccd872020-11-09 16:46:37 +0000411 DebugDatabase.add_optimised(self.op, add10_op)
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100412 pass_number += 1
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200413
414 # PASS 11 - Multiply
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100415 mul11_op = Operation(Op.Mul, f"{self.op.name}_mul{pass_number}")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200416 mul11_op.add_input_tensor(half_denominator)
417 mul11_op.add_input_tensor(
418 create_const_tensor(
Fredrik Svedberg1575b942020-08-18 13:19:18 +0200419 "neg_32_over_17_const",
420 [1, 1, 1, 1],
421 DataType.int32,
422 [-1010580540],
423 np.int32,
424 quantization=one_scale_quant,
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200425 ),
426 )
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100427 rescaled = Tensor(sum_of_exp.shape, DataType.int32, f"{mul11_op.name}_0")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200428 rescaled.quantization = one_scale_quant.clone()
429 rescaled.quantization.scale_f32 = 2.0
Louis Verhaarde8a5a782020-11-02 18:04:27 +0100430 mul11_op.activation = activation2.clone()
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200431 mul11_op.set_output_tensor(rescaled)
Tim Halle6ccd872020-11-09 16:46:37 +0000432 DebugDatabase.add_optimised(self.op, mul11_op)
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100433 pass_number += 1
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200434
435 # PASS 12 - Add
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100436 add12_op = Operation(Op.Add, f"{self.op.name}_add{pass_number}")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200437 add12_op.add_input_tensor(rescaled)
438 add12_op.add_input_tensor(
439 create_const_tensor(
440 "48_over_17_const", [1, 1, 1, 1], DataType.int32, [1515870810], np.int32, quantization=no_scale_quant
441 ),
442 )
Louis Verhaarde8a5a782020-11-02 18:04:27 +0100443 add12_op.activation = activation.clone()
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100444 rescale_w_offset = Tensor(sum_of_exp.shape, DataType.int32, f"{add12_op.name}_0")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200445 rescale_w_offset.quantization = one_scale_quant
446 add12_op.set_output_tensor(rescale_w_offset)
Tim Halle6ccd872020-11-09 16:46:37 +0000447 DebugDatabase.add_optimised(self.op, add12_op)
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100448 pass_number += 1
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200449
450 nr_x = rescale_w_offset
451 F2_one = create_const_tensor(
452 "F2_one_const", [1, 1, 1, 1], DataType.int32, [(1 << 29)], np.int32, quantization=no_scale_quant
453 )
Fredrik Svedberg880e7352020-08-25 11:31:47 +0200454 four = create_const_tensor(
455 "four_const", [1, 1, 1, 1], DataType.int32, [4], np.int32, quantization=no_scale_quant
456 )
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100457 for _ in range(3):
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200458 # PASS 13, 18, 23 - MUL
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100459 mul_op = Operation(Op.Mul, f"{self.op.name}_mul{pass_number}")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200460 mul_op.add_input_tensor(nr_x)
461 mul_op.add_input_tensor(half_denominator)
Louis Verhaarde8a5a782020-11-02 18:04:27 +0100462 mul_op.activation = activation2.clone()
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100463 half_denominator_times_x = Tensor(sum_of_exp.shape, DataType.int32, f"{mul_op.name}_0")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200464 half_denominator_times_x.quantization = one_scale_quant.clone()
465 half_denominator_times_x.quantization.scale_f32 = 2.0
466 mul_op.set_output_tensor(half_denominator_times_x)
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100467 pass_number += 1
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200468 # PASS 14, 19, 24 - SUB
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100469 sub_op = Operation(Op.Sub, f"{self.op.name}_sub{pass_number}")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200470 sub_op.add_input_tensor(F2_one)
471 sub_op.add_input_tensor(half_denominator_times_x)
Louis Verhaarde8a5a782020-11-02 18:04:27 +0100472 sub_op.activation = activation.clone()
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100473 one_minus_half_denominator_times_x = Tensor(sum_of_exp.shape, DataType.int32, f"{sub_op.name}_0")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200474 one_minus_half_denominator_times_x.quantization = one_scale_quant
475 sub_op.set_output_tensor(one_minus_half_denominator_times_x)
Tim Halle6ccd872020-11-09 16:46:37 +0000476 DebugDatabase.add_optimised(self.op, sub_op)
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100477 pass_number += 1
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200478 # PASS 15, 20, 25 - MUL
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100479 mul_op = Operation(Op.Mul, f"{self.op.name}_mul{pass_number}")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200480 mul_op.add_input_tensor(nr_x)
481 mul_op.add_input_tensor(one_minus_half_denominator_times_x)
Louis Verhaarde8a5a782020-11-02 18:04:27 +0100482 mul_op.activation = activation2.clone()
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100483 to_rescale = Tensor(sum_of_exp.shape, DataType.int32, f"{mul_op.name}_0")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200484 to_rescale.quantization = one_scale_quant.clone()
485 to_rescale.quantization.scale_f32 = 2.0
486 mul_op.set_output_tensor(to_rescale)
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100487 pass_number += 1
Fredrik Svedberg880e7352020-08-25 11:31:47 +0200488 # PASS 16, 21, 26 - MUL
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100489 shl_op = Operation(Op.Mul, f"{self.op.name}_mul{pass_number}")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200490 shl_op.add_input_tensor(to_rescale)
Fredrik Svedberg880e7352020-08-25 11:31:47 +0200491 shl_op.add_input_tensor(four)
Louis Verhaarde8a5a782020-11-02 18:04:27 +0100492 shl_op.activation = activation.clone()
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100493 to_add = Tensor(sum_of_exp.shape, DataType.int32, f"{shl_op.name}_0")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200494 to_add.quantization = no_scale_quant
495 shl_op.set_output_tensor(to_add)
Tim Halle6ccd872020-11-09 16:46:37 +0000496 DebugDatabase.add_optimised(self.op, shl_op)
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100497 pass_number += 1
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200498 # PASS 17, 22, 27 - ADD
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100499 add_op = Operation(Op.Add, f"{self.op.name}_add{pass_number}")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200500 add_op.add_input_tensor(nr_x)
501 add_op.add_input_tensor(to_add)
Louis Verhaarde8a5a782020-11-02 18:04:27 +0100502 add_op.activation = activation.clone()
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100503 nr_x = Tensor(sum_of_exp.shape, DataType.int32, f"{add_op.name}_0")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200504 nr_x.quantization = one_scale_quant
505 add_op.set_output_tensor(nr_x)
Tim Halle6ccd872020-11-09 16:46:37 +0000506 DebugDatabase.add_optimised(self.op, add_op)
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100507 pass_number += 1
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200508
Fredrik Svedberg880e7352020-08-25 11:31:47 +0200509 # PASS 28 - Multiply
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100510 mul28_op = Operation(Op.Mul, f"{self.op.name}_mul{pass_number}")
Fredrik Svedberg880e7352020-08-25 11:31:47 +0200511 mul28_op.add_input_tensor(nr_x)
512 mul28_op.add_input_tensor(
513 create_const_tensor("two_const", [1, 1, 1, 1], DataType.int32, [2], np.int32, quantization=no_scale_quant)
514 )
Louis Verhaarde8a5a782020-11-02 18:04:27 +0100515 mul28_op.activation = activation.clone()
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100516 scale_factor = Tensor(sum_of_exp.shape, DataType.int32, f"{mul28_op.name}_0")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200517 scale_factor.quantization = one_scale_quant
Fredrik Svedberg880e7352020-08-25 11:31:47 +0200518 mul28_op.set_output_tensor(scale_factor)
Tim Halle6ccd872020-11-09 16:46:37 +0000519 DebugDatabase.add_optimised(self.op, mul28_op)
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100520 pass_number += 1
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200521
522 # PASS 29 - Multiply
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100523 mul_op = Operation(Op.Mul, f"{self.op.name}_mul{pass_number}")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200524 mul_op.add_input_tensor(ifm_exp)
525 mul_op.add_input_tensor(scale_factor)
Louis Verhaarde8a5a782020-11-02 18:04:27 +0100526 mul_op.activation = activation2.clone()
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100527 scaled_exp = Tensor(ifm_exp.shape, DataType.int32, f"{mul_op.name}_0")
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200528 scaled_exp.quantization = one_scale_quant.clone()
529 scaled_exp.quantization.scale_f32 = 2.0
530 mul_op.set_output_tensor(scaled_exp)
Tim Halle6ccd872020-11-09 16:46:37 +0000531 DebugDatabase.add_optimised(self.op, mul_op)
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100532 pass_number += 1
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200533
534 # PASS 30 - SHR
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100535 shr30_op = Operation(Op.SHR, f"{self.op.name}_shr{pass_number}")
Louis Verhaarde8a5a782020-11-02 18:04:27 +0100536 shr30_op.attrs["rounding_mode"] = NpuRoundingMode.NATURAL
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200537 shr30_op.add_input_tensor(scaled_exp)
538 shr30_op.add_input_tensor(right_shift)
539 shr30_op.set_output_tensor(ofm)
Tim Halle6ccd872020-11-09 16:46:37 +0000540 DebugDatabase.add_optimised(self.op, shr30_op)
Fredrik Svedberg32c7f5b2020-12-02 09:24:29 +0100541 pass_number += 1
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200542
543 return shr30_op
544
def get_graph_int16(self, ifm, ofm):
    """Build the int16 SoftMax operator graph that replaces ``self.op``.

    The graph is emitted as 14 consecutive passes (0-13). Each pass creates
    one operation, wires its inputs/outputs and registers it with
    ``DebugDatabase.add_optimised`` using ``self.op`` as the parent.
    ``pass_number`` is appended to every generated op name, so the
    statements must stay in pass order.

    Args:
        ifm: Input feature map tensor. Its ``shape`` is indexed at
            positions 1-3 and its ``quantization`` is cloned.
        ofm: Output feature map tensor. It becomes the output of the final
            SHR operation; its ``quantization`` is cloned for the PASS 2
            result.

    Returns:
        The last operation of the chain (the PASS 13 SHR op writing ``ofm``).
    """
    # Quantization shared by all intermediate tensors that carry no scale.
    no_scale_quant = ifm.quantization.clone()
    no_scale_quant.scale_f32 = None
    pass_number = 0

    # PASS 0 - Depthwise Maxpool
    # The IFM is viewed as [1, H*W, C, 1] and pooled with a 1 x C filter, so
    # the result holds one maximum per (H, W) position.
    maxpool_op = self.op.clone(f"_maxpool{pass_number}")
    maxpool_op.type = Op.MaxPool
    # NOTE(review): this registration is repeated after the output tensor is
    # set below; whether the early call is required depends on
    # DebugDatabase.add_optimised, which is not visible here - kept as-is.
    DebugDatabase.add_optimised(self.op, maxpool_op)
    maxpool_h = ifm.shape[1] * ifm.shape[2]
    maxpool_w = ifm.shape[3]
    maxpool_ifm_shape = [1, maxpool_h, maxpool_w, 1]
    maxpool_op.attrs["padding"] = b"VALID"
    maxpool_op.attrs["stride_w"] = 1
    maxpool_op.attrs["stride_h"] = 1
    maxpool_op.attrs["filter_width"] = maxpool_w
    maxpool_op.attrs["filter_height"] = 1
    maxpool_op.attrs["strides"] = [1, maxpool_op.attrs["stride_h"], maxpool_op.attrs["stride_w"], 1]
    maxpool_op.attrs["ksize"] = [1, maxpool_op.attrs["filter_height"], maxpool_op.attrs["filter_width"], 1]
    maxpool_op.inputs = [create_reshape_tensor(ifm, maxpool_ifm_shape)]
    ifm_max = Tensor([1, maxpool_h, 1, 1], ifm.dtype, f"{maxpool_op.name}_0")
    ifm_max.quantization = no_scale_quant
    maxpool_op.set_output_tensor(ifm_max)
    DebugDatabase.add_optimised(self.op, maxpool_op)
    pass_number += 1

    # PASS 1 - Sub
    # Inputs: ifm, then the per-position maximum reshaped back to [1, H, W, 1].
    sub1_op = Operation(Op.Sub, f"{self.op.name}_sub{pass_number}")
    sub1_op.add_input_tensor(ifm)
    sub1_op.add_input_tensor(create_reshape_tensor(ifm_max, [1, ifm.shape[1], ifm.shape[2], 1]))
    sub1_ofm = Tensor(ifm.shape, DataType.int32, f"{sub1_op.name}_0")
    sub1_ofm.quantization = ifm.quantization.clone()
    sub1_op.set_output_tensor(sub1_ofm)
    DebugDatabase.add_optimised(self.op, sub1_op)
    pass_number += 1

    # PASS 2 - Mul
    # Multiplies by a constant derived from the input scale, beta and the
    # chosen output range; only the first value returned by
    # elementwise_mul_scale is used.
    beta = self.op.attrs.get("beta", 1.0)
    mul2_out_range = 10.0 / 65535.0
    mul2_scale, _ = scaling.elementwise_mul_scale(sub1_ofm.quantization.scale_f32, beta, mul2_out_range)
    mul2_quant = ifm.quantization.clone()
    mul2_quant.scale_f32 = beta
    mul2_op = Operation(Op.Mul, f"{self.op.name}_mul{pass_number}")
    mul2_op.add_input_tensor(sub1_ofm)
    mul2_op.add_input_tensor(
        create_const_tensor(
            f"{mul2_op.name}_const", [1, 1, 1, 1], DataType.int32, [mul2_scale], np.int32, quantization=mul2_quant
        ),
    )
    # NOTE(review): unlike the other intermediate tensors, this one reuses the
    # op name without the "_0" suffix; left unchanged to keep tensor names stable.
    mul2_ofm = Tensor(ifm.shape, DataType.int32, f"{self.op.name}_mul{pass_number}")
    mul2_ofm.quantization = ofm.quantization.clone()
    mul2_ofm.quantization.scale_f32 = mul2_out_range
    mul2_op.set_output_tensor(mul2_ofm)
    DebugDatabase.add_optimised(self.op, mul2_op)
    pass_number += 1

    # PASS 3 - Add+LUT(exp)
    # Adds the constant 32767 and applies the 512-entry EXP_LUT as activation.
    add_op = Operation(Op.Add, f"{self.op.name}_add{pass_number}")
    add_op.add_input_tensor(mul2_ofm)
    add_op.add_input_tensor(
        create_const_tensor(
            f"{add_op.name}_const", [1, 1, 1, 1], DataType.int32, [32767], np.int32, quantization=no_scale_quant
        ),
    )
    add_op.set_activation_lut(
        create_const_tensor(
            f"{add_op.name}_lut", [1, 1, 1, 512], DataType.int32, self.EXP_LUT, np.int32, TensorPurpose.LUT
        )
    )
    exp_ofm = Tensor(mul2_ofm.shape, DataType.int16, f"{add_op.name}_0")
    exp_ofm.quantization = mul2_ofm.quantization.clone()
    add_op.set_output_tensor(exp_ofm)
    DebugDatabase.add_optimised(self.op, add_op)
    pass_number += 1

    # PASS 4 - Reduce sum
    # The name suffix is derived from pass_number like every other pass
    # (previously a hard-coded "4", which yields the same name here).
    reduce_sum_op = Operation(Op.ReduceSum, f"{self.op.name}_reduce_sum{pass_number}")
    reduce_sum_op.attrs["padding"] = b"VALID"
    reduce_sum_op.attrs["stride_w"] = 1
    reduce_sum_op.attrs["stride_h"] = 1
    reduce_sum_op.attrs["filter_width"] = 1
    reduce_sum_op.attrs["filter_height"] = 1
    reduce_sum_op.attrs["strides"] = [1, reduce_sum_op.attrs["stride_h"], reduce_sum_op.attrs["stride_w"], 1]
    reduce_sum_op.attrs["ksize"] = [1, reduce_sum_op.attrs["filter_height"], reduce_sum_op.attrs["filter_width"], 1]
    reduce_sum_op.add_input_tensor(exp_ofm)

    reduce_sum_shape = [1, exp_ofm.shape[1], exp_ofm.shape[2], 1]
    sum_of_exp = Tensor(reduce_sum_shape, DataType.int32, f"{reduce_sum_op.name}_0")
    sum_of_exp.quantization = no_scale_quant
    reduce_sum_op.set_output_tensor(sum_of_exp)
    DebugDatabase.add_optimised(self.op, reduce_sum_op)
    pass_number += 1

    # PASS 5 - CLZ
    clz_op = Operation(Op.CLZ, f"{self.op.name}_clz{pass_number}")
    clz_op.add_input_tensor(sum_of_exp)
    headroom_plus_one = Tensor(sum_of_exp.shape, DataType.int32, f"{clz_op.name}_0")
    headroom_plus_one.quantization = no_scale_quant
    clz_op.set_output_tensor(headroom_plus_one)
    DebugDatabase.add_optimised(self.op, clz_op)
    pass_number += 1

    # PASS 6 - Sub
    # Inputs: constant 31, then headroom_plus_one.
    sub6_op = Operation(Op.Sub, f"{self.op.name}_sub{pass_number}")
    sub6_op.add_input_tensor(
        create_const_tensor(
            f"{sub6_op.name}_const", [1, 1, 1, 1], DataType.int32, [31], np.int32, quantization=no_scale_quant
        ),
    )
    sub6_op.add_input_tensor(headroom_plus_one)
    reciprocal_right_shift = Tensor(sum_of_exp.shape, DataType.int32, f"{sub6_op.name}_0")
    reciprocal_right_shift.quantization = no_scale_quant
    sub6_op.set_output_tensor(reciprocal_right_shift)
    DebugDatabase.add_optimised(self.op, sub6_op)
    pass_number += 1

    # PASS 7 - SHL
    # Inputs: constant 1, then reciprocal_right_shift.
    shl7_op = Operation(Op.SHL, f"{self.op.name}_shl{pass_number}")
    shl7_op.add_input_tensor(
        create_const_tensor(
            f"{shl7_op.name}_const", [1, 1, 1, 1], DataType.int32, [1], np.int32, quantization=no_scale_quant
        ),
    )
    shl7_op.add_input_tensor(reciprocal_right_shift)
    constant_one = Tensor(sum_of_exp.shape, DataType.int32, f"{shl7_op.name}_0")
    constant_one.quantization = no_scale_quant
    shl7_op.set_output_tensor(constant_one)
    DebugDatabase.add_optimised(self.op, shl7_op)
    pass_number += 1

    # PASS 8 - Sub
    # Inputs: sum_of_exp, then the PASS 7 result.
    sub8_op = Operation(Op.Sub, f"{self.op.name}_sub{pass_number}")
    sub8_op.add_input_tensor(sum_of_exp)
    sub8_op.add_input_tensor(constant_one)
    sum_of_exps_minus_one = Tensor(sum_of_exp.shape, DataType.int32, f"{sub8_op.name}_0")
    sum_of_exps_minus_one.quantization = no_scale_quant
    sub8_op.set_output_tensor(sum_of_exps_minus_one)
    DebugDatabase.add_optimised(self.op, sub8_op)
    pass_number += 1

    # PASS 9 - SHL
    # Inputs: sum_of_exps_minus_one, then headroom_plus_one as shift amount.
    shl9_op = Operation(Op.SHL, f"{self.op.name}_shl{pass_number}")
    shl9_op.add_input_tensor(sum_of_exps_minus_one)
    shl9_op.add_input_tensor(headroom_plus_one)
    shifted_sum_minus_one = Tensor(sum_of_exp.shape, DataType.int32, f"{shl9_op.name}_0")
    shifted_sum_minus_one.quantization = no_scale_quant
    shl9_op.set_output_tensor(shifted_sum_minus_one)
    DebugDatabase.add_optimised(self.op, shl9_op)
    pass_number += 1

    # PASS 10 - SHR
    # Inputs: shifted_sum_minus_one, then constant 15.
    shr10_op = Operation(Op.SHR, f"{self.op.name}_shr{pass_number}")
    shr10_op.add_input_tensor(shifted_sum_minus_one)
    shr10_op.add_input_tensor(
        create_const_tensor(
            f"{shr10_op.name}_const", [1, 1, 1, 1], DataType.int32, [15], np.int32, quantization=no_scale_quant
        ),
    )
    shifted_sum_minus_one_16 = Tensor(sum_of_exp.shape, DataType.int32, f"{shr10_op.name}_0")
    shifted_sum_minus_one_16.quantization = shifted_sum_minus_one.quantization.clone()
    shr10_op.set_output_tensor(shifted_sum_minus_one_16)
    DebugDatabase.add_optimised(self.op, shr10_op)
    pass_number += 1

    # PASS 11 - Sub+LUT(one over one plus x)
    # Subtracts the constant 32768 and applies the 512-entry
    # ONE_OVER_ONE_PLUS_X_LUT as activation.
    sub11_op = Operation(Op.Sub, f"{self.op.name}_sub{pass_number}")
    sub11_op.add_input_tensor(shifted_sum_minus_one_16)
    sub11_op.add_input_tensor(
        create_const_tensor(
            f"{sub11_op.name}_const", [1, 1, 1, 1], DataType.int32, [32768], np.int32, quantization=no_scale_quant
        ),
    )
    sub11_op.set_activation_lut(
        create_const_tensor(
            f"{sub11_op.name}_lut",
            [1, 1, 1, 512],
            DataType.int32,
            self.ONE_OVER_ONE_PLUS_X_LUT,
            np.int32,
            TensorPurpose.LUT,
        )
    )
    reciprocal_scale = Tensor(sum_of_exp.shape, DataType.int16, f"{sub11_op.name}_0")
    reciprocal_scale.quantization = no_scale_quant
    sub11_op.set_output_tensor(reciprocal_scale)
    DebugDatabase.add_optimised(self.op, sub11_op)
    pass_number += 1

    # PASS 12 - Multiply
    # Inputs: the PASS 3 exp result, then the PASS 11 reciprocal scale.
    mul_op = Operation(Op.Mul, f"{self.op.name}_mul{pass_number}")
    mul_op.add_input_tensor(exp_ofm)
    mul_op.add_input_tensor(reciprocal_scale)
    mul_ofm = Tensor(exp_ofm.shape, DataType.int32, f"{mul_op.name}_0")
    mul_ofm.quantization = no_scale_quant
    mul_op.set_output_tensor(mul_ofm)
    DebugDatabase.add_optimised(self.op, mul_op)
    pass_number += 1

    # PASS 13 - SHR
    # Shifts the product by reciprocal_right_shift (from PASS 6) into the OFM.
    shr13_op = Operation(Op.SHR, f"{self.op.name}_shr{pass_number}")
    shr13_op.add_input_tensor(mul_ofm)
    shr13_op.add_input_tensor(reciprocal_right_shift)
    shr13_op.set_output_tensor(ofm)
    DebugDatabase.add_optimised(self.op, shr13_op)
    pass_number += 1

    return shr13_op