# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
#
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Description:
# Contains SoftMax
import math

import numpy as np

from . import fp_math
from . import scaling
from .api import NpuRoundingMode
from .data_type import DataType
from .debug_database import DebugDatabase
from .operation import ActivationFunction
from .operation import Op
from .operation import Operation
from .tensor import create_const_tensor
from .tensor import create_reshape_tensor
from .tensor import Tensor
from .tensor import TensorPurpose


class SoftMax:
    # Turn off black formatting for the LUT tables to keep them compact
    # fmt: off

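    # Two 512-entry int32 LUTs used by the int16 path (get_graph_int16):
    # EXP_LUT approximates exp() of the max-subtracted, beta-scaled input and
    # ONE_OVER_ONE_PLUS_X_LUT approximates 1 / (1 + x) of the normalised sum.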
    EXP_LUT = [
        0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002,
        0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002,
        0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002,
        0x00000002, 0x00000002, 0x00010002, 0x00000003, 0x00000003, 0x00000003, 0x00000003, 0x00000003,
        0x00000003, 0x00000003, 0x00000003, 0x00000003, 0x00000003, 0x00000003, 0x00000003, 0x00000003,
        0x00000003, 0x00000003, 0x00000003, 0x00010003, 0x00000004, 0x00000004, 0x00000004, 0x00000004,
        0x00000004, 0x00000004, 0x00000004, 0x00000004, 0x00000004, 0x00000004, 0x00000004, 0x00000004,
        0x00010004, 0x00000005, 0x00000005, 0x00000005, 0x00000005, 0x00000005, 0x00000005, 0x00000005,
        0x00000005, 0x00000005, 0x00010005, 0x00000006, 0x00000006, 0x00000006, 0x00000006, 0x00000006,
        0x00000006, 0x00000006, 0x00010006, 0x00000007, 0x00000007, 0x00000007, 0x00000007, 0x00000007,
        0x00000007, 0x00000007, 0x00010007, 0x00000008, 0x00000008, 0x00000008, 0x00000008, 0x00000008,
        0x00010008, 0x00000009, 0x00000009, 0x00000009, 0x00000009, 0x00000009, 0x00010009, 0x0000000a,
        0x0000000a, 0x0000000a, 0x0000000a, 0x0001000a, 0x0000000b, 0x0000000b, 0x0000000b, 0x0000000b,
        0x0001000b, 0x0000000c, 0x0000000c, 0x0000000c, 0x0001000c, 0x0000000d, 0x0000000d, 0x0000000d,
        0x0001000d, 0x0000000e, 0x0000000e, 0x0000000e, 0x0001000e, 0x0000000f, 0x0000000f, 0x0001000f,
        0x00000010, 0x00000010, 0x00010010, 0x00000011, 0x00000011, 0x00010011, 0x00000012, 0x00000012,
        0x00010012, 0x00000013, 0x00000013, 0x00010013, 0x00000014, 0x00010014, 0x00000015, 0x00000015,
        0x00010015, 0x00000016, 0x00010016, 0x00000017, 0x00010017, 0x00000018, 0x00010018, 0x00000019,
        0x00010019, 0x0000001a, 0x0001001a, 0x0000001b, 0x0001001b, 0x0000001c, 0x0001001c, 0x0000001d,
        0x0001001d, 0x0000001e, 0x0001001e, 0x0001001f, 0x00000020, 0x00010020, 0x00010021, 0x00000022,
        0x00010022, 0x00010023, 0x00000024, 0x00010024, 0x00000025, 0x00010025, 0x00010026, 0x00010027,
        0x00000028, 0x00020028, 0x0000002a, 0x0001002a, 0x0001002b, 0x0001002c, 0x0000002d, 0x0001002d,
        0x0001002e, 0x0001002f, 0x00010030, 0x00010031, 0x00010032, 0x00010033, 0x00010034, 0x00010035,
        0x00010036, 0x00010037, 0x00010038, 0x00020039, 0x0001003b, 0x0000003c, 0x0002003c, 0x0001003e,
        0x0002003f, 0x00000041, 0x00020041, 0x00010043, 0x00010044, 0x00020045, 0x00020047, 0x00010049,
        0x0001004a, 0x0002004b, 0x0001004d, 0x0002004e, 0x00010050, 0x00020051, 0x00020053, 0x00010055,
        0x00020056, 0x00020058, 0x0002005a, 0x0001005c, 0x0002005d, 0x0002005f, 0x00020061, 0x00020063,
        0x00020065, 0x00020067, 0x00020069, 0x0002006b, 0x0003006d, 0x00020070, 0x00020072, 0x00020074,
        0x00030076, 0x00020079, 0x0003007b, 0x0002007e, 0x00030080, 0x00020083, 0x00020085, 0x00040087,
        0x0002008b, 0x0003008d, 0x00030090, 0x00020093, 0x00030095, 0x00030098, 0x0003009b, 0x0004009e,
        0x000300a2, 0x000300a5, 0x000300a8, 0x000300ab, 0x000400ae, 0x000300b2, 0x000400b5, 0x000400b9,
        0x000300bd, 0x000400c0, 0x000400c4, 0x000400c8, 0x000400cc, 0x000400d0, 0x000500d4, 0x000400d9,
        0x000400dd, 0x000500e1, 0x000400e6, 0x000500ea, 0x000400ef, 0x000500f3, 0x000500f8, 0x000500fd,
        0x00050102, 0x00050107, 0x0005010c, 0x00060111, 0x00050117, 0x0006011c, 0x00060122, 0x00060128,
        0x0006012e, 0x00060134, 0x0006013a, 0x00070140, 0x00060147, 0x0007014d, 0x00060154, 0x0007015a,
        0x00070161, 0x00060168, 0x0008016e, 0x00070176, 0x0008017d, 0x00080185, 0x0007018d, 0x00090194,
        0x0008019d, 0x000801a5, 0x000801ad, 0x000901b5, 0x000901be, 0x000901c7, 0x000901d0, 0x000901d9,
        0x000a01e2, 0x000901ec, 0x000a01f5, 0x000b01ff, 0x000a020a, 0x000b0214, 0x000a021f, 0x000b0229,
        0x000b0234, 0x000b023f, 0x000c024a, 0x000c0256, 0x000c0262, 0x000c026e, 0x000c027a, 0x000d0286,
        0x000d0293, 0x000d02a0, 0x000e02ad, 0x000e02bb, 0x000e02c9, 0x000e02d7, 0x000f02e5, 0x000f02f4,
        0x000f0303, 0x000f0312, 0x00100321, 0x00100331, 0x00110341, 0x00100352, 0x00120362, 0x00110374,
        0x00120385, 0x00120397, 0x001203a9, 0x001303bb, 0x001303ce, 0x001403e1, 0x001403f5, 0x00140409,
        0x0015041d, 0x00150432, 0x00160447, 0x0016045d, 0x00160473, 0x00170489, 0x001704a0, 0x001904b7,
        0x001804d0, 0x001904e8, 0x00190501, 0x001a051a, 0x001a0534, 0x001b054e, 0x001b0569, 0x001c0584,
        0x001c05a0, 0x001d05bc, 0x001e05d9, 0x001e05f7, 0x001e0615, 0x00200633, 0x00200653, 0x00200673,
        0x00210693, 0x002206b4, 0x002306d6, 0x002306f9, 0x0024071c, 0x00240740, 0x00260764, 0x0026078a,
        0x002607b0, 0x002807d6, 0x002907fe, 0x00290827, 0x002a0850, 0x002a087a, 0x002c08a4, 0x002c08d0,
        0x002e08fc, 0x002e092a, 0x002f0958, 0x00310987, 0x003109b8, 0x003209e9, 0x00330a1b, 0x00340a4e,
        0x00350a82, 0x00350ab7, 0x00380aec, 0x00380b24, 0x003a0b5c, 0x003a0b96, 0x003c0bd0, 0x003d0c0c,
        0x003e0c49, 0x003f0c87, 0x00400cc6, 0x00420d06, 0x00430d48, 0x00440d8b, 0x00460dcf, 0x00480e15,
        0x00480e5d, 0x00490ea5, 0x004c0eee, 0x004d0f3a, 0x004e0f87, 0x00500fd5, 0x00511025, 0x00531076,
        0x005610c9, 0x0056111f, 0x00581175, 0x005a11cd, 0x005c1227, 0x005e1283, 0x005e12e1, 0x0061133f,
        0x006413a0, 0x00651404, 0x00671469, 0x006914d0, 0x006c1539, 0x006c15a5, 0x00701611, 0x00721681,
        0x007416f3, 0x00761767, 0x007917dd, 0x007a1856, 0x007d18d0, 0x0080194d, 0x008319cd, 0x00841a50,
        0x00881ad4, 0x00891b5c, 0x008d1be5, 0x00911c72, 0x00911d03, 0x00961d94, 0x00981e2a, 0x009c1ec2,
        0x009e1f5e, 0x00a21ffc, 0x00a4209e, 0x00a92142, 0x00ab21eb, 0x00ae2296, 0x00b22344, 0x00b523f6,
        0x00b924ab, 0x00be2564, 0x00c02622, 0x00c526e2, 0x00c827a7, 0x00cc286f, 0x00d0293b, 0x00d52a0b,
        0x00d72ae0, 0x00dd2bb7, 0x00e12c94, 0x00e62d75, 0x00eb2e5b, 0x00ef2f46, 0x00f23035, 0x00f83127,
        0x00fe321f, 0x0101331d, 0x0108341e, 0x010c3526, 0x01123632, 0x01173744, 0x011c385b, 0x01233977,
        0x01273a9a, 0x012e3bc1, 0x01343cef, 0x013a3e23, 0x01403f5d, 0x0146409d, 0x014c41e3, 0x0154432f,
        0x01594483, 0x016145dc, 0x0168473d, 0x016f48a5, 0x01764a14, 0x017d4b8a, 0x01854d07, 0x018d4e8c,
        0x01945019, 0x019d51ad, 0x01a4534a, 0x01ad54ee, 0x01b5569b, 0x01be5850, 0x01c75a0e, 0x01d05bd5,
        0x01d85da5, 0x01e35f7d, 0x01eb6160, 0x01f6634b, 0x01ff6541, 0x02096740, 0x02146949, 0x021e6b5d,
        0x02296d7b, 0x02336fa4, 0x023f71d7, 0x024a7416, 0x02567660, 0x026278b6, 0x026d7b18, 0x027a7d85,
    ]

    ONE_OVER_ONE_PLUS_X_LUT = [
        0xffc17fff, 0xffc07fc0, 0xffc27f80, 0xffc07f42, 0xffc17f02, 0xffc17ec3, 0xffc27e84, 0xffc27e46,
        0xffc27e08, 0xffc37dca, 0xffc27d8d, 0xffc37d4f, 0xffc37d12, 0xffc37cd5, 0xffc37c98, 0xffc47c5b,
        0xffc47c1f, 0xffc47be3, 0xffc57ba7, 0xffc57b6c, 0xffc37b31, 0xffc67af4, 0xffc57aba, 0xffc67a7f,
        0xffc57a45, 0xffc67a0a, 0xffc779d0, 0xffc67997, 0xffc6795d, 0xffc77923, 0xffc778ea, 0xffc778b1,
        0xffc87878, 0xffc77840, 0xffc87807, 0xffc877cf, 0xffc97797, 0xffc87760, 0xffc97728, 0xffc976f1,
        0xffc976ba, 0xffc87683, 0xffca764b, 0xffca7615, 0xffca75df, 0xffca75a9, 0xffca7573, 0xffcb753d,
        0xffca7508, 0xffcb74d2, 0xffcb749d, 0xffca7468, 0xffcc7432, 0xffcc73fe, 0xffcb73ca, 0xffcc7395,
        0xffcd7361, 0xffcc732e, 0xffcc72fa, 0xffcd72c6, 0xffcd7293, 0xffcd7260, 0xffcc722d, 0xffce71f9,
        0xffcd71c7, 0xffce7194, 0xffce7162, 0xffce7130, 0xffcf70fe, 0xffce70cd, 0xffce709b, 0xffcf7069,
        0xffcf7038, 0xffcf7007, 0xffcf6fd6, 0xffcf6fa5, 0xffd06f74, 0xffd06f44, 0xffd06f14, 0xffd06ee4,
        0xffd06eb4, 0xffd06e84, 0xffd16e54, 0xffd16e25, 0xffd16df6, 0xffd16dc7, 0xffd06d98, 0xffd26d68,
        0xffd16d3a, 0xffd26d0b, 0xffd26cdd, 0xffd26caf, 0xffd26c81, 0xffd26c53, 0xffd36c25, 0xffd26bf8,
        0xffd36bca, 0xffd36b9d, 0xffd36b70, 0xffd26b43, 0xffd46b15, 0xffd36ae9, 0xffd46abc, 0xffd46a90,
        0xffd46a64, 0xffd46a38, 0xffd46a0c, 0xffd469e0, 0xffd469b4, 0xffd56988, 0xffd5695d, 0xffd56932,
        0xffd56907, 0xffd568dc, 0xffd568b1, 0xffd56886, 0xffd6685b, 0xffd56831, 0xffd66806, 0xffd667dc,
        0xffd667b2, 0xffd76788, 0xffd6675f, 0xffd76735, 0xffd6670c, 0xffd766e2, 0xffd666b9, 0xffd7668f,
        0xffd86666, 0xffd6663e, 0xffd86614, 0xffd765ec, 0xffd865c3, 0xffd8659b, 0xffd86573, 0xffd8654b,
        0xffd86523, 0xffd864fb, 0xffd964d3, 0xffd864ac, 0xffd96484, 0xffd8645d, 0xffd96435, 0xffd9640e,
        0xffd963e7, 0xffd963c0, 0xffd96399, 0xffda6372, 0xffd9634c, 0xffda6325, 0xffda62ff, 0xffda62d9,
        0xffda62b3, 0xffda628d, 0xffda6267, 0xffdb6241, 0xffda621c, 0xffdb61f6, 0xffda61d1, 0xffdc61ab,
        0xffd96187, 0xffdc6160, 0xffdb613c, 0xffdb6117, 0xffdb60f2, 0xffdc60cd, 0xffdc60a9, 0xffdb6085,
        0xffdc6060, 0xffdc603c, 0xffdc6018, 0xffdc5ff4, 0xffdc5fd0, 0xffdd5fac, 0xffdc5f89, 0xffdc5f65,
        0xffdd5f41, 0xffdd5f1e, 0xffdd5efb, 0xffdd5ed8, 0xffdd5eb5, 0xffdd5e92, 0xffdd5e6f, 0xffdd5e4c,
        0xffdd5e29, 0xffde5e06, 0xffde5de4, 0xffdd5dc2, 0xffde5d9f, 0xffde5d7d, 0xffde5d5b, 0xffde5d39,
        0xffdf5d17, 0xffde5cf6, 0xffde5cd4, 0xffdf5cb2, 0xffdf5c91, 0xffde5c70, 0xffdf5c4e, 0xffdf5c2d,
        0xffde5c0c, 0xffe05bea, 0xffdf5bca, 0xffdf5ba9, 0xffdf5b88, 0xffdf5b67, 0xffe05b46, 0xffe05b26,
        0xffdf5b06, 0xffe05ae5, 0xffe05ac5, 0xffe05aa5, 0xffe05a85, 0xffe05a65, 0xffe05a45, 0xffe15a25,
        0xffe05a06, 0xffe059e6, 0xffe159c6, 0xffe159a7, 0xffe05988, 0xffe15968, 0xffe15949, 0xffe1592a,
        0xffe1590b, 0xffe158ec, 0xffe258cd, 0xffe158af, 0xffe15890, 0xffe25871, 0xffe15853, 0xffe25834,
        0xffe25816, 0xffe257f8, 0xffe157da, 0xffe257bb, 0xffe3579d, 0xffe25780, 0xffe25762, 0xffe25744,
        0xffe35726, 0xffe25709, 0xffe256eb, 0xffe356cd, 0xffe356b0, 0xffe35693, 0xffe25676, 0xffe35658,
        0xffe3563b, 0xffe3561e, 0xffe35601, 0xffe355e4, 0xffe455c7, 0xffe355ab, 0xffe4558e, 0xffe35572,
        0xffe45555, 0xffe35539, 0xffe4551c, 0xffe45500, 0xffe454e4, 0xffe454c8, 0xffe454ac, 0xffe45490,
        0xffe45474, 0xffe55458, 0xffe4543d, 0xffe45421, 0xffe55405, 0xffe553ea, 0xffe453cf, 0xffe553b3,
        0xffe45398, 0xffe5537c, 0xffe55361, 0xffe55346, 0xffe5532b, 0xffe55310, 0xffe552f5, 0xffe552da,
        0xffe652bf, 0xffe552a5, 0xffe5528a, 0xffe6526f, 0xffe55255, 0xffe6523a, 0xffe65220, 0xffe55206,
        0xffe651eb, 0xffe651d1, 0xffe651b7, 0xffe6519d, 0xffe65183, 0xffe65169, 0xffe7514f, 0xffe65136,
        0xffe6511c, 0xffe75102, 0xffe650e9, 0xffe750cf, 0xffe650b6, 0xffe7509c, 0xffe75083, 0xffe6506a,
        0xffe75050, 0xffe75037, 0xffe7501e, 0xffe75005, 0xffe74fec, 0xffe74fd3, 0xffe74fba, 0xffe74fa1,
        0xffe84f88, 0xffe74f70, 0xffe84f57, 0xffe74f3f, 0xffe84f26, 0xffe74f0e, 0xffe84ef5, 0xffe84edd,
        0xffe84ec5, 0xffe84ead, 0xffe74e95, 0xffe84e7c, 0xffe84e64, 0xffe94e4c, 0xffe84e35, 0xffe84e1d,
        0xffe84e05, 0xffe94ded, 0xffe84dd6, 0xffe84dbe, 0xffe94da6, 0xffe94d8f, 0xffe84d78, 0xffe84d60,
        0xffea4d48, 0xffe84d32, 0xffe94d1a, 0xffe94d03, 0xffe84cec, 0xffe94cd4, 0xffe94cbd, 0xffea4ca6,
        0xffe94c90, 0xffe84c79, 0xffea4c61, 0xffe94c4b, 0xffe94c34, 0xffea4c1d, 0xffe94c07, 0xffea4bf0,
        0xffe94bda, 0xffea4bc3, 0xffea4bad, 0xffe94b97, 0xffea4b80, 0xffea4b6a, 0xffea4b54, 0xffea4b3e,
        0xffea4b28, 0xffea4b12, 0xffea4afc, 0xffea4ae6, 0xffea4ad0, 0xffeb4aba, 0xffea4aa5, 0xffea4a8f,
        0xffeb4a79, 0xffea4a64, 0xffea4a4e, 0xffeb4a38, 0xffeb4a23, 0xffea4a0e, 0xffeb49f8, 0xffea49e3,
        0xffeb49cd, 0xffeb49b8, 0xffeb49a3, 0xffeb498e, 0xffea4979, 0xffeb4963, 0xffeb494e, 0xffec4939,
        0xffeb4925, 0xffea4910, 0xffec48fa, 0xffeb48e6, 0xffeb48d1, 0xffec48bc, 0xffeb48a8, 0xffec4893,
        0xffeb487f, 0xffec486a, 0xffeb4856, 0xffec4841, 0xffec482d, 0xffeb4819, 0xffec4804, 0xffec47f0,
        0xffec47dc, 0xffec47c8, 0xffec47b4, 0xffec47a0, 0xffec478c, 0xffec4778, 0xffec4764, 0xffec4750,
        0xffec473c, 0xffed4728, 0xffec4715, 0xffec4701, 0xffed46ed, 0xffec46da, 0xffed46c6, 0xffec46b3,
        0xffec469f, 0xffed468b, 0xffed4678, 0xffec4665, 0xffed4651, 0xffed463e, 0xffed462b, 0xffec4618,
        0xffed4604, 0xffed45f1, 0xffed45de, 0xffed45cb, 0xffed45b8, 0xffed45a5, 0xffed4592, 0xffed457f,
        0xffee456c, 0xffed455a, 0xffed4547, 0xffed4534, 0xffee4521, 0xffed450f, 0xffed44fc, 0xffee44e9,
        0xffed44d7, 0xffee44c4, 0xffee44b2, 0xffed44a0, 0xffee448d, 0xffee447b, 0xffed4469, 0xffee4456,
        0xffee4444, 0xffee4432, 0xffee4420, 0xffee440e, 0xffee43fc, 0xffee43ea, 0xffee43d8, 0xffee43c6,
        0xffee43b4, 0xffee43a2, 0xffee4390, 0xffef437e, 0xffee436d, 0xffee435b, 0xffef4349, 0xffee4338,
        0xffee4326, 0xffef4314, 0xffee4303, 0xffef42f1, 0xffee42e0, 0xffef42ce, 0xffee42bd, 0xffef42ab,
        0xffef429a, 0xffee4289, 0xfff04277, 0xffee4267, 0xffef4255, 0xffef4244, 0xffef4233, 0xffef4222,
        0xffee4211, 0xffef41ff, 0xfff041ee, 0xffef41de, 0xffef41cd, 0xffee41bc, 0xfff041aa, 0xffef419a,
        0xffef4189, 0xffef4178, 0xfff04167, 0xffef4157, 0xffef4146, 0xfff04135, 0xffef4125, 0xfff04114,
        0xffef4104, 0xfff040f3, 0xffef40e3, 0xfff040d2, 0xfff040c2, 0xffef40b2, 0xfff040a1, 0xfff04091,
        0xfff04081, 0xffef4071, 0xfff04060, 0xfff04050, 0xfff04040, 0xfff04030, 0xfff04020, 0xfff04010
    ]
    # fmt: on

    def __init__(self, op):
        self.op = op

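    # Builds the 256-entry exp LUT used by the 8-bit path. Beta and the input
    # scale are folded into a fixed-point multiplier (integer_bits = 5), in the
    # style of the TFLite reference quantized softmax; inputs below diff_min
    # saturate to zero.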
    def generate_exp_table(self, beta, input_scale):
        integer_bits = 5
        total_signed_bits = 31
        # Calculate scaling
        real_beta = min(
            np.double(beta) * np.double(input_scale) * (1 << (31 - integer_bits)), np.double((1 << 31) - 1.0)
        )
        scale, shift = scaling.quantise_scale(real_beta)
        shift = 31 - shift
        diff_min = -1.0 * math.floor(
            1.0 * ((1 << integer_bits) - 1) * (1 << (total_signed_bits - integer_bits)) / (1 << shift)
        )
        # Generate the exp LUT
        lut = []
        for x in range(256):
            input_diff = x - 255
            if input_diff >= diff_min:
                rescale = fp_math.saturating_rounding_mul(input_diff * (1 << shift), scale)
                lut.append(fp_math.exp_on_negative_values(rescale))
            else:
                lut.append(0)
        return lut

    def get_graph(self):
        ifm = self.op.inputs[0]
        ofm = self.op.outputs[0]

        # Reshape ifm/ofm (if needed)
        full_shape = ifm.get_full_shape()
        if full_shape[0] > 1:
            full_shape[1] *= full_shape[0]
            full_shape[0] = 1
            ifm = create_reshape_tensor(ifm, full_shape)
            ofm = create_reshape_tensor(ofm, full_shape, False)

        if ifm.dtype in (DataType.uint8, DataType.int8) and ofm.dtype == ifm.dtype:
            return self.get_graph_8bit(ifm, ofm)
        elif ifm.dtype == DataType.int16 and ofm.dtype == DataType.int16:
            return self.get_graph_int16(ifm, ofm)
        else:
            self.op.run_on_npu = False
            return self.op

    def get_graph_8bit(self, ifm, ofm):
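        # Decompose softmax into a chain of elementwise NPU passes: depthwise
        # maxpool + subtract for numerical stability, exp via a 256-entry LUT,
        # reduce-sum of the exponentials, a Newton-Raphson style reciprocal of
        # the sum (passes 10-27), and a final multiply/shift into the OFM.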
        exp_lut = self.generate_exp_table(self.op.attrs.get("beta", 1.0), ifm.quantization.scale_f32)
        ifm = create_reshape_tensor(ifm, ifm.get_full_shape())
        DebugDatabase.add_optimised(self.op, ifm.ops[0])
        ofm = create_reshape_tensor(ofm, ofm.get_full_shape(), False)
        no_scale_quant = ifm.quantization.clone()
        no_scale_quant.scale_f32 = None
        no_scale_quant.zero_point = 0
        activation = ActivationFunction(Op.Clip)
        activation.min = ifm.quantization.quant_min
        activation.max = ifm.quantization.quant_max
        activation2 = activation.clone()
        activation2.min = 2 * ifm.quantization.quant_min
        activation2.max = 2 * ifm.quantization.quant_max
        one_scale_quant = ifm.quantization.clone()
        one_scale_quant.scale_f32 = 1.0
        one_scale_quant.zero_point = 0
        ifm.quantization.zero_point = 0

        # PASS 0 - Depthwise Maxpool
        maxpool_op = self.op.clone("_maxpool0")
        maxpool_op.type = Op.MaxPool
        maxpool_h = ifm.shape[1] * ifm.shape[2]
        maxpool_w = ifm.shape[3]
        maxpool_ifm_shape = [1, maxpool_h, maxpool_w, 1]
        maxpool_op.attrs["padding"] = b"VALID"
        maxpool_op.attrs["stride_w"] = 1
        maxpool_op.attrs["stride_h"] = 1
        maxpool_op.attrs["filter_width"] = maxpool_w
        maxpool_op.attrs["filter_height"] = 1
        maxpool_op.attrs["strides"] = [1, maxpool_op.attrs["stride_h"], maxpool_op.attrs["stride_w"], 1]
        maxpool_op.attrs["ksize"] = [1, maxpool_op.attrs["filter_height"], maxpool_op.attrs["filter_width"], 1]
        maxpool_op.inputs = [create_reshape_tensor(ifm, maxpool_ifm_shape)]
        ifm_max = Tensor([1, maxpool_h, 1, 1], ifm.dtype, maxpool_op.name + "_0")
        ifm_max.quantization = no_scale_quant
        maxpool_op.set_output_tensor(ifm_max)
        DebugDatabase.add_optimised(self.op, maxpool_op)

        # PASS 1 - Sub+LUT(exp)
        sub_op = Operation(Op.Sub, self.op.name + "_sub1")
        sub_op.add_input_tensor(ifm)
        sub_op.add_input_tensor(create_reshape_tensor(ifm_max, [1, ifm.shape[1], ifm.shape[2], 1]))
        sub_op.set_activation_lut(
            create_const_tensor(
                sub_op.name + "_lut", [1, 1, 1, 256], DataType.int32, exp_lut, np.int32, TensorPurpose.LUT
            )
        )
        ifm_exp = Tensor(ifm.shape, DataType.int32, sub_op.name + "_0")
        ifm_exp.quantization = one_scale_quant.clone()
        ifm_exp.quantization.zero_point = 127
        sub_op.activation = ActivationFunction(Op.LUT)
        # Note: activation.min/max are non-quantized values
        sub_op.activation.min = -128 - ifm_exp.quantization.zero_point
        sub_op.activation.max = 127 - ifm_exp.quantization.zero_point
        sub_op.set_output_tensor(ifm_exp)
        DebugDatabase.add_optimised(self.op, sub_op)

        # PASS 2 - SHR
        shr2_op = Operation(Op.SHR, self.op.name + "_shr2")
        shr2_op.attrs["rounding_mode"] = NpuRoundingMode.NATURAL
        shr2_op.add_input_tensor(ifm_exp)
        shr2_op.add_input_tensor(
            create_const_tensor(
                shr2_op.name + "_const", [1, 1, 1, 1], DataType.int32, [12], np.int32, quantization=no_scale_quant
            ),
        )
        shr2_op.activation = activation.clone()
        rescaled_exp = Tensor(ifm.shape, ifm_exp.dtype, shr2_op.name + "_0")
        rescaled_exp.quantization = no_scale_quant
        shr2_op.set_output_tensor(rescaled_exp)
        DebugDatabase.add_optimised(self.op, shr2_op)

        # PASS 3 - Reduce sum
        reduce_sum_op = Operation(Op.ReduceSum, self.op.name + "_reduce_sum3")
        reduce_sum_op.attrs["padding"] = b"VALID"
        reduce_sum_op.attrs["stride_w"] = 1
        reduce_sum_op.attrs["stride_h"] = 1
        reduce_sum_op.attrs["filter_width"] = 1
        reduce_sum_op.attrs["filter_height"] = 1
        reduce_sum_op.attrs["strides"] = [1, reduce_sum_op.attrs["stride_h"], reduce_sum_op.attrs["stride_w"], 1]
        reduce_sum_op.attrs["ksize"] = [1, reduce_sum_op.attrs["filter_height"], reduce_sum_op.attrs["filter_width"], 1]
        reduce_sum_op.add_input_tensor(rescaled_exp)
        reduce_sum_op.activation = activation.clone()

        reduce_sum_shape = [1, rescaled_exp.shape[1], rescaled_exp.shape[2], 1]
        sum_of_exp = Tensor(reduce_sum_shape, DataType.int32, reduce_sum_op.name + "_0")
        sum_of_exp.quantization = no_scale_quant
        reduce_sum_op.set_output_tensor(sum_of_exp)
        DebugDatabase.add_optimised(self.op, reduce_sum_op)

        # PASS 4 - CLZ
        clz_op = Operation(Op.CLZ, self.op.name + "_clz4")
        clz_op.add_input_tensor(sum_of_exp)
        clz_op.activation = activation.clone()
        headroom_plus_one = Tensor(reduce_sum_shape, DataType.int32, clz_op.name + "_0")
        headroom_plus_one.quantization = no_scale_quant
        clz_op.set_output_tensor(headroom_plus_one)
        DebugDatabase.add_optimised(self.op, clz_op)

        # PASS 5 - Sub
        sub5_op = Operation(Op.Sub, self.op.name + "_sub5")
        sub5_op.add_input_tensor(
            create_const_tensor(
                "headroom_offset_const",
                [1, 1, 1, 1],
                DataType.int32,
                [12 + 31 - 8],
                np.int32,
                quantization=no_scale_quant,
            ),
        )
        sub5_op.add_input_tensor(headroom_plus_one)
        sub5_op.activation = activation.clone()
        right_shift = Tensor(reduce_sum_shape, DataType.int32, sub5_op.name + "_0")
        right_shift.quantization = no_scale_quant
        sub5_op.set_output_tensor(right_shift)
        DebugDatabase.add_optimised(self.op, sub5_op)

        # PASS 6 - Sub
        one = create_const_tensor("one_const", [1, 1, 1, 1], DataType.int32, [1], np.int32, quantization=no_scale_quant)
        sub6_op = Operation(Op.Sub, self.op.name + "_sub6")
        sub6_op.add_input_tensor(headroom_plus_one)
        sub6_op.add_input_tensor(one)
        sub6_op.activation = activation.clone()
        headroom = Tensor(reduce_sum_shape, DataType.int32, sub6_op.name + "_0")
        headroom.quantization = no_scale_quant
        sub6_op.set_output_tensor(headroom)
        DebugDatabase.add_optimised(self.op, sub6_op)

        # PASS 7 - SHL
        shl7_op = Operation(Op.SHL, self.op.name + "_shl7")
        shl7_op.add_input_tensor(sum_of_exp)
        shl7_op.add_input_tensor(headroom)
        shl7_op.activation = activation.clone()
        shifted_sum = Tensor(reduce_sum_shape, DataType.int32, shl7_op.name + "_0")
        shifted_sum.quantization = no_scale_quant

        shl7_op.set_output_tensor(shifted_sum)
        DebugDatabase.add_optimised(self.op, shl7_op)

        # PASS 8 - Sub
        sub8_op = Operation(Op.Sub, self.op.name + "_sub8")
        sub8_op.add_input_tensor(shifted_sum)
        sub8_op.add_input_tensor(
            create_const_tensor(
                "shifted_one_const", [1, 1, 1, 1], DataType.int32, [1 << 30], np.int32, quantization=no_scale_quant
            ),
        )
        sub8_op.activation = activation.clone()
        shifted_sum_minus_one = Tensor(reduce_sum_shape, DataType.int32, sub8_op.name + "_0")
        shifted_sum_minus_one.quantization = no_scale_quant
        sub8_op.set_output_tensor(shifted_sum_minus_one)
        DebugDatabase.add_optimised(self.op, sub8_op)

        # PASS 9 - SHL
        shl9_op = Operation(Op.SHL, self.op.name + "_shl9")
        shl9_op.add_input_tensor(shifted_sum_minus_one)
        shl9_op.add_input_tensor(one)
        shl9_op.activation = activation.clone()
        shifted_sum_minus_one = Tensor(reduce_sum_shape, DataType.int32, shl9_op.name + "_0")
        shifted_sum_minus_one.quantization = no_scale_quant
        shl9_op.set_output_tensor(shifted_sum_minus_one)
        DebugDatabase.add_optimised(self.op, shl9_op)

        # PASS 10 - Add
        add10_op = Operation(Op.Add, self.op.name + "_add10")
        add10_op.add_input_tensor(
            create_const_tensor(
                "F0_one_const", [1, 1, 1, 1], DataType.int32, [(1 << 31) - 1], np.int32, quantization=no_scale_quant
            ),
        )
        add10_op.add_input_tensor(shifted_sum_minus_one)
        add10_op.activation = activation.clone()
        add10_op.attrs["rescale"] = (1, 1)
        half_denominator = Tensor(reduce_sum_shape, DataType.int32, add10_op.name + "_0")
        half_denominator.quantization = one_scale_quant
        add10_op.set_output_tensor(half_denominator)
        DebugDatabase.add_optimised(self.op, add10_op)

        # PASS 11 - Multiply
        mul11_op = Operation(Op.Mul, self.op.name + "_mul11")
        mul11_op.add_input_tensor(half_denominator)
        mul11_op.add_input_tensor(
            create_const_tensor(
                "neg_32_over_17_const",
                [1, 1, 1, 1],
                DataType.int32,
                [-1010580540],
                np.int32,
                quantization=one_scale_quant,
            ),
        )
        rescaled = Tensor(reduce_sum_shape, DataType.int32, mul11_op.name + "_0")
        rescaled.quantization = one_scale_quant.clone()
        rescaled.quantization.scale_f32 = 2.0
        mul11_op.activation = activation2.clone()
        mul11_op.set_output_tensor(rescaled)
        DebugDatabase.add_optimised(self.op, mul11_op)

        # PASS 12 - Add
        add12_op = Operation(Op.Add, self.op.name + "_add12")
        add12_op.add_input_tensor(rescaled)
        add12_op.add_input_tensor(
            create_const_tensor(
                "48_over_17_const", [1, 1, 1, 1], DataType.int32, [1515870810], np.int32, quantization=no_scale_quant
            ),
        )
        add12_op.activation = activation.clone()
        rescale_w_offset = Tensor(reduce_sum_shape, DataType.int32, add12_op.name + "_0")
        rescale_w_offset.quantization = one_scale_quant
        add12_op.set_output_tensor(rescale_w_offset)
        DebugDatabase.add_optimised(self.op, add12_op)

        nr_x = rescale_w_offset
        F2_one = create_const_tensor(
            "F2_one_const", [1, 1, 1, 1], DataType.int32, [(1 << 29)], np.int32, quantization=no_scale_quant
        )
        four = create_const_tensor(
            "four_const", [1, 1, 1, 1], DataType.int32, [4], np.int32, quantization=no_scale_quant
        )
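        # Newton-Raphson refinement of the reciprocal estimate: three rounds of
        # nr_x += nr_x * (1 - half_denominator * nr_x), carried out in fixed
        # point where F2_one (1 << 29) stands in for 1.0 and the multiply by
        # four rescales the product back to the estimate's Q-format.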
        for i in range(3):
            # PASS 13, 18, 23 - MUL
            mul_op = Operation(Op.Mul, self.op.name + "_mul%d" % (13 + i * 5))
            mul_op.add_input_tensor(nr_x)
            mul_op.add_input_tensor(half_denominator)
            mul_op.activation = activation2.clone()
            half_denominator_times_x = Tensor(reduce_sum_shape, DataType.int32, mul_op.name + "_0")
            half_denominator_times_x.quantization = one_scale_quant.clone()
            half_denominator_times_x.quantization.scale_f32 = 2.0
            mul_op.set_output_tensor(half_denominator_times_x)
            DebugDatabase.add_optimised(self.op, mul_op)
            # PASS 14, 19, 24 - SUB
            sub_op = Operation(Op.Sub, self.op.name + "_sub%d" % (14 + i * 5))
            sub_op.add_input_tensor(F2_one)
            sub_op.add_input_tensor(half_denominator_times_x)
            sub_op.activation = activation.clone()
            one_minus_half_denominator_times_x = Tensor(reduce_sum_shape, DataType.int32, sub_op.name + "_0")
            one_minus_half_denominator_times_x.quantization = one_scale_quant
            sub_op.set_output_tensor(one_minus_half_denominator_times_x)
            DebugDatabase.add_optimised(self.op, sub_op)
            # PASS 15, 20, 25 - MUL
            mul_op = Operation(Op.Mul, self.op.name + "_mul%d" % (15 + i * 5))
            mul_op.add_input_tensor(nr_x)
            mul_op.add_input_tensor(one_minus_half_denominator_times_x)
            mul_op.activation = activation2.clone()
            to_rescale = Tensor(reduce_sum_shape, DataType.int32, mul_op.name + "_0")
            to_rescale.quantization = one_scale_quant.clone()
            to_rescale.quantization.scale_f32 = 2.0
            mul_op.set_output_tensor(to_rescale)
            DebugDatabase.add_optimised(self.op, mul_op)
            # PASS 16, 21, 26 - MUL
            shl_op = Operation(Op.Mul, self.op.name + "_mul%d" % (16 + i * 5))
            shl_op.add_input_tensor(to_rescale)
            shl_op.add_input_tensor(four)
            shl_op.activation = activation.clone()
            to_add = Tensor(reduce_sum_shape, DataType.int32, shl_op.name + "_0")
            to_add.quantization = no_scale_quant
            shl_op.set_output_tensor(to_add)
            DebugDatabase.add_optimised(self.op, shl_op)
            # PASS 17, 22, 27 - ADD
            add_op = Operation(Op.Add, self.op.name + "_add%d" % (17 + i * 5))
            add_op.add_input_tensor(nr_x)
            add_op.add_input_tensor(to_add)
            add_op.activation = activation.clone()
            nr_x = Tensor(reduce_sum_shape, DataType.int32, add_op.name + "_0")
            nr_x.quantization = one_scale_quant
            add_op.set_output_tensor(nr_x)
            DebugDatabase.add_optimised(self.op, add_op)

        # PASS 28 - Multiply
        mul28_op = Operation(Op.Mul, self.op.name + "_mul28")
        mul28_op.add_input_tensor(nr_x)
        mul28_op.add_input_tensor(
            create_const_tensor("two_const", [1, 1, 1, 1], DataType.int32, [2], np.int32, quantization=no_scale_quant)
        )
        mul28_op.activation = activation.clone()
        scale_factor = Tensor(reduce_sum_shape, DataType.int32, mul28_op.name + "_0")
        scale_factor.quantization = one_scale_quant
        mul28_op.set_output_tensor(scale_factor)
        DebugDatabase.add_optimised(self.op, mul28_op)

        # PASS 29 - Multiply
        mul_op = Operation(Op.Mul, self.op.name + "_mul29")
        mul_op.add_input_tensor(ifm_exp)
        mul_op.add_input_tensor(scale_factor)
        mul_op.activation = activation2.clone()
        scaled_exp = Tensor(ifm_exp.shape, DataType.int32, mul_op.name + "_0")
        scaled_exp.quantization = one_scale_quant.clone()
        scaled_exp.quantization.scale_f32 = 2.0
        mul_op.set_output_tensor(scaled_exp)
        DebugDatabase.add_optimised(self.op, mul_op)

        # PASS 30 - SHR
        shr30_op = Operation(Op.SHR, self.op.name + "_shr30")
        shr30_op.attrs["rounding_mode"] = NpuRoundingMode.NATURAL
        shr30_op.add_input_tensor(scaled_exp)
        shr30_op.add_input_tensor(right_shift)
        shr30_op.set_output_tensor(ofm)
        DebugDatabase.add_optimised(self.op, shr30_op)

        return shr30_op

    def get_graph_int16(self, ifm, ofm):
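        # int16 variant: exp and the reciprocal of the sum of exponentials are
        # both taken from the 512-entry class LUTs rather than the
        # Newton-Raphson iteration used by the 8-bit path.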
        no_scale_quant = ifm.quantization.clone()
        no_scale_quant.scale_f32 = None

        # PASS 0 - Depthwise Maxpool
        maxpool_op = self.op.clone("_maxpool0")
        maxpool_op.type = Op.MaxPool
        DebugDatabase.add_optimised(self.op, maxpool_op)
        maxpool_h = ifm.shape[1] * ifm.shape[2]
        maxpool_w = ifm.shape[3]
        maxpool_ifm_shape = [1, maxpool_h, maxpool_w, 1]
        maxpool_op.attrs["padding"] = b"VALID"
        maxpool_op.attrs["stride_w"] = 1
        maxpool_op.attrs["stride_h"] = 1
        maxpool_op.attrs["filter_width"] = maxpool_w
        maxpool_op.attrs["filter_height"] = 1
        maxpool_op.attrs["strides"] = [1, maxpool_op.attrs["stride_h"], maxpool_op.attrs["stride_w"], 1]
        maxpool_op.attrs["ksize"] = [1, maxpool_op.attrs["filter_height"], maxpool_op.attrs["filter_width"], 1]
        maxpool_op.inputs = [create_reshape_tensor(ifm, maxpool_ifm_shape)]
        maxpool_ofm = Tensor([1, maxpool_h, 1, 1], ifm.dtype, maxpool_op.name + "_0")
        maxpool_ofm.quantization = no_scale_quant
        maxpool_op.set_output_tensor(maxpool_ofm)
        DebugDatabase.add_optimised(self.op, maxpool_op)

        # PASS 1 - Sub
        sub1_op = Operation(Op.Sub, self.op.name + "_sub1")
        sub1_op.add_input_tensor(ifm)
        sub1_op.add_input_tensor(create_reshape_tensor(maxpool_ofm, [1, ifm.shape[1], ifm.shape[2], 1]))
        sub1_ofm = Tensor(ifm.shape, DataType.int32, sub1_op.name + "_0")
        sub1_ofm.quantization = ifm.quantization.clone()
        sub1_op.set_output_tensor(sub1_ofm)
        DebugDatabase.add_optimised(self.op, sub1_op)

        # PASS 2 - Mul
        beta = self.op.attrs.get("beta", 1.0)
        mul2_out_range = 10.0 / 65535.0
        mul2_scale, _ = scaling.elementwise_mul_scale(sub1_ofm.quantization.scale_f32, beta, mul2_out_range)
        mul2_quant = ifm.quantization.clone()
        mul2_quant.scale_f32 = beta
        mul2_op = Operation(Op.Mul, self.op.name + "_mul2")
        mul2_op.add_input_tensor(sub1_ofm)
        mul2_op.add_input_tensor(
            create_const_tensor(
                mul2_op.name + "_const", [1, 1, 1, 1], DataType.int32, [mul2_scale], np.int32, quantization=mul2_quant
            ),
        )
        mul2_ofm = Tensor(ifm.shape, DataType.int32, mul2_op.name + "_0")
        mul2_ofm.quantization = ofm.quantization.clone()
        mul2_ofm.quantization.scale_f32 = mul2_out_range
        mul2_op.set_output_tensor(mul2_ofm)
        DebugDatabase.add_optimised(self.op, mul2_op)

        # PASS 3 - Add+LUT(exp)
        add_op = Operation(Op.Add, self.op.name + "_add3")
        add_op.add_input_tensor(mul2_ofm)
        add_op.add_input_tensor(
            create_const_tensor(
                add_op.name + "_const", [1, 1, 1, 1], DataType.int32, [32767], np.int32, quantization=no_scale_quant
            ),
        )
        add_op.set_activation_lut(
            create_const_tensor(
                add_op.name + "_lut", [1, 1, 1, 512], DataType.int32, self.EXP_LUT, np.int32, TensorPurpose.LUT
            )
        )
        exp_ofm = Tensor(mul2_ofm.shape, DataType.int16, add_op.name + "_0")
        exp_ofm.quantization = mul2_ofm.quantization.clone()
        add_op.set_output_tensor(exp_ofm)
        DebugDatabase.add_optimised(self.op, add_op)

        # PASS 4 - Reduce sum
        reduce_sum_op = Operation(Op.ReduceSum, self.op.name + "_reduce_sum4")
        reduce_sum_op.attrs["padding"] = b"VALID"
        reduce_sum_op.attrs["stride_w"] = 1
        reduce_sum_op.attrs["stride_h"] = 1
        reduce_sum_op.attrs["filter_width"] = 1
        reduce_sum_op.attrs["filter_height"] = 1
        reduce_sum_op.attrs["strides"] = [1, reduce_sum_op.attrs["stride_h"], reduce_sum_op.attrs["stride_w"], 1]
        reduce_sum_op.attrs["ksize"] = [1, reduce_sum_op.attrs["filter_height"], reduce_sum_op.attrs["filter_width"], 1]
        reduce_sum_op.add_input_tensor(exp_ofm)

        reduce_sum_shape = [1, exp_ofm.shape[1], exp_ofm.shape[2], 1]
        sum_of_exp = Tensor(reduce_sum_shape, DataType.int32, reduce_sum_op.name + "_0")
        sum_of_exp.quantization = no_scale_quant
        reduce_sum_op.set_output_tensor(sum_of_exp)
        DebugDatabase.add_optimised(self.op, reduce_sum_op)

        # PASS 5 - CLZ
        clz_op = Operation(Op.CLZ, self.op.name + "_clz5")
        clz_op.add_input_tensor(sum_of_exp)
        headroom_plus_one = Tensor(reduce_sum_shape, DataType.int32, clz_op.name + "_0")
        headroom_plus_one.quantization = no_scale_quant
        clz_op.set_output_tensor(headroom_plus_one)
        DebugDatabase.add_optimised(self.op, clz_op)

        # PASS 6 - Sub
        sub6_op = Operation(Op.Sub, self.op.name + "_sub6")
        sub6_op.add_input_tensor(
            create_const_tensor(
                sub6_op.name + "_const", [1, 1, 1, 1], DataType.int32, [31], np.int32, quantization=no_scale_quant
            ),
        )
        sub6_op.add_input_tensor(headroom_plus_one)
        reciprocal_right_shift = Tensor(reduce_sum_shape, DataType.int32, sub6_op.name + "_0")
        reciprocal_right_shift.quantization = no_scale_quant
        sub6_op.set_output_tensor(reciprocal_right_shift)
        DebugDatabase.add_optimised(self.op, sub6_op)

        # PASS 7 - SHL
        shl7_op = Operation(Op.SHL, self.op.name + "_shl7")
        shl7_op.add_input_tensor(
            create_const_tensor(
                shl7_op.name + "_const", [1, 1, 1, 1], DataType.int32, [1], np.int32, quantization=no_scale_quant
            ),
        )
        shl7_op.add_input_tensor(reciprocal_right_shift)
        constant_one = Tensor(reduce_sum_shape, DataType.int32, shl7_op.name + "_0")
        constant_one.quantization = no_scale_quant
        shl7_op.set_output_tensor(constant_one)
        DebugDatabase.add_optimised(self.op, shl7_op)

        # PASS 8 - Sub
        sub8_op = Operation(Op.Sub, self.op.name + "_sub8")
        sub8_op.add_input_tensor(sum_of_exp)
        sub8_op.add_input_tensor(constant_one)
        sum_of_exps_minus_one = Tensor(reduce_sum_shape, DataType.int32, sub8_op.name + "_0")
        sum_of_exps_minus_one.quantization = no_scale_quant
        sub8_op.set_output_tensor(sum_of_exps_minus_one)
        DebugDatabase.add_optimised(self.op, sub8_op)

        # PASS 9 - SHL
        shl9_op = Operation(Op.SHL, self.op.name + "_shl9")
        shl9_op.add_input_tensor(sum_of_exps_minus_one)
        shl9_op.add_input_tensor(headroom_plus_one)
        shifted_sum_minus_one = Tensor(reduce_sum_shape, DataType.int32, shl9_op.name + "_0")
        shifted_sum_minus_one.quantization = no_scale_quant
        shl9_op.set_output_tensor(shifted_sum_minus_one)
        DebugDatabase.add_optimised(self.op, shl9_op)

        # PASS 10 - SHR
        shr10_op = Operation(Op.SHR, self.op.name + "_shr10")
        shr10_op.add_input_tensor(shifted_sum_minus_one)
        shr10_op.add_input_tensor(
            create_const_tensor(
                shr10_op.name + "_const", [1, 1, 1, 1], DataType.int32, [15], np.int32, quantization=no_scale_quant
            ),
        )
        shifted_sum_minus_one_16 = Tensor(reduce_sum_shape, DataType.int32, shr10_op.name + "_0")
        shifted_sum_minus_one_16.quantization = shifted_sum_minus_one.quantization.clone()
        shr10_op.set_output_tensor(shifted_sum_minus_one_16)
        DebugDatabase.add_optimised(self.op, shr10_op)

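        # The normalised sum is offset into the LUT input range and mapped
        # through the 1/(1+x) table to give the reciprocal scale for the final
        # multiply and shift.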
        # PASS 11 - Sub+LUT(one over one plus x)
        sub11_op = Operation(Op.Sub, self.op.name + "_sub11")
        sub11_op.add_input_tensor(shifted_sum_minus_one_16)
        sub11_op.add_input_tensor(
            create_const_tensor(
                sub11_op.name + "_const", [1, 1, 1, 1], DataType.int32, [32768], np.int32, quantization=no_scale_quant
            ),
        )
        sub11_op.set_activation_lut(
            create_const_tensor(
                sub11_op.name + "_lut",
                [1, 1, 1, 512],
                DataType.int32,
                self.ONE_OVER_ONE_PLUS_X_LUT,
                np.int32,
                TensorPurpose.LUT,
            )
        )
        reciprocal_scale = Tensor(reduce_sum_shape, DataType.int16, sub11_op.name + "_0")
        reciprocal_scale.quantization = no_scale_quant
        sub11_op.set_output_tensor(reciprocal_scale)
        DebugDatabase.add_optimised(self.op, sub11_op)

        # PASS 12 - Multiply
        mul_op = Operation(Op.Mul, self.op.name + "_mul12")
        mul_op.add_input_tensor(exp_ofm)
        mul_op.add_input_tensor(reciprocal_scale)
        mul_ofm = Tensor(exp_ofm.shape, DataType.int32, mul_op.name + "_0")
        mul_ofm.quantization = no_scale_quant
        mul_op.set_output_tensor(mul_ofm)
        DebugDatabase.add_optimised(self.op, mul_op)

        # PASS 13 - SHR
        shr13_op = Operation(Op.SHR, self.op.name + "_shr13")
        shr13_op.add_input_tensor(mul_ofm)
        shr13_op.add_input_tensor(reciprocal_right_shift)
        shr13_op.set_output_tensor(ofm)
        DebugDatabase.add_optimised(self.op, shr13_op)

        return shr13_op