# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
#
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Description:
# Contains SoftMax
import math

import numpy as np

from . import fp_math
from . import scaling
from .data_type import DataType
from .operation import Op
from .operation import Operation
from .tensor import create_const_tensor
from .tensor import create_reshape_tensor
from .tensor import Tensor
from .tensor import TensorPurpose


class SoftMax:
    # Turn off black formatting for the LUT tables to keep them compact
    # fmt: off
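    # The two tables below are used only by the int16 graph. Each holds 512 32-bit
    # entries; judging by the packed values, the low half-word of every entry is a base
    # value and the high half-word the delta to the next entry (an assumption about the
    # NPU's 16-bit LUT encoding, it is not stated in this file).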

    EXP_LUT = [
        0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002,
        0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002,
        0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002,
        0x00000002, 0x00000002, 0x00010002, 0x00000003, 0x00000003, 0x00000003, 0x00000003, 0x00000003,
        0x00000003, 0x00000003, 0x00000003, 0x00000003, 0x00000003, 0x00000003, 0x00000003, 0x00000003,
        0x00000003, 0x00000003, 0x00000003, 0x00010003, 0x00000004, 0x00000004, 0x00000004, 0x00000004,
        0x00000004, 0x00000004, 0x00000004, 0x00000004, 0x00000004, 0x00000004, 0x00000004, 0x00000004,
        0x00010004, 0x00000005, 0x00000005, 0x00000005, 0x00000005, 0x00000005, 0x00000005, 0x00000005,
        0x00000005, 0x00000005, 0x00010005, 0x00000006, 0x00000006, 0x00000006, 0x00000006, 0x00000006,
        0x00000006, 0x00000006, 0x00010006, 0x00000007, 0x00000007, 0x00000007, 0x00000007, 0x00000007,
        0x00000007, 0x00000007, 0x00010007, 0x00000008, 0x00000008, 0x00000008, 0x00000008, 0x00000008,
        0x00010008, 0x00000009, 0x00000009, 0x00000009, 0x00000009, 0x00000009, 0x00010009, 0x0000000a,
        0x0000000a, 0x0000000a, 0x0000000a, 0x0001000a, 0x0000000b, 0x0000000b, 0x0000000b, 0x0000000b,
        0x0001000b, 0x0000000c, 0x0000000c, 0x0000000c, 0x0001000c, 0x0000000d, 0x0000000d, 0x0000000d,
        0x0001000d, 0x0000000e, 0x0000000e, 0x0000000e, 0x0001000e, 0x0000000f, 0x0000000f, 0x0001000f,
        0x00000010, 0x00000010, 0x00010010, 0x00000011, 0x00000011, 0x00010011, 0x00000012, 0x00000012,
        0x00010012, 0x00000013, 0x00000013, 0x00010013, 0x00000014, 0x00010014, 0x00000015, 0x00000015,
        0x00010015, 0x00000016, 0x00010016, 0x00000017, 0x00010017, 0x00000018, 0x00010018, 0x00000019,
        0x00010019, 0x0000001a, 0x0001001a, 0x0000001b, 0x0001001b, 0x0000001c, 0x0001001c, 0x0000001d,
        0x0001001d, 0x0000001e, 0x0001001e, 0x0001001f, 0x00000020, 0x00010020, 0x00010021, 0x00000022,
        0x00010022, 0x00010023, 0x00000024, 0x00010024, 0x00000025, 0x00010025, 0x00010026, 0x00010027,
        0x00000028, 0x00020028, 0x0000002a, 0x0001002a, 0x0001002b, 0x0001002c, 0x0000002d, 0x0001002d,
        0x0001002e, 0x0001002f, 0x00010030, 0x00010031, 0x00010032, 0x00010033, 0x00010034, 0x00010035,
        0x00010036, 0x00010037, 0x00010038, 0x00020039, 0x0001003b, 0x0000003c, 0x0002003c, 0x0001003e,
        0x0002003f, 0x00000041, 0x00020041, 0x00010043, 0x00010044, 0x00020045, 0x00020047, 0x00010049,
        0x0001004a, 0x0002004b, 0x0001004d, 0x0002004e, 0x00010050, 0x00020051, 0x00020053, 0x00010055,
        0x00020056, 0x00020058, 0x0002005a, 0x0001005c, 0x0002005d, 0x0002005f, 0x00020061, 0x00020063,
        0x00020065, 0x00020067, 0x00020069, 0x0002006b, 0x0003006d, 0x00020070, 0x00020072, 0x00020074,
        0x00030076, 0x00020079, 0x0003007b, 0x0002007e, 0x00030080, 0x00020083, 0x00020085, 0x00040087,
        0x0002008b, 0x0003008d, 0x00030090, 0x00020093, 0x00030095, 0x00030098, 0x0003009b, 0x0004009e,
        0x000300a2, 0x000300a5, 0x000300a8, 0x000300ab, 0x000400ae, 0x000300b2, 0x000400b5, 0x000400b9,
        0x000300bd, 0x000400c0, 0x000400c4, 0x000400c8, 0x000400cc, 0x000400d0, 0x000500d4, 0x000400d9,
        0x000400dd, 0x000500e1, 0x000400e6, 0x000500ea, 0x000400ef, 0x000500f3, 0x000500f8, 0x000500fd,
        0x00050102, 0x00050107, 0x0005010c, 0x00060111, 0x00050117, 0x0006011c, 0x00060122, 0x00060128,
        0x0006012e, 0x00060134, 0x0006013a, 0x00070140, 0x00060147, 0x0007014d, 0x00060154, 0x0007015a,
        0x00070161, 0x00060168, 0x0008016e, 0x00070176, 0x0008017d, 0x00080185, 0x0007018d, 0x00090194,
        0x0008019d, 0x000801a5, 0x000801ad, 0x000901b5, 0x000901be, 0x000901c7, 0x000901d0, 0x000901d9,
        0x000a01e2, 0x000901ec, 0x000a01f5, 0x000b01ff, 0x000a020a, 0x000b0214, 0x000a021f, 0x000b0229,
        0x000b0234, 0x000b023f, 0x000c024a, 0x000c0256, 0x000c0262, 0x000c026e, 0x000c027a, 0x000d0286,
        0x000d0293, 0x000d02a0, 0x000e02ad, 0x000e02bb, 0x000e02c9, 0x000e02d7, 0x000f02e5, 0x000f02f4,
        0x000f0303, 0x000f0312, 0x00100321, 0x00100331, 0x00110341, 0x00100352, 0x00120362, 0x00110374,
        0x00120385, 0x00120397, 0x001203a9, 0x001303bb, 0x001303ce, 0x001403e1, 0x001403f5, 0x00140409,
        0x0015041d, 0x00150432, 0x00160447, 0x0016045d, 0x00160473, 0x00170489, 0x001704a0, 0x001904b7,
        0x001804d0, 0x001904e8, 0x00190501, 0x001a051a, 0x001a0534, 0x001b054e, 0x001b0569, 0x001c0584,
        0x001c05a0, 0x001d05bc, 0x001e05d9, 0x001e05f7, 0x001e0615, 0x00200633, 0x00200653, 0x00200673,
        0x00210693, 0x002206b4, 0x002306d6, 0x002306f9, 0x0024071c, 0x00240740, 0x00260764, 0x0026078a,
        0x002607b0, 0x002807d6, 0x002907fe, 0x00290827, 0x002a0850, 0x002a087a, 0x002c08a4, 0x002c08d0,
        0x002e08fc, 0x002e092a, 0x002f0958, 0x00310987, 0x003109b8, 0x003209e9, 0x00330a1b, 0x00340a4e,
        0x00350a82, 0x00350ab7, 0x00380aec, 0x00380b24, 0x003a0b5c, 0x003a0b96, 0x003c0bd0, 0x003d0c0c,
        0x003e0c49, 0x003f0c87, 0x00400cc6, 0x00420d06, 0x00430d48, 0x00440d8b, 0x00460dcf, 0x00480e15,
        0x00480e5d, 0x00490ea5, 0x004c0eee, 0x004d0f3a, 0x004e0f87, 0x00500fd5, 0x00511025, 0x00531076,
        0x005610c9, 0x0056111f, 0x00581175, 0x005a11cd, 0x005c1227, 0x005e1283, 0x005e12e1, 0x0061133f,
        0x006413a0, 0x00651404, 0x00671469, 0x006914d0, 0x006c1539, 0x006c15a5, 0x00701611, 0x00721681,
        0x007416f3, 0x00761767, 0x007917dd, 0x007a1856, 0x007d18d0, 0x0080194d, 0x008319cd, 0x00841a50,
        0x00881ad4, 0x00891b5c, 0x008d1be5, 0x00911c72, 0x00911d03, 0x00961d94, 0x00981e2a, 0x009c1ec2,
        0x009e1f5e, 0x00a21ffc, 0x00a4209e, 0x00a92142, 0x00ab21eb, 0x00ae2296, 0x00b22344, 0x00b523f6,
        0x00b924ab, 0x00be2564, 0x00c02622, 0x00c526e2, 0x00c827a7, 0x00cc286f, 0x00d0293b, 0x00d52a0b,
        0x00d72ae0, 0x00dd2bb7, 0x00e12c94, 0x00e62d75, 0x00eb2e5b, 0x00ef2f46, 0x00f23035, 0x00f83127,
        0x00fe321f, 0x0101331d, 0x0108341e, 0x010c3526, 0x01123632, 0x01173744, 0x011c385b, 0x01233977,
        0x01273a9a, 0x012e3bc1, 0x01343cef, 0x013a3e23, 0x01403f5d, 0x0146409d, 0x014c41e3, 0x0154432f,
        0x01594483, 0x016145dc, 0x0168473d, 0x016f48a5, 0x01764a14, 0x017d4b8a, 0x01854d07, 0x018d4e8c,
        0x01945019, 0x019d51ad, 0x01a4534a, 0x01ad54ee, 0x01b5569b, 0x01be5850, 0x01c75a0e, 0x01d05bd5,
        0x01d85da5, 0x01e35f7d, 0x01eb6160, 0x01f6634b, 0x01ff6541, 0x02096740, 0x02146949, 0x021e6b5d,
        0x02296d7b, 0x02336fa4, 0x023f71d7, 0x024a7416, 0x02567660, 0x026278b6, 0x026d7b18, 0x027a7d85,
    ]

    ONE_OVER_ONE_PLUS_X_LUT = [
        0xffc17fff, 0xffc07fc0, 0xffc27f80, 0xffc07f42, 0xffc17f02, 0xffc17ec3, 0xffc27e84, 0xffc27e46,
        0xffc27e08, 0xffc37dca, 0xffc27d8d, 0xffc37d4f, 0xffc37d12, 0xffc37cd5, 0xffc37c98, 0xffc47c5b,
        0xffc47c1f, 0xffc47be3, 0xffc57ba7, 0xffc57b6c, 0xffc37b31, 0xffc67af4, 0xffc57aba, 0xffc67a7f,
        0xffc57a45, 0xffc67a0a, 0xffc779d0, 0xffc67997, 0xffc6795d, 0xffc77923, 0xffc778ea, 0xffc778b1,
        0xffc87878, 0xffc77840, 0xffc87807, 0xffc877cf, 0xffc97797, 0xffc87760, 0xffc97728, 0xffc976f1,
        0xffc976ba, 0xffc87683, 0xffca764b, 0xffca7615, 0xffca75df, 0xffca75a9, 0xffca7573, 0xffcb753d,
        0xffca7508, 0xffcb74d2, 0xffcb749d, 0xffca7468, 0xffcc7432, 0xffcc73fe, 0xffcb73ca, 0xffcc7395,
        0xffcd7361, 0xffcc732e, 0xffcc72fa, 0xffcd72c6, 0xffcd7293, 0xffcd7260, 0xffcc722d, 0xffce71f9,
        0xffcd71c7, 0xffce7194, 0xffce7162, 0xffce7130, 0xffcf70fe, 0xffce70cd, 0xffce709b, 0xffcf7069,
        0xffcf7038, 0xffcf7007, 0xffcf6fd6, 0xffcf6fa5, 0xffd06f74, 0xffd06f44, 0xffd06f14, 0xffd06ee4,
        0xffd06eb4, 0xffd06e84, 0xffd16e54, 0xffd16e25, 0xffd16df6, 0xffd16dc7, 0xffd06d98, 0xffd26d68,
        0xffd16d3a, 0xffd26d0b, 0xffd26cdd, 0xffd26caf, 0xffd26c81, 0xffd26c53, 0xffd36c25, 0xffd26bf8,
        0xffd36bca, 0xffd36b9d, 0xffd36b70, 0xffd26b43, 0xffd46b15, 0xffd36ae9, 0xffd46abc, 0xffd46a90,
        0xffd46a64, 0xffd46a38, 0xffd46a0c, 0xffd469e0, 0xffd469b4, 0xffd56988, 0xffd5695d, 0xffd56932,
        0xffd56907, 0xffd568dc, 0xffd568b1, 0xffd56886, 0xffd6685b, 0xffd56831, 0xffd66806, 0xffd667dc,
        0xffd667b2, 0xffd76788, 0xffd6675f, 0xffd76735, 0xffd6670c, 0xffd766e2, 0xffd666b9, 0xffd7668f,
        0xffd86666, 0xffd6663e, 0xffd86614, 0xffd765ec, 0xffd865c3, 0xffd8659b, 0xffd86573, 0xffd8654b,
        0xffd86523, 0xffd864fb, 0xffd964d3, 0xffd864ac, 0xffd96484, 0xffd8645d, 0xffd96435, 0xffd9640e,
        0xffd963e7, 0xffd963c0, 0xffd96399, 0xffda6372, 0xffd9634c, 0xffda6325, 0xffda62ff, 0xffda62d9,
        0xffda62b3, 0xffda628d, 0xffda6267, 0xffdb6241, 0xffda621c, 0xffdb61f6, 0xffda61d1, 0xffdc61ab,
        0xffd96187, 0xffdc6160, 0xffdb613c, 0xffdb6117, 0xffdb60f2, 0xffdc60cd, 0xffdc60a9, 0xffdb6085,
        0xffdc6060, 0xffdc603c, 0xffdc6018, 0xffdc5ff4, 0xffdc5fd0, 0xffdd5fac, 0xffdc5f89, 0xffdc5f65,
        0xffdd5f41, 0xffdd5f1e, 0xffdd5efb, 0xffdd5ed8, 0xffdd5eb5, 0xffdd5e92, 0xffdd5e6f, 0xffdd5e4c,
        0xffdd5e29, 0xffde5e06, 0xffde5de4, 0xffdd5dc2, 0xffde5d9f, 0xffde5d7d, 0xffde5d5b, 0xffde5d39,
        0xffdf5d17, 0xffde5cf6, 0xffde5cd4, 0xffdf5cb2, 0xffdf5c91, 0xffde5c70, 0xffdf5c4e, 0xffdf5c2d,
        0xffde5c0c, 0xffe05bea, 0xffdf5bca, 0xffdf5ba9, 0xffdf5b88, 0xffdf5b67, 0xffe05b46, 0xffe05b26,
        0xffdf5b06, 0xffe05ae5, 0xffe05ac5, 0xffe05aa5, 0xffe05a85, 0xffe05a65, 0xffe05a45, 0xffe15a25,
        0xffe05a06, 0xffe059e6, 0xffe159c6, 0xffe159a7, 0xffe05988, 0xffe15968, 0xffe15949, 0xffe1592a,
        0xffe1590b, 0xffe158ec, 0xffe258cd, 0xffe158af, 0xffe15890, 0xffe25871, 0xffe15853, 0xffe25834,
        0xffe25816, 0xffe257f8, 0xffe157da, 0xffe257bb, 0xffe3579d, 0xffe25780, 0xffe25762, 0xffe25744,
        0xffe35726, 0xffe25709, 0xffe256eb, 0xffe356cd, 0xffe356b0, 0xffe35693, 0xffe25676, 0xffe35658,
        0xffe3563b, 0xffe3561e, 0xffe35601, 0xffe355e4, 0xffe455c7, 0xffe355ab, 0xffe4558e, 0xffe35572,
        0xffe45555, 0xffe35539, 0xffe4551c, 0xffe45500, 0xffe454e4, 0xffe454c8, 0xffe454ac, 0xffe45490,
        0xffe45474, 0xffe55458, 0xffe4543d, 0xffe45421, 0xffe55405, 0xffe553ea, 0xffe453cf, 0xffe553b3,
        0xffe45398, 0xffe5537c, 0xffe55361, 0xffe55346, 0xffe5532b, 0xffe55310, 0xffe552f5, 0xffe552da,
        0xffe652bf, 0xffe552a5, 0xffe5528a, 0xffe6526f, 0xffe55255, 0xffe6523a, 0xffe65220, 0xffe55206,
        0xffe651eb, 0xffe651d1, 0xffe651b7, 0xffe6519d, 0xffe65183, 0xffe65169, 0xffe7514f, 0xffe65136,
        0xffe6511c, 0xffe75102, 0xffe650e9, 0xffe750cf, 0xffe650b6, 0xffe7509c, 0xffe75083, 0xffe6506a,
        0xffe75050, 0xffe75037, 0xffe7501e, 0xffe75005, 0xffe74fec, 0xffe74fd3, 0xffe74fba, 0xffe74fa1,
        0xffe84f88, 0xffe74f70, 0xffe84f57, 0xffe74f3f, 0xffe84f26, 0xffe74f0e, 0xffe84ef5, 0xffe84edd,
        0xffe84ec5, 0xffe84ead, 0xffe74e95, 0xffe84e7c, 0xffe84e64, 0xffe94e4c, 0xffe84e35, 0xffe84e1d,
        0xffe84e05, 0xffe94ded, 0xffe84dd6, 0xffe84dbe, 0xffe94da6, 0xffe94d8f, 0xffe84d78, 0xffe84d60,
        0xffea4d48, 0xffe84d32, 0xffe94d1a, 0xffe94d03, 0xffe84cec, 0xffe94cd4, 0xffe94cbd, 0xffea4ca6,
        0xffe94c90, 0xffe84c79, 0xffea4c61, 0xffe94c4b, 0xffe94c34, 0xffea4c1d, 0xffe94c07, 0xffea4bf0,
        0xffe94bda, 0xffea4bc3, 0xffea4bad, 0xffe94b97, 0xffea4b80, 0xffea4b6a, 0xffea4b54, 0xffea4b3e,
        0xffea4b28, 0xffea4b12, 0xffea4afc, 0xffea4ae6, 0xffea4ad0, 0xffeb4aba, 0xffea4aa5, 0xffea4a8f,
        0xffeb4a79, 0xffea4a64, 0xffea4a4e, 0xffeb4a38, 0xffeb4a23, 0xffea4a0e, 0xffeb49f8, 0xffea49e3,
        0xffeb49cd, 0xffeb49b8, 0xffeb49a3, 0xffeb498e, 0xffea4979, 0xffeb4963, 0xffeb494e, 0xffec4939,
        0xffeb4925, 0xffea4910, 0xffec48fa, 0xffeb48e6, 0xffeb48d1, 0xffec48bc, 0xffeb48a8, 0xffec4893,
        0xffeb487f, 0xffec486a, 0xffeb4856, 0xffec4841, 0xffec482d, 0xffeb4819, 0xffec4804, 0xffec47f0,
        0xffec47dc, 0xffec47c8, 0xffec47b4, 0xffec47a0, 0xffec478c, 0xffec4778, 0xffec4764, 0xffec4750,
        0xffec473c, 0xffed4728, 0xffec4715, 0xffec4701, 0xffed46ed, 0xffec46da, 0xffed46c6, 0xffec46b3,
        0xffec469f, 0xffed468b, 0xffed4678, 0xffec4665, 0xffed4651, 0xffed463e, 0xffed462b, 0xffec4618,
        0xffed4604, 0xffed45f1, 0xffed45de, 0xffed45cb, 0xffed45b8, 0xffed45a5, 0xffed4592, 0xffed457f,
        0xffee456c, 0xffed455a, 0xffed4547, 0xffed4534, 0xffee4521, 0xffed450f, 0xffed44fc, 0xffee44e9,
        0xffed44d7, 0xffee44c4, 0xffee44b2, 0xffed44a0, 0xffee448d, 0xffee447b, 0xffed4469, 0xffee4456,
        0xffee4444, 0xffee4432, 0xffee4420, 0xffee440e, 0xffee43fc, 0xffee43ea, 0xffee43d8, 0xffee43c6,
        0xffee43b4, 0xffee43a2, 0xffee4390, 0xffef437e, 0xffee436d, 0xffee435b, 0xffef4349, 0xffee4338,
        0xffee4326, 0xffef4314, 0xffee4303, 0xffef42f1, 0xffee42e0, 0xffef42ce, 0xffee42bd, 0xffef42ab,
        0xffef429a, 0xffee4289, 0xfff04277, 0xffee4267, 0xffef4255, 0xffef4244, 0xffef4233, 0xffef4222,
        0xffee4211, 0xffef41ff, 0xfff041ee, 0xffef41de, 0xffef41cd, 0xffee41bc, 0xfff041aa, 0xffef419a,
        0xffef4189, 0xffef4178, 0xfff04167, 0xffef4157, 0xffef4146, 0xfff04135, 0xffef4125, 0xfff04114,
        0xffef4104, 0xfff040f3, 0xffef40e3, 0xfff040d2, 0xfff040c2, 0xffef40b2, 0xfff040a1, 0xfff04091,
        0xfff04081, 0xffef4071, 0xfff04060, 0xfff04050, 0xfff04040, 0xfff04030, 0xfff04020, 0xfff04010
    ]
    # fmt: on

    def __init__(self, op):
        self.op = op

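    # Builds the 256-entry LUT used by the 8-bit graph. Index x stands for an input
    # difference (value - row max) of x - 255, so only non-positive differences occur;
    # entries at or above diff_min get exp(beta * input_scale * diff) in Q0.31 fixed
    # point via fp_math, the rest saturate to 0. This mirrors the TFLite/gemmlowp
    # quantized softmax reference credited in the header.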
    def generate_exp_table(self, beta, input_scale):
        integer_bits = 5
        total_signed_bits = 31
        # Calculate scaling
        real_beta = min(
            np.double(beta) * np.double(input_scale) * (1 << (31 - integer_bits)), np.double((1 << 31) - 1.0)
        )
        scale, shift = scaling.quantise_scale(real_beta)
        shift = 31 - shift
        diff_min = -1.0 * math.floor(
            1.0 * ((1 << integer_bits) - 1) * (1 << (total_signed_bits - integer_bits)) / (1 << shift)
        )
        # Generate the exp LUT
        lut = []
        for x in range(256):
            input_diff = x - 255
            if input_diff >= diff_min:
                rescale = fp_math.saturating_rounding_mul(input_diff * (1 << shift), scale)
                lut.append(fp_math.exp_on_negative_values(rescale))
            else:
                lut.append(0)
        return lut

    def get_graph(self):
        ifm = self.op.inputs[0]
        ofm = self.op.outputs[0]

        # Reshape ifm/ofm (if needed)
        full_shape = ifm.get_full_shape()
        if full_shape[0] > 1:
            full_shape[1] *= full_shape[0]
            full_shape[0] = 1
            ifm = create_reshape_tensor(ifm, full_shape)
            ofm = create_reshape_tensor(ofm, full_shape, False)

        if ifm.dtype in (DataType.uint8, DataType.int8) and ofm.dtype == ifm.dtype:
            return self.get_graph_8bit(ifm, ofm)
        elif ifm.dtype == DataType.int16 and ofm.dtype == DataType.int16:
            return self.get_graph_int16(ifm, ofm)
        else:
            self.op.run_on_npu = False
            return self.op

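    # The 8-bit variant lowers softmax(x) = exp(x - max(x)) / sum(exp(x - max(x))) into
    # NPU primitives: a depthwise maxpool finds the row max, a Sub with an activation
    # LUT produces the exponentials, ReduceSum accumulates them, the reciprocal of the
    # sum is approximated with a gemmlowp-style fixed-point Newton-Raphson iteration
    # (passes 4-27), and a final multiply and shift produce the 8-bit output.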
    def get_graph_8bit(self, ifm, ofm):
        exp_lut = self.generate_exp_table(self.op.attrs.get("beta", 1.0), ifm.quantization.scale_f32)
        no_scale_quant = ifm.quantization.clone()
        no_scale_quant.scale_f32 = None
        no_scale_quant.zero_point = 0
        one_scale_quant = ifm.quantization.clone()
        one_scale_quant.scale_f32 = 1.0
        one_scale_quant.zero_point = 0
        ifm.quantization.zero_point = 0

        # PASS 0 - Depthwise Maxpool
        maxpool_op = self.op.clone("_maxpool0")
        maxpool_op.type = Op.MaxPool
        maxpool_h = ifm.shape[1] * ifm.shape[2]
        maxpool_w = ifm.shape[3]
        maxpool_ifm_shape = [1, maxpool_h, maxpool_w, 1]
        maxpool_op.attrs["padding"] = b"VALID"
        maxpool_op.attrs["stride_w"] = 1
        maxpool_op.attrs["stride_h"] = 1
        maxpool_op.attrs["filter_width"] = maxpool_w
        maxpool_op.attrs["filter_height"] = 1
        maxpool_op.attrs["strides"] = [1, maxpool_op.attrs["stride_h"], maxpool_op.attrs["stride_w"], 1]
        maxpool_op.attrs["ksize"] = [1, maxpool_op.attrs["filter_height"], maxpool_op.attrs["filter_width"], 1]
        maxpool_op.inputs = [create_reshape_tensor(ifm, maxpool_ifm_shape)]
        ifm_max = Tensor([1, maxpool_h, 1, 1], ifm.dtype, maxpool_op.name + "_0")
        ifm_max.quantization = no_scale_quant
        maxpool_op.set_output_tensor(ifm_max)

        # PASS 1 - Sub+LUT(exp)
        sub_op = Operation(Op.Sub, self.op.name + "_sub1")
        sub_op.add_input_tensor(ifm)
        sub_op.add_input_tensor(create_reshape_tensor(ifm_max, [1, ifm.shape[1], ifm.shape[2], 1]))
        sub_op.set_activation_lut(
            create_const_tensor(
                sub_op.name + "_lut", [1, 1, 1, 256], DataType.int32, exp_lut, np.int32, TensorPurpose.LUT
            )
        )
        ifm_exp = Tensor(ifm.shape, DataType.int32, sub_op.name + "_0")
        ifm_exp.quantization = one_scale_quant.clone()
        ifm_exp.quantization.zero_point = 127
        ifm_exp.quantization.quant_min = -128
        ifm_exp.quantization.quant_max = 127
        sub_op.set_output_tensor(ifm_exp)

        # PASS 2 - SHR
        shr2_op = Operation(Op.SHR, self.op.name + "_shr2")
        shr2_op.attrs["rounding_mode"] = b"NATURAL"
        shr2_op.add_input_tensor(ifm_exp)
        shr2_op.add_input_tensor(
            create_const_tensor(
                shr2_op.name + "_const", [1, 1, 1, 1], DataType.int32, [12], np.int32, quantization=no_scale_quant
            ),
        )
        rescaled_exp = Tensor(ifm.shape, ifm_exp.dtype, shr2_op.name + "_0")
        rescaled_exp.quantization = no_scale_quant
        shr2_op.set_output_tensor(rescaled_exp)

        # PASS 3 - Reduce sum
        reduce_sum_op = Operation(Op.ReduceSum, self.op.name + "_reduce_sum3")
        reduce_sum_op.attrs["padding"] = b"VALID"
        reduce_sum_op.attrs["stride_w"] = 1
        reduce_sum_op.attrs["stride_h"] = 1
        reduce_sum_op.attrs["filter_width"] = 1
        reduce_sum_op.attrs["filter_height"] = 1
        reduce_sum_op.attrs["strides"] = [1, reduce_sum_op.attrs["stride_h"], reduce_sum_op.attrs["stride_w"], 1]
        reduce_sum_op.attrs["ksize"] = [1, reduce_sum_op.attrs["filter_height"], reduce_sum_op.attrs["filter_width"], 1]
        reduce_sum_op.add_input_tensor(rescaled_exp)

        reduce_sum_shape = [1, rescaled_exp.shape[1], rescaled_exp.shape[2], 1]
        sum_of_exp = Tensor(reduce_sum_shape, DataType.int32, reduce_sum_op.name + "_0")
        sum_of_exp.quantization = no_scale_quant
        reduce_sum_op.set_output_tensor(sum_of_exp)

        # PASS 4 - CLZ
        clz_op = Operation(Op.CLZ, self.op.name + "_clz4")
        clz_op.add_input_tensor(sum_of_exp)
        headroom_plus_one = Tensor(reduce_sum_shape, DataType.int32, clz_op.name + "_0")
        headroom_plus_one.quantization = no_scale_quant
        clz_op.set_output_tensor(headroom_plus_one)

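        # PASS 5 derives the shift used by the final SHR in pass 30. The constant
        # 12 + 31 - 8 accounts for the 12-bit shift already applied to the exponentials
        # in pass 2, the 31-bit fixed-point sum and the 8-bit output width; subtracting
        # the CLZ result compensates for the headroom removed from the sum in pass 7.
        # (This reading of the constants is an interpretation, not stated in the code.)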
        # PASS 5 - Sub
        sub5_op = Operation(Op.Sub, self.op.name + "_sub5")
        sub5_op.add_input_tensor(
            create_const_tensor(
                "headroom_offset_const",
                [1, 1, 1, 1],
                DataType.int32,
                [12 + 31 - 8],
                np.int32,
                quantization=no_scale_quant,
            ),
        )
        sub5_op.add_input_tensor(headroom_plus_one)
        right_shift = Tensor(reduce_sum_shape, DataType.int32, sub5_op.name + "_0")
        right_shift.quantization = no_scale_quant
        sub5_op.set_output_tensor(right_shift)

        # PASS 6 - Sub
        one = create_const_tensor("one_const", [1, 1, 1, 1], DataType.int32, [1], np.int32, quantization=no_scale_quant)
        sub6_op = Operation(Op.Sub, self.op.name + "_sub6")
        sub6_op.add_input_tensor(headroom_plus_one)
        sub6_op.add_input_tensor(one)
        headroom = Tensor(reduce_sum_shape, DataType.int32, sub6_op.name + "_0")
        headroom.quantization = no_scale_quant
        sub6_op.set_output_tensor(headroom)

        # PASS 7 - SHL
        shl7_op = Operation(Op.SHL, self.op.name + "_shl7")
        shl7_op.add_input_tensor(sum_of_exp)
        shl7_op.add_input_tensor(headroom)
        shifted_sum = Tensor(reduce_sum_shape, DataType.int32, shl7_op.name + "_0")
        shifted_sum.quantization = no_scale_quant
        shl7_op.set_output_tensor(shifted_sum)

        # PASS 8 - Sub
        sub8_op = Operation(Op.Sub, self.op.name + "_sub8")
        sub8_op.add_input_tensor(shifted_sum)
        sub8_op.add_input_tensor(
            create_const_tensor(
                "shifted_one_const", [1, 1, 1, 1], DataType.int32, [1 << 30], np.int32, quantization=no_scale_quant
            ),
        )
        shifted_sum_minus_one = Tensor(reduce_sum_shape, DataType.int32, sub8_op.name + "_0")
        shifted_sum_minus_one.quantization = no_scale_quant
        sub8_op.set_output_tensor(shifted_sum_minus_one)

        # PASS 9 - SHL
        shl9_op = Operation(Op.SHL, self.op.name + "_shl9")
        shl9_op.add_input_tensor(shifted_sum_minus_one)
        shl9_op.add_input_tensor(one)
        shifted_sum_minus_one = Tensor(reduce_sum_shape, DataType.int32, shl9_op.name + "_0")
        shifted_sum_minus_one.quantization = no_scale_quant
        shl9_op.set_output_tensor(shifted_sum_minus_one)

        # PASS 10 - Add
        add10_op = Operation(Op.Add, self.op.name + "_add10")
        add10_op.add_input_tensor(
            create_const_tensor(
                "F0_one_const", [1, 1, 1, 1], DataType.int32, [(1 << 31) - 1], np.int32, quantization=no_scale_quant
            ),
        )
        add10_op.add_input_tensor(shifted_sum_minus_one)
        add10_op.attrs["rescale"] = [1, 1]
        half_denominator = Tensor(reduce_sum_shape, DataType.int32, add10_op.name + "_0")
        half_denominator.quantization = one_scale_quant
        add10_op.set_output_tensor(half_denominator)

        # PASS 11 - Multiply
        mul11_op = Operation(Op.Mul, self.op.name + "_mul11")
        mul11_op.add_input_tensor(half_denominator)
        mul11_op.add_input_tensor(
            create_const_tensor(
                "neg_32_over_17_const",
                [1, 1, 1, 1],
                DataType.int32,
                [-1010580540],
                np.int32,
                quantization=one_scale_quant,
            ),
        )
        rescaled = Tensor(reduce_sum_shape, DataType.int32, mul11_op.name + "_0")
        rescaled.quantization = one_scale_quant.clone()
        rescaled.quantization.scale_f32 = 2.0
        mul11_op.set_output_tensor(rescaled)

        # PASS 12 - Add
        add12_op = Operation(Op.Add, self.op.name + "_add12")
        add12_op.add_input_tensor(rescaled)
        add12_op.add_input_tensor(
            create_const_tensor(
                "48_over_17_const", [1, 1, 1, 1], DataType.int32, [1515870810], np.int32, quantization=no_scale_quant
            ),
        )
        rescale_w_offset = Tensor(reduce_sum_shape, DataType.int32, add12_op.name + "_0")
        rescale_w_offset.quantization = one_scale_quant
        add12_op.set_output_tensor(rescale_w_offset)

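        # Passes 11-27 evaluate 1 / half_denominator with a gemmlowp-style Newton-Raphson
        # scheme: start from the linear estimate 48/17 - 32/17 * d (the two Q2.29
        # constants above), then refine three times with x = x + x * (1 - d * x). The
        # extra multiply by four in each iteration appears to move the Q31 products back
        # into Q2.29; this description is an interpretation based on the constant names.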
        nr_x = rescale_w_offset
        F2_one = create_const_tensor(
            "F2_one_const", [1, 1, 1, 1], DataType.int32, [(1 << 29)], np.int32, quantization=no_scale_quant
        )
        four = create_const_tensor(
            "four_const", [1, 1, 1, 1], DataType.int32, [4], np.int32, quantization=no_scale_quant
        )
        for i in range(3):
            # PASS 13, 18, 23 - MUL
            mul_op = Operation(Op.Mul, self.op.name + "_mul%d" % (13 + i * 5))
            mul_op.add_input_tensor(nr_x)
            mul_op.add_input_tensor(half_denominator)
            half_denominator_times_x = Tensor(reduce_sum_shape, DataType.int32, mul_op.name + "_0")
            half_denominator_times_x.quantization = one_scale_quant.clone()
            half_denominator_times_x.quantization.scale_f32 = 2.0
            mul_op.set_output_tensor(half_denominator_times_x)
            # PASS 14, 19, 24 - SUB
            sub_op = Operation(Op.Sub, self.op.name + "_sub%d" % (14 + i * 5))
            sub_op.add_input_tensor(F2_one)
            sub_op.add_input_tensor(half_denominator_times_x)
            one_minus_half_denominator_times_x = Tensor(reduce_sum_shape, DataType.int32, sub_op.name + "_0")
            one_minus_half_denominator_times_x.quantization = one_scale_quant
            sub_op.set_output_tensor(one_minus_half_denominator_times_x)
            # PASS 15, 20, 25 - MUL
            mul_op = Operation(Op.Mul, self.op.name + "_mul%d" % (15 + i * 5))
            mul_op.add_input_tensor(nr_x)
            mul_op.add_input_tensor(one_minus_half_denominator_times_x)
            to_rescale = Tensor(reduce_sum_shape, DataType.int32, mul_op.name + "_0")
            to_rescale.quantization = one_scale_quant.clone()
            to_rescale.quantization.scale_f32 = 2.0
            mul_op.set_output_tensor(to_rescale)
            # PASS 16, 21, 26 - MUL
            shl_op = Operation(Op.Mul, self.op.name + "_mul%d" % (16 + i * 5))
            shl_op.add_input_tensor(to_rescale)
            shl_op.add_input_tensor(four)
            to_add = Tensor(reduce_sum_shape, DataType.int32, shl_op.name + "_0")
            to_add.quantization = no_scale_quant
            shl_op.set_output_tensor(to_add)
            # PASS 17, 22, 27 - ADD
            add_op = Operation(Op.Add, self.op.name + "_add%d" % (17 + i * 5))
            add_op.add_input_tensor(nr_x)
            add_op.add_input_tensor(to_add)
            nr_x = Tensor(reduce_sum_shape, DataType.int32, add_op.name + "_0")
            nr_x.quantization = one_scale_quant
            add_op.set_output_tensor(nr_x)

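        # Passes 28-30 apply the reciprocal: the Newton-Raphson result is doubled into
        # scale_factor, the exponentials from pass 1 are multiplied by it, and the SHR
        # by the per-row right_shift from pass 5 scales the result down to the 8-bit
        # output range.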
        # PASS 28 - Multiply
        mul28_op = Operation(Op.Mul, self.op.name + "_mul28")
        mul28_op.add_input_tensor(nr_x)
        mul28_op.add_input_tensor(
            create_const_tensor("two_const", [1, 1, 1, 1], DataType.int32, [2], np.int32, quantization=no_scale_quant)
        )
        scale_factor = Tensor(reduce_sum_shape, DataType.int32, mul28_op.name + "_0")
        scale_factor.quantization = one_scale_quant
        mul28_op.set_output_tensor(scale_factor)

        # PASS 29 - Multiply
        mul_op = Operation(Op.Mul, self.op.name + "_mul29")
        mul_op.add_input_tensor(ifm_exp)
        mul_op.add_input_tensor(scale_factor)
        scaled_exp = Tensor(ifm_exp.shape, DataType.int32, mul_op.name + "_0")
        scaled_exp.quantization = one_scale_quant.clone()
        scaled_exp.quantization.scale_f32 = 2.0
        mul_op.set_output_tensor(scaled_exp)

        # PASS 30 - SHR
        shr30_op = Operation(Op.SHR, self.op.name + "_shr30")
        shr30_op.attrs["rounding_mode"] = b"NATURAL"
        shr30_op.add_input_tensor(scaled_exp)
        shr30_op.add_input_tensor(right_shift)
        shr30_op.set_output_tensor(ofm)

        return shr30_op

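    # The int16 variant follows the same max-subtract / exp / normalize structure, but
    # instead of a Newton-Raphson reciprocal it folds beta into a fixed-point multiply
    # (pass 2) and uses the two 512-entry class LUTs for exp and 1/(1+x).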
    def get_graph_int16(self, ifm, ofm):
        no_scale_quant = ifm.quantization.clone()
        no_scale_quant.scale_f32 = None

        # PASS 0 - Depthwise Maxpool
        maxpool_op = self.op.clone("_maxpool0")
        maxpool_op.type = Op.MaxPool
        maxpool_h = ifm.shape[1] * ifm.shape[2]
        maxpool_w = ifm.shape[3]
        maxpool_ifm_shape = [1, maxpool_h, maxpool_w, 1]
        maxpool_op.attrs["padding"] = b"VALID"
        maxpool_op.attrs["stride_w"] = 1
        maxpool_op.attrs["stride_h"] = 1
        maxpool_op.attrs["filter_width"] = maxpool_w
        maxpool_op.attrs["filter_height"] = 1
        maxpool_op.attrs["strides"] = [1, maxpool_op.attrs["stride_h"], maxpool_op.attrs["stride_w"], 1]
        maxpool_op.attrs["ksize"] = [1, maxpool_op.attrs["filter_height"], maxpool_op.attrs["filter_width"], 1]
        maxpool_op.inputs = [create_reshape_tensor(ifm, maxpool_ifm_shape)]
        maxpool_ofm = Tensor([1, maxpool_h, 1, 1], ifm.dtype, maxpool_op.name + "_0")
        maxpool_ofm.quantization = no_scale_quant
        maxpool_op.set_output_tensor(maxpool_ofm)

        # PASS 1 - Sub
        sub1_op = Operation(Op.Sub, self.op.name + "_sub1")
        sub1_op.add_input_tensor(ifm)
        sub1_op.add_input_tensor(create_reshape_tensor(maxpool_ofm, [1, ifm.shape[1], ifm.shape[2], 1]))
        sub1_ofm = Tensor(ifm.shape, DataType.int32, sub1_op.name + "_0")
        sub1_ofm.quantization = ifm.quantization.clone()
        sub1_op.set_output_tensor(sub1_ofm)

        # PASS 2 - Mul
        beta = self.op.attrs.get("beta", 1.0)
        mul2_out_range = 10.0 / 65535.0
        mul2_scale, _ = scaling.elementwise_mul_scale(sub1_ofm.quantization.scale_f32, beta, mul2_out_range)
        mul2_quant = ifm.quantization.clone()
        mul2_quant.scale_f32 = beta
        mul2_op = Operation(Op.Mul, self.op.name + "_mul2")
        mul2_op.add_input_tensor(sub1_ofm)
        mul2_op.add_input_tensor(
            create_const_tensor(
                mul2_op.name + "_const", [1, 1, 1, 1], DataType.int32, [mul2_scale], np.int32, quantization=mul2_quant
            ),
        )
        mul2_ofm = Tensor(ifm.shape, DataType.int32, mul2_op.name + "_0")
        mul2_ofm.quantization = ofm.quantization.clone()
        mul2_ofm.quantization.scale_f32 = mul2_out_range
        mul2_op.set_output_tensor(mul2_ofm)

        # PASS 3 - Add+LUT(exp)
        add_op = Operation(Op.Add, self.op.name + "_add3")
        add_op.add_input_tensor(mul2_ofm)
        add_op.add_input_tensor(
            create_const_tensor(
                add_op.name + "_const", [1, 1, 1, 1], DataType.int32, [32767], np.int32, quantization=no_scale_quant
            ),
        )
        add_op.set_activation_lut(
            create_const_tensor(
                add_op.name + "_lut", [1, 1, 1, 512], DataType.int32, self.EXP_LUT, np.int32, TensorPurpose.LUT
            )
        )
        exp_ofm = Tensor(mul2_ofm.shape, DataType.int16, add_op.name + "_0")
        exp_ofm.quantization = mul2_ofm.quantization.clone()
        add_op.set_output_tensor(exp_ofm)

        # PASS 4 - Reduce sum
        reduce_sum_op = Operation(Op.ReduceSum, self.op.name + "_reduce_sum4")
        reduce_sum_op.attrs["padding"] = b"VALID"
        reduce_sum_op.attrs["stride_w"] = 1
        reduce_sum_op.attrs["stride_h"] = 1
        reduce_sum_op.attrs["filter_width"] = 1
        reduce_sum_op.attrs["filter_height"] = 1
        reduce_sum_op.attrs["strides"] = [1, reduce_sum_op.attrs["stride_h"], reduce_sum_op.attrs["stride_w"], 1]
        reduce_sum_op.attrs["ksize"] = [1, reduce_sum_op.attrs["filter_height"], reduce_sum_op.attrs["filter_width"], 1]
        reduce_sum_op.add_input_tensor(exp_ofm)

        reduce_sum_shape = [1, exp_ofm.shape[1], exp_ofm.shape[2], 1]
        sum_of_exp = Tensor(reduce_sum_shape, DataType.int32, reduce_sum_op.name + "_0")
        sum_of_exp.quantization = no_scale_quant
        reduce_sum_op.set_output_tensor(sum_of_exp)

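        # Passes 5-10 normalize the sum of exponentials: CLZ finds its headroom, the
        # leading power of two is subtracted (passes 6-8), and the remainder is shifted
        # up to fill the 32-bit word and then down to 16 bits, giving the fraction that
        # pass 11 feeds into the 1/(1+x) LUT. reciprocal_right_shift is reused in pass 13
        # to scale the final result.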
        # PASS 5 - CLZ
        clz_op = Operation(Op.CLZ, self.op.name + "_clz5")
        clz_op.add_input_tensor(sum_of_exp)
        headroom_plus_one = Tensor(reduce_sum_shape, DataType.int32, clz_op.name + "_0")
        headroom_plus_one.quantization = no_scale_quant
        clz_op.set_output_tensor(headroom_plus_one)

        # PASS 6 - Sub
        sub6_op = Operation(Op.Sub, self.op.name + "_sub6")
        sub6_op.add_input_tensor(
            create_const_tensor(
                sub6_op.name + "_const", [1, 1, 1, 1], DataType.int32, [31], np.int32, quantization=no_scale_quant
            ),
        )
        sub6_op.add_input_tensor(headroom_plus_one)
        reciprocal_right_shift = Tensor(reduce_sum_shape, DataType.int32, sub6_op.name + "_0")
        reciprocal_right_shift.quantization = no_scale_quant
        sub6_op.set_output_tensor(reciprocal_right_shift)

        # PASS 7 - SHL
        shl7_op = Operation(Op.SHL, self.op.name + "_shl7")
        shl7_op.add_input_tensor(
            create_const_tensor(
                shl7_op.name + "_const", [1, 1, 1, 1], DataType.int32, [1], np.int32, quantization=no_scale_quant
            ),
        )
        shl7_op.add_input_tensor(reciprocal_right_shift)
        constant_one = Tensor(reduce_sum_shape, DataType.int32, shl7_op.name + "_0")
        constant_one.quantization = no_scale_quant
        shl7_op.set_output_tensor(constant_one)

        # PASS 8 - Sub
        sub8_op = Operation(Op.Sub, self.op.name + "_sub8")
        sub8_op.add_input_tensor(sum_of_exp)
        sub8_op.add_input_tensor(constant_one)
        sum_of_exps_minus_one = Tensor(reduce_sum_shape, DataType.int32, sub8_op.name + "_0")
        sum_of_exps_minus_one.quantization = no_scale_quant
        sub8_op.set_output_tensor(sum_of_exps_minus_one)

        # PASS 9 - SHL
        shl9_op = Operation(Op.SHL, self.op.name + "_shl9")
        shl9_op.add_input_tensor(sum_of_exps_minus_one)
        shl9_op.add_input_tensor(headroom_plus_one)
        shifted_sum_minus_one = Tensor(reduce_sum_shape, DataType.int32, shl9_op.name + "_0")
        shifted_sum_minus_one.quantization = no_scale_quant
        shl9_op.set_output_tensor(shifted_sum_minus_one)

        # PASS 10 - SHR
        shr10_op = Operation(Op.SHR, self.op.name + "_shr10")
        shr10_op.add_input_tensor(shifted_sum_minus_one)
        shr10_op.add_input_tensor(
            create_const_tensor(
                shr10_op.name + "_const", [1, 1, 1, 1], DataType.int32, [15], np.int32, quantization=no_scale_quant
            ),
        )
        shifted_sum_minus_one_16 = Tensor(reduce_sum_shape, DataType.int32, shr10_op.name + "_0")
        shifted_sum_minus_one_16.quantization = shifted_sum_minus_one.quantization.clone()
        shr10_op.set_output_tensor(shifted_sum_minus_one_16)

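        # PASS 11 re-centers the 16-bit fraction into the signed int16 domain (the
        # subtraction of 32768) and maps it through ONE_OVER_ONE_PLUS_X_LUT, giving the
        # reciprocal scale that passes 12-13 multiply by and shift down with.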
        # PASS 11 - Sub+LUT(one over one plus x)
        sub11_op = Operation(Op.Sub, self.op.name + "_sub11")
        sub11_op.add_input_tensor(shifted_sum_minus_one_16)
        sub11_op.add_input_tensor(
            create_const_tensor(
                sub11_op.name + "_const", [1, 1, 1, 1], DataType.int32, [32768], np.int32, quantization=no_scale_quant
            ),
        )
        sub11_op.set_activation_lut(
            create_const_tensor(
                sub11_op.name + "_lut",
                [1, 1, 1, 512],
                DataType.int32,
                self.ONE_OVER_ONE_PLUS_X_LUT,
                np.int32,
                TensorPurpose.LUT,
            )
        )
        reciprocal_scale = Tensor(reduce_sum_shape, DataType.int16, sub11_op.name + "_0")
        reciprocal_scale.quantization = no_scale_quant
        sub11_op.set_output_tensor(reciprocal_scale)

        # PASS 12 - Multiply
        mul_op = Operation(Op.Mul, self.op.name + "_mul12")
        mul_op.add_input_tensor(exp_ofm)
        mul_op.add_input_tensor(reciprocal_scale)
        mul_ofm = Tensor(exp_ofm.shape, DataType.int32, mul_op.name + "_0")
        mul_ofm.quantization = no_scale_quant
        mul_op.set_output_tensor(mul_ofm)

        # PASS 13 - SHR
        shr13_op = Operation(Op.SHR, self.op.name + "_shr13")
        shr13_op.add_input_tensor(mul_ofm)
        shr13_op.add_input_tensor(reciprocal_right_shift)
        shr13_op.set_output_tensor(ofm)

        return shr13_op