blob: 5c7492b4895590abdd3011b0f57906e96f06195c [file] [log] [blame]
Louis Verhaard9bfe0f82020-12-03 12:26:25 +01001/*
2 * Copyright (c) 2020 Arm Limited. All rights reserved.
3 *
4 * SPDX-License-Identifier: Apache-2.0
5 *
6 * Licensed under the Apache License, Version 2.0 (the License); you may
7 * not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
14 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *
18 * Description:
19 * Implementation of the search-based allocator.
20 */
21
22#include <algorithm>
23#include <cstdint>
24#include <set>
25#include <vector>
26
27#include "search_allocator.h"
28
29SearchAllocator::SearchAllocator(const std::vector<LiveRange> &live_ranges, uint32_t size_limit) {
30 lrs = live_ranges;
31 uint32_t max_end_time = 0;
32 for (size_t i = 0; i < lrs.size(); ++i) {
33 auto &lr = lrs[i];
Fredrik Svedberg5b513882020-12-11 13:42:22 +010034 lr.id = static_cast<int>(i);
Louis Verhaard9bfe0f82020-12-03 12:26:25 +010035 max_end_time = std::max(max_end_time, lr.end_time);
36 }
37 lrs_at_time.resize(max_end_time + 1);
38 size_at_time.resize(max_end_time + 1);
39 neighbours.resize(lrs.size());
40 // Calculate which live ranges are active at every timestamp
41 for (size_t t = 0; t <= max_end_time; ++t) {
42 lrs_at_time[t].clear();
43 }
44 for (auto &lr : lrs) {
45 for (auto t = lr.start_time; t <= lr.end_time; ++t) {
46 lrs_at_time[t].push_back(&lr);
47 }
48 }
49 min_required_size = 0;
50 for (size_t t = 0; t <= max_end_time; ++t) {
51 // Calculate minimum needed size at each timestamp
52 uint32_t size_at_t = 0;
53 for (auto &lr : lrs_at_time[t]) {
54 size_at_t += lr->size;
55 }
56 size_at_time[t] = size_at_t;
57 min_required_size = std::max(size_at_t, min_required_size);
58 // Calculate all neighbours
59 for (size_t i = 0; i < lrs_at_time[t].size(); ++i) {
60 auto lr1 = lrs_at_time[t][i];
61 auto &nb1 = neighbours[lr1->id];
62 for (size_t j = i + 1; j < lrs_at_time[t].size(); ++j) {
63 auto lr2 = lrs_at_time[t][j];
64 if (find(nb1.begin(), nb1.end(), lr2) == nb1.end()) {
65 nb1.push_back(lr2);
66 neighbours[lr2->id].push_back(lr1);
67 }
68 }
69 }
70 }
Louis Verhaardd4738e52021-01-21 13:16:18 +010071 target_size = std::max(min_required_size, size_limit);
Louis Verhaard9bfe0f82020-12-03 12:26:25 +010072 // Calculate the urgency of each live range
73 lr_urgency.resize(lrs.size());
74 for (size_t i = 0; i < lrs.size(); ++i) {
75 auto &lr = lrs[i];
76 uint32_t urgency = 0;
77 for (size_t t = lr.start_time; t <= lr.end_time; ++t) {
78 urgency = std::max(size_at_time[t], urgency);
79 }
80 lr_urgency[i] = urgency;
81 }
82 best_size = UINT32_MAX;
83}
84
85uint32_t SearchAllocator::allocate(std::vector<uint32_t> &output) {
86 output.clear();
87 nr_iterations = 0;
88 std::vector<size_t> indices;
89 // Initial solution, using a heuristic allocator
90 for (size_t i = 0; i < lrs.size(); ++i) {
91 indices.push_back(i);
92 }
93 sort_indices_on_prio(indices);
94 // Allocate the initial solution
95 best_size = UINT32_MAX;
96 best_size = allocate_indices(indices);
97 if (best_size <= target_size) {
98 // The heuristic allocation returned an optimal solution.
99 // No need to search.
100 } else {
101 // Try to improve the heuristic allocation
102 search(indices, best_size, MAX_ITERATIONS);
103 }
104 output.clear();
105 for (auto &lr : lrs) {
106 output.push_back(lr.address);
107 }
108 return best_size;
109}
110
111void SearchAllocator::allocate_lr(LiveRange &lr) const {
112 uint32_t address = 0;
113 int predecessor = NO_PREDECESSOR;
114 bool fits = false;
115 while (!fits) {
116 fits = true;
117 // Find neighbours that overlap with address
118 for (auto lr2_p : neighbours[lr.id]) {
119 if (lr2_p->address == NOT_ALLOCATED || lr2_p->end_address <= address) {
120 continue;
121 }
122 if (lr2_p->overlaps(address, lr.size)) {
123 // Overlap found; increase address
124 fits = false;
125 address = lr2_p->end_address;
126 predecessor = lr2_p->id;
127 }
128 }
129 }
130 lr.address = address;
131 lr.end_address = address + lr.size;
132 lr.predecessor = predecessor;
133}
134
135uint32_t SearchAllocator::allocate_indices(const std::vector<size_t> &indices) {
136 ++nr_iterations;
137 std::vector<size_t> count(indices.size());
138 for (auto &lr : lrs) {
139 lr.address = NOT_ALLOCATED;
140 }
141 uint32_t size = 0;
142 for (size_t turn = 0; size <= best_size && turn < indices.size(); ++turn) {
143 auto &lr = lrs[indices[turn]];
144 allocate_lr(lr);
145 lr.turn = turn;
146 size = std::max(size, lr.end_address);
147 }
148 return size;
149}
150
151void SearchAllocator::sort_indices_on_prio(std::vector<size_t> &indices) const {
152 std::sort(indices.begin(), indices.end(),
153 [this] (size_t const& a, size_t const& b) {
154 if (lr_urgency[a] != lr_urgency[b]) {
155 return lr_urgency[a] > lr_urgency[b];
156 }
157 auto &lr1 = lrs[a];
158 auto &lr2 = lrs[b];
159 auto duration1 = lr1.end_time - lr1.start_time;
160 auto duration2 = lr2.end_time - lr2.start_time;
161 if (duration1 != duration2) {
162 return duration1 > duration2;
163 }
164 if (lr1.start_time != lr2.start_time) {
165 return lr1.start_time < lr2.start_time;
166 }
167 if (lr1.size != lr2.size) {
168 return lr1.size > lr2.size;
169 }
170 return lr1.id < lr2.id;
171 });
172}
173
174void SearchAllocator::add_predecessor_turns(std::set<size_t> &turns, const LiveRange &lr) const {
175 turns.insert(lr.turn);
176 int id = lr.id;
177 while (lrs[id].predecessor != NO_PREDECESSOR) {
178 id = lrs[id].predecessor;
179 turns.insert(lrs[id].turn);
180 }
181}
182
183void SearchAllocator::attempt_bottleneck_fix(std::vector<size_t> &indices) {
184 // Find the bottleneck
185 LiveRange *max_lr = &lrs[0];
186 for (auto &lr : lrs) {
187 if (lr.end_address > max_lr->end_address) {
188 max_lr = &lr;
189 }
190 }
191 // Find all live ranges that affected the placement of the bottleneck live range.
192 // This consists of two types of live ranges:
193 // - direct neighbours of the bottleneck live range
194 // - direct and indirect predecessors of these neighbours + bottleneck
195 // The turns at which these live ranges were allocated are put in the turns vector.
196 std::set<size_t> turns;
197 add_predecessor_turns(turns, *max_lr);
198 for (auto lr_p : neighbours[max_lr->id]) {
199 add_predecessor_turns(turns, *lr_p);
200 }
201 // Non-direct neighbours that interfere with the allocation of the bottleneck are the
202 // immediate cause for gaps in the allocation, and are selected with higher probability.
203 std::vector<size_t> turn_list;
204 std::vector<size_t> non_nb_turn_list;
205 for (auto turn : turns) {
206 turn_list.push_back(turn);
207 auto &lr = lrs[indices[turn]];
208 if (!max_lr->is_neighbour(lr)) {
209 non_nb_turn_list.push_back(turn);
210 }
211 }
212 size_t ix1;
213 if (rng() % 100 < 30 && !non_nb_turn_list.empty()) {
214 // Pick a live range from the "non-neighbour list"
215 ix1 = non_nb_turn_list[rng() % non_nb_turn_list.size()];
216 } else {
217 // Pick any affecting live range.
218 ix1 = turn_list[rng() % turn_list.size()];
219 }
Louis Verhaardd4738e52021-01-21 13:16:18 +0100220 // Note: turn_list has always at least 2 elements for bottlenecks
221 size_t ix2 = turn_list[rng() % (turn_list.size() - 1)];
Louis Verhaard9bfe0f82020-12-03 12:26:25 +0100222 if (ix1 == ix2) {
223 ix2 = turn_list[turn_list.size() - 1];
224 }
225 // Swap indices
226 std::swap(indices[ix1], indices[ix2]);
227}
228
229void SearchAllocator::search(std::vector<size_t> &indices, uint32_t initial_size, int iterations) {
230 std::vector<size_t> best_indices = indices;
231 std::vector<LiveRange> best_lrs = lrs;
232 for (int i = 0; i < iterations; ++i) {
233 // Reorder the indices
234 attempt_bottleneck_fix(indices);
235 // Allocate the reordered indices and check if it gave an improvement
236 auto new_size = allocate_indices(indices);
237 if (new_size <= best_size) {
238 // The new allocation produced a new best result; remember it
239 best_size = new_size;
240 best_indices = indices;
241 best_lrs = lrs;
242 if (best_size <= target_size) {
243 // Target reached; stop
244 return;
245 }
246 } else {
247 // The new allocation produced worse result; undo the change
248 indices = best_indices;
249 lrs = best_lrs;
250 }
251 }
252 lrs = best_lrs;
253}
254
255uint32_t allocate(const std::vector<uint32_t> &input, int available_size, std::vector<uint32_t> &output) {
256 // Convert input to vector of live ranges
257 std::vector<LiveRange> lrs;
258 for (size_t i = 0; i < input.size(); i += 3) {
259 LiveRange lr;
260 lr.start_time = input[i];
261 lr.end_time = input[i+1];
262 lr.size = input[i+2];
263 lrs.push_back(lr);
264 }
265 SearchAllocator allocator(lrs, available_size);
266 return allocator.allocate(output);
267}