Louis Verhaard | 9bfe0f8 | 2020-12-03 12:26:25 +0100 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2020 Arm Limited. All rights reserved. |
| 3 | * |
| 4 | * SPDX-License-Identifier: Apache-2.0 |
| 5 | * |
| 6 | * Licensed under the Apache License, Version 2.0 (the License); you may |
| 7 | * not use this file except in compliance with the License. |
| 8 | * You may obtain a copy of the License at |
| 9 | * |
| 10 | * www.apache.org/licenses/LICENSE-2.0 |
| 11 | * |
| 12 | * Unless required by applicable law or agreed to in writing, software |
| 13 | * distributed under the License is distributed on an AS IS BASIS, WITHOUT |
| 14 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 15 | * See the License for the specific language governing permissions and |
| 16 | * limitations under the License. |
| 17 | * |
| 18 | * Description: |
| 19 | * Implementation of the search-based allocator. |
| 20 | */ |
| 21 | |
| 22 | #include <algorithm> |
| 23 | #include <cstdint> |
| 24 | #include <set> |
| 25 | #include <vector> |
| 26 | |
| 27 | #include "search_allocator.h" |
| 28 | |
| 29 | SearchAllocator::SearchAllocator(const std::vector<LiveRange> &live_ranges, uint32_t size_limit) { |
| 30 | lrs = live_ranges; |
| 31 | uint32_t max_end_time = 0; |
| 32 | for (size_t i = 0; i < lrs.size(); ++i) { |
| 33 | auto &lr = lrs[i]; |
Fredrik Svedberg | 5b51388 | 2020-12-11 13:42:22 +0100 | [diff] [blame] | 34 | lr.id = static_cast<int>(i); |
Louis Verhaard | 9bfe0f8 | 2020-12-03 12:26:25 +0100 | [diff] [blame] | 35 | max_end_time = std::max(max_end_time, lr.end_time); |
| 36 | } |
| 37 | lrs_at_time.resize(max_end_time + 1); |
| 38 | size_at_time.resize(max_end_time + 1); |
| 39 | neighbours.resize(lrs.size()); |
| 40 | // Calculate which live ranges are active at every timestamp |
| 41 | for (size_t t = 0; t <= max_end_time; ++t) { |
| 42 | lrs_at_time[t].clear(); |
| 43 | } |
| 44 | for (auto &lr : lrs) { |
| 45 | for (auto t = lr.start_time; t <= lr.end_time; ++t) { |
| 46 | lrs_at_time[t].push_back(&lr); |
| 47 | } |
| 48 | } |
| 49 | min_required_size = 0; |
| 50 | for (size_t t = 0; t <= max_end_time; ++t) { |
| 51 | // Calculate minimum needed size at each timestamp |
| 52 | uint32_t size_at_t = 0; |
| 53 | for (auto &lr : lrs_at_time[t]) { |
| 54 | size_at_t += lr->size; |
| 55 | } |
| 56 | size_at_time[t] = size_at_t; |
| 57 | min_required_size = std::max(size_at_t, min_required_size); |
| 58 | // Calculate all neighbours |
| 59 | for (size_t i = 0; i < lrs_at_time[t].size(); ++i) { |
| 60 | auto lr1 = lrs_at_time[t][i]; |
| 61 | auto &nb1 = neighbours[lr1->id]; |
| 62 | for (size_t j = i + 1; j < lrs_at_time[t].size(); ++j) { |
| 63 | auto lr2 = lrs_at_time[t][j]; |
| 64 | if (find(nb1.begin(), nb1.end(), lr2) == nb1.end()) { |
| 65 | nb1.push_back(lr2); |
| 66 | neighbours[lr2->id].push_back(lr1); |
| 67 | } |
| 68 | } |
| 69 | } |
| 70 | } |
Louis Verhaard | d4738e5 | 2021-01-21 13:16:18 +0100 | [diff] [blame] | 71 | target_size = std::max(min_required_size, size_limit); |
Louis Verhaard | 9bfe0f8 | 2020-12-03 12:26:25 +0100 | [diff] [blame] | 72 | // Calculate the urgency of each live range |
| 73 | lr_urgency.resize(lrs.size()); |
| 74 | for (size_t i = 0; i < lrs.size(); ++i) { |
| 75 | auto &lr = lrs[i]; |
| 76 | uint32_t urgency = 0; |
| 77 | for (size_t t = lr.start_time; t <= lr.end_time; ++t) { |
| 78 | urgency = std::max(size_at_time[t], urgency); |
| 79 | } |
| 80 | lr_urgency[i] = urgency; |
| 81 | } |
| 82 | best_size = UINT32_MAX; |
| 83 | } |
| 84 | |
| 85 | uint32_t SearchAllocator::allocate(std::vector<uint32_t> &output) { |
| 86 | output.clear(); |
| 87 | nr_iterations = 0; |
| 88 | std::vector<size_t> indices; |
| 89 | // Initial solution, using a heuristic allocator |
| 90 | for (size_t i = 0; i < lrs.size(); ++i) { |
| 91 | indices.push_back(i); |
| 92 | } |
| 93 | sort_indices_on_prio(indices); |
| 94 | // Allocate the initial solution |
| 95 | best_size = UINT32_MAX; |
| 96 | best_size = allocate_indices(indices); |
| 97 | if (best_size <= target_size) { |
| 98 | // The heuristic allocation returned an optimal solution. |
| 99 | // No need to search. |
| 100 | } else { |
| 101 | // Try to improve the heuristic allocation |
| 102 | search(indices, best_size, MAX_ITERATIONS); |
| 103 | } |
| 104 | output.clear(); |
| 105 | for (auto &lr : lrs) { |
| 106 | output.push_back(lr.address); |
| 107 | } |
| 108 | return best_size; |
| 109 | } |
| 110 | |
| 111 | void SearchAllocator::allocate_lr(LiveRange &lr) const { |
| 112 | uint32_t address = 0; |
| 113 | int predecessor = NO_PREDECESSOR; |
| 114 | bool fits = false; |
| 115 | while (!fits) { |
| 116 | fits = true; |
| 117 | // Find neighbours that overlap with address |
| 118 | for (auto lr2_p : neighbours[lr.id]) { |
| 119 | if (lr2_p->address == NOT_ALLOCATED || lr2_p->end_address <= address) { |
| 120 | continue; |
| 121 | } |
| 122 | if (lr2_p->overlaps(address, lr.size)) { |
| 123 | // Overlap found; increase address |
| 124 | fits = false; |
| 125 | address = lr2_p->end_address; |
| 126 | predecessor = lr2_p->id; |
| 127 | } |
| 128 | } |
| 129 | } |
| 130 | lr.address = address; |
| 131 | lr.end_address = address + lr.size; |
| 132 | lr.predecessor = predecessor; |
| 133 | } |
| 134 | |
| 135 | uint32_t SearchAllocator::allocate_indices(const std::vector<size_t> &indices) { |
| 136 | ++nr_iterations; |
| 137 | std::vector<size_t> count(indices.size()); |
| 138 | for (auto &lr : lrs) { |
| 139 | lr.address = NOT_ALLOCATED; |
| 140 | } |
| 141 | uint32_t size = 0; |
| 142 | for (size_t turn = 0; size <= best_size && turn < indices.size(); ++turn) { |
| 143 | auto &lr = lrs[indices[turn]]; |
| 144 | allocate_lr(lr); |
| 145 | lr.turn = turn; |
| 146 | size = std::max(size, lr.end_address); |
| 147 | } |
| 148 | return size; |
| 149 | } |
| 150 | |
| 151 | void SearchAllocator::sort_indices_on_prio(std::vector<size_t> &indices) const { |
| 152 | std::sort(indices.begin(), indices.end(), |
| 153 | [this] (size_t const& a, size_t const& b) { |
| 154 | if (lr_urgency[a] != lr_urgency[b]) { |
| 155 | return lr_urgency[a] > lr_urgency[b]; |
| 156 | } |
| 157 | auto &lr1 = lrs[a]; |
| 158 | auto &lr2 = lrs[b]; |
| 159 | auto duration1 = lr1.end_time - lr1.start_time; |
| 160 | auto duration2 = lr2.end_time - lr2.start_time; |
| 161 | if (duration1 != duration2) { |
| 162 | return duration1 > duration2; |
| 163 | } |
| 164 | if (lr1.start_time != lr2.start_time) { |
| 165 | return lr1.start_time < lr2.start_time; |
| 166 | } |
| 167 | if (lr1.size != lr2.size) { |
| 168 | return lr1.size > lr2.size; |
| 169 | } |
| 170 | return lr1.id < lr2.id; |
| 171 | }); |
| 172 | } |
| 173 | |
| 174 | void SearchAllocator::add_predecessor_turns(std::set<size_t> &turns, const LiveRange &lr) const { |
| 175 | turns.insert(lr.turn); |
| 176 | int id = lr.id; |
| 177 | while (lrs[id].predecessor != NO_PREDECESSOR) { |
| 178 | id = lrs[id].predecessor; |
| 179 | turns.insert(lrs[id].turn); |
| 180 | } |
| 181 | } |
| 182 | |
| 183 | void SearchAllocator::attempt_bottleneck_fix(std::vector<size_t> &indices) { |
| 184 | // Find the bottleneck |
| 185 | LiveRange *max_lr = &lrs[0]; |
| 186 | for (auto &lr : lrs) { |
| 187 | if (lr.end_address > max_lr->end_address) { |
| 188 | max_lr = &lr; |
| 189 | } |
| 190 | } |
| 191 | // Find all live ranges that affected the placement of the bottleneck live range. |
| 192 | // This consists of two types of live ranges: |
| 193 | // - direct neighbours of the bottleneck live range |
| 194 | // - direct and indirect predecessors of these neighbours + bottleneck |
| 195 | // The turns at which these live ranges were allocated are put in the turns vector. |
| 196 | std::set<size_t> turns; |
| 197 | add_predecessor_turns(turns, *max_lr); |
| 198 | for (auto lr_p : neighbours[max_lr->id]) { |
| 199 | add_predecessor_turns(turns, *lr_p); |
| 200 | } |
| 201 | // Non-direct neighbours that interfere with the allocation of the bottleneck are the |
| 202 | // immediate cause for gaps in the allocation, and are selected with higher probability. |
| 203 | std::vector<size_t> turn_list; |
| 204 | std::vector<size_t> non_nb_turn_list; |
| 205 | for (auto turn : turns) { |
| 206 | turn_list.push_back(turn); |
| 207 | auto &lr = lrs[indices[turn]]; |
| 208 | if (!max_lr->is_neighbour(lr)) { |
| 209 | non_nb_turn_list.push_back(turn); |
| 210 | } |
| 211 | } |
| 212 | size_t ix1; |
| 213 | if (rng() % 100 < 30 && !non_nb_turn_list.empty()) { |
| 214 | // Pick a live range from the "non-neighbour list" |
| 215 | ix1 = non_nb_turn_list[rng() % non_nb_turn_list.size()]; |
| 216 | } else { |
| 217 | // Pick any affecting live range. |
| 218 | ix1 = turn_list[rng() % turn_list.size()]; |
| 219 | } |
Louis Verhaard | d4738e5 | 2021-01-21 13:16:18 +0100 | [diff] [blame] | 220 | // Note: turn_list has always at least 2 elements for bottlenecks |
| 221 | size_t ix2 = turn_list[rng() % (turn_list.size() - 1)]; |
Louis Verhaard | 9bfe0f8 | 2020-12-03 12:26:25 +0100 | [diff] [blame] | 222 | if (ix1 == ix2) { |
| 223 | ix2 = turn_list[turn_list.size() - 1]; |
| 224 | } |
| 225 | // Swap indices |
| 226 | std::swap(indices[ix1], indices[ix2]); |
| 227 | } |
| 228 | |
| 229 | void SearchAllocator::search(std::vector<size_t> &indices, uint32_t initial_size, int iterations) { |
| 230 | std::vector<size_t> best_indices = indices; |
| 231 | std::vector<LiveRange> best_lrs = lrs; |
| 232 | for (int i = 0; i < iterations; ++i) { |
| 233 | // Reorder the indices |
| 234 | attempt_bottleneck_fix(indices); |
| 235 | // Allocate the reordered indices and check if it gave an improvement |
| 236 | auto new_size = allocate_indices(indices); |
| 237 | if (new_size <= best_size) { |
| 238 | // The new allocation produced a new best result; remember it |
| 239 | best_size = new_size; |
| 240 | best_indices = indices; |
| 241 | best_lrs = lrs; |
| 242 | if (best_size <= target_size) { |
| 243 | // Target reached; stop |
| 244 | return; |
| 245 | } |
| 246 | } else { |
| 247 | // The new allocation produced worse result; undo the change |
| 248 | indices = best_indices; |
| 249 | lrs = best_lrs; |
| 250 | } |
| 251 | } |
| 252 | lrs = best_lrs; |
| 253 | } |
| 254 | |
| 255 | uint32_t allocate(const std::vector<uint32_t> &input, int available_size, std::vector<uint32_t> &output) { |
| 256 | // Convert input to vector of live ranges |
| 257 | std::vector<LiveRange> lrs; |
| 258 | for (size_t i = 0; i < input.size(); i += 3) { |
| 259 | LiveRange lr; |
| 260 | lr.start_time = input[i]; |
| 261 | lr.end_time = input[i+1]; |
| 262 | lr.size = input[i+2]; |
| 263 | lrs.push_back(lr); |
| 264 | } |
| 265 | SearchAllocator allocator(lrs, available_size); |
| 266 | return allocator.allocate(output); |
| 267 | } |