blob: 5c7492b4895590abdd3011b0f57906e96f06195c [file] [log] [blame]
/*
* Copyright (c) 2020 Arm Limited. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Description:
* Implementation of the search-based allocator.
*/
#include <algorithm>
#include <cstdint>
#include <set>
#include <vector>
#include "search_allocator.h"
SearchAllocator::SearchAllocator(const std::vector<LiveRange> &live_ranges, uint32_t size_limit) {
lrs = live_ranges;
uint32_t max_end_time = 0;
for (size_t i = 0; i < lrs.size(); ++i) {
auto &lr = lrs[i];
lr.id = static_cast<int>(i);
max_end_time = std::max(max_end_time, lr.end_time);
}
lrs_at_time.resize(max_end_time + 1);
size_at_time.resize(max_end_time + 1);
neighbours.resize(lrs.size());
// Calculate which live ranges are active at every timestamp
for (size_t t = 0; t <= max_end_time; ++t) {
lrs_at_time[t].clear();
}
for (auto &lr : lrs) {
for (auto t = lr.start_time; t <= lr.end_time; ++t) {
lrs_at_time[t].push_back(&lr);
}
}
min_required_size = 0;
for (size_t t = 0; t <= max_end_time; ++t) {
// Calculate minimum needed size at each timestamp
uint32_t size_at_t = 0;
for (auto &lr : lrs_at_time[t]) {
size_at_t += lr->size;
}
size_at_time[t] = size_at_t;
min_required_size = std::max(size_at_t, min_required_size);
// Calculate all neighbours
for (size_t i = 0; i < lrs_at_time[t].size(); ++i) {
auto lr1 = lrs_at_time[t][i];
auto &nb1 = neighbours[lr1->id];
for (size_t j = i + 1; j < lrs_at_time[t].size(); ++j) {
auto lr2 = lrs_at_time[t][j];
if (find(nb1.begin(), nb1.end(), lr2) == nb1.end()) {
nb1.push_back(lr2);
neighbours[lr2->id].push_back(lr1);
}
}
}
}
target_size = std::max(min_required_size, size_limit);
// Calculate the urgency of each live range
lr_urgency.resize(lrs.size());
for (size_t i = 0; i < lrs.size(); ++i) {
auto &lr = lrs[i];
uint32_t urgency = 0;
for (size_t t = lr.start_time; t <= lr.end_time; ++t) {
urgency = std::max(size_at_time[t], urgency);
}
lr_urgency[i] = urgency;
}
best_size = UINT32_MAX;
}
uint32_t SearchAllocator::allocate(std::vector<uint32_t> &output) {
output.clear();
nr_iterations = 0;
std::vector<size_t> indices;
// Initial solution, using a heuristic allocator
for (size_t i = 0; i < lrs.size(); ++i) {
indices.push_back(i);
}
sort_indices_on_prio(indices);
// Allocate the initial solution
best_size = UINT32_MAX;
best_size = allocate_indices(indices);
if (best_size <= target_size) {
// The heuristic allocation returned an optimal solution.
// No need to search.
} else {
// Try to improve the heuristic allocation
search(indices, best_size, MAX_ITERATIONS);
}
output.clear();
for (auto &lr : lrs) {
output.push_back(lr.address);
}
return best_size;
}
void SearchAllocator::allocate_lr(LiveRange &lr) const {
uint32_t address = 0;
int predecessor = NO_PREDECESSOR;
bool fits = false;
while (!fits) {
fits = true;
// Find neighbours that overlap with address
for (auto lr2_p : neighbours[lr.id]) {
if (lr2_p->address == NOT_ALLOCATED || lr2_p->end_address <= address) {
continue;
}
if (lr2_p->overlaps(address, lr.size)) {
// Overlap found; increase address
fits = false;
address = lr2_p->end_address;
predecessor = lr2_p->id;
}
}
}
lr.address = address;
lr.end_address = address + lr.size;
lr.predecessor = predecessor;
}
uint32_t SearchAllocator::allocate_indices(const std::vector<size_t> &indices) {
++nr_iterations;
std::vector<size_t> count(indices.size());
for (auto &lr : lrs) {
lr.address = NOT_ALLOCATED;
}
uint32_t size = 0;
for (size_t turn = 0; size <= best_size && turn < indices.size(); ++turn) {
auto &lr = lrs[indices[turn]];
allocate_lr(lr);
lr.turn = turn;
size = std::max(size, lr.end_address);
}
return size;
}
void SearchAllocator::sort_indices_on_prio(std::vector<size_t> &indices) const {
std::sort(indices.begin(), indices.end(),
[this] (size_t const& a, size_t const& b) {
if (lr_urgency[a] != lr_urgency[b]) {
return lr_urgency[a] > lr_urgency[b];
}
auto &lr1 = lrs[a];
auto &lr2 = lrs[b];
auto duration1 = lr1.end_time - lr1.start_time;
auto duration2 = lr2.end_time - lr2.start_time;
if (duration1 != duration2) {
return duration1 > duration2;
}
if (lr1.start_time != lr2.start_time) {
return lr1.start_time < lr2.start_time;
}
if (lr1.size != lr2.size) {
return lr1.size > lr2.size;
}
return lr1.id < lr2.id;
});
}
void SearchAllocator::add_predecessor_turns(std::set<size_t> &turns, const LiveRange &lr) const {
turns.insert(lr.turn);
int id = lr.id;
while (lrs[id].predecessor != NO_PREDECESSOR) {
id = lrs[id].predecessor;
turns.insert(lrs[id].turn);
}
}
void SearchAllocator::attempt_bottleneck_fix(std::vector<size_t> &indices) {
// Find the bottleneck
LiveRange *max_lr = &lrs[0];
for (auto &lr : lrs) {
if (lr.end_address > max_lr->end_address) {
max_lr = &lr;
}
}
// Find all live ranges that affected the placement of the bottleneck live range.
// This consists of two types of live ranges:
// - direct neighbours of the bottleneck live range
// - direct and indirect predecessors of these neighbours + bottleneck
// The turns at which these live ranges were allocated are put in the turns vector.
std::set<size_t> turns;
add_predecessor_turns(turns, *max_lr);
for (auto lr_p : neighbours[max_lr->id]) {
add_predecessor_turns(turns, *lr_p);
}
// Non-direct neighbours that interfere with the allocation of the bottleneck are the
// immediate cause for gaps in the allocation, and are selected with higher probability.
std::vector<size_t> turn_list;
std::vector<size_t> non_nb_turn_list;
for (auto turn : turns) {
turn_list.push_back(turn);
auto &lr = lrs[indices[turn]];
if (!max_lr->is_neighbour(lr)) {
non_nb_turn_list.push_back(turn);
}
}
size_t ix1;
if (rng() % 100 < 30 && !non_nb_turn_list.empty()) {
// Pick a live range from the "non-neighbour list"
ix1 = non_nb_turn_list[rng() % non_nb_turn_list.size()];
} else {
// Pick any affecting live range.
ix1 = turn_list[rng() % turn_list.size()];
}
// Note: turn_list has always at least 2 elements for bottlenecks
size_t ix2 = turn_list[rng() % (turn_list.size() - 1)];
if (ix1 == ix2) {
ix2 = turn_list[turn_list.size() - 1];
}
// Swap indices
std::swap(indices[ix1], indices[ix2]);
}
void SearchAllocator::search(std::vector<size_t> &indices, uint32_t initial_size, int iterations) {
std::vector<size_t> best_indices = indices;
std::vector<LiveRange> best_lrs = lrs;
for (int i = 0; i < iterations; ++i) {
// Reorder the indices
attempt_bottleneck_fix(indices);
// Allocate the reordered indices and check if it gave an improvement
auto new_size = allocate_indices(indices);
if (new_size <= best_size) {
// The new allocation produced a new best result; remember it
best_size = new_size;
best_indices = indices;
best_lrs = lrs;
if (best_size <= target_size) {
// Target reached; stop
return;
}
} else {
// The new allocation produced worse result; undo the change
indices = best_indices;
lrs = best_lrs;
}
}
lrs = best_lrs;
}
uint32_t allocate(const std::vector<uint32_t> &input, int available_size, std::vector<uint32_t> &output) {
// Convert input to vector of live ranges
std::vector<LiveRange> lrs;
for (size_t i = 0; i < input.size(); i += 3) {
LiveRange lr;
lr.start_time = input[i];
lr.end_time = input[i+1];
lr.size = input[i+2];
lrs.push_back(lr);
}
SearchAllocator allocator(lrs, available_size);
return allocator.allocate(output);
}