//===- BalancedPartitioning.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements BalancedPartitioning, a recursive balanced graph
// partitioning algorithm.
//
//===----------------------------------------------------------------------===//

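// High-level sketch of the algorithm implemented below: the input is a
// bipartite graph of function nodes and the utility nodes they share. We
// recursively bisect the set of function nodes, and at each level run a
// local search that moves nodes between the two halves so that functions
// sharing many utility nodes land in the same half. See
// BalancedPartitioning.h for the full description of the data structures.
//
// Usage sketch (hypothetical client code; how the nodes are built depends
// on the caller):
//
//   BalancedPartitioningConfig Config;
//   BalancedPartitioning BP(Config);
//   std::vector<BPFunctionNode> Nodes = /* one node per function */;
//   BP.run(Nodes); // On return, Nodes is sorted by the assigned buckets.
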
#include "llvm/Support/BalancedPartitioning.h"
|
|
#include "llvm/Support/Debug.h"
|
|
#include "llvm/Support/Format.h"
|
|
#include "llvm/Support/FormatVariadic.h"
|
|
#include "llvm/Support/ThreadPool.h"
|
|
|
|
using namespace llvm;
#define DEBUG_TYPE "balanced-partitioning"

void BPFunctionNode::dump(raw_ostream &OS) const {
  OS << formatv("{{ID={0} Utilities={{{1:$[,]}} Bucket={2}}", Id,
                make_range(UtilityNodes.begin(), UtilityNodes.end()), Bucket);
}

template <typename Func>
void BalancedPartitioning::BPThreadPool::async(Func &&F) {
#if LLVM_ENABLE_THREADS
  // This new thread could spawn more threads, so mark it as active
  ++NumActiveThreads;
  TheThreadPool.async([=]() {
    // Run the task
    F();

    // This thread will no longer spawn new threads, so mark it as inactive
    if (--NumActiveThreads == 0) {
      // There are no more active threads, so mark as finished and notify
      {
        std::unique_lock<std::mutex> lock(mtx);
        assert(!IsFinishedSpawning);
        IsFinishedSpawning = true;
      }
      cv.notify_one();
    }
  });
#else
  llvm_unreachable("threads are disabled");
#endif
}

void BalancedPartitioning::BPThreadPool::wait() {
#if LLVM_ENABLE_THREADS
  // TODO: We could remove the mutex and condition variable and use
  // std::atomic::wait() instead, but that isn't available until C++20
  {
    std::unique_lock<std::mutex> lock(mtx);
    cv.wait(lock, [&]() { return IsFinishedSpawning; });
    assert(IsFinishedSpawning && NumActiveThreads == 0);
  }
  // Now we can call ThreadPool::wait() since all tasks have been submitted
  TheThreadPool.wait();
#else
  llvm_unreachable("threads are disabled");
#endif
}

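// Note on the shutdown handshake in wait() above: tasks submitted through
// BPThreadPool::async() may themselves submit more tasks, so
// ThreadPool::wait() cannot be called until spawning has quiesced.
// NumActiveThreads tracks tasks that may still spawn work; only when it
// reaches zero is IsFinishedSpawning set and the waiter released.
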
BalancedPartitioning::BalancedPartitioning(
    const BalancedPartitioningConfig &Config)
    : Config(Config) {
  // Pre-compute log2 values for small inputs
  Log2Cache[0] = 0.0;
  for (unsigned I = 1; I < LOG_CACHE_SIZE; I++)
    Log2Cache[I] = std::log2(I);
}

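// The table exists because log2Cached() (defined at the bottom of this file)
// sits on the hot path of the gain computation in runIteration(); counts
// below LOG_CACHE_SIZE are answered from the cache and larger ones fall
// back to std::log2.
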
void BalancedPartitioning::run(std::vector<BPFunctionNode> &Nodes) const {
  LLVM_DEBUG(
      dbgs() << format(
          "Partitioning %d nodes using depth %d and %d iterations per split\n",
          Nodes.size(), Config.SplitDepth, Config.IterationsPerSplit));
  std::optional<BPThreadPool> TP;
#if LLVM_ENABLE_THREADS
  DefaultThreadPool TheThreadPool;
  if (Config.TaskSplitDepth > 1)
    TP.emplace(TheThreadPool);
#endif

  // Record the input order
  for (unsigned I = 0; I < Nodes.size(); I++)
    Nodes[I].InputOrderIndex = I;

  auto NodesRange = llvm::make_range(Nodes.begin(), Nodes.end());
  auto BisectTask = [=, &TP]() {
    bisect(NodesRange, /*RecDepth=*/0, /*RootBucket=*/1, /*Offset=*/0, TP);
  };
  if (TP) {
    TP->async(std::move(BisectTask));
    TP->wait();
  } else {
    BisectTask();
  }

  llvm::stable_sort(NodesRange, [](const auto &L, const auto &R) {
    return L.Bucket < R.Bucket;
  });

  LLVM_DEBUG(dbgs() << "Balanced partitioning completed\n");
}

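// The recursion labels the two halves of each split heap-style: a split with
// root bucket B assigns 2*B to its left half and 2*B + 1 to its right half
// (the initial call uses B = 1). These labels identify the halves during
// refinement and deterministically seed the RNG; at the leaves the Bucket
// field is overwritten with the node's final position via Offset, which is
// what run() sorts by.
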
void BalancedPartitioning::bisect(const FunctionNodeRange Nodes,
                                  unsigned RecDepth, unsigned RootBucket,
                                  unsigned Offset,
                                  std::optional<BPThreadPool> &TP) const {
  unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end());
  if (NumNodes <= 1 || RecDepth >= Config.SplitDepth) {
    // We've reached the lowest level of the recursion tree. Fall back to the
    // original order and assign to buckets.
    llvm::sort(Nodes, [](const auto &L, const auto &R) {
      return L.InputOrderIndex < R.InputOrderIndex;
    });
    for (auto &N : Nodes)
      N.Bucket = Offset++;
    return;
  }

  LLVM_DEBUG(dbgs() << format("Bisect with %d nodes and root bucket %d\n",
                              NumNodes, RootBucket));

  std::mt19937 RNG(RootBucket);

  unsigned LeftBucket = 2 * RootBucket;
  unsigned RightBucket = 2 * RootBucket + 1;

  // Split into two and assign to the left and right buckets
  split(Nodes, LeftBucket);

  runIterations(Nodes, LeftBucket, RightBucket, RNG);

  // Split nodes w.r.t. the resulting buckets
  auto NodesMid =
      llvm::partition(Nodes, [&](auto &N) { return N.Bucket == LeftBucket; });
  unsigned MidOffset = Offset + std::distance(Nodes.begin(), NodesMid);

  auto LeftNodes = llvm::make_range(Nodes.begin(), NodesMid);
  auto RightNodes = llvm::make_range(NodesMid, Nodes.end());

  auto LeftRecTask = [=, &TP]() {
    bisect(LeftNodes, RecDepth + 1, LeftBucket, Offset, TP);
  };
  auto RightRecTask = [=, &TP]() {
    bisect(RightNodes, RecDepth + 1, RightBucket, MidOffset, TP);
  };

  if (TP && RecDepth < Config.TaskSplitDepth && NumNodes >= 4) {
    TP->async(std::move(LeftRecTask));
    TP->async(std::move(RightRecTask));
  } else {
    LeftRecTask();
    RightRecTask();
  }
}

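// A "signature" summarizes one utility node within the current split: how
// many of its functions sit in the left bucket (LeftCount) versus the right
// bucket (RightCount). The refinement below needs only these two counts per
// utility node. Utility nodes with a single edge are dropped up front since
// they cost the same on either side, and utility nodes connected to every
// function are dropped since they cannot distinguish the two halves.
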
void BalancedPartitioning::runIterations(const FunctionNodeRange Nodes,
                                         unsigned LeftBucket,
                                         unsigned RightBucket,
                                         std::mt19937 &RNG) const {
  unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end());
  DenseMap<BPFunctionNode::UtilityNodeT, unsigned> UtilityNodeIndex;
  for (auto &N : Nodes)
    for (auto &UN : N.UtilityNodes)
      ++UtilityNodeIndex[UN];
  // Remove utility nodes if they have just one edge or are connected to all
  // functions
  for (auto &N : Nodes)
    llvm::erase_if(N.UtilityNodes, [&](auto &UN) {
      return UtilityNodeIndex[UN] == 1 || UtilityNodeIndex[UN] == NumNodes;
    });

  // Renumber utility nodes so they can be used to index into Signatures
  UtilityNodeIndex.clear();
  for (auto &N : Nodes)
    for (auto &UN : N.UtilityNodes)
      UN = UtilityNodeIndex.insert({UN, UtilityNodeIndex.size()}).first->second;

  // Initialize signatures
  SignaturesT Signatures(/*Size=*/UtilityNodeIndex.size());
  for (auto &N : Nodes) {
    for (auto &UN : N.UtilityNodes) {
      assert(UN < Signatures.size());
      if (N.Bucket == LeftBucket) {
        Signatures[UN].LeftCount++;
      } else {
        Signatures[UN].RightCount++;
      }
    }
  }

  for (unsigned I = 0; I < Config.IterationsPerSplit; I++) {
    unsigned NumMovedNodes =
        runIteration(Nodes, LeftBucket, RightBucket, Signatures, RNG);
    if (NumMovedNodes == 0)
      break;
  }
}

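// Each iteration is one local-search pass, broadly in the spirit of
// Kernighan-Lin refinement: compute a move gain for every node, sort the
// candidates on each side by descending gain, then greedily pair the best
// left candidate with the best right candidate, stopping once a pair's
// combined gain is no longer positive.
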
unsigned BalancedPartitioning::runIteration(const FunctionNodeRange Nodes,
                                            unsigned LeftBucket,
                                            unsigned RightBucket,
                                            SignaturesT &Signatures,
                                            std::mt19937 &RNG) const {
  // Init signature cost caches
  for (auto &Signature : Signatures) {
    if (Signature.CachedGainIsValid)
      continue;
    unsigned L = Signature.LeftCount;
    unsigned R = Signature.RightCount;
    assert((L > 0 || R > 0) && "incorrect signature");
    float Cost = logCost(L, R);
    Signature.CachedGainLR = 0.f;
    Signature.CachedGainRL = 0.f;
    if (L > 0)
      Signature.CachedGainLR = Cost - logCost(L - 1, R + 1);
    if (R > 0)
      Signature.CachedGainRL = Cost - logCost(L + 1, R - 1);
    Signature.CachedGainIsValid = true;
  }

  // Compute move gains
  typedef std::pair<float, BPFunctionNode *> GainPair;
  std::vector<GainPair> Gains;
  for (auto &N : Nodes) {
    bool FromLeftToRight = (N.Bucket == LeftBucket);
    float Gain = moveGain(N, FromLeftToRight, Signatures);
    Gains.push_back(std::make_pair(Gain, &N));
  }

  // Collect left and right gains
  auto LeftEnd = llvm::partition(
      Gains, [&](const auto &GP) { return GP.second->Bucket == LeftBucket; });
  auto LeftRange = llvm::make_range(Gains.begin(), LeftEnd);
  auto RightRange = llvm::make_range(LeftEnd, Gains.end());

  // Sort gains in descending order
  auto LargerGain = [](const auto &L, const auto &R) {
    return L.first > R.first;
  };
  llvm::stable_sort(LeftRange, LargerGain);
  llvm::stable_sort(RightRange, LargerGain);

  unsigned NumMovedDataVertices = 0;
  for (auto [LeftPair, RightPair] : llvm::zip(LeftRange, RightRange)) {
    auto &[LeftGain, LeftNode] = LeftPair;
    auto &[RightGain, RightNode] = RightPair;
    // Stop when the gain is no longer beneficial
    if (LeftGain + RightGain <= 0.f)
      break;
    // Try to exchange the nodes between buckets
    if (moveFunctionNode(*LeftNode, LeftBucket, RightBucket, Signatures, RNG))
      ++NumMovedDataVertices;
    if (moveFunctionNode(*RightNode, LeftBucket, RightBucket, Signatures, RNG))
      ++NumMovedDataVertices;
  }
  return NumMovedDataVertices;
}

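// The occasional random skip below makes each pass stochastic: rejecting a
// fraction of proposed moves perturbs the greedy pairing so that repeated
// iterations can escape local optima (see the comment inside
// moveFunctionNode()).
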
bool BalancedPartitioning::moveFunctionNode(BPFunctionNode &N,
                                            unsigned LeftBucket,
                                            unsigned RightBucket,
                                            SignaturesT &Signatures,
                                            std::mt19937 &RNG) const {
  // Sometimes we skip the move. This helps to escape local optima
  if (std::uniform_real_distribution<float>(0.f, 1.f)(RNG) <=
      Config.SkipProbability)
    return false;

  bool FromLeftToRight = (N.Bucket == LeftBucket);
  // Update the current bucket
  N.Bucket = (FromLeftToRight ? RightBucket : LeftBucket);

  // Update signatures and invalidate gain cache
  if (FromLeftToRight) {
    for (auto &UN : N.UtilityNodes) {
      auto &Signature = Signatures[UN];
      Signature.LeftCount--;
      Signature.RightCount++;
      Signature.CachedGainIsValid = false;
    }
  } else {
    for (auto &UN : N.UtilityNodes) {
      auto &Signature = Signatures[UN];
      Signature.LeftCount++;
      Signature.RightCount--;
      Signature.CachedGainIsValid = false;
    }
  }
  return true;
}

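// Note that the signatures are maintained incrementally: a move touches only
// the utility nodes incident to the moved function, adjusting their counts
// and invalidating their cached gains, so the next pass recomputes logCost()
// only for signatures that actually changed.
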
void BalancedPartitioning::split(const FunctionNodeRange Nodes,
                                 unsigned StartBucket) const {
  unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end());
  auto NodesMid = Nodes.begin() + (NumNodes + 1) / 2;

  std::nth_element(Nodes.begin(), NodesMid, Nodes.end(), [](auto &L, auto &R) {
    return L.InputOrderIndex < R.InputOrderIndex;
  });

  for (auto &N : llvm::make_range(Nodes.begin(), NodesMid))
    N.Bucket = StartBucket;
  for (auto &N : llvm::make_range(NodesMid, Nodes.end()))
    N.Bucket = StartBucket + 1;
}

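// The initial split is deterministic: nth_element places the
// (NumNodes + 1) / 2 nodes with the smallest InputOrderIndex into the left
// half in linear time, so each bisection starts from the first half of the
// input order versus the second half.
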
float BalancedPartitioning::moveGain(const BPFunctionNode &N,
                                     bool FromLeftToRight,
                                     const SignaturesT &Signatures) {
  float Gain = 0.f;
  for (auto &UN : N.UtilityNodes)
    Gain += (FromLeftToRight ? Signatures[UN].CachedGainLR
                             : Signatures[UN].CachedGainRL);
  return Gain;
}

float BalancedPartitioning::logCost(unsigned X, unsigned Y) const {
  return -(X * log2Cached(X + 1) + Y * log2Cached(Y + 1));
}

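// A worked example of the cost function: logCost(X, Y) =
// -(X * log2(X + 1) + Y * log2(Y + 1)) is minimized when a utility node's
// functions are skewed to one side. For a utility node with 4 functions:
//
//   logCost(4, 0) = -(4 * log2(5)) ~ -9.29  (fully skewed, best)
//   logCost(3, 1) = -(3 * 2 + 1)   =  -7.00
//   logCost(2, 2) = -(4 * log2(3)) ~ -6.34  (balanced, worst)
//
// Hence for a (3, 1) signature, CachedGainRL = -7.00 - (-9.29) = 2.29 > 0
// (moving the lone right node left helps), while CachedGainLR =
// -7.00 - (-6.34) = -0.66 < 0 (de-skewing hurts).
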
float BalancedPartitioning::log2Cached(unsigned i) const {
  return (i < LOG_CACHE_SIZE) ? Log2Cache[i] : std::log2(i);
}