2016-07-12 21:13:44 +00:00
|
|
|
//===-- IndirectCallPromotionAnalysis.cpp - Find promotion candidates ===//
|
|
|
|
//
|
2019-01-19 08:50:56 +00:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2016-07-12 21:13:44 +00:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// Helper methods for identifying profitable indirect call promotion
|
|
|
|
// candidates for an instruction when the indirect-call value profile metadata
|
|
|
|
// is available.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
|
2022-02-28 14:08:36 +01:00
|
|
|
#include "llvm/IR/Instruction.h"
|
2016-07-12 21:13:44 +00:00
|
|
|
#include "llvm/ProfileData/InstrProf.h"
|
2019-11-14 15:15:48 -08:00
|
|
|
#include "llvm/Support/CommandLine.h"
|
2016-07-12 21:13:44 +00:00
|
|
|
#include "llvm/Support/Debug.h"
|
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
|
|
|
|
#define DEBUG_TYPE "pgo-icall-prom-analysis"
|
|
|
|
|
2017-07-28 01:02:54 +00:00
|
|
|
// The percent threshold for the direct-call target (this call site vs the
|
|
|
|
// remaining call count) for it to be considered as the promotion target.
|
|
|
|
static cl::opt<unsigned> ICPRemainingPercentThreshold(
|
2022-06-03 21:59:05 -07:00
|
|
|
"icp-remaining-percent-threshold", cl::init(30), cl::Hidden,
|
2017-07-28 01:02:54 +00:00
|
|
|
cl::desc("The percentage threshold against remaining unpromoted indirect "
|
|
|
|
"call count for the promotion"));
|
|
|
|
|
2016-07-12 21:13:44 +00:00
|
|
|
// The percent threshold for the direct-call target (this call site vs the
|
|
|
|
// total call count) for it to be considered as the promotion target.
|
|
|
|
static cl::opt<unsigned>
|
2017-07-28 01:02:54 +00:00
|
|
|
ICPTotalPercentThreshold("icp-total-percent-threshold", cl::init(5),
|
2022-06-04 00:10:42 -07:00
|
|
|
cl::Hidden,
|
2017-07-28 01:02:54 +00:00
|
|
|
cl::desc("The percentage threshold against total "
|
|
|
|
"count for the promotion"));
|
2016-07-12 21:13:44 +00:00
|
|
|
|
|
|
|
// Set the maximum number of targets to promote for a single indirect-call
|
|
|
|
// callsite.
|
[SampleFDO] Another fix to prevent repeated indirect call promotion in
sample loader pass.
In https://reviews.llvm.org/rG5fb65c02ca5e91e7e1a00e0efdb8edc899f3e4b9,
to prevent repeated indirect call promotion for the same indirect call
and the same target, we used zero-count value profile to indicate an
indirect call has been promoted for a certain target. We removed
PromotedInsns cache in the same patch. However, there was a problem in
that patch described below, and that problem led me to add PromotedInsns
back as a mitigation in
https://reviews.llvm.org/rG4ffad1fb489f691825d6c7d78e1626de142f26cf.
When we get value profile from metadata by calling getValueProfDataFromInst,
we need to specify the maximum possible number of values we expect to read.
We uses MaxNumPromotions in the last patch so the maximum number of value
information extracted from metadata is MaxNumPromotions. If we have many
values including zero-count values when we write the metadata, some of them
will be dropped when we read them because we only read MaxNumPromotions
values. It will allow repeated indirect call promotion again. We need to
make sure if there are values indicating promoted targets, those values need
to be saved in metadata with higher priority than other values.
The patch fixed that problem. We change to use -1 to represent the count
of a promoted target instead of 0 so it is easier to sort the values.
When we prepare to update the metadata in updateIDTMetaData, we will sort
the values in the descending count order and extract only MaxNumPromotions
values to write into metadata. Since -1 is the max uint64_t number, if we
have equal to or less than MaxNumPromotions of -1 count values, they will
all be kept in metadata. If we have more than MaxNumPromotions of -1 count
values, we will only save MaxNumPromotions such values maximally. In such
case, we have logic in place in doesHistoryAllowICP to guarantee no more
promotion in sample loader pass will happen for the indirect call, because
it has been promoted enough.
With this change, now we can remove PromotedInsns without problem.
Differential Revision: https://reviews.llvm.org/D97350
2021-02-19 22:43:21 -08:00
|
|
|
static cl::opt<unsigned>
|
2022-06-03 21:59:05 -07:00
|
|
|
MaxNumPromotions("icp-max-prom", cl::init(3), cl::Hidden,
|
2016-07-12 21:13:44 +00:00
|
|
|
cl::desc("Max number of promotions for a single indirect "
|
|
|
|
"call callsite"));
|
|
|
|
|
2024-04-01 15:14:49 -07:00
|
|
|
// Upper bound on how many vtables are annotated for one vtable load
// instruction.
// NOTE(review): defined without `static`, so it has external linkage --
// presumably it is referenced from other translation units; confirm before
// changing its linkage or name.
cl::opt<unsigned> MaxNumVTableAnnotations(
    "icp-max-num-vtables", cl::Hidden,
    cl::desc("Max number of vtables annotated for a vtable load instruction."),
    cl::init(6));
|
|
|
|
|
2016-07-12 21:13:44 +00:00
|
|
|
bool ICallPromotionAnalysis::isPromotionProfitable(uint64_t Count,
|
2017-07-28 01:02:54 +00:00
|
|
|
uint64_t TotalCount,
|
|
|
|
uint64_t RemainingCount) {
|
2017-08-08 20:57:33 +00:00
|
|
|
return Count * 100 >= ICPRemainingPercentThreshold * RemainingCount &&
|
2017-07-28 01:02:54 +00:00
|
|
|
Count * 100 >= ICPTotalPercentThreshold * TotalCount;
|
2016-07-12 21:13:44 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Indirect-call promotion heuristic. The direct targets are sorted based on
|
|
|
|
// the count. Stop at the first target that is not promoted. Returns the
|
|
|
|
// number of candidates deemed profitable.
|
|
|
|
uint32_t ICallPromotionAnalysis::getProfitablePromotionCandidates(
|
2024-07-01 14:54:59 -07:00
|
|
|
const Instruction *Inst, uint64_t TotalCount) {
|
2018-05-14 12:53:11 +00:00
|
|
|
LLVM_DEBUG(dbgs() << " \nWork on callsite " << *Inst
|
2024-07-01 14:54:59 -07:00
|
|
|
<< " Num_targets: " << ValueDataArray.size() << "\n");
|
2016-07-12 21:13:44 +00:00
|
|
|
|
|
|
|
uint32_t I = 0;
|
2017-07-28 01:02:54 +00:00
|
|
|
uint64_t RemainingCount = TotalCount;
|
2024-07-01 14:54:59 -07:00
|
|
|
for (; I < MaxNumPromotions && I < ValueDataArray.size(); I++) {
|
|
|
|
uint64_t Count = ValueDataArray[I].Count;
|
2017-07-28 01:02:54 +00:00
|
|
|
assert(Count <= RemainingCount);
|
2018-05-14 12:53:11 +00:00
|
|
|
LLVM_DEBUG(dbgs() << " Candidate " << I << " Count=" << Count
|
2024-07-01 14:54:59 -07:00
|
|
|
<< " Target_func: " << ValueDataArray[I].Value << "\n");
|
2016-07-12 21:13:44 +00:00
|
|
|
|
2017-07-28 01:02:54 +00:00
|
|
|
if (!isPromotionProfitable(Count, TotalCount, RemainingCount)) {
|
2018-05-14 12:53:11 +00:00
|
|
|
LLVM_DEBUG(dbgs() << " Not promote: Cold target.\n");
|
2016-07-12 21:13:44 +00:00
|
|
|
return I;
|
|
|
|
}
|
2017-07-28 01:02:54 +00:00
|
|
|
RemainingCount -= Count;
|
2016-07-12 21:13:44 +00:00
|
|
|
}
|
|
|
|
return I;
|
|
|
|
}
|
|
|
|
|
[TypeProf][InstrFDO]Implement more efficient comparison sequence for indirect-call-promotion with vtable profiles. (#81442)
Clang's `-fwhole-program-vtables` is required for this optimization to
take place. If `-fwhole-program-vtables` is not enabled, this change is
no-op.
* Function-comparison (before):
```
%vtable = load ptr, ptr %obj
%vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
%func = load ptr, ptr %vfn
%cond = icmp eq ptr %func, @callee
br i1 %cond, label bb1, label bb2:
bb1:
call @callee
bb2:
call %func
```
* VTable-comparison (after):
```
%vtable = load ptr, ptr %obj
%cond = icmp eq ptr %vtable, @vtable-address-point
br i1 %cond, label bb1, label bb2:
bb1:
call @callee
bb2:
%vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
%func = load ptr, ptr %vfn
call %func
```
Key changes:
1. Find out virtual calls and the vtables they come from.
- The ICP relies on type intrinsic `llvm.type.test` to find out virtual
calls and the
compatible vtables, and relies on type metadata to find the address
point for comparison.
2. ICP pass does cost-benefit analysis and compares vtable only when the
number of vtables for a function candidate is within (option specified)
threshold.
3. Sink the function addressing and vtable load instruction to indirect
fallback.
- The sink helper functions are simplified versions of
`InstCombinerImpl::tryToSinkInstruction`. Currently debug intrinsics are
not handled. Ideally `InstCombinerImpl::tryToSinkInstructionDbgValues`
and `InstCombinerImpl::tryToSinkInstructionDbgVariableRecords` could be
moved into Transforms/Utils/Local.cpp (or another util cpp file) to
handle debug intrinsics when moving instructions across basic blocks.
4. Keep value profiles updated
1) Update vtable value profiles after inline
2) For either function-based comparison or vtable-based comparison,
update both vtable and indirect call value profiles.
2024-06-29 23:21:33 -07:00
|
|
|
// Loads the indirect-call-target value profile attached to instruction I into
// ValueDataArray, requesting at most MaxNumPromotions entries, and reports how
// many of the leading entries are worth promoting.
//
// TotalCount is passed by reference to getValueProfDataFromInst and then
// forwarded to the profitability scan. NumCandidates is set to the number of
// profitable candidates, or to 0 when no profile data was found.
//
// Returns a view over ValueDataArray, or an empty MutableArrayRef when no
// profile data was found. The view refers to storage held by this object.
MutableArrayRef<InstrProfValueData>
ICallPromotionAnalysis::getPromotionCandidatesForInstruction(
    const Instruction *I, uint64_t &TotalCount, uint32_t &NumCandidates) {
  ValueDataArray = getValueProfDataFromInst(*I, IPVK_IndirectCallTarget,
                                            MaxNumPromotions, TotalCount);

  if (!ValueDataArray.empty()) {
    NumCandidates = getProfitablePromotionCandidates(I, TotalCount);
    return ValueDataArray;
  }

  // Nothing was read for this instruction: report zero candidates.
  NumCandidates = 0;
  return MutableArrayRef<InstrProfValueData>();
}
|