llvm-project/clang/lib/CodeGen/CGLoopInfo.cpp
Mark Heffernan 397a98d86d Add new llvm.loop.unroll.enable metadata for use with "#pragma unroll".
This change adds the new unroll metadata "llvm.loop.unroll.enable" which directs
the optimizer to unroll a loop fully if the trip count is known at compile time, and
unroll partially if the trip count is not known at compile time. This differs from
"llvm.loop.unroll.full" which explicitly does not unroll a loop if the trip count is not
known at compile time.

With this change "#pragma unroll" generates "llvm.loop.unroll.enable" rather than
"llvm.loop.unroll.full" metadata. This changes the semantics of "#pragma unroll" slightly
to mean "unroll aggressively (fully or partially)" rather than "unroll fully or not at all".

The motivating example for this change was some internal code with a loop marked
with "#pragma unroll" which only sometimes had a compile-time trip count depending
on template magic. When the trip count was a compile-time constant, everything works
as expected and the loop is fully unrolled. However, when the trip count was not a
compile-time constant the "#pragma unroll" explicitly disabled unrolling of the loop(!).
Removing "#pragma unroll" caused the loop to be unrolled partially which was desirable
from a performance perspective.

llvm-svn: 244467
2015-08-10 17:29:39 +00:00

249 lines
7.9 KiB
C++

//===---- CGLoopInfo.cpp - LLVM CodeGen for loop metadata -*- C++ -*-------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "CGLoopInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/Sema/LoopHint.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
using namespace clang::CodeGen;
using namespace llvm;
/// Build the self-referential loop ID metadata node that encodes \p Attrs.
///
/// \param Ctx   Context in which all metadata nodes are created.
/// \param Attrs Loop attributes to encode.
/// \returns nullptr when no attribute differs from its "unset" value
///          (not parallel, all counts zero, both enable states Unspecified);
///          otherwise a node whose operand 0 is the node itself and whose
///          remaining operands are one hint node per attribute that is set.
static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs) {
  // Nothing was requested for this loop, so no metadata is needed.
  if (!Attrs.IsParallel && Attrs.VectorizeWidth == 0 &&
      Attrs.InterleaveCount == 0 && Attrs.UnrollCount == 0 &&
      Attrs.VectorizeEnable == LoopAttributes::Unspecified &&
      Attrs.UnrollEnable == LoopAttributes::Unspecified)
    return nullptr;

  SmallVector<Metadata *, 4> Args;
  // Reserve operand 0 for loop id self reference. The temporary node only
  // holds the slot until the real node exists and can point at itself.
  auto TempNode = MDNode::getTemporary(Ctx, None);
  Args.push_back(TempNode.get());

  // Appends a two-operand hint node: !{!"<Name>", <Ty> <Value>}.
  auto AddIntHint = [&](const char *Name, Type *Ty, uint64_t Value) {
    Metadata *Vals[] = {MDString::get(Ctx, Name),
                        ConstantAsMetadata::get(ConstantInt::get(Ty, Value))};
    Args.push_back(MDNode::get(Ctx, Vals));
  };

  // Setting vectorize.width
  if (Attrs.VectorizeWidth > 0)
    AddIntHint("llvm.loop.vectorize.width", Type::getInt32Ty(Ctx),
               Attrs.VectorizeWidth);

  // Setting interleave.count
  if (Attrs.InterleaveCount > 0)
    AddIntHint("llvm.loop.interleave.count", Type::getInt32Ty(Ctx),
               Attrs.InterleaveCount);

  // Setting unroll.count
  if (Attrs.UnrollCount > 0)
    AddIntHint("llvm.loop.unroll.count", Type::getInt32Ty(Ctx),
               Attrs.UnrollCount);

  // Setting vectorize.enable (an i1: true for Enable, false otherwise)
  if (Attrs.VectorizeEnable != LoopAttributes::Unspecified)
    AddIntHint("llvm.loop.vectorize.enable", Type::getInt1Ty(Ctx),
               Attrs.VectorizeEnable == LoopAttributes::Enable);

  // Setting unroll.enable, unroll.full or unroll.disable. These hints carry
  // no value; the name alone is the directive.
  if (Attrs.UnrollEnable != LoopAttributes::Unspecified) {
    const char *Name = "llvm.loop.unroll.disable";
    if (Attrs.UnrollEnable == LoopAttributes::Enable)
      Name = "llvm.loop.unroll.enable";
    else if (Attrs.UnrollEnable == LoopAttributes::Full)
      Name = "llvm.loop.unroll.full";
    Metadata *Vals[] = {MDString::get(Ctx, Name)};
    Args.push_back(MDNode::get(Ctx, Vals));
  }

  // Set the first operand to itself.
  MDNode *LoopID = MDNode::get(Ctx, Args);
  LoopID->replaceOperandWith(0, LoopID);
  return LoopID;
}
/// Construct a set of loop attributes in which only the parallel flag is
/// taken from the caller: both enable states start as Unspecified and the
/// vectorize width, interleave count and unroll count all start at 0.
LoopAttributes::LoopAttributes(bool IsParallel)
: IsParallel(IsParallel), VectorizeEnable(LoopAttributes::Unspecified),
UnrollEnable(LoopAttributes::Unspecified), VectorizeWidth(0),
InterleaveCount(0), UnrollCount(0) {}
/// Reset every attribute to its "unset" value: not parallel, both enable
/// states Unspecified, and all numeric hints zero.
void LoopAttributes::clear() {
  // Enable states go back to "no hint given".
  VectorizeEnable = UnrollEnable = LoopAttributes::Unspecified;
  // A zero count/width means the corresponding hint is absent.
  VectorizeWidth = InterleaveCount = UnrollCount = 0;
  IsParallel = false;
}
/// Record the loop's header block and a copy of its attributes, and build the
/// loop ID metadata for those attributes in the header's LLVMContext. The
/// loop ID is null when createMetadata() finds nothing to encode.
LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs)
    : LoopID(createMetadata(Header->getContext(), Attrs)), Header(Header),
      Attrs(Attrs) {}
/// Begin a new loop whose entry block is \p Header, giving it whatever
/// attributes are currently staged.
void LoopInfoStack::push(BasicBlock *Header) {
  const LoopInfo Entry(Header, StagedAttrs);
  Active.push_back(Entry);
  // The staged attributes belong to this loop only; reset them so that a
  // loop nested inside it starts from a clean slate.
  StagedAttrs.clear();
}
/// Apply the loop hints found in \p Attrs through the set* helpers, then push
/// a new loop whose entry block is \p Header.
///
/// \param Header Entry block of the loop being pushed.
/// \param Ctx    AST context used to evaluate a hint's constant value
///               expression, when the hint has one.
/// \param Attrs  Attributes attached to the loop statement; entries that are
///               not LoopHintAttr are ignored.
///
/// Each hint is a (state, option) pair. Combinations that make no sense
/// (e.g. disabling a numeric option) hit llvm_unreachable.
void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
                         ArrayRef<const clang::Attr *> Attrs) {
  // Identify loop hint attributes from Attrs.
  for (const auto *Attr : Attrs) {
    const LoopHintAttr *LH = dyn_cast<LoopHintAttr>(Attr);
    // Skip non loop hint attributes.
    if (!LH)
      continue;

    // Hints that carry no value expression use 1.
    unsigned ValueInt = 1;
    if (auto *ValueExpr = LH->getValue()) {
      llvm::APSInt ValueAPS = ValueExpr->EvaluateKnownConstInt(Ctx);
      // getSExtValue() returns a 64-bit signed value; make the narrowing to
      // the unsigned hint value explicit.
      ValueInt = static_cast<unsigned>(ValueAPS.getSExtValue());
    }

    LoopHintAttr::OptionType Option = LH->getOption();
    LoopHintAttr::LoopHintState State = LH->getState();
    switch (State) {
    case LoopHintAttr::Disable:
      switch (Option) {
      case LoopHintAttr::Vectorize:
        // Disable vectorization by specifying a width of 1.
        setVectorizeWidth(1);
        break;
      case LoopHintAttr::Interleave:
        // Disable interleaving by specifying a count of 1.
        setInterleaveCount(1);
        break;
      case LoopHintAttr::Unroll:
        setUnrollState(LoopAttributes::Disable);
        break;
      case LoopHintAttr::UnrollCount:
      case LoopHintAttr::VectorizeWidth:
      case LoopHintAttr::InterleaveCount:
        llvm_unreachable("Options cannot be disabled.");
        break;
      }
      break;
    case LoopHintAttr::Enable:
      switch (Option) {
      case LoopHintAttr::Vectorize:
      case LoopHintAttr::Interleave:
        // Both options turn on the vectorize-enable state.
        setVectorizeEnable(true);
        break;
      case LoopHintAttr::Unroll:
        setUnrollState(LoopAttributes::Enable);
        break;
      case LoopHintAttr::UnrollCount:
      case LoopHintAttr::VectorizeWidth:
      case LoopHintAttr::InterleaveCount:
        llvm_unreachable("Options cannot be enabled.");
        break;
      }
      break;
    case LoopHintAttr::AssumeSafety:
      switch (Option) {
      case LoopHintAttr::Vectorize:
      case LoopHintAttr::Interleave:
        // Apply "llvm.mem.parallel_loop_access" metadata to load/stores.
        setParallel(true);
        setVectorizeEnable(true);
        break;
      case LoopHintAttr::Unroll:
      case LoopHintAttr::UnrollCount:
      case LoopHintAttr::VectorizeWidth:
      case LoopHintAttr::InterleaveCount:
        llvm_unreachable("Options cannot be used to assume mem safety.");
        break;
      }
      break;
    case LoopHintAttr::Full:
      switch (Option) {
      case LoopHintAttr::Unroll:
        setUnrollState(LoopAttributes::Full);
        break;
      case LoopHintAttr::Vectorize:
      case LoopHintAttr::Interleave:
      case LoopHintAttr::UnrollCount:
      case LoopHintAttr::VectorizeWidth:
      case LoopHintAttr::InterleaveCount:
        llvm_unreachable("Options cannot be used with 'full' hint.");
        break;
      }
      break;
    case LoopHintAttr::Numeric:
      switch (Option) {
      case LoopHintAttr::VectorizeWidth:
        setVectorizeWidth(ValueInt);
        break;
      case LoopHintAttr::InterleaveCount:
        setInterleaveCount(ValueInt);
        break;
      case LoopHintAttr::UnrollCount:
        setUnrollCount(ValueInt);
        break;
      case LoopHintAttr::Unroll:
      case LoopHintAttr::Vectorize:
      case LoopHintAttr::Interleave:
        llvm_unreachable("Options cannot be assigned a value.");
        break;
      }
      break;
    }
  }

  // Push the loop; push(Header) builds it from StagedAttrs and then clears
  // them.
  push(Header);
}
/// End the most recently pushed loop by removing it from the Active stack.
/// Calling this with no active loop is a programming error (asserted).
void LoopInfoStack::pop() {
assert(!Active.empty() && "No active loops to pop");
Active.pop_back();
}
/// Attach loop metadata to instruction \p I on behalf of the loop returned by
/// getInfo().
///
/// Does nothing when there is no loop info or the loop has no loop ID.
/// Otherwise:
///  - a terminator with at least one successor equal to the loop header gets
///    the loop ID as its "llvm.loop" metadata;
///  - a non-terminator that may read or write memory gets the loop ID as its
///    "llvm.mem.parallel_loop_access" metadata, but only if the loop's
///    attributes mark it parallel.
void LoopInfoStack::InsertHelper(Instruction *I) const {
  if (!hasInfo())
    return;

  const LoopInfo &Current = getInfo();
  auto *LoopMD = Current.getLoopID();
  if (!LoopMD)
    return;

  auto *Term = dyn_cast<TerminatorInst>(I);
  if (!Term) {
    // Ordinary instruction: only memory accesses in parallel loops are tagged.
    if (Current.getAttributes().IsParallel && I->mayReadOrWriteMemory())
      I->setMetadata("llvm.mem.parallel_loop_access", LoopMD);
    return;
  }

  // Terminator: tag it only if one of its edges goes to the loop header.
  auto *LoopHeader = Current.getHeader();
  const unsigned NumSuccs = Term->getNumSuccessors();
  unsigned Idx = 0;
  while (Idx != NumSuccs && Term->getSuccessor(Idx) != LoopHeader)
    ++Idx;
  if (Idx != NumSuccs)
    Term->setMetadata("llvm.loop", LoopMD);
}