llvm-project/clang/lib/CodeGen/CGCUDANV.cpp
John McCall 882987f30c Use the actual ABI-determined C calling convention for runtime
calls and declarations.

LLVM has a default CC determined by the target triple.  This is
not always the actual default CC for the ABI we've been asked to
target, and so we sometimes find ourselves annotating all user
functions with an explicit calling convention.  Since these
calling conventions usually agree for the simple set of argument
types passed to most runtime functions, using the LLVM-default CC
in principle has no effect.  However, the LLVM optimizer goes
into histrionics if it sees this kind of formal CC mismatch,
since it has no concept of CC compatibility.  Therefore, if this
module happens to define the "runtime" function, or got LTO'ed
with such a definition, we can miscompile;  so it's quite
important to get this right.

Defining runtime functions locally is quite common in embedded
applications.

llvm-svn: 176286
2013-02-28 19:01:20 +00:00

126 lines
4.3 KiB
C++

//===----- CGCUDANV.cpp - Interface to NVIDIA CUDA Runtime ----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides a class for CUDA code generation targeting the NVIDIA CUDA
// runtime library.
//
//===----------------------------------------------------------------------===//
#include "CGCUDARuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "clang/AST/Decl.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/CallSite.h"
#include <vector>
using namespace clang;
using namespace CodeGen;
namespace {
class CGNVCUDARuntime : public CGCUDARuntime {
private:
llvm::Type *IntTy, *SizeTy;
llvm::PointerType *CharPtrTy, *VoidPtrTy;
llvm::Constant *getSetupArgumentFn() const;
llvm::Constant *getLaunchFn() const;
public:
CGNVCUDARuntime(CodeGenModule &CGM);
void EmitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args);
};
}
CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) : CGCUDARuntime(CGM) {
CodeGen::CodeGenTypes &Types = CGM.getTypes();
ASTContext &Ctx = CGM.getContext();
IntTy = Types.ConvertType(Ctx.IntTy);
SizeTy = Types.ConvertType(Ctx.getSizeType());
CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy));
VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy));
}
llvm::Constant *CGNVCUDARuntime::getSetupArgumentFn() const {
// cudaError_t cudaSetupArgument(void *, size_t, size_t)
std::vector<llvm::Type*> Params;
Params.push_back(VoidPtrTy);
Params.push_back(SizeTy);
Params.push_back(SizeTy);
return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy,
Params, false),
"cudaSetupArgument");
}
llvm::Constant *CGNVCUDARuntime::getLaunchFn() const {
// cudaError_t cudaLaunch(char *)
std::vector<llvm::Type*> Params;
Params.push_back(CharPtrTy);
return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy,
Params, false),
"cudaLaunch");
}
void CGNVCUDARuntime::EmitDeviceStubBody(CodeGenFunction &CGF,
FunctionArgList &Args) {
// Build the argument value list and the argument stack struct type.
SmallVector<llvm::Value *, 16> ArgValues;
std::vector<llvm::Type *> ArgTypes;
for (FunctionArgList::const_iterator I = Args.begin(), E = Args.end();
I != E; ++I) {
llvm::Value *V = CGF.GetAddrOfLocalVar(*I);
ArgValues.push_back(V);
assert(isa<llvm::PointerType>(V->getType()) && "Arg type not PointerType");
ArgTypes.push_back(cast<llvm::PointerType>(V->getType())->getElementType());
}
llvm::StructType *ArgStackTy = llvm::StructType::get(
CGF.getLLVMContext(), ArgTypes);
llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
// Emit the calls to cudaSetupArgument
llvm::Constant *cudaSetupArgFn = getSetupArgumentFn();
for (unsigned I = 0, E = Args.size(); I != E; ++I) {
llvm::Value *Args[3];
llvm::BasicBlock *NextBlock = CGF.createBasicBlock("setup.next");
Args[0] = CGF.Builder.CreatePointerCast(ArgValues[I], VoidPtrTy);
Args[1] = CGF.Builder.CreateIntCast(
llvm::ConstantExpr::getSizeOf(ArgTypes[I]),
SizeTy, false);
Args[2] = CGF.Builder.CreateIntCast(
llvm::ConstantExpr::getOffsetOf(ArgStackTy, I),
SizeTy, false);
llvm::CallSite CS = CGF.EmitRuntimeCallOrInvoke(cudaSetupArgFn, Args);
llvm::Constant *Zero = llvm::ConstantInt::get(IntTy, 0);
llvm::Value *CSZero = CGF.Builder.CreateICmpEQ(CS.getInstruction(), Zero);
CGF.Builder.CreateCondBr(CSZero, NextBlock, EndBlock);
CGF.EmitBlock(NextBlock);
}
// Emit the call to cudaLaunch
llvm::Constant *cudaLaunchFn = getLaunchFn();
llvm::Value *Arg = CGF.Builder.CreatePointerCast(CGF.CurFn, CharPtrTy);
CGF.EmitRuntimeCallOrInvoke(cudaLaunchFn, Arg);
CGF.EmitBranch(EndBlock);
CGF.EmitBlock(EndBlock);
}
CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) {
return new CGNVCUDARuntime(CGM);
}