mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-15 22:16:30 +00:00
[flang][cuda] Add c_devloc as intrinsic and inline it during lowering (#120648)
Add `c_devloc` as intrinsic and inline it during lowering. `c_devloc` is used in CUDA Fortran to get the address of device variables. For the moment, we borrow almost all semantic checks from `c_loc` except for the pointer or target restriction. The specifications of `c_devloc` are are pretty vague and we will relax/enforce the restrictions based on library and apps usage comparing them to the reference compiler.
This commit is contained in:
parent
cdbba15c6c
commit
878a57468b
@ -769,6 +769,11 @@ mlir::Value genMaxWithZero(fir::FirOpBuilder &builder, mlir::Location loc,
|
||||
mlir::Value genCPtrOrCFunptrAddr(fir::FirOpBuilder &builder, mlir::Location loc,
|
||||
mlir::Value cPtr, mlir::Type ty);
|
||||
|
||||
/// The type(C_DEVPTR) is defined as the derived type with only one
|
||||
/// component of C_PTR type. Get the C address from the C_PTR component.
|
||||
mlir::Value genCDevPtrAddr(fir::FirOpBuilder &builder, mlir::Location loc,
|
||||
mlir::Value cDevPtr, mlir::Type ty);
|
||||
|
||||
/// Get the C address value.
|
||||
mlir::Value genCPtrOrCFunptrValue(fir::FirOpBuilder &builder,
|
||||
mlir::Location loc, mlir::Value cPtr);
|
||||
|
@ -214,6 +214,7 @@ struct IntrinsicLibrary {
|
||||
llvm::ArrayRef<fir::ExtendedValue>);
|
||||
fir::ExtendedValue genCAssociatedCPtr(mlir::Type,
|
||||
llvm::ArrayRef<fir::ExtendedValue>);
|
||||
fir::ExtendedValue genCDevLoc(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
|
||||
mlir::Value genErfcScaled(mlir::Type resultType,
|
||||
llvm::ArrayRef<mlir::Value> args);
|
||||
void genCFPointer(llvm::ArrayRef<fir::ExtendedValue>);
|
||||
|
@ -2663,6 +2663,8 @@ private:
|
||||
ActualArguments &, FoldingContext &) const;
|
||||
std::optional<SpecificCall> HandleC_Loc(
|
||||
ActualArguments &, FoldingContext &) const;
|
||||
std::optional<SpecificCall> HandleC_Devloc(
|
||||
ActualArguments &, FoldingContext &) const;
|
||||
const std::string &ResolveAlias(const std::string &name) const {
|
||||
auto iter{aliases_.find(name)};
|
||||
return iter == aliases_.end() ? name : iter->second;
|
||||
@ -2690,7 +2692,8 @@ bool IntrinsicProcTable::Implementation::IsIntrinsicFunction(
|
||||
return true;
|
||||
}
|
||||
// special cases
|
||||
return name == "__builtin_c_loc" || name == "null";
|
||||
return name == "__builtin_c_loc" || name == "__builtin_c_devloc" ||
|
||||
name == "null";
|
||||
}
|
||||
bool IntrinsicProcTable::Implementation::IsIntrinsicSubroutine(
|
||||
const std::string &name0) const {
|
||||
@ -3080,6 +3083,73 @@ std::optional<SpecificCall> IntrinsicProcTable::Implementation::HandleC_Loc(
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// CUDA Fortran C_DEVLOC(x)
|
||||
std::optional<SpecificCall> IntrinsicProcTable::Implementation::HandleC_Devloc(
|
||||
ActualArguments &arguments, FoldingContext &context) const {
|
||||
static const char *const keywords[]{"cptr", nullptr};
|
||||
|
||||
if (CheckAndRearrangeArguments(arguments, context.messages(), keywords)) {
|
||||
CHECK(arguments.size() == 1);
|
||||
const auto *expr{arguments[0].value().UnwrapExpr()};
|
||||
if (auto typeAndShape{characteristics::TypeAndShape::Characterize(
|
||||
arguments[0], context)}) {
|
||||
if (expr && !IsContiguous(*expr, context).value_or(true)) {
|
||||
context.messages().Say(arguments[0]->sourceLocation(),
|
||||
"C_DEVLOC() argument must be contiguous"_err_en_US);
|
||||
}
|
||||
if (auto constExtents{AsConstantExtents(context, typeAndShape->shape())};
|
||||
constExtents && GetSize(*constExtents) == 0) {
|
||||
context.messages().Say(arguments[0]->sourceLocation(),
|
||||
"C_DEVLOC() argument may not be a zero-sized array"_err_en_US);
|
||||
}
|
||||
if (!(typeAndShape->type().category() != TypeCategory::Derived ||
|
||||
typeAndShape->type().IsAssumedType() ||
|
||||
(!typeAndShape->type().IsPolymorphic() &&
|
||||
CountNonConstantLenParameters(
|
||||
typeAndShape->type().GetDerivedTypeSpec()) == 0))) {
|
||||
context.messages().Say(arguments[0]->sourceLocation(),
|
||||
"C_DEVLOC() argument must have an intrinsic type, assumed type, or non-polymorphic derived type with no non-constant length parameter"_err_en_US);
|
||||
} else if (typeAndShape->type().knownLength().value_or(1) == 0) {
|
||||
context.messages().Say(arguments[0]->sourceLocation(),
|
||||
"C_DEVLOC() argument may not be zero-length character"_err_en_US);
|
||||
} else if (typeAndShape->type().category() != TypeCategory::Derived &&
|
||||
!IsInteroperableIntrinsicType(typeAndShape->type()).value_or(true)) {
|
||||
if (typeAndShape->type().category() == TypeCategory::Character &&
|
||||
typeAndShape->type().kind() == 1) {
|
||||
// Default character kind, but length is not known to be 1
|
||||
if (context.languageFeatures().ShouldWarn(
|
||||
common::UsageWarning::CharacterInteroperability)) {
|
||||
context.messages().Say(
|
||||
common::UsageWarning::CharacterInteroperability,
|
||||
arguments[0]->sourceLocation(),
|
||||
"C_DEVLOC() argument has non-interoperable character length"_warn_en_US);
|
||||
}
|
||||
} else if (context.languageFeatures().ShouldWarn(
|
||||
common::UsageWarning::Interoperability)) {
|
||||
context.messages().Say(common::UsageWarning::Interoperability,
|
||||
arguments[0]->sourceLocation(),
|
||||
"C_DEVLOC() argument has non-interoperable intrinsic type or kind"_warn_en_US);
|
||||
}
|
||||
}
|
||||
|
||||
characteristics::DummyDataObject ddo{std::move(*typeAndShape)};
|
||||
ddo.intent = common::Intent::In;
|
||||
return SpecificCall{
|
||||
SpecificIntrinsic{"__builtin_c_devloc"s,
|
||||
characteristics::Procedure{
|
||||
characteristics::FunctionResult{
|
||||
DynamicType{GetBuiltinDerivedType(
|
||||
builtinsScope_, "__builtin_c_devptr")}},
|
||||
characteristics::DummyArguments{
|
||||
characteristics::DummyArgument{"cptr"s, std::move(ddo)}},
|
||||
characteristics::Procedure::Attrs{
|
||||
characteristics::Procedure::Attr::Pure}}},
|
||||
std::move(arguments)};
|
||||
}
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
static bool CheckForNonPositiveValues(FoldingContext &context,
|
||||
const ActualArgument &arg, const std::string &procName,
|
||||
const std::string &argName) {
|
||||
@ -3270,6 +3340,8 @@ std::optional<SpecificCall> IntrinsicProcTable::Implementation::Probe(
|
||||
} else { // function
|
||||
if (call.name == "__builtin_c_loc") {
|
||||
return HandleC_Loc(arguments, context);
|
||||
} else if (call.name == "__builtin_c_devloc") {
|
||||
return HandleC_Devloc(arguments, context);
|
||||
} else if (call.name == "null") {
|
||||
return HandleNull(arguments, context);
|
||||
}
|
||||
|
@ -1626,6 +1626,25 @@ mlir::Value fir::factory::genCPtrOrCFunptrAddr(fir::FirOpBuilder &builder,
|
||||
cPtr, addrFieldIndex);
|
||||
}
|
||||
|
||||
mlir::Value fir::factory::genCDevPtrAddr(fir::FirOpBuilder &builder,
|
||||
mlir::Location loc,
|
||||
mlir::Value cDevPtr, mlir::Type ty) {
|
||||
auto recTy = mlir::cast<fir::RecordType>(ty);
|
||||
assert(recTy.getTypeList().size() == 1);
|
||||
auto cptrFieldName = recTy.getTypeList()[0].first;
|
||||
mlir::Type cptrFieldTy = recTy.getTypeList()[0].second;
|
||||
auto fieldIndexType = fir::FieldType::get(ty.getContext());
|
||||
mlir::Value cptrFieldIndex = builder.create<fir::FieldIndexOp>(
|
||||
loc, fieldIndexType, cptrFieldName, recTy,
|
||||
/*typeParams=*/mlir::ValueRange{});
|
||||
auto cptrCoord = builder.create<fir::CoordinateOp>(
|
||||
loc, builder.getRefType(cptrFieldTy), cDevPtr, cptrFieldIndex);
|
||||
auto [addrFieldIndex, addrFieldTy] =
|
||||
genCPtrOrCFunptrFieldIndex(builder, loc, cptrFieldTy);
|
||||
return builder.create<fir::CoordinateOp>(loc, builder.getRefType(addrFieldTy),
|
||||
cptrCoord, addrFieldIndex);
|
||||
}
|
||||
|
||||
mlir::Value fir::factory::genCPtrOrCFunptrValue(fir::FirOpBuilder &builder,
|
||||
mlir::Location loc,
|
||||
mlir::Value cPtr) {
|
||||
|
@ -167,6 +167,7 @@ static constexpr IntrinsicHandler handlers[]{
|
||||
&I::genCAssociatedCPtr,
|
||||
{{{"c_ptr_1", asAddr}, {"c_ptr_2", asAddr, handleDynamicOptional}}},
|
||||
/*isElemental=*/false},
|
||||
{"c_devloc", &I::genCDevLoc, {{{"x", asBox}}}, /*isElemental=*/false},
|
||||
{"c_f_pointer",
|
||||
&I::genCFPointer,
|
||||
{{{"cptr", asValue},
|
||||
@ -2867,11 +2868,14 @@ static mlir::Value getAddrFromBox(fir::FirOpBuilder &builder,
|
||||
static fir::ExtendedValue
|
||||
genCLocOrCFunLoc(fir::FirOpBuilder &builder, mlir::Location loc,
|
||||
mlir::Type resultType, llvm::ArrayRef<fir::ExtendedValue> args,
|
||||
bool isFunc = false) {
|
||||
bool isFunc = false, bool isDevLoc = false) {
|
||||
assert(args.size() == 1);
|
||||
mlir::Value res = builder.create<fir::AllocaOp>(loc, resultType);
|
||||
mlir::Value resAddr =
|
||||
fir::factory::genCPtrOrCFunptrAddr(builder, loc, res, resultType);
|
||||
mlir::Value resAddr;
|
||||
if (isDevLoc)
|
||||
resAddr = fir::factory::genCDevPtrAddr(builder, loc, res, resultType);
|
||||
else
|
||||
resAddr = fir::factory::genCPtrOrCFunptrAddr(builder, loc, res, resultType);
|
||||
assert(fir::isa_box_type(fir::getBase(args[0]).getType()) &&
|
||||
"argument must have been lowered to box type");
|
||||
mlir::Value argAddr = getAddrFromBox(builder, loc, args[0], isFunc);
|
||||
@ -2928,6 +2932,14 @@ IntrinsicLibrary::genCAssociatedCPtr(mlir::Type resultType,
|
||||
return genCAssociated(builder, loc, resultType, args);
|
||||
}
|
||||
|
||||
// C_DEVLOC
|
||||
fir::ExtendedValue
|
||||
IntrinsicLibrary::genCDevLoc(mlir::Type resultType,
|
||||
llvm::ArrayRef<fir::ExtendedValue> args) {
|
||||
return genCLocOrCFunLoc(builder, loc, resultType, args, /*isFunc=*/false,
|
||||
/*isDevLoc=*/true);
|
||||
}
|
||||
|
||||
// C_F_POINTER
|
||||
void IntrinsicLibrary::genCFPointer(llvm::ArrayRef<fir::ExtendedValue> args) {
|
||||
assert(args.size() == 3);
|
||||
|
@ -22,6 +22,9 @@ module __fortran_builtins
|
||||
intrinsic :: __builtin_c_loc
|
||||
public :: __builtin_c_loc
|
||||
|
||||
intrinsic :: __builtin_c_devloc
|
||||
public :: __builtin_c_devloc
|
||||
|
||||
intrinsic :: __builtin_c_f_pointer
|
||||
public :: __builtin_c_f_pointer
|
||||
|
||||
@ -144,6 +147,7 @@ module __fortran_builtins
|
||||
|
||||
type :: __force_derived_type_instantiations
|
||||
type(__builtin_c_ptr) :: c_ptr
|
||||
type(__builtin_c_devptr) :: c_devptr
|
||||
type(__builtin_c_funptr) :: c_funptr
|
||||
type(__builtin_event_type) :: event_type
|
||||
type(__builtin_lock_type) :: lock_type
|
||||
|
@ -14,7 +14,7 @@
|
||||
module __fortran_type_info
|
||||
|
||||
use, intrinsic :: __fortran_builtins, &
|
||||
only: __builtin_c_ptr, __builtin_c_funptr
|
||||
only: __builtin_c_ptr, __builtin_c_devptr, __builtin_c_funptr
|
||||
implicit none
|
||||
|
||||
! Set PRIVATE by default to explicitly only export what is meant
|
||||
|
21
flang/test/Lower/CUDA/cuda-cdevloc.cuf
Normal file
21
flang/test/Lower/CUDA/cuda-cdevloc.cuf
Normal file
@ -0,0 +1,21 @@
|
||||
! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
|
||||
|
||||
attributes(global) subroutine testcdevloc(a)
|
||||
use __fortran_builtins, only: c_devloc => __builtin_c_devloc
|
||||
integer, device :: a(10)
|
||||
print*, c_devloc(a(1))
|
||||
end
|
||||
|
||||
! CHECK-LABEL: func.func @_QPtestcdevloc(
|
||||
! CHECK-SAME: %[[A_ARG:.*]]: !fir.ref<!fir.array<10xi32>> {cuf.data_attr = #cuf.cuda<device>, fir.bindc_name = "a"}) attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
|
||||
! CHECK: %[[A:.*]]:2 = hlfir.declare %[[A_ARG]](%{{.*}}) dummy_scope %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFtestcdevlocEa"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
|
||||
! CHECK: %[[A1:.*]] = hlfir.designate %[[A]]#0 (%c1{{.*}}) : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
|
||||
! CHECK: %[[BOX:.*]] = fir.embox %[[A1]] : (!fir.ref<i32>) -> !fir.box<i32>
|
||||
! CHECK: %[[CDEVPTR:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>
|
||||
! CHECK: %[[FIELD_CPTR:.*]] = fir.field_index cptr, !fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>
|
||||
! CHECK: %[[COORD_CPTR:.*]] = fir.coordinate_of %[[CDEVPTR]], %[[FIELD_CPTR]] : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>, !fir.field) -> !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>
|
||||
! CHECK: %[[FIELD_ADDRESS:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>
|
||||
! CHECK: %[[COORD_ADDRESS:.*]] = fir.coordinate_of %[[COORD_CPTR]], %[[FIELD_ADDRESS]] : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.field) -> !fir.ref<i64>
|
||||
! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[BOX]] : (!fir.box<i32>) -> !fir.ref<i32>
|
||||
! CHECK: %[[ADDRESS_A1:.*]] = fir.convert %[[BOX_ADDR]] : (!fir.ref<i32>) -> i64
|
||||
! CHECK: fir.store %[[ADDRESS_A1]] to %[[COORD_ADDRESS]] : !fir.ref<i64>
|
Loading…
x
Reference in New Issue
Block a user