mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-17 22:16:46 +00:00

The intention of this work is to give MLIR->LLVMIR conversion freedom to control how the private variable is allocated so that it can be allocated on the stack in ordinary cases or as part of a structure used to give closure context for tasks which might outlive the current stack frame. See RFC: https://discourse.llvm.org/t/rfc-openmp-supporting-delayed-task-execution-with-firstprivate-variables/83084 For example, a privatizer for an integer used to look like ```mlir omp.private {type = private} @x.privatizer : !fir.ref<i32> alloc { ^bb0(%arg0: !fir.ref<i32>): %0 = ... allocate proper memory for the private clone ... omp.yield(%0 : !fir.ref<i32>) } ``` After this change, allocation becomes implicit in the operation: ```mlir omp.private {type = private} @x.privatizer : i32 ``` For more complex types that require initialization after allocation, an init region can be used: ```mlir omp.private {type = private} @x.privatizer : !some.type init { ^bb0(%arg0: !some.pointer<!some.type>, %arg1: !some.pointer<!some.type>): // initialize %arg1, using %arg0 as a mold for allocations omp.yield(%arg1 : !some.pointer<!some.type>) } dealloc { ^bb0(%arg0: !some.pointer<!some.type>): ... deallocate memory allocated by the init region ... omp.yield } ``` This patch lays the groundwork for delayed task execution but is not enough on its own. After this patch all gfortran tests which previously passed still pass. There are the following changes to the Fujitsu test suite: - 0380_0009 and 0435_0009 are fixed - 0688_0041 now fails at runtime. That test exercises firstprivate variables with tasks. Previously we got lucky with the undefined behavior and won the race. After these changes we no longer get lucky. This patch lays the groundwork for a proper fix for this issue. In flang the lowering re-uses the existing lowering used for reduction init and dealloc regions. 
In flang, before this patch we hit a TODO with the same wording when generating the copy region for firstprivate polymorphic variables. After this patch the box-like fir.class is passed by reference into the copy region, leading to a different path that didn't hit that old TODO, but the generated code still didn't work, so I added a new TODO in DataSharingProcessor.
106 lines
5.6 KiB
MLIR
106 lines
5.6 KiB
MLIR
// Use --mlir-disable-threading so that the AA queries are serialized
|
|
// as well as its diagnostic output.
|
|
// RUN: fir-opt %s -pass-pipeline='builtin.module(func.func(test-fir-alias-analysis))' -split-input-file --mlir-disable-threading 2>&1 | FileCheck %s
|
|
|
|
// Fortran code:
|
|
//
|
|
// program main
|
|
// integer :: arrayA(10,10)
|
|
// integer :: tmp(2)
|
|
// integer :: i,j
|
|
// !$omp teams distribute parallel do private(tmp)
|
|
// do j = 1, 10
|
|
// do i = 1,10
|
|
// tmp = [i,j]
|
|
// arrayA = tmp(1)
|
|
// end do
|
|
// end do
|
|
// end program main
|
|
|
|
// CHECK-LABEL: Testing : "_QQmain"
|
|
// CHECK-DAG: tmp_private_array#0 <-> unnamed_array#0: NoAlias
|
|
// CHECK-DAG: tmp_private_array#1 <-> unnamed_array#0: NoAlias
|
|
|
|
// Privatizers referenced by the private(...) clause of the omp.parallel op in
// @_QQmain. None of them carries a region: each declaration only gives the
// symbol name and the type of the privatized variable (i32 for i and j,
// !fir.array<2xi32> for tmp).
omp.private {type = private} @_QFEi_private_ref_i32 : i32
|
|
omp.private {type = private} @_QFEj_private_ref_i32 : i32
|
|
omp.private {type = private} @_QFEtmp_private_ref_2xi32 : !fir.array<2xi32>
|
|
|
|
// Test body for the Fortran program shown in the header comment. It declares
// arrayA, i, j and tmp, then enters a teams / parallel / distribute / wsloop
// nest in which tmp, j and i are privatized. The two values carrying a
// test.ptr attribute ("tmp_private_array" and "unnamed_array") are the ones
// named in the expected alias-analysis output.
func.func @_QQmain() attributes {fir.bindc_name = "main"} {
|
|
// arrayA: address of the global @_QFEarraya, declared with a 10x10 shape.
%0 = fir.address_of(@_QFEarraya) : !fir.ref<!fir.array<10x10xi32>>
|
|
%c10 = arith.constant 10 : index
|
|
%c10_0 = arith.constant 10 : index
|
|
%1 = fir.shape %c10, %c10_0 : (index, index) -> !fir.shape<2>
|
|
%2:2 = hlfir.declare %0(%1) {uniq_name = "_QFEarraya"} : (!fir.ref<!fir.array<10x10xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<10x10xi32>>, !fir.ref<!fir.array<10x10xi32>>)
|
|
// Original (non-private) storage for the loop variables i and j.
%3 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFEi"}
|
|
%4:2 = hlfir.declare %3 {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
|
|
%5 = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFEj"}
|
|
%6:2 = hlfir.declare %5 {uniq_name = "_QFEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
|
|
// Original (non-private) storage for tmp(2).
%c2 = arith.constant 2 : index
|
|
%7 = fir.alloca !fir.array<2xi32> {bindc_name = "tmp", uniq_name = "_QFEtmp"}
|
|
%8 = fir.shape %c2 : (index) -> !fir.shape<1>
|
|
%9:2 = hlfir.declare %7(%8) {uniq_name = "_QFEtmp"} : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<2xi32>>, !fir.ref<!fir.array<2xi32>>)
|
|
omp.teams {
|
|
// The private clause maps %9#0 (tmp), %6#0 (j) and %4#0 (i) to the region
// arguments %arg0, %arg1 and %arg2 through the privatizer symbols.
omp.parallel private(@_QFEtmp_private_ref_2xi32 %9#0 -> %arg0, @_QFEj_private_ref_i32 %6#0 -> %arg1, @_QFEi_private_ref_i32 %4#0 -> %arg2 : !fir.ref<!fir.array<2xi32>>, !fir.ref<i32>, !fir.ref<i32>) {
|
|
%c2_1 = arith.constant 2 : index
|
|
%10 = fir.shape %c2_1 : (index) -> !fir.shape<1>
|
|
// Declaration of the private tmp; tagged "tmp_private_array" so the test
// pass reports alias queries against it.
%11:2 = hlfir.declare %arg0(%10) {uniq_name = "_QFEtmp", test.ptr = "tmp_private_array"} : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<2xi32>>, !fir.ref<!fir.array<2xi32>>)
|
|
// Declarations of the private j (%arg1) and the private i (%arg2).
%12:2 = hlfir.declare %arg1 {uniq_name = "_QFEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
|
|
%13:2 = hlfir.declare %arg2 {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
|
|
// Bounds and step of the outer loop: 1 to 10 inclusive, step 1.
%c1_i32 = arith.constant 1 : i32
|
|
%c10_i32 = arith.constant 10 : i32
|
|
%c1_i32_2 = arith.constant 1 : i32
|
|
omp.distribute {
|
|
omp.wsloop {
|
|
omp.loop_nest (%arg3) : i32 = (%c1_i32) to (%c10_i32) inclusive step (%c1_i32_2) {
|
|
// Outer loop body: the induction value is written to the private j.
fir.store %arg3 to %12#1 : !fir.ref<i32>
|
|
%c1_i32_3 = arith.constant 1 : i32
|
|
%14 = fir.convert %c1_i32_3 : (i32) -> index
|
|
%c10_i32_4 = arith.constant 10 : i32
|
|
%15 = fir.convert %c10_i32_4 : (i32) -> index
|
|
%c1 = arith.constant 1 : index
|
|
%16 = fir.convert %14 : (index) -> i32
|
|
// Inner loop from 1 to 10 with step 1; the i32 value of i is carried as
// an iter_arg (%arg5) next to the index induction variable (%arg4).
%17:2 = fir.do_loop %arg4 = %14 to %15 step %c1 iter_args(%arg5 = %16) -> (index, i32) {
|
|
fir.store %arg5 to %13#1 : !fir.ref<i32>
|
|
%c2_5 = arith.constant 2 : index
|
|
%c1_6 = arith.constant 1 : index
|
|
%c1_7 = arith.constant 1 : index
|
|
// Temporary for the array constructor [i,j], allocated with
// fir.allocmem.
%18 = fir.allocmem !fir.array<2xi32> {bindc_name = ".tmp.arrayctor", uniq_name = ""}
|
|
%19 = fir.shape %c2_5 : (index) -> !fir.shape<1>
|
|
%20:2 = hlfir.declare %18(%19) {uniq_name = ".tmp.arrayctor"} : (!fir.heap<!fir.array<2xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<2xi32>>, !fir.heap<!fir.array<2xi32>>)
|
|
// Element 1 of the temporary receives the value loaded from the
// private i.
%21 = fir.load %13#0 : !fir.ref<i32>
|
|
%22 = arith.addi %c1_6, %c1_7 : index
|
|
%23 = hlfir.designate %20#0 (%c1_6) : (!fir.heap<!fir.array<2xi32>>, index) -> !fir.ref<i32>
|
|
hlfir.assign %21 to %23 : i32, !fir.ref<i32>
|
|
// Element 2 (index %22 = 1 + 1) receives the value loaded from the
// private j.
%24 = fir.load %12#0 : !fir.ref<i32>
|
|
%25 = hlfir.designate %20#0 (%22) : (!fir.heap<!fir.array<2xi32>>, index) -> !fir.ref<i32>
|
|
hlfir.assign %24 to %25 : i32, !fir.ref<i32>
|
|
%true = arith.constant true
|
|
// The temporary is turned into an hlfir.expr with "move" and tagged
// "unnamed_array"; this is the second value of the alias queries.
%26 = hlfir.as_expr %20#0 move %true {test.ptr = "unnamed_array"} : (!fir.heap<!fir.array<2xi32>>, i1) -> !hlfir.expr<2xi32>
|
|
// tmp = [i,j]: assign the expression to the private tmp, then destroy
// the expression.
hlfir.assign %26 to %11#0 : !hlfir.expr<2xi32>, !fir.ref<!fir.array<2xi32>>
|
|
hlfir.destroy %26 : !hlfir.expr<2xi32>
|
|
// arrayA = tmp(1): load element 1 of the private tmp and assign the
// scalar to the whole of arrayA.
%c1_8 = arith.constant 1 : index
|
|
%27 = hlfir.designate %11#0 (%c1_8) : (!fir.ref<!fir.array<2xi32>>, index) -> !fir.ref<i32>
|
|
%28 = fir.load %27 : !fir.ref<i32>
|
|
hlfir.assign %28 to %2#0 : i32, !fir.ref<!fir.array<10x10xi32>>
|
|
// Next-iteration values: index + step, and the private i reloaded and
// incremented by the step.
%29 = arith.addi %arg4, %c1 : index
|
|
%30 = fir.convert %c1 : (index) -> i32
|
|
%31 = fir.load %13#1 : !fir.ref<i32>
|
|
%32 = arith.addi %31, %30 : i32
|
|
fir.result %29, %32 : index, i32
|
|
}
|
|
// Write the final carried i32 value back to the private i.
fir.store %17#1 to %13#1 : !fir.ref<i32>
|
|
omp.yield
|
|
}
|
|
} {omp.composite}
|
|
} {omp.composite}
|
|
omp.terminator
|
|
} {omp.composite}
|
|
omp.terminator
|
|
}
|
|
return
|
|
}
|
|
// Internal global backing arrayA (10x10 i32), referenced from @_QQmain via
// fir.address_of. Its initializer is an all-zero-bits value.
fir.global internal @_QFEarraya : !fir.array<10x10xi32> {
|
|
%0 = fir.zero_bits !fir.array<10x10xi32>
|
|
fir.has_value %0 : !fir.array<10x10xi32>
|
|
}
|