llvm-project/llvm/test/CodeGen/Hexagon/bank-conflict.mir
Alexey Karyakin c0b2c10e9f
[hexagon] Bump the default version to v68 (#132304)
Set the default processor version to v68 when the user does not specify
one in the command line. This includes changes in the LLVM backend and
linker (lld). Since lld normally sets the version based on inputs, this
change will only affect cases when there are no inputs.

Fixes #127558
2025-03-21 20:08:45 -05:00

157 lines
5.7 KiB
YAML

# RUN: llc -mtriple=hexagon -run-pass post-RA-sched %s -o - | FileCheck %s
# RUN: llc -mtriple=hexagon -passes=post-RA-sched %s -o - | FileCheck %s
# Test that the Post RA scheduler does not schedule back-to-back loads
# when there is another instruction to schedule. The scheduler avoids
# the back-to-back loads to reduce potential bank conflicts.
# CHECK: = L2_loadrigp
# CHECK: = A2_tfr
# CHECK: = L2_loadrigp
# CHECK: = S2_tstbit_i
# CHECK: = L4_loadri_rr
# CHECK: = L4_loadri_rr
--- |
; Aggregate types referenced by the getelementptr instructions in @f0.
; @f0 indexes an array of %s.0 and then selects fields 12 and 13, both of
; which are [24 x i32] arrays.
%s.0 = type { [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [3 x i32], [24 x i32], [8 x %s.1], [5 x i32] }
%s.1 = type { i32, i32 }
; Globals accessed by @f0: @g2 and @g3 are loaded in the outer loop, @g4 is
; loaded in the inner loop, and @g0 / @g1 are the targets of the i64 stores.
@g0 = global i64 0
@g1 = global i64 0
@g2 = global i32 0
@g3 = global i32 0
@g4 = global i8 0
; Hexagon intrinsics called by @f0. All of them use attribute group #0.
declare i32 @llvm.hexagon.S2.cl0(i32) #0
declare i32 @llvm.hexagon.S2.setbit.r(i32, i32) #0
declare i64 @llvm.hexagon.M2.vmpy2s.s0(i32, i32) #0
declare i64 @llvm.hexagon.M2.vmac2s.s0(i64, i32, i32) #0
declare i64 @llvm.hexagon.A2.vaddws(i64, i64) #0
declare i32 @llvm.hexagon.A4.modwrapu(i32, i32) #0
; IR counterpart of the machine function `f0` below. The MIR memory operands
; refer back to values defined here (%ir.v14 and %ir.v17), so the value names
; in this function must stay in sync with the MIR body.
;
; Shape: an outer loop (b1..b5) that runs until its counter equals %a0, and
; an inner loop (b2..b4) that runs until %v13 becomes zero.
define void @f0(i32 %a0) {
b0:
; Fixed integer address used as the base of a [10 x %s.0] array.
%v0 = bitcast ptr inttoptr (i32 -121502345 to ptr) to ptr
br label %b1
b1: ; preds = %b5, %b0
; %v1 is the outer trip counter; %v2 is the array index, advanced in b5 via
; the modwrapu intrinsic with a second operand of 10.
%v1 = phi i32 [ 0, %b0 ], [ %v28, %b5 ]
%v2 = phi i32 [ 0, %b0 ], [ %v27, %b5 ]
; Two loads from distinct globals; their AND seeds the inner loop's %v11.
%v3 = load i32, ptr @g2, align 4
%v4 = load i32, ptr @g3, align 8
%v5 = and i32 %v4, %v3
; Addresses of fields 12 and 13 of element %v2.
%v6 = getelementptr [10 x %s.0], ptr %v0, i32 0, i32 %v2
%v8 = getelementptr %s.0, ptr %v6, i32 0, i32 12
%v9 = getelementptr %s.0, ptr %v6, i32 0, i32 13
br label %b2
b2: ; preds = %b4, %b1
; %v10 carries the i64 value across inner iterations; %v11 carries the i32
; value that is updated by setbit.r each iteration.
%v10 = phi i64 [ %v24, %b4 ], [ 0, %b1 ]
%v11 = phi i32 [ %v13, %b4 ], [ %v5, %b1 ]
%v12 = tail call i32 @llvm.hexagon.S2.cl0(i32 %v11)
%v13 = tail call i32 @llvm.hexagon.S2.setbit.r(i32 %v11, i32 %v12)
; Two loads using the same index %v12 into the field-12 and field-13 arrays.
; These are the %ir.v14 / %ir.v17 memory operands named in the MIR.
%v14 = getelementptr [24 x i32], ptr %v8, i32 0, i32 %v12
%v15 = load i32, ptr %v14, align 4
%v16 = tail call i64 @llvm.hexagon.M2.vmpy2s.s0(i32 %v15, i32 %v15)
%v17 = getelementptr [24 x i32], ptr %v9, i32 0, i32 %v12
%v18 = load i32, ptr %v17, align 4
%v19 = tail call i64 @llvm.hexagon.M2.vmac2s.s0(i64 %v16, i32 %v18, i32 %v18)
; Bit 0 of @g4 selects whether b3 (accumulate and store) is executed.
%v20 = load i8, ptr @g4, align 1
%v21 = and i8 %v20, 1
%v22 = icmp eq i8 %v21, 0
br i1 %v22, label %b3, label %b4
b3: ; preds = %b2
; Taken only when bit 0 of @g4 is clear: combine %v10 with %v19 and store
; the result to @g0.
%v23 = tail call i64 @llvm.hexagon.A2.vaddws(i64 %v10, i64 %v19)
store i64 %v23, ptr @g0, align 8
br label %b4
b4: ; preds = %b3, %b2
%v24 = phi i64 [ %v23, %b3 ], [ %v10, %b2 ]
; Leave the inner loop once %v13 is zero.
%v25 = icmp eq i32 %v13, 0
br i1 %v25, label %b5, label %b2
b5: ; preds = %b4
; Advance the array index and the outer counter; exit when the counter
; reaches %a0.
%v26 = add i32 %v2, 1
%v27 = tail call i32 @llvm.hexagon.A4.modwrapu(i32 %v26, i32 10)
%v28 = add i32 %v1, 1
%v29 = icmp eq i32 %v28, %a0
br i1 %v29, label %b6, label %b1
b6: ; preds = %b5
; Final store of the last %v19 value to @g1.
store i64 %v19, ptr @g1, align 8
ret void
}
; Attribute group applied to every intrinsic declaration in this module.
attributes #0 = { nounwind readnone }
...
---
# Machine function `f0`: the input handed to the post-RA scheduler by the
# RUN lines at the top of this file. The instruction order in `body` is the
# scheduler's starting point, so it must not be rearranged by hand.
#
# Block overview (all facts below are read directly from the body):
#   bb.0  Materializes the constants 0, -121502345 and 10 into r3, r2 and r4,
#         then J2_loop0r consumes r0 and defines lc0/sa0 for a loop at bb.1.
#   bb.1  Computes r5 = r3 * 1824, then r5 += r2 + 1248 and r6 += r2 + 1152
#         style accumulations (M2_accii), and issues two L2_loadrigp loads
#         (from @g2 and @g3) on adjacent lines. The A2_tfr copy of r5 into r6
#         follows them in the input. The FileCheck directives at the top of
#         the file expect that A2_tfr to appear between the two L2_loadrigp
#         loads in the output.
#         NOTE(review): 1824, 1152 and 1248 look like sizeof(%s.0) and the
#         byte offsets of fields 12 and 13, assuming 4-byte i32 and no
#         padding. Confirm against the Hexagon data layout.
#   bb.2  Self-looping block (J2_jumpf back to bb.2). It contains two
#         L4_loadri_rr loads on adjacent lines (memory operands %ir.v14 and
#         %ir.v17) and a later S2_tstbit_i on r12, which is loaded from @g4.
#         The FileCheck directives list S2_tstbit_i ahead of both
#         L4_loadri_rr loads. The result in d0 is selected with the
#         predicated A2_tfrpt/A2_tfrpf pair on p0, and the store to @g0 is
#         predicated on p0 as well (S4_pstorerdf_abs).
#   bb.3  Increments r3, applies A4_modwrapu with r4, and closes the loop
#         with ENDLOOP0 targeting bb.1.
#   bb.4  Stores d4 to @g1 and returns through r31 (PS_jmpret).
name: f0
alignment: 16
tracksRegLiveness: true
registers:
liveins:
- { reg: '$r0', virtual-reg: '' }
fixedStack:
stack:
constants:
body: |
bb.0:
successors: %bb.1(0x80000000)
liveins: $r0:0x00000001
$r3 = A2_tfrsi 0
$r2 = A2_tfrsi -121502345
$r4 = A2_tfrsi 10
J2_loop0r %bb.1, killed $r0, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
bb.1 (machine-block-address-taken):
successors: %bb.2(0x80000000)
liveins: $lc0:0x00000004, $r2:0x00000001, $r3:0x00000001, $r4:0x00000001, $sa0:0x00000004
$r5 = M2_mpysip $r3, 1824
$r7 = L2_loadrigp @g2, implicit $gp :: (dereferenceable load (s32) from @g2)
$r8 = L2_loadrigp @g3, implicit killed $gp :: (dereferenceable load (s32) from @g3, align 8)
$r6 = A2_tfr $r5
$r7 = A2_and killed $r8, killed $r7
$r5 = M2_accii killed $r5, $r2, 1248
$r6 = M2_accii killed $r6, $r2, 1152
$d0 = A2_tfrpi 0
bb.2:
successors: %bb.3(0x04000000), %bb.2(0x7c000000)
liveins: $lc0:0x00000004, $r0:0x00000001, $r1:0x00000001, $r2:0x00000001, $r3:0x00000001, $r4:0x00000001, $r5:0x00000001, $r6:0x00000001, $r7:0x00000001, $sa0:0x00000004
$r8 = S2_cl0 $r7
$r12 = L2_loadrubgp @g4, implicit $gp :: (dereferenceable load (s8) from @g4)
$r7 = S2_setbit_r killed $r7, $r8
$r9 = L4_loadri_rr $r6, $r8, 2 :: (load (s32) from %ir.v14)
$r13 = L4_loadri_rr $r5, killed $r8, 2 :: (load (s32) from %ir.v17)
$d4 = M2_vmpy2s_s0 killed $r9, $r9, implicit-def dead $usr_ovf
$p0 = S2_tstbit_i killed $r12, 0
$d4 = M2_vmac2s_s0 killed $d4, killed $r13, $r13, implicit-def dead $usr_ovf
$p1 = C2_cmpeqi $r7, 0
$d6 = A2_vaddws $d0, $d4, implicit-def dead $usr_ovf
$d0 = A2_tfrpt $p0, killed $d0, implicit $d0
S4_pstorerdf_abs $p0, @g0, $d6, implicit killed $gp :: (store (s64) into @g0)
$d0 = A2_tfrpf killed $p0, killed $d6, implicit killed $d0
J2_jumpf killed $p1, %bb.2, implicit-def dead $pc
bb.3:
successors: %bb.4(0x04000000), %bb.1(0x7c000000)
liveins: $lc0:0x00000004, $r2:0x00000001, $r3:0x00000001, $r4:0x00000001, $r8:0x00000001, $r9:0x00000001, $sa0:0x00000004
$r3 = A2_addi killed $r3, 1
$r3 = A4_modwrapu killed $r3, $r4
ENDLOOP0 %bb.1, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
bb.4:
liveins: $r8:0x00000001, $r9:0x00000001
S2_storerdgp @g1, killed $d4, implicit killed $gp :: (store (s64) into @g1)
PS_jmpret killed $r31, implicit-def dead $pc
...