[AMDGPU] Add gfx10 uaddsat test coverage. NFC.

This commit is contained in:
Jay Foad 2021-10-28 09:39:19 +01:00
parent 8daf76935d
commit c6b4fb87c0

View File

@@ -2,6 +2,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck --check-prefix=GFX6 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck --check-prefix=GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX10 %s
define i8 @v_uaddsat_i8(i8 %lhs, i8 %rhs) {
; GFX6-LABEL: v_uaddsat_i8:
@@ -27,6 +28,17 @@ define i8 @v_uaddsat_i8(i8 %lhs, i8 %rhs) {
; GFX9-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-NEXT: v_min_u16_e32 v0, 0xff, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_i8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_movk_i32 s4, 0xff
; GFX10-NEXT: v_and_b32_e32 v1, s4, v1
; GFX10-NEXT: v_and_b32_e32 v0, s4, v0
; GFX10-NEXT: v_add_nc_u16 v0, v0, v1
; GFX10-NEXT: v_min_u16 v0, v0, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
%result = call i8 @llvm.uadd.sat.i8(i8 %lhs, i8 %rhs)
ret i8 %result
}
@@ -53,6 +65,13 @@ define i16 @v_uaddsat_i16(i16 %lhs, i16 %rhs) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u16_e64 v0, v0, v1 clamp
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u16 v0, v0, v1 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%result = call i16 @llvm.uadd.sat.i16(i16 %lhs, i16 %rhs)
ret i16 %result
}
@@ -77,6 +96,13 @@ define i32 @v_uaddsat_i32(i32 %lhs, i32 %rhs) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e64 v0, v0, v1 clamp
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e64 v0, v0, v1 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%result = call i32 @llvm.uadd.sat.i32(i32 %lhs, i32 %rhs)
ret i32 %result
}
@@ -112,6 +138,13 @@ define <2 x i16> @v_uaddsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_add_u16 v0, v0, v1 clamp
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_add_u16 v0, v0, v1 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs)
ret <2 x i16> %result
}
@@ -154,6 +187,14 @@ define <3 x i16> @v_uaddsat_v3i16(<3 x i16> %lhs, <3 x i16> %rhs) {
; GFX9-NEXT: v_pk_add_u16 v1, v1, v3 clamp
; GFX9-NEXT: v_pk_add_u16 v0, v0, v2 clamp
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_v3i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_add_u16 v0, v0, v2 clamp
; GFX10-NEXT: v_pk_add_u16 v1, v1, v3 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%result = call <3 x i16> @llvm.uadd.sat.v3i16(<3 x i16> %lhs, <3 x i16> %rhs)
ret <3 x i16> %result
}
@@ -202,6 +243,14 @@ define <2 x float> @v_uaddsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; GFX9-NEXT: v_pk_add_u16 v0, v0, v2 clamp
; GFX9-NEXT: v_pk_add_u16 v1, v1, v3 clamp
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_v4i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_add_u16 v0, v0, v2 clamp
; GFX10-NEXT: v_pk_add_u16 v1, v1, v3 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%result = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
%cast = bitcast <4 x i16> %result to <2 x float>
ret <2 x float> %cast
@@ -232,6 +281,14 @@ define <2 x i32> @v_uaddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; GFX9-NEXT: v_add_u32_e64 v0, v0, v2 clamp
; GFX9-NEXT: v_add_u32_e64 v1, v1, v3 clamp
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_v2i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e64 v0, v0, v2 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v1, v1, v3 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
ret <2 x i32> %result
}
@@ -266,6 +323,15 @@ define <3 x i32> @v_uaddsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
; GFX9-NEXT: v_add_u32_e64 v1, v1, v4 clamp
; GFX9-NEXT: v_add_u32_e64 v2, v2, v5 clamp
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_v3i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e64 v0, v0, v3 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v1, v1, v4 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v2, v2, v5 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%result = call <3 x i32> @llvm.uadd.sat.v3i32(<3 x i32> %lhs, <3 x i32> %rhs)
ret <3 x i32> %result
}
@@ -305,6 +371,16 @@ define <4 x i32> @v_uaddsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; GFX9-NEXT: v_add_u32_e64 v2, v2, v6 clamp
; GFX9-NEXT: v_add_u32_e64 v3, v3, v7 clamp
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_v4i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e64 v0, v0, v4 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v1, v1, v5 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v2, v2, v6 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v3, v3, v7 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%result = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
ret <4 x i32> %result
}
@@ -364,6 +440,20 @@ define <8 x i32> @v_uaddsat_v8i32(<8 x i32> %lhs, <8 x i32> %rhs) {
; GFX9-NEXT: v_add_u32_e64 v6, v6, v14 clamp
; GFX9-NEXT: v_add_u32_e64 v7, v7, v15 clamp
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_v8i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e64 v0, v0, v8 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v1, v1, v9 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v2, v2, v10 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v3, v3, v11 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v4, v4, v12 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v5, v5, v13 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v6, v6, v14 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v7, v7, v15 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%result = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> %lhs, <8 x i32> %rhs)
ret <8 x i32> %result
}
@@ -463,6 +553,28 @@ define <16 x i32> @v_uaddsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
; GFX9-NEXT: v_add_u32_e64 v14, v14, v30 clamp
; GFX9-NEXT: v_add_u32_e64 v15, v15, v31 clamp
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_v16i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e64 v0, v0, v16 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v1, v1, v17 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v2, v2, v18 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v3, v3, v19 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v4, v4, v20 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v5, v5, v21 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v6, v6, v22 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v7, v7, v23 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v8, v8, v24 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v9, v9, v25 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v10, v10, v26 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v11, v11, v27 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v12, v12, v28 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v13, v13, v29 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v14, v14, v30 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v15, v15, v31 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%result = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> %lhs, <16 x i32> %rhs)
ret <16 x i32> %result
}
@@ -498,6 +610,17 @@ define i64 @v_uaddsat_i64(i64 %lhs, i64 %rhs) {
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, -1, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, -1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2
; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[2:3], v[0:1]
; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, -1, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, -1, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
%result = call i64 @llvm.uadd.sat.i64(i64 %lhs, i64 %rhs)
ret i64 %result
}