[libclc][amdgpu] Implement native_exp2 via AMD builtin (#133696)

This came up during a discussion on #129679 and has been split out as
a preparatory commit.

An example of the AMDGPU codegen is:

    define <2 x float> @_Z10native_expDv2_f(<2 x float> %val) {
      %mul = fmul afn <2 x float> %val, splat (float 0x3FF7154760000000)
      %0 = extractelement <2 x float> %mul, i64 0
      %1 = tail call float @llvm.amdgcn.exp2.f32(float %0)
      %vecinit.i = insertelement <2 x float> poison, float %1, i64 0
      %2 = extractelement <2 x float> %mul, i64 1
      %3 = tail call float @llvm.amdgcn.exp2.f32(float %2)
      %vecinit2.i = insertelement <2 x float> %vecinit.i, float %3, i64 1
      ret <2 x float> %vecinit2.i
    }

    define <2 x float> @_Z11native_exp2Dv2_f(<2 x float> %x) {
      %0 = extractelement <2 x float> %x, i64 0
      %1 = tail call float @llvm.amdgcn.exp2.f32(float %0)
      %vecinit = insertelement <2 x float> poison, float %1, i64 0
      %2 = extractelement <2 x float> %x, i64 1
      %3 = tail call float @llvm.amdgcn.exp2.f32(float %2)
      %vecinit2 = insertelement <2 x float> %vecinit, float %3, i64 1
      ret <2 x float> %vecinit2
    }
This commit is contained in:
Fraser Cormack 2025-03-31 16:54:04 +01:00 committed by GitHub
parent ea06f7f96f
commit 3fd0eaae52
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 17 additions and 0 deletions

View File

@ -1,4 +1,5 @@
math/native_exp.cl
math/native_exp2.cl
math/native_log.cl
math/native_log10.cl
math/half_exp.cl

View File

@ -0,0 +1,16 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <clc/clc.h>
#include <clc/clcmacro.h>
// Scalar float overload of native_exp2: hands the argument straight to the
// AMDGPU exp2 builtin and returns whatever the builtin produces.
_CLC_OVERLOAD _CLC_DEF float native_exp2(float val) {
  const float result = __builtin_amdgcn_exp2f(val);
  return result;
}
// Instantiate the vector float overloads of native_exp2 from the scalar float
// overload in this file.
// NOTE(review): presumably the macro applies the scalar function to each
// vector element in turn; confirm against the _CLC_UNARY_VECTORIZE definition.
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, native_exp2, float)