This commit is contained in:
Georgi Gerganov 2025-04-07 13:18:07 +03:00
parent 6232ceec72
commit 5ef588ba58
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735
3 changed files with 6 additions and 0 deletions

View File

@ -580,7 +580,11 @@ static __global__ void convert_unary(const void * __restrict__ vx, dst_t * __res
const src_t * x = (const src_t *) vx;
if constexpr (std::is_same_v<src_t, nv_bfloat16>) {
#if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
y[i] = (float)x[i];
#else
y[i] = __bfloat162float(x[i]);
#endif
} else if constexpr (std::is_same_v<dst_t, nv_bfloat16> && std::is_same_v<src_t, half>) {
y[i] = (float)x[i];
} else {

View File

@ -20,6 +20,7 @@
// Compatibility aliases: CUDA / cuBLAS spellings are redirected to HIP / hipBLAS names
// so that code written against the CUDA names can be preprocessed for the HIP build.
#define CUBLAS_STATUS_SUCCESS HIPBLAS_STATUS_SUCCESS
// Expands to a literal 0 rather than to a hipBLAS symbol.
// NOTE(review): presumably no TF32 math-mode equivalent is used on this backend - confirm.
#define CUBLAS_TF32_TENSOR_OP_MATH 0
// cudaDataType-style tags forwarded to the identically suffixed HIPBLAS_R_* names.
#define CUDA_R_16F HIPBLAS_R_16F
#define CUDA_R_16BF HIPBLAS_R_16BF
#define CUDA_R_32F HIPBLAS_R_32F
// Driver-API style (CU_*) constants forwarded to HIP runtime names.
#define CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED hipDeviceAttributeVirtualMemoryManagementSupported
#define CU_MEM_ALLOC_GRANULARITY_RECOMMENDED hipMemAllocationGranularityRecommended

View File

@ -15,6 +15,7 @@
// Compatibility aliases: CUDA / cuBLAS spellings are redirected to MUSA / muBLAS names
// so that code written against the CUDA names can be preprocessed for the MUSA build.
#define CUBLAS_STATUS_SUCCESS MUBLAS_STATUS_SUCCESS
// The TF32 math-mode request is replaced by the muBLAS default math mode.
#define CUBLAS_TF32_TENSOR_OP_MATH MUBLAS_MATH_MODE_DEFAULT
// cudaDataType-style tags forwarded to the identically suffixed MUSA_R_* names.
#define CUDA_R_16F MUSA_R_16F
#define CUDA_R_16BF MUSA_R_16BF
#define CUDA_R_32F MUSA_R_32F
// The compute-type name is aliased to cudaDataType_t instead of a mublas-prefixed type.
// NOTE(review): presumably cudaDataType_t is itself remapped elsewhere in this header - confirm.
#define cublasComputeType_t cudaDataType_t
#define cublasCreate mublasCreate