1
0
mirror of https://github.com/ggerganov/llama.cpp.git synced 2025-04-26 09:26:06 +00:00

ggml : prevent builds with -ffinite-math-only ()

This enforces a check that -fno-finite-math-only is set, i.e. that the compiler
is not operating in finite-math-only mode. During the rewrite of SiLU and
softmax for the CPU, an issue emerged where the results observed with more than
one slot were nondeterministic, as found by @JohannesGaessler.

@LostRuins narrowed the problem down to -ffinite-math-only; the theory is that,
under this flag, SiLU returns NaN or some other garbage instead of flushing
small values to 0. @jart proposed a fix, which @ggerganov then implemented in this commit.

ref https://github.com/ggerganov/llama.cpp/pull/7154#issuecomment-2145661825
This commit is contained in:
Georgi Gerganov 2024-06-04 10:01:09 +03:00 committed by GitHub
parent bde7cd3cd9
commit 6d1616944d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 6 additions and 1 deletions

@ -9,7 +9,7 @@ set( CMAKE_CXX_COMPILER clang++ )
set( CMAKE_C_COMPILER_TARGET ${target} ) set( CMAKE_C_COMPILER_TARGET ${target} )
set( CMAKE_CXX_COMPILER_TARGET ${target} ) set( CMAKE_CXX_COMPILER_TARGET ${target} )
set( arch_c_flags "-march=armv8.7-a -fvectorize -ffp-model=fast" ) set( arch_c_flags "-march=armv8.7-a -fvectorize -ffp-model=fast -fno-finite-math-only" )
set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function -Wno-gnu-zero-variadic-macro-arguments" ) set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function -Wno-gnu-zero-variadic-macro-arguments" )
set( CMAKE_C_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" ) set( CMAKE_C_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )

5
ggml.c

@ -2272,6 +2272,11 @@ inline static float ggml_silu_f32(float x) {
return x/(1.0f + expf(-x)); return x/(1.0f + expf(-x));
} }
#if __FINITE_MATH_ONLY__
#error "some routines in ggml.c require non-finite math arithmetics -- pass -fno-finite-math-only to the compiler to fix"
#error "ref: https://github.com/ggerganov/llama.cpp/pull/7154#issuecomment-2143844461"
#endif
#if defined(__ARM_NEON) && defined(__aarch64__) #if defined(__ARM_NEON) && defined(__aarch64__)
// adapted from arm limited optimized routine // adapted from arm limited optimized routine