From 251364549fe4a78d4e2e66f1adfaf2bd53041d2c Mon Sep 17 00:00:00 2001 From: R0CKSTAR Date: Tue, 11 Mar 2025 01:18:25 +0800 Subject: [PATCH] musa: support new arch mp_31 and update doc (#12296) Signed-off-by: Xiaodong Ye --- Makefile | 2 +- docs/build.md | 48 +++++++++++++++++++++++-------- ggml/src/ggml-musa/CMakeLists.txt | 2 +- 3 files changed, 38 insertions(+), 14 deletions(-) diff --git a/Makefile b/Makefile index 5339d490b..1f9455eff 100644 --- a/Makefile +++ b/Makefile @@ -836,7 +836,7 @@ ifdef GGML_MUSA else MUSA_PATH ?= /opt/musa endif - MUSA_ARCHITECTURES ?= 21;22 + MUSA_ARCHITECTURES ?= 21;22;31 MK_CPPFLAGS += -DGGML_USE_MUSA -DGGML_USE_CUDA MK_LDFLAGS += -L$(MUSA_PATH)/lib -Wl,-rpath=$(MUSA_PATH)/lib diff --git a/docs/build.md b/docs/build.md index 3d8333328..2e3975c14 100644 --- a/docs/build.md +++ b/docs/build.md @@ -197,29 +197,53 @@ The following compilation options are also available to tweak performance: ## MUSA -This provides GPU acceleration using the MUSA cores of your Moore Threads MTT GPU. Make sure to have the MUSA SDK installed. You can download it from here: [MUSA SDK](https://developer.mthreads.com/sdk/download/musa). +This provides GPU acceleration using a Moore Threads GPU. Make sure to have the [MUSA SDK](https://developer.mthreads.com/musa/musa-sdk) installed. -- Using `CMake`: +#### Download directly from Moore Threads - ```bash - cmake -B build -DGGML_MUSA=ON - cmake --build build --config Release +You may find the official downloads here: [Moore Threads developer site](https://developer.mthreads.com/sdk/download/musa). + +### Compilation + +```bash +cmake -B build -DGGML_MUSA=ON +cmake --build build --config Release +``` + +#### Override Compute Capability Specifications + +By default, all supported compute capabilities are enabled. To customize this behavior, you can specify the `MUSA_ARCHITECTURES` option in the CMake command: + +```bash +cmake -B build -DGGML_MUSA=ON -DMUSA_ARCHITECTURES="21" +``` + +This configuration enables only compute capability `2.1` (MTT S80) during compilation, which can help reduce compilation time. + +#### Compilation options + +Most of the compilation options available for CUDA should also be available for MUSA, though they haven't been thoroughly tested yet. + +- For static builds, add `-DBUILD_SHARED_LIBS=OFF` and `-DCMAKE_POSITION_INDEPENDENT_CODE=ON`: ``` - - For static build: - - ```bash cmake -B build -DGGML_MUSA=ON \ -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON cmake --build build --config Release ``` -The environment variable [`MUSA_VISIBLE_DEVICES`](https://docs.mthreads.com/musa-sdk/musa-sdk-doc-online/programming_guide/Z%E9%99%84%E5%BD%95/) can be used to specify which GPU(s) will be used. +### Runtime MUSA environmental variables + +You may set the [musa environmental variables](https://docs.mthreads.com/musa-sdk/musa-sdk-doc-online/programming_guide/Z%E9%99%84%E5%BD%95/) at runtime. + +```bash +# Use `MUSA_VISIBLE_DEVICES` to hide the first compute device. +MUSA_VISIBLE_DEVICES="-0" ./build/bin/llama-server --model /srv/models/llama.gguf +``` + +### Unified Memory The environment variable `GGML_CUDA_ENABLE_UNIFIED_MEMORY=1` can be used to enable unified memory in Linux. This allows swapping to system RAM instead of crashing when the GPU VRAM is exhausted. -Most of the compilation options available for CUDA should also be available for MUSA, though they haven't been thoroughly tested yet. - ## HIP This provides GPU acceleration on HIP-supported AMD GPUs. diff --git a/ggml/src/ggml-musa/CMakeLists.txt b/ggml/src/ggml-musa/CMakeLists.txt index 2c75abf61..166970ca6 100644 --- a/ggml/src/ggml-musa/CMakeLists.txt +++ b/ggml/src/ggml-musa/CMakeLists.txt @@ -21,7 +21,7 @@ if (MUSAToolkit_FOUND) message(STATUS "MUSA Toolkit found") if (NOT DEFINED MUSA_ARCHITECTURES) - set(MUSA_ARCHITECTURES "21;22") + set(MUSA_ARCHITECTURES "21;22;31") endif() message(STATUS "Using MUSA architectures: ${MUSA_ARCHITECTURES}")