contrib: support modelscope community (#12664)

* support download from modelscope

* support login

* remove comments

* add arguments

* fix code

* fix win32

* test passed

* fix readme

* revert readme

* change to MODEL_ENDPOINT

* revert tail line

* fix readme

* refactor model endpoint

* remove blank line

* fix header

* fix as comments

* update comment

* update readme

---------

Co-authored-by: tastelikefeet <yuze.zyz@alibaba-inc.com>
tastelikefeet 2025-04-11 20:01:56 +08:00 committed by GitHub
parent 06bb53ad9b
commit b2034c2b55
5 changed files with 30 additions and 12 deletions


@@ -260,7 +260,9 @@ The [Hugging Face](https://huggingface.co) platform hosts a [number of LLMs](htt
- [Trending](https://huggingface.co/models?library=gguf&sort=trending)
- [LLaMA](https://huggingface.co/models?sort=trending&search=llama+gguf)
You can either manually download the GGUF file or directly use any `llama.cpp`-compatible models from Hugging Face by using this CLI argument: `-hf <user>/<model>[:quant]`
You can either manually download the GGUF file or directly use any `llama.cpp`-compatible model from [Hugging Face](https://huggingface.co/) or another model hosting site, such as [ModelScope](https://modelscope.cn/), by using this CLI argument: `-hf <user>/<model>[:quant]`.
By default, the CLI downloads from Hugging Face; you can switch to another endpoint with the `MODEL_ENDPOINT` environment variable. For example, to pull model checkpoints from ModelScope or another model-sharing community, set `MODEL_ENDPOINT=https://www.modelscope.cn/`.
After downloading a model, use the CLI tools to run it locally - see below.
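
For a concrete picture of the behavior described above, here is a minimal sketch (illustrative repo and file names, not code from this PR) of how a `-hf <user>/<model>` argument combines with the chosen endpoint to form the final download URL:

```cpp
#include <cstdio>
#include <string>

int main() {
    // Illustrative values standing in for -hf <user>/<model> and the file it resolves to.
    const std::string hf_repo = "bartowski/Llama-3.2-1B-Instruct-GGUF";
    const std::string hf_file = "Llama-3.2-1B-Instruct-Q4_K_M.gguf";

    // Default Hugging Face endpoint vs. the ModelScope endpoint from the README example.
    for (const char * endpoint : { "https://huggingface.co/", "https://www.modelscope.cn/" }) {
        const std::string url = std::string(endpoint) + hf_repo + "/resolve/main/" + hf_file;
        std::printf("%s\n", url.c_str());
    }
    return 0;
}
```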


@@ -228,12 +228,13 @@ static bool common_download_file_single(const std::string & url, const std::stri
curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
// Check if hf-token or bearer-token was specified
if (!bearer_token.empty()) {
std::string auth_header = "Authorization: Bearer " + bearer_token;
http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str());
curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
}
curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
#if defined(_WIN32)
// CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
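
The change above moves the `CURLOPT_HTTPHEADER` call out of the bearer-token branch, so the header list, which always carries the `User-Agent`, is attached even for anonymous downloads. A self-contained sketch of the resulting logic using only standard libcurl calls (not the PR's exact RAII wrappers):

```cpp
#include <curl/curl.h>
#include <string>

// Attach headers the way the patched code does: User-Agent always,
// Authorization only when a token is present, the list applied either way.
static curl_slist * attach_headers(CURL * curl, const std::string & bearer_token) {
    curl_slist * headers = curl_slist_append(nullptr, "User-Agent: llama-cpp");
    if (!bearer_token.empty()) {
        const std::string auth = "Authorization: Bearer " + bearer_token;
        headers = curl_slist_append(headers, auth.c_str());
    }
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); // unconditional, as in the fix
    return headers; // caller frees with curl_slist_free_all after the transfer
}

int main() {
    CURL * curl = curl_easy_init();
    if (!curl) return 1;
    curl_slist * headers = attach_headers(curl, /*bearer_token=*/"");
    // ... configure the URL and perform the transfer here ...
    curl_slist_free_all(headers);
    curl_easy_cleanup(curl);
    return 0;
}
```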
@@ -544,7 +545,10 @@ static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_
curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
curl_slist_ptr http_headers;
std::string res_str;
std::string url = "https://huggingface.co/v2/" + hf_repo + "/manifests/" + tag;
std::string model_endpoint = get_model_endpoint();
std::string url = model_endpoint + "v2/" + hf_repo + "/manifests/" + tag;
curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
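
The trailing context shows the write-callback typedef used to collect the manifest response. For readers unfamiliar with the idiom, here is a generic sketch (not the PR's exact code) of capturing a response body into a `std::string` with libcurl:

```cpp
#include <curl/curl.h>
#include <string>

// libcurl write callback: append each received chunk to the std::string
// passed via CURLOPT_WRITEDATA, and report the number of bytes consumed.
static size_t write_to_string(void * ptr, size_t size, size_t nmemb, void * data) {
    static_cast<std::string *>(data)->append(static_cast<const char *>(ptr), size * nmemb);
    return size * nmemb;
}

int main() {
    CURL * curl = curl_easy_init();
    if (!curl) return 1;
    std::string res_str;
    curl_easy_setopt(curl, CURLOPT_URL, "https://huggingface.co/");
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_to_string);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &res_str);
    const CURLcode rc = curl_easy_perform(curl);
    curl_easy_cleanup(curl);
    return rc == CURLE_OK ? 0 : 1;
}
```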
@@ -659,13 +663,8 @@ static void common_params_handle_model(
}
}
std::string hf_endpoint = "https://huggingface.co/";
const char * hf_endpoint_env = getenv("HF_ENDPOINT");
if (hf_endpoint_env) {
hf_endpoint = hf_endpoint_env;
if (hf_endpoint.back() != '/') hf_endpoint += '/';
}
model.url = hf_endpoint + model.hf_repo + "/resolve/main/" + model.hf_file;
std::string model_endpoint = get_model_endpoint();
model.url = model_endpoint + model.hf_repo + "/resolve/main/" + model.hf_file;
// make sure model path is present (for caching purposes)
if (model.path.empty()) {
// this is to avoid different repo having same file name, or same file name in different subdirs
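
The context lines note that the cached path must encode more than the bare file name, since two repos can ship identically named files. A hypothetical naming scheme illustrating the idea (the actual logic in `common_params_handle_model` is not shown in this hunk and may differ):

```cpp
#include <algorithm>
#include <cstdio>
#include <string>

// Hypothetical: flatten "user/repo" into the cached file name so that
// same-named files from different repos land in different cache entries.
static std::string cache_name(std::string repo, const std::string & file) {
    std::replace(repo.begin(), repo.end(), '/', '_');
    return repo + "_" + file;
}

int main() {
    std::printf("%s\n", cache_name("userA/repo", "model.gguf").c_str()); // userA_repo_model.gguf
    std::printf("%s\n", cache_name("userB/repo", "model.gguf").c_str()); // userB_repo_model.gguf
    return 0;
}
```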


@@ -1027,6 +1027,19 @@ struct common_init_result common_init_from_params(common_params & params) {
return iparams;
}
std::string get_model_endpoint() {
const char * model_endpoint_env = getenv("MODEL_ENDPOINT");
// We still respect the "HF_ENDPOINT" environment variable for backward compatibility.
const char * hf_endpoint_env = getenv("HF_ENDPOINT");
const char * endpoint_env = model_endpoint_env ? model_endpoint_env : hf_endpoint_env;
std::string model_endpoint = "https://huggingface.co/";
if (endpoint_env) {
model_endpoint = endpoint_env;
if (model_endpoint.back() != '/') model_endpoint += '/';
}
return model_endpoint;
}
void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora) {
llama_clear_adapter_lora(ctx);
for (auto & la : lora) {
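
A quick check of the precedence implemented by the new helper: `MODEL_ENDPOINT` wins, `HF_ENDPOINT` is honored for backward compatibility, and a trailing slash is appended when missing. The sketch below mirrors (rather than links against) the function; `setenv`/`unsetenv` are POSIX, so Windows would need `_putenv_s` instead, and the mirror URL is illustrative:

```cpp
#include <cstdio>
#include <cstdlib>
#include <string>

// Mirror of the get_model_endpoint() helper added above, for demonstration.
static std::string get_model_endpoint_sketch() {
    const char * model_endpoint_env = getenv("MODEL_ENDPOINT");
    const char * hf_endpoint_env    = getenv("HF_ENDPOINT");
    const char * endpoint_env = model_endpoint_env ? model_endpoint_env : hf_endpoint_env;
    std::string model_endpoint = "https://huggingface.co/";
    if (endpoint_env) {
        model_endpoint = endpoint_env;
        if (model_endpoint.back() != '/') model_endpoint += '/';
    }
    return model_endpoint;
}

int main() {
    unsetenv("MODEL_ENDPOINT");
    unsetenv("HF_ENDPOINT");
    std::printf("%s\n", get_model_endpoint_sketch().c_str()); // default: https://huggingface.co/

    setenv("HF_ENDPOINT", "https://example.com/hf-mirror", 1); // no trailing slash
    std::printf("%s\n", get_model_endpoint_sketch().c_str());  // slash appended

    setenv("MODEL_ENDPOINT", "https://www.modelscope.cn/", 1); // overrides HF_ENDPOINT
    std::printf("%s\n", get_model_endpoint_sketch().c_str());
    return 0;
}
```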


@@ -543,6 +543,8 @@ struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_p
// clear LoRA adapters from context, then apply new list of adapters
void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora);
std::string get_model_endpoint();
//
// Batch utils
//


@@ -697,8 +697,10 @@ class LlamaData {
std::vector<std::string> headers = { "User-Agent: llama-cpp", "Accept: application/json" };
std::string url;
std::string model_endpoint = get_model_endpoint();
if (pos == std::string::npos) {
auto [model_name, manifest_url] = extract_model_and_tag(model, "https://huggingface.co/v2/");
auto [model_name, manifest_url] = extract_model_and_tag(model, model_endpoint + "v2/");
hfr = model_name;
nlohmann::json manifest;
@@ -713,7 +715,7 @@
hff = model.substr(pos + 1);
}
url = "https://huggingface.co/" + hfr + "/resolve/main/" + hff;
url = model_endpoint + hfr + "/resolve/main/" + hff;
return download(url, bn, true, headers);
}
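
Finally, the run example splits an optional `:tag` suffix off the model reference before building the manifest URL. A sketch of that split, assuming a default tag of `latest`; the real `extract_model_and_tag` called above may differ in detail:

```cpp
#include <cstdio>
#include <string>
#include <utility>

// Split "user/model[:tag]" into the model name and a manifest URL rooted at
// the given endpoint; "latest" is assumed when no tag is given.
static std::pair<std::string, std::string> split_model_and_tag(
        const std::string & model, const std::string & base_url) {
    std::string name = model;
    std::string tag  = "latest";
    const size_t colon = model.rfind(':');
    if (colon != std::string::npos) {
        name = model.substr(0, colon);
        tag  = model.substr(colon + 1);
    }
    return { name, base_url + name + "/manifests/" + tag };
}

int main() {
    const auto [name, url] = split_model_and_tag(
        "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", "https://www.modelscope.cn/v2/");
    std::printf("%s\n%s\n", name.c_str(), url.c_str());
    return 0;
}
```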