From b2034c2b55b36b2192bdefb3b295db2a911370f5 Mon Sep 17 00:00:00 2001
From: tastelikefeet <58414341+tastelikefeet@users.noreply.github.com>
Date: Fri, 11 Apr 2025 20:01:56 +0800
Subject: [PATCH] contrib: support modelscope community (#12664)

* support download from modelscope

* support login

* remove comments

* add arguments

* fix code

* fix win32

* test passed

* fix readme

* revert readme

* change to MODEL_ENDPOINT

* revert tail line

* fix readme

* refactor model endpoint

* remove blank line

* fix header

* fix as comments

* update comment

* update readme

---------

Co-authored-by: tastelikefeet
---
 README.md            |  4 +++-
 common/arg.cpp       | 17 ++++++++---------
 common/common.cpp    | 13 +++++++++++++
 common/common.h      |  2 ++
 examples/run/run.cpp |  6 ++++--
 5 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index a129d27d5..cf45f23cf 100644
--- a/README.md
+++ b/README.md
@@ -260,7 +260,9 @@ The [Hugging Face](https://huggingface.co) platform hosts a [number of LLMs](htt
 - [Trending](https://huggingface.co/models?library=gguf&sort=trending)
 - [LLaMA](https://huggingface.co/models?sort=trending&search=llama+gguf)
 
-You can either manually download the GGUF file or directly use any `llama.cpp`-compatible models from Hugging Face by using this CLI argument: `-hf <user>/<model>[:quant]`
+You can either manually download the GGUF file or directly use any `llama.cpp`-compatible model from [Hugging Face](https://huggingface.co/) or other model hosting sites, such as [ModelScope](https://modelscope.cn/), by using this CLI argument: `-hf <user>/<model>[:quant]`.
+
+By default, the CLI downloads from Hugging Face; you can switch to another host with the `MODEL_ENDPOINT` environment variable. For example, to download model checkpoints from ModelScope or another model-sharing community, set `MODEL_ENDPOINT=https://www.modelscope.cn/`.
 
 After downloading a model, use the CLI tools to run it locally - see below.
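To make the new README text concrete, here is a minimal, self-contained C++ sketch (not part of the patch) of how a configured endpoint combines with a repo to form the two URL shapes the downloader uses after this change: a manifest URL and a file URL. The names `org/model`, `latest`, and `model.gguf` are hypothetical placeholders.

```cpp
#include <iostream>
#include <string>

// Sketch only: URL shapes assumed from the diffs below; `endpoint` is
// whatever get_model_endpoint() returns and always ends with '/'.
static std::string manifest_url(const std::string & endpoint, const std::string & repo, const std::string & tag) {
    return endpoint + "v2/" + repo + "/manifests/" + tag;
}

static std::string file_url(const std::string & endpoint, const std::string & repo, const std::string & file) {
    return endpoint + repo + "/resolve/main/" + file;
}

int main() {
    const std::string endpoint = "https://www.modelscope.cn/"; // e.g. set via MODEL_ENDPOINT
    std::cout << manifest_url(endpoint, "org/model", "latest") << "\n";
    std::cout << file_url(endpoint, "org/model", "model.gguf") << "\n";
    return 0;
}
```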
diff --git a/common/arg.cpp b/common/arg.cpp
index 642fefb57..0b57f9da1 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -228,12 +228,13 @@ static bool common_download_file_single(const std::string & url, const std::stri
     curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
     curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
+    http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
 
     // Check if hf-token or bearer-token was specified
     if (!bearer_token.empty()) {
         std::string auth_header = "Authorization: Bearer " + bearer_token;
         http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str());
-        curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
     }
+    curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
 
 #if defined(_WIN32)
     // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
@@ -544,7 +545,10 @@ static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_
     curl_ptr       curl(curl_easy_init(), &curl_easy_cleanup);
     curl_slist_ptr http_headers;
     std::string    res_str;
-    std::string url = "https://huggingface.co/v2/" + hf_repo + "/manifests/" + tag;
+
+    std::string model_endpoint = get_model_endpoint();
+
+    std::string url = model_endpoint + "v2/" + hf_repo + "/manifests/" + tag;
     curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
     curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
     typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
@@ -659,13 +663,8 @@ static void common_params_handle_model(
         }
     }
 
-    std::string hf_endpoint = "https://huggingface.co/";
-    const char * hf_endpoint_env = getenv("HF_ENDPOINT");
-    if (hf_endpoint_env) {
-        hf_endpoint = hf_endpoint_env;
-        if (hf_endpoint.back() != '/') hf_endpoint += '/';
-    }
-    model.url = hf_endpoint + model.hf_repo + "/resolve/main/" + model.hf_file;
+    std::string model_endpoint = get_model_endpoint();
+    model.url = model_endpoint + model.hf_repo + "/resolve/main/" + model.hf_file;
     // make sure model path is present (for caching purposes)
     if (model.path.empty()) {
         // this is to avoid different repo having same file name, or same file name in different subdirs
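For context on the header change in `common_download_file_single` above, this hedged sketch (simplified names, plain libcurl types instead of the patch's RAII wrappers) shows the resulting behavior: the `User-Agent` header is always appended, and `CURLOPT_HTTPHEADER` is now set unconditionally rather than only when a bearer token is present.

```cpp
#include <curl/curl.h>
#include <string>

// Simplified sketch of the post-patch header logic: the list always carries
// a User-Agent entry, optionally an Authorization entry, and is attached to
// the handle in every case (previously only when a token was set).
static curl_slist * apply_headers(CURL * curl, const std::string & bearer_token) {
    curl_slist * headers = curl_slist_append(nullptr, "User-Agent: llama-cpp");
    if (!bearer_token.empty()) {
        const std::string auth_header = "Authorization: Bearer " + bearer_token;
        headers = curl_slist_append(headers, auth_header.c_str());
    }
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
    return headers; // caller frees with curl_slist_free_all() after the transfer
}
```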
diff --git a/common/common.cpp b/common/common.cpp
index d4882c512..4e1a020d0 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1027,6 +1027,19 @@ struct common_init_result common_init_from_params(common_params & params) {
     return iparams;
 }
 
+std::string get_model_endpoint() {
+    const char * model_endpoint_env = getenv("MODEL_ENDPOINT");
+    // We still respect the environment variable "HF_ENDPOINT" for backward compatibility.
+    const char * hf_endpoint_env = getenv("HF_ENDPOINT");
+    const char * endpoint_env = model_endpoint_env ? model_endpoint_env : hf_endpoint_env;
+    std::string model_endpoint = "https://huggingface.co/";
+    if (endpoint_env) {
+        model_endpoint = endpoint_env;
+        if (model_endpoint.back() != '/') model_endpoint += '/';
+    }
+    return model_endpoint;
+}
+
 void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora) {
     llama_clear_adapter_lora(ctx);
     for (auto & la : lora) {
diff --git a/common/common.h b/common/common.h
index 725b5123d..e6eaa8e80 100644
--- a/common/common.h
+++ b/common/common.h
@@ -543,6 +543,8 @@ struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_p
 // clear LoRA adapters from context, then apply new list of adapters
 void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora);
 
+std::string get_model_endpoint();
+
 //
 // Batch utils
 //
diff --git a/examples/run/run.cpp b/examples/run/run.cpp
index 68e94b0b3..e63c2aac3 100644
--- a/examples/run/run.cpp
+++ b/examples/run/run.cpp
@@ -697,8 +697,10 @@ class LlamaData {
         std::vector<std::string> headers = { "User-Agent: llama-cpp", "Accept: application/json" };
         std::string              url;
 
+        std::string model_endpoint = get_model_endpoint();
+
         if (pos == std::string::npos) {
-            auto [model_name, manifest_url] = extract_model_and_tag(model, "https://huggingface.co/v2/");
+            auto [model_name, manifest_url] = extract_model_and_tag(model, model_endpoint + "v2/");
             hfr = model_name;
 
             nlohmann::json manifest;
@@ -713,7 +715,7 @@ class LlamaData {
             hff = model.substr(pos + 1);
         }
 
-        url = "https://huggingface.co/" + hfr + "/resolve/main/" + hff;
+        url = model_endpoint + hfr + "/resolve/main/" + hff;
 
         return download(url, bn, true, headers);
     }
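Finally, a small self-contained sketch of the precedence that `get_model_endpoint()` implements: `MODEL_ENDPOINT` wins over the legacy `HF_ENDPOINT`, which wins over the Hugging Face default, and a missing trailing slash is appended. It uses POSIX `setenv` for brevity, so it is illustrative rather than portable to Windows.

```cpp
#include <cassert>
#include <cstdlib>
#include <string>

// Mirrors get_model_endpoint() from common/common.cpp:
// MODEL_ENDPOINT > HF_ENDPOINT > default, trailing '/' normalized.
static std::string resolve_endpoint() {
    const char * model_env = std::getenv("MODEL_ENDPOINT");
    const char * hf_env    = std::getenv("HF_ENDPOINT");
    const char * env       = model_env ? model_env : hf_env;
    std::string endpoint   = "https://huggingface.co/";
    if (env) {
        endpoint = env;
        if (endpoint.back() != '/') endpoint += '/';
    }
    return endpoint;
}

int main() {
    assert(resolve_endpoint() == "https://huggingface.co/");    // assumes a clean environment
    setenv("HF_ENDPOINT", "https://hf-mirror.com", 1);          // legacy variable, still honored
    assert(resolve_endpoint() == "https://hf-mirror.com/");     // slash appended
    setenv("MODEL_ENDPOINT", "https://www.modelscope.cn/", 1);
    assert(resolve_endpoint() == "https://www.modelscope.cn/"); // takes precedence
    return 0;
}
```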