From f11cfdfd7fe29436fce512d934c2ff6b94bd89d2 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Wed, 15 Jan 2025 18:28:35 +0200
Subject: [PATCH] ci : use -no-cnv in gguf-split tests (#11254)

* ci : use -no-cnv in gguf-split tests

ggml-ci

* ci : use -no-cnv in requantize tests

ggml-ci

* scripts : fix [no ci]
---
 examples/gguf-split/tests.sh            |  10 +--
 examples/quantize/tests.sh              |   4 +-
 scripts/hf.sh                           | 112 ------------------------
 tests/test-lora-conversion-inference.sh |   6 +-
 4 files changed, 10 insertions(+), 122 deletions(-)
 delete mode 100755 scripts/hf.sh

diff --git a/examples/gguf-split/tests.sh b/examples/gguf-split/tests.sh
index d5a92d605..05a932227 100755
--- a/examples/gguf-split/tests.sh
+++ b/examples/gguf-split/tests.sh
@@ -41,7 +41,7 @@ echo PASS
 echo
 
 # 2b. Test the sharded model is loading properly
-$MAIN --model $WORK_PATH/ggml-model-split-00001-of-00006.gguf --n-predict 32
+$MAIN -no-cnv --model $WORK_PATH/ggml-model-split-00001-of-00006.gguf --n-predict 32
 echo PASS
 echo
 
@@ -51,7 +51,7 @@ echo PASS
 echo
 
 # 3b. Test the merged model is loading properly
-$MAIN --model $WORK_PATH/ggml-model-merge.gguf --n-predict 32
+$MAIN -no-cnv --model $WORK_PATH/ggml-model-merge.gguf --n-predict 32
 echo PASS
 echo
 
@@ -61,7 +61,7 @@ echo PASS
 echo
 
 # 4b. Test the sharded model is loading properly
-$MAIN --model $WORK_PATH/ggml-model-split-32-tensors-00001-of-00007.gguf --n-predict 32
+$MAIN -no-cnv --model $WORK_PATH/ggml-model-split-32-tensors-00001-of-00007.gguf --n-predict 32
 echo PASS
 echo
 
@@ -71,7 +71,7 @@ echo
 #echo
 
 # 5b. Test the merged model is loading properly
-#$MAIN --model $WORK_PATH/ggml-model-merge-2.gguf --n-predict 32
+#$MAIN -no-cnv --model $WORK_PATH/ggml-model-merge-2.gguf --n-predict 32
 #echo PASS
 #echo
 
@@ -81,7 +81,7 @@ echo PASS
 echo
 
 # 6b. Test the sharded model is loading properly
-$MAIN --model $WORK_PATH/ggml-model-split-2G-00001-of-00002.gguf --n-predict 32
+$MAIN -no-cnv --model $WORK_PATH/ggml-model-split-2G-00001-of-00002.gguf --n-predict 32
 echo PASS
 echo
 
diff --git a/examples/quantize/tests.sh b/examples/quantize/tests.sh
index 24bc970e8..70f7610f9 100644
--- a/examples/quantize/tests.sh
+++ b/examples/quantize/tests.sh
@@ -47,7 +47,7 @@ echo PASS
 echo
 
 # 3a. Test the requanted model is loading properly
-$MAIN --model $WORK_PATH/ggml-model-requant-00001-of-00006.gguf --n-predict 32
+$MAIN -no-cnv --model $WORK_PATH/ggml-model-requant-00001-of-00006.gguf --n-predict 32
 echo PASS
 echo
 
@@ -57,7 +57,7 @@ echo PASS
 echo
 
 # 4b. Test the requanted model is loading properly
-$MAIN --model $WORK_PATH/ggml-model-requant-merge.gguf --n-predict 32
+$MAIN -no-cnv --model $WORK_PATH/ggml-model-requant-merge.gguf --n-predict 32
 echo PASS
 echo
 
diff --git a/scripts/hf.sh b/scripts/hf.sh
deleted file mode 100755
index b251925fa..000000000
--- a/scripts/hf.sh
+++ /dev/null
@@ -1,112 +0,0 @@
-#!/bin/bash
-#
-# Shortcut for downloading HF models
-#
-# Usage:
-#   ./llama-cli -m $(./scripts/hf.sh https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/resolve/main/mixtral-8x7b-v0.1.Q4_K_M.gguf)
-#   ./llama-cli -m $(./scripts/hf.sh --url https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/blob/main/mixtral-8x7b-v0.1.Q4_K_M.gguf)
-#   ./llama-cli -m $(./scripts/hf.sh --repo TheBloke/Mixtral-8x7B-v0.1-GGUF --file mixtral-8x7b-v0.1.Q4_K_M.gguf)
-#
-
-# all logs go to stderr
-function log {
-    echo "$@" 1>&2
-}
-
-function usage {
-    log "Usage: $0 [[--url] <url>] [--repo <repo>] [--file <file>] [--outdir <dir>] [-h|--help]"
-    exit 1
-}
-
-# check for curl or wget
-function has_cmd {
-    if ! [ -x "$(command -v $1)" ]; then
[ -x "$(command -v $1)" ]; then - return 1 - fi -} - -if has_cmd wget; then - cmd="wget -q -c -O %s/%s %s" -elif has_cmd curl; then - cmd="curl -C - -f --output-dir %s -o %s -L %s" -else - log "[E] curl or wget not found" - exit 1 -fi - -url="" -repo="" -file="" -outdir="." - -# parse args -while [[ $# -gt 0 ]]; do - case "$1" in - --url) - url="$2" - shift 2 - ;; - --repo) - repo="$2" - shift 2 - ;; - --file) - file="$2" - shift 2 - ;; - --outdir) - outdir="$2" - shift 2 - ;; - -h|--help) - usage - ;; - *) - url="$1" - shift - ;; - esac -done - -if [ -n "$repo" ] && [ -n "$file" ]; then - url="https://huggingface.co/$repo/resolve/main/$file" -fi - -if [ -z "$url" ]; then - log "[E] missing --url" - usage -fi - -# check if the URL is a HuggingFace model, and if so, try to download it -is_url=false - -if [[ ${#url} -gt 22 ]]; then - if [[ ${url:0:22} == "https://huggingface.co" ]]; then - is_url=true - fi -fi - -if [ "$is_url" = false ]; then - log "[E] invalid URL, must start with https://huggingface.co" - exit 0 -fi - -# replace "blob/main" with "resolve/main" -url=${url/blob\/main/resolve\/main} - -basename=$(basename $url) - -log "[+] attempting to download $basename" - -if [ -n "$cmd" ]; then - cmd=$(printf "$cmd" "$outdir" "$basename" "$url") - log "[+] $cmd" - if $cmd; then - echo $outdir/$basename - exit 0 - fi -fi - -log "[-] failed to download" - -exit 1 diff --git a/tests/test-lora-conversion-inference.sh b/tests/test-lora-conversion-inference.sh index fb308a9ff..1d1f4886c 100755 --- a/tests/test-lora-conversion-inference.sh +++ b/tests/test-lora-conversion-inference.sh @@ -80,18 +80,18 @@ run_conversion_and_inference_lora() { # Run inference echo -e "\n\n---------------------------\n\n" echo "Running llama-cli without lora for $model_name with hidden_size $hidden_size..." - OUTPUT_BASE=$(./llama-cli -m $MODELS_REPO/$model_name/hidden_size=$hidden_size/base/Base-F32.gguf \ + OUTPUT_BASE=$(./llama-cli -no-cnv -m $MODELS_REPO/$model_name/hidden_size=$hidden_size/base/Base-F32.gguf \ -p "$EXPECTED_BASE_FIRST_WORD" -n 50 --seed 42 --temp 0) echo -e "\n\n---------------------------\n\n" echo "Running llama-cli with hot lora for $model_name with hidden_size $hidden_size..." - OUTPUT_LORA_HOT=$(./llama-cli -m $MODELS_REPO/$model_name/hidden_size=$hidden_size/base/Base-F32.gguf \ + OUTPUT_LORA_HOT=$(./llama-cli -no-cnv -m $MODELS_REPO/$model_name/hidden_size=$hidden_size/base/Base-F32.gguf \ --lora $MODELS_REPO/$model_name/hidden_size=$hidden_size/lora/Lora-F32-LoRA.gguf \ -p "$EXPECTED_LORA_FIRST_WORD" -n 50 --seed 42 --temp 0) echo -e "\n\n---------------------------\n\n" echo "Running llama-cli with merged lora for $model_name with hidden_size $hidden_size..." - OUTPUT_LORA_MERGED=$(./llama-cli -m $MODELS_REPO/$model_name/hidden_size=$hidden_size/base/Base-F32-lora-merged.gguf \ + OUTPUT_LORA_MERGED=$(./llama-cli -no-cnv -m $MODELS_REPO/$model_name/hidden_size=$hidden_size/base/Base-F32-lora-merged.gguf \ -p "$EXPECTED_LORA_FIRST_WORD" -n 50 --seed 42 --temp 0) # Remove any initial white space