From 8ba95dca2065c0073698afdfcda4c8a8f08bf0d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?=
Date: Sun, 16 Mar 2025 18:46:36 +0100
Subject: [PATCH] llama : fix OLMo-2-0325-32B-Instruct K-norm size (#12400)

---
 src/llama-model.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index 750a702ff..4b288d8f6 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -1005,6 +1005,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                     case 16: type = LLM_TYPE_1B; break;
                     case 32: type = LLM_TYPE_7B; break;
                     case 40: type = LLM_TYPE_13B; break;
+                    case 64: type = LLM_TYPE_32B; break;
                     default: type = LLM_TYPE_UNKNOWN;
                 }
             } break;
@@ -2726,6 +2727,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                 } break;
             case LLM_ARCH_OLMO2:
                 {
+                    const int64_t n_embd_head = n_embd / n_head;
+
                     tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
 
                     // output
@@ -2740,7 +2743,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                         layer.wv = create_tensor(tn(LLM_TENSOR_ATTN_V, "weight", i), {n_embd, n_embd_gqa}, 0);
                         layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}, 0);
                         layer.attn_q_norm = create_tensor(tn(LLM_TENSOR_ATTN_Q_NORM, "weight", i), {n_embd}, 0);
-                        layer.attn_k_norm = create_tensor(tn(LLM_TENSOR_ATTN_K_NORM, "weight", i), {n_embd}, 0);
+                        layer.attn_k_norm = create_tensor(tn(LLM_TENSOR_ATTN_K_NORM, "weight", i), {n_head_kv * n_embd_head}, 0);
                         layer.attn_post_norm = create_tensor(tn(LLM_TENSOR_ATTN_POST_NORM, "weight", i), {n_embd}, 0);
 
                         layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, 0);
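
Note (not part of the patch): a minimal sketch of the dimension arithmetic behind the fix, using illustrative OLMo-2 32B shapes (n_embd = 5120, n_head = 40, n_head_kv = 8 are assumptions here; the authoritative values come from the model's GGUF metadata). With grouped-query attention the K projection output is narrower than the hidden size, so the K RMS-norm weight must have n_head_kv * n_embd_head elements rather than n_embd; for the earlier OLMo-2 sizes the two coincide because n_head_kv == n_head, which is why the old size only breaks on the 32B model.

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
    // Illustrative hyperparameters (assumed for this sketch, not read from a GGUF).
    const int64_t n_embd    = 5120; // hidden size
    const int64_t n_head    = 40;   // query heads
    const int64_t n_head_kv = 8;    // key/value heads (GQA)

    const int64_t n_embd_head = n_embd / n_head;         // per-head width: 128
    const int64_t n_embd_gqa  = n_head_kv * n_embd_head; // K/V projection width: 1024

    // The old code sized ATTN_K_NORM as {n_embd} (5120), but the K projection
    // output it normalizes only has n_embd_gqa (1024) elements.
    assert(n_embd_gqa != n_embd); // exactly the mismatch the patch fixes for GQA
    std::printf("ATTN_K_NORM size: %lld (was %lld)\n",
                (long long) n_embd_gqa, (long long) n_embd);
    return 0;
}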