mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-04-16 03:26:08 +00:00
Use k_pe = ggml_reshape
This commit is contained in:
parent
815f4f9ecf
commit
57788614a0
@@ -9661,11 +9661,8 @@ struct llm_build_deepseek2 : public llm_graph_context {
         q_states = ggml_permute(ctx0, q_states, 0, 2, 1, 3);
         cb(q_states, "q_states_perm", il);

-        k_pe = ggml_view_2d(ctx0, k_pe,
-                n_embd_head_qk_rope, n_tokens,
-                ggml_row_size(k_pe->type, n_embd_head_qk_rope),
-                0);
-        cb(k_pe, "k_pe_view", il);
+        k_pe = ggml_reshape_2d(ctx0, k_pe, n_embd_head_qk_rope, n_tokens);
+        cb(k_pe, "k_pe_reshape", il);

         ggml_tensor * k_states = ggml_concat(ctx0, k_pe, kv_cmpr, 0);
         cb(k_states, "k_states", il);
Loading…
x
Reference in New Issue
Block a user