llama : recognize IBM Granite 3.3 FIM tokens (#12988)

Granite's FIM tokens are very similar to Qwen's; the only difference is that they use an underscore instead of a dash, e.g. `<fim_middle>` instead of `<fim-middle>`.

Opening tokenizer_config.json in ibm-granite/granite-3.3-8b-base shows:

```
"<fim_prefix>",
"<fim_middle>",
"<fim_suffix>",
"<fim_pad>",
...
"<reponame>",
```
2025-04-19 13:06:10 +00:00 · 2025-04-17 01:37:05 -07:00 · 2025-04-17 01:37:05 -07:00 · 971f245b3b
commit 971f245b3b
parent 12b17501e6
1 changed files with 5 additions and 0 deletions
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@ -1841,6 +1841,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                if (false
                        || t.first == "<|fim_prefix|>"  // Qwen
                        || t.first == "<fim-prefix>"
+                        || t.first == "<fim_prefix>"    // Granite
                        || t.first == "<｜fim▁begin｜>" // DeepSeek
                        || t.first == "<PRE>"
                        || t.first == "▁<PRE>"          // CodeLlama
@ -1859,6 +1860,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                if (false
                        || t.first == "<|fim_suffix|>" // Qwen
                        || t.first == "<fim-suffix>"
+                        || t.first == "<fim_suffix>"   // Granite
                        || t.first == "<｜fim▁hole｜>" // DeepSeek
                        || t.first == "<SUF>"
                        || t.first == "▁<SUF>"         // CodeLlama
@ -1877,6 +1879,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                if (false
                        || t.first == "<|fim_middle|>" // Qwen
                        || t.first == "<fim-middle>"
+                        || t.first == "<fim_middle>"   // Granite
                        || t.first == "<｜fim▁end｜>"  // DeepSeek
                        || t.first == "<MID>"
                        || t.first == "▁<MID>"         // CodeLlama
@ -1895,6 +1898,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                if (false
                        || t.first == "<|fim_pad|>" // Qwen
                        || t.first == "<fim-pad>"
+                        || t.first == "<fim_pad>"   // Granite
                        || t.first == "<PAD>"
                        ) {
                    special_fim_pad_id = t.second;
@ -1913,6 +1917,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                        || t.first == "<|repo_name|>"
                        || t.first == "<fim-repo>"
                        || t.first == "<REPO>"
+                        || t.first == "<reponame>"    // Granite
                        ) {
                    special_fim_rep_id = t.second;
                    if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {