diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 8cb8d0033..ce0195475 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2040,6 +2040,18 @@ struct server_context {
         return ret;
     }

+    bool can_be_detokenized(const struct llama_context * ctx, const std::vector<llama_token> & tokens) {
+        const llama_model * model = llama_get_model(ctx);
+        const llama_vocab * vocab = llama_model_get_vocab(model);
+        const int32_t n_vocab = llama_vocab_n_tokens(vocab);
+        for (const auto & token : tokens) {
+            if (token < 0 || token >= n_vocab) {
+                return false;
+            }
+        }
+        return true;
+    }
+
     bool launch_slot_with_task(server_slot & slot, const server_task & task) {
         slot.reset();
         slot.id_task = task.id;
@@ -2054,6 +2066,11 @@ struct server_context {
             slot.lora = task.params.lora;
         }

+        bool can_detokenize = can_be_detokenized(ctx, slot.prompt_tokens);
+        if (!can_detokenize) {
+            send_error(task, "Prompt contains invalid tokens", ERROR_TYPE_INVALID_REQUEST);
+            return false;
+        }
         SLT_DBG(slot, "launching slot : %s\n", safe_json_to_str(slot.to_json()).c_str());

         if (slot.n_predict > 0 && slot.params.n_predict > slot.n_predict) {
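
For context: the added `can_be_detokenized()` guard verifies that every prompt token falls within `[0, n_vocab)` before the slot is launched, so a request carrying out-of-range token IDs is rejected up front with `ERROR_TYPE_INVALID_REQUEST` instead of crashing later during detokenization. Below is a minimal standalone sketch of the same bounds check, decoupled from the llama.cpp API; the `tokens_in_vocab` helper name and the hard-coded vocabulary size are illustrative assumptions (in the server, `n_vocab` is obtained via `llama_vocab_n_tokens()` on the loaded model).

```cpp
// Standalone sketch of the bounds check added in this patch (assumptions:
// the helper name and the vocab size are placeholders, not llama.cpp API).
#include <cstdint>
#include <cstdio>
#include <vector>

using llama_token = int32_t;

// Mirrors can_be_detokenized(): every token must index a valid vocab entry.
static bool tokens_in_vocab(const std::vector<llama_token> & tokens, int32_t n_vocab) {
    for (const auto & token : tokens) {
        if (token < 0 || token >= n_vocab) {
            return false;
        }
    }
    return true;
}

int main() {
    const int32_t n_vocab = 32000; // assumed vocab size, for illustration only

    const std::vector<llama_token> ok  = { 1, 15043, 2 };  // all in [0, n_vocab)
    const std::vector<llama_token> bad = { 1, 999999, 2 }; // 999999 >= n_vocab

    printf("ok : %s\n", tokens_in_vocab(ok,  n_vocab) ? "valid" : "invalid");
    printf("bad: %s\n", tokens_in_vocab(bad, n_vocab) ? "valid" : "invalid");
    return 0;
}
```

The check is O(n) in the prompt length and runs once per task, which is negligible next to prompt processing itself.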