mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-04-16 11:36:08 +00:00
server : fix crash when using verbose output with input tokens that are not in printable range (#12178) (#12338)
* Fix DOS index bug * Remove new APIs * remove extra line * Remove from API * Add extra newline * Update examples/server/server.cpp --------- Co-authored-by: Xuan-Son Nguyen <thichthat@gmail.com>
This commit is contained in:
parent
f08f4b3187
commit
2048b5913d
@ -2040,6 +2040,18 @@ struct server_context {
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool can_be_detokenized(const struct llama_context * ctx, const std::vector<llama_token> & tokens) {
|
||||||
|
const llama_model * model = llama_get_model(ctx);
|
||||||
|
const llama_vocab * vocab = llama_model_get_vocab(model);
|
||||||
|
const int32_t n_vocab = llama_vocab_n_tokens(vocab);
|
||||||
|
for (const auto & token : tokens) {
|
||||||
|
if (token < 0 || token >= n_vocab) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool launch_slot_with_task(server_slot & slot, const server_task & task) {
|
bool launch_slot_with_task(server_slot & slot, const server_task & task) {
|
||||||
slot.reset();
|
slot.reset();
|
||||||
slot.id_task = task.id;
|
slot.id_task = task.id;
|
||||||
@ -2054,6 +2066,11 @@ struct server_context {
|
|||||||
slot.lora = task.params.lora;
|
slot.lora = task.params.lora;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool can_detokenize = can_be_detokenized(ctx, slot.prompt_tokens);
|
||||||
|
if (!can_detokenize) {
|
||||||
|
send_error(task, "Prompt contains invalid tokens", ERROR_TYPE_INVALID_REQUEST);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
SLT_DBG(slot, "launching slot : %s\n", safe_json_to_str(slot.to_json()).c_str());
|
SLT_DBG(slot, "launching slot : %s\n", safe_json_to_str(slot.to_json()).c_str());
|
||||||
|
|
||||||
if (slot.n_predict > 0 && slot.params.n_predict > slot.n_predict) {
|
if (slot.n_predict > 0 && slot.params.n_predict > slot.n_predict) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user