mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-04-14 10:36:07 +00:00
server : fix thread.join() on exit (#12831)
This commit is contained in:
parent
2dabf759e7
commit
78a1ba0a4f
@ -1705,6 +1705,8 @@ private:
|
||||
};
|
||||
|
||||
struct server_response {
|
||||
bool running = true;
|
||||
|
||||
// for keeping track of all tasks waiting for the result
|
||||
std::unordered_set<int> waiting_task_ids;
|
||||
|
||||
@ -1759,6 +1761,10 @@ struct server_response {
|
||||
while (true) {
|
||||
std::unique_lock<std::mutex> lock(mutex_results);
|
||||
condition_results.wait(lock, [&]{
|
||||
if (!running) {
|
||||
SRV_DBG("%s : queue result stop\n", __func__);
|
||||
std::terminate(); // we cannot return here since the caller is HTTP code
|
||||
}
|
||||
return !queue_results.empty();
|
||||
});
|
||||
|
||||
@ -1789,6 +1795,10 @@ struct server_response {
|
||||
}
|
||||
|
||||
std::cv_status cr_res = condition_results.wait_for(lock, std::chrono::seconds(timeout));
|
||||
if (!running) {
|
||||
SRV_DBG("%s : queue result stop\n", __func__);
|
||||
std::terminate(); // we cannot return here since the caller is HTTP code
|
||||
}
|
||||
if (cr_res == std::cv_status::timeout) {
|
||||
return nullptr;
|
||||
}
|
||||
@ -1818,6 +1828,12 @@ struct server_response {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// terminate the waiting loop
|
||||
void terminate() {
|
||||
running = false;
|
||||
condition_results.notify_all();
|
||||
}
|
||||
};
|
||||
|
||||
struct server_context {
|
||||
@ -4491,9 +4507,10 @@ int main(int argc, char ** argv) {
|
||||
svr->new_task_queue = [¶ms] { return new httplib::ThreadPool(params.n_threads_http); };
|
||||
|
||||
// clean up function, to be called before exit
|
||||
auto clean_up = [&svr]() {
|
||||
auto clean_up = [&svr, &ctx_server]() {
|
||||
SRV_INF("%s: cleaning up before exit...\n", __func__);
|
||||
svr->stop();
|
||||
ctx_server.queue_results.terminate();
|
||||
llama_backend_free();
|
||||
};
|
||||
|
||||
@ -4534,7 +4551,7 @@ int main(int argc, char ** argv) {
|
||||
|
||||
if (!ctx_server.load_model(params)) {
|
||||
clean_up();
|
||||
// t.join(); // FIXME: see below
|
||||
t.join();
|
||||
LOG_ERR("%s: exiting due to model loading error\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
@ -4582,7 +4599,7 @@ int main(int argc, char ** argv) {
|
||||
ctx_server.queue_tasks.start_loop();
|
||||
|
||||
clean_up();
|
||||
// t.join(); // FIXME: http thread may stuck if there is an on-going request. we don't need to care about this for now as the HTTP connection will already be closed at this point, but it's better to fix this
|
||||
t.join();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user