diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 760c36464..1bf1ee876 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1705,6 +1705,8 @@ private: }; struct server_response { + bool running = true; + // for keeping track of all tasks waiting for the result std::unordered_set<int> waiting_task_ids; @@ -1759,6 +1761,10 @@ struct server_response { while (true) { std::unique_lock<std::mutex> lock(mutex_results); condition_results.wait(lock, [&]{ + if (!running) { + SRV_DBG("%s : queue result stop\n", __func__); + std::terminate(); // we cannot return here since the caller is HTTP code + } return !queue_results.empty(); }); @@ -1789,6 +1795,10 @@ struct server_response { } std::cv_status cr_res = condition_results.wait_for(lock, std::chrono::seconds(timeout)); + if (!running) { + SRV_DBG("%s : queue result stop\n", __func__); + std::terminate(); // we cannot return here since the caller is HTTP code + } if (cr_res == std::cv_status::timeout) { return nullptr; } @@ -1818,6 +1828,12 @@ struct server_response { } } } + + // terminate the waiting loop + void terminate() { + running = false; + condition_results.notify_all(); + } }; struct server_context { @@ -4491,9 +4507,10 @@ int main(int argc, char ** argv) { svr->new_task_queue = [&params] { return new httplib::ThreadPool(params.n_threads_http); }; // clean up function, to be called before exit - auto clean_up = [&svr]() { + auto clean_up = [&svr, &ctx_server]() { SRV_INF("%s: cleaning up before exit...\n", __func__); svr->stop(); + ctx_server.queue_results.terminate(); llama_backend_free(); }; @@ -4534,7 +4551,7 @@ int main(int argc, char ** argv) { if (!ctx_server.load_model(params)) { clean_up(); - // t.join(); // FIXME: see below + t.join(); LOG_ERR("%s: exiting due to model loading error\n", __func__); return 1; } @@ -4582,7 +4599,7 @@ int main(int argc, char ** argv) { ctx_server.queue_tasks.start_loop(); clean_up(); - // t.join(); // FIXME: http thread may stuck if
there is an on-going request. we don't need to care about this for now as the HTTP connection will already be closed at this point, but it's better to fix this + t.join(); return 0; }