server : fix thread.join() on exit (#12831)

2025-04-14 10:36:07 +00:00 · 2025-04-08 18:37:06 +02:00 · 2025-04-08 18:37:06 +02:00 · 78a1ba0a4f
commit 78a1ba0a4f
parent 2dabf759e7
1 changed files with 20 additions and 3 deletions
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@ -1705,6 +1705,8 @@ private:
 };

 struct server_response {
+    bool running = true;
+
    // for keeping track of all tasks waiting for the result
    std::unordered_set<int> waiting_task_ids;

@ -1759,6 +1761,10 @@ struct server_response {
        while (true) {
            std::unique_lock<std::mutex> lock(mutex_results);
            condition_results.wait(lock, [&]{
+                if (!running) {
+                    SRV_DBG("%s : queue result stop\n", __func__);
+                    std::terminate(); // we cannot return here since the caller is HTTP code
+                }
                return !queue_results.empty();
            });

@ -1789,6 +1795,10 @@ struct server_response {
            }

            std::cv_status cr_res = condition_results.wait_for(lock, std::chrono::seconds(timeout));
+            if (!running) {
+                SRV_DBG("%s : queue result stop\n", __func__);
+                std::terminate(); // we cannot return here since the caller is HTTP code
+            }
            if (cr_res == std::cv_status::timeout) {
                return nullptr;
            }
@ -1818,6 +1828,12 @@ struct server_response {
            }
        }
    }
+
+    // Terminate the waiting loops: flips `running` to false and wakes every
+    // thread currently blocked on condition_results.
+    void terminate() {
+        {
+            // Waiters read `running` while holding mutex_results. Writing it
+            // under the same mutex avoids a data race and guarantees the update
+            // cannot land between a waiter's check of the flag and the moment
+            // it starts sleeping (which would lose the wakeup and leave the
+            // waiter blocked forever).
+            std::unique_lock<std::mutex> lock(mutex_results);
+            running = false;
+        }
+        // Notify after releasing the lock so woken threads can re-acquire it
+        // immediately instead of blocking on the mutex we still hold.
+        condition_results.notify_all();
+    }
 };

 struct server_context {
@ -4491,9 +4507,10 @@ int main(int argc, char ** argv) {
    svr->new_task_queue = [&params] { return new httplib::ThreadPool(params.n_threads_http); };

    // clean up function, to be called before exit
-    auto clean_up = [&svr]() {
+    auto clean_up = [&svr, &ctx_server]() {
        SRV_INF("%s: cleaning up before exit...\n", __func__);
        svr->stop();
+        ctx_server.queue_results.terminate();
        llama_backend_free();
    };

@ -4534,7 +4551,7 @@ int main(int argc, char ** argv) {

    if (!ctx_server.load_model(params)) {
        clean_up();
-        // t.join(); // FIXME: see below
+        t.join();
        LOG_ERR("%s: exiting due to model loading error\n", __func__);
        return 1;
    }
@ -4582,7 +4599,7 @@ int main(int argc, char ** argv) {
    ctx_server.queue_tasks.start_loop();

    clean_up();
-    // t.join(); // FIXME: http thread may stuck if there is an on-going request. we don't need to care about this for now as the HTTP connection will already be closed at this point, but it's better to fix this
+    t.join();

    return 0;
 }