@@ -300,6 +300,14 @@ void WorkerThreadsTaskRunner::BlockingDrain() {
300300 pending_worker_tasks_.Lock ().BlockingDrain ();
301301}
302302
303+ bool WorkerThreadsTaskRunner::TimedBlockingDrain (uint64_t timeout_in_ns) {
304+ return pending_worker_tasks_.Lock ().TimedBlockingDrain (timeout_in_ns);
305+ }
306+
307+ bool WorkerThreadsTaskRunner::HasOutstandingTasks () {
308+ return pending_worker_tasks_.Lock ().HasOutstandingTasks ();
309+ }
310+
303311void WorkerThreadsTaskRunner::Shutdown () {
304312 pending_worker_tasks_.Lock ().Stop ();
305313 delayed_task_scheduler_->Stop ();
@@ -581,26 +589,23 @@ void NodePlatform::DrainTasks(Isolate* isolate) {
581589 if (!per_isolate) return ;
582590
583591 do {
584- // FIXME(54918): we should not be blocking on the worker tasks on the
585- // main thread in one go. Doing so leads to two problems:
586- // 1. If any of the worker tasks post another foreground task and wait
587- // for it to complete, and that foreground task is posted right after
588- // we flush the foreground task queue and before the foreground thread
589- // goes into sleep, we'll never be able to wake up to execute that
590- // foreground task and in turn the worker task will never complete, and
591- // we have a deadlock.
592- // 2. Worker tasks can be posted from any thread, not necessarily associated
593- // with the current isolate, and we can be blocking on a worker task that
594- // is associated with a completely unrelated isolate in the event loop.
595- // This is suboptimal.
592+ // Worker tasks (e.g. V8 JIT compilation) may post foreground tasks and
593+ // wait for their completion. If we block indefinitely on worker tasks
594+ // without flushing foreground tasks, those worker tasks can never finish,
595+ // causing a deadlock (see https://github.com/nodejs/node/issues/54918).
596596 //
597- // However, not blocking on the worker tasks at all can lead to loss of some
598- // critical user-blocking worker tasks e.g. wasm async compilation tasks,
599- // which should block the main thread until they are completed, as the
600- // documentation suggets. As a compromise, we currently only block on
601- // user-blocking tasks to reduce the chance of deadlocks while making sure
602- // that criticl user-blocking tasks are not lost.
603- worker_thread_task_runner_->BlockingDrain ();
597+ // To avoid this, we interleave: wait briefly for worker tasks to complete,
598+ // then flush any foreground tasks that may have been posted, and repeat.
599+ // This ensures foreground tasks posted by workers get a chance to run.
600+ while (worker_thread_task_runner_->HasOutstandingTasks ()) {
601+ // Wait up to 1ms for outstanding worker tasks to complete.
602+ constexpr uint64_t kDrainTimeoutNs = 1'000'000 ; // 1ms
603+ if (worker_thread_task_runner_->TimedBlockingDrain (kDrainTimeoutNs )) {
604+ break ; // All outstanding tasks drained.
605+ }
606+ // Flush foreground tasks that worker tasks may be waiting on.
607+ per_isolate->FlushForegroundTasksInternal ();
608+ }
604609 } while (per_isolate->FlushForegroundTasksInternal ());
605610}
606611
@@ -832,6 +837,20 @@ void TaskQueue<T>::Locked::BlockingDrain() {
832837 }
833838}
834839
840+ template <class T >
841+ bool TaskQueue<T>::Locked::TimedBlockingDrain(uint64_t timeout_in_ns) {
842+ while (queue_->outstanding_tasks_ > 0 ) {
843+ int r = queue_->outstanding_tasks_drained_ .TimedWait (lock_, timeout_in_ns);
844+ if (r != 0 ) return false ; // Timed out, still has outstanding tasks.
845+ }
846+ return true ;
847+ }
848+
849+ template <class T >
850+ bool TaskQueue<T>::Locked::HasOutstandingTasks() {
851+ return queue_->outstanding_tasks_ > 0 ;
852+ }
853+
835854template <class T >
836855void TaskQueue<T>::Locked::Stop() {
837856 queue_->stopped_ = true ;
0 commit comments