Skip to content

Commit 17b3e0a

Browse files
committed
Address code review comments
1 parent ba1ef0f commit 17b3e0a

5 files changed

Lines changed: 82 additions & 15 deletions

File tree

sentry_sdk/_batcher.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,11 @@ def _reset_in_child() -> None:
5151
os.register_at_fork(after_in_child=_reset_in_child)
5252

5353
def _reset_thread_state(self) -> None:
54-
self._flusher = None
54+
self._buffer = []
5555
self._lock = threading.Lock()
56+
self._active = threading.local()
57+
self._flush_event = threading.Event()
58+
self._flusher = None
5659
self._flusher_pid = None
5760

5861
def _ensure_thread(self) -> bool:

sentry_sdk/_span_batcher.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,18 @@ def _reset_in_child() -> None:
6363

6464
os.register_at_fork(after_in_child=_reset_in_child)
6565

66+
def _reset_thread_state(self) -> None:
67+
self._span_buffer = defaultdict(list)
68+
self._running_size = defaultdict(lambda: 0)
69+
70+
self._lock = threading.Lock()
71+
self._active = threading.local()
72+
73+
self._flush_event = threading.Event()
74+
75+
self._flusher = None
76+
self._flusher_pid = None
77+
6678
def add(self, span: "StreamedSpan") -> None:
6779
# Bail out if the current thread is already executing batcher code.
6880
# This prevents deadlocks when code running inside the batcher (e.g.

tests/test_logs.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -835,14 +835,18 @@ def test_log_batcher_lock_reset_in_child_after_fork(sentry_init):
835835
child inherits the lock locked. The holding thread does not exist in
836836
the child, so the lock can never be released and _ensure_thread
837837
deadlocks forever. The after-fork hook must replace the lock with a
838-
fresh one in the child and reset _flusher / _flusher_pid.
838+
fresh one in the child and reset
839+
_flusher / _flusher_pid / _buffer / _active / _flush_event.
839840
"""
840841
sentry_init(enable_logs=True)
841842
batcher = sentry_sdk.get_client().log_batcher
842843
assert batcher is not None
843844

844845
original_lock = batcher._lock
845846
original_lock.acquire()
847+
batcher._buffer.append(object())
848+
batcher._active.flag = True
849+
batcher._flush_event.set()
846850
pid = os.fork()
847851
if pid == 0:
848852
# Child: was the lock object replaced and is the new one not
@@ -852,7 +856,19 @@ def test_log_batcher_lock_reset_in_child_after_fork(sentry_init):
852856
replaced = batcher._lock is not original_lock
853857
unheld = batcher._lock.acquire(blocking=False)
854858
flusher_reset = batcher._flusher is None and batcher._flusher_pid is None
855-
os._exit(0 if replaced and unheld and flusher_reset else 1)
859+
buffer_reset = len(batcher._buffer) == 0
860+
active_reset = not getattr(batcher._active, "flag", False)
861+
event_reset = not batcher._flush_event.is_set()
862+
os._exit(
863+
0
864+
if replaced
865+
and unheld
866+
and flusher_reset
867+
and buffer_reset
868+
and active_reset
869+
and event_reset
870+
else 1
871+
)
856872

857873
original_lock.release()
858874
_, status = os.waitpid(pid, 0)

tests/test_metrics.py

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -531,24 +531,39 @@ def test_metrics_batcher_lock_reset_in_child_after_fork(sentry_init):
531531
exist in the child, so the lock can never be released and
532532
_ensure_thread deadlocks forever. The after-fork hook must replace
533533
the lock with a fresh one in the child and reset
534-
_flusher / _flusher_pid.
534+
_flusher / _flusher_pid / _buffer / _active / _flush_event.
535535
"""
536536
sentry_init()
537537
batcher = sentry_sdk.get_client().metrics_batcher
538538
assert batcher is not None
539539

540540
original_lock = batcher._lock
541541
original_lock.acquire()
542+
543+
batcher._buffer.append(object())
544+
batcher._active.flag = True
545+
batcher._flush_event.set()
546+
542547
pid = os.fork()
543548
if pid == 0:
544-
# Child: was the lock object replaced and is the new one not
545-
# held? Without the fix, _lock is `original_lock` inherited
546-
# locked, so `replaced` is False. blocking=False guarantees the
547-
# child can't hang on a regression.
548549
replaced = batcher._lock is not original_lock
549550
unheld = batcher._lock.acquire(blocking=False)
551+
550552
flusher_reset = batcher._flusher is None and batcher._flusher_pid is None
551-
os._exit(0 if replaced and unheld and flusher_reset else 1)
553+
buffer_reset = len(batcher._buffer) == 0
554+
active_reset = not getattr(batcher._active, "flag", False)
555+
event_reset = not batcher._flush_event.is_set()
556+
557+
os._exit(
558+
0
559+
if replaced
560+
and unheld
561+
and flusher_reset
562+
and buffer_reset
563+
and active_reset
564+
and event_reset
565+
else 1
566+
)
552567

553568
original_lock.release()
554569
_, status = os.waitpid(pid, 0)

tests/tracing/test_span_streaming.py

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1570,7 +1570,9 @@ def test_span_batcher_lock_reset_in_child_after_fork(sentry_init):
15701570
child inherits the lock locked. The holding thread does not exist in
15711571
the child, so the lock can never be released and _ensure_thread
15721572
deadlocks forever. The after-fork hook must replace the lock with a
1573-
fresh one in the child and reset _flusher / _flusher_pid.
1573+
fresh one in the child and reset
1574+
_flusher / _flusher_pid / _span_buffer / _running_size / _active /
1575+
_flush_event.
15741576
"""
15751577
sentry_init(
15761578
traces_sample_rate=1.0,
@@ -1581,16 +1583,35 @@ def test_span_batcher_lock_reset_in_child_after_fork(sentry_init):
15811583

15821584
original_lock = batcher._lock
15831585
original_lock.acquire()
1586+
1587+
batcher._span_buffer["test-trace-id"].append(object())
1588+
batcher._running_size["test-trace-id"] = 42
1589+
batcher._active.flag = True
1590+
batcher._flush_event.set()
1591+
15841592
pid = os.fork()
15851593
if pid == 0:
1586-
# Child: was the lock object replaced and is the new one not
1587-
# held? Without the fix, _lock is `original_lock` inherited
1588-
# locked, so `replaced` is False. blocking=False guarantees the
1589-
# child can't hang on a regression.
15901594
replaced = batcher._lock is not original_lock
15911595
unheld = batcher._lock.acquire(blocking=False)
1596+
15921597
flusher_reset = batcher._flusher is None and batcher._flusher_pid is None
1593-
os._exit(0 if replaced and unheld and flusher_reset else 1)
1598+
span_buffer_reset = len(batcher._span_buffer) == 0
1599+
running_size_reset = len(batcher._running_size) == 0
1600+
1601+
active_reset = not getattr(batcher._active, "flag", False)
1602+
event_reset = not batcher._flush_event.is_set()
1603+
1604+
os._exit(
1605+
0
1606+
if replaced
1607+
and unheld
1608+
and flusher_reset
1609+
and span_buffer_reset
1610+
and running_size_reset
1611+
and active_reset
1612+
and event_reset
1613+
else 1
1614+
)
15941615

15951616
original_lock.release()
15961617
_, status = os.waitpid(pid, 0)

0 commit comments

Comments
 (0)