Skip to content

Commit 8efc066

Browse files
committed
test: add unit tests for nested_zip_loader
1 parent 652d078 commit 8efc066

3 files changed

Lines changed: 124 additions & 181 deletions

File tree

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# Fix race condition in nested_zip_loader.py
2+
3+
## Problem
4+
Multiple concurrent Lambda processes race on:
5+
1. The shared `.stage.package-python-function` staging directory (one process can `rmtree` another's extraction)
6+
2. The `os.rename()` call — if process A wins, process B gets `OSError: Directory not empty`
7+
8+
## Solution
9+
10+
Use `fcntl.flock` (stdlib) to serialize extraction. All directories remain static. The lock means "I'm the one who gets to decide if extraction is needed."
11+
12+
### Changes to `package_python_function/nested_zip_loader.py`
13+
14+
Replace lines 36–56 with:
15+
16+
```python
17+
import fcntl
18+
import zipfile
19+
import shutil
20+
import os
21+
22+
lock_path = temp_path / ".package-python-function.lock"
23+
staging_package_path = temp_path / ".stage.package-python-function"
24+
25+
with open(str(lock_path), 'w') as lock_file:
26+
fcntl.flock(lock_file, fcntl.LOCK_EX)
27+
if not target_package_path.exists():
28+
if staging_package_path.exists():
29+
shutil.rmtree(str(staging_package_path))
30+
nested_zip_path = Path(__file__).parent / '.dependencies.zip'
31+
zipfile.ZipFile(str(nested_zip_path), 'r').extractall(str(staging_package_path))
32+
os.rename(str(staging_package_path), str(target_package_path))
33+
```
34+
35+
### Why this works
36+
- `fcntl.flock` serializes access — only one process extracts at a time
37+
- The lock is released automatically when the `with` block exits (file close releases flock)
38+
- Winner extracts if needed; losers wake up, see target exists, move on
39+
- All paths are static: `/tmp/.package-python-function.lock`, `/tmp/.stage.package-python-function`, `/tmp/package-python-function`
40+
- Still uses atomic `os.rename` so a terminated extraction never leaves a half-baked target
41+
- Warm start cost: opening a file + uncontested lock acquisition (microseconds)
42+
43+
## Verification
44+
- Unit test: mock `fcntl.flock`, verify only one process extracts when target is missing
45+
- Integration: launch multiple processes concurrently that call `load_nested_zip()`, verify all succeed and target is extracted exactly once

tests/test_nested_zip_loader.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
import importlib
2+
import multiprocessing
3+
import shutil
4+
import sys
5+
import tempfile
6+
import zipfile
7+
from pathlib import Path
8+
9+
import pytest
10+
11+
# We have to use importlib here because nested_zip_loader calls load_nested_zip
12+
# at IMPORT TIME, which causes us a world of hurt in these tests if we try to
13+
# import it "normally" here.
14+
LOADER_PATH = Path(__file__).parent.parent / "package_python_function" / "nested_zip_loader.py"
15+
PKG_NAME = "_test_nested_zip"
16+
17+
def _make_deps_zip(path: Path) -> None:
18+
with zipfile.ZipFile(path, "w") as zf:
19+
zf.writestr(f"{PKG_NAME}/__init__.py", "LOADED = True\n")
20+
21+
@pytest.fixture()
22+
def lambda_env(tmp_path, monkeypatch):
23+
"""Simulate a Lambda-like layout: a task dir with a package whose __init__.py
24+
is the nested_zip_loader code, and a .dependencies.zip with the 'real' code."""
25+
task_dir = tmp_path / "task"
26+
pkg_dir = task_dir / PKG_NAME
27+
pkg_dir.mkdir(parents=True)
28+
shutil.copy(LOADER_PATH, pkg_dir / "__init__.py")
29+
_make_deps_zip(pkg_dir / ".dependencies.zip")
30+
31+
tmp_dir = tmp_path / "tmp"
32+
tmp_dir.mkdir()
33+
monkeypatch.setenv("TMPDIR", str(tmp_dir))
34+
tempfile.tempdir = None
35+
36+
monkeypatch.syspath_prepend(str(task_dir))
37+
38+
yield tmp_path
39+
40+
sys.modules.pop(PKG_NAME, None)
41+
tempfile.tempdir = None
42+
43+
def test_cold_start_extracts(lambda_env):
44+
mod = importlib.import_module(PKG_NAME)
45+
assert mod.LOADED is True
46+
assert (lambda_env / "tmp" / "package-python-function").exists()
47+
48+
def test_warm_start_skips_extraction(lambda_env):
49+
target_pkg = lambda_env / "tmp" / "package-python-function" / PKG_NAME
50+
target_pkg.mkdir(parents=True)
51+
(target_pkg / "__init__.py").write_text("LOADED = 'warm'\n")
52+
53+
mod = importlib.import_module(PKG_NAME)
54+
assert mod.LOADED == "warm"
55+
56+
def test_stale_staging_cleaned(lambda_env):
57+
staging = lambda_env / "tmp" / ".stage.package-python-function"
58+
staging.mkdir(parents=True)
59+
(staging / "stale.txt").write_text("leftover")
60+
61+
importlib.import_module(PKG_NAME)
62+
assert not staging.exists()
63+
64+
def _worker(task_dir):
65+
import importlib
66+
import sys
67+
68+
sys.path.insert(0, task_dir)
69+
assert importlib.import_module(PKG_NAME).LOADED is True
70+
71+
def test_concurrent_no_race(lambda_env):
72+
ctx = multiprocessing.get_context("forkserver")
73+
procs = [ctx.Process(target=_worker, args=(str(lambda_env / "task"),)) for _ in range(2)]
74+
for p in procs:
75+
p.start()
76+
for p in procs:
77+
p.join(timeout=10)
78+
assert p.exitcode == 0, "A race condition occured while extracting."
79+
assert (lambda_env / "tmp" / "package-python-function").exists()

tests/test_package_python_function.py

Lines changed: 0 additions & 181 deletions
This file was deleted.

0 commit comments

Comments
 (0)