Skip to content

Commit da69112

Browse files
authored
[INFRAHELP-2899] fix: address race condition in load_nested_zip() and add tests (#15)
1 parent e6ff144 commit da69112

3 files changed

Lines changed: 106 additions & 16 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,3 +158,4 @@ cython_debug/
158158
# and can be added to the global gitignore or merged into this file. For a more nuclear
159159
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
160160
#.idea/
161+
.opencode

package_python_function/nested_zip_loader.py

Lines changed: 26 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,33 +24,43 @@
2424
"""
2525

2626
def load_nested_zip() -> None:
27-
from pathlib import Path
27+
import fcntl
28+
import importlib
2829
import sys
2930
import tempfile
30-
import importlib
31+
from pathlib import Path
3132

3233
temp_path = Path(tempfile.gettempdir())
3334

3435
target_package_path = temp_path / "package-python-function"
3536

36-
if not target_package_path.exists():
37-
import zipfile
38-
import shutil
39-
import os
37+
# We use manual locks here to allow target_package_path to stay static,
38+
# but avoid race conditions when multiple processes try to run this
39+
# function at the same time.
40+
lock_path = temp_path / ".package-python-function.lock"
41+
42+
with open(lock_path, "w") as lock_file:
43+
fcntl.flock(lock_file, fcntl.LOCK_EX)
44+
45+
if not target_package_path.exists():
46+
import zipfile
47+
import shutil
48+
import os
4049

41-
staging_package_path = temp_path / ".stage.package-python-function"
50+
staging_package_path = temp_path / ".stage.package-python-function"
4251

43-
if staging_package_path.exists():
44-
shutil.rmtree(str(staging_package_path))
52+
if staging_package_path.exists():
53+
shutil.rmtree(str(staging_package_path))
4554

46-
nested_zip_path = Path(__file__).parent / '.dependencies.zip'
55+
nested_zip_path = Path(__file__).parent / ".dependencies.zip"
4756

48-
zipfile.ZipFile(str(nested_zip_path), 'r').extractall(str(staging_package_path))
57+
with zipfile.ZipFile(str(nested_zip_path), "r") as nested_zip:
58+
nested_zip.extractall(str(staging_package_path))
4959

50-
# The idea here is that we don't rename the path until everything has been successfuly extracted.
51-
# This is expected to be a an atomic operation. That way, if AWS terminates us during the extraction,
52-
# we won't try and use the incomplete extraction.
53-
os.rename(str(staging_package_path), str(target_package_path))
60+
# The idea here is that we don't rename the path until everything has been successfully extracted.
61+
# This is expected to be an atomic operation. That way, if AWS terminates us during the extraction,
62+
# we won't try and use the incomplete extraction.
63+
os.rename(str(staging_package_path), str(target_package_path))
5464

5565
# Lambda sets up the sys.path like this:
5666
# ['/var/task', '/opt/python/lib/python3.13/site-packages', '/opt/python',
@@ -65,4 +75,4 @@ def load_nested_zip() -> None:
6575
sys.path[0] = str(target_package_path)
6676
importlib.reload(sys.modules[__name__])
6777

68-
load_nested_zip()
78+
load_nested_zip()

tests/test_nested_zip_loader.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
import importlib
2+
import multiprocessing
3+
import shutil
4+
import sys
5+
import tempfile
6+
import zipfile
7+
from pathlib import Path
8+
9+
import pytest
10+
11+
# We have to use importlib here because nested_zip_loader calls load_nested_zip
12+
# at IMPORT TIME, which causes us a world of hurt in these tests if we try to
13+
# import it "normally" here.
14+
LOADER_PATH = Path(__file__).parent.parent / "package_python_function" / "nested_zip_loader.py"
15+
PKG_NAME = "_test_nested_zip"
16+
17+
def _make_deps_zip(path: Path) -> None:
18+
with zipfile.ZipFile(path, "w") as zf:
19+
zf.writestr(f"{PKG_NAME}/__init__.py", "LOADED = True\n")
20+
21+
@pytest.fixture()
22+
def lambda_env(tmp_path, monkeypatch):
23+
"""Simulate a Lambda-like layout: a task dir with a package whose __init__.py
24+
is the nested_zip_loader code, and a .dependencies.zip with the 'real' code."""
25+
task_dir = tmp_path / "task"
26+
pkg_dir = task_dir / PKG_NAME
27+
pkg_dir.mkdir(parents=True)
28+
shutil.copy(LOADER_PATH, pkg_dir / "__init__.py")
29+
_make_deps_zip(pkg_dir / ".dependencies.zip")
30+
31+
tmp_dir = tmp_path / "tmp"
32+
tmp_dir.mkdir()
33+
monkeypatch.setenv("TMPDIR", str(tmp_dir))
34+
tempfile.tempdir = None
35+
36+
monkeypatch.syspath_prepend(str(task_dir))
37+
38+
yield tmp_path
39+
40+
sys.modules.pop(PKG_NAME, None)
41+
tempfile.tempdir = None
42+
43+
def test_cold_start_extracts(lambda_env):
44+
mod = importlib.import_module(PKG_NAME)
45+
assert mod.LOADED is True
46+
assert (lambda_env / "tmp" / "package-python-function").exists()
47+
48+
def test_warm_start_skips_extraction(lambda_env):
49+
target_pkg = lambda_env / "tmp" / "package-python-function" / PKG_NAME
50+
target_pkg.mkdir(parents=True)
51+
(target_pkg / "__init__.py").write_text("LOADED = 'warm'\n")
52+
53+
mod = importlib.import_module(PKG_NAME)
54+
assert mod.LOADED == "warm"
55+
56+
def test_stale_staging_cleaned(lambda_env):
57+
staging = lambda_env / "tmp" / ".stage.package-python-function"
58+
staging.mkdir(parents=True)
59+
(staging / "stale.txt").write_text("leftover")
60+
61+
importlib.import_module(PKG_NAME)
62+
assert not staging.exists()
63+
64+
def _worker(task_dir):
65+
import importlib
66+
import sys
67+
68+
sys.path.insert(0, task_dir)
69+
assert importlib.import_module(PKG_NAME).LOADED is True
70+
71+
def test_concurrent_no_race(lambda_env):
72+
ctx = multiprocessing.get_context("forkserver")
73+
procs = [ctx.Process(target=_worker, args=(str(lambda_env / "task"),)) for _ in range(2)]
74+
for p in procs:
75+
p.start()
76+
for p in procs:
77+
p.join(timeout=10)
78+
assert p.exitcode == 0, "A race condition occurred while extracting."
79+
assert (lambda_env / "tmp" / "package-python-function").exists()

0 commit comments

Comments
 (0)