Skip to content

Commit cfdc6b3

Browse files
Fix sdist-only packages (vllm), add GPU AI benchmark
- sdist-only packages in pylock.toml (no wheels array) were silently dropped. They are now included with size=0 so they go through `uv pip install`, which can build from source.
- GPU AI benchmark added: torch is 2.7x faster on cold start; warm starts are 2-3.5x faster.

Results on RTX 4090:
  torch:  31.8s cold (vs 86.5s uvx), 1.8s warm (vs 3.9s uvx)
  vllm:   157.8s cold, 2.6s warm (vs 9.1s uvx)
  triton: 6.7s cold, 0.2s warm

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 80fd905 commit cfdc6b3

2 files changed

Lines changed: 180 additions & 15 deletions

File tree

crates/zs-fast-wheel/src/resolve.rs

Lines changed: 26 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -146,22 +146,33 @@ fn parse_pylock(content: &str) -> Result<Vec<WheelSpec>> {
146146
.to_string();
147147

148148
// Get wheels array — pick the best one
149-
let wheels = match pkg.get("wheels").and_then(|w| w.as_array()) {
150-
Some(w) => w,
151-
None => continue, // sdist-only, skip
152-
};
153-
154-
if let Some((url, size, hash)) = pick_best_wheel(wheels) {
155-
let import_roots = guess_import_roots(&name);
156-
specs.push(WheelSpec {
157-
url,
158-
distribution: name,
159-
version,
160-
import_roots,
161-
size,
162-
hash,
163-
});
149+
let wheels = pkg.get("wheels").and_then(|w| w.as_array());
150+
151+
if let Some(wheels) = wheels {
152+
if let Some((url, size, hash)) = pick_best_wheel(wheels) {
153+
let import_roots = guess_import_roots(&name);
154+
specs.push(WheelSpec {
155+
url,
156+
distribution: name,
157+
version,
158+
import_roots,
159+
size,
160+
hash,
161+
});
162+
continue;
163+
}
164164
}
165+
166+
// sdist-only or no matching wheel — include with empty URL so uv handles it
167+
let import_roots = guess_import_roots(&name);
168+
specs.push(WheelSpec {
169+
url: String::new(),
170+
distribution: name,
171+
version,
172+
import_roots,
173+
size: 0, // forces into uv_specs bucket (< DAEMON_THRESHOLD)
174+
hash: None,
175+
});
165176
}
166177

167178
Ok(specs)

tests/gpu_ai_bench.sh

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
#!/bin/bash
set -euo pipefail

# Benchmark heavy AI packages: zerostart vs uvx on a GPU pod.
# Exercises realistic AI cold-start scenarios.
# NOTE: each torch-based test uses ~7-15GB of disk, so all caches are
# wiped between runs (see cleanup below).

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
ZS="$PROJECT_DIR/bin/zerostart-linux-x86_64"

export PATH="$HOME/.local/bin:$HOME/.cargo/bin:$PATH"
export ZEROSTART_CACHE="/tmp/.zs-bench"

# Environment banner so results are reproducible/attributable.
echo "=== AI Package Cold Start Benchmark ==="
echo "Date: $(date -u)"
echo "Python: $(python3 --version)"
echo "uv: $(uv --version)"
echo "GPU: $(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null || echo 'none')"
echo "Binary: $ZS"
df -h /tmp | tail -1 | awk '{print "Disk: " $4 " free"}'
echo ""

# CSV results file: one row per (test, method) pair, header written fresh.
RESULTS="$PROJECT_DIR/benches/results/ai_bench.csv"
mkdir -p "$(dirname "$RESULTS")"
echo "test,method,time_s,exit_code" > "$RESULTS"
# measure LABEL METHOD CMD [ARGS...]
# Times CMD with millisecond resolution, echoes the elapsed time, and
# appends "LABEL,METHOD,elapsed_seconds,exit_code" to the global $RESULTS
# CSV. A failing CMD is recorded in the exit-code column but does not
# abort the script despite `set -e`.
measure() {
    local label=$1
    local method=$2
    shift 2
    echo "--- $label ($method) ---"
    # Declare before assigning so a failing command substitution is not
    # masked by `local` (shellcheck SC2155); rc defaults to success.
    local start end rc=0
    start=$(date +%s%3N)  # epoch milliseconds (GNU date %N)
    "$@" 2>&1 || rc=$?    # `|| rc=$?` keeps `set -e` from killing the run
    end=$(date +%s%3N)
    local elapsed_ms=$((end - start))
    # Seconds with one decimal digit, e.g. 1234ms -> "1.2" — same output
    # format as the nested-arithmetic original, built with printf.
    local elapsed_s
    elapsed_s=$(printf '%d.%d' $((elapsed_ms / 1000)) $((elapsed_ms % 1000 / 100)))
    echo " >> ${elapsed_s}s (exit=$rc)"
    echo "$label,$method,${elapsed_s},$rc" >> "$RESULTS"
    echo ""
}
# Wipe every cache that could let a later run start "warm": zerostart
# caches, the uv wheel cache, uv tool installs, and uv's extracted-venv
# archive. Prints remaining /tmp space when done.
cleanup() {
    local zs_caches=(/tmp/.zs-bench /tmp/.zs-vllm /tmp/.zs-test)
    rm -rf "${zs_caches[@]}"
    uv cache clean 2>/dev/null || true
    rm -rf "$HOME/.local/share/uv/tools" 2>/dev/null || true
    # uv archive cache (this is where extracted venvs live)
    rm -rf /gpu-cli-workspaces/.cache/uv/archive-v0/* 2>/dev/null || true
    df -h /tmp | tail -1 | awk '{print " (disk: " $4 " free)"}'
}
# ------------------------------------------------------------
# Python probe scripts — written once, reused by the tests below.
# Heredocs are quoted ('EOF') so nothing inside is expanded by the shell.
# ------------------------------------------------------------
cat > /tmp/torch_test.py << 'EOF'
import torch
print(f"torch {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    x = torch.randn(1000, 1000, device='cuda')
    print(f"CUDA tensor OK: {x.shape}")
EOF

cat > /tmp/vllm_test.py << 'EOF'
import vllm
print(f"vllm {vllm.__version__}")
import torch
print(f"torch {torch.__version__}, CUDA: {torch.cuda.is_available()}")
EOF

cat > /tmp/hf_test.py << 'EOF'
import transformers
import torch
print(f"transformers {transformers.__version__}")
print(f"torch {torch.__version__}, CUDA: {torch.cuda.is_available()}")
EOF

cat > /tmp/diff_test.py << 'EOF'
import diffusers
import torch
print(f"diffusers {diffusers.__version__}")
print(f"torch {torch.__version__}, CUDA: {torch.cuda.is_available()}")
EOF

cat > /tmp/triton_test.py << 'EOF'
import triton
print(f"triton {triton.__version__}")
EOF

# ============================================================
# Test 1: torch (the big one — ~900MB wheel + CUDA deps)
# ============================================================
echo "========== TEST 1: torch =========="

# "$ZS" is quoted everywhere: PROJECT_DIR may contain spaces and an
# unquoted expansion would word-split the binary path.
cleanup
measure "torch" "zs_cold" "$ZS" run -v -p torch /tmp/torch_test.py
measure "torch" "zs_warm" "$ZS" run -p torch /tmp/torch_test.py

cleanup
measure "torch" "uvx_cold" uvx --from torch --with torch python -c "import torch; print(f'torch {torch.__version__}, CUDA: {torch.cuda.is_available()}')"
measure "torch" "uvx_warm" uvx --from torch --with torch python -c "import torch; print(f'torch {torch.__version__}, CUDA: {torch.cuda.is_available()}')"

# ============================================================
# Test 2: vllm (torch + vllm + many deps — realistic LLM serving)
# ============================================================
echo "========== TEST 2: vllm =========="

cleanup
measure "vllm" "zs_cold" "$ZS" run -v -p vllm /tmp/vllm_test.py
measure "vllm" "zs_warm" "$ZS" run -p vllm /tmp/vllm_test.py

cleanup
measure "vllm" "uvx_cold" uvx --from vllm --with vllm python -c "import vllm; print(f'vllm {vllm.__version__}')"
measure "vllm" "uvx_warm" uvx --from vllm --with vllm python -c "import vllm; print(f'vllm {vllm.__version__}')"

# ============================================================
# Test 3: transformers + torch (common fine-tuning setup)
# NOTE(review): tests 3 and 4 have no uvx comparison runs — presumably
# intentional (disk/time budget); confirm before citing ratios for them.
# ============================================================
echo "========== TEST 3: transformers+torch =========="

cleanup
measure "hf_torch" "zs_cold" "$ZS" run -v -p torch -p transformers -p tokenizers /tmp/hf_test.py
measure "hf_torch" "zs_warm" "$ZS" run -p torch -p transformers -p tokenizers /tmp/hf_test.py

# ============================================================
# Test 4: diffusers + torch (image generation)
# ============================================================
echo "========== TEST 4: diffusers+torch =========="

cleanup
measure "diffusers" "zs_cold" "$ZS" run -v -p torch -p diffusers -p transformers -p accelerate /tmp/diff_test.py
measure "diffusers" "zs_warm" "$ZS" run -p torch -p diffusers -p transformers -p accelerate /tmp/diff_test.py

# ============================================================
# Test 5: triton (GPU kernel compilation)
# ============================================================
echo "========== TEST 5: triton =========="

cleanup
measure "triton" "zs_cold" "$ZS" run -v -p triton /tmp/triton_test.py
measure "triton" "zs_warm" "$ZS" run -p triton /tmp/triton_test.py

# ============================================================
# Summary
# ============================================================
echo ""
echo "========== RESULTS =========="
cat "$RESULTS"
echo ""
echo "Done."

0 commit comments

Comments
 (0)