#!/bin/bash
set -euo pipefail

# Heavy AI package benchmark: zerostart vs uvx on GPU pod
# Tests realistic AI cold-start scenarios
# NOTE: Each torch-based test uses ~7-15GB disk. We clean between every run.

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
ZS="$PROJECT_DIR/bin/zerostart-linux-x86_64"
export PATH="$HOME/.local/bin:$HOME/.cargo/bin:$PATH"
export ZEROSTART_CACHE="/tmp/.zs-bench"

echo "=== AI Package Cold Start Benchmark ==="
echo "Date: $(date -u)"
echo "Python: $(python3 --version)"
echo "uv: $(uv --version)"
echo "GPU: $(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null || echo 'none')"
echo "Binary: $ZS"
df -h /tmp | tail -1 | awk '{print "Disk: " $4 " free"}'
echo ""

# Results file
RESULTS="$PROJECT_DIR/benches/results/ai_bench.csv"
mkdir -p "$PROJECT_DIR/benches/results"
echo "test,method,time_s,exit_code" > "$RESULTS"

# measure LABEL METHOD CMD... : run CMD, time it with millisecond resolution,
# and append "label,method,seconds,exit_code" to the results CSV.
measure() {
  local label=$1
  local method=$2
  shift 2
  echo "--- $label ($method) ---"
  local start end elapsed_ms elapsed_s rc=0
  start=$(date +%s%3N)
  "$@" 2>&1 || rc=$?   # capture the exit code without tripping set -e
  end=$(date +%s%3N)
  elapsed_ms=$((end - start))
  elapsed_s=$((elapsed_ms / 1000)).$((elapsed_ms % 1000 / 100))   # truncated to one decimal place
  echo " >> ${elapsed_s}s (exit=$rc)"
  echo "$label,$method,${elapsed_s},$rc" >> "$RESULTS"
  echo ""
}
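
# Optional, unused by default: a hypothetical higher-precision formatter, in
# case the one-decimal truncation in measure() is too coarse. To use it,
# replace the elapsed_s arithmetic above with: elapsed_s=$(fmt_ms "$elapsed_ms")
fmt_ms() {
  # Render milliseconds as seconds with three decimals, e.g. 1234 -> 1.234
  printf '%d.%03d' "$(( $1 / 1000 ))" "$(( $1 % 1000 ))"
}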

cleanup() {
  # Aggressively clean ALL caches to free disk for next test
  rm -rf /tmp/.zs-bench /tmp/.zs-vllm /tmp/.zs-test
  uv cache clean 2>/dev/null || true
  rm -rf ~/.local/share/uv/tools 2>/dev/null || true
  # Clean uv archive cache (this is where extracted venvs live)
  rm -rf /gpu-cli-workspaces/.cache/uv/archive-v0/* 2>/dev/null || true
  df -h /tmp | tail -1 | awk '{print " (disk: " $4 " free)"}'
}
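
# Optional pre-flight guard, a sketch assuming GNU coreutils df on the pod.
# The NOTE at the top says each torch-based run needs roughly 7-15GB, so this
# hypothetical helper aborts early instead of failing mid-download.
# Example usage (not wired in by default): require_disk_gb 20
require_disk_gb() {
  local need_gb=$1
  local free_gb
  free_gb=$(df -BG --output=avail /tmp | tail -1 | tr -dc '0-9')
  if [ "${free_gb:-0}" -lt "$need_gb" ]; then
    echo "ERROR: only ${free_gb}GB free in /tmp, need ${need_gb}GB" >&2
    exit 1
  fi
}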

# Create test scripts
cat > /tmp/torch_test.py << 'EOF'
import torch
print(f"torch {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
  print(f"GPU: {torch.cuda.get_device_name(0)}")
  x = torch.randn(1000, 1000, device='cuda')
  print(f"CUDA tensor OK: {x.shape}")
EOF

cat > /tmp/vllm_test.py << 'EOF'
import vllm
print(f"vllm {vllm.__version__}")
import torch
print(f"torch {torch.__version__}, CUDA: {torch.cuda.is_available()}")
EOF

cat > /tmp/hf_test.py << 'EOF'
import transformers
import torch
print(f"transformers {transformers.__version__}")
print(f"torch {torch.__version__}, CUDA: {torch.cuda.is_available()}")
EOF

cat > /tmp/diff_test.py << 'EOF'
import diffusers
import torch
print(f"diffusers {diffusers.__version__}")
print(f"torch {torch.__version__}, CUDA: {torch.cuda.is_available()}")
EOF

cat > /tmp/triton_test.py << 'EOF'
import triton
print(f"triton {triton.__version__}")
EOF

# ============================================================
# Test 1: torch (the big one: ~900MB wheel + CUDA deps)
# ============================================================
echo "========== TEST 1: torch =========="

cleanup
measure "torch" "zs_cold" "$ZS" run -v -p torch /tmp/torch_test.py
measure "torch" "zs_warm" "$ZS" run -p torch /tmp/torch_test.py

cleanup
measure "torch" "uvx_cold" uvx --from torch --with torch python -c "import torch; print(f'torch {torch.__version__}, CUDA: {torch.cuda.is_available()}')"
measure "torch" "uvx_warm" uvx --from torch --with torch python -c "import torch; print(f'torch {torch.__version__}, CUDA: {torch.cuda.is_available()}')"

# ============================================================
# Test 2: vllm (torch + vllm + many deps; realistic LLM serving)
# ============================================================
echo "========== TEST 2: vllm =========="

cleanup
measure "vllm" "zs_cold" "$ZS" run -v -p vllm /tmp/vllm_test.py
measure "vllm" "zs_warm" "$ZS" run -p vllm /tmp/vllm_test.py

cleanup
measure "vllm" "uvx_cold" uvx --from vllm --with vllm python -c "import vllm; print(f'vllm {vllm.__version__}')"
measure "vllm" "uvx_warm" uvx --from vllm --with vllm python -c "import vllm; print(f'vllm {vllm.__version__}')"

# ============================================================
# Test 3: transformers + torch (common fine-tuning setup)
# ============================================================
echo "========== TEST 3: transformers+torch =========="

cleanup
measure "hf_torch" "zs_cold" "$ZS" run -v -p torch -p transformers -p tokenizers /tmp/hf_test.py
measure "hf_torch" "zs_warm" "$ZS" run -p torch -p transformers -p tokenizers /tmp/hf_test.py

# ============================================================
# Test 4: diffusers + torch (image generation)
# ============================================================
echo "========== TEST 4: diffusers+torch =========="

cleanup
measure "diffusers" "zs_cold" "$ZS" run -v -p torch -p diffusers -p transformers -p accelerate /tmp/diff_test.py
measure "diffusers" "zs_warm" "$ZS" run -p torch -p diffusers -p transformers -p accelerate /tmp/diff_test.py

# ============================================================
# Test 5: triton (GPU kernel compilation)
# ============================================================
echo "========== TEST 5: triton =========="

cleanup
measure "triton" "zs_cold" "$ZS" run -v -p triton /tmp/triton_test.py
measure "triton" "zs_warm" "$ZS" run -p triton /tmp/triton_test.py

# ============================================================
# Summary
# ============================================================
echo ""
echo "========== RESULTS =========="
cat "$RESULTS"
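
# Optional post-processing sketch (assumes the CSV layout written above:
# test,method,time_s,exit_code). Prints a quick cold-start comparison for
# every test that has both a zerostart and a uvx measurement.
echo ""
echo "Cold-start comparison (where both tools were measured):"
awk -F, '
  NR > 1 && $2 == "zs_cold"  { zs[$1]  = $3 }
  NR > 1 && $2 == "uvx_cold" { uvx[$1] = $3 }
  END {
    for (test in zs)
      if (test in uvx)
        printf "  %s: zerostart %ss vs uvx %ss\n", test, zs[test], uvx[test]
  }
' "$RESULTS"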
echo ""
echo "Done."