Skip to content

Commit 3711dbe

Browse files
authored
fix: improve binary install experience (#38)
* fix: improve binary install experience - Add visible stderr output during binary download (P0) - Add gRPC health probe after socket appears in ensure_running (P0) - Add retry with exponential backoff for download (3 attempts) (P1) - Update test mock for retry-aware download error handling * fix: address copilot review on gRPC readiness loop - Close channel in finally block to prevent FD leaks on exceptions - Recompute time_left per iteration to avoid overshooting timeout - Add backoff sleep on FutureTimeoutError to prevent tight spin
1 parent efec5c1 commit 3711dbe

2 files changed

Lines changed: 82 additions & 29 deletions

File tree

capiscio_sdk/_rpc/process.py

Lines changed: 77 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import shutil
88
import stat
99
import subprocess
10+
import sys
1011
import time
1112
from pathlib import Path
1213
from typing import Optional, Tuple
@@ -152,6 +153,7 @@ def _download_binary(self) -> Path:
152153
"""Download the capiscio-core binary for the current platform.
153154
154155
Downloads from GitHub releases to ~/.capiscio/bin/<version>/.
156+
Retries up to 3 times with exponential backoff.
155157
Returns the path to the executable.
156158
"""
157159
os_name, arch_name = self._get_platform_info()
@@ -164,30 +166,50 @@ def _download_binary(self) -> Path:
164166
filename = f"capiscio-{os_name}-{arch_name}{ext}"
165167
url = f"https://github.com/{GITHUB_REPO}/releases/download/v{CORE_VERSION}/{filename}"
166168

169+
sys.stderr.write(
170+
f"capiscio-core v{CORE_VERSION} not found. "
171+
f"Downloading for {os_name}/{arch_name}...\n"
172+
)
173+
sys.stderr.flush()
167174
logger.info("Downloading capiscio-core v%s for %s/%s...", CORE_VERSION, os_name, arch_name)
168175

169176
target_path.parent.mkdir(parents=True, exist_ok=True)
170-
try:
171-
with httpx.stream("GET", url, follow_redirects=True, timeout=60.0) as resp:
172-
resp.raise_for_status()
173-
with open(target_path, "wb") as f:
174-
for chunk in resp.iter_bytes(chunk_size=8192):
175-
f.write(chunk)
177+
max_attempts = 3
178+
for attempt in range(1, max_attempts + 1):
179+
try:
180+
with httpx.stream("GET", url, follow_redirects=True, timeout=60.0) as resp:
181+
resp.raise_for_status()
182+
with open(target_path, "wb") as f:
183+
for chunk in resp.iter_bytes(chunk_size=8192):
184+
f.write(chunk)
176185

177-
# Make executable
178-
st = os.stat(target_path)
179-
os.chmod(target_path, st.st_mode | stat.S_IEXEC)
186+
# Make executable
187+
st = os.stat(target_path)
188+
os.chmod(target_path, st.st_mode | stat.S_IEXEC)
180189

181-
logger.info("Installed capiscio-core v%s at %s", CORE_VERSION, target_path)
182-
return target_path
190+
sys.stderr.write(f"Installed capiscio-core v{CORE_VERSION} at {target_path}\n")
191+
sys.stderr.flush()
192+
logger.info("Installed capiscio-core v%s at %s", CORE_VERSION, target_path)
193+
return target_path
183194

184-
except Exception as e:
185-
if target_path.exists():
186-
target_path.unlink()
187-
raise RuntimeError(
188-
f"Failed to download capiscio-core from {url}: {e}\n"
189-
"You can also set CAPISCIO_BINARY to point to an existing binary."
190-
) from e
195+
except Exception as e:
196+
if target_path.exists():
197+
target_path.unlink()
198+
if attempt < max_attempts:
199+
delay = 2 ** (attempt - 1)
200+
logger.warning(
201+
"Download attempt %d/%d failed: %s. Retrying in %ds...",
202+
attempt, max_attempts, e, delay,
203+
)
204+
time.sleep(delay)
205+
else:
206+
raise RuntimeError(
207+
f"Failed to download capiscio-core from {url} "
208+
f"after {max_attempts} attempts: {e}\n"
209+
"You can also set CAPISCIO_BINARY to point to an existing binary."
210+
) from e
211+
# unreachable, but keeps type checker happy
212+
raise RuntimeError("Download failed")
191213

192214
def ensure_running(
193215
self,
@@ -250,8 +272,7 @@ def ensure_running(
250272
start_time = time.time()
251273
while time.time() - start_time < timeout:
252274
if self._socket_path.exists():
253-
self._started = True
254-
return self.address
275+
break
255276

256277
# Check if process died
257278
if self._process.poll() is not None:
@@ -263,13 +284,43 @@ def ensure_running(
263284
)
264285

265286
time.sleep(0.1)
287+
else:
288+
# Timeout - kill process and raise
289+
self.stop()
290+
raise RuntimeError(
291+
f"capiscio server did not start within {timeout}s. "
292+
f"Socket not found at {self._socket_path}"
293+
)
266294

267-
# Timeout - kill process and raise
268-
self.stop()
269-
raise RuntimeError(
270-
f"capiscio server did not start within {timeout}s. "
271-
f"Socket not found at {self._socket_path}"
272-
)
295+
# Socket exists — verify gRPC is actually accepting connections
296+
remaining = timeout - (time.time() - start_time)
297+
if remaining > 0:
298+
import grpc
299+
addr = f"unix://{self._socket_path}"
300+
deadline = time.time() + remaining
301+
while time.time() < deadline:
302+
time_left = deadline - time.time()
303+
if time_left <= 0:
304+
break
305+
channel = grpc.insecure_channel(addr)
306+
try:
307+
grpc.channel_ready_future(channel).result(timeout=min(1.0, time_left))
308+
break
309+
except grpc.FutureTimeoutError:
310+
time.sleep(0.1)
311+
except Exception:
312+
time.sleep(0.1)
313+
finally:
314+
channel.close()
315+
else:
316+
self.stop()
317+
raise RuntimeError(
318+
f"capiscio server socket appeared but gRPC not ready "
319+
f"within {timeout}s at {self._socket_path}"
320+
)
321+
322+
self._started = True
323+
return self.address
273324

274325
def stop(self) -> None:
275326
"""Stop the gRPC server process."""

tests/unit/test_process.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -186,16 +186,18 @@ def test_download_binary_already_cached(self, mock_stream):
186186
mock_stream.assert_not_called()
187187
assert result == mock_path
188188

189+
@patch("capiscio_sdk._rpc.process.time.sleep")
189190
@patch("httpx.stream")
190-
def test_download_binary_http_error(self, mock_stream):
191-
"""Test download handles HTTP errors."""
191+
def test_download_binary_http_error(self, mock_stream, mock_sleep):
192+
"""Test download handles HTTP errors with retries."""
192193
pm = ProcessManager()
193194

194195
with patch("capiscio_sdk._rpc.process.platform.system", return_value="Linux"):
195196
with patch("capiscio_sdk._rpc.process.platform.machine", return_value="x86_64"):
196197
with patch.object(ProcessManager, "_get_cached_binary_path") as mock_cached:
197198
mock_path = MagicMock()
198-
mock_path.exists.side_effect = [False, False] # Not exists before download, not exists after cleanup
199+
# exists() called: once before loop, then once per attempt for cleanup
200+
mock_path.exists.return_value = False
199201
mock_path.parent = MagicMock()
200202
mock_cached.return_value = mock_path
201203

0 commit comments

Comments
 (0)